1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 16:56:49 +02:00

Guard against invalid sequences from external matchfinders (#3465)

This commit is contained in:
Elliot Gorokhovsky 2023-01-31 13:55:48 -05:00 committed by GitHub
parent 4794bbfe00
commit 64052ef57d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 37 deletions

View File

@ -277,14 +277,8 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}
/* Enables validation for external sequences in debug builds. */
static int ZSTD_resolveExternalSequenceValidation(int mode) {
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2)
(void)mode;
return 1;
#else
return mode;
#endif
}
/* Resolves maxBlockSize to the default if no value is present. */
@ -3050,6 +3044,23 @@ static size_t ZSTD_postProcessExternalMatchFinderResult(
}
}
/* ZSTD_fastSequenceLengthSum() :
* Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
* Similar to another function in zstd_compress.c (determine_blockSize),
* except it doesn't check for a block delimiter to end summation.
* Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
* This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
size_t matchLenSum, litLenSum, i;
matchLenSum = 0;
litLenSum = 0;
for (i = 0; i < seqBufSize; i++) {
litLenSum += seqBuf[i].litLength;
matchLenSum += seqBuf[i].matchLength;
}
return litLenSum + matchLenSum;
}
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
@ -3167,8 +3178,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
/* Return early if there is no error, since we don't need to worry about last literals */
if (!ZSTD_isError(nbPostProcessedSeqs)) {
ZSTD_sequencePosition seqPos = {0,0,0};
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize
size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
FORWARD_IF_ERROR(
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos,
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
src, srcSize
),
"Failed to copy external sequences to seqStore!"
);
ms->ldmSeqStore = NULL;
DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
@ -6286,7 +6304,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ip += inSeqs[idx].litLength;
seqPos->posInSrc += inSeqs[idx].litLength;
}
RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
seqPos->idx = idx+1;
return 0;
}
@ -6444,13 +6462,13 @@ blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD
blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
if (end) {
if (inSeqs[spos].matchLength != 0)
RETURN_ERROR(corruption_detected, "delimiter format error : both matchlength and offset must be == 0");
RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
break;
}
spos++;
}
if (!end)
RETURN_ERROR(corruption_detected, "Reached end of sequences without finding a block delimiter");
RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
return blockSize;
}
@ -6471,9 +6489,9 @@ static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
{ size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
if (explicitBlockSize > blockSize)
RETURN_ERROR(corruption_detected, "sequences incorrectly define a too large block");
RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
if (explicitBlockSize > remaining)
RETURN_ERROR(srcSize_wrong, "sequences define a frame longer than source");
RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
return explicitBlockSize;
}
}

View File

@ -108,6 +108,29 @@ size_t zstreamExternalMatchFinder(
compressionLevel,
windowSize
);
case EMF_INVALID_OFFSET:
outSeqs[0].offset = 1 << 20;
outSeqs[0].matchLength = 4;
outSeqs[0].litLength = (U32)(srcSize - 4);
return 1;
case EMF_INVALID_MATCHLEN:
outSeqs[0].offset = 1;
outSeqs[0].matchLength = (U32)(srcSize);
outSeqs[0].litLength = 1;
return 1;
case EMF_INVALID_LITLEN:
outSeqs[0].offset = 0;
outSeqs[0].matchLength = 0;
outSeqs[0].litLength = (U32)(srcSize + 1);
return 1;
case EMF_INVALID_LAST_LITS:
outSeqs[0].offset = 1;
outSeqs[0].matchLength = 1;
outSeqs[0].litLength = 1;
outSeqs[1].offset = 0;
outSeqs[1].matchLength = 0;
outSeqs[1].litLength = (U32)(srcSize - 1);
return 2;
case EMF_SMALL_ERROR:
return outSeqsCapacity + 1;
case EMF_BIG_ERROR:

View File

@ -20,7 +20,11 @@ typedef enum {
EMF_ONE_BIG_SEQ = 1,
EMF_LOTS_OF_SEQS = 2,
EMF_BIG_ERROR = 3,
EMF_SMALL_ERROR = 4
EMF_SMALL_ERROR = 4,
EMF_INVALID_OFFSET = 5,
EMF_INVALID_MATCHLEN = 6,
EMF_INVALID_LITLEN = 7,
EMF_INVALID_LAST_LITS = 8
} EMF_testCase;
size_t zstreamExternalMatchFinder(

View File

@ -128,6 +128,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
}

View File

@ -1873,48 +1873,61 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
* check that the reference is preserved across compressions */
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
for (enableFallback = 0; enableFallback < 1; enableFallback++) {
for (enableFallback = 0; enableFallback <= 1; enableFallback++) {
size_t testCaseId;
size_t const numTestCases = 9;
EMF_testCase const EMF_successCases[] = {
EMF_testCase const testCases[] = {
EMF_ONE_BIG_SEQ,
EMF_LOTS_OF_SEQS,
};
size_t const EMF_numSuccessCases = 2;
EMF_testCase const EMF_failureCases[] = {
EMF_ZERO_SEQS,
EMF_BIG_ERROR,
EMF_SMALL_ERROR,
EMF_INVALID_OFFSET,
EMF_INVALID_MATCHLEN,
EMF_INVALID_LITLEN,
EMF_INVALID_LAST_LITS
};
size_t const EMF_numFailureCases = 3;
/* Test external matchfinder success scenarios */
for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) {
ZSTD_ErrorCode const errorCodes[] = {
ZSTD_error_no_error,
ZSTD_error_no_error,
ZSTD_error_externalMatchFinder_failed,
ZSTD_error_externalMatchFinder_failed,
ZSTD_error_externalMatchFinder_failed,
ZSTD_error_externalSequences_invalid,
ZSTD_error_externalSequences_invalid,
ZSTD_error_externalSequences_invalid,
ZSTD_error_externalSequences_invalid
};
for (testCaseId = 0; testCaseId < numTestCases; testCaseId++) {
size_t res;
externalMatchState = EMF_successCases[testCaseId];
int const compressionShouldSucceed = (
(errorCodes[testCaseId] == ZSTD_error_no_error) ||
(enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed)
);
int const testWithSequenceValidation = (
testCases[testCaseId] == EMF_INVALID_OFFSET
);
externalMatchState = testCases[testCaseId];
ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
}
/* Test external matchfinder failure scenarios */
for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) {
size_t res;
externalMatchState = EMF_failureCases[testCaseId];
ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
if (enableFallback) {
if (compressionShouldSucceed) {
CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
} else {
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
CHECK(
ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed,
ZSTD_getErrorCode(res) != errorCodes[testCaseId],
"EMF: Wrong error code: %s", ZSTD_getErrorName(res)
);
}