diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index 3e6fb1e8f..a6d91c14d 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -50,6 +50,13 @@ jobs: - name: thread sanitizer zstreamtest run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream + ubsan-zstreamtest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: undefined behavior sanitizer zstreamtest + run: CC=clang make uasan-test-zstream + # lasts ~15mn tsan-fuzztest: runs-on: ubuntu-latest @@ -69,6 +76,13 @@ jobs: make gcc8install CC=gcc-8 make -j uasan-test-zstd /dev/null + $(RM) seqBench
diff --git a/contrib/seqBench/seqBench.c b/contrib/seqBench/seqBench.c
new file mode 100644
index 000000000..7efebec7b
--- /dev/null
+++ b/contrib/seqBench/seqBench.c
@@ -0,0 +1,128 @@
+#define ZSTD_STATIC_LINKING_ONLY
+#include <zstd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+/* seqBench : round-trip check for the sequence API.
+ * Reads the file named by argv[1], turns it into sequences with
+ * ZSTD_generateSequences(), re-compresses those sequences with
+ * ZSTD_compressSequences() (explicit block delimiters), decompresses the
+ * result and compares it with the original bytes.
+ * Usage  : seqBench <file>
+ * @return : 0 when the regenerated data matches the input, 1 on any failure.
+ */
+int main(int argc, char *argv[]) {
+    ZSTD_CCtx *zc = NULL;
+    ZSTD_Sequence *seqs = NULL;
+    FILE *f = NULL;
+    char *inBuf = NULL;
+    char *outBuf = NULL;
+    char *validationBuf = NULL;
+    size_t inBufSize = 0;
+    size_t seqsCapacity = 0;
+    size_t nbSeqs = 0;
+    size_t outBufCapacity = 0;
+    size_t outBufSize = 0;
+    size_t regenSize = 0;
+    size_t ret = 0;
+    long fileSize = -1;
+    int result = 1;   /* pessimistic : only a verified round-trip sets it to 0 */
+
+    if (argc != 2) {
+        printf("Usage: seqBench <file>\n"); // TODO provide the block delim option here
+        return 1;
+    }
+
+    /* Load the whole input file in memory */
+    f = fopen(argv[1], "rb");
+    if (f == NULL) {
+        printf("ERROR: cannot open %s\n", argv[1]);
+        goto cleanup;
+    }
+    if (fseek(f, 0, SEEK_END) == 0) {
+        fileSize = ftell(f);   /* -1 on failure */
+    }
+    if (fileSize < 0 || fseek(f, 0, SEEK_SET) != 0) {
+        printf("ERROR: cannot determine size of %s\n", argv[1]);
+        goto cleanup;
+    }
+    inBufSize = (size_t)fileSize;
+
+    inBuf = malloc(inBufSize + 1);   /* +1 : never request 0 bytes */
+    if (inBuf == NULL) {
+        printf("ERROR: out of memory\n");
+        goto cleanup;
+    }
+    if (fread(inBuf, 1, inBufSize, f) != inBufSize) {
+        printf("ERROR: cannot read %s\n", argv[1]);
+        goto cleanup;
+    }
+    fclose(f);
+    f = NULL;
+
+    /* Allocate every working buffer up front, then check them together */
+    zc = ZSTD_createCCtx();
+    seqsCapacity = ZSTD_sequenceBound(inBufSize);
+    outBufCapacity = ZSTD_compressBound(inBufSize);
+    seqs = calloc(seqsCapacity, sizeof *seqs);   /* calloc guards the size multiplication */
+    outBuf = malloc(outBufCapacity);
+    validationBuf = malloc(inBufSize + 1);
+    if (zc == NULL || seqs == NULL || outBuf == NULL || validationBuf == NULL) {
+        printf("ERROR: out of memory\n");
+        goto cleanup;
+    }
+
+    /* The return value is the number of sequences actually produced;
+     * only that many entries of seqs[] are meaningful. */
+    nbSeqs = ZSTD_generateSequences(zc, seqs, seqsCapacity, inBuf, inBufSize);
+    if (ZSTD_isError(nbSeqs)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(nbSeqs));
+        goto cleanup;
+    }
+
+    ret = ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+    if (ZSTD_isError(ret)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(ret));
+        goto cleanup;
+    }
+
+    /* dstCapacity must be the real size of outBuf (compressBound), not the
+     * input size, otherwise poorly compressible inputs fail needlessly. */
+    outBufSize = ZSTD_compressSequences(zc, outBuf, outBufCapacity, seqs, nbSeqs, inBuf, inBufSize);
+    if (ZSTD_isError(outBufSize)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(outBufSize));
+        goto cleanup;
+    }
+
+    regenSize = ZSTD_decompress(validationBuf, inBufSize, outBuf, outBufSize);
+    if (ZSTD_isError(regenSize)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(regenSize));
+        goto cleanup;
+    }
+
+    if (regenSize == inBufSize && memcmp(inBuf, validationBuf, inBufSize) == 0) {
+        printf("Compression and decompression were successful!\n");
+        result = 0;
+    } else {
+        size_t badIdx = 0;
+        printf("ERROR: input and validation buffers don't match!\n");
+        /* only the first regenSize bytes of validationBuf were written */
+        while (badIdx < regenSize && inBuf[badIdx] == validationBuf[badIdx]) {
+            badIdx++;
+        }
+        printf("First bad index: %zu\n", badIdx);
+    }
+
+cleanup:
+    if (f != NULL) {
+        fclose(f);
+    }
+    free(validationBuf);
+    free(outBuf);
+    free(seqs);
+    free(inBuf);
+    ZSTD_freeCCtx(zc);   /* accepts NULL */
+    return result;
+}
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 59d441b2a..8a0c2f191 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3003,6 +3003,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) zc->seqCollector.seqIndex += seqStoreSeqSize; } +size_t ZSTD_sequenceBound(size_t srcSize) { + return (srcSize / ZSTD_MINMATCH_MIN) + 1; +} + size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize) {
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 5bd412df4..a257f5f65 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2059,7 +2059,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; - op += decompressedSize; + op = op ? 
op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ zds->expected = 0; zds->streamStage = zdss_init; someMoreWork = 0; @@ -2177,14 +2177,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } case zdss_flush: - { size_t const toFlushSize = zds->outEnd - zds->outStart; + if (op != NULL) { + size_t const toFlushSize = zds->outEnd - zds->outStart; size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ zds->streamStage = zdss_read; if ( (zds->outBuffSize < zds->fParams.frameContentSize) - && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", (int)(zds->outBuffSize - zds->outStart), (U32)zds->fParams.blockSizeMax); diff --git a/lib/zstd.h b/lib/zstd.h index 65d5adddc..cf3dd7e27 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -836,7 +836,7 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); /*! ZSTD_decompressStream() : * Streaming decompression function. * Call repetitively to consume full input updating it as necessary. - * Funtion will update both input and output `pos` fields exposing current state via these fields: + * Function will update both input and output `pos` fields exposing current state via these fields: * - `input.pos < input.size`, some input remaining and caller should provide remaining input * on the next call. * - `output.pos < output.size`, decoder finished and flushed all remaining buffers. @@ -844,7 +844,7 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); * call ZSTD_decompressStream() again to flush remaining data to output. * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. 
* - * @return : 0 when a frame is completly decoded and fully flushed, + * @return : 0 when a frame is completely decoded and fully flushed, * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. */ @@ -1396,6 +1396,15 @@ typedef enum { ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ } ZSTD_sequenceFormat_e; +/*! ZSTD_sequenceBound() : + * `srcSize` : size of the input buffer + * @return : upper-bound for the number of sequences that can be generated + * from a buffer of srcSize bytes + * + * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). + */ +ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); + /*! ZSTD_generateSequences() : * Generate sequences using ZSTD_compress2(), given a source buffer. * diff --git a/tests/.gitignore b/tests/.gitignore index 9a6939a57..fcb865d61 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -12,6 +12,7 @@ zstreamtest zstreamtest32 zstreamtest_asan zstreamtest_tsan +zstreamtest_ubsan zstreamtest-dll datagen paramgrill diff --git a/tests/Makefile b/tests/Makefile index f4318ebfb..afea6475a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -190,6 +190,11 @@ zstreamtest_tsan : CFLAGS += -fsanitize=thread zstreamtest_tsan : $(ZSTREAMFILES) $(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT) +CLEAN += zstreamtest_ubsan +zstreamtest_ubsan : CFLAGS += -fsanitize=undefined +zstreamtest_ubsan : $(ZSTREAMFILES) + $(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT) + # note : broken : requires symbols unavailable from dynamic library zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 3fcdd5399..94eb848aa 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -522,7 +522,7 @@ static int basicUnitTests(U32 
seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); - DISPLAYLEVEL(3, "test%3i : NULL buffers : ", testNb++); + DISPLAYLEVEL(3, "test%3i : NULL output and NULL input : ", testNb++); inBuff.src = NULL; inBuff.size = 0; inBuff.pos = 0; @@ -548,6 +548,36 @@ static int basicUnitTests(U32 seed, double compressibility) { size_t const ret = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (ret != 0) goto _output_error; } + DISPLAYLEVEL(3, "OK\n"); + + DISPLAYLEVEL(3, "test%3i : NULL output buffer with non-NULL input : ", testNb++); + { + const char* test = "aa"; + inBuff.src = test; + inBuff.size = 2; + inBuff.pos = 0; + outBuff.dst = NULL; + outBuff.size = 0; + outBuff.pos = 0; + CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); + CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed"); + CHECK_Z( ZSTD_endStream(zc, &outBuff) ); + outBuff.dst = (char*)(compressedBuffer); + outBuff.size = compressedBufferSize; + outBuff.pos = 0; + { size_t const r = ZSTD_endStream(zc, &outBuff); + CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r); + } + inBuff.src = outBuff.dst; + inBuff.size = outBuff.pos; + inBuff.pos = 0; + outBuff.dst = NULL; + outBuff.size = 0; + outBuff.pos = 0; + CHECK_Z( ZSTD_initDStream(zd) ); + CHECK_Z(ZSTD_decompressStream(zd, &outBuff, &inBuff)); + } + DISPLAYLEVEL(3, "OK\n"); /* _srcSize compression test */ DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);