diff --git a/programs/Makefile b/programs/Makefile index a935c744a..8b97592d5 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -68,12 +68,27 @@ EXT = endif # zlib detection +NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support VOID = /dev/null HAVE_ZLIB := $(shell printf '\#include \nint main(){}' | $(CC) -o have_zlib -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0) ifeq ($(HAVE_ZLIB), 1) +ZLIB_MSG := ==> building zstd with .gz compression support ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS ZLIBLD = -lz +else +ZLIB_MSG := $(NO_ZLIB_MSG) endif +# lzma detection +NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support +HAVE_LZMA := $(shell printf '\#include \nint main(){}' | $(CC) -o have_lzma -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0) +ifeq ($(HAVE_LZMA), 1) +LZMA_MSG := ==> building zstd with .xz/.lzma compression support +LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS +LZMALD = -llzma +else +LZMA_MSG := $(NO_LZMA_MSG) +endif + .PHONY: default all clean clean_decomp_o install uninstall generate_res @@ -85,14 +100,15 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) zstd : CPPFLAGS += $(ZLIBCPP) zstd : LDFLAGS += $(ZLIBLD) -zstd-nogz : HAVE_ZLIB=0 -zstd zstd-nogz : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd zstd-nogz : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o -ifeq ($(HAVE_ZLIB), 1) - @echo "==> building zstd with .gz decompression support " -else - @echo "==> no zlib, building zstd with .zst support only (no .gz support) " -endif +zstd : LZMA_MSG := $(NO_LZMA_MSG) +zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG) +zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG) +xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP) +xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD) +zstd zstd-nogz xzstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) +zstd zstd-nogz xzstd : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o + @echo "$(ZLIB_MSG)" + @echo "$(LZMA_MSG)" ifneq (,$(filter Windows%,$(OS))) windres/generate_res.bat endif diff --git a/programs/fileio.c b/programs/fileio.c index 41daa125e..e6481f1fa 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -44,6 +44,9 @@ # define z_const # endif #endif +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) +# include +#endif /*-************************************* @@ -71,7 +74,6 @@ #define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */ #define FNSPACE 30 -#define GZ_EXTENSION ".gz" /*-************************************* @@ -434,6 +436,65 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFile #endif +#ifdef ZSTD_LZMACOMPRESS +static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize, int plain_lzma) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret ret; + + if (compressionLevel < 0) compressionLevel = 0; + if (compressionLevel > 9) compressionLevel = 9; + + if (plain_lzma) { + lzma_options_lzma opt_lzma; + if (lzma_lzma_preset(&opt_lzma, compressionLevel)) EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName); + ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ + if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); + } else { + ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ + if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); + } + + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + + while (1) { + if (strm.avail_in == 0) { + size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); + if (inSize == 0) action = LZMA_FINISH; + inFileSize += inSize; + strm.next_in = ress->srcBuffer; + strm.avail_in = inSize; + } + + ret = lzma_code(&strm, action); + + if (ret != LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); + { size_t const compBytes = ress->dstBufferSize - strm.avail_out; + if (compBytes) { + if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) EXM_THROW(73, "Write error : cannot write to output file"); + outFileSize += compBytes; + strm.next_out = ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } + } + if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100) + else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100); + if (ret == LZMA_STREAM_END) break; + } + + lzma_end(&strm); + *readsize = inFileSize; + + return outFileSize; +} +#endif + + /*! FIO_compressFilename_internal() : * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. * @return : 0 : compression completed correctly, @@ -448,14 +509,26 @@ static int FIO_compressFilename_internal(cRess_t ress, U64 compressedfilesize = 0; U64 const fileSize = UTIL_getFileSize(srcFileName); - if (g_compressionType) { + switch (g_compressionType) { + case FIO_zstdCompression: + break; + case FIO_gzipCompression: #ifdef ZSTD_GZCOMPRESS - compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); + compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); #else - (void)compressionLevel; - EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName); + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName); #endif - goto finish; + goto finish; + case FIO_xzCompression: + case FIO_lzmaCompression: +#ifdef ZSTD_LZMACOMPRESS + compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName); +#endif + goto finish; } /* init */ @@ -763,10 +836,10 @@ static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips) { if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */ int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); - if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n"); + if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)"); { const char lastZeroByte[1] = { 0 }; size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file); - if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n"); + if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero"); } } } @@ -849,6 +922,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co { unsigned long long outFileSize = 0; z_stream strm; + int flush = Z_NO_FLUSH; int ret; strm.zalloc = Z_NULL; @@ -866,11 +940,12 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co for ( ; ; ) { if (strm.avail_in == 0) { ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); - if (ress->srcBufferLoaded == 0) break; + if (ress->srcBufferLoaded == 0) flush = Z_FINISH; strm.next_in = (z_const unsigned char*)ress->srcBuffer; strm.avail_in = (uInt)ress->srcBufferLoaded; } - ret = inflate(&strm, Z_NO_FLUSH); + ret = inflate(&strm, flush); + if (ret == Z_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName); if (ret != Z_OK && ret != Z_STREAM_END) { DISPLAY("zstd: %s: inflate error %d \n", srcFileName, ret); return 0; } { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; if (decompBytes) { @@ -886,7 +961,60 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in); ress->srcBufferLoaded = strm.avail_in; ret = inflateEnd(&strm); - if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d \n", srcFileName, ret); + if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d", srcFileName, ret); + return outFileSize; +} +#endif + + +#ifdef ZSTD_LZMADECOMPRESS +static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma) +{ + unsigned long long outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret ret; + + strm.next_in = 0; + strm.avail_in = 0; + if (plain_lzma) { + ret = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ + } else { + ret = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ + } + + if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_decoder/lzma_stream_decoder error %d", srcFileName, ret); + + strm.next_out = ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + strm.avail_in = ress->srcBufferLoaded; + strm.next_in = ress->srcBuffer; + + for ( ; ; ) { + if (strm.avail_in == 0) { + ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); + if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = ress->srcBuffer; + strm.avail_in = ress->srcBufferLoaded; + } + ret = lzma_code(&strm, action); + + if (ret == LZMA_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { DISPLAY("zstd: %s: lzma_code decoding error %d \n", srcFileName, ret); return 0; } + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(31, "Write error : cannot write to output file"); + outFileSize += decompBytes; + strm.next_out = ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } + } + if (ret == LZMA_STREAM_END) break; + } + + if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in); + ress->srcBufferLoaded = strm.avail_in; + lzma_end(&strm); return outFileSize; } #endif @@ -924,7 +1052,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch } readSomething = 1; /* there is at least >= 4 bytes in srcFile */ if (ress.srcBufferLoaded < toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */ - if (buf[0] == 31 && buf[1] == 139) { /* gz header */ + if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ #ifdef ZSTD_GZDECOMPRESS unsigned long long const result = FIO_decompressGzFrame(&ress, srcFile, srcFileName); if (result == 0) return 1; @@ -932,6 +1060,16 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch #else DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without ZSTD_GZDECOMPRESS) -- ignored \n", srcFileName); return 1; +#endif + } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ + || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ +#ifdef ZSTD_LZMADECOMPRESS + unsigned long long const result = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD); + if (result == 0) return 1; + filesize += result; +#else + DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName); + return 1; #endif } else { if (!ZSTD_isFrame(ress.srcBuffer, toRead)) { @@ -1020,32 +1158,31 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles missingFiles += FIO_decompressSrcFile(ress, suffix, srcNamesTable[u]); if (fclose(ress.dstFile)) EXM_THROW(72, "Write error : cannot properly close stdout"); } else { - size_t const suffixSize = strlen(suffix); - size_t const gzipSuffixSize = strlen(GZ_EXTENSION); + size_t suffixSize; size_t dfnSize = FNSPACE; unsigned u; char* dstFileName = (char*)malloc(FNSPACE); if (dstFileName==NULL) EXM_THROW(73, "not enough memory for dstFileName"); for (u=0; u tmp + $ZSTD -f --format=gzip tmp + $ZSTD -f tmp + cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp + head -c -1 tmp.gz | $ZSTD -t && die "incomplete frame not detected !" + rm tmp* +else + $ECHO "gzip mode not supported" +fi + + +$ECHO "\n**** xz compatibility tests **** " + +LZMAMODE=1 +$ZSTD --format=xz -V || LZMAMODE=0 +if [ $LZMAMODE -eq 1 ]; then + $ECHO "xz support detected" + XZEXE=1 + xz -V && lzma -V || XZEXE=0 + if [ $XZEXE -eq 1 ]; then + ./datagen > tmp + $ZSTD --format=lzma -f tmp + $ZSTD --format=xz -f tmp + xz -t -v tmp.xz + xz -t -v tmp.lzma + xz -f -k tmp + lzma -f -k --lzma1 tmp + $ZSTD -d -f -v tmp.xz + $ZSTD -d -f -v tmp.lzma + rm tmp* + else + $ECHO "xz binary not detected" + fi +else + $ECHO "xz mode not supported" +fi + + +$ECHO "\n**** xz frame tests **** " + +if [ $LZMAMODE -eq 1 ]; then + ./datagen > tmp + $ZSTD -f --format=xz tmp + $ZSTD -f --format=lzma tmp + $ZSTD -f tmp + cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp + head -c -1 tmp.xz | $ZSTD -t && die "incomplete frame not detected !" + head -c -1 tmp.lzma | $ZSTD -t && die "incomplete frame not detected !" + rm tmp* +else + $ECHO "xz mode not supported" +fi + + $ECHO "\n**** zstd round-trip tests **** " roundTripTest