mirror of
synced 2025-03-06 16:56:49 +02:00
Added ZSTD_compress_generic()
Used in fileio.c (zstd cli). Need to set macro ZSTD_NEWAPI to trigger it.
This commit is contained in:
@ -51,7 +51,7 @@
<a name="Chapter2"></a><h2>Version</h2><pre></pre>
<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< library version number; to be used when checking dll version */<b>
<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< to be used when checking dll version */<b>
<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
@ -501,7 +501,7 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies.
* Special: value 0 means "do not change searchLog". */
ZSTD_p_minMatchLength, </b>/* Minimum match size (except for repeat-matches, which limit is hard-coded).<b>
ZSTD_p_minMatch, </b>/* Minimum size of searched matches (note : repCode matches can be smaller).<b>
* Larger values make faster compression and decompression, but decrease ratio.
* Note that currently, for all strategies < btopt, effective minimum is 4.
@ -526,13 +526,13 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
</b>/* frame parameters */<b>
ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1) */<b>
ZSTD_p_contentChecksumFlag, </b>/* A 32-bits content checksum is calculated and written at end of frame (default:0) */<b>
ZSTD_p_checksumFlag, </b>/* A 32-bits checksum of content is written at end of frame (default:0) */<b>
ZSTD_p_dictIDFlag, </b>/* When applicable, dictID of dictionary is provided in frame header (default:1) */<b>
</b>/* dictionary parameters */<b>
ZSTD_p_refDictContent=300, </b>/* Dictionary content will be referenced, instead of copied (default:0).<b>
* This avoids duplicating dictionary content.
* But it also requires that dictionary buffer outlives its user (CCtx or CDict) */
* But it also requires that dictionary buffer outlives its user (CDict) */
</b>/* Not ready yet ! */<b>
ZSTD_p_rawContentDict, </b>/* load dictionary in "content-only" mode (no header analysis) (default:0) */<b>
</b>/* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */<b>
@ -557,7 +557,8 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
</b><p> Set one compression parameter, selected by enum ZSTD_cParameter.
@result : 0, or an error code (which can be tested with ZSTD_isError())
Note : when `value` is an enum, cast it to unsigned for proper type checking.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
@ -571,17 +572,32 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
this value is overridden by srcSize instead.
<pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); </b>/* Not ready yet ! */<b>
</b><p> Create an internal CDict from dict buffer.
Decompression will have to use same dictionary.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
meaning "return to no-dictionary mode".
Note 1 : Dictionary content will be copied internally,
except if ZSTD_p_refDictContent is set.
Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
For this reason, compression parameters cannot be changed anymore after loading a dictionary.
It's also a CPU-heavy operation, with non-negligible impact on latency.
Note 3 : Dictionary will be used for all future compression jobs.
To return to "no-dictionary" situation, load a NULL dictionary
<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); </b>/* Not ready yet ! */<b>
</b><p> Reference a prefix (content-only dictionary) to bootstrap next compression job.
Decompression will have to use same prefix.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix, meaning "return to no-dictionary mode".
Note 1 : Prefix content is referenced. It must outlive compression job.
Note 1 : Prefix buffer is referenced. It must outlive compression job.
Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict.
Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
For this reason, compression parameters cannot be changed anymore after loading a prefix.
It's also a CPU-heavy operation, with non-negligible impact on latency.
Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict
It's also a CPU-heavy operation, with non-negligible impact on latency.
<pre><b>ZSTD_CDict* ZSTD_CDict_createEmpty(void); </b>/* Not ready yet ! */<b>
@ -599,10 +615,10 @@ size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dic
<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/* Not ready yet ! */<b>
</b><p> Add a prepared dictionary to cctx, it will used for next compression jobs.
</b><p> Add a prepared dictionary to cctx, to be used for next compression jobs.
Note that compression parameters will be enforced from within CDict.
Currently, they supersede any compression parameter previously set within CCtx.
The dictionary will remain valid for all future compression jobs performed using the same cctx.
The dictionary will remain valid for future compression jobs using same cctx.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : adding a NULL CDict means "return to no-dictionary mode".
Note 1 : Currently, only one dictionary can be managed.
@ -612,23 +628,23 @@ size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dic
<pre><b>typedef enum {
ZSTD_e_continue, </b>/* continue sending data, encoder transparently decides when to output result, depending on optimal conditions */<b>
ZSTD_e_flush, </b>/* flush any data provided and buffered so far - frame will continue, future data can still reference previous data for better compression */<b>
ZSTD_e_end </b>/* flush any remaining data and ends current frame. Any future compression starts a new frame. */<b>
ZSTD_e_continue=0, </b>/* collect more data, encoder transparently decides when to output result, for optimal conditions */<b>
ZSTD_e_flush, </b>/* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */<b>
ZSTD_e_end </b>/* flush any remaining data and ends current frame. Any future compression starts a new frame. */<b>
} ZSTD_EndDirective;
<pre><b>size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
</b><p> Behave about the same as ZSTD_compressStream. To note :
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_setCCtxParameter()
- Compression parameters cannot be changed once compression is started.
- *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
- *dstPos and *srcPos will be updated. They are guaranteed to remain below their respective limit.
- @return provides the amount of data ready to flush and still within internal buffers
- @return provides the amount of data ready to flush within internal buffers
or an error code, which can be tested using ZSTD_isError().
if @return != 0, flush is not fully completed, so it must be called again to empty internal buffers.
if @return != 0, flush is not fully completed, and must be called again to empty internal buffers.
- after a ZSTD_e_end directive, if internal buffer is not fully flushed,
only ZSTD_e_end and ZSTD_e_flush operations are allowed.
It is necessary to fully flush internal buffers
@ -636,11 +652,10 @@ size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dic
<pre><b>// Not ready yet !!!
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
<pre><b>size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx); </b>/* Not ready yet ! */<b>
</b><p> Return a CCtx to clean state.
Useful after an error, or to interrupt an ongoing compression job and start a new one.
It's allowed to change compression parameters after a reset.
It's possible to modify compression parameters after a reset.
Any internal data not yet flushed is cancelled.
@ -781,7 +796,8 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned lo
>0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
errorCode, which can be tested using ZSTD_isError().
Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
Start decompression, with ZSTD_decompressBegin().
If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
@ -823,12 +839,11 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned lo
<h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); </b>/**< doesn't consume input, see details below */<b>
size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
<a name="Chapter20"></a><h2>Block functions</h2><pre>
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
@ -11,8 +11,8 @@
* Tuning parameters
@ -86,7 +86,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
* Context memory management
typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
typedef enum { zcss_init=0, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
struct ZSTD_CCtx_s {
const BYTE* nextSrc; /* next block here to continue on current prefix */
@ -158,7 +158,7 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
if (!cctx) return NULL;
memset(cctx, 0, sizeof(ZSTD_CCtx));
cctx->customMem = customMem;
cctx->compressionLevel = ZSTD_DEFAULT_CLEVEL;
cctx->compressionLevel = ZSTD_CLEVEL_DEFAULT;
return cctx;
@ -202,24 +202,18 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
if (cctx->compressionLevel==0) return;
if (cctx->compressionLevel==ZSTD_CLEVEL_CUSTOM) return;
cctx->params.cParams = ZSTD_getCParams(cctx->compressionLevel,
cctx->frameContentSize, 0);
cctx->compressionLevel = 0;
cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
# define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
# define LOADCPARAMS(cctx) \
if (cctx->compressionLevel!=0) { \
cctx->params.cParams = ZSTD_getCParams( cctx->compressionLevel, \
cctx->frameContentSize, 0); \
cctx->compressionLevel = 0; \
@ -257,7 +251,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
cctx->params.cParams.searchLog = value;
return 0;
case ZSTD_p_minMatchLength :
case ZSTD_p_minMatch :
if (value == 0) return 0; /* special value : 0 means "don't change anything" */
@ -287,7 +281,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
cctx->params.fParams.contentSizeFlag = value>0;
return 0;
case ZSTD_p_contentChecksumFlag : /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
case ZSTD_p_checksumFlag : /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
cctx->params.fParams.checksumFlag = value>0;
return 0;
@ -319,6 +313,24 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
return 0;
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */
if (dict==NULL || dictSize==0) { /* no dictionary mode */
cctx->cdictLocal = NULL;
cctx->cdict = NULL;
} else {
cctx->cdictLocal = ZSTD_createCDict_advanced(
dict, dictSize,
0 /* byReference */,
cctx->params.cParams, cctx->customMem);
cctx->cdict = cctx->cdictLocal;
if (cctx->cdictLocal == NULL)
return ERROR(memory_allocation);
return 0;
/* Not ready yet ! */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
@ -3306,8 +3318,6 @@ size_t ZSTD_CStreamOutSize(void)
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, ZSTD_parameters params, unsigned long long pledgedSrcSize)
if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
DEBUGLOG(5, "ZSTD_resetCStream_internal : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag);
if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict_advanced(zcs, zcs->cdict, params.fParams, pledgedSrcSize))
@ -3327,7 +3337,9 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
ZSTD_parameters params = zcs->params;
params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
DEBUGLOG(5, "ZSTD_resetCStream : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag);
if (zcs->compressionLevel != ZSTD_CLEVEL_CUSTOM) {
params.cParams = ZSTD_getCParams(zcs->compressionLevel, pledgedSrcSize, 0 /* dictSize */);
return ZSTD_resetCStream_internal(zcs, params, pledgedSrcSize);
@ -3437,8 +3449,6 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
/*====== Compression ======*/
typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
size_t const length = MIN(dstCapacity, srcSize);
@ -3449,7 +3459,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
void* dst, size_t* dstCapacityPtr,
const void* src, size_t* srcSizePtr,
ZSTD_flush_e const flush)
ZSTD_EndDirective const flush)
U32 someMoreWork = 1;
const char* const istart = (const char*)src;
@ -3460,10 +3470,12 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
char* op = ostart;
DEBUGLOG(5, "ZSTD_compressStream_generic \n");
assert(zcs->inBuff != NULL);
assert(zcs->outBuff!= NULL);
while (someMoreWork) {
case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
case zcss_init: return ERROR(init_missing); /* call ZSTD_initCStream() first ! */
case zcss_load:
/* complete inBuffer */
@ -3485,12 +3497,12 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
cDst = op; /* compress directly into output buffer (avoid flush stage) */
cDst = zcs->outBuff, oSize = zcs->outBuffSize;
cSize = (flush == zsf_end) ?
cSize = (flush == ZSTD_e_end) ?
ZSTD_compressEnd(zcs, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
ZSTD_compressContinue(zcs, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
if (ZSTD_isError(cSize)) return cSize;
DEBUGLOG(5, "cSize = %u \n", (U32)cSize);
if (flush == zsf_end) zcs->frameEnded = 1;
if (flush == ZSTD_e_end) zcs->frameEnded = 1;
/* prepare next block */
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
if (zcs->inBuffTarget > zcs->inBuffSize)
@ -3538,14 +3550,60 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
size_t sizeRead = input->size - input->pos;
size_t sizeWritten = output->size - output->pos;
size_t const result = ZSTD_compressStream_generic(zcs,
(char*)(output->dst) + output->pos, &sizeWritten,
(const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
size_t const result = ZSTD_compressStream_generic(
(char*)(output->dst) + output->pos, &sizeWritten,
(const char*)(input->src) + input->pos, &sizeRead, ZSTD_e_continue);
input->pos += sizeRead;
output->pos += sizeWritten;
return result;
size_t ZSTD_compress_generic_integral (
ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos,
ZSTD_EndDirective endOp)
/* check conditions */
if (*dstPos > dstCapacity) return ERROR(GENERIC);
if (*srcPos > srcSize) return ERROR(GENERIC);
if (cctx->streamStage == zcss_init) {
/* transparent reset */
ZSTD_parameters params = cctx->params;
if (cctx->compressionLevel != ZSTD_CLEVEL_CUSTOM)
params.cParams = ZSTD_getCParams(cctx->compressionLevel,
cctx->frameContentSize, 0 /* dictSize */);
DEBUGLOG(5, "starting ZSTD_resetCStream");
CHECK_F( ZSTD_initCStream_stage2(cctx, params, cctx->frameContentSize) );
{ size_t sizeRead = srcSize - *srcPos;
size_t sizeWritten = dstCapacity - *dstPos;
DEBUGLOG(5, "starting ZSTD_compressStream_generic");
CHECK_F( ZSTD_compressStream_generic(cctx,
(char*)dst + *dstPos, &sizeWritten,
(const char*)src + *srcPos, &sizeRead, endOp) );
*srcPos += sizeRead;
*dstPos += sizeWritten;
DEBUGLOG(5, "completing ZSTD_compress_generic_integral");
return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp)
return ZSTD_compress_generic_integral(cctx,
output->dst, output->size, &output->pos,
input->src, input->size, &input->pos,
/*====== Finalize ======*/
@ -3556,9 +3614,9 @@ size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
size_t srcSize = 0;
size_t sizeWritten = output->size - output->pos;
size_t const result = ZSTD_compressStream_generic(zcs,
(char*)(output->dst) + output->pos, &sizeWritten,
&srcSize, &srcSize, /* use a valid src address instead of NULL */
(char*)(output->dst) + output->pos, &sizeWritten,
&srcSize, &srcSize, /* use a valid src address instead of NULL */
output->pos += sizeWritten;
if (ZSTD_isError(result)) return result;
return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
@ -3576,8 +3634,10 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
/* flush whatever remains */
size_t srcSize = 0;
size_t sizeWritten = output->size - output->pos;
size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten,
&srcSize /* use a valid src address instead of NULL */, &srcSize, zsf_end);
size_t const notEnded = ZSTD_compressStream_generic(zcs,
ostart, &sizeWritten,
&srcSize /* valid address */, &srcSize,
size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
op += sizeWritten;
if (remainingToFlush) {
@ -3727,7 +3787,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l
size_t const addedSize = srcSize ? 0 : 500;
U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
if (compressionLevel <= 0) compressionLevel = 1; /* 0 == default; no negative compressionLevel yet */
if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
cp = ZSTD_defaultCParameters[tableID][compressionLevel];
if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
@ -616,7 +616,7 @@ typedef enum {
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies.
* Special: value 0 means "do not change searchLog". */
ZSTD_p_minMatchLength, /* Minimum match size (except for repeat-matches, which limit is hard-coded).
ZSTD_p_minMatch, /* Minimum size of searched matches (note : repCode matches can be smaller).
* Larger values make faster compression and decompression, but decrease ratio.
* Note that currently, for all strategies < btopt, effective minimum is 4.
@ -641,7 +641,7 @@ typedef enum {
/* frame parameters */
ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1) */
ZSTD_p_contentChecksumFlag, /* A 32-bits content checksum is calculated and written at end of frame (default:0) */
ZSTD_p_checksumFlag, /* A 32-bits checksum of content is written at end of frame (default:0) */
ZSTD_p_dictIDFlag, /* When applicable, dictID of dictionary is provided in frame header (default:1) */
/* dictionary parameters */
@ -688,17 +688,32 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param
* this value is overridden by srcSize instead. */
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
/*! ZSTD_CCtx_loadDictionary() :
* Create an internal CDict from dict buffer.
* Decompression will have to use same dictionary.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
* meaning "return to no-dictionary mode".
* Note 1 : Dictionary content will be copied internally,
* except if ZSTD_p_refDictContent is set.
* Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
* For this reason, compression parameters cannot be changed anymore after loading a dictionary.
* It's also a CPU-heavy operation, with non-negligible impact on latency.
* Note 3 : Dictionary will be used for all future compression jobs.
* To return to "no-dictionary" situation, load a NULL dictionary */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /* Not ready yet ! */
/*! ZSTD_CCtx_refPrefix() :
* Reference a prefix (content-only dictionary) to bootstrap next compression job.
* Decompression will have to use same prefix.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix, meaning "return to no-dictionary mode".
* Note 1 : Prefix content is referenced. It must outlive compression job.
* Note 1 : Prefix buffer is referenced. It must outlive compression job.
* Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
* If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict.
* Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* For this reason, compression parameters cannot be changed anymore after loading a prefix.
* It's also a CPU-heavy operation, with non-negligible impact on latency.
* Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
* If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict */
* It's also a CPU-heavy operation, with non-negligible impact on latency. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! */
@ -730,13 +745,11 @@ ZSTDLIB_API size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict
ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /* Not ready yet ! */
// Target advanced compression API
// Not ready yet !!!
typedef enum {
ZSTD_e_continue, /* continue sending data, encoder transparently decides when to output result, depending on optimal conditions */
ZSTD_e_flush, /* flush any data provided, compressed and buffered so far - frame will continue, future data can still reference previous data for better compression */
ZSTD_e_end /* flush any remaining data and ends current frame. Any future compression starts a new frame. */
ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
ZSTD_e_end /* flush any remaining data and ends current frame. Any future compression starts a new frame. */
} ZSTD_EndDirective;
/*! ZSTD_compress_generic() :
@ -753,19 +766,18 @@ typedef enum {
* It is necessary to fully flush internal buffers
* before changing compression parameters or start a new compression job.
// Not ready yet !!!
ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
/*! ZSTD_CCtx_reset() :
* Return a CCtx to clean state.
* Useful after an error, or to interrupt an ongoing compression job and start a new one.
* It's allowed to change compression parameters after a reset.
* It's possible to modify compression parameters after a reset.
* Any internal data not yet flushed is cancelled.
ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */
@ -78,9 +78,6 @@
#define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocate table on stack */
#define CACHELINE 64
#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
@ -106,6 +103,31 @@ static clock_t g_time = 0;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
* Errors
#ifndef DEBUG
# define DEBUG 0
#define EXM_THROW(error, ...) \
{ \
DISPLAYLEVEL(1, "zstd: "); \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "error %i : ", error); \
DISPLAYLEVEL(1, " \n"); \
exit(error); \
#define CHECK(f) { \
size_t const err = f; \
if (ZSTD_isError(err)) { \
DEBUGOUTPUT("%s \n", #f); \
EXM_THROW(11, "%s", ZSTD_getErrorName(err)); \
} }
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
@ -145,7 +167,7 @@ static FIO_compressionType_t g_compressionType = FIO_zstdCompression;
void FIO_setCompressionType(FIO_compressionType_t compressionType) { g_compressionType = compressionType; }
static U32 g_overwrite = 0;
void FIO_overwriteMode(void) { g_overwrite=1; }
static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
static U32 g_sparseFileSupport = 1; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; }
static U32 g_dictIDFlag = 1;
void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; }
@ -181,23 +203,6 @@ void FIO_setOverlapLog(unsigned overlapLog){
* Exceptions
#ifndef DEBUG
# define DEBUG 0
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, " \n"); \
exit(error); \
* Functions
@ -225,7 +230,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
f = stdin;
} else {
if (!UTIL_isRegFile(srcFileName)) {
if (!UTIL_isRegularFile(srcFileName)) {
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
return NULL;
@ -306,13 +311,13 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = fopen(fileName, "rb");
if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno));
if (fileHandle==0) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
fileSize = UTIL_getFileSize(fileName);
if (fileSize > DICTSIZE_MAX)
EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
*bufferPtr = malloc((size_t)fileSize);
if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno));
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
{ size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); }
@ -331,7 +336,7 @@ typedef struct {
size_t srcBufferSize;
void* dstBuffer;
size_t dstBufferSize;
#if !defined(ZSTD_NEWAPI) && defined(ZSTD_MULTITHREAD)
ZSTDMT_CCtx* cctx;
ZSTD_CStream* cctx;
@ -339,15 +344,19 @@ typedef struct {
} cRess_t;
static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
U64 srcSize, int srcRegFile,
U64 srcSize, int srcIsRegularFile,
ZSTD_compressionParameters* comprParams) {
cRess_t ress;
memset(&ress, 0, sizeof(ress));
ress.cctx = ZSTD_createCCtx();
if (ress.cctx == NULL)
EXM_THROW(30, "allocation error : can't create ZSTD_CCtx");
#elif defined(ZSTD_MULTITHREAD)
ress.cctx = ZSTDMT_createCCtx(g_nbThreads);
if (ress.cctx == NULL)
EXM_THROW(30, "zstd: allocation error : can't create ZSTD_CStream");
EXM_THROW(30, "allocation error : can't create ZSTDMT_CCtx");
if ((cLevel==ZSTD_maxCLevel()) && (g_overlapLog==FIO_OVERLAP_LOG_NOTSET))
/* use complete window for overlap */
ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_overlapSectionLog, 9);
@ -356,22 +365,43 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
ress.cctx = ZSTD_createCStream();
if (ress.cctx == NULL)
EXM_THROW(30, "zstd: allocation error : can't create ZSTD_CStream");
EXM_THROW(30, "allocation error : can't create ZSTD_CStream");
ress.srcBufferSize = ZSTD_CStreamInSize();
ress.srcBuffer = malloc(ress.srcBufferSize);
ress.dstBufferSize = ZSTD_CStreamOutSize();
ress.dstBuffer = malloc(ress.dstBufferSize);
if (!ress.srcBuffer || !ress.dstBuffer)
EXM_THROW(31, "zstd: allocation error : not enough memory");
EXM_THROW(31, "allocation error : not enough memory");
/* dictionary */
{ void* dictBuffer;
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
if (dictFileName && (dictBuffer==NULL))
EXM_THROW(32, "zstd: allocation error : can't create dictBuffer");
EXM_THROW(32, "allocation error : can't create dictBuffer");
//#define ZSTD_NEWAPI
/* frame parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, srcIsRegularFile) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_searchLog, comprParams->searchLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
/* dictionary */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
#elif defined(ZSTD_MULTITHREAD)
{ ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
params.fParams.contentSizeFlag = srcRegFile;
params.fParams.contentSizeFlag = srcIsRegularFile;
params.fParams.checksumFlag = g_checksumFlag;
params.fParams.noDictIDFlag = !g_dictIDFlag;
if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
@ -381,15 +411,24 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength;
if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength;
if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy) comprParams->strategy;
{ size_t const errorCode = ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize);
if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode));
ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_sectionSize, g_blockSize);
CHECK( ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize) );
ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_sectionSize, g_blockSize);
{ size_t const errorCode = ZSTD_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize);
if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode));
{ ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
params.fParams.contentSizeFlag = srcIsRegularFile;
params.fParams.checksumFlag = g_checksumFlag;
params.fParams.noDictIDFlag = !g_dictIDFlag;
if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
if (comprParams->chainLog) params.cParams.chainLog = comprParams->chainLog;
if (comprParams->hashLog) params.cParams.hashLog = comprParams->hashLog;
if (comprParams->searchLog) params.cParams.searchLog = comprParams->searchLog;
if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength;
if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength;
if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy) comprParams->strategy;
CHECK( ZSTD_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize) );
} }
@ -400,7 +439,7 @@ static void FIO_freeCResources(cRess_t ress)
#if !defined(ZSTD_NEWAPI) && defined(ZSTD_MULTITHREAD)
ZSTD_freeCStream(ress.cctx); /* never fails */
@ -705,41 +744,40 @@ static int FIO_compressFilename_internal(cRess_t ress,
/* init */
{ size_t const resetError = ZSTDMT_resetCStream(ress.cctx, fileSize);
/* nothing, reset is implied */
#elif defined(ZSTD_MULTITHREAD)
CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) );
{ size_t const resetError = ZSTD_resetCStream(ress.cctx, fileSize);
CHECK( ZSTD_resetCStream(ress.cctx, fileSize) );
if (ZSTD_isError(resetError))
EXM_THROW(21, "Error initializing compression : %s",
/* Main compression loop */
while (1) {
/* Fill input Buffer */
size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
if (inSize==0) break;
readsize += inSize;
{ ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
while (inBuff.pos != inBuff.size) {
ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
size_t const result = ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff);
while (inBuff.pos != inBuff.size) {
ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
CHECK( ZSTD_compress_generic(ress.cctx,
&outBuff, &inBuff, ZSTD_e_continue) );
#elif defined(ZSTD_MULTITHREAD)
CHECK( ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff) );
size_t const result = ZSTD_compressStream(ress.cctx, &outBuff, &inBuff);
CHECK( ZSTD_compressStream(ress.cctx, &outBuff, &inBuff) );
if (ZSTD_isError(result))
EXM_THROW(23, "Compression error : %s ", ZSTD_getErrorName(result));
/* Write compressed stream */
if (outBuff.pos) {
size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck!=outBuff.pos)
EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
compressedfilesize += outBuff.pos;
} } }
/* Write compressed stream */
if (outBuff.pos) {
size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck!=outBuff.pos)
EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
compressedfilesize += outBuff.pos;
} }
if (g_nbThreads > 1) {
if (!fileSize)
DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
@ -762,12 +800,18 @@ static int FIO_compressFilename_internal(cRess_t ress,
{ size_t result = 1;
while (result!=0) { /* note : is there any possibility of endless loop ? */
ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
ZSTD_inBuffer inBuff = { NULL, 0, 0};
result = ZSTD_compress_generic(ress.cctx,
&outBuff, &inBuff, ZSTD_e_end);
#elif defined(ZSTD_MULTITHREAD)
result = ZSTDMT_endStream(ress.cctx, &outBuff);
result = ZSTD_endStream(ress.cctx, &outBuff);
if (ZSTD_isError(result)) EXM_THROW(26, "Compression error during frame end : %s", ZSTD_getErrorName(result));
if (ZSTD_isError(result))
EXM_THROW(26, "Compression error during frame end : %s",
{ size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck!=outBuff.pos) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); }
compressedfilesize += outBuff.pos;
@ -856,9 +900,9 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
clock_t const start = clock();
U64 const srcSize = UTIL_getFileSize(srcFileName);
int const regFile = UTIL_isRegFile(srcFileName);
int const isRegularFile = UTIL_isRegularFile(srcFileName);
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, regFile, comprParams);
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
@ -879,8 +923,8 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
char* dstFileName = (char*)malloc(FNSPACE);
size_t const suffixSize = suffix ? strlen(suffix) : 0;
U64 const srcSize = (nbFiles != 1) ? 0 : UTIL_getFileSize(inFileNamesTable[0]) ;
int const regFile = (nbFiles != 1) ? 0 : UTIL_isRegFile(inFileNamesTable[0]);
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, regFile, comprParams);
int const isRegularFile = (nbFiles != 1) ? 0 : UTIL_isRegularFile(inFileNamesTable[0]);
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
/* init */
if (dstFileName==NULL)
@ -958,9 +1002,7 @@ static dRess_t FIO_createDResources(const char* dictFileName)
/* dictionary */
{ void* dictBuffer;
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
size_t const initError = ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize);
if (ZSTD_isError(initError))
EXM_THROW(61, "ZSTD_initDStream_usingDict error : %s", ZSTD_getErrorName(initError));
CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
@ -969,10 +1011,7 @@ static dRess_t FIO_createDResources(const char* dictFileName)
static void FIO_freeDResources(dRess_t ress)
size_t const errorCode = ZSTD_freeDStream(ress.dctx);
if (ZSTD_isError(errorCode))
EXM_THROW(69, "Error : can't free ZSTD_DStream context resource : %s",
CHECK( ZSTD_freeDStream(ress.dctx) );
@ -208,7 +208,7 @@ UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
UTIL_STATIC int UTIL_isRegFile(const char* infilename)
UTIL_STATIC int UTIL_isRegularFile(const char* infilename)
stat_t statbuf;
return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */
Reference in New Issue
Block a user