Mirror of https://github.com/facebook/zstd.git
synced 2025-03-06 08:49:28 +02:00
spec update : require minimum nb of literals for 4-streams mode
Reported by @shulib: the specification for the 4-streams mode doesn't work when the amount of literals to compress is 5 bytes; extending the analysis, it also fails for sizes 1 and 2. This patch updates the specification and the implementation to require a minimum of 6 literals to trigger or accept the 4-streams mode. The impact is expected to be a no-op: the 4-streams mode is never triggered for such a small quantity of literals anyway, since it would be wasteful (it costs ~7.3 bytes more than the single-stream mode). The informal lower limit used in practice is around ~256 bytes, so the technical minimum is far below it; the change is mainly for completeness of the specification.
This commit is contained in:
parent 40a7188130
commit 6a9c525903
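
For a concrete view of why very small sizes fail: the 4-streams format regenerates the literals in four segments of `(totalSize+3)/4` bytes each (streams 1-3), with stream 4 taking the remainder, as seen in the decoder hunks below. A minimal sketch of that arithmetic, illustrative only, using just the split rule:

```c
#include <stdio.h>

/* Illustrative sketch of the 4-streams segment split:
 * streams 1-3 each regenerate segmentSize bytes, stream 4 gets the remainder. */
static void check_split(size_t totalSize)
{
    size_t const segmentSize = (totalSize + 3) / 4;
    if (3 * segmentSize > totalSize) {
        /* stream 4 would need a negative size : the split is impossible */
        printf("%2zu literals : 4-streams split impossible\n", totalSize);
    } else {
        printf("%2zu literals : segments of %zu, %zu, %zu, %zu bytes\n",
               totalSize, segmentSize, segmentSize, segmentSize,
               totalSize - 3 * segmentSize);
    }
}

int main(void)
{
    size_t s;
    for (s = 1; s <= 8; s++) check_split(s);
    /* sizes 1, 2 and 5 cannot be split; the spec now requires >= 6 */
    return 0;
}
```
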
@ -435,7 +435,7 @@ They can be decoded first, and then copied during [Sequence Execution],
or they can be decoded on the flow during [Sequence Execution].

Literals can be stored uncompressed or compressed using Huffman prefix codes.
When compressed, an optional tree description can be present,
When compressed, a tree description may optionally be present,
followed by 1 or 4 streams.

| `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] |

@ -510,7 +510,7 @@ Its value is : `Size_Format = (Literals_Section_Header[0]>>2) & 3`
`Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)`

Only Stream1 is present for these cases.
Note : it's allowed to represent a short value (for example `13`)
Note : it's allowed to represent a short value (for example `27`)
using a long format, even if it's less efficient.

__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :

@ -521,19 +521,33 @@ __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 01 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 10 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383).
Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383).
`Literals_Section_Header` uses 4 bytes.
- `Size_Format` == 11 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143).
`Literals_Section_Header` uses 5 bytes.

Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
_when_ it is present.
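
As an illustration of the layouts above, a minimal sketch of a header reader (hypothetical helper; it only applies the little-endian convention and the field widths listed for each `Size_Format`):

```c
#include <stddef.h>

/* Hypothetical helper, for illustration: extracts Regenerated_Size and
 * Compressed_Size from a Compressed_Literals_Block header, using the
 * little-endian layout and per-Size_Format field widths described above.
 * Returns the header size in bytes (3, 4 or 5), or 0 if not enough input. */
static size_t readCompressedLiteralsHeader(const unsigned char* h, size_t avail,
                                           unsigned* regenSize, unsigned* compSize)
{
    static const size_t lhSizes[4]     = { 3, 3, 4, 5 };    /* per Size_Format */
    static const unsigned fieldBits[4] = { 10, 10, 14, 18 };
    unsigned sizeFormat;
    size_t lhSize, i;
    unsigned long long v = 0;

    if (avail < 1) return 0;
    sizeFormat = (h[0] >> 2) & 3;                            /* Size_Format */
    lhSize = lhSizes[sizeFormat];
    if (avail < lhSize) return 0;
    for (i = 0; i < lhSize; i++)                             /* little-endian read */
        v |= (unsigned long long)h[i] << (8 * i);

    *regenSize = (unsigned)((v >> 4) & ((1u << fieldBits[sizeFormat]) - 1));
    *compSize  = (unsigned)((v >> (4 + fieldBits[sizeFormat]))
                            & ((1u << fieldBits[sizeFormat]) - 1));
    return lhSize;
}
```
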
4 streams is superior to 1 stream in decompression speed,
by exploiting instruction level parallelism.
But it's also more expensive,
costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table.

In general, use the 4 streams mode when there are more literals to decode,
to favor higher decompression speeds.
Beyond 1KB, the 4 streams mode is compulsory anyway.

Note that a minimum of 6 bytes is required for the 4 streams mode.
That's a technical minimum, but it's not recommended to employ the 4 streams mode
for such a small quantity, that would be wasteful.
A more practical lower bound would be around ~256 bytes.

#### Raw Literals Block
The data in Stream1 is `Regenerated_Size` bytes long,
it contains the raw literals data to be used during [Sequence Execution].

@ -29,6 +29,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";

@ -88,8 +88,10 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
unsigned maxSymbolValue, unsigned tableLog);

/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
* Same as HUF_compress2(), but uses externally allocated @workSpace.
* @workSpace's size, aka @wkspSize, must be >= HUF_WORKSPACE_SIZE
* @srcSize must be >= 6
*/
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
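
A hedged usage sketch of the updated contract; the parameters after `dstCapacity` (src, srcSize, maxSymbolValue, tableLog, workSpace, wkspSize) are assumed from the usual shape of this declaration, which the hunk truncates:

```c
#include "huf.h"   /* HUF_compress4X_wksp, HUF_WORKSPACE_SIZE_U64 */

/* Sketch only: compress a literals buffer with the 4-streams encoder,
 * honoring the new "srcSize must be >= 6" condition. */
size_t compress4XExample(void* dst, size_t dstCapacity,
                         const void* literals, size_t litSize)
{
    static unsigned long long wksp[HUF_WORKSPACE_SIZE_U64];  /* aligned scratch */
    if (litSize < 6) return 0;   /* below the 4-streams minimum: don't attempt it */
    return HUF_compress4X_wksp(dst, dstCapacity, literals, litSize,
                               255 /* maxSymbolValue */, 11 /* tableLog */,
                               wksp, sizeof(wksp));
}
```
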
@ -94,6 +94,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;

#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6

typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;

@ -164,16 +164,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);

@ -289,10 +289,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decodi
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
U64 D4;
if (MEM_isLittleEndian()) {
D4 = (symbol << 8) + nbBits;
D4 = (U64)((symbol << 8) + nbBits);
} else {
D4 = symbol + (nbBits << 8);
D4 = (U64)(symbol + (nbBits << 8));
}
assert(D4 < (1U << 16));
D4 *= 0x0001000100010001ULL;
return D4;
}

@ -383,9 +384,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
* rankStart[0] is not filled because there are no entries in the table for
* weight 0.
*/
{
int n;
int nextRankStart = 0;
{ int n;
U32 nextRankStart = 0;
int const unroll = 4;
int const nLimit = (int)nbSymbols - unroll + 1;
for (n=0; n<(int)tableLog+1; n++) {

@ -412,8 +412,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
* We can switch based on the length to a different inner loop which is
* optimized for that particular case.
*/
{
U32 w;
{ U32 w;
int symbol = wksp->rankVal[0];
int rankStart = 0;
for (w=1; w<tableLog+1; ++w) {

@ -525,7 +524,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
while (p < pEnd)
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

return pEnd-pStart;
return (size_t)(pEnd-pStart);
}

FORCE_INLINE_TEMPLATE size_t

@ -551,6 +550,10 @@ HUF_decompress1X1_usingDTable_internal_body(
return dstSize;
}

/* HUF_decompress4X1_usingDTable_internal_body():
* Conditions :
* @dstSize >= 6
*/
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
void* dst, size_t dstSize,

@ -594,6 +597,7 @@ HUF_decompress4X1_usingDTable_internal_body(

if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );

@ -679,8 +683,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
const BYTE* const iend = (const BYTE*)cSrc + 6;
BYTE* const oend = (BYTE*)dst + dstSize;
HUF_DecompressAsmArgs args;
{
size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
{ size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
FORWARD_IF_ERROR(ret, "Failed to init asm args");
if (ret != 0)
return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);

@ -700,8 +703,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
(void)iend;

/* finish bit streams one by one. */
{
size_t const segmentSize = (dstSize+3) / 4;
{ size_t const segmentSize = (dstSize+3) / 4;
BYTE* segmentEnd = (BYTE*)dst;
int i;
for (i = 0; i < 4; ++i) {

@ -1246,6 +1248,11 @@ HUF_decompress1X2_usingDTable_internal_body(
/* decoded size */
return dstSize;
}

/* HUF_decompress4X2_usingDTable_internal_body():
* Conditions:
* @dstSize >= 6
*/
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,

@ -1288,6 +1295,7 @@ HUF_decompress4X2_usingDTable_internal_body(

if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );

@ -166,6 +166,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
}
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
if (!singleStream)
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
"Not enough literals (%zu) for the 4-streams mode (min %u)",
litSize, MIN_LITERALS_FOR_4_STREAMS);
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);

@ -181,6 +185,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
} else {
assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
hufSuccess = HUF_decompress4X_usingDTable_bmi2(
dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));

@ -509,7 +514,8 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
pos += n;
assert(n>=0);
pos += (size_t)n;
}
}
/* Now we spread those positions across the table.

@ -70,6 +70,7 @@ typedef enum {
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
ZSTD_error_literals_headerWrong = 24,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
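
From the application side, the new error code surfaces through the regular error helpers; a minimal sketch, assuming a plain one-shot `ZSTD_decompress` caller:

```c
#include <stdio.h>
#include <zstd.h>

/* Minimal sketch: any decoding failure, including the new
 * literals_headerWrong case, is reported via ZSTD_getErrorName(). */
size_t decompressOrReport(void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize)
{
    size_t const r = ZSTD_decompress(dst, dstCapacity, src, srcSize);
    if (ZSTD_isError(r))
        fprintf(stderr, "decompression failed: %s\n", ZSTD_getErrorName(r));
    return r;
}
```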