/* ******************************************************************
 * huff0 huffman codec,
 * part of Finite State Entropy library
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif

#ifndef HUF_H_298734234
#define HUF_H_298734234

/* *** Dependencies *** */
#include "zstd_deps.h"    /* size_t */
#include "mem.h"          /* U32 */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"

/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024)        /**< maximum input size for a single block compressed with HUF_compress */
size_t HUF_compressBound(size_t size);        /**< maximum compressed size (worst case) */

/* Error Management */
unsigned    HUF_isError(size_t code);         /**< tells if a return value is an error code */
const char* HUF_getErrorName(size_t code);    /**< provides error code string (useful for debugging) */


#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))

/* *** Constants *** */
#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX  255

#define HUF_TABLELOG_ABSOLUTEMAX  12  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
#  error "HUF_TABLELOG_MAX is too large !"
#endif

/* ****************************************
* Static allocation
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* HUF buffer bounds */
# define HUF_CTABLEBOUND 129
2017-04-28 12:46:48 -07:00
# define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
2016-06-05 00:42:28 +02:00
# define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
2020-12-04 16:43:19 -08:00
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
2021-07-12 11:57:01 -07:00
typedef size_t HUF_CElt ; /* consider it an incomplete type */
# define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
# define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
2016-06-05 00:42:28 +02:00
# define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
2021-07-12 11:57:01 -07:00
HUF_CElt name [ HUF_CTABLE_SIZE_ST ( maxSymbolValue ) ] /* no final ; */
2016-06-05 00:42:28 +02:00
/* static allocation of HUF's DTable */
2016-06-11 00:23:43 +02:00
typedef U32 HUF_DTable ;
2016-06-08 11:11:02 +02:00
# define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
2018-06-14 15:08:43 -04:00
# define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
2016-12-01 18:24:59 -08:00
HUF_DTable DTable [ HUF_DTABLE_SIZE ( ( maxTableLog ) - 1 ) ] = { ( ( U32 ) ( ( maxTableLog ) - 1 ) * 0x01000001 ) }
2018-06-14 15:17:03 -04:00
# define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
2016-12-01 18:24:59 -08:00
HUF_DTable DTable [ HUF_DTABLE_SIZE ( maxTableLog ) ] = { ( ( U32 ) ( maxTableLog ) * 0x01000001 ) }
2016-06-05 00:42:28 +02:00
/* ****************************************
*  Advanced decompression functions
******************************************/

/**
 * Huffman flags bitset.
 * For all flags, 0 is the default value.
 */
typedef enum {
    /**
     * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
     * Otherwise: Ignored.
     */
    HUF_flags_bmi2 = (1 << 0),
    /**
     * If set: Test possible table depths to find the one that produces the smallest header + encoded size.
     * If unset: Use heuristic to find the table depth.
     */
    HUF_flags_optimalDepth = (1 << 1),
    /**
     * If set: If the previous table can encode the input, always reuse the previous table.
     * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
     */
    HUF_flags_preferRepeat = (1 << 2),
    /**
     * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
     * If unset: Always histogram the entire input.
     */
    HUF_flags_suspectUncompressible = (1 << 3),
    /**
     * If set: Don't use assembly implementations
     * If unset: Allow using assembly implementations
     */
    HUF_flags_disableAsm = (1 << 4),
    /**
     * If set: Don't use the fast decoding loop, always use the fallback decoding loop.
     * If unset: Use the fast decoding loop when possible.
     */
    HUF_flags_disableFast = (1 << 5)
} HUF_flags_e;

/* ****************************************
2018-03-13 13:44:10 -07:00
* HUF detailed API
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2022-10-14 11:29:32 -07:00
# define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
2018-03-13 13:44:10 -07:00
/*! HUF_compress() does the following:
* 1. count symbol occurrence from source [ ] into table count [ ] using FSE_count ( ) ( exposed within " fse.h " )
* 2. ( optional ) refine tableLog using HUF_optimalTableLog ( )
* 3. build Huffman table from count using HUF_buildCTable ( )
* 4. save Huffman table to memory buffer using HUF_writeCTable ( )
* 5. encode the data stream using HUF_compress4X_usingCTable ( )
*
* The following API allows targeting specific sub - functions for advanced tasks .
* For example , it ' s possible to compress several blocks using the same ' CTable ' ,
* or to save and regenerate ' CTable ' using external methods .
*/
2022-10-17 08:03:44 -07:00
unsigned HUF_minTableLog ( unsigned symbolCardinality ) ;
2022-10-14 10:37:35 -07:00
unsigned HUF_cardinality ( const unsigned * count , unsigned maxSymbolValue ) ;
2022-10-14 13:37:19 -07:00
unsigned HUF_optimalTableLog ( unsigned maxTableLog , size_t srcSize , unsigned maxSymbolValue , void * workSpace ,
2023-01-18 16:30:10 -08:00
size_t wkspSize , HUF_CElt * table , const unsigned * count , int flags ) ; /* table is used as scratch space for building and testing tables, not a return value */
2020-12-21 10:23:08 -08:00
size_t HUF_writeCTable_wksp ( void * dst , size_t maxDstSize , const HUF_CElt * CTable , unsigned maxSymbolValue , unsigned huffLog , void * workspace , size_t workspaceSize ) ;
2023-01-18 16:30:10 -08:00
size_t HUF_compress4X_usingCTable ( void * dst , size_t dstSize , const void * src , size_t srcSize , const HUF_CElt * CTable , int flags ) ;
2019-11-05 15:51:25 -05:00
size_t HUF_estimateCompressedSize ( const HUF_CElt * CTable , const unsigned * count , unsigned maxSymbolValue ) ;
2020-05-01 16:11:47 -07:00
int HUF_validateCTable ( const HUF_CElt * CTable , const unsigned * count , unsigned maxSymbolValue ) ;
2016-06-05 00:42:28 +02:00
Allow compressor to repeat Huffman tables
* Compressor saves most recently used Huffman table and reuses it
if it produces better results.
* I attempted to preserve CPU usage profile.
I intentionally left all of the existing heuristics in place.
There is only a speed difference on the second block and later.
When compressing large enough blocks (say >= 4 KiB) there is
no significant difference in compression speed.
Dictionary compression of one block is the same speed for blocks
with literals <= 1 KiB, and after that the difference is not
very significant.
* In the synthetic data, with blocks 10 KB or smaller, most blocks
can't use repeated tables because the previous block did not
contain a symbol that the current block contains.
Once blocks are about 12 KB or more, most previous blocks have
valid Huffman tables for the current block, and the compression
ratio and decompression speed jumped.
* In silesia blocks as small as 4KB can frequently reuse the
previous Huffman table (85%), but it isn't as profitable, and
the previous Huffman table only gets used about 3% of the time.
* Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns
and `HUF_estimateCompressedSize()` takes ~35 ns.
They are decently well optimized, the first versions took 90 ns
and 120 ns respectively. `HUF_validateCTable()` could be twice as
fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0.
However, `U32` has an alignment of 4 instead of 2, so I think that
might be undefined behavior.
* I've ran `zstreamtest` compiled normally, with UASAN and with MSAN
for 4 hours each.
The worst case for the speed difference is a bunch of small blocks
in the same frame. I modified `bench.c` to compress the input in a
single frame but with blocks of the given block size, set by `-B`.
Benchmarks on level 1:
| Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s |
|-----------|------------|-----------|-------|------------------|--------------------|
| zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 |
| zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 |
| zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 |
| zstd | 256 | silesia | 2.204 | 93.4 | 415.7 |
| zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 |
| zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 |
| zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 |
| zstd | 512 | silesia | 2.358 | 119.8 | 432.6 |
| zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 |
| zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 |
| zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 |
| zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 |
| zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 |
| zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 |
| zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 |
| zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 |
| zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 |
| zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 |
| zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 |
| zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-01 17:51:56 -08:00
/** Repeat-table policy: tells the compressor whether the previous
 *  Huffman table may be reused for the current block. */
typedef enum {
    HUF_repeat_none,  /**< Cannot use the previous table */
    HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
    HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;

Allow compressor to repeat Huffman tables
* Compressor saves most recently used Huffman table and reuses it
if it produces better results.
* I attempted to preserve CPU usage profile.
I intentionally left all of the existing heuristics in place.
There is only a speed difference on the second block and later.
When compressing large enough blocks (say >= 4 KiB) there is
no significant difference in compression speed.
Dictionary compression of one block is the same speed for blocks
with literals <= 1 KiB, and after that the difference is not
very significant.
* In the synthetic data, with blocks 10 KB or smaller, most blocks
can't use repeated tables because the previous block did not
contain a symbol that the current block contains.
Once blocks are about 12 KB or more, most previous blocks have
valid Huffman tables for the current block, and the compression
ratio and decompression speed jumped.
* In silesia blocks as small as 4KB can frequently reuse the
previous Huffman table (85%), but it isn't as profitable, and
the previous Huffman table only gets used about 3% of the time.
* Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns
and `HUF_estimateCompressedSize()` takes ~35 ns.
They are decently well optimized, the first versions took 90 ns
and 120 ns respectively. `HUF_validateCTable()` could be twice as
fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0.
However, `U32` has an alignment of 4 instead of 2, so I think that
might be undefined behavior.
* I've ran `zstreamtest` compiled normally, with UASAN and with MSAN
for 4 hours each.
The worst case for the speed difference is a bunch of small blocks
in the same frame. I modified `bench.c` to compress the input in a
single frame but with blocks of the given block size, set by `-B`.
Benchmarks on level 1:
| Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s |
|-----------|------------|-----------|-------|------------------|--------------------|
| zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 |
| zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 |
| zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 |
| zstd | 256 | silesia | 2.204 | 93.4 | 415.7 |
| zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 |
| zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 |
| zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 |
| zstd | 512 | silesia | 2.358 | 119.8 | 432.6 |
| zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 |
| zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 |
| zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 |
| zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 |
| zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 |
| zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 |
| zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 |
| zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 |
| zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 |
| zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 |
| zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 |
| zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-01 17:51:56 -08:00
/** HUF_compress4X_repeat() :
2018-03-13 13:44:10 -07:00
* Same as HUF_compress4X_wksp ( ) , but considers using hufTable if * repeat ! = HUF_repeat_none .
* If it uses hufTable it does not modify hufTable or repeat .
* If it doesn ' t , it sets * repeat = HUF_repeat_none , and it sets hufTable to the table used .
2021-06-28 10:06:20 -04:00
* If preferRepeat then the old table will always be used if valid .
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
2018-03-13 13:44:10 -07:00
size_t HUF_compress4X_repeat ( void * dst , size_t dstSize ,
const void * src , size_t srcSize ,
unsigned maxSymbolValue , unsigned tableLog ,
void * workSpace , size_t wkspSize , /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
2023-01-18 16:30:10 -08:00
HUF_CElt * hufTable , HUF_repeat * repeat , int flags ) ;
2016-06-05 00:42:28 +02:00
2016-12-01 17:47:30 -08:00
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable ( ) , but using externally allocated scratch buffer .
2018-03-13 13:44:10 -07:00
* ` workSpace ` must be aligned on 4 - bytes boundaries , and its size must be > = HUF_CTABLE_WORKSPACE_SIZE .
2016-12-01 17:47:30 -08:00
*/
2023-01-18 15:30:30 -08:00
# define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
2018-03-13 13:44:10 -07:00
# define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
2018-05-08 15:37:06 -07:00
size_t HUF_buildCTable_wksp ( HUF_CElt * tree ,
fix confusion between unsigned <-> U32
as suggested in #1441.
generally U32 and unsigned are the same thing,
except when they are not ...
case : 32-bit compilation for MIPS (uint32_t == unsigned long)
A vast majority of transformation consists in transforming U32 into unsigned.
In rare cases, it's the other way around (typically for internal code, such as seeds).
Among a few issues this patches solves :
- some parameters were declared with type `unsigned` in *.h,
but with type `U32` in their implementation *.c .
- some parameters have type unsigned*,
but the caller user a pointer to U32 instead.
These fixes are useful.
However, the bulk of changes is about %u formating,
which requires unsigned type,
but generally receives U32 values instead,
often just for brevity (U32 is shorter than unsigned).
These changes are generally minor, or even annoying.
As a consequence, the amount of code changed is larger than I would expect for such a patch.
Testing is also a pain :
it requires manually modifying `mem.h`,
in order to lie about `U32`
and force it to be an `unsigned long` typically.
On a 64-bit system, this will break the equivalence unsigned == U32.
Unfortunately, it will also break a few static_assert(), controlling structure sizes.
So it also requires modifying `debug.h` to make `static_assert()` a noop.
And then reverting these changes.
So it's inconvenient, and as a consequence,
this property is currently not checked during CI tests.
Therefore, these problems can emerge again in the future.
I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests.
It's another restriction for coding, adding more frustration during merge tests,
since most platforms don't need this distinction (hence contributor will not see it),
and while this can matter in theory, the number of platforms impacted seems minimal.
Thoughts ?
2018-12-21 16:19:44 -08:00
const unsigned * count , U32 maxSymbolValue , U32 maxNbBits ,
2018-05-08 15:37:06 -07:00
void * workSpace , size_t wkspSize ) ;
2016-12-01 17:47:30 -08:00
2016-06-08 11:11:02 +02:00
/*! HUF_readStats() :
2018-03-13 13:44:10 -07:00
* Read compact Huffman tree , saved by HUF_writeCTable ( ) .
* ` huffWeight ` is destination buffer .
* @ return : size read from ` src ` , or an error Code .
* Note : Needed by HUF_readCTable ( ) and HUF_readDTableXn ( ) . */
2018-02-26 14:52:23 -08:00
size_t HUF_readStats ( BYTE * huffWeight , size_t hwSize ,
U32 * rankStats , U32 * nbSymbolsPtr , U32 * tableLogPtr ,
2016-06-08 11:11:02 +02:00
const void * src , size_t srcSize ) ;
2020-08-16 22:22:33 -07:00
/*! HUF_readStats_wksp() :
* Same as HUF_readStats ( ) but takes an external workspace which must be
* 4 - byte aligned and its size must be > = HUF_READ_STATS_WORKSPACE_SIZE .
2020-08-17 13:44:49 -07:00
* If the CPU has BMI2 support , pass bmi2 = 1 , otherwise pass bmi2 = 0.
2020-08-16 22:22:33 -07:00
*/
# define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
# define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_readStats_wksp ( BYTE * huffWeight , size_t hwSize ,
U32 * rankStats , U32 * nbSymbolsPtr , U32 * tableLogPtr ,
const void * src , size_t srcSize ,
2020-08-17 13:44:49 -07:00
void * workspace , size_t wkspSize ,
2023-01-18 16:30:10 -08:00
int flags ) ;
2020-08-16 22:22:33 -07:00
2016-06-08 11:11:02 +02:00
/** HUF_readCTable() :
2018-03-13 13:44:10 -07:00
* Loading a CTable saved with HUF_writeCTable ( ) */
2019-11-26 12:24:19 -08:00
size_t HUF_readCTable ( HUF_CElt * CTable , unsigned * maxSymbolValuePtr , const void * src , size_t srcSize , unsigned * hasZeroWeights ) ;
2016-06-08 11:11:02 +02:00
2021-07-12 11:57:01 -07:00
/** HUF_getNbBitsFromCTable() :
2018-05-08 15:37:06 -07:00
* Read nbBits from CTable symbolTable , for symbol ` symbolValue ` presumed < = HUF_SYMBOLVALUE_MAX
2023-08-24 14:41:21 -07:00
* Note 1 : If symbolValue > HUF_readCTableHeader ( symbolTable ) . maxSymbolValue , returns 0
* Note 2 : is not inlined , as HUF_CElt definition is private
*/
2021-07-12 11:57:01 -07:00
U32 HUF_getNbBitsFromCTable ( const HUF_CElt * symbolTable , U32 symbolValue ) ;
2016-06-08 11:11:02 +02:00
2023-08-24 14:41:21 -07:00
typedef struct {
BYTE tableLog ;
BYTE maxSymbolValue ;
BYTE unused [ sizeof ( size_t ) - 2 ] ;
} HUF_CTableHeader ;
/** HUF_readCTableHeader() :
* @ returns The header from the CTable specifying the tableLog and the maxSymbolValue .
*/
HUF_CTableHeader HUF_readCTableHeader ( HUF_CElt const * ctable ) ;
2016-06-08 11:11:02 +02:00
/*
2018-03-13 13:44:10 -07:00
* HUF_decompress ( ) does the following :
2018-06-14 15:17:03 -04:00
* 1. select the decompression algorithm ( X1 , X2 ) based on pre - computed heuristics
2018-03-13 13:44:10 -07:00
* 2. build Huffman table from save , using HUF_readDTableX ? ( )
* 3. decode 1 or 4 segments in parallel using HUF_decompress ? X ? _usingDTable ( )
*/
2016-06-05 00:42:28 +02:00
2016-06-08 11:11:02 +02:00
/** HUF_selectDecoder() :
2018-03-13 13:44:10 -07:00
* Tells which decoder is likely to decode faster ,
* based on a set of pre - computed metrics .
2018-06-14 15:17:03 -04:00
* @ return : 0 = = HUF_decompress4X1 , 1 = = HUF_decompress4X2 .
2018-03-13 13:44:10 -07:00
* Assumption : 0 < dstSize < = 128 KB */
2016-06-08 11:11:02 +02:00
U32 HUF_selectDecoder ( size_t dstSize , size_t cSrcSize ) ;
2018-03-13 13:44:10 -07:00
/**
* The minimum workspace size for the ` workSpace ` used in
2018-06-14 15:17:03 -04:00
* HUF_readDTableX1_wksp ( ) and HUF_readDTableX2_wksp ( ) .
2018-03-13 13:44:10 -07:00
*
* The space used depends on HUF_TABLELOG_MAX , ranging from ~ 1500 bytes when
* HUF_TABLE_LOG_MAX = 12 to ~ 1850 bytes when HUF_TABLE_LOG_MAX = 15.
* Buffer overflow errors may potentially occur if code modifications result in
* a required workspace size greater than that specified in the following
* macro .
*/
2021-03-05 13:14:06 -08:00
# define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
2018-03-13 13:44:10 -07:00
# define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
2016-06-05 00:42:28 +02:00
2018-03-13 13:44:10 -07:00
/* ====================== */
2016-06-05 00:42:28 +02:00
/* single stream variants */
2018-03-13 13:44:10 -07:00
/* ====================== */
2016-06-05 00:42:28 +02:00
2023-01-18 16:30:10 -08:00
size_t HUF_compress1X_usingCTable ( void * dst , size_t dstSize , const void * src , size_t srcSize , const HUF_CElt * CTable , int flags ) ;
Allow compressor to repeat Huffman tables
* Compressor saves most recently used Huffman table and reuses it
if it produces better results.
* I attempted to preserve CPU usage profile.
I intentionally left all of the existing heuristics in place.
There is only a speed difference on the second block and later.
When compressing large enough blocks (say >= 4 KiB) there is
no significant difference in compression speed.
Dictionary compression of one block is the same speed for blocks
with literals <= 1 KiB, and after that the difference is not
very significant.
* In the synthetic data, with blocks 10 KB or smaller, most blocks
can't use repeated tables because the previous block did not
contain a symbol that the current block contains.
Once blocks are about 12 KB or more, most previous blocks have
valid Huffman tables for the current block, and the compression
ratio and decompression speed jumped.
* In silesia blocks as small as 4KB can frequently reuse the
previous Huffman table (85%), but it isn't as profitable, and
the previous Huffman table only gets used about 3% of the time.
* Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns
and `HUF_estimateCompressedSize()` takes ~35 ns.
They are decently well optimized, the first versions took 90 ns
and 120 ns respectively. `HUF_validateCTable()` could be twice as
fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0.
However, `U32` has an alignment of 4 instead of 2, so I think that
might be undefined behavior.
* I've ran `zstreamtest` compiled normally, with UASAN and with MSAN
for 4 hours each.
The worst case for the speed difference is a bunch of small blocks
in the same frame. I modified `bench.c` to compress the input in a
single frame but with blocks of the given block size, set by `-B`.
Benchmarks on level 1:
| Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s |
|-----------|------------|-----------|-------|------------------|--------------------|
| zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 |
| zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 |
| zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 |
| zstd | 256 | silesia | 2.204 | 93.4 | 415.7 |
| zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 |
| zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 |
| zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 |
| zstd | 512 | silesia | 2.358 | 119.8 | 432.6 |
| zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 |
| zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 |
| zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 |
| zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 |
| zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 |
| zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 |
| zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 |
| zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 |
| zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 |
| zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 |
| zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 |
| zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-01 17:51:56 -08:00
/** HUF_compress1X_repeat() :
2018-03-13 13:44:10 -07:00
* Same as HUF_compress1X_wksp ( ) , but considers using hufTable if * repeat ! = HUF_repeat_none .
* If it uses hufTable it does not modify hufTable or repeat .
* If it doesn ' t , it sets * repeat = HUF_repeat_none , and it sets hufTable to the table used .
2021-06-28 10:06:20 -04:00
* If preferRepeat then the old table will always be used if valid .
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
2018-03-13 13:44:10 -07:00
size_t HUF_compress1X_repeat ( void * dst , size_t dstSize ,
const void * src , size_t srcSize ,
unsigned maxSymbolValue , unsigned tableLog ,
void * workSpace , size_t wkspSize , /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
2023-01-18 16:30:10 -08:00
HUF_CElt * hufTable , HUF_repeat * repeat , int flags ) ;
2016-06-05 00:42:28 +02:00
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);   /**< double-symbols decoder */
#endif

/* BMI2 variants.
 * Set the HUF_flags_bmi2 flag in `flags` if the CPU supports BMI2; leave it unset otherwise.
 */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);

#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif

#endif   /* HUF_H_298734234 */

#if defined (__cplusplus)
}
#endif