mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 16:56:49 +02:00
Add generic C versions of the fast decoding loops to serve architectures that don't have an assembly implementation. Also allow selecting the C decoding loop over the assembly decoding loop through a zstd decompression parameter `ZSTD_d_disableHuffmanAssembly`. I benchmarked on my Intel i9-9900K and my Macbook Air with an M1 processor. The benchmark command forces zstd to compress without any matches, using only literals compression, and measures only Huffman decompression speed: ``` zstd -b1e1 --compress-literals --zstd=tlen=131072 silesia.tar ``` The new fast decoding loops outperform the previous implementation uniformly, but don't beat the x86-64 assembly. Additionally, the fast C decoding loops suffer from the same stability problems that we've seen in the past, where the assembly version doesn't. So even though clang gets close to assembly on x86-64, it still has stability issues. | Arch | Function | Compiler | Default (MB/s) | Assembly (MB/s) | Fast (MB/s) | |---------|----------------|--------------|----------------|-----------------|-------------| | x86-64 | decompress 4X1 | gcc-12.2.0 | 1029.6 | 1308.1 | 1208.1 | | x86-64 | decompress 4X1 | clang-14.0.6 | 1019.3 | 1305.6 | 1276.3 | | x86-64 | decompress 4X2 | gcc-12.2.0 | 1348.5 | 1657.0 | 1374.1 | | x86-64 | decompress 4X2 | clang-14.0.6 | 1027.6 | 1659.9 | 1468.1 | | aarch64 | decompress 4X1 | clang-12.0.5 | 1081.0 | N/A | 1234.9 | | aarch64 | decompress 4X2 | clang-12.0.5 | 1270.0 | N/A | 1516.6 |
69 lines
2.8 KiB
C
69 lines
2.8 KiB
C
/*
|
|
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under both the BSD-style license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
*/
|
|
|
|
/**
|
|
* This fuzz target builds a Huffman decoding table (X1 or X2) from the fuzzer
|
|
* input and then runs single-stream or 4-stream Huffman decompression with it.
|
|
*/
|
|
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include "common/cpu.h"
|
|
#include "common/huf.h"
|
|
#include "fuzz_helpers.h"
|
|
#include "fuzz_data_producer.h"
|
|
|
|
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
|
|
{
|
|
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
|
|
/* Select random parameters: #streams, X1 or X2 decoding, bmi2 */
|
|
int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1);
|
|
int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1);
|
|
int const flags = 0
|
|
| (ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_bmi2 : 0)
|
|
| (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_optimalDepth : 0)
|
|
| (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_preferRepeat : 0)
|
|
| (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_suspectUncompressible : 0)
|
|
| (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0)
|
|
| (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableFast : 0);
|
|
/* Select a random cBufSize - it may be too small */
|
|
size_t const dBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 8 * size + 500);
|
|
size_t const maxTableLog = FUZZ_dataProducer_uint32Range(producer, 1, HUF_TABLELOG_MAX);
|
|
HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(maxTableLog) * sizeof(HUF_DTable));
|
|
size_t const wkspSize = HUF_WORKSPACE_SIZE;
|
|
void* wksp = FUZZ_malloc(wkspSize);
|
|
void* dBuf = FUZZ_malloc(dBufSize);
|
|
dt[0] = maxTableLog * 0x01000001;
|
|
size = FUZZ_dataProducer_remainingBytes(producer);
|
|
|
|
if (symbols == 0) {
|
|
size_t const err = HUF_readDTableX1_wksp(dt, src, size, wksp, wkspSize, flags);
|
|
if (ZSTD_isError(err))
|
|
goto _out;
|
|
} else {
|
|
size_t const err = HUF_readDTableX2_wksp(dt, src, size, wksp, wkspSize, flags);
|
|
if (ZSTD_isError(err))
|
|
goto _out;
|
|
}
|
|
if (streams == 0)
|
|
HUF_decompress1X_usingDTable(dBuf, dBufSize, src, size, dt, flags);
|
|
else
|
|
HUF_decompress4X_usingDTable(dBuf, dBufSize, src, size, dt, flags);
|
|
|
|
_out:
|
|
free(dt);
|
|
free(wksp);
|
|
free(dBuf);
|
|
FUZZ_dataProducer_free(producer);
|
|
return 0;
|
|
}
|