Mirror of https://github.com/facebook/zstd.git (synced 2025-03-06 16:56:49 +02:00)

Merge branch 'dev' into tomerge2051
Commit 3f64b31585

.circleci/config.yml

@@ -7,6 +7,8 @@ jobs:
    # preinstalled to reduce installation time.
    docker:
      - image: fbopensource/zstd-circleci-primary:0.0.1
    # TODO: Re-enable aarch64 build:
    # make aarch64build && make clean
    steps:
      - checkout
      - run:
@@ -14,12 +16,11 @@ jobs:
          command: |
            ./tests/test-license.py
            cc -v; CFLAGS="-O0 -Werror -pedantic" make all && make clean
            make c99build ; make clean
            make c11build ; make clean
            make aarch64build ; make clean
            make -j regressiontest; make clean
            make shortest ; make clean
            make cxxtest ; make clean
            make c99build && make clean
            make c11build && make clean
            make -j regressiontest&& make clean
            make shortest && make clean
            make cxxtest && make clean
  # the second half of the jobs are in this test
  short-tests-1:
    docker:
@@ -84,31 +85,10 @@ workflows:
  commit:
    jobs:
      # Run the tests in parallel
      - short-tests-0:
          filters:
            tags:
              only: /.*/
      - short-tests-1:
          filters:
            tags:
              only: /.*/
      # Create a branch called regression and set it to dev to force a
      # regression test run
      - regression-test:
          filters:
            branches:
              only:
                - regression
      # Only run on release tags.
      - publish-github-release:
          requires:
            - short-tests-0
            - short-tests-1
          filters:
            branches:
              ignore: /.*/
            tags:
              only: /^v\d+\.\d+\.\d+$/
      - short-tests-0
      - short-tests-1
      - regression-test

  nightly:
    triggers:
      - schedule:
@@ -120,7 +100,7 @@ workflows:
                - dev
                - master
    jobs:
      # Run daily long regression tests
      # Run daily regression tests
      - regression-test

233  .github/workflows/dev-long-tests.yml  vendored  Normal file
@@ -0,0 +1,233 @@
name: dev-long-tests
# Tests longer than 10mn

concurrency:
  group: long-${{ github.ref }}
  cancel-in-progress: true

on:
  pull_request:
    branches: [ dev, release, actionsTest ]

jobs:
  # lasts ~24mn
  make-test:
    runs-on: ubuntu-latest
    env:
      DEVNULLRIGHTS: 1
      READFROMBLOCKDEVICE: 1
    steps:
    - uses: actions/checkout@v2
    - name: make test
      run: make test

  # lasts ~26mn
  make-test-osx:
    runs-on: macos-latest
    steps:
    - uses: actions/checkout@v2
    - name: OS-X test
      run: make test # make -c lib all doesn't work because of the fact that it's not a tty

  no-intrinsics-fuzztest:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: no intrinsics fuzztest
      run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest

  tsan-zstreamtest:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: thread sanitizer zstreamtest
      run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream

  # lasts ~15mn
  tsan-fuzztest:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: thread sanitizer fuzztest
      run: CC=clang make tsan-fuzztest

  # lasts ~23mn
  gcc-8-asan-ubsan-testzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-8 + ASan + UBSan + Test Zstd
      run: |
        sudo apt-get -qqq update
        make gcc8install
        CC=gcc-8 make -j uasan-test-zstd </dev/null V=1

  gcc-asan-ubsan-testzstd-32bit:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + Test Zstd, 32bit mode
      run: |
        make libc6install
        make -j uasan-test-zstd32 V=1

  # Note : external libraries must be turned off when using MSAN tests,
  # because they are not msan-instrumented,
  # so any data coming from these libraries is always considered "uninitialized"

  gcc-8-asan-ubsan-fuzz:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-8 + ASan + UBSan + Fuzz Test
      run: |
        make gcc8install
        CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest

  gcc-asan-ubsan-fuzz32:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + Fuzz Test 32bit
      run: |
        make libc6install
        CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest

  asan-ubsan-regression:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + Regression Test
      run: make -j uasanregressiontest

  msan-regression:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: MSan + Regression Test
      run: make -j msanregressiontest

  clang-msan-fuzz:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: clang + MSan + Fuzz Test
      run: |
        sudo apt-get -qqq update
        sudo apt-get install clang
        CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest

  # lasts ~24mn
  clang-msan-testzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: clang + MSan + Test Zstd
      run: |
        sudo apt-get update
        sudo apt-get install clang
        CC=clang make msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0 V=1

  armfuzz:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Qemu ARM emulation + Fuzz Test
      run: |
        sudo apt-get -qqq update
        make arminstall
        make armfuzz

  valgrind-fuzz-test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: valgrind + fuzz test stack mode # ~ 7mn
      shell: 'script -q -e -c "bash {0}"'
      run: |
        make valgrindinstall
        make -C tests valgrindTest
        make clean
        make -C tests test-fuzzer-stackmode

  mingw-long-test:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        include: [
          { compiler: clang, platform: x64, action: build, script: "MOREFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion -Wno-unused-command-line-argument -Wno-implicit-int-float-conversion' make -j allzstd V=1"},
          { compiler: gcc, platform: x64, action: test, script: ""},
        ]
    steps:
    - uses: actions/checkout@v2
    - name: Mingw long test
      run: |
        $env:PATH_ORIGINAL = $env:PATH
        $env:PATH_MINGW32 = "C:\msys64\mingw32\bin"
        $env:PATH_MINGW64 = "C:\msys64\mingw64\bin"
        COPY C:\msys64\usr\bin\make.exe C:\msys64\mingw32\bin\make.exe
        COPY C:\msys64\usr\bin\make.exe C:\msys64\mingw64\bin\make.exe
        IF ("${{matrix.platform}}" -eq "x64")
        {
            $env:PATH = $env:PATH_MINGW64 + ";" + $env:PATH_ORIGINAL
        }
        ELSEIF ("${{matrix.platform}}" -eq "x86")
        {
            $env:PATH = $env:PATH_MINGW32 + ";" + $env:PATH_ORIGINAL
        }
        IF ("${{matrix.action}}" -eq "build")
        {
            make -v
            sh -c "${{matrix.compiler}} -v"
            ECHO "Building zlib to static link"
            $env:CC = "${{matrix.compiler}}"
            sh -c "cd .. && git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib"
            sh -c "cd ../zlib && make -f win32/Makefile.gcc libz.a"
            ECHO "Building zstd"
            $env:CPPFLAGS = "-I../../zlib"
            $env:LDFLAGS = "../../zlib/libz.a"
            sh -c "${{matrix.script}}"
        }
        ELSEIF ("${{matrix.action}}" -eq "test")
        {
            ECHO "Testing ${{matrix.compiler}} ${{matrix.platform}}"
            $env:CC = "gcc"
            $env:CXX = "g++"
            MKDIR build\cmake\build
            CD build\cmake\build
            $env:FUZZERTEST = "-T2mn"
            $env:ZSTREAM_TESTTIME = "-T2mn"
            cmake -G "Visual Studio 14 2015 Win64" ..
            cd ..\..\..
            make clean
        }

  # lasts ~20mn
  oss-fuzz:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        sanitizer: [address, undefined, memory]
    steps:
    - name: Build Fuzzers (${{ matrix.sanitizer }})
      id: build
      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
      with:
        oss-fuzz-project-name: 'zstd'
        dry-run: false
        sanitizer: ${{ matrix.sanitizer }}
    - name: Run Fuzzers (${{ matrix.sanitizer }})
      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
      with:
        oss-fuzz-project-name: 'zstd'
        fuzz-seconds: 600
        dry-run: false
        sanitizer: ${{ matrix.sanitizer }}
    - name: Upload Crash
      uses: actions/upload-artifact@v1
      if: failure() && steps.build.outcome == 'success'
      with:
        name: ${{ matrix.sanitizer }}-artifacts
        path: ./out/artifacts

396  .github/workflows/dev-short-tests.yml  vendored  Normal file
@@ -0,0 +1,396 @@
name: dev-short-tests
# Faster tests: mostly build tests, along with some other
# misc tests

concurrency:
  group: fast-${{ github.ref }}
  cancel-in-progress: true

on:
  pull_request:
    branches: [ dev, release, actionsTest ]

jobs:
  linux-kernel:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: linux kernel, library + build + test
      run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"

  benchmarking:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: make benchmarking
      run: make benchmarking

  check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: make check on 32-bit
      env:
        CHECK_CONSTRAINED_MEM: true
      run: |
        sudo apt update
        APT_PACKAGES="gcc-multilib" make apt-install
        CFLAGS="-m32 -O1 -fstack-protector" make check V=1

  gcc-7-libzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-7 + libzstdmt compilation
      run: |
        sudo apt-get -qqq update
        make gcc7install
        CC=gcc-7 CFLAGS=-Werror make -j all
        make clean
        LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt

  # candidate test (to check) : underlink test
  # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked

  cmake-build-and-test-check:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: cmake build and test check
      run: |
        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
        cp -r ./ "../zstd source"
        cd "../zstd source"
        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild

  cpp-gnu90-c99-compatibility:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: C++, gnu90 and c99 compatibility
      run: |
        make cxxtest
        make clean
        make gnu90build
        make clean
        make c99build
        make clean
        make travis-install # just ensures `make install` works

  mingw-cross-compilation:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: mingw cross-compilation
      run: |
        # sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
        sudo apt-get -qqq update
        sudo apt-get install gcc-mingw-w64
        CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd

  armbuild:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ARM Build Test
      run: |
        sudo apt-get -qqq update
        make arminstall
        make armbuild

  bourne-shell:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Bourne shell compatibility (shellcheck)
      run: |
        wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
        tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz
        shellcheck-v0.7.1/shellcheck --shell=sh --severity=warning --exclude=SC2010 tests/playTests.sh

  zlib-wrapper:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: zlib wrapper test
      run: |
        sudo apt-get -qqq update
        make valgrindinstall
        make -C zlibWrapper test
        make -C zlibWrapper valgrindTest

  lz4-threadpool-libs:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: LZ4, thread pool, and libs build testslib wrapper test
      run: |
        make lz4install
        make -C tests test-lz4
        make check < /dev/null | tee # mess with lz4 console detection
        make clean
        make -C tests test-pool
        make clean
        bash tests/libzstd_builds.sh

  gcc-make-tests-32bit:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Make all, 32bit mode
      run: |
        sudo apt-get -qqq update
        make libc6install
        CFLAGS="-Werror -m32" make -j all32

  gcc-8-make:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-8 build
      run: |
        sudo apt-get -qqq update
        make gcc8install
        CC=gcc-8 CFLAGS="-Werror" make -j all

  implicit-fall-through:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: -Wimplicit-fallthrough build
      run: |
        make clean
        CC=gcc MOREFLAGS="-Werror -Wimplicit-fallthrough=2 -O0" make -C lib -j libzstd.a ZSTD_LEGACY_SUPPORT=0
        make clean
        CC=clang MOREFLAGS="-Werror -Wimplicit-fallthrough -O0" make -C lib -j libzstd.a ZSTD_LEGACY_SUPPORT=0


  visual-2019:
    runs-on: windows-latest
    strategy:
      matrix:
        platform: [x64, Win32]
        configuration: [Debug, Release]
    steps:
    - uses: actions/checkout@v2
    - name: Add MSBuild to PATH
      uses: microsoft/setup-msbuild@v1.0.2
    - name: Build
      working-directory: ${{env.GITHUB_WORKSPACE}}
      # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
      run: >
        msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v142
        /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}

  visual-2015:
    # only GH actions windows-2016 contains VS 2015
    runs-on: windows-2016
    strategy:
      matrix:
        platform: [x64, Win32]
        configuration: [Debug, Release]
    steps:
    - uses: actions/checkout@v2
    - name: Add MSBuild to PATH
      uses: microsoft/setup-msbuild@v1.0.2
    - name: Build
      working-directory: ${{env.GITHUB_WORKSPACE}}
      run: >
        msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140
        /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}

  minimal-decompressor-macros:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: minimal decompressor macros
      run: |
        make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
        make clean && make check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
        make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
        make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
        make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
        make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
        make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
        make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"

  dynamic-bmi2:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: dynamic bmi2 tests
      run: |
        make clean && make -j check MOREFLAGS="-O0 -Werror -mbmi2"
        make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=1"
        make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=1 -mbmi2"
        make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=0"
        make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=0 -mbmi2"

  test-variants:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: make all variants & validate
      run: |
        make -j -C programs allVariants MOREFLAGS=-O0
        ./tests/test-variants.sh


  qemu-consistency:
    name: QEMU ${{ matrix.name }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
      matrix:
        include: [
          { name: ARM, xcc_pkg: gcc-arm-linux-gnueabi, xcc: arm-linux-gnueabi-gcc, xemu_pkg: qemu-system-arm, xemu: qemu-arm-static },
          { name: ARM64, xcc_pkg: gcc-aarch64-linux-gnu, xcc: aarch64-linux-gnu-gcc, xemu_pkg: qemu-system-arm, xemu: qemu-aarch64-static },
          { name: PPC, xcc_pkg: gcc-powerpc-linux-gnu, xcc: powerpc-linux-gnu-gcc, xemu_pkg: qemu-system-ppc, xemu: qemu-ppc-static },
          { name: PPC64LE, xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc, xemu: qemu-ppc64le-static },
          { name: S390X, xcc_pkg: gcc-s390x-linux-gnu, xcc: s390x-linux-gnu-gcc, xemu_pkg: qemu-system-s390x, xemu: qemu-s390x-static },
          { name: MIPS, xcc_pkg: gcc-mips-linux-gnu, xcc: mips-linux-gnu-gcc, xemu_pkg: qemu-system-mips, xemu: qemu-mips-static },
          { name: M68K, xcc_pkg: gcc-m68k-linux-gnu, xcc: m68k-linux-gnu-gcc, xemu_pkg: qemu-system-m68k, xemu: qemu-m68k-static },
        ]
    env: # Set environment variables
      XCC: ${{ matrix.xcc }}
      XEMU: ${{ matrix.xemu }}
    steps:
    - uses: actions/checkout@v2 # https://github.com/actions/checkout
    - name: apt update & install
      run: |
        sudo apt-get update
        sudo apt-get install gcc-multilib g++-multilib qemu-utils qemu-user-static
        sudo apt-get install ${{ matrix.xcc_pkg }} ${{ matrix.xemu_pkg }}
    - name: Environment info
      run: |
        echo && which $XCC
        echo && $XCC --version
        echo && $XCC -v # Show built-in specs
        echo && which $XEMU
        echo && $XEMU --version
    - name: ARM
      if: ${{ matrix.name == 'ARM' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: ARM64
      if: ${{ matrix.name == 'ARM64' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: PPC
      if: ${{ matrix.name == 'PPC' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: PPC64LE
      if: ${{ matrix.name == 'PPC64LE' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: S390X
      if: ${{ matrix.name == 'S390X' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: MIPS
      if: ${{ matrix.name == 'MIPS' }}
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
    - name: M68K
      if: ${{ matrix.name == 'M68K' }}
      continue-on-error: true # disable reporting errors (alignment issues)
      run: |
        LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check

  mingw-short-test:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        include: [
          { compiler: gcc, platform: x64, script: "CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"},
          { compiler: gcc, platform: x86, script: "CFLAGS=-Werror make -j allzstd"},
          { compiler: clang, platform: x64, script: "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"},
        ]
    steps:
    - uses: actions/checkout@v2
    - name: Mingw short test
      run: |
        ECHO "Building ${{matrix.compiler}} ${{matrix.platform}}"
        $env:PATH_ORIGINAL = $env:PATH
        $env:PATH_MINGW32 = "C:\msys64\mingw32\bin"
        $env:PATH_MINGW64 = "C:\msys64\mingw64\bin"
        COPY C:\msys64\usr\bin\make.exe C:\msys64\mingw32\bin\make.exe
        COPY C:\msys64\usr\bin\make.exe C:\msys64\mingw64\bin\make.exe
        IF ("${{matrix.platform}}" -eq "x64")
        {
            $env:PATH = $env:PATH_MINGW64 + ";" + $env:PATH_ORIGINAL
        }
        ELSEIF ("${{matrix.platform}}" -eq "x86")
        {
            $env:PATH = $env:PATH_MINGW32 + ";" + $env:PATH_ORIGINAL
        }
        make -v
        sh -c "${{matrix.compiler}} -v"
        $env:CC = "${{matrix.compiler}}"
        sh -c "${{matrix.script}}"
        ECHO "Testing ${{matrix.compiler}} ${{matrix.platform}}"
        make clean
        make check


  visual-runtime-tests:
    runs-on: windows-latest
    strategy:
      matrix:
        platform: [x64, Win32]
        configuration: [Release]
    steps:
    - uses: actions/checkout@v2
    - name: Add MSBuild to PATH
      uses: microsoft/setup-msbuild@v1.0.2
    - name: Build and run tests
      working-directory: ${{env.GITHUB_WORKSPACE}}
      env:
        ZSTD_BIN: ./zstd.exe
        DATAGEN_BIN: ./datagen.exe
      # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
      run: |
        msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v142 /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}
        COPY build\VS2010\bin\${{matrix.platform}}_${{matrix.configuration}}\*.exe tests\
        CD tests
        sh -e playTests.sh
        DIR
        .\fuzzer.exe -T2m

  # This test currently fails on Github Actions specifically.
  # Possible reason : TTY emulation.
  # Note that the same test works fine locally and on travisCI.
  # This will have to be fixed before transferring the test to GA.
  # versions-compatibility:
  #   runs-on: ubuntu-latest
  #   steps:
  #   - uses: actions/checkout@v2
  #   - name: Versions Compatibility Test
  #     run: |
  #       make -C tests versionsTest


  # For reference : icc tests
  # icc tests are currently failing on Github Actions, likely to issues during installation stage
  # To be fixed later
  #
  # icc:
  #   name: icc-check
  #   runs-on: ubuntu-latest
  #   steps:
  #   - name: install icc
  #     run: |
  #       export DEBIAN_FRONTEND=noninteractive
  #       sudo apt-get -qqq update
  #       sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
  #       sudo wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
  #       sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
  #       sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
  #       sudo apt-get update
  #       sudo apt-get install -y intel-basekit intel-hpckit
  #   - uses: actions/checkout@v2
  #   - name: make check
  #     run: |
  #       make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check

225  .github/workflows/generic-dev.yml  vendored
@@ -1,225 +0,0 @@
name: generic-dev

on:
  pull_request:
    branches: [ dev, release, actionsTest ]

jobs:

# Dev PR jobs that still have to be migrated from travis
#
# versionTag (only on release tags)
# valgrindTest (keeps failing for some reason. need investigation)
# staticAnalyze (need trusty so need self-hosted)
# pcc-fuzz: (need trusty so need self-hosted)
# min-decomp-macros (flakey)
#
# setting up self-hosted is pretty straightforward, but
# I need admins permissions to the repo for that it looks like
# So I'm tabling that for now
#
# The release branch exclusive jobs will be in a separate
# workflow file (the osx tests and meson build that is)

  benchmarking:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: make benchmarking
      run: make benchmarking

  test:
    runs-on: ubuntu-latest
    env:
      DEVNULLRIGHTS: 1
      READFROMBLOCKDEVICE: 1
    steps:
    - uses: actions/checkout@v2
    - name: make test
      run: make test

  check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: make check on 32-bit
      run: |
        sudo apt update
        APT_PACKAGES="gcc-multilib" make apt-install
        CFLAGS="-m32 -O1 -fstack-protector" make check V=1

  gcc-7-libzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-7 + libzstdmt compilation
      run: |
        make gcc7install
        CC=gcc-7 CFLAGS=-Werror make -j all
        make clean
        LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt

  # candidate test (to check) : underlink test
  # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked

  gcc-8-asan-ubsan-testzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-8 + ASan + UBSan + Test Zstd
      run: |
        make gcc8install
        CC=gcc-8 CFLAGS="-Werror" make -j all
        make clean
        CC=gcc-8 make -j uasan-test-zstd </dev/null V=1

  gcc-asan-ubsan-testzstd-32bit:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + Test Zstd, 32bit mode
      run: |
        make libc6install
        CFLAGS="-Werror -m32" make -j all32
        make clean
        make -j uasan-test-zstd32 V=1

  clang-msan-testzstd:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: clang + MSan + Test Zstd
      run: |
        sudo apt-get update
        sudo apt-get install clang
        CC=clang make msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0 V=1

  # Note : external libraries must be turned off when using MSAN tests,
  # because they are not msan-instrumented,
  # so any data coming from these libraries is always considered "uninitialized"

  cmake-build-and-test-check:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: cmake build and test check
      run: |
        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
        cp -r ./ "../zstd source"
        cd "../zstd source"
        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild

  gcc-8-asan-ubsan-fuzz:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: gcc-8 + ASan + UBSan + Fuzz Test
      run: |
        make gcc8install
        CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest

  gcc-asan-ubsan-fuzz32:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + Fuzz Test 32bit
      run: |
        make libc6install
        CFLAGS="-O2 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest

  clang-msan-fuzz:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: clang + MSan + Fuzz Test
      run: |
        sudo apt-get update
        sudo apt-get install clang
        CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest

  asan-ubsan-msan-regression:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: ASan + UBSan + MSan + Regression Test
      run: |
        make -j uasanregressiontest
        make clean
        make -j msanregressiontest

  cpp-gnu90-c99-compatibility:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: C++, gnu90 and c99 compatibility
      run: |
        make cxxtest
        make clean
        make gnu90build
        make clean
        make c99build
        make clean
        make travis-install # just ensures `make install` works

  mingw-cross-compilation:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: mingw cross-compilation
      run: |
        # sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
        sudo apt-get install gcc-mingw-w64
        CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd

  # TODO: Broken test - fix and uncomment
  # armbuild:
  #   runs-on: ubuntu-16.04 # doesn't work on latest
  #   steps:
  #   - uses: actions/checkout@v2
  #   - name: ARM Build Test
  #     run: |
  #       make arminstall
  #       make armbuild

  # TODO: Broken test - fix and uncomment
  # armfuzz:
  #   runs-on: ubuntu-16.04 # doesn't work on latest
  #   steps:
  #   - uses: actions/checkout@v2
  #   - name: Qemu ARM emulation + Fuzz Test
  #     run: |
  #       make arminstall
  #       make armfuzz

  bourne-shell:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Bourne shell compatibility (shellcheck)
      run: |
        wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
        tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz
        shellcheck-v0.7.1/shellcheck --shell=sh --severity=warning --exclude=SC2010 tests/playTests.sh

  # For reference : icc tests
  # icc tests are currently failing on Github Actions, likely to issues during installation stage
  # To be fixed later
  #
  # icc:
  #   name: icc-check
  #   runs-on: ubuntu-latest
  #   steps:
  #   - name: install icc
  #     run: |
  #       export DEBIAN_FRONTEND=noninteractive
  #       sudo apt-get -qqq update
  #       sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
  #       sudo wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
  #       sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
  #       sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
  #       sudo apt-get update
  #       sudo apt-get install -y intel-basekit intel-hpckit
  #   - uses: actions/checkout@v2
  #   - name: make check
  #     run: |
  #       make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check

59  .github/workflows/generic-release.yml  vendored
@@ -1,59 +0,0 @@
name: generic-release

on:
  pull_request:
    # This will eventually only be for pushes to release
    # but for dogfooding purposes, I'm running it even
    # on dev pushes
    branches: [ dev, release, actionsTest ]

jobs:
  # missing jobs
  #
  # ppc64le + fuzz test
  # Qemu PPC64 + Fuzz test
  # Qemu aarch64 + Fuzz Test (on Xenial)
  # versions comp
  # meson test

  osx:
    runs-on: macos-10.15
    steps:
    - uses: actions/checkout@v2
    - name: OS-X
      run: |
        make test
        # make -c lib all (need to fix. not working right now)

  tsan:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: thread sanitizer
      run: |
        CC=clang make tsan-test-zstream
        CC=clang make tsan-fuzztest

  zlib-wrapper:
    runs-on: ubuntu-16.04
    steps:
    - uses: actions/checkout@v2
    - name: zlib wrapper test
      run: |
        make valgrindinstall
        make -C zlibWrapper test
        make -C zlibWrapper valgrindTest

  lz4-threadpool-partial-libs:
    runs-on: ubuntu-16.04
    steps:
    - uses: actions/checkout@v2
    - name: LZ4, thread pool, and partial libs testslib wrapper test
      run: |
        make lz4install
        make -C tests test-lz4
        make check < /dev/null | tee # mess with lz4 console detection
        make clean
        make -C tests test-pool
        make clean
        bash tests/libzstd_partial_builds.sh

13  .github/workflows/linux-kernel.yml  vendored
@@ -1,13 +0,0 @@
name: linux-kernel

on:
  pull_request:
    branches: [ dev, release, actionsTest ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Create linux kernel library + build + test
      run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"

30  .github/workflows/main.yml  vendored
@@ -1,30 +0,0 @@
name: CIFuzz
on: [pull_request]
jobs:
  Fuzzing:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        sanitizer: [address, undefined]
    steps:
    - name: Build Fuzzers (${{ matrix.sanitizer }})
      id: build
      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
      with:
        oss-fuzz-project-name: 'zstd'
        dry-run: false
        sanitizer: ${{ matrix.sanitizer }}
    - name: Run Fuzzers (${{ matrix.sanitizer }})
      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
      with:
        oss-fuzz-project-name: 'zstd'
        fuzz-seconds: 600
        dry-run: false
        sanitizer: ${{ matrix.sanitizer }}
    - name: Upload Crash
      uses: actions/upload-artifact@v1
      if: failure() && steps.build.outcome == 'success'
      with:
        name: ${{ matrix.sanitizer }}-artifacts
        path: ./out/artifacts

1  .gitignore  vendored
@@ -29,6 +29,7 @@ dictionary
NUL

# Build artefacts
contrib/linux-kernel/linux/
projects/
bin/
.buckd/

108  .travis.yml
@@ -1,5 +1,6 @@
# Medium Tests: Run on all commits/PRs to dev branch

# Travis CI is used to test platforms that github-actions currently doesn't support
# without either self-hosting or some finnicky work-around. Also, some tests
# are troublesome to migrate since GH Actions runs tests not in a tty.
language: c

git:
@@ -18,47 +19,48 @@ addons:

env:
  global:
    - FUZZERTEST=-T2mn
      ZSTREAM_TESTTIME=-T2mn
    - FUZZERTEST=-T1mn
      ZSTREAM_TESTTIME=-T1mn
      DECODECORPUS_TESTTIME=-T1mn


matrix:
  fast_finish: true
  include:
    - name: S390X (big endian) + Fuzz test
      dist: trusty
      arch: s390x
      script:
        - FUZZER_FLAGS=--no-big-tests make -C tests fuzztest

    - name: S390X (big endian) + Fuzz test + no intrinsics
      dist: trusty
      arch: s390x
      script:
        - MOREFLAGS="-DZSTD_NO_INTRINSICS" FUZZER_FLAGS=--no-big-tests make -C tests fuzztest

    - name: arm64 # ~2.5 mn
      os: linux
      arch: arm64
      script:
        - make check

    - name: Minimal Decompressor Macros # ~5mn
    - name: arm64fuzz
      os: linux
      arch: arm64
      script:
        - make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
        - make clean && make check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
        - make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
        - make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
        - make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
        - make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
        - make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
        - make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
        - make -C tests fuzztest

    - name: static analyzer scanbuild # ~26mn
      # TODO: migrate to GH Actions once newest clang staticanalyze warnings are fixed
    - name: static analyzer scanbuild # ~8mn
      dist: trusty # note : it's important to pin down a version of static analyzer, since different versions report different false positives
      script:
        - make staticAnalyze

    - name: Valgrind + Fuzz Test Stack Mode # ~ 7mn
      # GH actions can't run this command on OS-X, non-tty issues
    - name: OS-X make all lib
      os: osx
      script:
        - make valgrindinstall
        - make -C tests clean valgrindTest
        - make clean
        - make -C tests test-fuzzer-stackmode

    - name: Qemu ARM emulation + Fuzz Test # ~13.5mn
      script:
        - make arminstall
        - make armfuzz
        - make -C lib all

    # Introduced to check compat with old toolchains, to prevent e.g. #1872
    - name: ARM Build Test (on Trusty)
@@ -67,64 +69,30 @@ matrix:
        - make arminstall
        - make armbuild

    - name: Qemu PPC + Fuzz Test # ~13mn
      dist: trusty # it seems ppc cross-compilation fails on "current"
      script:
        - make ppcinstall
        - make ppcfuzz

    # check release number (release only)
    # check release number (release/new tag only)
    - name: Tag-Specific Test
      if: tag =~ ^v[0-9]\.[0-9]
      script:
        - make -C tests checkTag
        - tests/checkTag "$TRAVIS_BRANCH"

    # tests for release branch and cron job only
    - name: OS-X # ~13mn
      if: branch = release
      os: osx
    - name: PPC64LE + Fuzz test # ~13mn
      arch: ppc64le
      env:
        - FUZZER_FLAGS=--no-big-tests
        - MOREFLAGS="-static"
      script:
        - make test
        - make -C lib all
        - cat /proc/cpuinfo
        - make -C tests fuzztest

    - name: Versions Compatibility Test # 11.5mn
      if: branch = release
      # This test currently fails on GA specifically, for no obvious reason
      # (it works fine on travisCI, and on local test platforms).
    - name: Versions Compatibility Test # ~6mn
      script:
        - make -C tests versionsTest

    - name: thread sanitizer # ~29mn
      if: branch = release
      script:
        - make clang38install
        - CC=clang-3.8 make tsan-test-zstream
        - CC=clang-3.8 make tsan-fuzztest

    - name: PPC64LE + Fuzz test # ~13mn
      if: branch = release
      arch: ppc64le
      script:
        - cat /proc/cpuinfo
        - make test

    - name: Qemu PPC64 + Fuzz test # ~13mn, presumed Big-Endian (?)
      dist: trusty # note : PPC64 cross-compilation for Qemu tests seems broken on Xenial
      if: branch = release
      script:
        - make ppcinstall
        - make ppc64fuzz

    # note : we already have aarch64 tests on hardware
    - name: Qemu aarch64 + Fuzz Test (on Xenial) # ~14mn
      if: branch = release
      dist: xenial
      script:
        - make arminstall
        - make aarch64fuzz

    # meson dedicated test
    - name: Xenial (Meson + clang) # ~15mn
      if: branch = release
      dist: bionic
      language: cpp
      compiler: clang

CONTRIBUTING.md

@@ -47,7 +47,7 @@ Our contribution process works in three main stages:
* Topic and development:
  * Make a new branch on your fork about the topic you're developing for
```
# branch names should be consise but sufficiently informative
# branch names should be concise but sufficiently informative
git checkout -b <branch-name>
git push origin <branch-name>
```
@@ -104,7 +104,7 @@ Our contribution process works in three main stages:
issue at hand, then please indicate this by requesting that an issue be closed by commenting.
* Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
that the change must make it to an official zstd release for it to be meaningful. We recommend
that contributers track the activity on their pull request and corresponding issue(s) page(s) until
that contributors track the activity on their pull request and corresponding issue(s) page(s) until
their change makes it to the next release of zstd. Users will often discover bugs in your code or
suggest ways to refine and improve your initial changes even after the pull request is merged.

@@ -270,15 +270,15 @@ for level 1 compression on Zstd. Typically this means, you have identified a section of
code that you think can be made to run faster.

The first thing you will want to do is make sure that the piece of code is actually taking up
a notable amount of time to run. It is usually not worth optimzing something which accounts for less than
a notable amount of time to run. It is usually not worth optimizing something which accounts for less than
0.0001% of the total running time. Luckily, there are tools to help with this.
Profilers will let you see how much time your code spends inside a particular function.
If your target code snippit is only part of a function, it might be worth trying to
isolate that snippit by moving it to its own function (this is usually not necessary but
If your target code snippet is only part of a function, it might be worth trying to
isolate that snippet by moving it to its own function (this is usually not necessary but
might be).

Most profilers (including the profilers dicusssed below) will generate a call graph of
functions for you. Your goal will be to find your function of interest in this call grapch
Most profilers (including the profilers discussed below) will generate a call graph of
functions for you. Your goal will be to find your function of interest in this call graph
and then inspect the time spent inside of it. You might also want to to look at the
annotated assembly which most profilers will provide you with.

@@ -301,16 +301,16 @@ $ zstd -b1 -i5 <my-data> # this will run for 5 seconds
5. Once you run your benchmarking script, switch back over to instruments and attach your
process to the time profiler. You can do this by:
   * Clicking on the `All Processes` drop down in the top left of the toolbar.
   * Selecting your process from the dropdown. In my case, it is just going to be labled
   * Selecting your process from the dropdown. In my case, it is just going to be labeled
   `zstd`
   * Hitting the bright red record circle button on the top left of the toolbar
6. You profiler will now start collecting metrics from your bencharking script. Once
6. You profiler will now start collecting metrics from your benchmarking script. Once
you think you have collected enough samples (usually this is the case after 3 seconds of
recording), stop your profiler.
7. Make sure that in toolbar of the bottom window, `profile` is selected.
8. You should be able to see your call graph.
   * If you don't see the call graph or an incomplete call graph, make sure you have compiled
   zstd and your benchmarking scripg using debug flags. On mac and linux, this just means
   zstd and your benchmarking script using debug flags. On mac and linux, this just means
   you will have to supply the `-g` flag alone with your build script. You might also
   have to provide the `-fno-omit-frame-pointer` flag
9. Dig down the graph to find your function call and then inspect it by double clicking
@@ -329,7 +329,7 @@ Some general notes on perf:
counter statistics. Perf uses a high resolution timer and this is likely one
of the first things your team will run when assessing your PR.
* Perf has a long list of hardware counters that can be viewed with `perf --list`.
When measuring optimizations, something worth trying is to make sure the handware
When measuring optimizations, something worth trying is to make sure the hardware
counters you expect to be impacted by your change are in fact being so. For example,
if you expect the L1 cache misses to decrease with your change, you can look at the
counter `L1-dcache-load-misses`
@@ -368,7 +368,7 @@ Follow these steps to link travis-ci with your github fork of zstd
TODO

### appveyor
Follow these steps to link circle-ci with your girhub fork of zstd
Follow these steps to link circle-ci with your github fork of zstd

1. Make sure you are logged into your github account
2. Go to https://www.appveyor.com/

16  Makefile
@@ -148,7 +148,7 @@ clean:
#------------------------------------------------------------------------------
# make install is validated only for Linux, macOS, Hurd and some BSD targets
#------------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku))
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku AIX))

HOST_OS = POSIX

@@ -217,7 +217,7 @@ armbuild: clean
    CC=arm-linux-gnueabi-gcc CFLAGS="-Werror" $(MAKE) allzstd

aarch64build: clean
    CC=aarch64-linux-gnu-gcc CFLAGS="-Werror" $(MAKE) allzstd
    CC=aarch64-linux-gnu-gcc CFLAGS="-Werror -O0" $(MAKE) allzstd

ppcbuild: clean
    CC=powerpc-linux-gnu-gcc CFLAGS="-m32 -Wno-attributes -Werror" $(MAKE) -j allzstd
@@ -381,23 +381,23 @@ cmakebuild:

c89build: clean
    $(CC) -v
    CFLAGS="-std=c89 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
    CFLAGS="-std=c89 -Werror -O0" $(MAKE) allmost # will fail, due to missing support for `long long`

gnu90build: clean
    $(CC) -v
    CFLAGS="-std=gnu90 -Werror" $(MAKE) allmost
    CFLAGS="-std=gnu90 -Werror -O0" $(MAKE) allmost

c99build: clean
    $(CC) -v
    CFLAGS="-std=c99 -Werror" $(MAKE) allmost
    CFLAGS="-std=c99 -Werror -O0" $(MAKE) allmost

gnu99build: clean
    $(CC) -v
    CFLAGS="-std=gnu99 -Werror" $(MAKE) allmost
    CFLAGS="-std=gnu99 -Werror -O0" $(MAKE) allmost

c11build: clean
    $(CC) -v
    CFLAGS="-std=c11 -Werror" $(MAKE) allmost
    CFLAGS="-std=c11 -Werror -O0" $(MAKE) allmost

bmix64build: clean
    $(CC) -v
@@ -416,5 +416,5 @@ bmi32build: clean
staticAnalyze: SCANBUILD ?= scan-build
staticAnalyze:
    $(CC) -v
    CC=$(CC) CPPFLAGS=-g $(SCANBUILD) --status-bugs -v $(MAKE) allzstd examples contrib
    CC=$(CC) CPPFLAGS=-g $(SCANBUILD) --status-bugs -v $(MAKE) zstd
endif

36  Package.swift  Normal file
@@ -0,0 +1,36 @@
// swift-tools-version:5.0
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

let package = Package(
    name: "zstd",
    platforms: [
        .macOS(.v10_10), .iOS(.v9), .tvOS(.v9)
    ],
    products: [
        // Products define the executables and libraries a package produces, and make them visible to other packages.
        .library(
            name: "libzstd",
            targets: [ "libzstd" ])
    ],
    dependencies: [
        // Dependencies declare other packages that this package depends on.
        // .package(url: /* package url */, from: "1.0.0"),
    ],
    targets: [
        // Targets are the basic building blocks of a package. A target can define a module or a test suite.
        // Targets can depend on other targets in this package, and on products in packages this package depends on.
        .target(
            name: "libzstd",
            path: "lib",
            sources: [ "common", "compress", "decompress", "dictBuilder" ],
            publicHeadersPath: "modulemap",
            cSettings: [
                .headerSearchPath(".")
            ])
    ],
    swiftLanguageVersions: [.v5],
    cLanguageStandard: .gnu11,
    cxxLanguageStandard: .gnucxx14
)

README.md

@@ -4,7 +4,8 @@ __Zstandard__, or `zstd` as short version, is a fast lossless compression algorithm,
targeting real-time compression scenarios at zlib-level and better compression ratios.
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).

The project is provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available.
This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
Should your project require another programming language,
a list of known ports and bindings is provided on [Zstandard homepage](http://www.zstd.net/#other-languages).
@@ -17,8 +18,8 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://www.zstd.net/#other-languages).
[![Build status][CirrusDevBadge]][CirrusLink]
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]

[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/facebook/zstd
[travisDevBadge]: https://api.travis-ci.com/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.com/facebook/zstd
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/xt38wbdxjk5mrbem/branch/dev?svg=true "Windows test suite"
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"

178  appveyor.yml
@@ -23,34 +23,6 @@
      SCRIPT: "make allzstd MOREFLAGS=-static"
      ARTIFACT: "true"
      BUILD: "true"
    - COMPILER: "clang"
      HOST: "mingw"
      PLATFORM: "x64"
      SCRIPT: "MOREFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"
      BUILD: "true"

    - COMPILER: "gcc"
      HOST: "mingw"
      PLATFORM: "x64"
      SCRIPT: ""
      TEST: "cmake"

    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Release"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Release"

    - COMPILER: "clang-cl"
      HOST: "cmake-visual"
@@ -113,56 +85,6 @@
        appveyor PushArtifact zstd-win-release-%PLATFORM%.zip
      )
    )
  - if [%HOST%]==[visual] (
      ECHO *** &&
      ECHO *** Building Visual Studio 2008 %PLATFORM%\%CONFIGURATION% in %APPVEYOR_BUILD_FOLDER% &&
      ECHO *** &&
      msbuild "build\VS2008\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v90 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2008\bin\%PLATFORM%\%CONFIGURATION%\*.exe &&
      MD5sum build/VS2008/bin/%PLATFORM%/%CONFIGURATION%/*.exe &&
      COPY build\VS2008\bin\%PLATFORM%\%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2008_%PLATFORM%_%CONFIGURATION%.exe &&
      ECHO *** &&
      ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
      ECHO *** &&
      msbuild "build\VS2010\zstd.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      msbuild "build\VS2010\zstd.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2010_%PLATFORM%_%CONFIGURATION%.exe &&
      ECHO *** &&
      ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
      ECHO *** &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2012_%PLATFORM%_%CONFIGURATION%.exe &&
      ECHO *** &&
      ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% &&
      ECHO *** &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2013_%PLATFORM%_%CONFIGURATION%.exe &&
      ECHO *** &&
      ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% &&
      ECHO *** &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
      DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
      MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
      COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
    )
  - if [%HOST%]==[cmake-visual] (
      ECHO *** &&
      ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
@@ -186,18 +108,6 @@
      cd ..\..\.. &&
      make clean
    )
  - SET "FUZZERTEST=-T30s"
  - if [%HOST%]==[visual] if [%CONFIGURATION%]==[Release] (
      CD tests &&
      SET ZSTD_BIN=./zstd.exe&&
      SET DATAGEN_BIN=./datagen.exe&&
      sh -e playTests.sh --test-large-data &&
      fullbench.exe -i1 &&
      fullbench.exe -i1 -P0 &&
      fuzzer_VS2012_%PLATFORM%_Release.exe %FUZZERTEST% &&
      fuzzer_VS2013_%PLATFORM%_Release.exe %FUZZERTEST% &&
      fuzzer_VS2015_%PLATFORM%_Release.exe %FUZZERTEST%
    )


# The following tests are for regular pushes
@@ -208,38 +118,26 @@
version: 1.0.{build}
environment:
  matrix:
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Release"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Release"

    - COMPILER: "gcc"
      HOST: "cygwin"
      PLATFORM: "x64"
    - COMPILER: "gcc"
      HOST: "mingw"
      PLATFORM: "x64"
      SCRIPT: "CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"
    - COMPILER: "gcc"
      HOST: "mingw"
      PLATFORM: "x86"
      SCRIPT: "CFLAGS=-Werror make -j allzstd"
    - COMPILER: "clang"
      HOST: "mingw"
      PLATFORM: "x64"
      SCRIPT: "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"

    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Debug"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "x64"
      CONFIGURATION: "Release"
    - COMPILER: "visual"
      HOST: "visual"
      PLATFORM: "Win32"
      CONFIGURATION: "Release"

    - COMPILER: "clang-cl"
      HOST: "cmake-visual"
@@ -260,12 +158,6 @@
      cmake,^
      make
    )
  - if [%HOST%]==[mingw] (
      SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
|
||||
SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
|
||||
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe &&
|
||||
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe
|
||||
)
|
||||
- IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
|
||||
SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
|
||||
)
|
||||
@ -283,30 +175,6 @@
|
||||
ctest -V -L Medium;
|
||||
"
|
||||
)
|
||||
- if [%HOST%]==[mingw] (
|
||||
( if [%PLATFORM%]==[x64] (
|
||||
SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
|
||||
) else if [%PLATFORM%]==[x86] (
|
||||
SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%"
|
||||
) ) &&
|
||||
make -v &&
|
||||
sh -c "%COMPILER% -v" &&
|
||||
set "CC=%COMPILER%" &&
|
||||
sh -c "%SCRIPT%"
|
||||
)
|
||||
- if [%HOST%]==[visual] (
|
||||
ECHO *** &&
|
||||
ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% &&
|
||||
ECHO *** &&
|
||||
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
|
||||
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
|
||||
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
|
||||
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
|
||||
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
|
||||
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
|
||||
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
|
||||
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
|
||||
)
|
||||
- if [%HOST%]==[cmake-visual] (
|
||||
ECHO *** &&
|
||||
ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
|
||||
@ -316,12 +184,22 @@
|
||||
POPD &&
|
||||
ECHO ***
|
||||
)
|
||||
- if [%HOST%]==[visual] (
|
||||
ECHO *** &&
|
||||
ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
|
||||
ECHO *** &&
|
||||
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
|
||||
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
|
||||
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
|
||||
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe
|
||||
)
|
||||
|
||||
|
||||
test_script:
|
||||
- ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
|
||||
- SET "FUZZERTEST=-T10s"
|
||||
- if [%HOST%]==[mingw] (
|
||||
set "CC=%COMPILER%" &&
|
||||
make clean &&
|
||||
make check
|
||||
)
|
||||
)
|
@ -19,10 +19,10 @@ GOTO build
:display_help

echo Syntax: build.generic.cmd msbuild_version msbuild_platform msbuild_configuration msbuild_toolset
echo msbuild_version: VS installed version (VS2012, VS2013, VS2015, VS2017, ...)
echo msbuild_version: VS installed version (VS2012, VS2013, VS2015, VS2017, VS2019, ...)
echo msbuild_platform: Platform (x64 or Win32)
echo msbuild_configuration: VS configuration (Release or Debug)
echo msbuild_toolset: Platform Toolset (v100, v110, v120, v140, v141)
echo msbuild_toolset: Platform Toolset (v100, v110, v120, v140, v141, v142, ...)

EXIT /B 1

@ -43,6 +43,16 @@ IF %msbuild_version% == VS2017 (
IF EXIST %msbuild_vs2017enterprise% SET msbuild=%msbuild_vs2017enterprise%
)

:: VS2019
SET msbuild_vs2019community="%programfiles(x86)%\Microsoft Visual Studio\2019\Community\MSBuild\Current\Bin\MSBuild.exe"
SET msbuild_vs2019professional="%programfiles(x86)%\Microsoft Visual Studio\2019\Professional\MSBuild\Current\Bin\MSBuild.exe"
SET msbuild_vs2019enterprise="%programfiles(x86)%\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\MSBuild.exe"
IF %msbuild_version% == VS2019 (
IF EXIST %msbuild_vs2019community% SET msbuild=%msbuild_vs2019community%
IF EXIST %msbuild_vs2019professional% SET msbuild=%msbuild_vs2019professional%
IF EXIST %msbuild_vs2019enterprise% SET msbuild=%msbuild_vs2019enterprise%
)

SET project="%~p0\..\VS2010\zstd.sln"

SET msbuild_params=/verbosity:minimal /nologo /t:Clean,Build /p:Platform=%msbuild_platform% /p:Configuration=%msbuild_configuration%
@ -21,10 +21,16 @@ add_executable(pzstd ${PROGRAMS_DIR}/util.c ${PZSTD_DIR}/main.cpp ${PZSTD_DIR}/O
set_property(TARGET pzstd APPEND PROPERTY COMPILE_DEFINITIONS "NDEBUG")
set_property(TARGET pzstd APPEND PROPERTY COMPILE_OPTIONS "-Wno-shadow")

if (ZSTD_BUILD_SHARED)
set(ZSTD_LIB libzstd_shared)
else()
set(ZSTD_LIB libzstd_static)
endif()

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
if (CMAKE_USE_PTHREADS_INIT)
target_link_libraries(pzstd libzstd_shared ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(pzstd ${ZSTD_LIB} ${CMAKE_THREAD_LIBS_INIT})
else()
message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
endif()

@ -7,7 +7,7 @@
# in the COPYING file in the root directory of this source tree).
# ################################################################

project(libzstd C)
project(libzstd C ASM)

set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
@ -22,7 +22,7 @@ include_directories(${LIBRARY_DIR} ${LIBRARY_DIR}/common)

file(GLOB CommonSources ${LIBRARY_DIR}/common/*.c)
file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S)
file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c)

set(Sources
@ -106,7 +106,7 @@ if (MSVC)
endif ()

# With MSVC static library needs to be renamed to avoid conflict with import library
if (MSVC)
if (MSVC OR (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
set(STATIC_LIBRARY_BASE_NAME zstd_static)
else ()
set(STATIC_LIBRARY_BASE_NAME zstd)
@ -168,6 +168,7 @@ install(TARGETS ${library_targets}
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
BUNDLE DESTINATION "${CMAKE_INSTALL_BINDIR}"
)

# uninstall target
@ -37,7 +37,9 @@ target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET})
if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
target_link_libraries(zstd rt)
endif ()
install(TARGETS zstd RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
install(TARGETS zstd
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
BUNDLE DESTINATION "${CMAKE_INSTALL_BINDIR}")

if (UNIX)
add_custom_target(zstdcat ALL ${CMAKE_COMMAND} -E create_symlink zstd zstdcat DEPENDS zstd COMMENT "Creating zstdcat symlink")

@ -18,7 +18,7 @@ pzstd_sources = [join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'contrib/pzstd/SkippableFrame.cpp')]
pzstd = executable('pzstd',
pzstd_sources,
cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic', '-Wno-deprecated-declarations' ],
cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-Wno-deprecated-declarations' ],
include_directories: pzstd_includes,
dependencies: [ libzstd_dep, thread_dep ],
install: true)
@ -37,6 +37,7 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
join_paths(zstd_rootdir, 'lib/compress/zstd_opt.c'),
join_paths(zstd_rootdir, 'lib/compress/zstd_ldm.c'),
join_paths(zstd_rootdir, 'lib/decompress/huf_decompress.c'),
join_paths(zstd_rootdir, 'lib/decompress/huf_decompress_amd64.S'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress.c'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress_block.c'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_ddict.c'),
@ -108,6 +109,7 @@ libzstd = library('zstd',
libzstd_sources,
include_directories: libzstd_includes,
c_args: libzstd_c_args,
gnu_symbol_visibility: 'hidden',
dependencies: libzstd_deps,
install: true,
version: zstd_libversion)

@ -14,10 +14,14 @@ project('zstd',
default_options : [
'c_std=gnu99',
'cpp_std=c++11',
'buildtype=release'
'buildtype=release',
'warning_level=3',
# -Wdocumentation does not actually pass, nor do the test binaries,
# so this isn't safe
#'werror=true'
],
version: 'DUMMY',
meson_version: '>=0.47.0')
meson_version: '>=0.48.0')

cc = meson.get_compiler('c')
cxx = meson.get_compiler('cpp')
@ -106,10 +110,8 @@ use_lz4 = lz4_dep.found()
add_project_arguments('-DXXH_NAMESPACE=ZSTD_', language: ['c'])

if [compiler_gcc, compiler_clang].contains(cc_id)
common_warning_flags = [ '-Wextra', '-Wundef', '-Wshadow', '-Wcast-align', '-Wcast-qual' ]
common_warning_flags = [ '-Wundef', '-Wshadow', '-Wcast-align', '-Wcast-qual' ]
if cc_id == compiler_clang
# Should use Meson's own --werror build option
#common_warning_flags += '-Werror'
common_warning_flags += ['-Wconversion', '-Wno-sign-conversion', '-Wdocumentation']
endif
cc_compile_flags = cc.get_supported_arguments(common_warning_flags + ['-Wstrict-prototypes'])

@ -10,7 +10,7 @@

# Read guidelines from https://wiki.gnome.org/Initiatives/GnomeGoals/MesonPorting

option('legacy_level', type: 'integer', min: 0, max: 7, value: '5',
option('legacy_level', type: 'integer', min: 0, max: 7, value: 5,
description: 'Support any legacy format: 7 to 1 for v0.7+ to v0.1+')
option('debug_level', type: 'integer', min: 0, max: 9, value: 1,
description: 'Enable run-time debug. See lib/common/debug.h')

@ -18,7 +18,9 @@ zstd_programs_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'),
join_paths(zstd_rootdir, 'programs/benchzstd.c'),
join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/dibio.c'),
join_paths(zstd_rootdir, 'programs/zstdcli_trace.c')]
join_paths(zstd_rootdir, 'programs/zstdcli_trace.c'),
# needed due to use of private symbol + -fvisibility=hidden
join_paths(zstd_rootdir, 'lib/common/xxhash.c')]

zstd_c_args = libzstd_debug_cflags
if use_multi_thread
@ -29,64 +29,62 @@ ZSTDRTTEST = ['--test-large-data']

test_includes = [ include_directories(join_paths(zstd_rootdir, 'programs')) ]

datagen_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'tests/datagencli.c')]
testcommon_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'programs/benchfn.c'),
join_paths(zstd_rootdir, 'programs/benchzstd.c')]

testcommon = static_library('testcommon',
testcommon_sources,
# needed due to use of private symbol + -fvisibility=hidden
objects: libzstd.extract_all_objects(recursive: false))

testcommon_dep = declare_dependency(link_with: testcommon,
dependencies: libzstd_deps,
include_directories: libzstd_includes)

datagen_sources = [join_paths(zstd_rootdir, 'tests/datagencli.c')]
datagen = executable('datagen',
datagen_sources,
c_args: [ '-DNDEBUG' ],
include_directories: test_includes,
dependencies: libzstd_dep,
dependencies: testcommon_dep,
install: false)

fullbench_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'programs/benchfn.c'),
join_paths(zstd_rootdir, 'programs/benchzstd.c'),
join_paths(zstd_rootdir, 'tests/fullbench.c')]
fullbench_sources = [join_paths(zstd_rootdir, 'tests/fullbench.c')]
fullbench = executable('fullbench',
fullbench_sources,
include_directories: test_includes,
dependencies: libzstd_dep,
dependencies: testcommon_dep,
install: false)

fuzzer_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'tests/fuzzer.c')]
fuzzer_sources = [join_paths(zstd_rootdir, 'tests/fuzzer.c')]
fuzzer = executable('fuzzer',
fuzzer_sources,
include_directories: test_includes,
dependencies: [ libzstd_dep, thread_dep ],
dependencies: [ testcommon_dep, thread_dep ],
install: false)

zstreamtest_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'tests/seqgen.c'),
zstreamtest_sources = [join_paths(zstd_rootdir, 'tests/seqgen.c'),
join_paths(zstd_rootdir, 'tests/zstreamtest.c')]
zstreamtest = executable('zstreamtest',
zstreamtest_sources,
include_directories: test_includes,
dependencies: libzstd_dep,
dependencies: testcommon_dep,
install: false)

paramgrill_sources = [join_paths(zstd_rootdir, 'programs/benchfn.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'programs/benchzstd.c'),
join_paths(zstd_rootdir, 'programs/datagen.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'tests/paramgrill.c')]
paramgrill_sources = [join_paths(zstd_rootdir, 'tests/paramgrill.c')]
paramgrill = executable('paramgrill',
paramgrill_sources,
include_directories: test_includes,
dependencies: [ libzstd_dep, libm_dep ],
dependencies: [ testcommon_dep, libm_dep ],
install: false)

roundTripCrash_sources = [join_paths(zstd_rootdir, 'tests/roundTripCrash.c')]
roundTripCrash = executable('roundTripCrash',
roundTripCrash_sources,
dependencies: [ libzstd_dep ],
dependencies: [ testcommon_dep ],
install: false)

longmatch_sources = [join_paths(zstd_rootdir, 'tests/longmatch.c')]
@ -111,18 +109,14 @@ if 0 < legacy_level and legacy_level <= 4
install: false)
endif

decodecorpus_sources = [join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'tests/decodecorpus.c')]
decodecorpus_sources = [join_paths(zstd_rootdir, 'tests/decodecorpus.c')]
decodecorpus = executable('decodecorpus',
decodecorpus_sources,
include_directories: test_includes,
dependencies: [ libzstd_dep, libm_dep ],
dependencies: [ testcommon_dep, libm_dep ],
install: false)

poolTests_sources = [join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'tests/poolTests.c'),
poolTests_sources = [join_paths(zstd_rootdir, 'tests/poolTests.c'),
join_paths(zstd_rootdir, 'lib/common/pool.c'),
join_paths(zstd_rootdir, 'lib/common/threading.c'),
join_paths(zstd_rootdir, 'lib/common/zstd_common.c'),
@ -130,7 +124,7 @@ poolTests_sources = [join_paths(zstd_rootdir, 'programs/util.c'),
poolTests = executable('poolTests',
poolTests_sources,
include_directories: test_includes,
dependencies: [ libzstd_dep, thread_dep ],
dependencies: [ testcommon_dep, thread_dep ],
install: false)

checkTag_sources = [join_paths(zstd_rootdir, 'tests/checkTag.c')]
@ -186,10 +180,6 @@ test('test-zstream-1',
zstreamtest,
args: ['-v', ZSTREAM_TESTTIME] + FUZZER_FLAGS,
timeout: 240)
test('test-zstream-2',
zstreamtest,
args: ['-mt', '-t1', ZSTREAM_TESTTIME] + FUZZER_FLAGS,
timeout: 120)
test('test-zstream-3',
zstreamtest,
args: ['--newapi', '-t1', ZSTREAM_TESTTIME] + FUZZER_FLAGS,
@ -25,7 +25,7 @@
* Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also
* defined in mem.h (breaking C99 compatibility).
*
* Note: the undefs for xxHash allow Zstd's implementation to coinside with with
* Note: the undefs for xxHash allow Zstd's implementation to coincide with
* standalone xxHash usage (with global defines).
*
* Note: multithreading is enabled for all platforms apart from Emscripten.
@ -43,6 +43,8 @@
#define ZSTD_MULTITHREAD
#endif
#define ZSTD_TRACE 0
/* TODO: Can't amalgamate ASM function */
#define HUF_DISABLE_ASM 1

/* Include zstd_deps.h first with all the options we need enabled. */
#define ZSTD_DEPS_NEED_MALLOC

@ -25,7 +25,7 @@
* Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also
* defined in mem.h (breaking C99 compatibility).
*
* Note: the undefs for xxHash allow Zstd's implementation to coinside with with
* Note: the undefs for xxHash allow Zstd's implementation to coincide with
* standalone xxHash usage (with global defines).
*/
#define DEBUGLEVEL 0
@ -39,6 +39,8 @@
#define ZSTD_LEGACY_SUPPORT 0
#define ZSTD_STRIP_ERROR_STRINGS
#define ZSTD_TRACE 0
/* TODO: Can't amalgamate ASM function */
#define HUF_DISABLE_ASM 1

/* Include zstd_deps.h first with all the options we need enabled. */
#define ZSTD_DEPS_NEED_MALLOC
@ -27,7 +27,6 @@ SKIPPED_FILES = [
"common/pool.h",
"common/threading.c",
"common/threading.h",
"common/zstd_trace.c",
"common/zstd_trace.h",
"compress/zstdmt_compress.h",
"compress/zstdmt_compress.c",
@ -461,7 +460,8 @@ class Freestanding(object):
print(*args, **kwargs)

def _copy_file(self, lib_path):
if not (lib_path.endswith(".c") or lib_path.endswith(".h")):
suffixes = [".c", ".h", ".S"]
if not any((lib_path.endswith(suffix) for suffix in suffixes)):
return
if lib_path in SKIPPED_FILES:
self._log(f"\tSkipping file: {lib_path}")

@ -35,11 +35,12 @@ libzstd:
-DXXH_STATIC_LINKING_ONLY \
-DMEM_FORCE_MEMORY_ACCESS=0 \
-D__GNUC__ \
-D__linux__=1 \
-DSTATIC_BMI2=0 \
-DZSTD_ADDRESS_SANITIZER=0 \
-DZSTD_MEMORY_SANITIZER=0 \
-DZSTD_DATAFLOW_SANITIZER=0 \
-DZSTD_COMPRESS_HEAPMODE=1 \
-UZSTD_NO_INLINE \
-UNO_PREFETCH \
-U__cplusplus \
-UZSTD_DLL_EXPORT \
@ -50,9 +51,11 @@ libzstd:
-U_WIN32 \
-RZSTDLIB_VISIBILITY= \
-RZSTDERRORLIB_VISIBILITY= \
-RZSTD_FALLTHROUGH=fallthrough \
-DZSTD_HAVE_WEAK_SYMBOLS=0 \
-DZSTD_TRACE=0 \
-DZSTD_NO_TRACE
-DZSTD_NO_TRACE \
-DZSTD_LINUX_KERNEL
mv linux/lib/zstd/zstd.h linux/include/linux/zstd_lib.h
mv linux/lib/zstd/zstd_errors.h linux/include/linux/
cp linux_zstd.h linux/include/linux/zstd.h
@ -86,10 +89,17 @@ import-upstream:
rm $(LINUX)/lib/zstd/common/xxhash.*
rm $(LINUX)/lib/zstd/compress/zstdmt_*

DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls -Wmissing-prototypes -Wc++-compat \
-Wimplicit-fallthrough

.PHONY: test
test: libzstd
$(MAKE) -C test run-test CFLAGS="-O3 $(CFLAGS)" -j
$(MAKE) -C test run-test CFLAGS="-O3 $(CFLAGS) $(DEBUGFLAGS) -Werror" -j

.PHONY: clean
clean:
$(RM) -rf linux
$(RM) -rf linux test/test test/static_test
@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
@ -21,6 +21,11 @@
#include "common/error_private.c"
#include "common/fse_decompress.c"
#include "common/zstd_common.c"
/*
* Disable the ASM Huffman implementation because we need to
* include all the sources.
*/
#define HUF_DISABLE_ASM 1
#include "decompress/huf_decompress.c"
#include "decompress/zstd_ddict.c"
#include "decompress/zstd_decompress.c"

@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
@ -11,7 +11,7 @@
obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o

ccflags-y += -O3
ccflags-y += -Wno-error=deprecated-declarations

zstd_compress-y := \
zstd_compress_module.o \
@ -41,6 +41,7 @@ zstd_decompress-y := \
common/fse_decompress.o \
common/zstd_common.o \
decompress/huf_decompress.o \
decompress/huf_decompress_amd64.o \
decompress/zstd_ddict.o \
decompress/zstd_decompress.o \
decompress/zstd_decompress_block.o \

@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.

@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.

@ -18,9 +18,13 @@ CPPFLAGS += -DZSTD_ASAN_DONT_POISON_WORKSPACE
LINUX_ZSTD_MODULE := $(wildcard $(LINUX_ZSTDLIB)/*.c)
LINUX_ZSTD_COMMON := $(wildcard $(LINUX_ZSTDLIB)/common/*.c)
LINUX_ZSTD_COMPRESS := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c)
LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c)
LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c $(LINUX_ZSTDLIB)/decompress/*.S)
LINUX_ZSTD_FILES := $(LINUX_ZSTD_MODULE) $(LINUX_ZSTD_COMMON) $(LINUX_ZSTD_COMPRESS) $(LINUX_ZSTD_DECOMPRESS)
LINUX_ZSTD_OBJECTS := $(LINUX_ZSTD_FILES:.c=.o)
LINUX_ZSTD_OBJECTS0 := $(LINUX_ZSTD_FILES:.c=.o)
LINUX_ZSTD_OBJECTS := $(LINUX_ZSTD_OBJECTS0:.S=.o)

%.o: %.S
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $^ -o $@

liblinuxzstd.a: $(LINUX_ZSTD_OBJECTS)
	$(AR) $(ARFLAGS) $@ $^
@ -20,6 +20,7 @@ static unsigned _isLittleEndian(void)
{
const union { uint32_t u; uint8_t c[4]; } one = { 1 };
assert(_IS_LITTLE_ENDIAN == one.c[0]);
(void)one;
return _IS_LITTLE_ENDIAN;
}

@ -18,4 +18,6 @@
#define noinline __attribute__((noinline))
#endif

#define fallthrough __attribute__((__fallthrough__))

#endif

@ -124,11 +124,10 @@ XXH_API uint64_t xxh64(const void *input, size_t length, uint64_t seed);
static inline unsigned long xxhash(const void *input, size_t length,
uint64_t seed)
{
#if BITS_PER_LONG == 64
return xxh64(input, length, seed);
#else
return xxh32(input, length, seed);
#endif
if (sizeof(size_t) == 8)
return xxh64(input, length, seed);
else
return xxh32(input, length, seed);
}

/*-****************************

@ -36,9 +36,9 @@ test_not_present "ZSTD_NO_INTRINSICS"
test_not_present "ZSTD_NO_UNUSED_FUNCTIONS"
test_not_present "ZSTD_LEGACY_SUPPORT"
test_not_present "STATIC_BMI2"
test_not_present "ZSTD_NO_INLINE"
test_not_present "ZSTD_DLL_EXPORT"
test_not_present "ZSTD_DLL_IMPORT"
test_not_present "__ICCARM__"
test_not_present "_MSC_VER"
test_not_present "_WIN32"
test_not_present "__linux__"
@ -28,17 +28,19 @@ static const char kEmptyZstdFrame[] = {
0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x00, 0x01, 0x00, 0x00, 0x99, 0xe9, 0xd8, 0x51
};

static void test_decompress_unzstd() {
static void test_decompress_unzstd(void) {
fprintf(stderr, "Testing decompress unzstd... ");
{
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
ZSTD_DCtx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(dctx != NULL);
size_t const dSize = zstd_decompress_dctx(dctx, NULL, 0, kEmptyZstdFrame, sizeof(kEmptyZstdFrame));
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == 0);
{
size_t const dSize = zstd_decompress_dctx(dctx, NULL, 0, kEmptyZstdFrame, sizeof(kEmptyZstdFrame));
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == 0);
}
free(wksp);
}
fprintf(stderr, "Ok\n");

@ -30,15 +30,15 @@ typedef struct {
size_t compSize;
} test_data_t;

test_data_t create_test_data(void) {
static test_data_t create_test_data(void) {
test_data_t data;
data.dataSize = 128 * 1024;
data.data = malloc(data.dataSize);
data.data = (char*)malloc(data.dataSize);
CONTROL(data.data != NULL);
data.data2 = malloc(data.dataSize);
data.data2 = (char*)malloc(data.dataSize);
CONTROL(data.data2 != NULL);
data.compSize = zstd_compress_bound(data.dataSize);
data.comp = malloc(data.compSize);
data.comp = (char*)malloc(data.compSize);
CONTROL(data.comp != NULL);
memset(data.data, 0, data.dataSize);
return data;
@ -54,26 +54,27 @@ static void free_test_data(test_data_t const *data) {
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static void test_btrfs(test_data_t const *data) {
fprintf(stderr, "testing btrfs use cases... ");
size_t const size = MIN(data->dataSize, 128 * 1024);
fprintf(stderr, "testing btrfs use cases... ");
for (int level = -1; level < 16; ++level) {
zstd_parameters params = zstd_get_params(level, size);
CONTROL(params.cParams.windowLog <= 17);
size_t const workspaceSize =
MAX(zstd_cstream_workspace_bound(&params.cParams),
zstd_dstream_workspace_bound(size));
void *workspace = malloc(workspaceSize);
CONTROL(workspace != NULL);

char const *ip = data->data;
char const *iend = ip + size;
char *op = data->comp;
char *oend = op + data->compSize;

CONTROL(params.cParams.windowLog <= 17);
CONTROL(workspace != NULL);
{
zstd_cstream *cctx = zstd_init_cstream(&params, size, workspace, workspaceSize);
CONTROL(cctx != NULL);
zstd_out_buffer out = {NULL, 0, 0};
zstd_in_buffer in = {NULL, 0, 0};
CONTROL(cctx != NULL);
for (;;) {
if (in.pos == in.size) {
in.src = ip;
@ -108,9 +109,9 @@ static void test_btrfs(test_data_t const *data) {
oend = op + size;
{
zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cParams.windowLog, workspace, workspaceSize);
CONTROL(dctx != NULL);
zstd_out_buffer out = {NULL, 0, 0};
zstd_in_buffer in = {NULL, 0, 0};
CONTROL(dctx != NULL);
for (;;) {
if (in.pos == in.size) {
in.src = ip;
@ -125,15 +126,16 @@ static void test_btrfs(test_data_t const *data) {
out.pos = 0;
op += out.size;
}

size_t const ret = zstd_decompress_stream(dctx, &out, &in);
CONTROL(!zstd_is_error(ret));
if (ret == 0) {
break;
{
size_t const ret = zstd_decompress_stream(dctx, &out, &in);
CONTROL(!zstd_is_error(ret));
if (ret == 0) {
break;
}
}
}
}
CONTROL(op - data->data2 == data->dataSize);
CONTROL((size_t)(op - data->data2) == data->dataSize);
CONTROL(!memcmp(data->data, data->data2, data->dataSize));
free(workspace);
}
@ -141,14 +143,14 @@
}

static void test_decompress_unzstd(test_data_t const *data) {
fprintf(stderr, "Testing decompress unzstd... ");
size_t cSize;
fprintf(stderr, "Testing decompress unzstd... ");
{
zstd_parameters params = zstd_get_params(19, 0);
size_t const wkspSize = zstd_cctx_workspace_bound(&params.cParams);
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
zstd_cctx* cctx = zstd_init_cctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(cctx != NULL);
cSize = zstd_compress_cctx(cctx, data->comp, data->compSize, data->data, data->dataSize, &params);
CONTROL(!zstd_is_error(cSize));
@ -157,19 +159,21 @@ static void test_decompress_unzstd(test_data_t const *data) {
{
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
zstd_dctx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(dctx != NULL);
size_t const dSize = zstd_decompress_dctx(dctx, data->data2, data->dataSize, data->comp, cSize);
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == data->dataSize);
{
size_t const dSize = zstd_decompress_dctx(dctx, data->data2, data->dataSize, data->comp, cSize);
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == data->dataSize);
}
CONTROL(!memcmp(data->data, data->data2, data->dataSize));
free(wksp);
}
fprintf(stderr, "Ok\n");
}

static void test_f2fs() {
static void test_f2fs(void) {
fprintf(stderr, "testing f2fs uses... ");
CONTROL(zstd_min_clevel() < 0);
CONTROL(zstd_max_clevel() == 22);
@ -182,7 +186,7 @@ static void __attribute__((noinline)) use(void *x) {
asm volatile("" : "+r"(x));
}

static void __attribute__((noinline)) set_stack() {
static void __attribute__((noinline)) set_stack(void) {

char stack[8192];
g_stack = stack;
@ -190,14 +194,16 @@
use(g_stack);
}

static void __attribute__((noinline)) check_stack() {
static void __attribute__((noinline)) check_stack(void) {
size_t cleanStack = 0;
while (cleanStack < 8192 && g_stack[cleanStack] == 0x33) {
++cleanStack;
}
size_t const stackSize = 8192 - cleanStack;
fprintf(stderr, "Maximum stack size: %zu\n", stackSize);
CONTROL(stackSize <= 2048 + 512);
{
size_t const stackSize = 8192 - cleanStack;
fprintf(stderr, "Maximum stack size: %zu\n", stackSize);
CONTROL(stackSize <= 2048 + 512);
}
}

static void test_stack_usage(test_data_t const *data) {
@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
@ -17,6 +17,43 @@
#include "common/zstd_deps.h"
#include "common/zstd_internal.h"

#define ZSTD_FORWARD_IF_ERR(ret) \
do { \
size_t const __ret = (ret); \
if (ZSTD_isError(__ret)) \
return __ret; \
} while (0)

static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
unsigned long long pledged_src_size)
{
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
cctx, ZSTD_reset_session_and_parameters));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
cctx, pledged_src_size));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_strategy, parameters->cParams.strategy));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
return 0;
}

int zstd_min_clevel(void)
{
return ZSTD_minCLevel();
@ -59,7 +96,8 @@ EXPORT_SYMBOL(zstd_init_cctx);
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters)
{
return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, *parameters);
ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
}
EXPORT_SYMBOL(zstd_compress_cctx);

@ -73,7 +111,6 @@ zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
{
zstd_cstream *cstream;
size_t ret;

if (workspace == NULL)
return NULL;
@ -86,8 +123,7 @@ zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
if (pledged_src_size == 0)
pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;

ret = ZSTD_initCStream_advanced(cstream, NULL, 0, *parameters, pledged_src_size);
if (ZSTD_isError(ret))
if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
return NULL;

return cstream;

@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.

@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
@ -57,19 +57,6 @@ LD_COMMAND = $(CXX) $^ $(ALL_LDFLAGS) $(LIBS) -pthread -o $@
CC_COMMAND = $(CC) $(DEPFLAGS) $(ALL_CFLAGS) -c $< -o $@
CXX_COMMAND = $(CXX) $(DEPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@

# Get a list of all zstd files so we rebuild the static library when we need to
ZSTDCOMMON_FILES := $(wildcard $(ZSTDDIR)/common/*.c) \
$(wildcard $(ZSTDDIR)/common/*.h)
ZSTDCOMP_FILES := $(wildcard $(ZSTDDIR)/compress/*.c) \
$(wildcard $(ZSTDDIR)/compress/*.h)
ZSTDDECOMP_FILES := $(wildcard $(ZSTDDIR)/decompress/*.c) \
$(wildcard $(ZSTDDIR)/decompress/*.h)
ZSTDPROG_FILES := $(wildcard $(PROGDIR)/*.c) \
$(wildcard $(PROGDIR)/*.h)
ZSTD_FILES := $(wildcard $(ZSTDDIR)/*.h) \
$(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) \
$(ZSTDPROG_FILES)

# List all the pzstd source files so we can determine their dependencies
PZSTD_SRCS := $(wildcard *.cpp)
PZSTD_TESTS := $(wildcard test/*.cpp)
@ -189,7 +176,8 @@ roundtrip: test/RoundTripTest$(EXT)

# Use the static library that zstd builds for simplicity and
# so we get the compiler options correct
$(ZSTDDIR)/libzstd.a: $(ZSTD_FILES)
.PHONY: $(ZSTDDIR)/libzstd.a
$(ZSTDDIR)/libzstd.a:
	CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a

# Rules to build the tests

@ -87,7 +87,7 @@ void usage() {
std::fprintf(stderr, " -V, --version : display version number and exit\n");
std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
std::fprintf(stderr, " -c, --stdout : force write to standard output, even if it is the console\n");
std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n");
#ifdef UTIL_HAS_CREATEFILELIST
std::fprintf(stderr, " -r : operate recursively on directories\n");
#endif

@ -45,11 +45,12 @@ class Buffer {
: buffer_(buffer), range_(data) {}

Buffer(Buffer&&) = default;
Buffer& operator=(Buffer&&) & = default;
Buffer& operator=(Buffer&&) = default;

/**
* Splits the data into two pieces: [begin, begin + n), [begin + n, end).

@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/


/**
* A subset of `folly/Range.h`.
* All code copied verbatim modulo formatting
@ -83,8 +83,8 @@ class Range {
Range(const Range&) = default;
Range(Range&&) = default;

Range& operator=(const Range&) & = default;
Range& operator=(Range&&) & = default;
Range& operator=(const Range&) = default;
Range& operator=(Range&&) = default;

constexpr size_type size() const {
return e_ - b_;
@ -2145,7 +2145,7 @@ static void FSE_init_dtable(FSE_dtable *const dtable,

// "All remaining symbols are sorted in their natural order. Starting from
// symbol 0 and table position 0, each symbol gets attributed as many cells
// as its probability. Cell allocation is spreaded, not linear."
// as its probability. Cell allocation is spread, not linear."
// Place the rest in the table
const u16 step = (size >> 1) + (size >> 3) + 3;
const u16 mask = size - 1;

@ -1124,7 +1124,7 @@ These symbols define a full state reset, reading `Accuracy_Log` bits.
Then, all remaining symbols, sorted in natural order, are allocated cells.
Starting from symbol `0` (if it exists), and table position `0`,
each symbol gets allocated as many cells as its probability.
Cell allocation is spreaded, not linear :
Cell allocation is spread, not linear :
each successor position follows this rule :

```
@ -1669,7 +1669,7 @@ or at least provide a meaningful error code explaining for which reason it canno

Version changes
---------------
- 0.3.7 : clarifications for Repeat_Offsets
- 0.3.7 : clarifications for Repeat_Offsets, matching RFC8878
- 0.3.6 : clarifications for Dictionary_ID
- 0.3.5 : clarifications for Block_Maximum_Size
- 0.3.4 : clarifications for FSE decoding table
@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>zstd 1.5.0 Manual</title>
<title>zstd 1.5.1 Manual</title>
</head>
<body>
<h1>zstd 1.5.0 Manual</h1>
<h1>zstd 1.5.1 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
@ -40,7 +40,7 @@
functions.

The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
which is 22 in most cases. Levels >= 20, labeled `--ultra`, should be used with
caution, as they require more memory. The library also offers negative
compression levels, which extend the range of speed vs. ratio preferences.
The lower the level, the faster the speed (at the cost of compression).
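To make the level discussion above concrete, here is a minimal sketch of a one-shot compression call that queries the level bounds at run time (the `src`/`dst` buffers are illustrative, not part of this commit):

```c
#include <stdio.h>
#include <string.h>
#include <zstd.h>

int main(void)
{
    const char src[] = "sample payload, long enough to be worth compressing";
    char dst[256];   /* comfortably above ZSTD_compressBound(strlen(src)) */
    /* Valid levels span ZSTD_minCLevel() (negative, fastest) up to
     * ZSTD_maxCLevel(); level 3 is the library default. */
    size_t const written = ZSTD_compress(dst, sizeof(dst),
                                         src, strlen(src),
                                         ZSTD_maxCLevel());
    if (ZSTD_isError(written)) {
        fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(written));
        return 1;
    }
    printf("compressed %zu -> %zu bytes (levels %d..%d available)\n",
           strlen(src), written, ZSTD_minCLevel(), ZSTD_maxCLevel());
    return 0;
}
```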
@ -357,7 +357,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* ZSTD_c_splitBlocks
* ZSTD_c_useBlockSplitter
* ZSTD_c_useRowMatchFinder
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
@ -803,7 +803,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds); </b>/* accept NULL pointer */<b>

<a name="Chapter13"></a><h2>Advanced dictionary and prefix API (Requires v1.4.0+)</h2><pre>
This API allows dictionaries to be used with ZSTD_compress2(),
ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
only reset when the context is reset with ZSTD_reset_parameters or
ZSTD_reset_session_and_parameters. Prefixes are single-use.
<BR></pre>
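A minimal sketch of the sticky-dictionary behavior described above, under the assumption of caller-supplied buffers (`dictBuf`, `in1`, `in2` are hypothetical names, not from this commit):

```c
#include <zstd.h>

/* Compress two inputs with the same loaded dictionary. The dictionary
 * is loaded once and, being sticky, applies to every subsequent frame
 * until the context is reset. dst is reused here purely for brevity. */
size_t compress_two(ZSTD_CCtx* cctx,
                    const void* dictBuf, size_t dictSize,
                    void* dst, size_t dstCap,
                    const void* in1, size_t in1Size,
                    const void* in2, size_t in2Size)
{
    size_t r = ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictSize);
    if (ZSTD_isError(r)) return r;
    /* First frame: uses the dictionary. */
    r = ZSTD_compress2(cctx, dst, dstCap, in1, in1Size);
    if (ZSTD_isError(r)) return r;
    /* Second frame: the dictionary is used again without reloading.
     * A reset with ZSTD_reset_session_and_parameters would drop it. */
    return ZSTD_compress2(cctx, dst, dstCap, in2, in2Size);
}
```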
@ -1072,10 +1072,14 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
} ZSTD_literalCompressionMode_e;
</b></pre><BR>
<pre><b>typedef enum {
ZSTD_urm_auto = 0, </b>/* Automatically determine whether or not we use row matchfinder */<b>
ZSTD_urm_disableRowMatchFinder = 1, </b>/* Never use row matchfinder */<b>
ZSTD_urm_enableRowMatchFinder = 2 </b>/* Always use row matchfinder when applicable */<b>
} ZSTD_useRowMatchFinderMode_e;
</b>/* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final<b>
* decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
* or ZSTD_ps_disable allows force-enabling or disabling the feature.
*/
ZSTD_ps_auto = 0, </b>/* Let the library automatically determine whether the feature shall be enabled */<b>
ZSTD_ps_enable = 1, </b>/* Force-enable the feature */<b>
ZSTD_ps_disable = 2 </b>/* Do not use the feature */<b>
} ZSTD_paramSwitch_e;
</b></pre><BR>
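As a hedged illustration of `ZSTD_paramSwitch_e`: the sketch below force-enables the row-based match finder on a compression context. This parameter is experimental, so it sits behind `ZSTD_STATIC_LINKING_ONLY`; `ZSTD_ps_auto` (the default) leaves the decision to the library.

```c
#define ZSTD_STATIC_LINKING_ONLY  /* experimental parameters live behind this gate */
#include <zstd.h>

/* Force-enable the row-based match finder; returns a size_t error code
 * that should be checked with ZSTD_isError(). */
static size_t enable_row_matchfinder(ZSTD_CCtx* cctx)
{
    return ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder,
                                  ZSTD_ps_enable);
}
```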
<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>

@ -1205,6 +1209,25 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);

</p></pre><BR>

<pre><b>size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
const void* src, size_t srcSize);
</b><p> Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.

The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
in the magicVariant.

Returns an error if destination buffer is not large enough, or if the frame is not skippable.

@return : number of bytes written or a ZSTD error.

</p></pre><BR>

<pre><b>unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.

</p></pre><BR>
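A minimal sketch combining the two prototypes above: probe a buffer for a skippable frame and, if present, extract its payload. `frame` and `payload` are hypothetical caller buffers, and both functions are experimental, so `ZSTD_STATIC_LINKING_ONLY` is required:

```c
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <zstd.h>

/* Returns the payload size extracted from a leading skippable frame,
 * or 0 if the buffer does not start with one (or on error). */
static size_t read_skippable(const void* frame, size_t frameSize,
                             void* payload, size_t payloadCap)
{
    unsigned magicVariant = 0;
    if (!ZSTD_isSkippableFrame(frame, frameSize))
        return 0;  /* not skippable; nothing extracted */
    {
        size_t const n = ZSTD_readSkippableFrame(payload, payloadCap,
                                                 &magicVariant,
                                                 frame, frameSize);
        if (ZSTD_isError(n)) return 0;
        printf("skippable frame: %zu payload bytes, variant %u\n",
               n, magicVariant);
        return n;
    }
}
```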

<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>

<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
@ -1303,6 +1326,21 @@ ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< this con

</p></pre><BR>

<pre><b>typedef struct POOL_ctx_s ZSTD_threadPool;
ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
void ZSTD_freeThreadPool (ZSTD_threadPool* pool); </b>/* accept NULL pointer */<b>
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
</b><p> These prototypes make it possible to share a thread pool among multiple compression contexts.
This can limit resources for applications with multiple threads where each one uses
a threaded compression mode (via ZSTD_c_nbWorkers parameter).
ZSTD_createThreadPool creates a new thread pool with a given number of threads.
Note that the lifetime of such pool must exist while being used.
ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
to use an internal thread pool).
ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.

</p></pre><BR>
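A hedged sketch of the thread-pool sharing described above: two contexts reference one pool, so worker threads are not duplicated per context. It assumes a multithread-enabled build of the library (`ZSTD_c_nbWorkers` is otherwise a no-op), and the pool must outlive every context that references it:

```c
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Create a 4-thread pool and attach it to two compression contexts. */
static int shared_pool_demo(void)
{
    ZSTD_threadPool* const pool = ZSTD_createThreadPool(4);
    ZSTD_CCtx* const c1 = ZSTD_createCCtx();
    ZSTD_CCtx* const c2 = ZSTD_createCCtx();
    int const ok = pool && c1 && c2
        && !ZSTD_isError(ZSTD_CCtx_refThreadPool(c1, pool))
        && !ZSTD_isError(ZSTD_CCtx_refThreadPool(c2, pool))
        && !ZSTD_isError(ZSTD_CCtx_setParameter(c1, ZSTD_c_nbWorkers, 4))
        && !ZSTD_isError(ZSTD_CCtx_setParameter(c2, ZSTD_c_nbWorkers, 4));
    /* ... compress with c1/c2 here; the pool must stay alive throughout ... */
    ZSTD_freeCCtx(c1);
    ZSTD_freeCCtx(c2);
    ZSTD_freeThreadPool(pool);  /* accepts NULL */
    return ok;
}
```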

<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>

<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
@ -1594,7 +1632,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
</b><p> This function is DEPRECATED, and equivalent to:
ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
ZSTD_CCtx_refCDict(zcs, cdict);

note : cdict will just be referenced, and must outlive compression session
This prototype will generate compilation warnings.
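A minimal sketch of the replacement sequence named above, for callers migrating off the deprecated prototype:

```c
#include <zstd.h>

/* Reset the streaming session, then reference the caller-owned cdict.
 * Returns a size_t error code; check with ZSTD_isError(). */
static size_t start_stream_with_cdict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
    size_t const r = ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
    if (ZSTD_isError(r)) return r;
    /* cdict is only referenced; it must outlive the compression session. */
    return ZSTD_CCtx_refCDict(zcs, cdict);
}
```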

@ -15,9 +15,12 @@
#include <zstd.h> // presumes zstd library is installed
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()


static void compressFile_orDie(const char* fname, const char* outName, int cLevel)
static void compressFile_orDie(const char* fname, const char* outName, int cLevel,
int nbThreads)
{
fprintf (stderr, "Starting compression of %s with level %d, using %d threads\n",
fname, cLevel, nbThreads);

/* Open the input and output files. */
FILE* const fin = fopen_orDie(fname, "rb");
FILE* const fout = fopen_orDie(outName, "wb");
@ -39,7 +42,7 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve
*/
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) );
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);

/* This loop reads from the input file, compresses that entire chunk,
* and writes all output produced to the output file.
@ -106,17 +109,30 @@ int main(int argc, const char** argv)
{
const char* const exeName = argv[0];

if (argc!=2) {
if (argc < 2) {
printf("wrong arguments\n");
printf("usage:\n");
printf("%s FILE\n", exeName);
printf("%s FILE [LEVEL] [THREADS]\n", exeName);
return 1;
}

int cLevel = 1;
int nbThreads = 4;

if (argc >= 3) {
cLevel = atoi (argv[2]);
CHECK(cLevel != 0, "can't parse LEVEL!");
}

if (argc >= 4) {
nbThreads = atoi (argv[3]);
CHECK(nbThreads != 0, "can't parse THREADS!");
}

const char* const inFilename = argv[1];

char* const outFilename = createOutFilename_orDie(inFilename);
compressFile_orDie(inFilename, outFilename, 1);
compressFile_orDie(inFilename, outFilename, cLevel, nbThreads);

free(outFilename); /* not strictly required, since program execution stops there,
* but some static analyzers may complain otherwise */
@ -28,8 +28,10 @@ typedef struct compress_args

static void *compressFile_orDie(void *data)
{
const int nbThreads = 16;

compress_args_t *args = (compress_args_t *)data;
fprintf (stderr, "Starting compression of %s with level %d\n", args->fname, args->cLevel);
fprintf (stderr, "Starting compression of %s with level %d, using %d threads\n", args->fname, args->cLevel, nbThreads);
/* Open the input and output files. */
FILE* const fin = fopen_orDie(args->fname, "rb");
FILE* const fout = fopen_orDie(args->outName, "wb");
@ -56,7 +58,7 @@ static void *compressFile_orDie(void *data)
*/
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, args->cLevel) );
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 16);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);

/* This loop reads from the input file, compresses that entire chunk,
* and writes all output produced to the output file.
129
lib/Makefile
@ -100,18 +100,7 @@ ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 0

# Legacy support
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LEGACY_MULTITHREADED_API ?= 0

# Build size optimizations
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0

# Input variables for libzstd.mk
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
ZSTD_LIB_DICTBUILDER = 0
ZSTD_LIB_DEPRECATED = 0
@ -122,86 +111,46 @@ ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
ZSTD_LIB_DEPRECATED = 0
endif

include libzstd.mk

ZSTD_FILES := $(ZSTD_COMMON_FILES) $(ZSTD_LEGACY_FILES)

ifneq ($(ZSTD_LIB_COMPRESSION), 0)
ZSTD_FILES += $(ZSTDCOMP_FILES)
ZSTD_FILES += $(ZSTD_COMPRESS_FILES)
endif

ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
ZSTD_FILES += $(ZSTDDECOMP_FILES)
ZSTD_FILES += $(ZSTD_DECOMPRESS_FILES)
endif

ifneq ($(ZSTD_LIB_DEPRECATED), 0)
ZSTD_FILES += $(ZDEPR_FILES)
ZSTD_FILES += $(ZSTD_DEPRECATED_FILES)
endif

ifneq ($(ZSTD_LIB_DICTBUILDER), 0)
ZSTD_FILES += $(ZDICT_FILES)
ZSTD_FILES += $(ZSTD_DICTBUILDER_FILES)
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0)
CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0)
CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_SHORT), 0)
CFLAGS += -DZSTD_FORCE_DECOMPRESS_SHORT
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_LONG), 0)
CFLAGS += -DZSTD_FORCE_DECOMPRESS_LONG
endif

ifneq ($(ZSTD_NO_INLINE), 0)
CFLAGS += -DZSTD_NO_INLINE
endif

ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
endif

ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
endif

ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)

ZSTD_LOCAL_SRC := $(notdir $(ZSTD_FILES))
ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_SRC:.c=.o)
ZSTD_LOCAL_OBJ0 := $(ZSTD_LOCAL_SRC:.c=.o)
ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_OBJ0:.S=.o)

ZSTD_SUBDIR := common compress decompress dictBuilder legacy deprecated
vpath %.c $(ZSTD_SUBDIR)
VERSION := $(ZSTD_VERSION)

UNAME := $(shell uname)
# Note: by default, the static library is built single-threaded and dynamic library is built
# multi-threaded. It is possible to force multi or single threaded builds by appending
# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
.PHONY: default
default: lib-release

ifndef BUILD_DIR
ifeq ($(UNAME), Darwin)
ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
HASH ?= md5
endif
else ifeq ($(UNAME), FreeBSD)
HASH ?= gmd5sum
else ifeq ($(UNAME), NetBSD)
HASH ?= md5 -n
else ifeq ($(UNAME), OpenBSD)
HASH ?= md5
CPPFLAGS_DYNLIB = -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
LDFLAGS_DYNLIB = -pthread
CPPFLAGS_STATLIB = # static library build defaults to single-threaded


ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif
HASH ?= md5sum

HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " " )
HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
ifeq ($(HAVE_HASH),0)
$(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
BUILD_DIR := obj/generic_noconf
endif
endif # BUILD_DIR


# macOS linker doesn't support -soname, and use different extension
@ -212,19 +161,16 @@ ifeq ($(UNAME), Darwin)
SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
else
SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
ifeq ($(UNAME), AIX)
SONAME_FLAGS =
else
SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
endif
SHARED_EXT = so
SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
endif

SET_CACHE_DIRECTORY = \
+$(MAKE) --no-print-directory $@ \
BUILD_DIR=obj/$(HASH_DIR) \
CPPFLAGS="$(CPPFLAGS)" \
CFLAGS="$(CFLAGS)" \
LDFLAGS="$(LDFLAGS)"


.PHONY: all
all: lib
@ -233,6 +179,13 @@ all: lib
.PHONY: libzstd.a # must be run every time
libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)

SET_CACHE_DIRECTORY = \
+$(MAKE) --no-print-directory $@ \
BUILD_DIR=obj/$(HASH_DIR) \
CPPFLAGS="$(CPPFLAGS)" \
CFLAGS="$(CFLAGS)" \
LDFLAGS="$(LDFLAGS)"

ifndef BUILD_DIR
# determine BUILD_DIR from compilation flags


@ -343,6 +296,14 @@ $(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
	@echo CC $@
	$(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $<

$(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR)
|
||||
@echo AS $@
|
||||
$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||
|
||||
$(ZSTD_STATLIB_DIR)/%.o : %.S | $(ZSTD_STATLIB_DIR)
|
||||
@echo AS $@
|
||||
$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||
|
||||
MKDIR ?= mkdir
|
||||
$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
|
||||
$(MKDIR) -p $@
|
||||
@ -374,7 +335,7 @@ clean:
|
||||
#-----------------------------------------------------------------------------
|
||||
# make install is validated only for below listed environments
|
||||
#-----------------------------------------------------------------------------
|
||||
ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
|
||||
ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX))
|
||||
|
||||
all: libzstd.pc
|
||||
|
||||
|
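As a usage note for the knobs kept above: they can all be overridden on the command line, so a size-trimmed static library might be produced with something like `make -C lib libzstd.a ZSTD_LEGACY_SUPPORT=0 ZSTD_LIB_DEPRECATED=0 HUF_FORCE_DECOMPRESS_X1=1 ZSTD_STRIP_ERROR_STRINGS=1` (all variables are the ones defined in lib/Makefile above; the actual size saving depends on the target and toolchain).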
@ -125,7 +125,7 @@ The file structure is designed to make this selection manually achievable for an
`ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).

Finally, when integrating into your application, make sure you're doing link-
time optimation and unused symbol garbage collection (via some combination of,
time optimization and unused symbol garbage collection (via some combination of,
e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
`-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
`-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
@ -155,6 +155,12 @@ The file structure is designed to make this selection manually achievable for an
- The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
Compiler builtins are still used.

- The build macro `ZSTD_DECODER_INTERNAL_BUFFER` can be set to control
the amount of extra memory used during decompression to store literals.
This defaults to 64kB. Reducing this value reduces the memory footprint of
`ZSTD_DCtx` decompression contexts,
but might also result in a small decompression speed cost.
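For example, halving the default might look like `make -C lib CPPFLAGS=-DZSTD_DECODER_INTERNAL_BUFFER=32768`; the value shown is illustrative only, and the macro's exact legal range should be checked in lib/decompress before relying on it.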

#### Windows : using MinGW+MSYS to create DLL
@ -143,10 +143,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER) /* Visual */
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
unsigned long r = 0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
@ -293,22 +299,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
/* fall-through */
ZSTD_FALLTHROUGH;

case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
/* fall-through */
ZSTD_FALLTHROUGH;

case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
/* fall-through */
ZSTD_FALLTHROUGH;

case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
/* fall-through */
ZSTD_FALLTHROUGH;

case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
/* fall-through */
ZSTD_FALLTHROUGH;

case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
/* fall-through */
ZSTD_FALLTHROUGH;

default: break;
}
@ -332,7 +338,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
/* On x86, compilers transform & ((1 << nbBits) - 1) into a bzhi instruction,
 * which is better than accessing memory. When the bmi2 instruction is not
 * present, we consider such CPUs old (pre-Haswell, 2013) and their
 * performance less important.
 */
#if defined(__x86_64__) || defined(_M_X86)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}

MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
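A self-contained toy version of that extraction, useful for checking the arithmetic by hand (plain C, no BIT_mask table; names here are illustrative, not from the diff):

#include <assert.h>
#include <stddef.h>

static size_t getMiddleBits_toy(size_t bitContainer, unsigned start, unsigned nbBits)
{
    unsigned const regMask = sizeof(bitContainer)*8 - 1;
    return (bitContainer >> (start & regMask)) & (((size_t)1 << nbBits) - 1);
}

int main(void)
{
    assert(getMiddleBits_toy(0xF0, 4, 3) == 0x7);   /* bits 4..6 of 0xF0 are 0b111 */
    return 0;
}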
@ -40,7 +40,7 @@

/**
On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
This explictly marks such functions as __cdecl so that the code will still compile
This explicitly marks such functions as __cdecl so that the code will still compile
if a CC other than __cdecl has been made the default.
*/
#if defined(_MSC_VER)
@ -101,6 +101,13 @@
# define TARGET_ATTRIBUTE(target)
#endif

/* Target attribute for BMI2 dynamic dispatch.
 * Enable lzcnt, bmi, and bmi2.
 * We test for bmi1 & bmi2. lzcnt is included in bmi1.
 */
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

/* Enable runtime BMI2 dispatch based on the CPU.
 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
 */
@ -108,7 +115,7 @@
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
@ -150,8 +157,9 @@
}

/* vectorization
 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
 * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
@ -197,6 +205,22 @@
#define STATIC_BMI2 0
#endif

/* compile time determination of SIMD support */
#if !defined(ZSTD_NO_INTRINSICS)
# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# define ZSTD_ARCH_X86_SSE2
# endif
# if defined(__ARM_NEON) || defined(_M_ARM64)
# define ZSTD_ARCH_ARM_NEON
# endif
#
# if defined(ZSTD_ARCH_X86_SSE2)
# include <emmintrin.h>
# elif defined(ZSTD_ARCH_ARM_NEON)
# include <arm_neon.h>
# endif
#endif

/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
@ -207,6 +231,39 @@
# define __has_feature(x) 0
#endif

/* C-language Attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
# define ZSTD_HAS_C_ATTRIBUTE(x) 0
#endif

/* Only use C++ attributes in C++. Some compilers report support for C++
 * attributes when compiling with C.
 */
#if defined(__cplusplus) && defined(__has_cpp_attribute)
# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
#endif

/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
 * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
 * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
 * - Else: __attribute__((__fallthrough__))
 */
#ifndef ZSTD_FALLTHROUGH
# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
# define ZSTD_FALLTHROUGH [[fallthrough]]
# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
# define ZSTD_FALLTHROUGH [[fallthrough]]
# elif __has_attribute(__fallthrough__)
# define ZSTD_FALLTHROUGH __attribute__((__fallthrough__))
# else
# define ZSTD_FALLTHROUGH
# endif
#endif

/* detects whether we are being compiled under msan */
#ifndef ZSTD_MEMORY_SANITIZER
# if __has_feature(memory_sanitizer)
@ -216,6 +273,15 @@
# endif
#endif

/* detects whether we are being compiled under dfsan */
#ifndef ZSTD_DATAFLOW_SANITIZER
# if __has_feature(dataflow_sanitizer)
# define ZSTD_DATAFLOW_SANITIZER 1
# else
# define ZSTD_DATAFLOW_SANITIZER 0
# endif
#endif

#if ZSTD_MEMORY_SANITIZER
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
 * We therefore declare the functions we need ourselves, rather than trying to
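For illustration (not part of this diff), a switch annotated with the macro compiles warning-free under -Wimplicit-fallthrough whichever of the three spellings gets selected:

switch (n) {
case 2:
    total += v[1];
    ZSTD_FALLTHROUGH;   /* deliberate: case 2 also accumulates case 1's element */
case 1:
    total += v[0];
    break;
default:
    break;
}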
@ -43,8 +43,14 @@ static U32 FSE_ctz(U32 val)
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanForward(&r, val) ? (unsigned)r : 0;
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
@ -217,7 +223,7 @@ static size_t FSE_readNCount_body_default(
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
@ -299,7 +305,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
@ -337,7 +343,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
}

#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
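The rename from TARGET_ATTRIBUTE("bmi2") to BMI2_TARGET_ATTRIBUTE generalizes a pattern used throughout these files: each hot function is compiled twice, once plainly and once with the wider target attribute, and a thin wrapper selects at runtime. A schematic sketch with a toy body (the sum_* helpers are hypothetical; the macros are the ones defined in compiler.h above):

static size_t sum_default(const unsigned* v, size_t n)
{   size_t s = 0, i; for (i = 0; i < n; i++) s += v[i]; return s; }

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE
static size_t sum_bmi2(const unsigned* v, size_t n)
{   size_t s = 0, i; for (i = 0; i < n; i++) s += v[i]; return s; }
#endif

static size_t sum(const unsigned* v, size_t n, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) return sum_bmi2(v, n);   /* bmi2 flag from, e.g., ZSTD_cpuSupportsBmi2() */
#endif
    (void)bmi2;
    return sum_default(v, n);
}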
@ -22,6 +22,8 @@ extern "C" {
* Dependencies
******************************************/
#include "../zstd_errors.h" /* enum list */
#include "compiler.h"
#include "debug.h"
#include "zstd_deps.h" /* size_t */

@ -73,6 +75,83 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
return ERR_getErrorString(ERR_getErrorCode(code));
}

/**
 * Ignore: this is an internal helper.
 *
 * This is a helper function to help force C99-correctness during compilation.
 * Under strict compilation modes, variadic macro arguments can't be empty.
 * However, variadic function arguments can be. Using a function therefore lets
 * us statically check that at least one (string) argument was passed,
 * independent of the compilation flags.
 */
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}

/**
 * Ignore: this is an internal helper.
 *
 * We want to force this function invocation to be syntactically correct, but
 * we don't want to force runtime evaluation of its arguments.
 */
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}

#define ERR_QUOTE(str) #str

/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 */
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}

/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 */
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);

/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 */
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);

#if defined (__cplusplus)
}
#endif
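A hypothetical caller, just to show how these macros read in practice (the function and message are illustrative; dstSize_tooSmall is a real zstd error code):

size_t copyChecked(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall,
                    "need %u bytes, have only %u", (unsigned)srcSize, (unsigned)dstCapacity);
    ZSTD_memcpy(dst, src, srcSize);   /* ZSTD_memcpy from zstd_deps.h */
    return srcSize;
}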
@ -336,8 +336,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/* FSE_buildCTable_wksp() :
 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
 * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
 * See FSE_buildCTable_wksp() for breakdown of workspace usage.
 */
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
@ -89,9 +89,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,

/** HUF_compress4X_wksp() :
 * Same as HUF_compress2(), but uses externally allocated `workSpace`.
 * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
 * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
@ -116,11 +116,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,

/* *** Constants *** */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX 255

#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
@ -136,15 +136,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,

/* static allocation of HUF's Compression Table */
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt */
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
typedef size_t HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */

/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
@ -194,6 +190,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);

@ -206,12 +203,13 @@ typedef enum {
 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
 * If it uses hufTable it does not modify hufTable or repeat.
 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
 * If preferRepeat then the old table will always be used if valid. */
 * If preferRepeat then the old table will always be used if valid.
 * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);

/** HUF_buildCTable_wksp() :
 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@ -249,11 +247,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
 * Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);

/** HUF_getNbBits() :
/** HUF_getNbBitsFromCTable() :
 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
 * Note 1 : is not inlined, as HUF_CElt definition is private
 * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
 * Note 1 : is not inlined, as HUF_CElt definition is private */
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);

/*
 * HUF_decompress() does the following:
@ -305,18 +302,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
/* ====================== */

size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
/** HUF_compress1X_repeat() :
 * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
 * If it uses hufTable it does not modify hufTable or repeat.
 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
 * If preferRepeat then the old table will always be used if valid. */
 * If preferRepeat then the old table will always be used if valid.
 * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);

size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
@ -354,6 +353,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif

#endif /* HUF_STATIC_LINKING_ONLY */
@ -153,8 +153,22 @@ MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }

MEM_STATIC unsigned MEM_isLittleEndian(void)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return 1;
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
return 0;
#elif defined(__clang__) && __LITTLE_ENDIAN__
return 1;
#elif defined(__clang__) && __BIG_ENDIAN__
return 0;
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
return 1;
#elif defined(__DMC__) && defined(_M_IX86)
return 1;
#else
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
#endif
}

#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
@ -19,10 +19,8 @@
/*-*************************************
* Dependencies
***************************************/
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
#include "cpu.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
@ -60,81 +58,7 @@ extern "C" {
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))

/**
 * Ignore: this is an internal helper.
 *
 * This is a helper function to help force C99-correctness during compilation.
 * Under strict compilation modes, variadic macro arguments can't be empty.
 * However, variadic function arguments can be. Using a function therefore lets
 * us statically check that at least one (string) argument was passed,
 * independent of the compilation flags.
 */
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}

/**
 * Ignore: this is an internal helper.
 *
 * We want to force this function invocation to be syntactically correct, but
 * we don't want to force runtime evaluation of its arguments.
 */
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}

/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 */
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}

/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 */
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);

/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 */
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))

/*-*************************************
@ -247,19 +171,25 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#if defined(ZSTD_ARCH_ARM_NEON)
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
ZSTD_memcpy(dst, src, 8);
#endif
}

#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }

/* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the
literal buffer is, there can be partial overlaps in this call on the final
copy if the literal is being shifted by less than 16 bytes. */
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#if defined(ZSTD_ARCH_ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#elif defined(ZSTD_ARCH_X86_SSE2)
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
#else
ZSTD_memcpy(dst, src, 16);
ZSTD_memmove(dst, src, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
@ -288,8 +218,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;

assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));

if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
@ -436,8 +364,14 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
@ -456,6 +390,63 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
}
}

/**
 * Counts the number of trailing zeros of a `size_t`.
 * Most compilers should support CTZ as a builtin. A backup
 * implementation is provided if the builtin isn't supported, but
 * it may not be terribly efficient.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return __builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return __builtin_ctz((U32)val);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
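To see why the De Bruijn fallback works, one hand-worked instance of the 32-bit branch (checked against the table above): isolating the lowest set bit and multiplying by the constant places a unique bit pattern in the top five bits, which indexes the lookup table.

U32 const val = 40;                                    /* 0b101000, lowest set bit at index 3 */
U32 const isolated = val & (U32)(-(S32)val);           /* == 8 */
U32 const idx = (U32)(isolated * 0x077CB531U) >> 27;   /* == 7 */
/* DeBruijnBytePos[7] == 3, matching __builtin_ctz(40) */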
/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
@ -482,6 +473,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);

/**
 * @returns true iff the CPU supports dynamic BMI2 dispatch.
 */
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
{
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}

#if defined (__cplusplus)
}
@ -17,10 +17,19 @@ extern "C" {

#include <stddef.h>

/* weak symbol support */
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
/* weak symbol support
 * For now, enable conservatively:
 * - Only GNUC
 * - Only ELF
 * - Only x86-64 and i386
 * Also, explicitly disable on platforms known not to work so they aren't
 * forgotten in the future.
 */
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
defined(__GNUC__) && defined(__ELF__) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
!defined(__CYGWIN__)
!defined(__CYGWIN__) && !defined(_AIX)
# define ZSTD_HAVE_WEAK_SYMBOLS 1
#else
# define ZSTD_HAVE_WEAK_SYMBOLS 0
135
lib/compress/clevels.h
Normal file
@ -0,0 +1,135 @@
/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H

#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include "../zstd.h"

/*-===== Pre-defined compression levels =====-*/

#define ZSTD_MAX_CLEVEL 22
#define ZSTD_MAX_32BIT_CLEVEL 21

#ifdef __GNUC__
__attribute__((__unused__))
#endif

static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - for any srcSize > 256 KB */
/* W, C, H, S, L, TL, strat */
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 4, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 19, 20, 5, 5, 4, ZSTD_greedy }, /* level 6 */
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
{ 21, 19, 20, 5, 5, 16, ZSTD_lazy }, /* level 8 */
{ 21, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
{ 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 21, 22, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
},
};

#endif /* ZSTD_CLEVELS_H */
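Reading one row of the table: in the "default" band, level 19 is { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, i.e. windowLog 23, chainLog 24, hashLog 22, searchLog 7, minMatch 3, targetLength 256, strategy ZSTD_btultra2, matching the field order of ZSTD_compressionParameters.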
@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
U32 const maxSV1 = maxSymbolValue+1;

U32* cumul = (U32*)workSpace;
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */

U32 highThreshold = tableSize-1;

if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
tableU16[-2] = (U16) tableLog;
@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
for (u=1; u <= maxSymbolValue+1; u++) {
for (u=1; u <= maxSV1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
assert(normalizedCounter[u-1] >= 0);
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
assert(cumul[u] >= cumul[u-1]); /* no overflow */
} }
cumul[maxSymbolValue+1] = tableSize+1;
cumul[maxSV1] = (U16)(tableSize+1);
}

/* Spread symbols */
{ U32 position = 0;
if (highThreshold == tableSize - 1) {
/* Case for no low prob count symbols. Lay down 8 bytes at a time
 * to reduce branch misses since we are operating on a small block
 */
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
{ U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (s=0; s<maxSV1; ++s, sv += add) {
int i;
int const n = normalizedCounter[s];
MEM_write64(spread + pos, sv);
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
assert(n>=0);
pos += (size_t)n;
}
}
/* Spread symbols across the table. Lack of lowprob symbols means that
 * we don't need variable sized inner loop, so we can unroll the loop and
 * reduce branch misses.
 */
{ size_t position = 0;
size_t s;
size_t const unroll = 2; /* Experimentally determined optimal unroll */
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
for (s = 0; s < (size_t)tableSize; s += unroll) {
size_t u;
for (u = 0; u < unroll; ++u) {
size_t const uPosition = (position + (u * step)) & tableMask;
tableSymbol[uPosition] = spread[s + u];
}
position = (position + (unroll * step)) & tableMask;
}
assert(position == 0); /* Must have initialized all positions */
}
} else {
U32 position = 0;
U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
for (symbol=0; symbol<maxSV1; symbol++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }

assert(position==0); /* Must have initialized all positions */
}

@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
symbolTT[s].deltaFindState = total - 1;
assert(total <= INT_MAX);
symbolTT[s].deltaFindState = (int)(total - 1);
total ++;
break;
default :
{
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = total - normalizedCounter[s];
total += normalizedCounter[s];
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
total += (unsigned)normalizedCounter[s];
} } } }

#if 0 /* debug : symbol costs */
@ -164,32 +206,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
}
}
} }
#endif

return 0;
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#endif


#ifndef FSE_COMMONDEFS_ONLY

/*-**************************************************************
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8)
+ 1 /* round up to whole nb bytes */
+ 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@ -53,6 +53,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
/* *******************************************************
* HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
size_t const mask = align - 1;
size_t const rem = (size_t)workspace & mask;
size_t const add = (align - rem) & mask;
BYTE* const aligned = (BYTE*)workspace + add;
assert((align & (align - 1)) == 0); /* pow 2 */
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
if (*workspaceSizePtr >= add) {
assert(add < align);
assert(((size_t)aligned & mask) == 0);
*workspaceSizePtr -= add;
return aligned;
} else {
*workspaceSizePtr = 0;
return NULL;
}
}
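Worked example of the alignment arithmetic: a workspace starting at address 0x1003 with align = 4 gives rem = 3 and add = 1, so the usable region begins at 0x1004 and *workspaceSizePtr shrinks by one byte; a workspace too small to absorb add is rejected by returning NULL with the remaining size zeroed.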
||||
/* HUF_compressWeights() :
|
||||
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
||||
* The use case needs much less stack memory.
|
||||
@ -75,7 +97,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
|
||||
|
||||
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
||||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
||||
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
|
||||
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));
|
||||
|
||||
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
||||
|
||||
@ -106,6 +128,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
static size_t HUF_getNbBits(HUF_CElt elt)
|
||||
{
|
||||
return elt & 0xFF;
|
||||
}
|
||||
|
||||
static size_t HUF_getNbBitsFast(HUF_CElt elt)
|
||||
{
|
||||
return elt;
|
||||
}
|
||||
|
||||
static size_t HUF_getValue(HUF_CElt elt)
|
||||
{
|
||||
return elt & ~0xFF;
|
||||
}
|
||||
|
||||
static size_t HUF_getValueFast(HUF_CElt elt)
|
||||
{
|
||||
return elt;
|
||||
}
|
||||
|
||||
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
|
||||
{
|
||||
assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
|
||||
*elt = nbBits;
|
||||
}
|
||||
|
||||
static void HUF_setValue(HUF_CElt* elt, size_t value)
|
||||
{
|
||||
size_t const nbBits = HUF_getNbBits(*elt);
|
||||
if (nbBits > 0) {
|
||||
assert((value >> nbBits) == 0);
|
||||
*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
|
||||
}
|
||||
}

typedef struct {
    HUF_CompressWeightsWksp wksp;
@ -117,9 +173,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
@ -130,9 +187,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
      if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
          op[0] = (BYTE)hSize;
@ -166,6 +224,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@ -175,6 +234,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    CTable[0] = tableLog;

    /* Prepare base value per rank */
    { U32 n, nextRankStart = 0;
      for (n=1; n<=tableLog; n++) {
@ -186,13 +247,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
    /* fill nbBits */
    { U32 n; for (n=0; n<nbSymbols; n++) {
        const U32 w = huffWeight[n];
        CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
        HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    } }

    /* fill val */
    { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};  /* support w=0=>n=tableLog+1 */
      U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
      { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
      { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
      /* determine starting value per rank */
      valPerRank[tableLog+1] = 0;   /* for w==0 */
      { U16 min = 0;
@ -202,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
          min >>= 1;
      } }
      /* assign value within rank, symbol order */
      { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
      { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
    const HUF_CElt* ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return table[symbolValue].nbBits;
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


@ -367,22 +428,118 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
}

typedef struct {
    U32 base;
    U32 curr;
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

#define RANK_POSITION_TABLE_SIZE 32
/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
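/* Editorial sketch (not from the patch): the bucketing rule above as a
 * standalone program, with the constants inlined and illustrative names.
 * Counts below the cutoff each get a distinct bucket; larger counts share
 * a log2 bucket. */
#include <stdio.h>
static unsigned highbit32(unsigned v) { unsigned r = 0; while (v >>= 1) r++; return r; }
static unsigned bucketIndex(unsigned count) {
    unsigned const kCutoff = 166;           /* RANK_POSITION_DISTINCT_COUNT_CUTOFF */
    unsigned const kLogBucketsBegin = 158;  /* RANK_POSITION_LOG_BUCKETS_BEGIN */
    return count < kCutoff ? count : highbit32(count) + kLogBucketsBegin;
}
int main(void) {
    /* prints "3 165 167": count 3 keeps its own bucket, while counts 200
     * (highbit 7) and 1000 (highbit 9) fall into shared log2 buckets */
    printf("%u %u %u\n", bucketIndex(3), bucketIndex(200), bucketIndex(1000));
    return 0;
}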

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                      Must have (maxSymbolValue + 1) entries.
@ -390,44 +547,52 @@ typedef struct {
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
    int n;
    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = BIT_highbit32(count[n] + 1);
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }
    /* Sort */

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rankPosition[r].curr++;
        /* Insert into the correct position in the rank.
         * We have at most 256 symbols, so this insertion should be fine.
         */
        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }
}

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
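/* Editorial example (illustrative): for count[] = {5,1,5,3} and
 * maxSymbolValue = 3, the counting pass sizes buckets 5 (two symbols),
 * 3 and 1, and the insertion pass yields huffNode = sym0(5), sym2(5),
 * sym3(3), sym1(1): descending by count, with symbol order preserved
 * inside each bucket because buckets are filled in symbol order. */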

/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@ -490,6 +655,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@ -505,20 +671,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
        min >>= 1;
    } }
    for (n=0; n<alphabetSize; n++)
        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}

size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@ -536,96 +702,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += CTable[s].nbBits * count[s];
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *    the pair (nbBits, value) in the format:
 *    - Bits [0, 4)            = nbBits
 *    - Bits [4, 64 - nbBits)  = 0
 *    - Bits [64 - nbBits, 64) = value
 * 2. The bitContainer is built from the upper bits and
 *    right shifted. E.g. to add a new value of N bits
 *    you right shift the bitContainer by N, then or in
 *    the new value into the N upper bits.
 * 3. The bitstream has two bit containers. You can add
 *    bits to the second container and merge them into
 *    the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;

/**! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4-bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    }
#endif
}
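/* Editorial sketch (not from the patch): the container update performed by
 * HUF_addBits, reduced to standalone C so the packing is easy to see.
 * Helper names are illustrative, not zstd API. */
#include <assert.h>
#include <stdint.h>
static uint64_t makeElt(uint64_t value, uint64_t nbBits) {
    assert(nbBits > 0 && nbBits <= 12 && (value >> nbBits) == 0);
    return (value << (64 - nbBits)) | nbBits;   /* value on top, nbBits in the low byte */
}
static void addBits(uint64_t* container, uint64_t* bitPos, uint64_t elt) {
    uint64_t const nbBits = elt & 0xFF;
    *container >>= nbBits;                 /* make room in the upper bits */
    *container |= elt & ~(uint64_t)0xFF;   /* OR the pre-shifted value into the top */
    *bitPos += nbBits;
}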

#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}
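/* Editorial note (illustrative): if container 0 holds a bits and container 1
 * holds b bits, the merge right-shifts container 0 by b and ORs container 1
 * in, leaving a+b bits with container 1's bits on top -- the same result as
 * if those b bits had been added to container 0 directly, but computed
 * without a serial data dependency between the two halves. */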

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                              const BYTE* ip, size_t srcSize,
                                              const HUF_CElt* ct,
                                              int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);

}

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
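/* Editorial example (illustrative): for srcSize = 4096 and tableLog = 11 the
 * bound is ((4096 * 11) >> 3) + 8 = 5632 + 8 = 5640 bytes -- every symbol is
 * charged the worst-case tableLog bits, plus 8 bytes of slack so the final
 * 8-byte flush may write past the last useful byte. */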

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;  /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
@ -667,9 +1071,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@ -689,8 +1097,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }
@ -698,8 +1105,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }
@ -707,8 +1113,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }
@ -717,7 +1122,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize==0) return 0;
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

@ -726,7 +1131,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@ -750,35 +1160,38 @@ static size_t HUF_compressCTable_internal(

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 HUF_nbStreams_e nbStreams,
                 void* workSpace_align4, size_t wkspSize,
                 void* workSpace, size_t wkspSize,
                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2)
                 const int bmi2, unsigned suspectUncompressible)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@ -794,8 +1207,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                         nbStreams, oldHufTable, bmi2);
    }

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
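    /* Editorial note (illustrative numbers): with two 4096-byte samples the
     * cutoff above is ((2 * 4096) >> 7) + 4 = 68, i.e. the block is given up
     * as incompressible when the most frequent byte appears at most 68 times
     * in 8192 sampled bytes -- only about twice the 8192/256 = 32 expected
     * for uniformly random data. */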

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
@ -820,9 +1248,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
                    sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }
    /* Zero unused symbols in CTable, so we can check it for validity */
    {
        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
    }

    /* Write table description header */
@ -859,19 +1290,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
                      int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
                                 repeat, preferRepeat, bmi2, suspectUncompressible);
}

/* HUF_compress4X_repeat():
@ -885,22 +1317,23 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
@ -918,7 +1351,7 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

@ -926,7 +1359,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

File diff suppressed because it is too large
@ -63,7 +63,7 @@ typedef struct {
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

@ -179,7 +179,7 @@ typedef struct {
    U32 offCodeSumBasePrice;    /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_literalCompressionMode_e literalCompressionMode;
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
@ -199,6 +199,8 @@ typedef struct {
     */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
@ -264,7 +266,7 @@ typedef struct {
} ldmState_t;

typedef struct {
    U32 enableLdm;          /* 1 if enable long distance matching */
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
@ -295,7 +297,7 @@ struct ZSTD_CCtx_params_s {
     * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_literalCompressionMode_e literalCompressionMode;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
@ -318,10 +320,10 @@ struct ZSTD_CCtx_params_s {
    int validateSequences;

    /* Block splitting */
    int splitBlocks;
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;
@ -343,6 +345,22 @@ typedef enum {
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;    /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@ -374,7 +392,7 @@ struct ZSTD_CCtx_s {
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Wether we are streaming or not */
    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
@ -408,6 +426,9 @@ struct ZSTD_CCtx_s {
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@ -442,7 +463,7 @@ typedef enum {
typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@ -549,17 +570,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
    return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_lcm_huffman:
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_lcm_uncompressed:
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        /* fall-through */
    case ZSTD_lcm_auto:
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}
@ -651,8 +672,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
#       if STATIC_BMI2
            return _tzcnt_u64(val) >> 3;
#       else
            unsigned long r = 0;
            return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
            if (val != 0) {
                unsigned long r;
                _BitScanForward64(&r, (U64)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       endif
#   elif defined(__GNUC__) && (__GNUC__ >= 4)
        return (__builtin_ctzll((U64)val) >> 3);
@ -669,8 +696,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
#     endif
    } else { /* 32 bits */
#   if defined(_MSC_VER)
        unsigned long r=0;
        return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
        if (val != 0) {
            unsigned long r;
            _BitScanForward(&r, (U32)val);
            return (unsigned)(r >> 3);
        } else {
            /* Should not reach this code path */
            __assume(0);
        }
#   elif defined(__GNUC__) && (__GNUC__ >= 3)
        return (__builtin_ctz((U32)val) >> 3);
#   else
@ -687,8 +720,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
#       if STATIC_BMI2
            return _lzcnt_u64(val) >> 3;
#       else
            unsigned long r = 0;
            return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
            if (val != 0) {
                unsigned long r;
                _BitScanReverse64(&r, (U64)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       endif
#   elif defined(__GNUC__) && (__GNUC__ >= 4)
        return (__builtin_clzll(val) >> 3);
@ -702,8 +741,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
#     endif
    } else { /* 32 bits */
#   if defined(_MSC_VER)
        unsigned long r = 0;
        return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
        if (val != 0) {
            unsigned long r;
            _BitScanReverse(&r, (unsigned long)val);
            return (unsigned)(r >> 3);
        } else {
            /* Should not reach this code path */
            __assume(0);
        }
#   elif defined(__GNUC__) && (__GNUC__ >= 3)
        return (__builtin_clz((U32)val) >> 3);
#   else
@ -884,9 +929,9 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == 1 &&
           window.lowLimit == 1 &&
           (window.nextSrc - window.base) == 1;
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/**
@ -937,7 +982,9 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
@ -1012,10 +1059,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle0 = curr & cycleMask;
    /* Exclude zero so that newCurrent - maxDist >= 1. */
    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
@ -1031,14 +1082,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit <= correction) window->lowLimit = 1;
    else window->lowLimit -= correction;
    if (window->dictLimit <= correction) window->dictLimit = 1;
    else window->dictLimit -= correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= 1);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);
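    /* Editorial example (illustrative): take cycleLog = 20 (cycleSize = 1<<20),
     * maxDist = 1<<20 and curr = 0x50000003. Then currentCycle = 3, which is
     * already >= ZSTD_WINDOW_START_INDEX, so no extra correction term is added:
     * newCurrent = 3 + (1<<20) = 0x100003 and correction = 0x4FF00000, while
     * (newCurrent & cycleMask) == (curr & cycleMask) == 3 keeps hash table
     * indices consistent after the rebase. */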
|
||||
@ -1149,11 +1206,12 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
||||
|
||||
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
||||
ZSTD_memset(window, 0, sizeof(*window));
|
||||
window->base = (BYTE const*)"";
|
||||
window->dictBase = (BYTE const*)"";
|
||||
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
||||
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
||||
window->nextSrc = window->base + 1; /* see issue #1241 */
|
||||
window->base = (BYTE const*)" ";
|
||||
window->dictBase = (BYTE const*)" ";
|
||||
ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
|
||||
window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
|
||||
window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
|
||||
window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
|
||||
window->nbOverflowCorrections = 0;
|
||||
}
|
||||
|
||||
@ -1206,15 +1264,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
||||
{
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.lowLimit;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.lowLimit;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
||||
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
||||
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
||||
*/
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
return matchLowest;
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2)
|
||||
const int bmi2,
|
||||
unsigned suspectUncompressible)
|
||||
{
|
||||
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
||||
@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
HUF_compress1X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
|
||||
HUF_compress4X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
DEBUGLOG(5, "Reusing previous huffman table");
|
||||
|
@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2);
|
||||
const int bmi2,
|
||||
unsigned suspectUncompressible);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
||||
|
@ -275,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
assert(nbSeq_1 > 1);
|
||||
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
||||
(void)entropyWorkspaceSize;
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
||||
assert(oend >= op);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
||||
return NCountSize;
|
||||
}
|
||||
}
|
||||
@ -398,7 +399,7 @@ ZSTD_encodeSequences_default(
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
|
@ -132,6 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
||||
const seqDef* sp = sstart;
|
||||
size_t matchLengthSum = 0;
|
||||
size_t litLengthSum = 0;
|
||||
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
||||
while (send-sp > 0) {
|
||||
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
||||
litLengthSum += seqLen.litLength;
|
||||
@ -474,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
/* I think there is an optimization opportunity here.
|
||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
||||
* since it recalculates estimate from scratch.
|
||||
* For example, it would recount literal distribution and symbol codes everytime.
|
||||
* For example, it would recount literal distribution and symbol codes every time.
|
||||
*/
|
||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
||||
&nextCBlock->entropy, entropyMetadata,
|
||||
|
@ -219,7 +219,7 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
* to align the beginning of the aligned secion.
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.

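/* Illustrative aside (not part of the commit): the n_1=[1, 64] / n_2=[0, 63]
 * ranges above come from rounding a pointer up to a 64-byte boundary. A sketch
 * of that arithmetic, with an invented helper name:
 */
#include <stdint.h>

static size_t pad_to_64(uintptr_t addr, int atLeastOne)
{
    size_t const mod = (size_t)(addr & 63);
    size_t const pad = (64 - mod) & 63;          /* 0..63 bytes to the next boundary */
    return (atLeastOne && pad == 0) ? 64 : pad;  /* tables always skip at least 1 byte */
}
/* Worst case across the two sections: 64 + 63 = 127 bytes of slack; on a fresh
 * cwksp the two paddings complement each other to exactly 64, as stated above. */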
@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,


FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_generic(
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;
const U32 hBitsL = cParams->hashLog;
U32* const hashSmall = ms->chainTable;
const U32 hBitsS = cParams->chainLog;
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
/* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;

size_t mLength;
U32 offset;
U32 curr;

/* how many positions to search before increasing step size */
const size_t kStepIncr = 1 << kSearchStrength;
/* the position at which to increment the step size if no match is found */
const BYTE* nextStep;
size_t step; /* the current step size */

size_t hl0; /* the long hash at ip */
size_t hl1; /* the long hash at ip1 */

U32 idxl0; /* the long match index for ip */
U32 idxl1; /* the long match index for ip1 */

const BYTE* matchl0; /* the long match for ip */
const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */

const BYTE* ip = istart; /* the current position */
const BYTE* ip1; /* the next position */

DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

/* init */
ip += ((ip - prefixLowest) == 0);
{
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}

/* Outer Loop: one iteration per match found and stored */
while (1) {
step = 1;
nextStep = ip + kStepIncr;
ip1 = ip + step;

if (ip1 > ilimit) {
goto _cleanup;
}

hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
idxl0 = hashLong[hl0];
matchl0 = base + idxl0;

/* Inner Loop: one iteration per search / position */
do {
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
const U32 idxs0 = hashSmall[hs0];
curr = (U32)(ip-base);
matchs0 = base + idxs0;

hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */

/* check noDict repcode */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}

hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

if (idxl0 > prefixLowestIndex) {
/* check prefix long match */
if (MEM_read64(matchl0) == MEM_read64(ip)) {
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
offset = (U32)(ip-matchl0);
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
goto _match_found;
}
}

idxl1 = hashLong[hl1];
matchl1 = base + idxl1;

if (idxs0 > prefixLowestIndex) {
/* check prefix short match */
if (MEM_read32(matchs0) == MEM_read32(ip)) {
goto _search_next_long;
}
}

if (ip1 >= nextStep) {
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
step++;
nextStep += kStepIncr;
}
ip = ip1;
ip1 += step;

hl0 = hl1;
idxl0 = idxl1;
matchl0 = matchl1;
#if defined(__aarch64__)
PREFETCH_L1(ip+256);
#endif
} while (ip1 <= ilimit);

_cleanup:
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;

/* Return the last literals size */
return (size_t)(iend - anchor);

_search_next_long:

/* check prefix long +1 match */
if (idxl1 > prefixLowestIndex) {
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
ip = ip1;
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
offset = (U32)(ip-matchl1);
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
goto _match_found;
}
}

/* if no long +1 match, explore the short match we found */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

/* fall-through */

_match_found: /* requires ip, offset, mLength */
offset_2 = offset_1;
offset_1 = offset;

if (step < 4) {
/* It is unsafe to write this value back to the hashtable when ip1 is
* greater than or equal to the new ip we will have after we're done
* processing this match. Rather than perform that test directly
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
* more predictable test. The minmatch even if we take a short match is
* 4 bytes, so as long as step, the distance between ip and ip1
* (initially) is less than 4, we know ip1 < new ip. */
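/* (Worked example: with step == 3, ip1 == ip + 3, while the new ip is at least
* ip + mLength >= ip + 4, so ip1 strictly precedes it and the write is safe.) */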
hashLong[hl1] = (U32)(ip1 - base);
}

ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

_match_stored:
/* match found */
ip += mLength;
anchor = ip;

if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}

/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
}
}
}
}


FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;

@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
U32 offsetSaved = 0;

const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams =
dictMode == ZSTD_dictMatchState ?
&dms->cParams : NULL;
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
dms->hashTable : NULL;
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
dms->chainTable : NULL;
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
dictBase + dictStartIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
dictCParams->hashLog : hBitsL;
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
dictCParams->chainLog : hBitsS;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
const U32* const dictHashLong = dms->hashTable;
const U32* const dictHashSmall = dms->chainTable;
const U32 dictStartIndex = dms->window.dictLimit;
const BYTE* const dictBase = dms->window.base;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog;
const U32 dictHBitsS = dictCParams->chainLog;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

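/* (Note: dictIndexDelta above maps prefix-space indices into dictionary space;
* an index i < prefixLowestIndex is located at dictBase + (i - dictIndexDelta),
* which is exactly how repMatch is computed further down.) */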
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");

assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
}
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const curr = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
}

/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);

/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */

@ -135,15 +317,13 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* matchLong = base + matchIndexL;
const BYTE* match = base + matchIndexS;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */

/* check dictMatchState repcode */
if (dictMode == ZSTD_dictMatchState
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
/* check repcode */
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;

@ -152,15 +332,6 @@ size_t ZSTD_compressBlock_doubleFast_generic(
goto _match_stored;
}

/* check noDict repcode */
if ( dictMode == ZSTD_noDict
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}

if (matchIndexL > prefixLowestIndex) {
/* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) {

@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
} else if (dictMode == ZSTD_dictMatchState) {
} else {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL];
const BYTE* dictMatchL = dictBase + dictMatchIndexL;

@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
} else if (dictMode == ZSTD_dictMatchState) {
} else {
/* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS];
match = dictBase + dictMatchIndexS;

@ -220,7 +391,7 @@ _search_next_long:
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
} else if (dictMode == ZSTD_dictMatchState) {
} else {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;

@ -234,7 +405,7 @@ _search_next_long:
} } }

/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
if (matchIndexS < prefixLowestIndex) {
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
offset = (U32)(curr - matchIndexS);
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */

@ -244,8 +415,6 @@ _search_next_long:
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}

/* fall-through */

_match_found:
offset_2 = offset_1;
offset_1 = offset;

@ -268,43 +437,27 @@ _match_stored:
}

/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
&& repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
} }

if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
continue;
}
break;
}
}
} /* while (ip < ilimit) */

/* save reps for next block */

@ -315,6 +468,24 @@ _match_stored:
return (size_t)(iend - anchor);
}

#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

@ -325,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}

@ -345,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

@ -387,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */

@ -409,7 +580,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */

if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;

@ -477,7 +648,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
& (offset_2 < current2 - dictStartIndex))
& (offset_2 <= current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

@ -500,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

@ -510,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}

@ -43,8 +43,54 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
}


/**
* If you squint hard enough (and ignore repcodes), the search operation at any
* given position is broken into 4 stages:
*
* 1. Hash (map position to hash value via input read)
* 2. Lookup (map hash val to index via hashtable read)
* 3. Load (map index to value at that position via input read)
* 4. Compare
*
* Each of these steps involves a memory read at an address which is computed
* from the previous step. This means these steps must be sequenced and their
* latencies are cumulative.
*
* Rather than do 1->2->3->4 sequentially for a single position before moving
* onto the next, this implementation interleaves these operations across the
* next few positions:
*
* R = Repcode Read & Compare
* H = Hash
* T = Table Lookup
* M = Match Read & Compare
*
* Pos | Time -->
* ----+-------------------
* N   | ... M
* N+1 |  ...  TM
* N+2 |    R H   T M
* N+3 |      H    TM
* N+4 |        R H   T M
* N+5 |          H   ...
* N+6 |            R ...
*
* This is very much analogous to the pipelining of execution in a CPU. And just
* like a CPU, we have to dump the pipeline when we find a match (i.e., take a
* branch).
*
* When this happens, we throw away our current state, and do the following prep
* to re-enter the loop:
*
* Pos | Time -->
* ----+-------------------
* N   | H T
* N+1 |   H
*
* This is also the work we do at the beginning to enter the loop initially.
*/
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_generic(
ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls)

@ -53,135 +99,229 @@ ZSTD_compressBlock_fast_generic(
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
size_t const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];

const BYTE* anchor = istart;
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* ip2;
const BYTE* ip3;
U32 current0;

U32 rep_offset1 = rep[0];
U32 rep_offset2 = rep[1];
U32 offsetSaved = 0;

/* init */
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
U32 idx; /* match idx for ip0 */
U32 mval; /* src value at match idx */

U32 offcode;
const BYTE* match0;
size_t mLength;

size_t step;
const BYTE* nextStep;
const size_t kStepIncr = (1 << (kSearchStrength - 1));

DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{ U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
}

/* Main Search Loop */
#ifdef __INTEL_COMPILER
/* From intel 'The vector pragma indicates that the loop should be
* vectorized if it is legal to do so'. Can be used together with
* #pragma ivdep (but have opted to exclude that because intel
* warns against using it).*/
#pragma vector always
#endif
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
size_t mLength;
BYTE const* ip2 = ip0 + 2;
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
U32 const val0 = MEM_read32(ip0);
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
U32 const val1 = MEM_read32(ip1);
U32 const current0 = (U32)(ip0-base);
U32 const current1 = (U32)(ip1-base);
U32 const matchIndex0 = hashTable[h0];
U32 const matchIndex1 = hashTable[h1];
BYTE const* repMatch = ip2 - offset_1;
const BYTE* match0 = base + matchIndex0;
const BYTE* match1 = base + matchIndex1;
U32 offcode;
/* start each op */
_start: /* Requires: ip0 */

#if defined(__aarch64__)
PREFETCH_L1(ip0+256);
#endif
step = stepSize;
nextStep = ip0 + kStepIncr;

hashTable[h0] = current0; /* update hash table */
hashTable[h1] = current1; /* update hash table */
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
ip1 = ip0 + stepSize;
ip2 = ip1 + stepSize;
ip3 = ip2 + stepSize;

assert(ip0 + 1 == ip1);
if (ip3 >= ilimit) {
goto _cleanup;
}

if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
ip0 = ip2 - mLength;
match0 = repMatch - mLength;
mLength += 4;
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
hash1 = ZSTD_hashPtr(ip1, hlog, mls);

idx = hashTable[hash0];

do {
/* load repcode match for ip[2]*/
const U32 rval = MEM_read32(ip2 - rep_offset1);

/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;

/* check repcode at ip[2] */
if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
ip0 = ip2;
match0 = ip0 - rep_offset1;
mLength = ip0[-1] == match0[-1];
ip0 -= mLength;
match0 -= mLength;
offcode = 0;
mLength += 4;
goto _match;
}
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
/* found a regular match */

/* load match for ip[0] */
if (idx >= prefixStartIndex) {
mval = MEM_read32(base + idx);
} else {
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
}
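/* (Note: the "^ 1" above flips the lowest bit of the current source value, so
* the 32-bit compare below is guaranteed to fail when the table index falls
* below the window; out-of-range candidates thereby reuse the same compare
* path instead of costing an extra branch.) */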

/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
}
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
/* found a regular match after one literal */
ip0 = ip1;
match0 = match1;

/* lookup ip[1] */
idx = hashTable[hash1];

/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);

/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip3;

/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;

/* load match for ip[0] */
if (idx >= prefixStartIndex) {
mval = MEM_read32(base + idx);
} else {
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
}

/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
}
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
assert(step >= 2);
ip0 += step;
ip1 += step;
continue;

/* lookup ip[1] */
idx = hashTable[hash1];

/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);

/* calculate step */
if (ip2 >= nextStep) {
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
step++;
nextStep += kStepIncr;
}
_offset: /* Requires: ip0, match0 */
/* Compute the offset code */
offset_2 = offset_1;
offset_1 = (U32)(ip0-match0);
offcode = offset_1 + ZSTD_REP_MOVE;
mLength = 4;
/* Count the backwards match length */
while (((ip0>anchor) & (match0>prefixStart))
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */

_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip2 + step;
ip3 = ip2 + step;
} while (ip3 < ilimit);

if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } }
ip1 = ip0 + 1;
}
_cleanup:
/* Note that there are probably still a couple positions we could search.
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;

/* Return the last literals size */
return (size_t)(iend - anchor);

_offset: /* Requires: ip0, idx */

/* Compute the offset code. */
match0 = base + idx;
rep_offset2 = rep_offset1;
rep_offset1 = (U32)(ip0-match0);
offcode = rep_offset1 + ZSTD_REP_MOVE;
mLength = 4;

/* Count the backwards match length. */
while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
ip0--;
match0--;
mLength++;
}

_match: /* Requires: ip0, match0, offcode */

/* Count the forward length. */
mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);

ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength - MINMATCH);

ip0 += mLength;
anchor = ip0;

/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}

/* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } }

goto _start;
}

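/* Illustrative aside (not part of the commit): a toy, self-contained rendering
 * of the software-pipelining idea from the pipeline diagram above -- hash the
 * position a couple of steps ahead while comparing the candidate for the
 * current one, so the dependent loads of different positions overlap. The
 * names and the hash/table scheme are invented for the sketch; it assumes
 * `table` has one slot per possible hash value with slot value 0 meaning
 * "empty", and that hashAt() only reads in-bounds bytes.
 */
static size_t toy_pipelined_search(const unsigned char* src, size_t n,
                                   unsigned (*hashAt)(const unsigned char*),
                                   unsigned* table)
{
    size_t pos = 0;
    if (n < 3) return n;
    {
        unsigned h0 = hashAt(src + 0);   /* prologue: prime the pipeline (stage 1) */
        unsigned h1 = hashAt(src + 1);
        while (pos + 2 < n) {
            unsigned const cand = table[h0];  /* table lookup for pos (stage 2) */
            table[h0] = (unsigned)pos;        /* record the current position */
            h0 = h1;                          /* shift the pipeline forward */
            h1 = hashAt(src + pos + 2);       /* hash pos+2 early (stage 1) */
            if ((cand != 0) && (src[cand] == src[pos]))  /* load + compare (stages 3-4) */
                return pos;                   /* "match" taken: dump the pipeline */
            pos++;
        }
    }
    return n;
}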
#define ZSTD_GEN_FAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_FAST_FN(noDict, 4)
ZSTD_GEN_FAST_FN(noDict, 5)
ZSTD_GEN_FAST_FN(noDict, 6)
ZSTD_GEN_FAST_FN(noDict, 7)

size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

@ -193,13 +333,13 @@ size_t ZSTD_compressBlock_fast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}

@ -351,6 +491,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
return (size_t)(iend - anchor);
}


ZSTD_GEN_FAST_FN(dictMatchState, 4)
ZSTD_GEN_FAST_FN(dictMatchState, 5)
ZSTD_GEN_FAST_FN(dictMatchState, 6)
ZSTD_GEN_FAST_FN(dictMatchState, 7)

size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)

@ -361,13 +507,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

@ -402,7 +548,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(

/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */

@ -418,7 +564,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);

if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
& (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;

@ -453,7 +599,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

@ -475,6 +621,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
return (size_t)(iend - anchor);
}

ZSTD_GEN_FAST_FN(extDict, 4)
ZSTD_GEN_FAST_FN(extDict, 5)
ZSTD_GEN_FAST_FN(extDict, 6)
ZSTD_GEN_FAST_FN(extDict, 7)

size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

@ -485,12 +635,12 @@ size_t ZSTD_compressBlock_fast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}

File diff suppressed because it is too large

@ -159,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
}

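/* Illustrative aside (not part of the commit): enableLdm stops being a plain
 * boolean here and becomes a three-state parameter switch. From memory of the
 * public zstd.h of this era (exact values may differ):
 *
 *     typedef enum {
 *         ZSTD_ps_auto = 0,     /-* let the library decide *-/
 *         ZSTD_ps_enable = 1,   /-* force the feature on *-/
 *         ZSTD_ps_disable = 2   /-* force the feature off *-/
 *     } ZSTD_paramSwitch_e;
 *
 * which is why truthiness tests like "params.enableLdm ?" had to become explicit
 * "== ZSTD_ps_enable" comparisons: ZSTD_ps_disable is nonzero, so the old test
 * would have treated "disabled" as enabled. */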
/** ZSTD_ldm_getBucket() :

@ -478,7 +478,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
*/
if (anchor > ip + hashed) {
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
/* Continue the outter loop at anchor (ip + hashed == anchor). */
/* Continue the outer loop at anchor (ip + hashed == anchor). */
ip = anchor - hashed;
break;
}

@ -657,7 +657,7 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
ZSTD_paramSwitch_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;

@ -66,7 +66,7 @@ size_t ZSTD_ldm_generateSequences(
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
ZSTD_paramSwitch_e useRowMatchFinder,
void const* src, size_t srcSize);

/**

@ -11,7 +11,10 @@
#ifndef ZSTD_LDM_GEARTAB_H
#define ZSTD_LDM_GEARTAB_H

static U64 ZSTD_ldm_gearTab[256] = {
#include "../common/compiler.h" /* UNUSED_ATTR */
#include "../common/mem.h" /* U64 */

static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,

@ -8,13 +8,26 @@
* You may select, at your option, one of the above-listed licenses.
*/

/**
* Disable inlining for the optimal parser for the kernel build.
* It is unlikely to be used in the kernel, and where it is used
* latency shouldn't matter because it is very slow to begin with.
* We prefer a ~180KB binary size win over faster optimal parsing.
*
* TODO(https://github.com/facebook/zstd/issues/2862):
* Improve the code size of the optimal parser in general, so we
* don't need this hack for the kernel build.
*/
#ifdef ZSTD_LINUX_KERNEL
#define ZSTD_NO_INLINE 1
#endif

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE (1<<30)

#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */

@ -24,11 +37,11 @@
* Price functions for optimal parser
***************************************/

#if 0 /* approximation at bit level */
#if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy */
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))

@ -66,7 +79,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)

@ -79,25 +92,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
}


/* ZSTD_downscaleStat() :
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
* return the resulting sum of elements */
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
static U32 sum_u32(const unsigned table[], size_t nbElts)
{
size_t n;
U32 total = 0;
for (n=0; n<nbElts; n++) {
total += table[n];
}
return total;
}

static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
{
U32 s, sum=0;
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
assert(shift < 30);
for (s=0; s<lastEltIndex+1; s++) {
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
table[s] = 1 + (table[s] >> shift);
sum += table[s];
}
return sum;
}

/* ZSTD_scaleStats() :
* reduce all elements in table if sum too large
* return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
U32 const prevsum = sum_u32(table, lastEltIndex+1);
U32 const factor = prevsum >> logTarget;
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30);
if (factor <= 1) return prevsum;
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
}

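/* (Worked example for ZSTD_scaleStats above: with logTarget == 11 and a table
* summing to prevsum == 8192 == 2^13, factor == 8192 >> 11 == 4, so the shift
* is ZSTD_highbit32(4) == 2 and every cell becomes 1 + (cell >> 2), bringing
* the sum back near the ~2^11 target while keeping each count >= 1.) */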
/* ZSTD_rescaleFreqs() :
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
* take hints from dictionary if there is one
* or init from zero, using src for literals stats, or flat 1 for match symbols
* and init from zero if there is none,
* using src for literals stats, and baseline stats for sequence symbols
* otherwise downscale existing stats, to be used as seed for next block.
*/
static void

@ -126,7 +160,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->litSum = 0;
for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
assert(bitCost <= scaleLog);
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litSum += optPtr->litFreq[lit];

@ -174,14 +208,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
if (compressedLiterals) {
unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
}

{ unsigned ll;
for (ll=0; ll<=MaxLL; ll++)
optPtr->litLengthFreq[ll] = 1;
{ unsigned const baseLLfreqs[MaxLL+1] = {
4, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1
};
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
}
optPtr->litLengthSum = MaxLL+1;

{ unsigned ml;
for (ml=0; ml<=MaxML; ml++)

@ -189,21 +227,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
}
optPtr->matchLengthSum = MaxML+1;

{ unsigned of;
for (of=0; of<=MaxOff; of++)
optPtr->offCodeFreq[of] = 1;
{ unsigned const baseOFCfreqs[MaxOff+1] = {
6, 2, 1, 1, 2, 3, 4, 4,
4, 3, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1
};
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
}
optPtr->offCodeSum = MaxOff+1;


}

} else { /* new block : re-use previous statistics, scaled down */

if (compressedLiterals)
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
}

ZSTD_setBasePrices(optPtr, optLevel);

@ -338,7 +380,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{

@ -364,11 +406,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
* Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */
static U32 ZSTD_insertBt1(
ZSTD_matchState_t* ms,
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
U32 const mls, const int extDict)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;

@ -391,7 +435,10 @@ static U32 ZSTD_insertBt1(
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
/* windowLow is based on target because
* we only need positions that will be in the window at the end of the tree update.
*/
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
U32 matchEndIdx = curr+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;

@ -404,11 +451,12 @@ static U32 ZSTD_insertBt1(

DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

assert(curr <= target);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = curr; /* Update Hash Table */

assert(windowLow > 0);
while (nbCompares-- && (matchIndex >= windowLow)) {
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
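/* (Note: the rewritten for-loop leaves nbCompares untouched when its condition
* fails, unlike "while (nbCompares--)", which decremented once more on exit and
* could wrap below zero -- the underflow that the assert on nbCompares further
* down guards against.) */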
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(matchIndex < curr);
|
||||
@ -492,7 +540,7 @@ void ZSTD_updateTree_internal(
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
@ -635,11 +683,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
                return 1;
        } } }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
        }
    } /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex >= matchLow)) {
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
@ -672,8 +720,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break;   /* drop, to preserve bt consistency (miss a little bit of compression) */
            }
        }
    } }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
@ -692,12 +739,13 @@ U32 ZSTD_insertBtAndGetAllMatches (

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
@ -718,8 +766,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }
        } }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
@ -729,9 +776,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
            }
        }
    }
    } } } /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
@ -893,17 +938,17 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
         */
        U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
    }
    }
    ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/


static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
@ -985,7 +1030,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     * in every price. We include the literal length to avoid negative
     * prices when we subtract the previous literal length.
     */
    opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
    opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

    /* large match -> immediate encoding */
    {   U32 const maxML = matches[nbMatches-1].len;
@ -1005,7 +1050,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
    }   }

    /* set prices for first matches starting position == 0 */
    {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
    assert(opt[0].price >= 0);
    {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
        U32 pos;
        U32 matchNb;
        for (pos = 1; pos < minMatch; pos++) {
@ -1022,7 +1068,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                opt[pos].mlen = pos;
                opt[pos].off = offset;
                opt[pos].litlen = litlen;
                opt[pos].price = sequencePrice;
                opt[pos].price = (int)sequencePrice;
        }   }
        last_pos = pos-1;
    }
@ -1037,9 +1083,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                int const price = opt[cur-1].price
                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@ -1082,9 +1128,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                continue;   /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = opt[cur].price;
                U32 const previousPrice = (U32)opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                U32 matchNb;
@ -1124,7 +1171,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@ -1220,28 +1267,7 @@ size_t ZSTD_compressBlock_btopt(
}


/* used in 2-pass strategy */
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
{
    U32 s, sum=0;
    assert(ZSTD_FREQ_DIV+bonus >= 0);
    for (s=0; s<lastEltIndex+1; s++) {
        table[s] <<= ZSTD_FREQ_DIV+bonus;
        table[s]--;
        sum += table[s];
    }
    return sum;
}

/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
}
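Taken together, ZSTD_upscaleStat and the block-boundary downscaling form the two halves of the 2-pass loop: seed the tables with a throwaway compression pass, then re-inforce them before the real pass. The shift keeps the arithmetic cheap; as a worked example with a shift of 4, a seeded count of 3 is boosted to 3 << 4 and then decremented to 47, so it dominates the +1 updates accumulated position-by-position during the second pass.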

/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
@ -1272,8 +1298,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;

    /* re-inforce weight of collected statistics */
    ZSTD_upscaleStats(&ms->opt);
}

size_t ZSTD_compressBlock_btultra(

@ -467,7 +467,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                         ZSTD_dictContentType_e dictContentType)
{
    /* Adjust parameters */
    if (params.ldmParams.enableLdm) {
    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
        DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
@ -478,7 +478,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
    serialState->nextJobID = 0;
    if (params.fParams.checksumFlag)
        XXH64_reset(&serialState->xxhState, 0);
    if (params.ldmParams.enableLdm) {
    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_customMem cMem = params.customMem;
        unsigned const hashLog = params.ldmParams.hashLog;
        size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
@ -564,7 +564,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
    /* A future job may error and skip our job */
    if (serialState->nextJobID == jobID) {
        /* It is now our turn, do any processing necessary */
        if (serialState->params.ldmParams.enableLdm) {
        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
            size_t error;
            assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                   seqStore.size == 0 && seqStore.capacity > 0);
@ -594,7 +594,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
            if (seqStore.size > 0) {
                size_t const err = ZSTD_referenceExternalSequences(
                    jobCCtx, seqStore.seq, seqStore.size);
                assert(serialState->params.ldmParams.enableLdm);
                assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
                assert(!ZSTD_isError(err));
                (void)err;
            }
@ -672,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
        if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
        job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
    }
    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
        JOB_ERROR(ERROR(memory_allocation));

    /* Don't compute the checksum for chunks, since we compute it externally,
@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     */
    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
    /* Don't run LDM for the chunks, since we handle it externally */
    jobParams.ldmParams.enableLdm = 0;
    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
    /* Correct nbWorkers to 0. */
    jobParams.nbWorkers = 0;

@ -807,6 +807,15 @@ typedef struct {
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

#define RSYNC_LENGTH 32
/* Don't create chunks smaller than the zstd block size.
 * This stops us from regressing compression ratio too much,
 * and ensures our output fits in ZSTD_compressBound().
 *
 * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
 * ZSTD_COMPRESSBOUND() will need to be updated.
 */
#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
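The rsyncable mode cuts jobs at content-defined boundaries: a 32-byte rolling hash is advanced one byte at a time, and a position becomes a synchronization point when the low rsyncLog bits of the hash are all ones, subject to the minimum block size above. A stripped-down sketch of the scheme (generic Rabin-Karp style, not zstd's exact hash constants):

    #include <stddef.h>
    #include <stdint.h>

    #define WIN 32   /* RSYNC_LENGTH analogue */

    /* Illustrative content-defined chunking: cut where the rolling hash's
     * low bits are all ones. primePower must equal prime^(WIN-1). */
    static size_t findCut(const uint8_t* in, size_t size,
                          uint64_t prime, uint64_t primePower, uint64_t hitMask)
    {
        uint64_t hash = 0;
        size_t pos;
        for (pos = 0; pos < WIN && pos < size; ++pos)
            hash = hash * prime + in[pos];   /* seed with the first WIN bytes */
        for (; pos < size; ++pos) {
            if ((hash & hitMask) == hitMask)
                return pos;                  /* synchronization point */
            /* roll: drop in[pos-WIN], add in[pos] (mod 2^64 arithmetic) */
            hash = (hash - (uint64_t)in[pos - WIN] * primePower) * prime + in[pos];
        }
        return size;                         /* no cut found */
    }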

typedef struct {
    U64 hash;
@ -1135,7 +1144,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{
    unsigned jobLog;
    if (params->ldmParams.enableLdm) {
    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on cycleLog instead. */
@ -1179,7 +1188,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
    assert(0 <= overlapRLog && overlapRLog <= 8);
    if (params->ldmParams.enableLdm) {
    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead.
@ -1252,6 +1261,9 @@ size_t ZSTDMT_initCStream_internal(
        /* Aim for the targetsectionSize as the average job size. */
        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
         * expected job size is at least 4x larger. */
        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
        DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
        mtctx->rsync.hash = 0;
        mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
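To make the numbers concrete: a target section size of 8 MiB gives jobSizeKB = 8192, ZSTD_highbit32(8192) = 13, hence rsyncBits = 23 and hitMask = (1 << 23) - 1. An all-ones test on 23 hash bits fires on average once every 2^23 bytes, so synchronization points land roughly every 8 MiB, the targeted average job size. The new assert also holds comfortably, since 2^23 is 16x the 2^19 floor implied by RSYNC_MIN_BLOCK_LOG + 2 (assuming ZSTD_BLOCKSIZELOG_MAX == 17).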
@ -1263,7 +1275,7 @@ size_t ZSTDMT_initCStream_internal(
    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
    {
        /* If ldm is enabled we need windowSize space. */
        size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
        /* Two buffers of slack, plus extra space for the overlap
         * This is the minimum slack that LDM works with. One extra because
         * flush might waste up to targetSectionSize-1 bytes. Another extra
@ -1575,7 +1587,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
{
    if (mtctx->params.ldmParams.enableLdm) {
    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
        DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
        DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@ -1678,6 +1690,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
    if (!mtctx->params.rsyncable)
        /* Rsync is disabled. */
        return syncPoint;
    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
        /* We don't emit synchronization points if it would produce too small blocks.
         * We don't have enough input to find a synchronization point, so don't look.
         */
        return syncPoint;
    if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
        /* Not enough to compute the hash.
         * We will miss any synchronization points in this RSYNC_LENGTH byte
@ -1688,10 +1705,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
         */
        return syncPoint;
    /* Initialize the loop variables. */
    if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
        /* We have enough bytes buffered to initialize the hash.
    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
         * because they can't possibly be a sync point. So we can start
         * part way through the input buffer.
         */
        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
        if (pos >= RSYNC_LENGTH) {
            prev = istart + pos - RSYNC_LENGTH;
            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
        } else {
            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
            hash = ZSTD_rollingHash_append(hash, istart, pos);
        }
    } else {
        /* We have enough bytes buffered to initialize the hash,
         * and have processed enough bytes to find a sync point.
         * Start scanning at the beginning of the input.
         */
        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
        pos = 0;
        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@ -1705,16 +1740,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
        syncPoint.flush = 1;
        return syncPoint;
    }
    } else {
        /* We don't have enough bytes buffered to initialize the hash, but
         * we know we have at least RSYNC_LENGTH bytes total.
         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
         * already buffered.
         */
        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
        hash = ZSTD_rollingHash_append(hash, istart, pos);
    }
    /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
     * through the input. If we hit a synchronization point, then cut the
@ -1726,8 +1751,9 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
     */
    for (; pos < syncPoint.toLoad; ++pos) {
        BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
        /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
        assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
        hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
        if ((hash & hitMask) == hitMask) {
            syncPoint.toLoad = pos + 1;
            syncPoint.flush = 1;

File diff suppressed because it is too large

578
lib/decompress/huf_decompress_amd64.S
Normal file
@ -0,0 +1,578 @@
#if !defined(HUF_DISABLE_ASM) && defined(__x86_64__)

/* Stack marking
 * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/* Calling convention:
 *
 * %rdi contains the first argument: HUF_DecompressAsmArgs*.
 * %rbp isn't maintained (no frame pointer).
 * %rsp contains the stack pointer that grows down.
 * No red-zone is assumed, only addresses >= %rsp are used.
 * All register contents are preserved.
 *
 * TODO: Support Windows calling convention.
 */

.global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
.global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
.global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
.global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
.text

/* Sets up register mappings for clarity.
 * op[], bits[], dtable & ip[0] each get their own register.
 * ip[1,2,3] & olimit alias var[].
 * %rax is a scratch register.
 */

#define op0 rsi
#define op1 rbx
#define op2 rcx
#define op3 rdi

#define ip0 r8
#define ip1 r9
#define ip2 r10
#define ip3 r11

#define bits0 rbp
#define bits1 rdx
#define bits2 r12
#define bits3 r13
#define dtable r14
#define olimit r15

/* var[] aliases ip[1,2,3] & olimit
 * ip[1,2,3] are saved every iteration.
 * olimit is only used in compute_olimit.
 */
#define var0 r15
#define var1 r9
#define var2 r10
#define var3 r11

/* 32-bit var registers */
#define vard0 r15d
#define vard1 r9d
#define vard2 r10d
#define vard3 r11d

/* Helper macro: args if idx != 4. */
#define IF_NOT_4_0(...) __VA_ARGS__
#define IF_NOT_4_1(...) __VA_ARGS__
#define IF_NOT_4_2(...) __VA_ARGS__
#define IF_NOT_4_3(...) __VA_ARGS__
#define IF_NOT_4_4(...)
#define IF_NOT_4_(idx, ...) IF_NOT_4_##idx(__VA_ARGS__)
#define IF_NOT_4(idx, ...) IF_NOT_4_(idx, __VA_ARGS__)

/* Calls X(N) for each stream 0, 1, 2, 3. */
#define FOR_EACH_STREAM(X) \
    X(0); \
    X(1); \
    X(2); \
    X(3)

/* Calls X(N, idx) for each stream 0, 1, 2, 3. */
#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
    X(0, idx); \
    X(1, idx); \
    X(2, idx); \
    X(3, idx)

/* Define both _HUF_* & HUF_* symbols because MacOS
 * C symbols are prefixed with '_' & Linux symbols aren't.
 */
_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
    /* Save all registers - even if they are callee saved for simplicity. */
    push %rax
    push %rbx
    push %rcx
    push %rdx
    push %rbp
    push %rsi
    push %rdi
    push %r8
    push %r9
    push %r10
    push %r11
    push %r12
    push %r13
    push %r14
    push %r15

    /* Read HUF_DecompressAsmArgs* args from %rax */
    movq %rdi, %rax
    movq 0(%rax), %ip0
    movq 8(%rax), %ip1
    movq 16(%rax), %ip2
    movq 24(%rax), %ip3
    movq 32(%rax), %op0
    movq 40(%rax), %op1
    movq 48(%rax), %op2
    movq 56(%rax), %op3
    movq 64(%rax), %bits0
    movq 72(%rax), %bits1
    movq 80(%rax), %bits2
    movq 88(%rax), %bits3
    movq 96(%rax), %dtable
    push %rax      /* argument */
    push 104(%rax) /* ilimit */
    push 112(%rax) /* oend */
    push %olimit   /* olimit space */

    subq $24, %rsp

.L_4X1_compute_olimit:
    /* Computes how many iterations we can do safely
     * %r15, %rax may be clobbered
     * rbx, rdx must be saved
     * op3 & ip0 mustn't be clobbered
     */
    movq %rbx, 0(%rsp)
    movq %rdx, 8(%rsp)

    movq 32(%rsp), %rax /* rax = oend */
    subq %op3, %rax     /* rax = oend - op3 */

    /* r15 = (oend - op3) / 5 */
    movabsq $-3689348814741910323, %rdx
    mulq %rdx
    movq %rdx, %r15
    shrq $2, %r15

    movq %ip0, %rax     /* rax = ip0 */
    movq 40(%rsp), %rdx /* rdx = ilimit */
    subq %rdx, %rax     /* rax = ip0 - ilimit */
    movq %rax, %rbx     /* rbx = ip0 - ilimit */

    /* rdx = (ip0 - ilimit) / 7 */
    movabsq $2635249153387078803, %rdx
    mulq %rdx
    subq %rdx, %rbx
    shrq %rbx
    addq %rbx, %rdx
    shrq $2, %rdx

    /* r15 = min(%rdx, %r15) */
    cmpq %rdx, %r15
    cmova %rdx, %r15

    /* r15 = r15 * 5 */
    leaq (%r15, %r15, 4), %r15

    /* olimit = op3 + r15 */
    addq %op3, %olimit

    movq 8(%rsp), %rdx
    movq 0(%rsp), %rbx

    /* If (op3 + 20 > olimit) */
    movq %op3, %rax    /* rax = op3 */
    addq $20, %rax     /* rax = op3 + 20 */
    cmpq %rax, %olimit /* op3 + 20 > olimit */
    jb .L_4X1_exit

    /* If (ip1 < ip0) go to exit */
    cmpq %ip0, %ip1
    jb .L_4X1_exit

    /* If (ip2 < ip1) go to exit */
    cmpq %ip1, %ip2
    jb .L_4X1_exit

    /* If (ip3 < ip2) go to exit */
    cmpq %ip2, %ip3
    jb .L_4X1_exit

/* Reads top 11 bits from bits[n]
 * Loads dt[bits[n]] into var[n]
 */
#define GET_NEXT_DELT(n) \
    movq $53, %var##n; \
    shrxq %var##n, %bits##n, %var##n; \
    movzwl (%dtable,%var##n,2),%vard##n

/* var[n] must contain the DTable entry computed with GET_NEXT_DELT
 * Moves var[n] to %rax
 * bits[n] <<= var[n] & 63
 * op[n][idx] = %rax >> 8
 * %ah is a way to access bits [8, 16) of %rax
 */
#define DECODE_FROM_DELT(n, idx) \
    movq %var##n, %rax; \
    shlxq %var##n, %bits##n, %bits##n; \
    movb %ah, idx(%op##n)

/* Assumes GET_NEXT_DELT has been called.
 * Calls DECODE_FROM_DELT then GET_NEXT_DELT if n < 4
 */
#define DECODE(n, idx) \
    DECODE_FROM_DELT(n, idx); \
    IF_NOT_4(idx, GET_NEXT_DELT(n))

/* // ctz & nbBytes is stored in bits[n]
 * // nbBits is stored in %rax
 * ctz = CTZ[bits[n]]
 * nbBits = ctz & 7
 * nbBytes = ctz >> 3
 * op[n] += 5
 * ip[n] -= nbBytes
 * // Note: x86-64 is little-endian ==> no bswap
 * bits[n] = MEM_readST(ip[n]) | 1
 * bits[n] <<= nbBits
 */
#define RELOAD_BITS(n) \
    bsfq %bits##n, %bits##n; \
    movq %bits##n, %rax; \
    andq $7, %rax; \
    shrq $3, %bits##n; \
    leaq 5(%op##n), %op##n; \
    subq %bits##n, %ip##n; \
    movq (%ip##n), %bits##n; \
    orq $1, %bits##n; \
    shlx %rax, %bits##n, %bits##n
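The RELOAD_BITS refill relies on a sentinel bit: the bit-container is loaded with its lowest bit forced to 1, and each DECODE shifts consumed bits out of the top, so after five decodes the position of that sentinel (found with bsf, i.e. count-trailing-zeros) says exactly how much was consumed. A C rendering of the steps listed in the comment above, for one stream and omitting the op bookkeeping (illustrative, mirroring the macro rather than the library's portable path):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative C version of RELOAD_BITS for one backward-read stream.
     * *bits is never 0 here because of the sentinel planted at the last refill. */
    static void reloadBits(const uint8_t** ip, uint64_t* bits)
    {
        unsigned const ctz     = (unsigned)__builtin_ctzll(*bits); /* sentinel position */
        unsigned const nbBits  = ctz & 7;   /* bits consumed past the last full byte */
        unsigned const nbBytes = ctz >> 3;  /* whole bytes consumed */
        uint64_t v;
        *ip -= nbBytes;                     /* the stream is read backwards */
        memcpy(&v, *ip, sizeof(v));         /* little-endian load, hence no bswap */
        *bits = (v | 1) << nbBits;          /* re-plant the sentinel, re-align */
    }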

    /* Store clobbered variables on the stack */
    movq %olimit, 24(%rsp)
    movq %ip1, 0(%rsp)
    movq %ip2, 8(%rsp)
    movq %ip3, 16(%rsp)

    /* Call GET_NEXT_DELT for each stream */
    FOR_EACH_STREAM(GET_NEXT_DELT)

    .p2align 6

.L_4X1_loop_body:
    /* Decode 5 symbols in each of the 4 streams (20 total)
     * Must have called GET_NEXT_DELT for each stream
     */
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)

    /* Load ip[1,2,3] from stack (var[] aliases them)
     * ip[] is needed for RELOAD_BITS
     * Each will be stored back to the stack after RELOAD
     */
    movq 0(%rsp), %ip1
    movq 8(%rsp), %ip2
    movq 16(%rsp), %ip3

    /* Reload each stream & fetch the next table entry
     * to prepare for the next iteration
     */
    RELOAD_BITS(0)
    GET_NEXT_DELT(0)

    RELOAD_BITS(1)
    movq %ip1, 0(%rsp)
    GET_NEXT_DELT(1)

    RELOAD_BITS(2)
    movq %ip2, 8(%rsp)
    GET_NEXT_DELT(2)

    RELOAD_BITS(3)
    movq %ip3, 16(%rsp)
    GET_NEXT_DELT(3)

    /* If op3 < olimit: continue the loop */
    cmp %op3, 24(%rsp)
    ja .L_4X1_loop_body

    /* Reload ip[1,2,3] from stack */
    movq 0(%rsp), %ip1
    movq 8(%rsp), %ip2
    movq 16(%rsp), %ip3

    /* Re-compute olimit */
    jmp .L_4X1_compute_olimit

#undef GET_NEXT_DELT
#undef DECODE_FROM_DELT
#undef DECODE
#undef RELOAD_BITS

.L_4X1_exit:
    addq $24, %rsp

    /* Restore stack (oend & olimit) */
    pop %rax /* olimit */
    pop %rax /* oend */
    pop %rax /* ilimit */
    pop %rax /* arg */

    /* Save ip / op / bits */
    movq %ip0, 0(%rax)
    movq %ip1, 8(%rax)
    movq %ip2, 16(%rax)
    movq %ip3, 24(%rax)
    movq %op0, 32(%rax)
    movq %op1, 40(%rax)
    movq %op2, 48(%rax)
    movq %op3, 56(%rax)
    movq %bits0, 64(%rax)
    movq %bits1, 72(%rax)
    movq %bits2, 80(%rax)
    movq %bits3, 88(%rax)

    /* Restore registers */
    pop %r15
    pop %r14
    pop %r13
    pop %r12
    pop %r11
    pop %r10
    pop %r9
    pop %r8
    pop %rdi
    pop %rsi
    pop %rbp
    pop %rdx
    pop %rcx
    pop %rbx
    pop %rax
    ret
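Both compute_olimit blocks divide by constants using multiply-high reciprocals rather than a div instruction: -3689348814741910323 is 0xCCCCCCCCCCCCCCCD, the rounded-up 2^66/5, and 2635249153387078803 is the analogous constant for 7, which additionally needs the subtract/shift/add fixup sequence visible above. A small C check of the /5 identity (assumes a compiler with the __int128 extension):

    #include <stdint.h>
    #include <assert.h>

    /* Multiply-high reciprocal division by 5, as in .L_4X1_compute_olimit:
     * floor(x/5) == high64(x * 0xCCCCCCCCCCCCCCCD) >> 2 for all 64-bit x. */
    static uint64_t div5(uint64_t x)
    {
        uint64_t const hi =
            (uint64_t)(((unsigned __int128)x * 0xCCCCCCCCCCCCCCCDULL) >> 64);
        return hi >> 2;
    }

    int main(void)
    {
        uint64_t x;
        for (x = 0; x < 1000000; x++) assert(div5(x) == x / 5);
        assert(div5(UINT64_MAX) == UINT64_MAX / 5);
        return 0;
    }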

_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
    /* Save all registers - even if they are callee saved for simplicity. */
    push %rax
    push %rbx
    push %rcx
    push %rdx
    push %rbp
    push %rsi
    push %rdi
    push %r8
    push %r9
    push %r10
    push %r11
    push %r12
    push %r13
    push %r14
    push %r15

    movq %rdi, %rax
    movq 0(%rax), %ip0
    movq 8(%rax), %ip1
    movq 16(%rax), %ip2
    movq 24(%rax), %ip3
    movq 32(%rax), %op0
    movq 40(%rax), %op1
    movq 48(%rax), %op2
    movq 56(%rax), %op3
    movq 64(%rax), %bits0
    movq 72(%rax), %bits1
    movq 80(%rax), %bits2
    movq 88(%rax), %bits3
    movq 96(%rax), %dtable
    push %rax /* argument */
    push %rax /* olimit */
    push 104(%rax) /* ilimit */

    movq 112(%rax), %rax
    push %rax /* oend3 */

    movq %op3, %rax
    push %rax /* oend2 */

    movq %op2, %rax
    push %rax /* oend1 */

    movq %op1, %rax
    push %rax /* oend0 */

    /* Scratch space */
    subq $8, %rsp

.L_4X2_compute_olimit:
    /* Computes how many iterations we can do safely
     * %r15, %rax may be clobbered
     * rdx must be saved
     * op[1,2,3,4] & ip0 mustn't be clobbered
     */
    movq %rdx, 0(%rsp)

    /* We can consume up to 7 input bytes each iteration. */
    movq %ip0, %rax     /* rax = ip0 */
    movq 40(%rsp), %rdx /* rdx = ilimit */
    subq %rdx, %rax     /* rax = ip0 - ilimit */
    movq %rax, %r15     /* r15 = ip0 - ilimit */

    /* rdx = rax / 7 */
    movabsq $2635249153387078803, %rdx
    mulq %rdx
    subq %rdx, %r15
    shrq %r15
    addq %r15, %rdx
    shrq $2, %rdx

    /* r15 = (ip0 - ilimit) / 7 */
    movq %rdx, %r15

    movabsq $-3689348814741910323, %rdx
    movq 8(%rsp), %rax /* rax = oend0 */
    subq %op0, %rax    /* rax = oend0 - op0 */
    mulq %rdx
    shrq $3, %rdx      /* rdx = rax / 10 */

    /* r15 = min(%rdx, %r15) */
    cmpq %rdx, %r15
    cmova %rdx, %r15

    movabsq $-3689348814741910323, %rdx
    movq 16(%rsp), %rax /* rax = oend1 */
    subq %op1, %rax     /* rax = oend1 - op1 */
    mulq %rdx
    shrq $3, %rdx       /* rdx = rax / 10 */

    /* r15 = min(%rdx, %r15) */
    cmpq %rdx, %r15
    cmova %rdx, %r15

    movabsq $-3689348814741910323, %rdx
    movq 24(%rsp), %rax /* rax = oend2 */
    subq %op2, %rax     /* rax = oend2 - op2 */
    mulq %rdx
    shrq $3, %rdx       /* rdx = rax / 10 */

    /* r15 = min(%rdx, %r15) */
    cmpq %rdx, %r15
    cmova %rdx, %r15

    movabsq $-3689348814741910323, %rdx
    movq 32(%rsp), %rax /* rax = oend3 */
    subq %op3, %rax     /* rax = oend3 - op3 */
    mulq %rdx
    shrq $3, %rdx       /* rdx = rax / 10 */

    /* r15 = min(%rdx, %r15) */
    cmpq %rdx, %r15
    cmova %rdx, %r15

    /* olimit = op3 + 5 * r15 */
    movq %r15, %rax
    leaq (%op3, %rax, 4), %olimit
    addq %rax, %olimit

    movq 0(%rsp), %rdx

    /* If (op3 + 10 > olimit) */
    movq %op3, %rax    /* rax = op3 */
    addq $10, %rax     /* rax = op3 + 10 */
    cmpq %rax, %olimit /* op3 + 10 > olimit */
    jb .L_4X2_exit

    /* If (ip1 < ip0) go to exit */
    cmpq %ip0, %ip1
    jb .L_4X2_exit

    /* If (ip2 < ip1) go to exit */
    cmpq %ip1, %ip2
    jb .L_4X2_exit

    /* If (ip3 < ip2) go to exit */
    cmpq %ip2, %ip3
    jb .L_4X2_exit

#define DECODE(n, idx) \
    movq %bits##n, %rax; \
    shrq $53, %rax; \
    movzwl 0(%dtable,%rax,4),%r8d; \
    movzbl 2(%dtable,%rax,4),%r15d; \
    movzbl 3(%dtable,%rax,4),%eax; \
    movw %r8w, (%op##n); \
    shlxq %r15, %bits##n, %bits##n; \
    addq %rax, %op##n

#define RELOAD_BITS(n) \
    bsfq %bits##n, %bits##n; \
    movq %bits##n, %rax; \
    shrq $3, %bits##n; \
    andq $7, %rax; \
    subq %bits##n, %ip##n; \
    movq (%ip##n), %bits##n; \
    orq $1, %bits##n; \
    shlxq %rax, %bits##n, %bits##n


    movq %olimit, 48(%rsp)

    .p2align 6

.L_4X2_loop_body:
    /* We clobber r8, so store it on the stack */
    movq %r8, 0(%rsp)

    /* Decode 5 symbols from each of the 4 streams (20 symbols total). */
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
    FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)

    /* Reload r8 */
    movq 0(%rsp), %r8

    FOR_EACH_STREAM(RELOAD_BITS)

    cmp %op3, 48(%rsp)
    ja .L_4X2_loop_body
    jmp .L_4X2_compute_olimit

#undef DECODE
#undef RELOAD_BITS
.L_4X2_exit:
    addq $8, %rsp
    /* Restore stack (oend & olimit) */
    pop %rax /* oend0 */
    pop %rax /* oend1 */
    pop %rax /* oend2 */
    pop %rax /* oend3 */
    pop %rax /* ilimit */
    pop %rax /* olimit */
    pop %rax /* arg */

    /* Save ip / op / bits */
    movq %ip0, 0(%rax)
    movq %ip1, 8(%rax)
    movq %ip2, 16(%rax)
    movq %ip3, 24(%rax)
    movq %op0, 32(%rax)
    movq %op1, 40(%rax)
    movq %op2, 48(%rax)
    movq %op3, 56(%rax)
    movq %bits0, 64(%rax)
    movq %bits1, 72(%rax)
    movq %bits2, 80(%rax)
    movq %bits3, 88(%rax)

    /* Restore registers */
    pop %r15
    pop %r14
    pop %r13
    pop %r12
    pop %r11
    pop %r10
    pop %r9
    pop %r8
    pop %rdi
    pop %rsi
    pop %rbp
    pop %rdx
    pop %rcx
    pop %rbx
    pop %rax
    ret

#endif
@ -56,7 +56,6 @@
 * Dependencies
 *********************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h"         /* bmi2 */
#include "../common/mem.h"         /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
@ -177,12 +176,15 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet,
static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
    DEBUGLOG(4, "Allocating new hash set");
    if (!ret)
        return NULL;
    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
    ret->ddictPtrCount = 0;
    if (!ret || !ret->ddictPtrTable) {
    if (!ret->ddictPtrTable) {
        ZSTD_customFree(ret, customMem);
        return NULL;
    }
    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
    ret->ddictPtrCount = 0;
    return ret;
}

@ -255,11 +257,15 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
    dctx->inBuffSize = 0;
    dctx->outBuffSize = 0;
    dctx->streamStage = zdss_init;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
    dctx->legacyContext = NULL;
    dctx->previousLegacyVersion = 0;
#endif
    dctx->noForwardProgress = 0;
    dctx->oversizedDuration = 0;
    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
#if DYNAMIC_BMI2
    dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
    dctx->ddictSet = NULL;
    ZSTD_DCtx_resetParameters(dctx);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
@ -280,8 +286,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
    return dctx;
}

ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
@ -292,10 +297,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
    }
}

ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
    return ZSTD_createDCtx_internal(customMem);
}

ZSTD_DCtx* ZSTD_createDCtx(void)
{
    DEBUGLOG(3, "ZSTD_createDCtx");
    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
}

static void ZSTD_clearDict(ZSTD_DCtx* dctx)
@ -380,6 +390,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
    return 0;
}

/*! ZSTD_isSkippableFrame() :
 *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
 *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
 */
unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
{
    if (size < ZSTD_FRAMEIDSIZE) return 0;
    {   U32 const magic = MEM_readLE32(buffer);
        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
    }
    return 0;
}

/** ZSTD_frameHeaderSize_internal() :
 *  srcSize must be large enough to reach header size fields.
 *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
@ -466,7 +489,9 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
    }
    switch(dictIDSizeCode)
    {
        default: assert(0);  /* impossible */
        default:
            assert(0);  /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : break;
        case 1 : dictID = ip[pos]; pos++; break;
        case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
@ -474,7 +499,9 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
    }
    switch(fcsID)
    {
        default: assert(0);  /* impossible */
        default:
            assert(0);  /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
        case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
        case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
@ -503,7 +530,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}

/** ZSTD_getFrameContentSize() :
 *  compatible with legacy mode
 * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
@ -544,6 +570,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
    }
}

/*! ZSTD_readSkippableFrame() :
 * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
 *
 * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
 * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
 * in the magicVariant.
 *
 * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
 *
 * @return : number of bytes written or a ZSTD error.
 */
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
                                           const void* src, size_t srcSize)
{
    U32 const magicNumber = MEM_readLE32(src);
    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;

    /* check input validity */
    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");

    /* deliver payload */
    if (skippableContentSize > 0 && dst != NULL)
        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
    if (magicVariant != NULL)
        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
    return skippableContentSize;
}
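A typical caller probes with ZSTD_isSkippableFrame and then extracts the payload. A minimal usage sketch (error handling abbreviated, and the payload buffer size assumed adequate for the frame at hand):

    unsigned magicVariant;
    char payload[1024];

    if (ZSTD_isSkippableFrame(src, srcSize)) {
        size_t const n = ZSTD_readSkippableFrame(payload, sizeof(payload),
                                                 &magicVariant, src, srcSize);
        if (!ZSTD_isError(n)) {
            /* payload[0..n) holds the skippable frame content;
             * magicVariant identifies which of the 16 skippable magics was used */
        }
    }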

/** ZSTD_findDecompressedSize() :
 *  compatible with legacy mode
 *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
@ -858,7 +915,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
        switch(blockProperties.blockType)
        {
        case bt_compressed:
            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
            break;
        case bt_raw :
            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
@ -1009,7 +1066,7 @@ static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
    switch (dctx->dictUses) {
    default:
        assert(0 /* Impossible */);
        /* fall-through */
        ZSTD_FALLTHROUGH;
    case ZSTD_dont_use:
        ZSTD_clearDict(dctx);
        return NULL;
@ -1031,7 +1088,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
{
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
    size_t regenSize;
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
@ -1065,7 +1122,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t
        return dctx->expected;
    if (dctx->bType != bt_raw)
        return dctx->expected;
    return MIN(MAX(inputSize, 1), dctx->expected);
    return BOUNDED(1, inputSize, dctx->expected);
}
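BOUNDED reads more directly than the nested MIN/MAX it replaces. Assuming the usual zstd_internal.h-style definition, it clamps a value into a closed range:

    /* Presumed definition (zstd_internal.h style): clamp val into [min, max]. */
    #define BOUNDED(min, val, max) (MAX(min, MIN(val, max)))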
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
@ -1073,7 +1130,9 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
    {
    default:   /* should not happen */
        assert(0);
        ZSTD_FALLTHROUGH;
    case ZSTDds_getFrameHeaderSize:
        ZSTD_FALLTHROUGH;
    case ZSTDds_decodeFrameHeader:
        return ZSTDnit_frameHeader;
    case ZSTDds_decodeBlockHeader:
@ -1085,6 +1144,7 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
    case ZSTDds_checkChecksum:
        return ZSTDnit_checksum;
    case ZSTDds_decodeSkippableHeader:
        ZSTD_FALLTHROUGH;
    case ZSTDds_skipFrame:
        return ZSTDnit_skippableFrame;
    }
@ -1168,7 +1228,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
    {
    case bt_compressed:
        DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
        rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
        rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
        dctx->expected = 0;   /* Streaming not supported */
        break;
    case bt_raw :
@ -1493,7 +1553,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
ZSTD_DStream* ZSTD_createDStream(void)
{
    DEBUGLOG(3, "ZSTD_createDStream");
    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
}

ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
@ -1503,7 +1563,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)

ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
{
    return ZSTD_createDCtx_advanced(customMem);
    return ZSTD_createDCtx_internal(customMem);
}

size_t ZSTD_freeDStream(ZSTD_DStream* zds)
@ -1763,7 +1823,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy */
    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
    size_t const minRBSize = (size_t) neededSize;
    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
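As a worked example: for a frame with an 8 MiB window and large content, blockSize = MIN(8 MiB, 128 KiB) = 128 KiB, so the minimum streaming buffer grows from 8 MiB + 128 KiB + 64 B to 8 MiB + 256 KiB + 64 B. The extra ZSTD_BLOCKSIZE_MAX hosts the relocated literal buffer (this assumes WILDCOPY_OVERLENGTH is 32, its usual value).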
@ -1897,10 +1958,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
            DEBUGLOG(5, "stage zdss_init => transparent reset ");
            zds->streamStage = zdss_loadHeader;
            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
            zds->legacyVersion = 0;
#endif
            zds->hostageByte = 0;
            zds->expectedOutBuffer = *output;
            /* fall-through */
            ZSTD_FALLTHROUGH;

        case zdss_loadHeader :
            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
@ -2038,7 +2101,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                    zds->outBuffSize = neededOutBuffSize;
            }   }   }
            zds->streamStage = zdss_read;
            /* fall-through */
            ZSTD_FALLTHROUGH;

        case zdss_read:
            DEBUGLOG(5, "stage zdss_read");
@ -2057,7 +2120,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
            }   }
            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
            zds->streamStage = zdss_load;
            /* fall-through */
            ZSTD_FALLTHROUGH;

        case zdss_load:
            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);

File diff suppressed because it is too large
@ -33,6 +33,12 @@
 */


/* Streaming state is used to inform allocation of the literal buffer */
typedef enum {
    not_streaming = 0,
    is_streaming = 1
} streaming_operation;

/* ZSTD_decompressBlock_internal() :
 * decompress block, starting at `src`,
 * into destination buffer `dst`.
@ -41,7 +47,7 @@
 */
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                       void* dst, size_t dstCapacity,
                 const void* src, size_t srcSize, const int frame);
                 const void* src, size_t srcSize, const int frame, const streaming_operation streaming);

/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)

@ -20,7 +20,7 @@
 * Dependencies
 *********************************************************/
#include "../common/mem.h"             /* BYTE, U16, U32 */
#include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
#include "../common/zstd_internal.h"   /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */



@ -106,6 +106,22 @@ typedef struct {
    size_t ddictPtrCount;
} ZSTD_DDictHashSet;

#ifndef ZSTD_DECODER_INTERNAL_BUFFER
#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
#endif

#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)

/* extra buffer, compensates when dst is not large enough to store litBuffer */
#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
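With the defaults above, ZSTD_LITBUFFEREXTRASIZE = BOUNDED(64, 1 << 16, 128 << 10) = 1 << 16, i.e. 64 KiB: the internal-buffer default already sits inside the [64 B, 128 KiB] clamp. The struct change further below swaps the former fixed litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH] array for a pointer plus this 64 KiB scratch area, with the remainder of the literals landing in dst when it has room.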

typedef enum {
    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
    ZSTD_in_dst = 1,      /* Stored entirely within dst (in memory after current output write) */
    ZSTD_split = 2        /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;

struct ZSTD_DCtx_s
{
    const ZSTD_seqSymbol* LLTptr;
@ -136,7 +152,9 @@ struct ZSTD_DCtx_s
    size_t litSize;
    size_t rleSize;
    size_t staticSize;
#if DYNAMIC_BMI2 != 0
    int bmi2;   /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif

    /* dictionary */
    ZSTD_DDict* ddictLocal;
@ -158,16 +176,21 @@ struct ZSTD_DCtx_s
    size_t outStart;
    size_t outEnd;
    size_t lhSize;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
    void* legacyContext;
    U32 previousLegacyVersion;
    U32 legacyVersion;
#endif
    U32 hostageByte;
    int noForwardProgress;
    ZSTD_bufferMode_e outBufferMode;
    ZSTD_outBuffer expectedOutBuffer;

    /* workspace */
    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
    BYTE* litBuffer;
    const BYTE* litBufferEnd;
    ZSTD_litLocation_e litBufferLocation;
    BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];

    size_t oversizedDuration;
@ -183,6 +206,14 @@ struct ZSTD_DCtx_s
#endif
};  /* typedef'd to ZSTD_DCtx within "zstd.h" */

MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
#if DYNAMIC_BMI2 != 0
    return dctx->bmi2;
#else
    (void)dctx;
    return 0;
#endif
}

/*-*******************************************************
 * Shared internal functions

@ -40,6 +40,13 @@
/*-*************************************
*  Constants
***************************************/
/**
 * There are 32bit indexes used to ref samples, so limit samples size to 4GB
 * on 64bit builds.
 * For 32bit builds we choose 1 GB.
 * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
 * contiguous buffer, so 1GB is already a high limit.
 */
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define COVER_DEFAULT_SPLITPOINT 1.0

@ -47,7 +54,7 @@
*  Console display
***************************************/
#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
static int g_displayLevel = 0;
#endif
#undef DISPLAY
#define DISPLAY(...) \
@ -735,7 +742,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
    COVER_map_t activeDmers;
    parameters.splitPoint = 1.0;
    /* Initialize global data */
    g_displayLevel = parameters.zParams.notificationLevel;
    g_displayLevel = (int)parameters.zParams.notificationLevel;
    /* Checks */
    if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "Cover parameters incorrect\n");

@ -32,6 +32,13 @@
/*-*************************************
*  Constants
***************************************/
/**
 * There are 32bit indexes used to ref samples, so limit samples size to 4GB
 * on 64bit builds.
 * For 32bit builds we choose 1 GB.
 * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
 * contiguous buffer, so 1GB is already a high limit.
 */
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define FASTCOVER_MAX_F 31
#define FASTCOVER_MAX_ACCEL 10
@ -44,7 +51,7 @@
*  Console display
***************************************/
#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
static int g_displayLevel = 0;
#endif
#undef DISPLAY
#define DISPLAY(...) \
@ -549,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
    ZDICT_cover_params_t coverParams;
    FASTCOVER_accel_t accelParams;
    /* Initialize global data */
    g_displayLevel = parameters.zParams.notificationLevel;
    g_displayLevel = (int)parameters.zParams.notificationLevel;
    /* Assign splitPoint and f if not provided */
    parameters.splitPoint = 1.0;
    parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@ -632,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
    const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
    const unsigned shrinkDict = 0;
    /* Local variables */
    const int displayLevel = parameters->zParams.notificationLevel;
    const int displayLevel = (int)parameters->zParams.notificationLevel;
    unsigned iteration = 1;
    unsigned d;
    unsigned k;
@ -716,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
    data->parameters.splitPoint = splitPoint;
    data->parameters.steps = kSteps;
    data->parameters.shrinkDict = shrinkDict;
    data->parameters.zParams.notificationLevel = g_displayLevel;
    data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
    /* Check the parameters */
    if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
                                   data->ctx->f, accel)) {

@ -135,22 +135,32 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
            if (val != 0) {
                unsigned long r;
                _BitScanForward64(&r, (U64)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctzll((U64)val) >> 3);
            return (unsigned)(__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
            if (val != 0) {
                unsigned long r;
                _BitScanForward(&r, (U32)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
            return (unsigned)(__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
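ZDICT_NbCommonBytes turns the lowest (or, on big-endian targets, highest) set bit of a XOR difference into a matching-prefix byte count. A typical caller, sketched here rather than quoted from zdict.c, compares word-at-a-time and stops at the first differing word (assumes a 64-bit little-endian GCC/Clang target):

    #include <stddef.h>
    #include <string.h>

    /* Illustrative match-length counter built on NbCommonBytes-style logic. */
    static size_t countCommonBytes(const unsigned char* a, const unsigned char* b, size_t len)
    {
        size_t n = 0;
        while (n + sizeof(size_t) <= len) {
            size_t wa, wb;
            memcpy(&wa, a + n, sizeof(wa));
            memcpy(&wb, b + n, sizeof(wb));
            if (wa != wb) {
                /* little-endian: the first differing byte is the lowest set byte */
                return n + ((size_t)__builtin_ctzll(wa ^ wb) >> 3);
            }
            n += sizeof(size_t);
        }
        while (n < len && a[n] == b[n]) n++;  /* finish byte-by-byte */
        return n;
    }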
@ -159,11 +169,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse64( &r, val );
|
||||
return (unsigned)(r>>3);
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse64(&r, val);
|
||||
return (unsigned)(r >> 3);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
return (unsigned)(__builtin_clzll(val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
||||
@ -174,11 +189,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse( &r, (unsigned long)val );
|
||||
return (unsigned)(r>>3);
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse(&r, (unsigned long)val);
|
||||
return (unsigned)(r >> 3);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clz((U32)val) >> 3);
|
||||
return (unsigned)(__builtin_clz((U32)val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
||||
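The new `val != 0` guards are load-bearing: `_BitScanForward64()` and `_BitScanReverse64()` leave their output parameter unset and return 0 when the input is 0, and the GCC builtins are undefined on 0, so the old code could read an indeterminate `r` on zero input. A standalone sketch of the invariant, assuming a GCC/clang toolchain for `__builtin_ctzll()` (the reference loop and test harness are illustrative, not zstd code):

    #include <assert.h>
    #include <stdint.h>

    /* reference: count zero low-order bytes, little-endian view */
    static unsigned naive_nb_common_bytes(uint64_t val)
    {
        unsigned n = 0;
        while (n < 8 && ((val >> (8 * n)) & 0xFF) == 0) n++;
        return n;
    }

    static unsigned fast_nb_common_bytes(uint64_t val)
    {
        assert(val != 0);                              /* ctz is undefined on 0 */
        return (unsigned)(__builtin_ctzll(val) >> 3);  /* trailing zero bits -> bytes */
    }

    int main(void)
    {
        uint64_t v;
        for (v = 1; v != 0; v <<= 1)
            assert(fast_nb_common_bytes(v) == naive_nb_common_bytes(v));
        return 0;
    }

The `__assume(0)` in the MSVC branch tells the optimizer the zero case is unreachable, so the added guard costs nothing there.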
@ -235,7 +255,7 @@ static dictItem ZDICT_analyzePos(
U32 savings[LLIMIT] = {0};
const BYTE* b = (const BYTE*)buffer;
size_t maxLength = LLIMIT;
size_t pos = suffix[start];
size_t pos = (size_t)suffix[start];
U32 end = start;
dictItem solution;
@ -369,7 +389,7 @@ static dictItem ZDICT_analyzePos(
savings[i] = savings[i-1] + (lengthList[i] * (i-3));

DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f)  \n",
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);

solution.pos = (U32)pos;
solution.length = (U32)maxLength;
@ -379,7 +399,7 @@ static dictItem ZDICT_analyzePos(
{ U32 id;
for (id=start; id<end; id++) {
U32 p, pEnd, length;
U32 const testedPos = suffix[id];
U32 const testedPos = (U32)suffix[id];
if (testedPos == pos)
length = solution.length;
else {
@ -442,7 +462,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const

if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
/* append */
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
table[u].savings += elt.length / 8;  /* rough approx bonus */
if (addedLength > 0) {  /* otherwise, elt fully included into existing */
table[u].length += addedLength;
@ -766,6 +786,13 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
pos += fileSizes[u];
}

if (notificationLevel >= 4) {
/* writeStats */
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
for (u=0; u<=offcodeMax; u++) {
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
}   }

/* analyze, build stats, starting with literals */
{   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(maxNbBits)) {
@ -872,7 +899,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
#else
/* at this stage, we don't use the result of "most common first offset",
as the impact of statistics is not properly evaluated */
* as the impact of statistics is not properly evaluated */
MEM_writeLE32(dstPtr+0, repStartValue[0]);
MEM_writeLE32(dstPtr+4, repStartValue[1]);
MEM_writeLE32(dstPtr+8, repStartValue[2]);
@ -888,6 +915,17 @@ _cleanup:
}


/**
 * @returns the maximum repcode value
 */
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
{
U32 maxRep = reps[0];
int r;
for (r = 1; r < ZSTD_REP_NUM; ++r)
maxRep = MAX(maxRep, reps[r]);
return maxRep;
}

size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* customDictContent, size_t dictContentSize,
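With zstd's initial repcodes, `repStartValue[3] = { 1, 4, 8 }`, the new helper evaluates to 8: the finalized dictionary content must be at least 8 bytes so that every starting repcode points inside it. A standalone re-statement of the helper with those constants inlined (illustrative, not the library source):

    #include <assert.h>

    #define REP_NUM 3
    #define MAX(a,b) ((a) > (b) ? (a) : (b))

    static unsigned maxRep(unsigned const reps[REP_NUM])
    {
        unsigned m = reps[0];
        int r;
        for (r = 1; r < REP_NUM; ++r)
            m = MAX(m, reps[r]);
        return m;
    }

    int main(void)
    {
        unsigned const repStart[REP_NUM] = { 1, 4, 8 };
        assert(maxRep(repStart) == 8);  /* minContentSize becomes 8 below */
        return 0;
    }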
@ -899,11 +937,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
/* The final dictionary content must be at least as large as the largest repcode */
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
size_t paddingSize;

/* check conditions */
DEBUGLOG(4, "ZDICT_finalizeDictionary");
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);

/* dictionary header */
@ -927,12 +967,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
hSize += eSize;
}

/* copy elements in final buffer ; note : src and dst buffer can overlap */
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
{ size_t const dictSize = hSize + dictContentSize;
char* dictEnd = (char*)dictBuffer + dictSize;
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
memcpy(dictBuffer, header, hSize);
/* Shrink the content size if it doesn't fit in the buffer */
if (hSize + dictContentSize > dictBufferCapacity) {
dictContentSize = dictBufferCapacity - hSize;
}

/* Pad the dictionary content with zeros if it is too small */
if (dictContentSize < minContentSize) {
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
"dictBufferCapacity too small to fit max repcode");
paddingSize = minContentSize - dictContentSize;
} else {
paddingSize = 0;
}

{
size_t const dictSize = hSize + paddingSize + dictContentSize;

/* The dictionary consists of the header, optional padding, and the content.
 * The padding comes before the content because the "best" position in the
 * dictionary is the last byte.
 */
BYTE* const outDictHeader = (BYTE*)dictBuffer;
BYTE* const outDictPadding = outDictHeader + hSize;
BYTE* const outDictContent = outDictPadding + paddingSize;

assert(dictSize <= dictBufferCapacity);
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);

/* First copy the customDictContent into its final location.
 * `customDictContent` and `dictBuffer` may overlap, so we must
 * do this before any other writes into the output buffer.
 * Then copy the header & padding into the output buffer.
 */
memmove(outDictContent, customDictContent, dictContentSize);
memcpy(outDictHeader, header, hSize);
memset(outDictPadding, 0, paddingSize);

return dictSize;
}
}
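The rewritten block lays the buffer out as [header | padding | content], with the padding ahead of the content so the most valuable bytes stay at the end, and with the content moved first because `customDictContent` may alias `dictBuffer`. A simplified sketch of just that assembly step (the function name is illustrative, and the real function's error handling is omitted):

    #include <assert.h>
    #include <string.h>

    static size_t assemble_dict(unsigned char* dst, size_t dstCapacity,
                                const void* content, size_t contentSize,
                                const unsigned char* header, size_t hSize,
                                size_t paddingSize)
    {
        size_t const dictSize = hSize + paddingSize + contentSize;
        unsigned char* const outHeader  = dst;
        unsigned char* const outPadding = outHeader + hSize;
        unsigned char* const outContent = outPadding + paddingSize;

        assert(dictSize <= dstCapacity);
        memmove(outContent, content, contentSize); /* first: source may overlap dst */
        memcpy(outHeader, header, hSize);          /* then the header... */
        memset(outPadding, 0, paddingSize);        /* ...and the zero padding */
        return dictSize;
    }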
@ -343,8 +343,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r;
_BitScanReverse ( &r, val );
return (unsigned) r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */
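All of the `highbit32` rewrites in this diff share one idea: `_BitScanReverse()` returns nonzero iff some bit is set, so `return _BitScanReverse(&r, val) ? (unsigned)r : 0;` keeps the old "0 for a zero input" result without ever reading an unset `r`. On GCC/clang, `__builtin_clz(val) ^ 31` yields the same bit index for nonzero inputs. A small equivalence check, assuming a GCC/clang toolchain (illustrative, not library code):

    #include <assert.h>
    #include <stdint.h>

    static unsigned highbit32_ref(uint32_t val)  /* portable reference */
    {
        unsigned r = 0;
        while (val >>= 1) r++;
        return r;            /* 0 for val == 0, matching the intrinsic paths */
    }

    int main(void)
    {
        uint32_t v;
        for (v = 1; v != 0; v <<= 1)
            assert((unsigned)(__builtin_clz(v) ^ 31) == highbit32_ref(v));
        return 0;
    }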
@ -353,9 +353,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */

@ -356,9 +356,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */

@ -627,9 +627,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */

@ -756,9 +756,8 @@ MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BITv05_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */

@ -860,9 +860,8 @@ MEM_STATIC size_t BITv06_readBitsFast(BITv06_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */

@ -530,9 +530,8 @@ MEM_STATIC size_t BITv07_readBitsFast(BITv07_DStream_t* bitD, unsigned nbBits);
MEM_STATIC unsigned BITv07_highbit32 (U32 val)
{
# if defined(_MSC_VER)   /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
unsigned long r;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else   /* Software version */
185
lib/libzstd.mk
Normal file
@ -0,0 +1,185 @@
# ################################################################
# Copyright (c) Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

##################################################################
# Input Variables
##################################################################

# Zstd lib directory
LIBZSTD ?= ./

# Legacy support
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LEGACY_MULTITHREADED_API ?= 0

# Build size optimizations
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0

# Assembly support
ZSTD_NO_ASM ?= 0

##################################################################
# libzstd helpers
##################################################################

# Make 4.3 doesn't support '\#' anymore (https://lwn.net/Articles/810071/)
NUM_SYMBOL := \#

# define silent mode as default (verbose mode with V=1 or VERBOSE=1)
$(V)$(VERBOSE).SILENT:

# When cross-compiling from linux to windows,
# one might need to specify TARGET_SYSTEM as "Windows."
# Building from Fedora fails without it.
# (but Ubuntu and Debian don't need to set anything)
TARGET_SYSTEM ?= $(OS)

# Version numbers
LIBVER_SRC := $(LIBZSTD)/zstd.h
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
CCVER := $(shell $(CC) --version)
ZSTD_VERSION?= $(LIBVER)

# ZSTD_LIB_MINIFY is a helper variable that
# configures a bunch of other variables to space-optimized defaults.
ZSTD_LIB_MINIFY ?= 0
ifneq ($(ZSTD_LIB_MINIFY), 0)
HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
ZSTD_LEGACY_SUPPORT ?= 0
ZSTD_LIB_DEPRECATED ?= 0
HUF_FORCE_DECOMPRESS_X1 ?= 1
ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 1
ZSTD_NO_INLINE ?= 1
ZSTD_STRIP_ERROR_STRINGS ?= 1
ifneq ($(HAVE_CC_OZ), 0)
# Some compilers (clang) support an even more space-optimized setting.
CFLAGS += -Oz
else
CFLAGS += -Os
endif
CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
          -DDYNAMIC_BMI2=0 -DNDEBUG
else
CFLAGS += -O3
endif

DEBUGLEVEL ?= 0
CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
ifeq ($(TARGET_SYSTEM),Windows_NT)   # MinGW assumed
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
endif
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
            -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
            -Wstrict-prototypes -Wundef -Wpointer-arith \
            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
            -Wredundant-decls -Wmissing-prototypes -Wc++-compat
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
LDFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
ifeq ($(HAVE_COLORNEVER), 1)
GREP_OPTIONS += --color=never
endif
GREP = grep $(GREP_OPTIONS)
SED_ERE_OPT ?= -E

ZSTD_COMMON_FILES := $(sort $(wildcard $(LIBZSTD)/common/*.c))
ZSTD_COMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/compress/*.c))
ZSTD_DECOMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*.c))
ZSTD_DICTBUILDER_FILES := $(sort $(wildcard $(LIBZSTD)/dictBuilder/*.c))
ZSTD_DEPRECATED_FILES := $(sort $(wildcard $(LIBZSTD)/deprecated/*.c))
ZSTD_LEGACY_FILES :=

ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*_amd64.S))

ifneq ($(ZSTD_NO_ASM), 0)
CPPFLAGS += -DHUF_DISABLE_ASM
else
# Unconditionally add the ASM files; they are disabled by
# macros in the .S file.
ZSTD_DECOMPRESS_FILES += $(ZSTD_DECOMPRESS_AMD64_ASM_FILES)
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0)
CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0)
CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT), 0)
CFLAGS += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG), 0)
CFLAGS += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
endif

ifneq ($(ZSTD_NO_INLINE), 0)
CFLAGS += -DZSTD_NO_INLINE
endif

ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
endif

ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
endif

ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
ZSTD_LEGACY_FILES += $(shell ls $(LIBZSTD)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)

UNAME := $(shell uname)

ifndef BUILD_DIR
ifeq ($(UNAME), Darwin)
ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
HASH ?= md5
endif
else ifeq ($(UNAME), FreeBSD)
HASH ?= gmd5sum
else ifeq ($(UNAME), NetBSD)
HASH ?= md5 -n
else ifeq ($(UNAME), OpenBSD)
HASH ?= md5
endif
HASH ?= md5sum

HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " " )
HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
ifeq ($(HAVE_HASH),0)
$(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
BUILD_DIR := obj/generic_noconf
endif
endif # BUILD_DIR

ZSTD_SUBDIR := $(LIBZSTD)/common $(LIBZSTD)/compress $(LIBZSTD)/decompress $(LIBZSTD)/dictBuilder $(LIBZSTD)/legacy $(LIBZSTD)/deprecated
vpath %.c $(ZSTD_SUBDIR)
vpath %.S $(ZSTD_SUBDIR)
4
lib/modulemap/module.modulemap
Normal file
@ -0,0 +1,4 @@
module libzstd [extern_c] {
header "../zstd.h"
export *
}
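This four-line file lets Clang-modules builds (including Swift packages) import libzstd as a module instead of textually including the header. A hypothetical usage sketch; the Clang flags below are standard module options, but this exact invocation is an assumption, not part of the diff:

    /* clang -fmodules -fmodule-map-file=lib/modulemap/module.modulemap demo.c -lzstd */
    #include <zstd.h>

    int main(void)
    {
        return ZSTD_versionNumber() ? 0 : 1;  /* sanity check that the library resolved */
    }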
@ -46,7 +46,7 @@ extern "C" {
*
* Zstd can use dictionaries to improve compression ratio of small data.
* Traditionally small files don't compress well because there is very little
* repetion in a single sample, since it is small. But, if you are compressing
* repetition in a single sample, since it is small. But, if you are compressing
* many similar files, like a bunch of JSON records that share the same
* structure, you can train a dictionary ahead of time on some samples of
* these files. Then, zstd can use the dictionary to find repetitions that are
@ -132,7 +132,7 @@ extern "C" {
*
* # Benchmark levels 1-3 without a dictionary
* zstd -b1e3 -r /path/to/my/files
* # Benchmark levels 1-3 with a dictioanry
* # Benchmark levels 1-3 with a dictionary
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
*
* When should I retrain a dictionary?
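The workflow this comment block describes can be sketched end to end with the public stable APIs `ZDICT_trainFromBuffer()` and `ZSTD_compress_usingDict()`. Buffer sizes, sample contents, and the compression level below are placeholders; real callers size these from their data, and training can legitimately fail on unrepresentative samples, hence the error check. Link with -lzstd:

    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>
    #include <zdict.h>

    int main(void)
    {
        /* samplesBuffer holds all samples back to back; samplesSizes their lengths */
        char samplesBuffer[4096];
        size_t samplesSizes[16];
        unsigned const nbSamples = 16;
        unsigned i;
        memset(samplesBuffer, 'a', sizeof(samplesBuffer));      /* placeholder data */
        for (i = 0; i < nbSamples; i++) samplesSizes[i] = 256;

        char dict[1024];
        size_t const dictSize = ZDICT_trainFromBuffer(dict, sizeof(dict),
                                    samplesBuffer, samplesSizes, nbSamples);
        if (ZDICT_isError(dictSize)) {
            fprintf(stderr, "training failed: %s\n", ZDICT_getErrorName(dictSize));
            return 1;
        }

        char const src[] = "one more small record";
        char dst[512];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return 1;
        {   size_t const cSize = ZSTD_compress_usingDict(cctx, dst, sizeof(dst),
                                     src, sizeof(src), dict, dictSize, 3);
            ZSTD_freeCCtx(cctx);
            return ZSTD_isError(cSize) ? 1 : 0;
        }
    }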
@ -237,7 +237,6 @@ typedef struct {
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
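The function this hunk documents can be exercised as below: a hedged sketch that wraps raw content with a dictionary header, where zeroed parameters select the defaults and the wrapper name and capacities are illustrative:

    #include <string.h>
    #include <zdict.h>

    static size_t finalize_example(void* out, size_t outCapacity,
                                   const void* rawContent, size_t rawSize,
                                   const void* samples, const size_t* sampleSizes,
                                   unsigned nbSamples)
    {
        ZDICT_params_t params;
        memset(&params, 0, sizeof(params));  /* 0 = default level, dictID, verbosity */
        return ZDICT_finalizeDictionary(out, outCapacity, rawContent, rawSize,
                                        samples, sampleSizes, nbSamples, params);
    }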
@ -272,8 +271,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
* Use them only in association with static linking.
* ==================================================================================== */

#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
/* Deprecated: Remove in v1.6.0 */
#define ZDICT_CONTENTSIZE_MIN 128

/*! ZDICT_cover_params_t:
* k and d are the only required parameters.
Some files were not shown because too many files have changed in this diff.