mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 16:56:49 +02:00
Merge pull request #1411 from facebook/prefetch_dict
Improves decompression speed when using cold dictionary
This commit is contained in:
commit
7b0c551bff
@ -33,7 +33,7 @@ largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
|
||||
|
||||
.PHONY: $(LIBZSTD)
|
||||
$(LIBZSTD):
|
||||
$(MAKE) -C $(LIBDIR) libzstd.a
|
||||
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
|
||||
|
||||
bench.o : $(PROGDIR)/bench.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
@ -50,4 +50,5 @@ xxhash.o : $(LIBDIR)/common/xxhash.c
|
||||
|
||||
clean:
|
||||
$(RM) *.o
|
||||
$(MAKE) -C $(LIBDIR) clean > /dev/null
|
||||
$(RM) largeNbDicts
|
||||
|
@ -49,6 +49,7 @@
|
||||
|
||||
|
||||
/*--- Macros ---*/
|
||||
|
||||
#define CONTROL(c) { if (!(c)) abort(); }
|
||||
#undef MIN
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
@ -594,6 +595,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
if (blockSize)
|
||||
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
||||
DISPLAYLEVEL(3, "\n");
|
||||
size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
|
||||
|
||||
|
||||
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
||||
@ -625,8 +627,8 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
|
||||
/* dictionary determination */
|
||||
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
||||
srcBuffer.ptr,
|
||||
srcSlices.capacities, nbBlocks,
|
||||
srcs.buffer.ptr,
|
||||
srcs.slices.capacities, srcs.slices.nbSlices,
|
||||
DICTSIZE);
|
||||
CONTROL(dictBuffer.ptr != NULL);
|
||||
|
||||
@ -637,7 +639,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
CONTROL(cTotalSizeNoDict != 0);
|
||||
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
clevel,
|
||||
(double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||
(double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||
|
||||
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
||||
CONTROL(cSizes != NULL);
|
||||
@ -646,7 +648,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
CONTROL(cTotalSize != 0);
|
||||
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
(unsigned)dictBuffer.size,
|
||||
(double)srcSize / cTotalSize, (unsigned)cTotalSize);
|
||||
(double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
|
||||
|
||||
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
||||
shrinkSizes(dstSlices, cSizes);
|
||||
|
@ -15,7 +15,6 @@
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "compiler.h" /* prefetch */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
|
@ -56,7 +56,6 @@
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "compiler.h" /* prefetch */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
|
@ -507,16 +507,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
}
|
||||
}
|
||||
|
||||
/* prefetch dictionary content */
|
||||
if (dctx->ddictIsCold) {
|
||||
size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
|
||||
size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
|
||||
size_t const pSize = MIN(psmin, 128 KB /* protection */ );
|
||||
const void* const pStart = (const char*)dctx->dictEnd - pSize;
|
||||
PREFETCH_AREA(pStart, pSize);
|
||||
dctx->ddictIsCold = 0;
|
||||
}
|
||||
|
||||
return ip-istart;
|
||||
}
|
||||
|
||||
@ -1046,6 +1036,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
/* prepare in advance */
|
||||
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
||||
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||
}
|
||||
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
||||
|
||||
@ -1070,9 +1061,6 @@ ZSTD_decompressSequencesLong_body(
|
||||
|
||||
/* save reps for next block */
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||
#undef STORED_SEQS
|
||||
#undef STORED_SEQS_MASK
|
||||
#undef ADVANCED_SEQS
|
||||
}
|
||||
|
||||
/* last literal segment */
|
||||
@ -1213,20 +1201,27 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
/* Build Decoding Tables */
|
||||
{ int nbSeq;
|
||||
{ int usePrefetchDecoder = dctx->ddictIsCold;
|
||||
int nbSeq;
|
||||
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
||||
if (ZSTD_isError(seqHSize)) return seqHSize;
|
||||
ip += seqHSize;
|
||||
srcSize -= seqHSize;
|
||||
|
||||
if ( (!frame || (dctx->fParams.windowSize > (1<<24)))
|
||||
&& (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
|
||||
if ( !usePrefetchDecoder
|
||||
&& (!frame || (dctx->fParams.windowSize > (1<<24)))
|
||||
&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
|
||||
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
|
||||
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
||||
if (shareLongOffsets >= minShare)
|
||||
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
||||
usePrefetchDecoder = (shareLongOffsets >= minShare);
|
||||
}
|
||||
|
||||
dctx->ddictIsCold = 0;
|
||||
|
||||
if (usePrefetchDecoder)
|
||||
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
||||
|
||||
/* else */
|
||||
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user