1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-07 09:26:03 +02:00

datagen refactoring

This commit is contained in:
Yann Collet 2015-02-10 18:15:20 +01:00
parent 6610cb381c
commit fb98fd0bd4
5 changed files with 91 additions and 76 deletions

View File

@ -861,13 +861,13 @@ static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
} }
static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* anchor, size_t offset, size_t matchLength) static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
{ {
BYTE* op_lit = seqStorePtr->lit; BYTE* op_lit = seqStorePtr->lit;
BYTE* const l_end = op_lit + litLength; BYTE* const l_end = op_lit + litLength;
/* copy Literals */ /* copy Literals */
while (op_lit<l_end) COPY8(op_lit, anchor); while (op_lit<l_end) COPY8(op_lit, literals);
seqStorePtr->lit += litLength; seqStorePtr->lit += litLength;
/* literal Length */ /* literal Length */

View File

@ -122,84 +122,85 @@ static char RDG_genChar(U32* seed, const void* ltctx)
return lt[id]; return lt[id];
} }
#define RDG_DICTSIZE (32 KB)
#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) #define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767)
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
#define RDG_DICTSIZE (32 KB) void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
void RDG_generate(U64 size, U32 seedInit, double matchProba, double litProba)
{ {
BYTE fullbuff[RDG_DICTSIZE + 128 KB + 1]; BYTE* buffPtr = ((BYTE*)buffer) - prefixSize;
BYTE* buff = fullbuff + RDG_DICTSIZE; const U32 matchProba32 = (U32)(32768 * matchProba);
U64 total=0; size_t pos = prefixSize;
U32 P32 = (U32)(32768 * matchProba); void* ldctx = litTable;
U32 pos=1; U32* seed = seedPtr;
U32 genBlockSize = 128 KB;
void* ldctx = RDG_createLiteralDistrib(litProba);
FILE* fout = stdout;
U32* seed = &seedInit;;
/* init */ /* init */
SET_BINARY_MODE(stdout); if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
fullbuff[0] = RDG_genChar(seed, ldctx);
while (pos<32 KB) /* Generate compressible data */
while (pos < buffSize)
{ {
/* Select : Literal (char) or Match (within 32K) */ /* Select : Literal (char) or Match (within 32K) */
if (RDG_RAND15BITS < P32) if (RDG_RAND15BITS < matchProba32)
{ {
/* Copy (within 64K) */ /* Copy (within 32K) */
int match;
U32 d; U32 d;
int ref;
int length = RDG_RANDLENGTH + 4; int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1; U32 offset = RDG_RAND15BITS + 1;
if (offset > pos) offset = pos; if (offset > pos) offset = pos;
ref = pos - offset; if (pos + length > buffSize) length = buffSize - pos;
match = pos - offset;
d = pos + length; d = pos + length;
while (pos < d) fullbuff[pos++] = fullbuff[ref++]; while (pos < d) buffPtr[pos++] = buffPtr[match++];
}
else
{
/* Literal (noise) */
U32 d = pos + RDG_RANDLENGTH;
while (pos < d) fullbuff[pos++] = RDG_genChar(seed, ldctx);
}
}
/* Generate compressible data */
pos = 0;
while (total < size)
{
if (size-total < 128 KB) genBlockSize = (U32)(size-total);
total += genBlockSize;
buff[genBlockSize] = 0;
pos = 0;
while (pos<genBlockSize)
{
/* Select : Literal (char) or Match (within 32K) */
if (RDG_RAND15BITS < P32)
{
/* Copy (within 64K) */
int ref;
U32 d;
int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1;
if (pos + length > genBlockSize ) length = genBlockSize - pos;
ref = pos - offset;
d = pos + length;
while (pos < d) buff[pos++] = buff[ref++];
} }
else else
{ {
/* Literal (noise) */ /* Literal (noise) */
U32 d; U32 d;
int length = RDG_RANDLENGTH; int length = RDG_RANDLENGTH;
if (pos + length > genBlockSize) length = genBlockSize - pos; if (pos + length > buffSize) length = buffSize - pos;
d = pos + length; d = pos + length;
while (pos < d) buff[pos++] = RDG_genChar(seed, ldctx); while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
} }
} }
}
/* output generated data */
fwrite(buff, 1, genBlockSize, fout);
/* Regenerate prefix */ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
memcpy(fullbuff, buff + 96 KB, 32 KB); {
} void* ldctx;
if (litProba==0.0) litProba = matchProba / 3.6;
ldctx = RDG_createLiteralDistrib(litProba);
RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
}
#define RDG_BLOCKSIZE (128 KB)
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1];
BYTE* buff = fullbuff + RDG_DICTSIZE;
U64 total = 0;
U32 genBlockSize = RDG_BLOCKSIZE;
void* ldctx;
/* init */
if (litProba==0.0) litProba = matchProba / 3.6;
ldctx = RDG_createLiteralDistrib(litProba);
SET_BINARY_MODE(stdout);
/* Generate dict */
RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
/* Generate compressible data */
while (total < size)
{
RDG_genBlock(buff, RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total);
total += genBlockSize;
buff[genBlockSize] = 0;
fwrite(buff, 1, genBlockSize, stdout);
/* update dict */
memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE);
}
} }

View File

@ -24,4 +24,17 @@
*/ */
void RDG_generate(unsigned long long size, unsigned seed, double matchProba, double litProba); #include <stddef.h> /* size_t */
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
/* RDG_genOut
Generate 'size' bytes of compressible data into stdout.
Compressibility can be controlled using 'matchProba'.
'litProba' is optional, and affects the variability of bytes. If litProba==0.0, a default value is used.
Generated data can be selected using 'seed'.
If (matchProba, litProba and seed) are equal, the function always generates the same content.
RDG_genBuffer
Same as RDG_genOut, but generates data into the provided buffer.
*/

View File

@ -96,7 +96,7 @@ int main(int argc, char** argv)
{ {
int argNb; int argNb;
double proba = (double)COMPRESSIBILITY_DEFAULT / 100; double proba = (double)COMPRESSIBILITY_DEFAULT / 100;
double litProba = proba / 3.6; double litProba = 0.0;
U64 size = SIZE_DEFAULT; U64 size = SIZE_DEFAULT;
U32 seed = SEED_DEFAULT; U32 seed = SEED_DEFAULT;
char* programName; char* programName;
@ -154,7 +154,6 @@ int main(int argc, char** argv)
} }
if (proba>100.) proba=100.; if (proba>100.) proba=100.;
proba /= 100.; proba /= 100.;
litProba = proba / 3.6;
break; break;
case 'L': /* hidden argument : Literal distribution probability */ case 'L': /* hidden argument : Literal distribution probability */
argument++; argument++;
@ -184,7 +183,7 @@ int main(int argc, char** argv)
DISPLAYLEVEL(3, "Seed = %u \n", seed); DISPLAYLEVEL(3, "Seed = %u \n", seed);
if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
RDG_generate(size, seed, proba, litProba); RDG_genOut(size, proba, litProba, seed);
DISPLAYLEVEL(1, "\n"); DISPLAYLEVEL(1, "\n");
return 0; return 0;

View File

@ -62,6 +62,7 @@
#include "zstd.h" #include "zstd.h"
#include "fse_static.h" #include "fse_static.h"
#include "datagen.h"
/************************************** /**************************************
@ -215,6 +216,7 @@ static U64 BMK_GetFileSize(char* infilename)
} }
#if 1
static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); } static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); }
static U32 BMK_rand(U32* src) static U32 BMK_rand(U32* src)
@ -265,7 +267,7 @@ static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
} }
} }
} }
#endif
/********************************************************* /*********************************************************
* Benchmark wrappers * Benchmark wrappers
@ -329,8 +331,8 @@ size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void*
if (b==0) total = 0; // 825 if (b==0) total = 0; // 825
//if (!b) total = 0; // 825 //if (!b) total = 0; // 825
//total = b ? total : 0; // 622 //total = b ? total : 0; // 622
//total *= !!b; // 465
//total &= -!b; // 622 //total &= -!b; // 622
//total *= !!b; // 465
} }
return total; return total;
} }
@ -357,7 +359,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
double bestTime = 100000000.; double bestTime = 100000000.;
size_t errorCode = 0; size_t errorCode = 0;
// Declaration /* Selection */
switch(benchNb) switch(benchNb)
{ {
case 1: case 1:
@ -399,7 +401,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
case 11: case 11:
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize); g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize);
break; break;
case 31: // ZSTD_decodeLiteralsBlock case 31: /* ZSTD_decodeLiteralsBlock */
{ {
blockProperties_t bp; blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -417,7 +419,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block
break; break;
} }
case 32: // ZSTD_decodeSeqHeaders case 32: /* ZSTD_decodeSeqHeaders */
{ {
blockProperties_t bp; blockProperties_t bp;
const BYTE* ip = dstBuff; const BYTE* ip = dstBuff;
@ -444,15 +446,14 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
/* test functions */ /* test functions */
case 101: // conditionalNull case 101: /* conditionalNull */
{ {
size_t i; size_t i;
U32 seed = (U32)srcSize;
for (i=0; i<srcSize; i++) for (i=0; i<srcSize; i++)
buff2[i] = (BYTE)(BMK_rand(&seed) & 15); buff2[i] = i & 15;
break; break;
} }
case 102: // case 102: /* local_decodeLiteralsForward */
{ {
blockProperties_t bp; blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -515,6 +516,7 @@ int benchSample(U32 benchNb)
/* Fill buffer */ /* Fill buffer */
BMK_datagen(origBuff, benchedSize, g_compressibilityDefault, 0); BMK_datagen(origBuff, benchedSize, g_compressibilityDefault, 0);
//RDG_generate(benchedSize, 0, g_compressibilityDefault, g_compressibilityDefault / 3.6);
/* bench */ /* bench */
DISPLAY("\r%79s\r", ""); DISPLAY("\r%79s\r", "");