From 2e45badff48a92842a8e9acc4c19589d272ebfc0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 23 Aug 2018 14:21:18 -0700 Subject: [PATCH] refactored bench.c for clarity and safety, especially at interface level --- lib/compress/zstd_compress.c | 3 +- programs/bench.c | 893 +++++++++++++++++++---------------- programs/bench.h | 166 ++++--- 3 files changed, 587 insertions(+), 475 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 29dce1250..48ccdeefa 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1322,8 +1322,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, } /* copy dictionary offsets */ - { - ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; diff --git a/programs/bench.c b/programs/bench.c index f280f00ef..d95adb554 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -63,7 +63,10 @@ #define MB *(1 <<20) #define GB *(1U<<30) -static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); +static const size_t maxMemory = (sizeof(size_t)==4) ? + /* 32-bit */ (2 GB - 64 MB) : + /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + /* ************************************* * console display @@ -97,26 +100,26 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; return errorNum; \ } -#define EXM_THROW(errorNum, retType, ...) { \ +#define RETURN_ERROR(errorNum, retType, ...) { \ retType r; \ memset(&r, 0, sizeof(retType)); \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ DISPLAYLEVEL(1, "Error %i : ", errorNum); \ DISPLAYLEVEL(1, __VA_ARGS__); \ DISPLAYLEVEL(1, " \n"); \ - r.error = errorNum; \ + r.tag = errorNum; \ return r; \ } /* error without displaying */ -#define EXM_THROW_ND(errorNum, retType, ...) { \ +#define RETURN_QUIET_ERROR(errorNum, retType, ...) { \ retType r; \ memset(&r, 0, sizeof(retType)); \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ DEBUGOUTPUT("Error %i : ", errorNum); \ DEBUGOUTPUT(__VA_ARGS__); \ DEBUGOUTPUT(" \n"); \ - r.error = errorNum; \ + r.tag = errorNum; \ return r; \ } @@ -125,9 +128,8 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; ***************************************/ BMK_advancedParams_t BMK_initAdvancedParams(void) { - BMK_advancedParams_t res = { + BMK_advancedParams_t const res = { BMK_both, /* mode */ - BMK_timeMode, /* loopMode */ BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ 0, /* blockSize */ 0, /* nbWorkers */ @@ -156,13 +158,6 @@ typedef struct { size_t resSize; } blockParam_t; -struct BMK_timeState_t{ - unsigned nbLoops; - U64 timeRemaining; - UTIL_time_t coolTime; - U64 fastestTime; -}; - #undef MIN #undef MAX #define MIN(a,b) ((a) < (b) ? (a) : (b)) @@ -194,15 +189,15 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx, ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); } - static void BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) { ZSTD_DCtx_reset(dctx); ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); } + typedef struct { - ZSTD_CCtx* ctx; + ZSTD_CCtx* cctx; const void* dictBuffer; size_t dictBufferSize; int cLevel; @@ -212,7 +207,7 @@ typedef struct { static size_t local_initCCtx(void* payload) { BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; - BMK_initCCtx(ag->ctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); + BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); return 0; } @@ -228,26 +223,24 @@ static size_t local_initDCtx(void* payload) { return 0; } -/* additional argument is just the context */ + +/* `addArgs` is the context */ static size_t local_defaultCompress( - const void* srcBuffer, size_t srcSize, - void* dstBuffer, size_t dstSize, - void* addArgs) { + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) +{ size_t moreToFlush = 1; - ZSTD_CCtx* ctx = (ZSTD_CCtx*)addArgs; + ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; ZSTD_inBuffer in; ZSTD_outBuffer out; - in.src = srcBuffer; - in.size = srcSize; - in.pos = 0; - out.dst = dstBuffer; - out.size = dstSize; - out.pos = 0; + in.src = srcBuffer; in.size = srcSize; in.pos = 0; + out.dst = dstBuffer; out.size = dstSize; out.pos = 0; while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall; } - moreToFlush = ZSTD_compress_generic(ctx, &out, &in, ZSTD_e_end); + moreToFlush = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); if (ZSTD_isError(moreToFlush)) { return moreToFlush; } @@ -255,27 +248,23 @@ static size_t local_defaultCompress( return out.pos; } -/* additional argument is just the context */ +/* `addArgs` is the context */ static size_t local_defaultDecompress( - const void* srcBuffer, size_t srcSize, - void* dstBuffer, size_t dstSize, - void* addArgs) { + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) +{ size_t moreToFlush = 1; - ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; + ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; ZSTD_inBuffer in; ZSTD_outBuffer out; - in.src = srcBuffer; - in.size = srcSize; - in.pos = 0; - out.dst = dstBuffer; - out.size = dstSize; - out.pos = 0; + in.src = srcBuffer; in.size = srcSize; in.pos = 0; + out.dst = dstBuffer; out.size = dstSize; out.pos = 0; while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall; } - moreToFlush = ZSTD_decompress_generic(dctx, - &out, &in); + moreToFlush = ZSTD_decompress_generic(dctx, &out, &in); if (ZSTD_isError(moreToFlush)) { return moreToFlush; } @@ -284,30 +273,56 @@ static size_t local_defaultDecompress( } -/* initFn will be measured once, bench fn will be measured x times */ -/* benchFn should return error value or out Size */ + +/*=== Benchmarking an arbitrary function ===*/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.tag == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! + * check outcome validity first, using BMK_isValid_runResult() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.tag = 0; + outcome.internal_never_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */ /* takes # of blocks and list of size & stuff for each. */ -/* only does looping */ -/* note time per loop could be zero if interval too short */ -BMK_customReturn_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, - void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* blockResult, - unsigned nbLoops) { +/* can report result of benchFn for each block into blockResult. */ +/* blockResult is optional, provide NULL if this information is not required */ +/* note : time per loop could be zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const * srcBlockBuffers, const size_t* srcBlockSizes, + void* const * dstBlockBuffers, const size_t* dstBlockCapacities, + size_t* blockResult, + unsigned nbLoops) +{ size_t dstSize = 0; U64 totalTime; - BMK_customReturn_t retval; - UTIL_time_t clockStart; - if(!nbLoops) { - EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); + RETURN_QUIET_ERROR(1, BMK_runOutcome_t, "nbLoops must be nonzero "); } - { - size_t i; + /* init */ + { size_t i; for(i = 0; i < blockCount; i++) { memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ } @@ -320,157 +335,247 @@ BMK_customReturn_t BMK_benchFunction( #endif } - { - unsigned i, j; - clockStart = UTIL_getTime(); - if(initFn != NULL) { initFn(initPayload); } - for(i = 0; i < nbLoops; i++) { - for(j = 0; j < blockCount; j++) { - size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); + /* benchmark loop */ + { UTIL_time_t const clockStart = UTIL_getTime(); + unsigned loopNb, blockNb; + if (initFn != NULL) initFn(initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < blockCount; blockNb++) { + size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb], + dstBlockBuffers[blockNb], dstBlockCapacities[blockNb], + benchPayload); if(ZSTD_isError(res)) { - EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", - j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if(i == nbLoops - 1) { + RETURN_QUIET_ERROR(2, BMK_runOutcome_t, + "Function benchmark failed on block %u of size %u : %s", + blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res)); + } else if (loopNb == 0) { dstSize += res; - if(blockResult != NULL) { - blockResult[j] = res; - } - } - } - } + if (blockResult != NULL) blockResult[blockNb] = res; + dstSize += res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ totalTime = UTIL_clockSpanNano(clockStart); } - retval.error = 0; - retval.result.nanoSecPerRun = totalTime / nbLoops; - retval.result.sumOfReturn = dstSize; - return retval; + { BMK_runTime_t rt; + rt.nanoSecPerRun = totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } -#define MINUSABLETIME 500000000ULL /* 0.5 seconds in ns */ -void BMK_resetTimedFnState(BMK_timedFnState_t* r, unsigned nbSeconds) { - r->nbLoops = 1; - r->timeRemaining = (U64)nbSeconds * TIMELOOP_NANOSEC; - r->coolTime = UTIL_getTime(); - r->fastestTime = (U64)(-1LL); -} +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + U64 timeSpent_ns; + U64 timeBudget_ns; + BMK_runTime_t fastestRun; + unsigned nbLoops; + UTIL_time_t coolTime; +}; /* typedef'd to BMK_timedFnState_t within bench.h */ BMK_timedFnState_t* BMK_createTimedFnState(unsigned nbSeconds) { - BMK_timedFnState_t* r = (BMK_timedFnState_t*)malloc(sizeof(struct BMK_timeState_t)); - if(r == NULL) { - return r; - } + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ BMK_resetTimedFnState(r, nbSeconds); return r; } +void BMK_resetTimedFnState(BMK_timedFnState_t* r, unsigned nbSeconds) { + r->timeSpent_ns = 0; + r->timeBudget_ns = (U64)nbSeconds * TIMELOOP_NANOSEC; + r->fastestRun.nanoSecPerRun = (U64)(-1LL); + r->fastestRun.sumOfReturn = (size_t)(-1LL); + r->nbLoops = 1; + r->coolTime = UTIL_getTime(); +} + void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } -/* make option for dstBlocks to be */ -BMK_customTimedReturn_t BMK_benchFunctionTimed( - BMK_timedFnState_t* cont, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, - void * const * const dstBlockBuffers, const size_t * dstBlockCapacities, size_t* blockResults) -{ - U64 fastest = cont->fastestTime; - int completed = 0; - BMK_customTimedReturn_t r; - r.completed = 0; - while(!r.completed && !completed) - { +/* check first if the return structure represents an error or a valid result */ +int BMK_isSuccessful_timedFnOutcome(BMK_timedFnOutcome_t outcome) +{ + return (outcome.tag < 2); +} + +/* extract intermediate results from variant type. + * note : this function will abort() program execution if result is not valid. + * check result validity first, by using BMK_isSuccessful_timedFnOutcome() */ +BMK_runTime_t BMK_extract_timedFnResult(BMK_timedFnOutcome_t outcome) +{ + assert(outcome.tag < 2); + return outcome.internal_never_use_directly; +} + +/* Tells if nb of seconds set in timedFnState for all runs is spent. + * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ +int BMK_isCompleted_timedFnOutcome(BMK_timedFnOutcome_t outcome) +{ + return (outcome.tag >= 1); +} + + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +BMK_timedFnOutcome_t BMK_benchFunctionTimed( + BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const* srcBlockBuffers, const size_t* srcBlockSizes, + void * const * dstBlockBuffers, const size_t * dstBlockCapacities, + size_t* blockResults) +{ + int completed = 0; + BMK_timedFnOutcome_t r; + BMK_runTime_t bestRunTime = cont->fastestRun; + + r.tag = 2; /* error by default */ + + while (!completed) { + BMK_runOutcome_t runResult; + /* Overheat protection */ if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { DEBUGOUTPUT("\rcooling down ... \r"); UTIL_sleep(COOLPERIOD_SEC); cont->coolTime = UTIL_getTime(); } + /* reinitialize capacity */ - r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, - blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, blockResults, cont->nbLoops); - if(r.result.error) { /* completed w/ error */ - r.completed = 1; + runResult = BMK_benchFunction(benchFn, benchPayload, + initFn, initPayload, + blockCount, + srcBlockBuffers, srcBlockSizes, + dstBlockBuffers, dstBlockCapacities, + blockResults, + cont->nbLoops); + + if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ + r.tag = 2; return r; } - { U64 const loopDuration = r.result.result.nanoSecPerRun * cont->nbLoops; - r.completed = (cont->timeRemaining <= loopDuration); - cont->timeRemaining -= loopDuration; - if (loopDuration > (TIMELOOP_NANOSEC / 100)) { - fastest = MIN(fastest, r.result.result.nanoSecPerRun); - if(loopDuration >= MINUSABLETIME) { - r.result.result.nanoSecPerRun = fastest; - cont->fastestTime = fastest; - } - cont->nbLoops = (U32)(TIMELOOP_NANOSEC / r.result.result.nanoSecPerRun) + 1; + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += loopDuration_ns; + + /* estimate nbLoops for next run to last approximately 1 second */ + if (loopDuration_ns > (TIMELOOP_NANOSEC / 50)) { + U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (U32)(TIMELOOP_NANOSEC / fastestRun_ns) + 1; } else { - const unsigned multiplier = 2; + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ cont->nbLoops *= multiplier; } - if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */ - continue; - } + if(loopDuration_ns < MINUSABLETIME) { + /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ + assert(completed == 0); + continue; + } else { + if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + completed = 1; + } } - completed = 1; - } + } /* while (!completed) */ + + r.tag = (cont->timeSpent_ns >= cont->timeBudget_ns); /* report if time budget is spent */ + r.internal_never_use_directly = bestRunTime; return r; } -/* benchMem with no allocation */ -static BMK_return_t BMK_benchMemAdvancedNoAlloc( - const void ** const srcPtrs, size_t* const srcSizes, - void** const cPtrs, size_t* const cCapacities, size_t* const cSizes, - void** const resPtrs, size_t* const resSizes, - void** resultBufferPtr, void* compressedBuffer, - const size_t maxCompressedSize, - BMK_timedFnState_t* timeStateCompress, BMK_timedFnState_t* timeStateDecompress, - const void* srcBuffer, size_t srcSize, - const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) +/* ================================================================= */ +/* Benchmark Zstandard, mem-to-mem scenarios */ +/* ================================================================= */ + +int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) +{ + return outcome.tag == 0; +} + +BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +static BMK_benchOutcome_t BMK_benchOutcome_error() +{ + BMK_benchOutcome_t b; + memset(&b, 0, sizeof(b)); + b.tag = 1; + return b; +} + +static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result) +{ + BMK_benchOutcome_t b; + b.tag = 0; + b.internal_never_use_directly = result; + return b; +} + + +/* benchMem with no allocation */ +static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( + const void** srcPtrs, size_t* srcSizes, + void** cPtrs, size_t* cCapacities, size_t* cSizes, + void** resPtrs, size_t* resSizes, + void** resultBufferPtr, void* compressedBuffer, + size_t maxCompressedSize, + BMK_timedFnState_t* timeStateCompress, + BMK_timedFnState_t* timeStateDecompress, + + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv) { size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ - BMK_return_t results = { { 0, 0, 0, 0 }, 0 } ; + BMK_benchResult_t benchResult; size_t const loadedCompressedSize = srcSize; size_t cSize = 0; double ratio = 0.; U32 nbBlocks; - if(!ctx || !dctx) - EXM_THROW(31, BMK_return_t, "error: passed in null context"); + assert(cctx != NULL); assert(dctx != NULL); /* init */ - if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ + if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */ if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; for (fileNb=0; fileNb decodedSize) { + if (totalDSize64 > decodedSize) { /* size_t overflow */ free(*resultBufferPtr); - EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ + RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large"); } cSize = srcSize; srcSize = decodedSize; @@ -489,11 +594,11 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( U32 const blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocksmode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); - resPtrs[nbBlocks] = (void*)resPtr; + resPtrs[nbBlocks] = resPtr; resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; cPtr += cCapacities[nbBlocks]; @@ -503,7 +608,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } } - /* warmimg up memory */ + /* warmimg up `compressedBuffer` */ if (adv->mode == BMK_decodeOnly) { memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } else { @@ -511,151 +616,99 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } /* Bench */ - { - U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); + { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 - const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; + const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; U32 markNb = 0; - DISPLAYLEVEL(2, "\r%79s\r", ""); + int compressionCompleted = (adv->mode == BMK_decodeOnly); + int decompressionCompleted = (adv->mode == BMK_compressOnly); + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + cctxprep.cctx = cctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = adv; + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + DISPLAYLEVEL(2, "\r%70s\r", ""); /* blank line */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - { - BMK_initCCtxArgs cctxprep; - BMK_initDCtxArgs dctxprep; - cctxprep.ctx = ctx; - cctxprep.dictBuffer = dictBuffer; - cctxprep.dictBufferSize = dictBufferSize; - cctxprep.cLevel = cLevel; - cctxprep.comprParams = comprParams; - cctxprep.adv = adv; - dctxprep.dctx = dctx; - dctxprep.dictBuffer = dictBuffer; - dctxprep.dictBufferSize = dictBufferSize; - if(adv->loopMode == BMK_timeMode) { - BMK_customTimedReturn_t intermediateResultCompress; - BMK_customTimedReturn_t intermediateResultDecompress; - if(adv->mode == BMK_compressOnly) { - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 1; - } else if (adv->mode == BMK_decodeOnly) { - intermediateResultCompress.completed = 1; - intermediateResultDecompress.completed = 0; - } else { /* both */ - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 0; - } - while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { - if(!intermediateResultCompress.completed) { - intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes); - if(intermediateResultCompress.result.error) { - results.error = intermediateResultCompress.result.error; - return results; - } - ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn); - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun); - cSize = intermediateResultCompress.result.result.sumOfReturn; - results.result.cSize = cSize; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); - } - } - if(!intermediateResultDecompress.completed) { - intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL); - if(intermediateResultDecompress.result.error) { - results.error = intermediateResultDecompress.result.error; - return results; - } + while (!(compressionCompleted && decompressionCompleted)) { - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun); - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), - (double)results.result.dSpeed / (1 MB)); - } - } + if (!compressionCompleted) { + BMK_runTime_t cResult; + + BMK_timedFnOutcome_t const cOutcome = + BMK_benchFunctionTimed(timeStateCompress, + &local_defaultCompress, (void*)cctx, + &local_initCCtx, (void*)&cctxprep, + nbBlocks, + srcPtrs, srcSizes, + cPtrs, cCapacities, + cSizes); + + if (!BMK_isSuccessful_timedFnOutcome(cOutcome)) { + return BMK_benchOutcome_error(); } - } else { //iterMode; - if(adv->mode != BMK_decodeOnly) { + cResult = BMK_extract_timedFnResult(cOutcome); + ratio = (double)(srcSize / cResult.sumOfReturn); - BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, adv->nbSeconds); - if(compressionResults.error) { - results.error = compressionResults.error; - return results; - } - - if(compressionResults.result.nanoSecPerRun == 0) { - results.result.cSpeed = 0; - } else { - results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun; - } - - results.result.cSize = compressionResults.result.sumOfReturn; - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - cSize = compressionResults.result.sumOfReturn; - results.result.cSize = cSize; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); - } + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + cSize = cResult.sumOfReturn; + benchResult.cSpeed = (srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun); + benchResult.cSize = cSize; + ratio = (double)srcSize / cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / (1 MB)); } - if(adv->mode != BMK_compressOnly) { - BMK_customReturn_t decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL, - adv->nbSeconds); - if(decompressionResults.error) { - results.error = decompressionResults.error; - return results; - } + } - if(decompressionResults.result.nanoSecPerRun == 0) { - results.result.dSpeed = 0; - } else { - results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun; - } + if(!decompressionCompleted) { + BMK_runTime_t dResult; - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), - (double)results.result.dSpeed / (1 MB)); - } + BMK_timedFnOutcome_t const dOutcome = + BMK_benchFunctionTimed(timeStateDecompress, + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, + nbBlocks, + (const void* const*)cPtrs, cSizes, + resPtrs, resSizes, + NULL); + + if(!BMK_isSuccessful_timedFnOutcome(dOutcome)) { + return BMK_benchOutcome_error(); + } + + dResult = BMK_extract_timedFnResult(dOutcome); + + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + benchResult.dSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun); + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)benchResult.cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / (1 MB), + (double)benchResult.dSpeed / (1 MB)); } } } /* CRC Checking */ - { void* resultBuffer = *resultBufferPtr; + { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - /* adv->mode == 0 -> compress + decompress */ if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { size_t u; DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u