diff --git a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj index fae4e5c19..29762860f 100644 --- a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +++ b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj @@ -168,13 +168,13 @@ - + - + diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj index 24f62c9f7..a3a884a7b 100644 --- a/build/VS2010/fullbench/fullbench.vcxproj +++ b/build/VS2010/fullbench/fullbench.vcxproj @@ -179,7 +179,7 @@ - + @@ -202,7 +202,7 @@ - + diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 554ae7958..6681e581d 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -53,7 +53,8 @@ - + + @@ -87,7 +88,7 @@ - + diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index e0a32047f..a54d49d9e 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -26,7 +26,7 @@ IF (MSVC) SET(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc) ENDIF (MSVC) -ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources}) +ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources}) TARGET_LINK_LIBRARIES(zstd libzstd_static) IF (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)") TARGET_LINK_LIBRARIES(zstd rt) diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index 4c1989960..d5e482775 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -43,13 +43,13 @@ INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/c ADD_EXECUTABLE(datagen ${PROGRAMS_DIR}/datagen.c 
${TESTS_DIR}/datagencli.c) TARGET_LINK_LIBRARIES(datagen libzstd_static) -ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/bench.c ${TESTS_DIR}/fullbench.c) +ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c) TARGET_LINK_LIBRARIES(fullbench libzstd_static) ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/fuzzer.c) TARGET_LINK_LIBRARIES(fuzzer libzstd_static) IF (UNIX) - ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/paramgrill.c) + ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/paramgrill.c) TARGET_LINK_LIBRARIES(paramgrill libzstd_static m) #m is math library ENDIF (UNIX) diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 730250f96..541f3969b 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -28,14 +28,14 @@ default: largeNbDicts all : largeNbDicts -largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) +largeNbDicts: util.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ .PHONY: $(LIBZSTD) $(LIBZSTD): $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" -bench.o : $(PROGDIR)/bench.c +benchfn.o : $(PROGDIR)/benchfn.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c datagen.o: $(PROGDIR)/datagen.c diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index d7639fc40..5a890cd90 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -24,7 +24,7 @@ #include /* assert */ #include "util.h" -#include "bench.h" +#include "benchfn.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zdict.h" @@ -538,15 +538,22 @@ static int 
benchMem(slice_collection_t dstBlocks, BMK_timedFnState_t* const benchState = BMK_createTimedFnState(total_time_ms, ms_per_round); decompressInstructions di = createDecompressInstructions(dictionaries); + BMK_benchParams_t const bp = { + .benchFn = decompress, + .benchPayload = &di, + .initFn = NULL, + .initPayload = NULL, + .errorFn = ZSTD_isError, + .blockCount = dstBlocks.nbSlices, + .srcBuffers = (const void* const*) srcBlocks.slicePtrs, + .srcSizes = srcBlocks.capacities, + .dstBuffers = dstBlocks.slicePtrs, + .dstCapacities = dstBlocks.capacities, + .blockResults = NULL + }; for (;;) { - BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, - decompress, &di, - NULL, NULL, - dstBlocks.nbSlices, - (const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities, - dstBlocks.slicePtrs, dstBlocks.capacities, - NULL); + BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp); CONTROL(BMK_isSuccessful_runOutcome(outcome)); BMK_runTime_t const result = BMK_extract_runTime(outcome); diff --git a/programs/Makefile b/programs/Makefile index 0bfb8b9a6..77c1d6a2d 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -160,7 +160,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o bench.o datagen.o dibio.o +zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o @echo "$(THREAD_MSG)" @echo "$(ZLIB_MSG)" @echo "$(LZMA_MSG)" @@ -178,13 +178,13 @@ zstd-release: zstd zstd32 : CPPFLAGS += $(THREAD_CPP) zstd32 : LDFLAGS += $(THREAD_LD) zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c bench.c datagen.c dibio.c +zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c ifneq 
(,$(filter Windows%,$(OS))) windres/generate_res.bat endif $(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT) -zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c bench.o datagen.o dibio.o +zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) zstd-nomt : THREAD_CPP := diff --git a/programs/benchfn.c b/programs/benchfn.c new file mode 100644 index 000000000..4a9573e2e --- /dev/null +++ b/programs/benchfn.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_getFileSize, UTIL_sleep */ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen */ +#undef NDEBUG /* assert must not be disabled */ +#include /* assert */ + +#include "mem.h" +#include "benchfn.h" + + +/* ************************************* +* Constants +***************************************/ +#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ +#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */ +#define COOLPERIOD_SEC 10 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + + +/* ************************************* +* Errors +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif + +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DEBUGOUTPUT(...) 
{ if (DEBUG) DISPLAY(__VA_ARGS__); } + +/* error without displaying (messages are only printed when DEBUG is non-zero) */ +#define RETURN_QUIET_ERROR(retValue, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error : "); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + return retValue; \ +} + + +/* ************************************* +* Benchmarking an arbitrary function +***************************************/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) /* @return : non-zero when the error tag of outcome is 0 */ +{ + return outcome.error_tag_never_ever_use_directly == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! + * check outcome validity first, using BMK_isSuccessful_runOutcome() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.error_tag_never_ever_use_directly == 0); + return outcome.internal_never_ever_use_directly; +} + +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) /* asserts that outcome is an error, then returns the value stored in its error_result field */ +{ + assert(outcome.error_tag_never_ever_use_directly != 0); + return outcome.error_result_never_ever_use_directly; +} + +static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) /* builds an error outcome : zeroes the structure, sets the error tag, stores errorResult */ +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.error_tag_never_ever_use_directly = 1; + b.error_result_never_ever_use_directly = errorResult; + return b; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) /* builds a successful outcome wrapping runTime */ +{ + BMK_runOutcome_t outcome; /* note : the error_result field is not assigned on this path */ + outcome.error_tag_never_ever_use_directly = 0; + outcome.internal_never_ever_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return a size_t value that errorFn can interpret */ +/* takes # of blocks, plus arrays of buffers and sizes holding one entry per block. */ +/* can report result of benchFn for each block into p.blockResults. 
*/ +/* p.blockResults is optional, provide NULL if this information is not required */ +/* note : time per loop can be reported as zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, + unsigned nbLoops) +{ + size_t dstSize = 0; /* sum of benchFn return values, accumulated during the first loop only */ + nbLoops += !nbLoops; /* minimum nbLoops is 1 */ + + /* init */ + { size_t i; + for(i = 0; i < p.blockCount; i++) { + memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ + } +#if 0 + /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops + * (Makes former slower) + */ + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); +#endif + } + + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); /* timer starts before initFn : its duration is part of totalTime */ + unsigned loopNb, blockNb; + if (p.initFn != NULL) p.initFn(p.initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < p.blockCount; blockNb++) { + size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], + p.dstBuffers[blockNb], p.dstCapacities[blockNb], + p.benchPayload); + if (loopNb == 0) { /* return values are only recorded and error-checked during the first loop */ + if (p.blockResults != NULL) p.blockResults[blockNb] = res; /* stored before the error check, so a failing block's value is recorded too */ + if ((p.errorFn != NULL) && (p.errorFn(res))) { + RETURN_QUIET_ERROR(BMK_runOutcome_error(res), + "Function benchmark failed on block %u (of size %u) with error %i", + blockNb, (U32)p.srcSizes[blockNb], (int)res); + } + dstSize += res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ + + { U64 const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = totalTime / nbLoops; /* integer division : average duration of one pass over all blocks */ + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } +} + + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + U64 timeSpent_ns; /* sum of measured run durations, incremented by BMK_benchTimedFn() */ + U64 timeBudget_ns; /* total_ms converted to nanoseconds */ + U64 runBudget_ns; /* run_ms converted to nanoseconds */ + BMK_runTime_t fastestRun; /* set to maximum values by BMK_resetTimedFnState() */ + unsigned nbLoops; /* nb of loops requested for the next BMK_benchFunction() call */ + UTIL_time_t coolTime; /* timestamp of the last reset or cool-down pause */ +}; /* typedef'd to BMK_timedFnState_t within
benchfn.h */ + +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { + free(state); +} + +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; /* a single run never gets a larger budget than the whole session */ + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; /* ms -> ns */ + timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; /* ms -> ns */ + timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); /* maximum value, so that any measured run compares as faster */ + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if the total time budget (total_ms) set in timedFnState is spent. + * note : a failed BMK_benchTimedFn() call returns before updating timeSpent_ns, so an error alone does not make this function return 1. */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, + BMK_benchParams_t p) +{ + U64 const runBudget_ns = cont->runBudget_ns; + U64 const runTimeMin_ns = runBudget_ns / 2; /* runs shorter than half the run budget are not reported */ + int completed = 0; + BMK_runTime_t bestRunTime = cont->fastestRun; /* note : cont->fastestRun is only read here, this function never writes it back */ + + while (!completed) { + BMK_runOutcome_t runResult; + + /* Overheat protection */ + if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + cont->coolTime = UTIL_getTime(); + } + + /* measure one run of cont->nbLoops loops */ + runResult = BMK_benchFunction(p, cont->nbLoops); + + if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out, forwarding the failed outcome ; timeSpent_ns is left unchanged */ + return runResult; + } + + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += loopDuration_ns; + + /* estimate nbLoops for next run to last approximately runBudget_ns */ + if (loopDuration_ns > (runBudget_ns / 50)) { + U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; + } else { + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + + if(loopDuration_ns < runTimeMin_ns) { + /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ + assert(completed == 0); + continue; + } else { + if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + completed = 1; + } + } + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); +} diff --git a/programs/benchfn.h b/programs/benchfn.h new file mode 100644 index 000000000..3ca36e362 --- /dev/null +++ b/programs/benchfn.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* benchfn : + * benchmark any function on a set of input + * providing result in nanoSecPerRun + * or detecting and returning an error + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_FN_H_23876 +#define BENCH_FN_H_23876 + +/* === Dependencies === */ +#include /* size_t */ + + +/* ==== Benchmark any function, iterated on a set of blocks ==== */ + +/* BMK_runTime_t: valid result return type */ + +typedef struct { + unsigned long long nanoSecPerRun; /* time per iteration (over all blocks) */ + size_t sumOfReturn; /* sum of return values */ +} BMK_runTime_t; + + +/* BMK_runOutcome_t: + * type expressing the outcome of a benchmark run by BMK_benchFunction(), + * which can be either valid or invalid. + * benchmark outcome can be invalid if errorFn is provided. + * BMK_runOutcome_t must be considered "opaque" : never access its members directly. + * Instead, use its assigned methods : + * BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult. + * The structure is only described here to allow its allocation on stack. */ + +typedef struct { + BMK_runTime_t internal_never_ever_use_directly; + size_t error_result_never_ever_use_directly; + int error_tag_never_ever_use_directly; +} BMK_runOutcome_t; + + +/* prototypes for benchmarked functions */ +typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); +typedef size_t (*BMK_initFn_t)(void* initPayload); +typedef unsigned (*BMK_errorFn_t)(size_t); + + +/* BMK_benchFunction() parameters are provided through following structure. + * This is preferable for readability, + * as the number of parameters required is pretty large. 
+ * No initializer is provided, because it doesn't make sense to provide some "default" : + * all parameters should be specified by the caller */ +typedef struct { + BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */ + void* benchPayload; /* pass custom parameters to benchFn : + * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */ + BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */ + void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */ + BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not. + * errorFn can be NULL, in which case no check is performed. + * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error. + * Execution is stopped as soon as an error is detected. + * the triggering return value can be retrieved using BMK_extract_errorResult(). */ + size_t blockCount; /* number of blocks to operate benchFn on. + * It's also the size of all array parameters : + * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */ + const void *const * srcBuffers; /* array of buffers to be operated on by benchFn */ + const size_t* srcSizes; /* array of the sizes of srcBuffers buffers */ + void *const * dstBuffers;/* array of buffers to be written into by benchFn */ + const size_t* dstCapacities; /* array of the capacities of dstBuffers buffers */ + size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */ +} BMK_benchParams_t; + + +/* BMK_benchFunction() : + * This function benchmarks benchFn and initFn, providing a result. + * + * params : see description of BMK_benchParams_t above. + * nbLoops: defines number of times benchFn is run over the full set of blocks. + * Minimum value is 1. A 0 is interpreted as a 1. 
+ * + * @return: can express either an error or a successful result. + * Use BMK_isSuccessful_runOutcome() to check if benchmark was successful. + * If yes, extract the result with BMK_extract_runTime(), + * it will contain : + * .sumOfReturn : the sum of all return values of benchFn across all blocks (first loop only) + * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) + * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, + * in which case, this value will be the total amount of bytes written into dstBuffer. + * + * blockResults : when provided (!= NULL), and when benchmark is successful, + * params.blockResults contains all return values of `benchFn` over all blocks. + * when provided (!= NULL), and when benchmark failed, + * params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block. + */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops); + + + +/* check first if the benchmark was successful or not */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); + +/* If the benchmark was successful, extract the result. + * note : this function will abort() program execution if benchmark failed ! + * always check if benchmark was successful first ! + */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); + +/* when benchmark failed, it means one invocation of `benchFn` failed. + * The failure was detected by `errorFn`, operating on return values of `benchFn`. + * Returns the faulty return value. + * note : this function will abort() program execution if benchmark did not fail. + * always check if benchmark failed first ! 
+ */ +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome); + + + +/* ==== Benchmark any function, returning intermediate results ==== */ + +/* state information tracking benchmark session */ +typedef struct BMK_timedFnState_s BMK_timedFnState_t; + +/* BMK_benchTimedFn() : + * Similar to BMK_benchFunction(), most arguments being identical. + * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. + * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repeatedly, each measurement is supposed to last about run_ms + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() + */ +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState, + BMK_benchParams_t params); + +/* Tells if duration of all benchmark runs has reached or exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. BMK_createTimedFnState() returns NULL if its allocation fails. 
+ */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); +void BMK_freeTimedFnState(BMK_timedFnState_t* state); + + + +#endif /* BENCH_FN_H_23876 */ + +#if defined (__cplusplus) +} +#endif diff --git a/programs/bench.c b/programs/benchzstd.c similarity index 78% rename from programs/bench.c rename to programs/benchzstd.c index 2bbaa9d0a..5c3de10c2 100644 --- a/programs/bench.c +++ b/programs/benchzstd.c @@ -9,7 +9,6 @@ */ - /* ************************************** * Tuning parameters ****************************************/ @@ -18,14 +17,6 @@ #endif -/* ************************************** -* Compiler Warnings -****************************************/ -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - /* ************************************* * Includes ***************************************/ @@ -36,12 +27,13 @@ #include /* fprintf, fopen */ #include /* assert */ +#include "benchfn.h" #include "mem.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" -#include "bench.h" +#include "benchzstd.h" #include "zstd_errors.h" @@ -276,219 +268,6 @@ static size_t local_defaultDecompress( } -/*=== Benchmarking an arbitrary function ===*/ - -int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) -{ - return outcome.tag == 0; -} - -/* warning : this function will stop program execution if outcome is invalid ! 
- * check outcome validity first, using BMK_isValid_runResult() */ -BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) -{ - assert(outcome.tag == 0); - return outcome.internal_never_use_directly; -} - -static BMK_runOutcome_t BMK_runOutcome_error(void) -{ - BMK_runOutcome_t b; - memset(&b, 0, sizeof(b)); - b.tag = 1; - return b; -} - -static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) -{ - BMK_runOutcome_t outcome; - outcome.tag = 0; - outcome.internal_never_use_directly = runTime; - return outcome; -} - - -/* initFn will be measured once, benchFn will be measured `nbLoops` times */ -/* initFn is optional, provide NULL if none */ -/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */ -/* takes # of blocks and list of size & stuff for each. */ -/* can report result of benchFn for each block into blockResult. */ -/* blockResult is optional, provide NULL if this information is not required */ -/* note : time per loop could be zero if run time < timer resolution */ -BMK_runOutcome_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const * srcBlockBuffers, const size_t* srcBlockSizes, - void* const * dstBlockBuffers, const size_t* dstBlockCapacities, - size_t* blockResults, - unsigned nbLoops) -{ - size_t dstSize = 0; - - if(!nbLoops) { - RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero "); - } - - /* init */ - { size_t i; - for(i = 0; i < blockCount; i++) { - memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ - } -#if 0 - /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops - * (Makes former slower) - */ - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); -#endif - } - - /* benchmark */ - { UTIL_time_t const clockStart = UTIL_getTime(); - unsigned loopNb, blockNb; - if 
(initFn != NULL) initFn(initPayload); - for (loopNb = 0; loopNb < nbLoops; loopNb++) { - for (blockNb = 0; blockNb < blockCount; blockNb++) { - size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb], - dstBlockBuffers[blockNb], dstBlockCapacities[blockNb], - benchPayload); - if(ZSTD_isError(res)) { - RETURN_QUIET_ERROR(2, BMK_runOutcome_t, - "Function benchmark failed on block %u of size %u : %s", - blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res)); - } else if (loopNb == 0) { - dstSize += res; - if (blockResults != NULL) blockResults[blockNb] = res; - } } - } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ - - { U64 const totalTime = UTIL_clockSpanNano(clockStart); - BMK_runTime_t rt; - rt.nanoSecPerRun = totalTime / nbLoops; - rt.sumOfReturn = dstSize; - return BMK_setValid_runTime(rt); - } } -} - - -/* ==== Benchmarking any function, providing intermediate results ==== */ - -struct BMK_timedFnState_s { - U64 timeSpent_ns; - U64 timeBudget_ns; - U64 runBudget_ns; - BMK_runTime_t fastestRun; - unsigned nbLoops; - UTIL_time_t coolTime; -}; /* typedef'd to BMK_timedFnState_t within bench.h */ - -BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) -{ - BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); - if (r == NULL) return NULL; /* malloc() error */ - BMK_resetTimedFnState(r, total_ms, run_ms); - return r; -} - -void BMK_freeTimedFnState(BMK_timedFnState_t* state) { - free(state); -} - -void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) -{ - if (!total_ms) total_ms = 1 ; - if (!run_ms) run_ms = 1; - if (run_ms > total_ms) run_ms = total_ms; - timedFnState->timeSpent_ns = 0; - timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; - timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; - timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); - timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); - 
timedFnState->nbLoops = 1; - timedFnState->coolTime = UTIL_getTime(); -} - -/* Tells if nb of seconds set in timedFnState for all runs is spent. - * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ -int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) -{ - return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); -} - - -#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ - -BMK_runOutcome_t BMK_benchTimedFn( - BMK_timedFnState_t* cont, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const* srcBlockBuffers, const size_t* srcBlockSizes, - void * const * dstBlockBuffers, const size_t * dstBlockCapacities, - size_t* blockResults) -{ - U64 const runBudget_ns = cont->runBudget_ns; - U64 const runTimeMin_ns = runBudget_ns / 2; - int completed = 0; - BMK_runTime_t bestRunTime = cont->fastestRun; - - while (!completed) { - BMK_runOutcome_t runResult; - - /* Overheat protection */ - if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { - DEBUGOUTPUT("\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - cont->coolTime = UTIL_getTime(); - } - - /* reinitialize capacity */ - runResult = BMK_benchFunction(benchFn, benchPayload, - initFn, initPayload, - blockCount, - srcBlockBuffers, srcBlockSizes, - dstBlockBuffers, dstBlockCapacities, - blockResults, - cont->nbLoops); - - if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ - return BMK_runOutcome_error(); - } - - { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); - U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; - - cont->timeSpent_ns += loopDuration_ns; - - /* estimate nbLoops for next run to last approximately 1 second */ - if (loopDuration_ns > (runBudget_ns / 50)) { - U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); - cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; - } else { - /* previous run was too short : blindly increase workload by x multiplier */ - const unsigned multiplier = 10; - assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ - cont->nbLoops *= multiplier; - } - - if(loopDuration_ns < runTimeMin_ns) { - /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ - assert(completed == 0); - continue; - } else { - if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { - bestRunTime = newRunTime; - } - completed = 1; - } - } - } /* while (!completed) */ - - return BMK_setValid_runTime(bestRunTime); -} - - /* ================================================================= */ /* Benchmark Zstandard, mem-to-mem scenarios */ /* ================================================================= */ @@ -601,7 +380,7 @@ BMK_benchMemAdvancedNoAlloc( cPtr += cCapacities[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; - if (BMK_decodeOnly) { + if (adv->mode == BMK_decodeOnly) { assert(nbBlocks==0); cSizes[nbBlocks] = thisBlockSize; benchResult.cSize = thisBlockSize; @@ -624,14 +403,41 @@ 
BMK_benchMemAdvancedNoAlloc( U32 markNb = 0; int compressionCompleted = (adv->mode == BMK_decodeOnly); int decompressionCompleted = (adv->mode == BMK_compressOnly); + BMK_benchParams_t cbp, dbp; BMK_initCCtxArgs cctxprep; BMK_initDCtxArgs dctxprep; + + cbp.benchFn = local_defaultCompress; + cbp.benchPayload = cctx; + cbp.initFn = local_initCCtx; + cbp.initPayload = &cctxprep; + cbp.errorFn = ZSTD_isError; + cbp.blockCount = nbBlocks; + cbp.srcBuffers = srcPtrs; + cbp.srcSizes = srcSizes; + cbp.dstBuffers = cPtrs; + cbp.dstCapacities = cCapacities; + cbp.blockResults = cSizes; + cctxprep.cctx = cctx; cctxprep.dictBuffer = dictBuffer; cctxprep.dictBufferSize = dictBufferSize; cctxprep.cLevel = cLevel; cctxprep.comprParams = comprParams; cctxprep.adv = adv; + + dbp.benchFn = local_defaultDecompress; + dbp.benchPayload = dctx; + dbp.initFn = local_initDCtx; + dbp.initPayload = &dctxprep; + dbp.errorFn = ZSTD_isError; + dbp.blockCount = nbBlocks; + dbp.srcBuffers = (const void* const *) cPtrs; + dbp.srcSizes = cSizes; + dbp.dstBuffers = resPtrs; + dbp.dstCapacities = resSizes; + dbp.blockResults = NULL; + dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; @@ -641,14 +447,7 @@ BMK_benchMemAdvancedNoAlloc( while (!(compressionCompleted && decompressionCompleted)) { if (!compressionCompleted) { - BMK_runOutcome_t const cOutcome = - BMK_benchTimedFn( timeStateCompress, - &local_defaultCompress, cctx, - &local_initCCtx, &cctxprep, - nbBlocks, - srcPtrs, srcSizes, - cPtrs, cCapacities, - cSizes); + BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp); if (!BMK_isSuccessful_runOutcome(cOutcome)) { return BMK_benchOutcome_error(); @@ -675,14 +474,7 @@ BMK_benchMemAdvancedNoAlloc( } if(!decompressionCompleted) { - BMK_runOutcome_t const dOutcome = - BMK_benchTimedFn(timeStateDecompress, - &local_defaultDecompress, dctx, - &local_initDCtx, &dctxprep, - nbBlocks, - (const void *const *)cPtrs, cSizes, - resPtrs, 
resSizes, - NULL); + BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); if(!BMK_isSuccessful_runOutcome(dOutcome)) { return BMK_benchOutcome_error(); diff --git a/programs/bench.h b/programs/benchzstd.h similarity index 61% rename from programs/bench.h rename to programs/benchzstd.h index 13ca5b50b..9860adf16 100644 --- a/programs/bench.h +++ b/programs/benchzstd.h @@ -8,12 +8,18 @@ * You may select, at your option, one of the above-listed licenses. */ + /* benchzstd : + * benchmark Zstandard compression / decompression + * over a set of files or buffers + * and display progress result and final summary + */ + #if defined (__cplusplus) extern "C" { #endif -#ifndef BENCH_H_121279284357 -#define BENCH_H_121279284357 +#ifndef BENCH_ZSTD_H_3242387 +#define BENCH_ZSTD_H_3242387 /* === Dependencies === */ #include /* size_t */ @@ -142,9 +148,9 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced( * .cMem : memory budget required for the compression context */ BMK_benchOutcome_t BMK_syntheticTest( - int cLevel, double compressibility, - const ZSTD_compressionParameters* compressionParams, - int displayLevel, const BMK_advancedParams_t* adv); + int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv); @@ -181,6 +187,7 @@ BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const void* dictBuffer, size_t dictBufferSize, int displayLevel, const char* displayName); + /* BMK_benchMemAdvanced() : same as BMK_benchMem() * with following additional options : * dstBuffer - destination buffer to write compressed output in, NULL if none provided. 
@@ -197,106 +204,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, -/* ==== Benchmarking any function, iterated on a set of blocks ==== */ -typedef struct { - unsigned long long nanoSecPerRun; /* time per iteration */ - size_t sumOfReturn; /* sum of return values */ -} BMK_runTime_t; - -VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t); - -/* check first if the return structure represents an error or a valid result */ -int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); - -/* extract result from variant type. - * note : this function will abort() program execution if result is not valid - * check result validity first, by using BMK_isSuccessful_runOutcome() - */ -BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); - - - -typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); -typedef size_t (*BMK_initFn_t)(void* initPayload); - - -/* BMK_benchFunction() : - * This function times the execution of 2 argument functions, benchFn and initFn */ - -/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) - * is run nbLoops times - * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning. - * This argument can be NULL, in which case nothing is run. - * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults - * srcBuffers - an array of buffers to be operated on by benchFn - * srcSizes - an array of the sizes of above buffers - * dstBuffers - an array of buffers to be written into by benchFn - * dstCapacities - an array of the capacities of above buffers - * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. - * nbLoops - defines number of times benchFn is run. - * @return: a variant, which express either an error, or can generate a valid BMK_runTime_t result. 
- * Use BMK_isSuccessful_runOutcome() to check if function was successful. - * If yes, extract the result with BMK_extract_runTime(), - * it will contain : - * .sumOfReturn : the sum of all return values of benchFn through all of blocks - * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) - * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, - * in which case, this value will be the total amount of bytes written into dstBuffer. - */ -BMK_runOutcome_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void *const * srcBuffers, const size_t* srcSizes, - void *const * dstBuffers, const size_t* dstCapacities, - size_t* blockResults, - unsigned nbLoops); - - - -/* ==== Benchmark any function, providing intermediate results ==== */ - -/* state information tracking benchmark session */ -typedef struct BMK_timedFnState_s BMK_timedFnState_t; - -/* BMK_createTimedFnState() and BMK_resetTimedFnState() : - * Create/Set BMK_timedFnState_t for next benchmark session, - * which shall last a minimum of total_ms milliseconds, - * producing intermediate results, paced at interval of (approximately) run_ms. - */ -BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); -void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); -void BMK_freeTimedFnState(BMK_timedFnState_t* state); - - -/* Tells if duration of all benchmark runs has exceeded total_ms - */ -int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); - - -/* BMK_benchTimedFn() : - * Similar to BMK_benchFunction(), most arguments being identical. - * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. - * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. 
- * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) - * call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms - * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() - */ -BMK_runOutcome_t BMK_benchTimedFn( - BMK_timedFnState_t* timedFnState, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void *const * srcBlockBuffers, const size_t* srcBlockSizes, - void *const * dstBlockBuffers, const size_t* dstBlockCapacities, - size_t* blockResults); - - - - - -#endif /* BENCH_H_121279284357 */ +#endif /* BENCH_ZSTD_H_3242387 */ #if defined (__cplusplus) } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 1545d1cac..153de961d 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -32,7 +32,7 @@ #include /* errno */ #include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ #ifndef ZSTD_NOBENCH -# include "bench.h" /* BMK_benchFiles */ +# include "benchzstd.h" /* BMK_benchFiles */ #endif #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ diff --git a/tests/Makefile b/tests/Makefile index da68bddcf..f363001b1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -132,18 +132,18 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG # turn off assert() for speed measurements fullbench fullbench32 : $(ZSTD_FILES) -fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c +fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ fullbench-lib : zstd-staticLib -fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c +fullbench-lib : $(PRGDIR)/datagen.c 
$(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a # note : broken : requires unavailable symbols fullbench-dll : zstd-dll fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd -fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c fullbench.c +fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c # $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) @@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) paramgrill : DEBUGFLAGS = # turn off assert() by default for speed measurements -paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c +paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c $(CC) $(FLAGS) $^ -lm -o $@$(EXT) datagen : $(PRGDIR)/datagen.c datagencli.c diff --git a/tests/fullbench.c b/tests/fullbench.c index faf8fe759..409de6395 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -30,7 +30,8 @@ #include "zstd.h" /* ZSTD_versionString */ #include "util.h" /* time functions */ #include "datagen.h" -#include "bench.h" /* CustomBench*/ +#include "benchfn.h" /* CustomBench*/ +#include "benchzstd.h" /* MB_UNIT */ /*_************************************ @@ -514,20 +515,28 @@ static size_t benchMem(U32 benchNb, /* benchmark loop */ { BMK_timedFnState_t* const tfs = BMK_createTimedFnState(g_nbIterations * 1000, 1000); + void* const avoidStrictAliasingPtr = &dstBuff; + BMK_benchParams_t bp; BMK_runTime_t bestResult; bestResult.sumOfReturn = 0; bestResult.nanoSecPerRun = (unsigned long long)(-1LL); assert(tfs != NULL); + + bp.benchFn = benchFunction; + bp.benchPayload = buff2; + bp.initFn = NULL; + bp.initPayload = NULL; + bp.errorFn = ZSTD_isError; + bp.blockCount = 1; + bp.srcBuffers = &src; + bp.srcSizes = 
&srcSize; + bp.dstBuffers = (void* const*) avoidStrictAliasingPtr; /* circumvent strict aliasing warning on gcc-8, + * because gcc considers that `void* const *` and `void**` are 2 different types */ + bp.dstCapacities = &dstBuffSize; + bp.blockResults = NULL; + for (;;) { - void* const dstBuffv = dstBuff; - BMK_runOutcome_t const bOutcome = - BMK_benchTimedFn( tfs, - benchFunction, buff2, - NULL, NULL, /* initFn */ - 1, /* blockCount */ - &src, &srcSize, - &dstBuffv, &dstBuffSize, - NULL); + BMK_runOutcome_t const bOutcome = BMK_benchTimedFn(tfs, bp); if (!BMK_isSuccessful_runOutcome(bOutcome)) { DISPLAY("ERROR benchmarking function ! ! \n"); diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 7a4be854a..cd272d9af 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -25,7 +25,8 @@ #include "datagen.h" #include "xxhash.h" #include "util.h" -#include "bench.h" +#include "benchfn.h" +#include "benchzstd.h" #include "zstd_errors.h" #include "zstd_internal.h" /* should not be needed */ @@ -1397,7 +1398,8 @@ static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTa /* nbSeconds used in same way as in BMK_advancedParams_t */ /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */ /* dictionary nullable, nothing else though. 
*/ -/* note : it would be better if this function was in bench.c, sharing code with benchMemAdvanced(), since it's technically a part of it */ +/* note : it would be a lot better if this function was present in benchzstd.c, + * sharing code with benchMemAdvanced(), since it's technically a part of it */ static BMK_benchOutcome_t BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, int cLevel, const paramValues_t* comprParams, @@ -1438,13 +1440,40 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, int decompressionCompleted = (mode == BMK_compressOnly); BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000); + BMK_benchParams_t cbp, dbp; BMK_initCCtxArgs cctxprep; BMK_initDCtxArgs dctxprep; + + cbp.benchFn = local_defaultCompress; + cbp.benchPayload = cctx; + cbp.initFn = local_initCCtx; + cbp.initPayload = &cctxprep; + cbp.errorFn = ZSTD_isError; + cbp.blockCount = nbBlocks; + cbp.srcBuffers = srcPtrs; + cbp.srcSizes = srcSizes; + cbp.dstBuffers = dstPtrs; + cbp.dstCapacities = dstCapacities; + cbp.blockResults = dstSizes; + cctxprep.cctx = cctx; cctxprep.dictBuffer = dictBuffer; cctxprep.dictBufferSize = dictBufferSize; cctxprep.cLevel = cLevel; cctxprep.comprParams = comprParams; + + dbp.benchFn = local_defaultDecompress; + dbp.benchPayload = dctx; + dbp.initFn = local_initDCtx; + dbp.initPayload = &dctxprep; + dbp.errorFn = ZSTD_isError; + dbp.blockCount = nbBlocks; + dbp.srcBuffers = (const void* const *) dstPtrs; + dbp.srcSizes = dstSizes; /* per-block results stored by the compression pass (cbp.blockResults), same as the call being replaced; not the buffer capacities */ + dbp.dstBuffers = resPtrs; + dbp.dstCapacities = resSizes; + dbp.blockResults = NULL; + dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; @@ -1452,13 +1481,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, assert(timeStateCompress != NULL); assert(timeStateDecompress != NULL); while(!compressionCompleted) { - BMK_runOutcome_t const
cOutcome = BMK_benchTimedFn(timeStateCompress, - &local_defaultCompress, cctx, - &local_initCCtx, &cctxprep, - nbBlocks, - srcPtrs, srcSizes, - dstPtrs, dstCapacities, - dstSizes); + BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp); if (!BMK_isSuccessful_runOutcome(cOutcome)) { BMK_benchOutcome_t bOut; @@ -1476,13 +1499,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, } while (!decompressionCompleted) { - BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, - &local_defaultDecompress, dctx, - &local_initDCtx, &dctxprep, - nbBlocks, - (const void* const*)dstPtrs, dstSizes, - resPtrs, resSizes, - NULL); + BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); if (!BMK_isSuccessful_runOutcome(dOutcome)) { BMK_benchOutcome_t bOut;