mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 16:56:49 +02:00
Merge pull request #1415 from facebook/benchfn
Separating benchfn and benchzstd
This commit is contained in:
commit
c584e84e68
@ -168,13 +168,13 @@
|
||||
<ClCompile Include="..\..\..\lib\common\xxhash.c" />
|
||||
<ClCompile Include="..\..\..\programs\util.c" />
|
||||
<ClCompile Include="..\..\..\programs\datagen.c" />
|
||||
<ClCompile Include="..\..\..\programs\bench.c" />
|
||||
<ClCompile Include="..\..\..\programs\benchfn.c" />
|
||||
<ClCompile Include="..\..\..\tests\fullbench.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\lib\zstd.h" />
|
||||
<ClInclude Include="..\..\..\programs\datagen.h" />
|
||||
<ClInclude Include="..\..\..\programs\bench.h" />
|
||||
<ClInclude Include="..\..\..\programs\benchfn.h" />
|
||||
<ClInclude Include="..\..\..\programs\util.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
@ -179,7 +179,7 @@
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\programs\datagen.c" />
|
||||
<ClCompile Include="..\..\..\programs\bench.c" />
|
||||
<ClCompile Include="..\..\..\programs\benchfn.c" />
|
||||
<ClCompile Include="..\..\..\tests\fullbench.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -202,7 +202,7 @@
|
||||
<ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
|
||||
<ClInclude Include="..\..\..\programs\datagen.h" />
|
||||
<ClInclude Include="..\..\..\programs\util.h" />
|
||||
<ClInclude Include="..\..\..\programs\bench.h" />
|
||||
<ClInclude Include="..\..\..\programs\benchfn.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
|
@ -53,7 +53,8 @@
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v06.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v07.c" />
|
||||
<ClCompile Include="..\..\..\programs\util.c" />
|
||||
<ClCompile Include="..\..\..\programs\bench.c" />
|
||||
<ClCompile Include="..\..\..\programs\benchfn.c" />
|
||||
<ClCompile Include="..\..\..\programs\benchzstd.c" />
|
||||
<ClCompile Include="..\..\..\programs\datagen.c" />
|
||||
<ClCompile Include="..\..\..\programs\dibio.c" />
|
||||
<ClCompile Include="..\..\..\programs\fileio.c" />
|
||||
@ -87,7 +88,7 @@
|
||||
<ClInclude Include="..\..\..\lib\legacy\zstd_v05.h" />
|
||||
<ClInclude Include="..\..\..\lib\legacy\zstd_v06.h" />
|
||||
<ClInclude Include="..\..\..\lib\legacy\zstd_v07.h" />
|
||||
<ClInclude Include="..\..\..\programs\bench.h" />
|
||||
<ClInclude Include="..\..\..\programs\benchzstd.h" />
|
||||
<ClInclude Include="..\..\..\programs\datagen.h" />
|
||||
<ClInclude Include="..\..\..\programs\dibio.h" />
|
||||
<ClInclude Include="..\..\..\programs\fileio.h" />
|
||||
|
@ -26,7 +26,7 @@ IF (MSVC)
|
||||
SET(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc)
|
||||
ENDIF (MSVC)
|
||||
|
||||
ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources})
|
||||
ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources})
|
||||
TARGET_LINK_LIBRARIES(zstd libzstd_static)
|
||||
IF (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
|
||||
TARGET_LINK_LIBRARIES(zstd rt)
|
||||
|
@ -43,13 +43,13 @@ INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/c
|
||||
ADD_EXECUTABLE(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)
|
||||
TARGET_LINK_LIBRARIES(datagen libzstd_static)
|
||||
|
||||
ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/bench.c ${TESTS_DIR}/fullbench.c)
|
||||
ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
|
||||
TARGET_LINK_LIBRARIES(fullbench libzstd_static)
|
||||
|
||||
ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/fuzzer.c)
|
||||
TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
|
||||
|
||||
IF (UNIX)
|
||||
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/paramgrill.c)
|
||||
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/paramgrill.c)
|
||||
TARGET_LINK_LIBRARIES(paramgrill libzstd_static m) #m is math library
|
||||
ENDIF (UNIX)
|
||||
|
@ -28,14 +28,14 @@ default: largeNbDicts
|
||||
|
||||
all : largeNbDicts
|
||||
|
||||
largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
|
||||
largeNbDicts: util.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.PHONY: $(LIBZSTD)
|
||||
$(LIBZSTD):
|
||||
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
|
||||
|
||||
bench.o : $(PROGDIR)/bench.c
|
||||
benchfn.o : $(PROGDIR)/benchfn.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
datagen.o: $(PROGDIR)/datagen.c
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "util.h"
|
||||
#include "bench.h"
|
||||
#include "benchfn.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zdict.h"
|
||||
@ -538,15 +538,22 @@ static int benchMem(slice_collection_t dstBlocks,
|
||||
BMK_timedFnState_t* const benchState =
|
||||
BMK_createTimedFnState(total_time_ms, ms_per_round);
|
||||
decompressInstructions di = createDecompressInstructions(dictionaries);
|
||||
BMK_benchParams_t const bp = {
|
||||
.benchFn = decompress,
|
||||
.benchPayload = &di,
|
||||
.initFn = NULL,
|
||||
.initPayload = NULL,
|
||||
.errorFn = ZSTD_isError,
|
||||
.blockCount = dstBlocks.nbSlices,
|
||||
.srcBuffers = (const void* const*) srcBlocks.slicePtrs,
|
||||
.srcSizes = srcBlocks.capacities,
|
||||
.dstBuffers = dstBlocks.slicePtrs,
|
||||
.dstCapacities = dstBlocks.capacities,
|
||||
.blockResults = NULL
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState,
|
||||
decompress, &di,
|
||||
NULL, NULL,
|
||||
dstBlocks.nbSlices,
|
||||
(const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities,
|
||||
dstBlocks.slicePtrs, dstBlocks.capacities,
|
||||
NULL);
|
||||
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
|
||||
CONTROL(BMK_isSuccessful_runOutcome(outcome));
|
||||
|
||||
BMK_runTime_t const result = BMK_extract_runTime(outcome);
|
||||
|
@ -160,7 +160,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
|
||||
zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
|
||||
zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
|
||||
zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o bench.o datagen.o dibio.o
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
|
||||
@echo "$(THREAD_MSG)"
|
||||
@echo "$(ZLIB_MSG)"
|
||||
@echo "$(LZMA_MSG)"
|
||||
@ -178,13 +178,13 @@ zstd-release: zstd
|
||||
zstd32 : CPPFLAGS += $(THREAD_CPP)
|
||||
zstd32 : LDFLAGS += $(THREAD_LD)
|
||||
zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c bench.c datagen.c dibio.c
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
|
||||
ifneq (,$(filter Windows%,$(OS)))
|
||||
windres/generate_res.bat
|
||||
endif
|
||||
$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
|
||||
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c bench.o datagen.o dibio.o
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
|
||||
|
||||
zstd-nomt : THREAD_CPP :=
|
||||
|
263
programs/benchfn.c
Normal file
263
programs/benchfn.c
Normal file
@ -0,0 +1,263 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Includes
|
||||
***************************************/
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <string.h> /* memset */
|
||||
#include <stdio.h> /* fprintf, fopen */
|
||||
#undef NDEBUG /* assert must not be disabled */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "mem.h"
|
||||
#include "benchfn.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
|
||||
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
|
||||
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
|
||||
#define COOLPERIOD_SEC 10
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
#define GB *(1U<<30)
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Errors
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
|
||||
|
||||
/* error without displaying */
|
||||
#define RETURN_QUIET_ERROR(retValue, ...) { \
|
||||
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
|
||||
DEBUGOUTPUT("Error : "); \
|
||||
DEBUGOUTPUT(__VA_ARGS__); \
|
||||
DEBUGOUTPUT(" \n"); \
|
||||
return retValue; \
|
||||
}
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Benchmarking an arbitrary function
|
||||
***************************************/
|
||||
|
||||
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
|
||||
{
|
||||
return outcome.error_tag_never_ever_use_directly == 0;
|
||||
}
|
||||
|
||||
/* warning : this function will stop program execution if outcome is invalid !
|
||||
* check outcome validity first, using BMK_isValid_runResult() */
|
||||
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
|
||||
{
|
||||
assert(outcome.error_tag_never_ever_use_directly == 0);
|
||||
return outcome.internal_never_ever_use_directly;
|
||||
}
|
||||
|
||||
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
|
||||
{
|
||||
assert(outcome.error_tag_never_ever_use_directly != 0);
|
||||
return outcome.error_result_never_ever_use_directly;
|
||||
}
|
||||
|
||||
static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
|
||||
{
|
||||
BMK_runOutcome_t b;
|
||||
memset(&b, 0, sizeof(b));
|
||||
b.error_tag_never_ever_use_directly = 1;
|
||||
b.error_result_never_ever_use_directly = errorResult;
|
||||
return b;
|
||||
}
|
||||
|
||||
static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
|
||||
{
|
||||
BMK_runOutcome_t outcome;
|
||||
outcome.error_tag_never_ever_use_directly = 0;
|
||||
outcome.internal_never_ever_use_directly = runTime;
|
||||
return outcome;
|
||||
}
|
||||
|
||||
|
||||
/* initFn will be measured once, benchFn will be measured `nbLoops` times */
|
||||
/* initFn is optional, provide NULL if none */
|
||||
/* benchFn must return a size_t value that errorFn can interpret */
|
||||
/* takes # of blocks and list of size & stuff for each. */
|
||||
/* can report result of benchFn for each block into blockResult. */
|
||||
/* blockResult is optional, provide NULL if this information is not required */
|
||||
/* note : time per loop can be reported as zero if run time < timer resolution */
|
||||
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
unsigned nbLoops)
|
||||
{
|
||||
size_t dstSize = 0;
|
||||
nbLoops += !nbLoops; /* minimum nbLoops is 1 */
|
||||
|
||||
/* init */
|
||||
{ size_t i;
|
||||
for(i = 0; i < p.blockCount; i++) {
|
||||
memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
|
||||
}
|
||||
#if 0
|
||||
/* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
|
||||
* (Makes former slower)
|
||||
*/
|
||||
UTIL_sleepMilli(5); /* give processor time to other processes */
|
||||
UTIL_waitForNextTick();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* benchmark */
|
||||
{ UTIL_time_t const clockStart = UTIL_getTime();
|
||||
unsigned loopNb, blockNb;
|
||||
if (p.initFn != NULL) p.initFn(p.initPayload);
|
||||
for (loopNb = 0; loopNb < nbLoops; loopNb++) {
|
||||
for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
|
||||
size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
|
||||
p.dstBuffers[blockNb], p.dstCapacities[blockNb],
|
||||
p.benchPayload);
|
||||
if (loopNb == 0) {
|
||||
if (p.blockResults != NULL) p.blockResults[blockNb] = res;
|
||||
if ((p.errorFn != NULL) && (p.errorFn(res))) {
|
||||
RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
|
||||
"Function benchmark failed on block %u (of size %u) with error %i",
|
||||
blockNb, (U32)p.srcSizes[blockNb], (int)res);
|
||||
}
|
||||
dstSize += res;
|
||||
} }
|
||||
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
||||
|
||||
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
/* ==== Benchmarking any function, providing intermediate results ==== */
|
||||
|
||||
struct BMK_timedFnState_s {
|
||||
U64 timeSpent_ns;
|
||||
U64 timeBudget_ns;
|
||||
U64 runBudget_ns;
|
||||
BMK_runTime_t fastestRun;
|
||||
unsigned nbLoops;
|
||||
UTIL_time_t coolTime;
|
||||
}; /* typedef'd to BMK_timedFnState_t within bench.h */
|
||||
|
||||
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
|
||||
if (r == NULL) return NULL; /* malloc() error */
|
||||
BMK_resetTimedFnState(r, total_ms, run_ms);
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
|
||||
free(state);
|
||||
}
|
||||
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
if (!total_ms) total_ms = 1 ;
|
||||
if (!run_ms) run_ms = 1;
|
||||
if (run_ms > total_ms) run_ms = total_ms;
|
||||
timedFnState->timeSpent_ns = 0;
|
||||
timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
|
||||
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
|
||||
timedFnState->nbLoops = 1;
|
||||
timedFnState->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* Tells if nb of seconds set in timedFnState for all runs is spent.
|
||||
* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
|
||||
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
|
||||
{
|
||||
return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
|
||||
}
|
||||
|
||||
|
||||
#undef MIN
|
||||
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
|
||||
|
||||
#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
|
||||
|
||||
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
|
||||
BMK_benchParams_t p)
|
||||
{
|
||||
U64 const runBudget_ns = cont->runBudget_ns;
|
||||
U64 const runTimeMin_ns = runBudget_ns / 2;
|
||||
int completed = 0;
|
||||
BMK_runTime_t bestRunTime = cont->fastestRun;
|
||||
|
||||
while (!completed) {
|
||||
BMK_runOutcome_t runResult;
|
||||
|
||||
/* Overheat protection */
|
||||
if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
|
||||
DEBUGOUTPUT("\rcooling down ... \r");
|
||||
UTIL_sleep(COOLPERIOD_SEC);
|
||||
cont->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* reinitialize capacity */
|
||||
runResult = BMK_benchFunction(p, cont->nbLoops);
|
||||
|
||||
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
|
||||
return runResult;
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
|
||||
U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
|
||||
cont->timeSpent_ns += loopDuration_ns;
|
||||
|
||||
/* estimate nbLoops for next run to last approximately 1 second */
|
||||
if (loopDuration_ns > (runBudget_ns / 50)) {
|
||||
U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
|
||||
} else {
|
||||
/* previous run was too short : blindly increase workload by x multiplier */
|
||||
const unsigned multiplier = 10;
|
||||
assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
|
||||
cont->nbLoops *= multiplier;
|
||||
}
|
||||
|
||||
if(loopDuration_ns < runTimeMin_ns) {
|
||||
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
|
||||
assert(completed == 0);
|
||||
continue;
|
||||
} else {
|
||||
if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
|
||||
bestRunTime = newRunTime;
|
||||
}
|
||||
completed = 1;
|
||||
}
|
||||
}
|
||||
} /* while (!completed) */
|
||||
|
||||
return BMK_setValid_runTime(bestRunTime);
|
||||
}
|
167
programs/benchfn.h
Normal file
167
programs/benchfn.h
Normal file
@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* benchfn :
|
||||
* benchmark any function on a set of input
|
||||
* providing result in nanoSecPerRun
|
||||
* or detecting and returning an error
|
||||
*/
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BENCH_FN_H_23876
|
||||
#define BENCH_FN_H_23876
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/* ==== Benchmark any function, iterated on a set of blocks ==== */
|
||||
|
||||
/* BMK_runTime_t: valid result return type */
|
||||
|
||||
typedef struct {
|
||||
unsigned long long nanoSecPerRun; /* time per iteration (over all blocks) */
|
||||
size_t sumOfReturn; /* sum of return values */
|
||||
} BMK_runTime_t;
|
||||
|
||||
|
||||
/* BMK_runOutcome_t:
|
||||
* type expressing the outcome of a benchmark run by BMK_benchFunction(),
|
||||
* which can be either valid or invalid.
|
||||
* benchmark outcome can be invalid if errorFn is provided.
|
||||
* BMK_runOutcome_t must be considered "opaque" : never access its members directly.
|
||||
* Instead, use its assigned methods :
|
||||
* BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult.
|
||||
* The structure is only described here to allow its allocation on stack. */
|
||||
|
||||
typedef struct {
|
||||
BMK_runTime_t internal_never_ever_use_directly;
|
||||
size_t error_result_never_ever_use_directly;
|
||||
int error_tag_never_ever_use_directly;
|
||||
} BMK_runOutcome_t;
|
||||
|
||||
|
||||
/* prototypes for benchmarked functions */
|
||||
typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
|
||||
typedef size_t (*BMK_initFn_t)(void* initPayload);
|
||||
typedef unsigned (*BMK_errorFn_t)(size_t);
|
||||
|
||||
|
||||
/* BMK_benchFunction() parameters are provided through following structure.
|
||||
* This is preferable for readability,
|
||||
* as the number of parameters required is pretty large.
|
||||
* No initializer is provided, because it doesn't make sense to provide some "default" :
|
||||
* all parameters should be specified by the caller */
|
||||
typedef struct {
|
||||
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
|
||||
void* benchPayload; /* pass custom parameters to benchFn :
|
||||
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
|
||||
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
|
||||
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
|
||||
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
|
||||
* errorFn can be NULL, in which case no check is performed.
|
||||
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
|
||||
* Execution is stopped as soon as an error is detected.
|
||||
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
|
||||
size_t blockCount; /* number of blocks to operate benchFn on.
|
||||
* It's also the size of all array parameters :
|
||||
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
|
||||
const void *const * srcBuffers; /* array of buffers to be operated on by benchFn */
|
||||
const size_t* srcSizes; /* array of the sizes of srcBuffers buffers */
|
||||
void *const * dstBuffers;/* array of buffers to be written into by benchFn */
|
||||
const size_t* dstCapacities; /* array of the capacities of dstBuffers buffers */
|
||||
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
|
||||
} BMK_benchParams_t;
|
||||
|
||||
|
||||
/* BMK_benchFunction() :
|
||||
* This function benchmarks benchFn and initFn, providing a result.
|
||||
*
|
||||
* params : see description of BMK_benchParams_t above.
|
||||
* nbLoops: defines number of times benchFn is run over the full set of blocks.
|
||||
* Minimum value is 1. A 0 is interpreted as a 1.
|
||||
*
|
||||
* @return: can express either an error or a successful result.
|
||||
* Use BMK_isSuccessful_runOutcome() to check if benchmark was successful.
|
||||
* If yes, extract the result with BMK_extract_runTime(),
|
||||
* it will contain :
|
||||
* .sumOfReturn : the sum of all return values of benchFn through all of blocks
|
||||
* .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
|
||||
* .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
|
||||
* in which case, this value will be the total amount of bytes written into dstBuffer.
|
||||
*
|
||||
* blockResults : when provided (!= NULL), and when benchmark is successful,
|
||||
* params.blockResults contains all return values of `benchFn` over all blocks.
|
||||
* when provided (!= NULL), and when benchmark failed,
|
||||
* params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block.
|
||||
*/
|
||||
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops);
|
||||
|
||||
|
||||
|
||||
/* check first if the benchmark was successful or not */
|
||||
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
|
||||
|
||||
/* If the benchmark was successful, extract the result.
|
||||
* note : this function will abort() program execution if benchmark failed !
|
||||
* always check if benchmark was successful first !
|
||||
*/
|
||||
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
|
||||
|
||||
/* when benchmark failed, it means one invocation of `benchFn` failed.
|
||||
* The failure was detected by `errorFn`, operating on return values of `benchFn`.
|
||||
* Returns the faulty return value.
|
||||
* note : this function will abort() program execution if benchmark did not failed.
|
||||
* always check if benchmark failed first !
|
||||
*/
|
||||
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome);
|
||||
|
||||
|
||||
|
||||
/* ==== Benchmark any function, returning intermediate results ==== */
|
||||
|
||||
/* state information tracking benchmark session */
|
||||
typedef struct BMK_timedFnState_s BMK_timedFnState_t;
|
||||
|
||||
/* BMK_benchTimedFn() :
|
||||
* Similar to BMK_benchFunction(), most arguments being identical.
|
||||
* Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
|
||||
* Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
|
||||
* Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
|
||||
* call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms
|
||||
* Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
|
||||
*/
|
||||
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState,
|
||||
BMK_benchParams_t params);
|
||||
|
||||
/* Tells if duration of all benchmark runs has exceeded total_ms
|
||||
*/
|
||||
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
|
||||
|
||||
/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
|
||||
* Create/Set BMK_timedFnState_t for next benchmark session,
|
||||
* which shall last a minimum of total_ms milliseconds,
|
||||
* producing intermediate results, paced at interval of (approximately) run_ms.
|
||||
*/
|
||||
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state);
|
||||
|
||||
|
||||
|
||||
#endif /* BENCH_FN_H_23876 */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
@ -9,7 +9,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* **************************************
|
||||
* Tuning parameters
|
||||
****************************************/
|
||||
@ -18,14 +17,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
/* **************************************
|
||||
* Compiler Warnings
|
||||
****************************************/
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Includes
|
||||
***************************************/
|
||||
@ -36,12 +27,13 @@
|
||||
#include <stdio.h> /* fprintf, fopen */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "benchfn.h"
|
||||
#include "mem.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#include "xxhash.h"
|
||||
#include "bench.h"
|
||||
#include "benchzstd.h"
|
||||
#include "zstd_errors.h"
|
||||
|
||||
|
||||
@ -276,219 +268,6 @@ static size_t local_defaultDecompress(
|
||||
}
|
||||
|
||||
|
||||
/*=== Benchmarking an arbitrary function ===*/
|
||||
|
||||
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
|
||||
{
|
||||
return outcome.tag == 0;
|
||||
}
|
||||
|
||||
/* warning : this function will stop program execution if outcome is invalid !
|
||||
* check outcome validity first, using BMK_isValid_runResult() */
|
||||
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
|
||||
{
|
||||
assert(outcome.tag == 0);
|
||||
return outcome.internal_never_use_directly;
|
||||
}
|
||||
|
||||
static BMK_runOutcome_t BMK_runOutcome_error(void)
|
||||
{
|
||||
BMK_runOutcome_t b;
|
||||
memset(&b, 0, sizeof(b));
|
||||
b.tag = 1;
|
||||
return b;
|
||||
}
|
||||
|
||||
static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
|
||||
{
|
||||
BMK_runOutcome_t outcome;
|
||||
outcome.tag = 0;
|
||||
outcome.internal_never_use_directly = runTime;
|
||||
return outcome;
|
||||
}
|
||||
|
||||
|
||||
/* initFn will be measured once, benchFn will be measured `nbLoops` times */
|
||||
/* initFn is optional, provide NULL if none */
|
||||
/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */
|
||||
/* takes # of blocks and list of size & stuff for each. */
|
||||
/* can report result of benchFn for each block into blockResult. */
|
||||
/* blockResult is optional, provide NULL if this information is not required */
|
||||
/* note : time per loop could be zero if run time < timer resolution */
|
||||
BMK_runOutcome_t BMK_benchFunction(
|
||||
BMK_benchFn_t benchFn, void* benchPayload,
|
||||
BMK_initFn_t initFn, void* initPayload,
|
||||
size_t blockCount,
|
||||
const void* const * srcBlockBuffers, const size_t* srcBlockSizes,
|
||||
void* const * dstBlockBuffers, const size_t* dstBlockCapacities,
|
||||
size_t* blockResults,
|
||||
unsigned nbLoops)
|
||||
{
|
||||
size_t dstSize = 0;
|
||||
|
||||
if(!nbLoops) {
|
||||
RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero ");
|
||||
}
|
||||
|
||||
/* init */
|
||||
{ size_t i;
|
||||
for(i = 0; i < blockCount; i++) {
|
||||
memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */
|
||||
}
|
||||
#if 0
|
||||
/* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
|
||||
* (Makes former slower)
|
||||
*/
|
||||
UTIL_sleepMilli(5); /* give processor time to other processes */
|
||||
UTIL_waitForNextTick();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* benchmark */
|
||||
{ UTIL_time_t const clockStart = UTIL_getTime();
|
||||
unsigned loopNb, blockNb;
|
||||
if (initFn != NULL) initFn(initPayload);
|
||||
for (loopNb = 0; loopNb < nbLoops; loopNb++) {
|
||||
for (blockNb = 0; blockNb < blockCount; blockNb++) {
|
||||
size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb],
|
||||
dstBlockBuffers[blockNb], dstBlockCapacities[blockNb],
|
||||
benchPayload);
|
||||
if(ZSTD_isError(res)) {
|
||||
RETURN_QUIET_ERROR(2, BMK_runOutcome_t,
|
||||
"Function benchmark failed on block %u of size %u : %s",
|
||||
blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res));
|
||||
} else if (loopNb == 0) {
|
||||
dstSize += res;
|
||||
if (blockResults != NULL) blockResults[blockNb] = res;
|
||||
} }
|
||||
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
||||
|
||||
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
/* ==== Benchmarking any function, providing intermediate results ==== */
|
||||
|
||||
struct BMK_timedFnState_s {
|
||||
U64 timeSpent_ns;
|
||||
U64 timeBudget_ns;
|
||||
U64 runBudget_ns;
|
||||
BMK_runTime_t fastestRun;
|
||||
unsigned nbLoops;
|
||||
UTIL_time_t coolTime;
|
||||
}; /* typedef'd to BMK_timedFnState_t within bench.h */
|
||||
|
||||
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
|
||||
if (r == NULL) return NULL; /* malloc() error */
|
||||
BMK_resetTimedFnState(r, total_ms, run_ms);
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
|
||||
free(state);
|
||||
}
|
||||
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
if (!total_ms) total_ms = 1 ;
|
||||
if (!run_ms) run_ms = 1;
|
||||
if (run_ms > total_ms) run_ms = total_ms;
|
||||
timedFnState->timeSpent_ns = 0;
|
||||
timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
|
||||
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
|
||||
timedFnState->nbLoops = 1;
|
||||
timedFnState->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* Tells if nb of seconds set in timedFnState for all runs is spent.
|
||||
* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
|
||||
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
|
||||
{
|
||||
return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
|
||||
}
|
||||
|
||||
|
||||
#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
|
||||
|
||||
BMK_runOutcome_t BMK_benchTimedFn(
|
||||
BMK_timedFnState_t* cont,
|
||||
BMK_benchFn_t benchFn, void* benchPayload,
|
||||
BMK_initFn_t initFn, void* initPayload,
|
||||
size_t blockCount,
|
||||
const void* const* srcBlockBuffers, const size_t* srcBlockSizes,
|
||||
void * const * dstBlockBuffers, const size_t * dstBlockCapacities,
|
||||
size_t* blockResults)
|
||||
{
|
||||
U64 const runBudget_ns = cont->runBudget_ns;
|
||||
U64 const runTimeMin_ns = runBudget_ns / 2;
|
||||
int completed = 0;
|
||||
BMK_runTime_t bestRunTime = cont->fastestRun;
|
||||
|
||||
while (!completed) {
|
||||
BMK_runOutcome_t runResult;
|
||||
|
||||
/* Overheat protection */
|
||||
if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
|
||||
DEBUGOUTPUT("\rcooling down ... \r");
|
||||
UTIL_sleep(COOLPERIOD_SEC);
|
||||
cont->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* reinitialize capacity */
|
||||
runResult = BMK_benchFunction(benchFn, benchPayload,
|
||||
initFn, initPayload,
|
||||
blockCount,
|
||||
srcBlockBuffers, srcBlockSizes,
|
||||
dstBlockBuffers, dstBlockCapacities,
|
||||
blockResults,
|
||||
cont->nbLoops);
|
||||
|
||||
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
|
||||
return BMK_runOutcome_error();
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
|
||||
U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
|
||||
cont->timeSpent_ns += loopDuration_ns;
|
||||
|
||||
/* estimate nbLoops for next run to last approximately 1 second */
|
||||
if (loopDuration_ns > (runBudget_ns / 50)) {
|
||||
U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
|
||||
} else {
|
||||
/* previous run was too short : blindly increase workload by x multiplier */
|
||||
const unsigned multiplier = 10;
|
||||
assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
|
||||
cont->nbLoops *= multiplier;
|
||||
}
|
||||
|
||||
if(loopDuration_ns < runTimeMin_ns) {
|
||||
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
|
||||
assert(completed == 0);
|
||||
continue;
|
||||
} else {
|
||||
if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
|
||||
bestRunTime = newRunTime;
|
||||
}
|
||||
completed = 1;
|
||||
}
|
||||
}
|
||||
} /* while (!completed) */
|
||||
|
||||
return BMK_setValid_runTime(bestRunTime);
|
||||
}
|
||||
|
||||
|
||||
/* ================================================================= */
|
||||
/* Benchmark Zstandard, mem-to-mem scenarios */
|
||||
/* ================================================================= */
|
||||
@ -601,7 +380,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
cPtr += cCapacities[nbBlocks];
|
||||
resPtr += thisBlockSize;
|
||||
remaining -= thisBlockSize;
|
||||
if (BMK_decodeOnly) {
|
||||
if (adv->mode == BMK_decodeOnly) {
|
||||
assert(nbBlocks==0);
|
||||
cSizes[nbBlocks] = thisBlockSize;
|
||||
benchResult.cSize = thisBlockSize;
|
||||
@ -624,14 +403,41 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
U32 markNb = 0;
|
||||
int compressionCompleted = (adv->mode == BMK_decodeOnly);
|
||||
int decompressionCompleted = (adv->mode == BMK_compressOnly);
|
||||
BMK_benchParams_t cbp, dbp;
|
||||
BMK_initCCtxArgs cctxprep;
|
||||
BMK_initDCtxArgs dctxprep;
|
||||
|
||||
cbp.benchFn = local_defaultCompress;
|
||||
cbp.benchPayload = cctx;
|
||||
cbp.initFn = local_initCCtx;
|
||||
cbp.initPayload = &cctxprep;
|
||||
cbp.errorFn = ZSTD_isError;
|
||||
cbp.blockCount = nbBlocks;
|
||||
cbp.srcBuffers = srcPtrs;
|
||||
cbp.srcSizes = srcSizes;
|
||||
cbp.dstBuffers = cPtrs;
|
||||
cbp.dstCapacities = cCapacities;
|
||||
cbp.blockResults = cSizes;
|
||||
|
||||
cctxprep.cctx = cctx;
|
||||
cctxprep.dictBuffer = dictBuffer;
|
||||
cctxprep.dictBufferSize = dictBufferSize;
|
||||
cctxprep.cLevel = cLevel;
|
||||
cctxprep.comprParams = comprParams;
|
||||
cctxprep.adv = adv;
|
||||
|
||||
dbp.benchFn = local_defaultDecompress;
|
||||
dbp.benchPayload = dctx;
|
||||
dbp.initFn = local_initDCtx;
|
||||
dbp.initPayload = &dctxprep;
|
||||
dbp.errorFn = ZSTD_isError;
|
||||
dbp.blockCount = nbBlocks;
|
||||
dbp.srcBuffers = (const void* const *) cPtrs;
|
||||
dbp.srcSizes = cSizes;
|
||||
dbp.dstBuffers = resPtrs;
|
||||
dbp.dstCapacities = resSizes;
|
||||
dbp.blockResults = NULL;
|
||||
|
||||
dctxprep.dctx = dctx;
|
||||
dctxprep.dictBuffer = dictBuffer;
|
||||
dctxprep.dictBufferSize = dictBufferSize;
|
||||
@ -641,14 +447,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
|
||||
while (!(compressionCompleted && decompressionCompleted)) {
|
||||
if (!compressionCompleted) {
|
||||
BMK_runOutcome_t const cOutcome =
|
||||
BMK_benchTimedFn( timeStateCompress,
|
||||
&local_defaultCompress, cctx,
|
||||
&local_initCCtx, &cctxprep,
|
||||
nbBlocks,
|
||||
srcPtrs, srcSizes,
|
||||
cPtrs, cCapacities,
|
||||
cSizes);
|
||||
BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp);
|
||||
|
||||
if (!BMK_isSuccessful_runOutcome(cOutcome)) {
|
||||
return BMK_benchOutcome_error();
|
||||
@ -675,14 +474,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
}
|
||||
|
||||
if(!decompressionCompleted) {
|
||||
BMK_runOutcome_t const dOutcome =
|
||||
BMK_benchTimedFn(timeStateDecompress,
|
||||
&local_defaultDecompress, dctx,
|
||||
&local_initDCtx, &dctxprep,
|
||||
nbBlocks,
|
||||
(const void *const *)cPtrs, cSizes,
|
||||
resPtrs, resSizes,
|
||||
NULL);
|
||||
BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
|
||||
|
||||
if(!BMK_isSuccessful_runOutcome(dOutcome)) {
|
||||
return BMK_benchOutcome_error();
|
@ -8,12 +8,18 @@
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* benchzstd :
|
||||
* benchmark Zstandard compression / decompression
|
||||
* over a set of files or buffers
|
||||
* and display progress result and final summary
|
||||
*/
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BENCH_H_121279284357
|
||||
#define BENCH_H_121279284357
|
||||
#ifndef BENCH_ZSTD_H_3242387
|
||||
#define BENCH_ZSTD_H_3242387
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
@ -181,6 +187,7 @@ BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
const void* dictBuffer, size_t dictBufferSize,
|
||||
int displayLevel, const char* displayName);
|
||||
|
||||
|
||||
/* BMK_benchMemAdvanced() : same as BMK_benchMem()
|
||||
* with following additional options :
|
||||
* dstBuffer - destination buffer to write compressed output in, NULL if none provided.
|
||||
@ -197,106 +204,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
|
||||
|
||||
|
||||
|
||||
/* ==== Benchmarking any function, iterated on a set of blocks ==== */
|
||||
|
||||
typedef struct {
|
||||
unsigned long long nanoSecPerRun; /* time per iteration */
|
||||
size_t sumOfReturn; /* sum of return values */
|
||||
} BMK_runTime_t;
|
||||
|
||||
VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t);
|
||||
|
||||
/* check first if the return structure represents an error or a valid result */
|
||||
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
|
||||
|
||||
/* extract result from variant type.
|
||||
* note : this function will abort() program execution if result is not valid
|
||||
* check result validity first, by using BMK_isSuccessful_runOutcome()
|
||||
*/
|
||||
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
|
||||
|
||||
|
||||
|
||||
typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
|
||||
typedef size_t (*BMK_initFn_t)(void* initPayload);
|
||||
|
||||
|
||||
/* BMK_benchFunction() :
|
||||
* This function times the execution of 2 argument functions, benchFn and initFn */
|
||||
|
||||
/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
|
||||
* is run nbLoops times
|
||||
* initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning.
|
||||
* This argument can be NULL, in which case nothing is run.
|
||||
* blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults
|
||||
* srcBuffers - an array of buffers to be operated on by benchFn
|
||||
* srcSizes - an array of the sizes of above buffers
|
||||
* dstBuffers - an array of buffers to be written into by benchFn
|
||||
* dstCapacities - an array of the capacities of above buffers
|
||||
* blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested.
|
||||
* nbLoops - defines number of times benchFn is run.
|
||||
* @return: a variant, which express either an error, or can generate a valid BMK_runTime_t result.
|
||||
* Use BMK_isSuccessful_runOutcome() to check if function was successful.
|
||||
* If yes, extract the result with BMK_extract_runTime(),
|
||||
* it will contain :
|
||||
* .sumOfReturn : the sum of all return values of benchFn through all of blocks
|
||||
* .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
|
||||
* .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
|
||||
* in which case, this value will be the total amount of bytes written into dstBuffer.
|
||||
*/
|
||||
BMK_runOutcome_t BMK_benchFunction(
|
||||
BMK_benchFn_t benchFn, void* benchPayload,
|
||||
BMK_initFn_t initFn, void* initPayload,
|
||||
size_t blockCount,
|
||||
const void *const * srcBuffers, const size_t* srcSizes,
|
||||
void *const * dstBuffers, const size_t* dstCapacities,
|
||||
size_t* blockResults,
|
||||
unsigned nbLoops);
|
||||
|
||||
|
||||
|
||||
/* ==== Benchmark any function, providing intermediate results ==== */
|
||||
|
||||
/* state information tracking benchmark session */
|
||||
typedef struct BMK_timedFnState_s BMK_timedFnState_t;
|
||||
|
||||
/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
|
||||
* Create/Set BMK_timedFnState_t for next benchmark session,
|
||||
* which shall last a minimum of total_ms milliseconds,
|
||||
* producing intermediate results, paced at interval of (approximately) run_ms.
|
||||
*/
|
||||
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state);
|
||||
|
||||
|
||||
/* Tells if duration of all benchmark runs has exceeded total_ms
|
||||
*/
|
||||
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
|
||||
|
||||
|
||||
/* BMK_benchTimedFn() :
|
||||
* Similar to BMK_benchFunction(), most arguments being identical.
|
||||
* Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
|
||||
* Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
|
||||
* Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
|
||||
* call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms
|
||||
* Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
|
||||
*/
|
||||
BMK_runOutcome_t BMK_benchTimedFn(
|
||||
BMK_timedFnState_t* timedFnState,
|
||||
BMK_benchFn_t benchFn, void* benchPayload,
|
||||
BMK_initFn_t initFn, void* initPayload,
|
||||
size_t blockCount,
|
||||
const void *const * srcBlockBuffers, const size_t* srcBlockSizes,
|
||||
void *const * dstBlockBuffers, const size_t* dstBlockCapacities,
|
||||
size_t* blockResults);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* BENCH_H_121279284357 */
|
||||
#endif /* BENCH_ZSTD_H_3242387 */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
@ -32,7 +32,7 @@
|
||||
#include <errno.h> /* errno */
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#ifndef ZSTD_NOBENCH
|
||||
# include "bench.h" /* BMK_benchFiles */
|
||||
# include "benchzstd.h" /* BMK_benchFiles */
|
||||
#endif
|
||||
#ifndef ZSTD_NODICT
|
||||
# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */
|
||||
|
@ -132,18 +132,18 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP)
|
||||
fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD)
|
||||
fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG # turn off assert() for speed measurements
|
||||
fullbench fullbench32 : $(ZSTD_FILES)
|
||||
fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c
|
||||
fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_
|
||||
fullbench-lib : zstd-staticLib
|
||||
fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c
|
||||
fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
|
||||
$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a
|
||||
|
||||
# note : broken : requires unavailable symbols
|
||||
fullbench-dll : zstd-dll
|
||||
fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
|
||||
fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c fullbench.c
|
||||
fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
|
||||
# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
|
||||
$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT)
|
||||
|
||||
@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
paramgrill : DEBUGFLAGS = # turn off assert() by default for speed measurements
|
||||
paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c
|
||||
paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
|
||||
$(CC) $(FLAGS) $^ -lm -o $@$(EXT)
|
||||
|
||||
datagen : $(PRGDIR)/datagen.c datagencli.c
|
||||
|
@ -30,7 +30,8 @@
|
||||
#include "zstd.h" /* ZSTD_versionString */
|
||||
#include "util.h" /* time functions */
|
||||
#include "datagen.h"
|
||||
#include "bench.h" /* CustomBench*/
|
||||
#include "benchfn.h" /* CustomBench*/
|
||||
#include "benchzstd.h" /* MB_UNIT */
|
||||
|
||||
|
||||
/*_************************************
|
||||
@ -514,20 +515,28 @@ static size_t benchMem(U32 benchNb,
|
||||
|
||||
/* benchmark loop */
|
||||
{ BMK_timedFnState_t* const tfs = BMK_createTimedFnState(g_nbIterations * 1000, 1000);
|
||||
void* const avoidStrictAliasingPtr = &dstBuff;
|
||||
BMK_benchParams_t bp;
|
||||
BMK_runTime_t bestResult;
|
||||
bestResult.sumOfReturn = 0;
|
||||
bestResult.nanoSecPerRun = (unsigned long long)(-1LL);
|
||||
assert(tfs != NULL);
|
||||
|
||||
bp.benchFn = benchFunction;
|
||||
bp.benchPayload = buff2;
|
||||
bp.initFn = NULL;
|
||||
bp.initPayload = NULL;
|
||||
bp.errorFn = ZSTD_isError;
|
||||
bp.blockCount = 1;
|
||||
bp.srcBuffers = &src;
|
||||
bp.srcSizes = &srcSize;
|
||||
bp.dstBuffers = (void* const*) avoidStrictAliasingPtr; /* circumvent strict aliasing warning on gcc-8,
|
||||
* because gcc considers that `void* const *` and `void**` are 2 different types */
|
||||
bp.dstCapacities = &dstBuffSize;
|
||||
bp.blockResults = NULL;
|
||||
|
||||
for (;;) {
|
||||
void* const dstBuffv = dstBuff;
|
||||
BMK_runOutcome_t const bOutcome =
|
||||
BMK_benchTimedFn( tfs,
|
||||
benchFunction, buff2,
|
||||
NULL, NULL, /* initFn */
|
||||
1, /* blockCount */
|
||||
&src, &srcSize,
|
||||
&dstBuffv, &dstBuffSize,
|
||||
NULL);
|
||||
BMK_runOutcome_t const bOutcome = BMK_benchTimedFn(tfs, bp);
|
||||
|
||||
if (!BMK_isSuccessful_runOutcome(bOutcome)) {
|
||||
DISPLAY("ERROR benchmarking function ! ! \n");
|
||||
|
@ -25,7 +25,8 @@
|
||||
#include "datagen.h"
|
||||
#include "xxhash.h"
|
||||
#include "util.h"
|
||||
#include "bench.h"
|
||||
#include "benchfn.h"
|
||||
#include "benchzstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "zstd_internal.h" /* should not be needed */
|
||||
|
||||
@ -1397,7 +1398,8 @@ static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTa
|
||||
/* nbSeconds used in same way as in BMK_advancedParams_t */
|
||||
/* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */
|
||||
/* dictionary nullable, nothing else though. */
|
||||
/* note : it would be better if this function was in bench.c, sharing code with benchMemAdvanced(), since it's technically a part of it */
|
||||
/* note : it would be a lot better if this function was present in benchzstd.c,
|
||||
* sharing code with benchMemAdvanced(), since it's technically a part of it */
|
||||
static BMK_benchOutcome_t
|
||||
BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
|
||||
int cLevel, const paramValues_t* comprParams,
|
||||
@ -1438,13 +1440,40 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
|
||||
int decompressionCompleted = (mode == BMK_compressOnly);
|
||||
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
|
||||
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
|
||||
BMK_benchParams_t cbp, dbp;
|
||||
BMK_initCCtxArgs cctxprep;
|
||||
BMK_initDCtxArgs dctxprep;
|
||||
|
||||
cbp.benchFn = local_defaultCompress;
|
||||
cbp.benchPayload = cctx;
|
||||
cbp.initFn = local_initCCtx;
|
||||
cbp.initPayload = &cctxprep;
|
||||
cbp.errorFn = ZSTD_isError;
|
||||
cbp.blockCount = nbBlocks;
|
||||
cbp.srcBuffers = srcPtrs;
|
||||
cbp.srcSizes = srcSizes;
|
||||
cbp.dstBuffers = dstPtrs;
|
||||
cbp.dstCapacities = dstCapacities;
|
||||
cbp.blockResults = dstSizes;
|
||||
|
||||
cctxprep.cctx = cctx;
|
||||
cctxprep.dictBuffer = dictBuffer;
|
||||
cctxprep.dictBufferSize = dictBufferSize;
|
||||
cctxprep.cLevel = cLevel;
|
||||
cctxprep.comprParams = comprParams;
|
||||
|
||||
dbp.benchFn = local_defaultDecompress;
|
||||
dbp.benchPayload = dctx;
|
||||
dbp.initFn = local_initDCtx;
|
||||
dbp.initPayload = &dctxprep;
|
||||
dbp.errorFn = ZSTD_isError;
|
||||
dbp.blockCount = nbBlocks;
|
||||
dbp.srcBuffers = (const void* const *) dstPtrs;
|
||||
dbp.srcSizes = dstCapacities;
|
||||
dbp.dstBuffers = resPtrs;
|
||||
dbp.dstCapacities = resSizes;
|
||||
dbp.blockResults = NULL;
|
||||
|
||||
dctxprep.dctx = dctx;
|
||||
dctxprep.dictBuffer = dictBuffer;
|
||||
dctxprep.dictBufferSize = dictBufferSize;
|
||||
@ -1452,13 +1481,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
|
||||
assert(timeStateCompress != NULL);
|
||||
assert(timeStateDecompress != NULL);
|
||||
while(!compressionCompleted) {
|
||||
BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress,
|
||||
&local_defaultCompress, cctx,
|
||||
&local_initCCtx, &cctxprep,
|
||||
nbBlocks,
|
||||
srcPtrs, srcSizes,
|
||||
dstPtrs, dstCapacities,
|
||||
dstSizes);
|
||||
BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);
|
||||
|
||||
if (!BMK_isSuccessful_runOutcome(cOutcome)) {
|
||||
BMK_benchOutcome_t bOut;
|
||||
@ -1476,13 +1499,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
|
||||
}
|
||||
|
||||
while (!decompressionCompleted) {
|
||||
BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress,
|
||||
&local_defaultDecompress, dctx,
|
||||
&local_initDCtx, &dctxprep,
|
||||
nbBlocks,
|
||||
(const void* const*)dstPtrs, dstSizes,
|
||||
resPtrs, resSizes,
|
||||
NULL);
|
||||
BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
|
||||
|
||||
if (!BMK_isSuccessful_runOutcome(dOutcome)) {
|
||||
BMK_benchOutcome_t bOut;
|
||||
|
Loading…
x
Reference in New Issue
Block a user