mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
Merge commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717'
* commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717': Checkasm: assembly testing and benchmarking tool Merged-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
commit
f14fc55969
1
.gitignore
vendored
1
.gitignore
vendored
@ -63,6 +63,7 @@
|
||||
/libavutil/ffversion.h
|
||||
/tests/audiogen
|
||||
/tests/base64
|
||||
/tests/checkasm/checkasm
|
||||
/tests/data/
|
||||
/tests/pixfmts.mak
|
||||
/tests/rotozoom
|
||||
|
@ -230,5 +230,7 @@ testclean::
|
||||
|
||||
-include $(wildcard tests/*.d)
|
||||
|
||||
include $(SRC_PATH)/tests/checkasm/Makefile
|
||||
|
||||
.PHONY: fate* lcov lcov-reset
|
||||
.INTERMEDIATE: coverage.info
|
||||
|
33
tests/checkasm/Makefile
Normal file
33
tests/checkasm/Makefile
Normal file
@ -0,0 +1,33 @@
|
||||
# libavcodec tests
|
||||
AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
|
||||
|
||||
CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
|
||||
|
||||
|
||||
-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile
|
||||
|
||||
CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
|
||||
CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
|
||||
|
||||
-include $(CHECKASMOBJS:.o=.d)
|
||||
|
||||
CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
|
||||
$(CHECKASMOBJS): | $(CHECKASMDIRS)
|
||||
OBJDIRS += $(CHECKASMDIRS)
|
||||
|
||||
# We rely on function pointers intentionally declared without specified argument types.
|
||||
tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes)
|
||||
|
||||
CHECKASM := tests/checkasm/checkasm$(EXESUF)
|
||||
|
||||
$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS)
|
||||
$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS)
|
||||
|
||||
checkasm: $(CHECKASM)
|
||||
|
||||
clean:: checkasmclean
|
||||
|
||||
checkasmclean:
|
||||
$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)
|
||||
|
||||
.PHONY: checkasm
|
484
tests/checkasm/checkasm.c
Normal file
484
tests/checkasm/checkasm.c
Normal file
@ -0,0 +1,484 @@
|
||||
/*
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
* Copyright (c) 2008 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/random_seed.h"
|
||||
|
||||
#if ARCH_X86
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#endif
|
||||
|
||||
#if HAVE_SETCONSOLETEXTATTRIBUTE
|
||||
#include <windows.h>
|
||||
#define COLOR_RED FOREGROUND_RED
|
||||
#define COLOR_GREEN FOREGROUND_GREEN
|
||||
#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
|
||||
#else
|
||||
#define COLOR_RED 1
|
||||
#define COLOR_GREEN 2
|
||||
#define COLOR_YELLOW 3
|
||||
#endif
|
||||
|
||||
#if HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#if !HAVE_ISATTY
|
||||
#define isatty(fd) 1
|
||||
#endif
|
||||
|
||||
/* List of tests to invoke */
|
||||
static void (* const tests[])(void) = {
|
||||
#if CONFIG_H264PRED
|
||||
checkasm_check_h264pred,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
/* List of cpu flags to check */
|
||||
static const struct {
|
||||
const char *name;
|
||||
const char *suffix;
|
||||
int flag;
|
||||
} cpus[] = {
|
||||
#if ARCH_X86
|
||||
{ "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
|
||||
{ "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT },
|
||||
{ "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW },
|
||||
{ "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
|
||||
{ "SSE", "sse", AV_CPU_FLAG_SSE },
|
||||
{ "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
|
||||
{ "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
|
||||
{ "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
|
||||
{ "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 },
|
||||
{ "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 },
|
||||
{ "AVX", "avx", AV_CPU_FLAG_AVX },
|
||||
{ "XOP", "xop", AV_CPU_FLAG_XOP },
|
||||
{ "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
|
||||
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
||||
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
||||
#endif
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
typedef struct CheckasmFuncVersion {
|
||||
struct CheckasmFuncVersion *next;
|
||||
intptr_t (*func)();
|
||||
int ok;
|
||||
int cpu;
|
||||
int iterations;
|
||||
uint64_t cycles;
|
||||
} CheckasmFuncVersion;
|
||||
|
||||
/* Binary search tree node */
|
||||
typedef struct CheckasmFunc {
|
||||
struct CheckasmFunc *child[2];
|
||||
CheckasmFuncVersion versions;
|
||||
char name[1];
|
||||
} CheckasmFunc;
|
||||
|
||||
/* Internal state */
|
||||
static struct {
|
||||
CheckasmFunc *funcs;
|
||||
CheckasmFunc *current_func;
|
||||
CheckasmFuncVersion *current_func_ver;
|
||||
const char *bench_pattern;
|
||||
int bench_pattern_len;
|
||||
int num_checked;
|
||||
int num_failed;
|
||||
int nop_time;
|
||||
int cpu_flag;
|
||||
const char *cpu_flag_name;
|
||||
} state;
|
||||
|
||||
/* PRNG state */
|
||||
AVLFG checkasm_lfg;
|
||||
|
||||
/* Write formatted text to stderr, wrapped in the requested color whenever the
 * output device can display it. Color support is probed on the first call
 * only; the outcome is cached in a function-local static afterwards. */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1; /* -1: not probed yet, 0: plain text, 1: colored */
    va_list ap;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE console;
    static WORD saved_attrs;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO info;

        use_color = 0;
        console = GetStdHandle(STD_ERROR_HANDLE);
        if (console && console != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(console, &info)) {
            /* Keep the current attributes so they can be put back after printing */
            saved_attrs = info.wAttributes;
            use_color = 1;
        }
    }
    if (use_color) {
        /* Replace only the low four attribute bits with the requested color */
        SetConsoleTextAttribute(console, (saved_attrs & 0xfff0) | (color & 0x0f));
    }
#else
    if (use_color < 0) {
        const char *term_name = getenv("TERM");

        /* Color requires TERM to be set to something other than "dumb" and
         * file descriptor 2 to be a terminal */
        use_color = 0;
        if (term_name && strcmp(term_name, "dumb") && isatty(2))
            use_color = 1;
    }
    if (use_color) {
        /* Bit 3 of color selects the first escape parameter, bits 0-2 the color digit */
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
    }
#endif

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    if (!use_color)
        return;

    /* Undo the color change made above */
#if HAVE_SETCONSOLETEXTATTRIBUTE
    SetConsoleTextAttribute(console, saved_attrs);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
|
||||
|
||||
/* Deallocate a tree */
|
||||
static void destroy_func_tree(CheckasmFunc *f)
|
||||
{
|
||||
if (f) {
|
||||
CheckasmFuncVersion *v = f->versions.next;
|
||||
while (v) {
|
||||
CheckasmFuncVersion *next = v->next;
|
||||
free(v);
|
||||
v = next;
|
||||
}
|
||||
|
||||
destroy_func_tree(f->child[0]);
|
||||
destroy_func_tree(f->child[1]);
|
||||
free(f);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate a zero-initialized block, clean up and exit on failure */
|
||||
static void *checkasm_malloc(size_t size)
|
||||
{
|
||||
void *ptr = calloc(1, size);
|
||||
if (!ptr) {
|
||||
fprintf(stderr, "checkasm: malloc failed\n");
|
||||
destroy_func_tree(state.funcs);
|
||||
exit(1);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* Get the suffix of the specified cpu flag */
|
||||
static const char *cpu_suffix(int cpu)
|
||||
{
|
||||
int i = FF_ARRAY_ELEMS(cpus);
|
||||
|
||||
while (--i >= 0)
|
||||
if (cpu & cpus[i].flag)
|
||||
return cpus[i].suffix;
|
||||
|
||||
return "c";
|
||||
}
|
||||
|
||||
#ifdef AV_READ_TIME
|
||||
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    /* Where int is wider than 16 bits both operands are promoted to int,
     * so the difference is exact */
    return lhs - rhs;
}
|
||||
|
||||
/* Measure the overhead of the timing code (in decicycles) */
|
||||
static int measure_nop_time(void)
|
||||
{
|
||||
uint16_t nops[10000];
|
||||
int i, nop_sum = 0;
|
||||
|
||||
for (i = 0; i < 10000; i++) {
|
||||
uint64_t t = AV_READ_TIME();
|
||||
nops[i] = AV_READ_TIME() - t;
|
||||
}
|
||||
|
||||
qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
|
||||
for (i = 2500; i < 7500; i++)
|
||||
nop_sum += nops[i];
|
||||
|
||||
return nop_sum / 500;
|
||||
}
|
||||
|
||||
/* Print benchmark results */
|
||||
static void print_benchs(CheckasmFunc *f)
|
||||
{
|
||||
if (f) {
|
||||
print_benchs(f->child[0]);
|
||||
|
||||
/* Only print functions with at least one assembly version */
|
||||
if (f->versions.cpu || f->versions.next) {
|
||||
CheckasmFuncVersion *v = &f->versions;
|
||||
do {
|
||||
if (v->iterations) {
|
||||
int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4;
|
||||
printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
|
||||
}
|
||||
} while ((v = v->next));
|
||||
}
|
||||
|
||||
print_benchs(f->child[1]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Compare two function names by character value, except that where the names
 * differ inside a number, the name whose digit run is longer sorts last.
 * Returns zero for equal names, otherwise a negative or positive value. */
static int cmp_func_names(const char *a, const char *b)
{
    int ascii_diff, digit_diff;

    /* Advance past the common prefix. ascii_diff is left holding the
     * difference of the first mismatching characters, or 0 if the strings are equal. */
    for (;;) {
        ascii_diff = *a - *b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* Skip the digits both names still have at this position */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    /* If only one name has a digit left, its number is longer and it sorts last */
    digit_diff = av_isdigit(*a) - av_isdigit(*b);
    if (digit_diff)
        return digit_diff;

    return ascii_diff;
}
|
||||
|
||||
/* Get a node with the specified name, creating it if it doesn't exist */
|
||||
static CheckasmFunc *get_func(const char *name, int length)
|
||||
{
|
||||
CheckasmFunc *f, **f_ptr = &state.funcs;
|
||||
|
||||
/* Search the tree for a matching node */
|
||||
while ((f = *f_ptr)) {
|
||||
int cmp = cmp_func_names(name, f->name);
|
||||
if (!cmp)
|
||||
return f;
|
||||
|
||||
f_ptr = &f->child[(cmp > 0)];
|
||||
}
|
||||
|
||||
/* Allocate and insert a new node into the tree */
|
||||
f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length);
|
||||
memcpy(f->name, name, length+1);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/* Perform tests and benchmarks for the specified cpu flag if supported by the host */
|
||||
static void check_cpu_flag(const char *name, int flag)
|
||||
{
|
||||
int old_cpu_flag = state.cpu_flag;
|
||||
|
||||
flag |= old_cpu_flag;
|
||||
av_set_cpu_flags_mask(flag);
|
||||
state.cpu_flag = av_get_cpu_flags();
|
||||
|
||||
if (!flag || state.cpu_flag != old_cpu_flag) {
|
||||
int i;
|
||||
|
||||
state.cpu_flag_name = name;
|
||||
for (i = 0; tests[i]; i++)
|
||||
tests[i]();
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the name of the current CPU flag, but only do it once */
|
||||
static void print_cpu_name(void)
|
||||
{
|
||||
if (state.cpu_flag_name) {
|
||||
color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
|
||||
state.cpu_flag_name = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, seed, ret = 0;
|
||||
|
||||
if (!tests[0] || !cpus[0].flag) {
|
||||
fprintf(stderr, "checkasm: no tests to perform\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc > 1 && !strncmp(argv[1], "--bench", 7)) {
|
||||
#ifndef AV_READ_TIME
|
||||
fprintf(stderr, "checkasm: --bench is not supported on your system\n");
|
||||
return 1;
|
||||
#endif
|
||||
if (argv[1][7] == '=') {
|
||||
state.bench_pattern = argv[1] + 8;
|
||||
state.bench_pattern_len = strlen(state.bench_pattern);
|
||||
} else
|
||||
state.bench_pattern = "";
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed();
|
||||
fprintf(stderr, "checkasm: using random seed %u\n", seed);
|
||||
av_lfg_init(&checkasm_lfg, seed);
|
||||
|
||||
check_cpu_flag(NULL, 0);
|
||||
for (i = 0; cpus[i].flag; i++)
|
||||
check_cpu_flag(cpus[i].name, cpus[i].flag);
|
||||
|
||||
if (state.num_failed) {
|
||||
fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
|
||||
ret = 1;
|
||||
} else {
|
||||
fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
|
||||
#ifdef AV_READ_TIME
|
||||
if (state.bench_pattern) {
|
||||
state.nop_time = measure_nop_time();
|
||||
printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
|
||||
print_benchs(state.funcs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
destroy_func_tree(state.funcs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Decide whether or not the specified function needs to be tested and
|
||||
* allocate/initialize data structures if needed. Returns a pointer to a
|
||||
* reference function if the function should be tested, otherwise NULL */
|
||||
intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))()
|
||||
{
|
||||
char name_buf[256];
|
||||
intptr_t (*ref)() = func;
|
||||
CheckasmFuncVersion *v;
|
||||
int name_length;
|
||||
va_list arg;
|
||||
|
||||
va_start(arg, name);
|
||||
name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
|
||||
va_end(arg);
|
||||
|
||||
if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
|
||||
return NULL;
|
||||
|
||||
state.current_func = get_func(name_buf, name_length);
|
||||
v = &state.current_func->versions;
|
||||
|
||||
if (v->func) {
|
||||
CheckasmFuncVersion *prev;
|
||||
do {
|
||||
/* Only test functions that haven't already been tested */
|
||||
if (v->func == func)
|
||||
return NULL;
|
||||
|
||||
if (v->ok)
|
||||
ref = v->func;
|
||||
|
||||
prev = v;
|
||||
} while ((v = v->next));
|
||||
|
||||
v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
|
||||
}
|
||||
|
||||
v->func = func;
|
||||
v->ok = 1;
|
||||
v->cpu = state.cpu_flag;
|
||||
state.current_func_ver = v;
|
||||
|
||||
if (state.cpu_flag)
|
||||
state.num_checked++;
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Decide whether or not the current function needs to be benchmarked */
|
||||
int checkasm_bench_func(void)
|
||||
{
|
||||
return !state.num_failed && state.bench_pattern &&
|
||||
!strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
|
||||
}
|
||||
|
||||
/* Indicate that the current test has failed */
|
||||
void checkasm_fail_func(const char *msg, ...)
|
||||
{
|
||||
if (state.current_func_ver->cpu && state.current_func_ver->ok) {
|
||||
va_list arg;
|
||||
|
||||
print_cpu_name();
|
||||
fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
|
||||
va_start(arg, msg);
|
||||
vfprintf(stderr, msg, arg);
|
||||
va_end(arg);
|
||||
fprintf(stderr, ")\n");
|
||||
|
||||
state.current_func_ver->ok = 0;
|
||||
state.num_failed++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update benchmark results of the current function */
|
||||
void checkasm_update_bench(int iterations, uint64_t cycles)
|
||||
{
|
||||
state.current_func_ver->iterations += iterations;
|
||||
state.current_func_ver->cycles += cycles;
|
||||
}
|
||||
|
||||
/* Print the outcome of all tests performed since the last time this function was called */
|
||||
void checkasm_report(const char *name, ...)
|
||||
{
|
||||
static int prev_checked, prev_failed, max_length;
|
||||
|
||||
if (state.num_checked > prev_checked) {
|
||||
print_cpu_name();
|
||||
|
||||
if (*name) {
|
||||
int pad_length = max_length;
|
||||
va_list arg;
|
||||
|
||||
fprintf(stderr, " - ");
|
||||
va_start(arg, name);
|
||||
pad_length -= vfprintf(stderr, name, arg);
|
||||
va_end(arg);
|
||||
fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
|
||||
} else
|
||||
fprintf(stderr, " - %-*s [", max_length, state.current_func->name);
|
||||
|
||||
if (state.num_failed == prev_failed)
|
||||
color_printf(COLOR_GREEN, "OK");
|
||||
else
|
||||
color_printf(COLOR_RED, "FAILED");
|
||||
fprintf(stderr, "]\n");
|
||||
|
||||
prev_checked = state.num_checked;
|
||||
prev_failed = state.num_failed;
|
||||
} else if (!state.cpu_flag) {
|
||||
int length;
|
||||
|
||||
/* Calculate the amount of padding required to make the output vertically aligned */
|
||||
if (*name) {
|
||||
va_list arg;
|
||||
va_start(arg, name);
|
||||
length = vsnprintf(NULL, 0, name, arg);
|
||||
va_end(arg);
|
||||
} else
|
||||
length = strlen(state.current_func->name);
|
||||
|
||||
if (length > max_length)
|
||||
max_length = length;
|
||||
}
|
||||
}
|
115
tests/checkasm/checkasm.h
Normal file
115
tests/checkasm/checkasm.h
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
* Copyright (c) 2008 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef CHECKASM_H
|
||||
#define CHECKASM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/avstring.h"
|
||||
#include "libavutil/lfg.h"
|
||||
#include "libavutil/timer.h"
|
||||
|
||||
void checkasm_check_h264pred(void);
|
||||
|
||||
intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3);
|
||||
int checkasm_bench_func(void);
|
||||
void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
|
||||
void checkasm_update_bench(int iterations, uint64_t cycles);
|
||||
void checkasm_report(const char *name, ...) av_printf_format(1, 2);
|
||||
|
||||
extern AVLFG checkasm_lfg;
|
||||
#define rnd() av_lfg_get(&checkasm_lfg)
|
||||
|
||||
static av_unused intptr_t (*func_ref)();
|
||||
static av_unused intptr_t (*func_new)();
|
||||
|
||||
#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
|
||||
|
||||
/* Decide whether or not the specified function needs to be tested */
|
||||
#define check_func(func, ...) ((func_new = (intptr_t (*)())func) &&\
|
||||
(func_ref = checkasm_check_func(func_new, __VA_ARGS__)))
|
||||
|
||||
/* Indicate that the current test has failed */
|
||||
#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
|
||||
|
||||
/* Print the test outcome */
|
||||
#define report(...) checkasm_report("" __VA_ARGS__)
|
||||
|
||||
/* Call the reference function */
|
||||
#define call_ref(...) func_ref(__VA_ARGS__)
|
||||
|
||||
#if ARCH_X86 && HAVE_YASM
|
||||
/* Verifies that clobbered callee-saved registers are properly saved and restored */
|
||||
intptr_t checkasm_checked_call(intptr_t (*func)(), ...);
|
||||
#endif
|
||||
|
||||
/* Call the function */
|
||||
#if ARCH_X86_64 && HAVE_YASM
|
||||
/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through checkasm_checked_call with added dummy arguments which forces all
 * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
 * upper half of the 64-bit register locations on the stack will now contain junk which will
 * cause misbehaving functions to either produce incorrect output or segfault. Note that
 * even though this works extremely well in practice, it's technically not guaranteed
 * and false negatives are theoretically possible, but there can never be any false positives.
 */
|
||||
void checkasm_stack_clobber(uint64_t clobber, ...);
|
||||
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
|
||||
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
|
||||
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
|
||||
checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
|
||||
#elif ARCH_X86_32 && HAVE_YASM
|
||||
#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__)
|
||||
#else
|
||||
#define call_new(...) func_new(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
/* Benchmark the function */
|
||||
#ifdef AV_READ_TIME
|
||||
#define bench_new(...)\
|
||||
do {\
|
||||
if (checkasm_bench_func()) {\
|
||||
intptr_t (*tfunc)() = func_new;\
|
||||
uint64_t tsum = 0;\
|
||||
int ti, tcount = 0;\
|
||||
for (ti = 0; ti < BENCH_RUNS; ti++) {\
|
||||
uint64_t t = AV_READ_TIME();\
|
||||
tfunc(__VA_ARGS__);\
|
||||
tfunc(__VA_ARGS__);\
|
||||
tfunc(__VA_ARGS__);\
|
||||
tfunc(__VA_ARGS__);\
|
||||
t = AV_READ_TIME() - t;\
|
||||
if (t*tcount <= tsum*4 && ti > 0) {\
|
||||
tsum += t;\
|
||||
tcount++;\
|
||||
}\
|
||||
}\
|
||||
checkasm_update_bench(tcount, tsum);\
|
||||
}\
|
||||
} while (0)
|
||||
#else
|
||||
#define bench_new(...)
|
||||
#endif
|
||||
|
||||
#endif
|
252
tests/checkasm/h264pred.c
Normal file
252
tests/checkasm/h264pred.c
Normal file
@ -0,0 +1,252 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/h264pred.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 };
|
||||
|
||||
static const char * const pred4x4_modes[4][15] = {
|
||||
{ /* H264 */
|
||||
[VERT_PRED ] = "vertical",
|
||||
[HOR_PRED ] = "horizontal",
|
||||
[DC_PRED ] = "dc",
|
||||
[DIAG_DOWN_LEFT_PRED ] = "down_left",
|
||||
[DIAG_DOWN_RIGHT_PRED] = "down_right",
|
||||
[VERT_RIGHT_PRED ] = "vertical_right",
|
||||
[HOR_DOWN_PRED ] = "horizontal_right",
|
||||
[VERT_LEFT_PRED ] = "vertical_left",
|
||||
[HOR_UP_PRED ] = "horizontal_up",
|
||||
[LEFT_DC_PRED ] = "left_dc",
|
||||
[TOP_DC_PRED ] = "top_dc",
|
||||
[DC_128_PRED ] = "dc_128",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[VERT_PRED ] = "vertical_vp8",
|
||||
[HOR_PRED ] = "horizontal_vp8",
|
||||
[VERT_LEFT_PRED] = "vertical_left_vp8",
|
||||
[TM_VP8_PRED ] = "tm_vp8",
|
||||
[DC_127_PRED ] = "dc_127_vp8",
|
||||
[DC_129_PRED ] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[DIAG_DOWN_LEFT_PRED ] = "down_left_rv40",
|
||||
[VERT_LEFT_PRED ] = "vertical_left_rv40",
|
||||
[HOR_UP_PRED ] = "horizontal_up_rv40",
|
||||
[DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
|
||||
[HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40",
|
||||
[VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40",
|
||||
},
|
||||
{ /* SVQ3 */
|
||||
[DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
|
||||
},
|
||||
};
|
||||
|
||||
static const char * const pred8x8_modes[4][11] = {
|
||||
{ /* H264 */
|
||||
[DC_PRED8x8 ] = "dc",
|
||||
[HOR_PRED8x8 ] = "horizontal",
|
||||
[VERT_PRED8x8 ] = "vertical",
|
||||
[PLANE_PRED8x8 ] = "plane",
|
||||
[LEFT_DC_PRED8x8 ] = "left_dc",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc",
|
||||
[DC_128_PRED8x8 ] = "dc_128",
|
||||
[ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
|
||||
[ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
|
||||
[ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
|
||||
[ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[PLANE_PRED8x8 ] = "tm_vp8",
|
||||
[DC_127_PRED8x8] = "dc_127_vp8",
|
||||
[DC_129_PRED8x8] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[DC_PRED8x8 ] = "dc_rv40",
|
||||
[LEFT_DC_PRED8x8] = "left_dc_rv40",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc_rv40",
|
||||
},
|
||||
{ /* SVQ3 */
|
||||
},
|
||||
};
|
||||
|
||||
static const char * const pred16x16_modes[4][9] = {
|
||||
{ /* H264 */
|
||||
[DC_PRED8x8 ] = "dc",
|
||||
[HOR_PRED8x8 ] = "horizontal",
|
||||
[VERT_PRED8x8 ] = "vertical",
|
||||
[PLANE_PRED8x8 ] = "plane",
|
||||
[LEFT_DC_PRED8x8] = "left_dc",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc",
|
||||
[DC_128_PRED8x8 ] = "dc_128",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[PLANE_PRED8x8 ] = "tm_vp8",
|
||||
[DC_127_PRED8x8] = "dc_127_vp8",
|
||||
[DC_129_PRED8x8] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[PLANE_PRED8x8] = "plane_rv40",
|
||||
},
|
||||
{ /* SVQ3 */
|
||||
[PLANE_PRED8x8] = "plane_svq3",
|
||||
},
|
||||
};
|
||||
|
||||
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
|
||||
|
||||
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
|
||||
#define BUF_SIZE (3*16*17)
|
||||
|
||||
#define check_pred_func(func, name, mode_name)\
|
||||
(mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\
|
||||
check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\
|
||||
check_func(func, "pred%s_%s", name, mode_name)))
|
||||
|
||||
#define randomize_buffers()\
|
||||
do {\
|
||||
uint32_t mask = pixel_mask[bit_depth-8];\
|
||||
int i;\
|
||||
for (i = 0; i < BUF_SIZE; i += 4) {\
|
||||
uint32_t r = rnd() & mask;\
|
||||
AV_WN32A(buf0+i, r);\
|
||||
AV_WN32A(buf1+i, r);\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#define src0 (buf0 + 4*16) /* Offset to allow room for top and left */
|
||||
#define src1 (buf1 + 4*16)
|
||||
|
||||
/* Test every named 4x4 prediction function of `h` against its reference on
 * random input. Only runs for chroma_format 1. */
static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    uint8_t *topright;
    int pred_mode;

    if (chroma_format != 1)
        return;

    /* The same top-right pointer (inside buf0) is handed to both versions */
    topright = buf0 + 2*16;

    for (pred_mode = 0; pred_mode < 15; pred_mode++) {
        /* Skips modes without a name for this codec and functions that need no testing */
        if (!check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode]))
            continue;

        randomize_buffers();
        call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
        call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);

        /* Compare the whole buffers, not just the predicted block */
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();

        bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
    }
}
|
||||
|
||||
/* Test every named 8x8 prediction function of `h` against its reference on
 * random input. The functions are reported as "8x16" when chroma_format is 2
 * and as "8x8" otherwise. */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    int pred_mode = 0;

    while (pred_mode < 11) {
        if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8",
                            pred8x8_modes[codec][pred_mode])) {
            randomize_buffers();
            call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL);
            call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);

            /* Compare the whole buffers, not just the predicted block */
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();

            bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
        }
        pred_mode++;
    }
}
|
||||
|
||||
/* Test every named 16x16 prediction function of `h` against its reference on
 * random input. Only runs for chroma_format 1. */
static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                            int codec, int chroma_format, int bit_depth)
{
    int pred_mode;

    if (chroma_format != 1)
        return;

    for (pred_mode = 0; pred_mode < 9; pred_mode++) {
        /* Skips modes without a name for this codec and functions that need no testing */
        if (!check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode]))
            continue;

        randomize_buffers();
        call_ref(src0, (ptrdiff_t)48);
        call_new(src1, (ptrdiff_t)48);

        /* Compare the whole buffers, not just the predicted block */
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();

        bench_new(src1, (ptrdiff_t)48);
    }
}
|
||||
|
||||
/* Test the 8x8l prediction functions of `h` against their references on random
 * input, for each combination of the top-left / top-right flags.
 * Only runs for H.264 with chroma_format 1. */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    int pred_mode;

    if (chroma_format != 1 || codec_ids[codec] != AV_CODEC_ID_H264)
        return;

    for (pred_mode = 0; pred_mode < 12; pred_mode++) {
        int avail;

        /* The mode names are taken from the 4x4 table */
        if (!check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode]))
            continue;

        /* Bit 0x8000 is passed as has_topleft, bit 0x4000 as has_topright;
         * the loop covers all four combinations */
        for (avail = 0; avail <= 0xc000; avail += 0x4000) {
            int has_topleft  = avail & 0x8000;
            int has_topright = avail & 0x4000;

            if (!has_topleft && (pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED))
                continue; /* Those aren't allowed according to the spec */

            randomize_buffers();
            call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
            call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);

            /* Compare the whole buffers, not just the predicted block */
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();

            bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
        }
    }
}
|
||||
|
||||
/* TODO: Add tests for H.264 lossless H/V prediction */
|
||||
|
||||
void checkasm_check_h264pred(void)
|
||||
{
|
||||
static const struct {
|
||||
void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
|
||||
const char *name;
|
||||
} tests[] = {
|
||||
{ check_pred4x4, "pred4x4" },
|
||||
{ check_pred8x8, "pred8x8" },
|
||||
{ check_pred16x16, "pred16x16" },
|
||||
{ check_pred8x8l, "pred8x8l" },
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE];
|
||||
DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE];
|
||||
H264PredContext h;
|
||||
int test, codec, chroma_format, bit_depth;
|
||||
|
||||
for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
|
||||
for (codec = 0; codec < 4; codec++) {
|
||||
int codec_id = codec_ids[codec];
|
||||
for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++)
|
||||
for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) {
|
||||
ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format);
|
||||
tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
|
||||
}
|
||||
}
|
||||
report("%s", tests[test].name);
|
||||
}
|
||||
}
|
6
tests/checkasm/x86/Makefile
Normal file
6
tests/checkasm/x86/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o
|
||||
|
||||
tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm
|
||||
$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
|
||||
$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
|
||||
-$(STRIP) $(STRIPFLAGS) $@
|
193
tests/checkasm/x86/checkasm.asm
Normal file
193
tests/checkasm/x86/checkasm.asm
Normal file
@ -0,0 +1,193 @@
|
||||
;*****************************************************************************
|
||||
;* Assembly testing and benchmarking tool
|
||||
;* Copyright (c) 2008 Loren Merritt
|
||||
;* Copyright (c) 2012 Henrik Gramner
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or modify
|
||||
;* it under the terms of the GNU General Public License as published by
|
||||
;* the Free Software Foundation; either version 2 of the License, or
|
||||
;* (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;* GNU General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU General Public License
|
||||
;* along with this program; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
;*****************************************************************************
|
||||
|
||||
%define private_prefix checkasm
|
||||
%include "libavutil/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
; NUL-terminated message whose address is handed to fail_func by checked_call
; when a checked register did not keep its value.
error_message: db "failed to preserve register", 0
|
||||
|
||||
%if ARCH_X86_64
|
||||
; just random numbers to reduce the chance of incidental match
|
||||
; x6-x15: 16-byte patterns that checked_call loads into m6-m15 (WIN64 only)
;         before the call and compares against afterwards. They are read with
;         mova, hence the 16-byte alignment requested here.
ALIGN 16
|
||||
x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
|
||||
x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
|
||||
x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
|
||||
x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
|
||||
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
|
||||
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
|
||||
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
|
||||
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
|
||||
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
|
||||
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
|
||||
; n7-n14: 8-byte patterns for the general purpose registers r7-r14; checked_call
;         uses the entries from n<free_regs> up to n14.
n7: dq 0x21f86d66c8ca00ce
|
||||
n8: dq 0x75b6ba21077c48ad
|
||||
n9: dq 0xed56bb2dcb3c7736
|
||||
n10: dq 0x8bda43d3fd1a7e06
|
||||
n11: dq 0xb64a9c9e5d318408
|
||||
n12: dq 0xdf9a54b303f1d3a3
|
||||
n13: dq 0x4a75479abd64e097
|
||||
n14: dq 0x249214109d5d1c88
|
||||
%endif
|
||||
|
||||
SECTION .text
|
||||
|
||||
cextern fail_func
|
||||
|
||||
; max number of args used by any asm function.
|
||||
; (max_args % 4) must equal 3 for stack alignment
|
||||
%define max_args 15
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; int checkasm_stack_clobber(uint64_t clobber, ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
; Overwrites a (max_args+6)*8 byte region directly below the current stack
; pointer with the value of the first argument (r0), then puts rsp back where
; it was. No register other than r1 (loop offset) is written by this code.
; cglobal parameters (x86inc order): 1 argument, 2 general purpose registers.
cglobal stack_clobber, 1,2
|
||||
; Clobber the stack with junk below the stack pointer
|
||||
%define size (max_args+6)*8
|
||||
SUB rsp, size
|
||||
; r1 = byte offset of the highest qword in the region; the loop walks down to 0
mov r1, size-8
|
||||
.loop:
|
||||
mov [rsp+r1], r0
|
||||
sub r1, 8
|
||||
; keep looping while the offset is still >= 0 after the decrement
jge .loop
|
||||
ADD rsp, size
|
||||
RET
|
||||
|
||||
; free_regs is the index of the first general purpose register that
; checked_call (x86_64) fills with a sentinel and verifies after the call:
; registers r<free_regs>..r14 are checked, r0..r<free_regs-1> are not.
; That gives r7-r14 on WIN64 and r9-r14 otherwise.
%if WIN64
|
||||
%assign free_regs 7
|
||||
%else
|
||||
%assign free_regs 9
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
INIT_XMM
|
||||
; x86_64 implementation of checked_call.
; Calls the function whose address is the first argument, forwarding the
; remaining arguments, and verifies afterwards that r<free_regs>..r14 (and, on
; WIN64, m6-m15) still hold the sentinel values loaded before the call. If any
; of them changed, fail_func is called with error_message. The return value of
; the called function (rax) is preserved across the fail_func call.
; cglobal parameters (x86inc order): 2 arguments, 15 general purpose registers,
; 16 XMM registers, max_args*8+8 bytes of stack.
cglobal checked_call, 2,15,16,max_args*8+8
|
||||
; keep the function pointer in r6; r0 is reloaded with the first forwarded argument below
mov r6, r0
|
||||
|
||||
; All arguments have been pushed on the stack instead of registers in order to
|
||||
; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
|
||||
; r6mp..r11mp are the x86inc names for the memory slots of parameters 6..11
mov r0, r6mp
|
||||
mov r1, r7mp
|
||||
mov r2, r8mp
|
||||
mov r3, r9mp
|
||||
%if UNIX64
|
||||
mov r4, r10mp
|
||||
mov r5, r11mp
|
||||
; copy the remaining max_args-6 arguments from the caller's stack area to the
; bottom of the stack area reserved by cglobal, using r9 as scratch
%assign i 6
|
||||
%rep max_args-6
|
||||
mov r9, [rsp+stack_offset+(i+1)*8]
|
||||
mov [rsp+(i-6)*8], r9
|
||||
%assign i i+1
|
||||
%endrep
|
||||
%else
|
||||
; copy the remaining max_args-4 arguments; the destination starts at [rsp+4*8],
; leaving the first four qwords untouched (presumably the Win64 shadow space -
; confirm against the calling convention)
%assign i 4
|
||||
%rep max_args-4
|
||||
mov r9, [rsp+stack_offset+(i+7)*8]
|
||||
mov [rsp+i*8], r9
|
||||
%assign i i+1
|
||||
%endrep
|
||||
%endif
|
||||
|
||||
%if WIN64
|
||||
; load m6-m15 with the sentinels x6-x15
%assign i 6
|
||||
%rep 16-6
|
||||
mova m %+ i, [x %+ i]
|
||||
%assign i i+1
|
||||
%endrep
|
||||
%endif
|
||||
|
||||
; load r14 down to r<free_regs> with the sentinels n14..n<free_regs>
%assign i 14
|
||||
%rep 15-free_regs
|
||||
mov r %+ i, [n %+ i]
|
||||
%assign i i-1
|
||||
%endrep
|
||||
call r6
|
||||
; xor each checked register with its sentinel (zero if unchanged) and
; accumulate the results in r14; r14 ends up zero only if all were preserved
%assign i 14
|
||||
%rep 15-free_regs
|
||||
xor r %+ i, [n %+ i]
|
||||
or r14, r %+ i
|
||||
%assign i i-1
|
||||
%endrep
|
||||
|
||||
%if WIN64
|
||||
; same check for m6-m15, accumulated in m6
%assign i 6
|
||||
%rep 16-6
|
||||
pxor m %+ i, [x %+ i]
|
||||
por m6, m %+ i
|
||||
%assign i i+1
|
||||
%endrep
|
||||
; squeeze the 128-bit result into 64 bits (a non-zero word saturates to a
; non-zero byte) and merge it into r14
packsswb m6, m6
|
||||
movq r5, m6
|
||||
or r14, r5
|
||||
%endif
|
||||
|
||||
; the zero flag comes from the last "or r14, ..." above
jz .ok
|
||||
; failure path: stash the callee's return value in r9 around the fail_func call
; (presumably r9 maps to a callee-saved register - confirm against x86inc)
mov r9, rax
|
||||
lea r0, [error_message]
|
||||
call fail_func
|
||||
mov rax, r9
|
||||
.ok:
|
||||
RET
|
||||
|
||||
%else
|
||||
|
||||
; just random numbers to reduce the chance of incidental match
|
||||
; n3-n6: 32-bit sentinel values that the x86_32 checked_call moves into r3-r6
; before the call and xors against afterwards.
%define n3 dword 0x6549315c
|
||||
%define n4 dword 0xe02f3e23
|
||||
%define n5 dword 0xb78d0d1d
|
||||
%define n6 dword 0x33627ba7
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
; x86_32 implementation of checked_call.
; Calls the function whose address is the first argument with a fresh copy of
; max_args stack arguments, then verifies that r3-r6 still hold the sentinels
; n3-n6. If any of them changed, fail_func is called with error_message. The
; return value of the called function (eax) is preserved across that call.
; cglobal parameters (x86inc order): 1 argument, 7 general purpose registers.
cglobal checked_call, 1,7
|
||||
; load the sentinels
mov r3, n3
|
||||
|
||||
mov r4, n4
|
||||
mov r5, n5
|
||||
mov r6, n6
|
||||
; Re-push max_args dwords. Every PUSH lowers esp by 4, so the constant
; esp-relative offset walks the caller's arguments from the last one down to
; the first. (The 20 presumably covers the return address plus the registers
; saved by the cglobal prologue - confirm against x86inc.)
%rep max_args
|
||||
PUSH dword [esp+20+max_args*4]
|
||||
%endrep
|
||||
call r0
|
||||
; xor with the sentinels (zero if unchanged) and fold everything into r3
xor r3, n3
|
||||
xor r4, n4
|
||||
xor r5, n5
|
||||
xor r6, n6
|
||||
or r3, r4
|
||||
or r5, r6
|
||||
or r3, r5
|
||||
jz .ok
|
||||
; failure path: keep the callee's return value in r3 and pass the address of
; error_message to fail_func in the top stack slot
mov r3, eax
|
||||
lea r0, [error_message]
|
||||
mov [esp], r0
|
||||
call fail_func
|
||||
mov eax, r3
|
||||
.ok:
|
||||
; drop the max_args dwords pushed above
add esp, max_args*4
|
||||
REP_RET
|
||||
|
||||
%endif ; ARCH_X86_64
|
Loading…
Reference in New Issue
Block a user