mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avfilter,swresample,swscale: use fabs, fabsf instead of FFABS
It is well known that fabs and fabsf are at least as fast as, and sometimes faster than, the FFABS macro, at least on the gcc+glibc combination. For instance, see the reference: http://patchwork.sourceware.org/patch/6735/. This was a patch to glibc that removed its uses of such a macro. The reason essentially boils down to fabs using the compiler's __builtin_fabs, while with FFABS the compiler has to infer that it can avoid a branch and simply clear the sign bit. Usually the inference works, but sometimes it does not. This may be easily checked by looking at the asm. This also has the added benefit of reducing macro usage, which is problematic when the argument has side effects. Note that avcodec is not handled here, as it is huge and most things there are integer arithmetic anyway. Tested with FATE. Reviewed-by: Clément Bœsch <u@pkh.me> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
This commit is contained in:
parent
dde8e5ad02
commit
8507b98c10
@ -146,7 +146,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||||||
double *buffer = s->buffer;
|
double *buffer = s->buffer;
|
||||||
AVFrame *out;
|
AVFrame *out;
|
||||||
double *dst;
|
double *dst;
|
||||||
int nbuf = inlink->sample_rate * (FFABS(delay) / 1000.);
|
int nbuf = inlink->sample_rate * (fabs(delay) / 1000.);
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
nbuf -= nbuf % 2;
|
nbuf -= nbuf % 2;
|
||||||
|
@ -220,7 +220,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
|
|||||||
cx = sx * sqrtf(1 - 0.5*sy*sy);
|
cx = sx * sqrtf(1 - 0.5*sy*sy);
|
||||||
cy = sy * sqrtf(1 - 0.5*sx*sx);
|
cy = sy * sqrtf(1 - 0.5*sx*sx);
|
||||||
x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
|
x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
|
||||||
y = s->h - s->h * FFABS(cx + cy) * .7;
|
y = s->h - s->h * fabsf(cx + cy) * .7;
|
||||||
}
|
}
|
||||||
|
|
||||||
draw_dot(s, x, y);
|
draw_dot(s, x, y);
|
||||||
@ -244,7 +244,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
|
|||||||
cx = sx * sqrtf(1 - 0.5 * sy * sy);
|
cx = sx * sqrtf(1 - 0.5 * sy * sy);
|
||||||
cy = sy * sqrtf(1 - 0.5 * sx * sx);
|
cy = sy * sqrtf(1 - 0.5 * sx * sx);
|
||||||
x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
|
x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
|
||||||
y = s->h - s->h * FFABS(cx + cy) * .7;
|
y = s->h - s->h * fabsf(cx + cy) * .7;
|
||||||
}
|
}
|
||||||
|
|
||||||
draw_dot(s, x, y);
|
draw_dot(s, x, y);
|
||||||
|
@ -371,7 +371,7 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
tlength = s->timeclamp;
|
tlength = s->timeclamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
volume = FFABS(av_expr_eval(volume_expr, expr_vars_val, NULL));
|
volume = fabs(av_expr_eval(volume_expr, expr_vars_val, NULL));
|
||||||
if (isnan(volume)) {
|
if (isnan(volume)) {
|
||||||
av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq);
|
av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq);
|
||||||
volume = VOLUME_MIN;
|
volume = VOLUME_MIN;
|
||||||
|
@ -163,7 +163,7 @@ static void generate_window_func(float *lut, int N, int win_func, float *overlap
|
|||||||
break;
|
break;
|
||||||
case WFUNC_BARTLETT:
|
case WFUNC_BARTLETT:
|
||||||
for (n = 0; n < N; n++)
|
for (n = 0; n < N; n++)
|
||||||
lut[n] = 1.-FFABS((n-(N-1)/2.)/((N-1)/2.));
|
lut[n] = 1.-fabs((n-(N-1)/2.)/((N-1)/2.));
|
||||||
*overlap = 0.5;
|
*overlap = 0.5;
|
||||||
break;
|
break;
|
||||||
case WFUNC_HANNING:
|
case WFUNC_HANNING:
|
||||||
@ -207,7 +207,7 @@ static void generate_window_func(float *lut, int N, int win_func, float *overlap
|
|||||||
break;
|
break;
|
||||||
case WFUNC_BHANN:
|
case WFUNC_BHANN:
|
||||||
for (n = 0; n < N; n++)
|
for (n = 0; n < N; n++)
|
||||||
lut[n] = 0.62-0.48*FFABS(n/(double)(N-1)-.5)-0.38*cos(2*M_PI*n/(N-1));
|
lut[n] = 0.62-0.48*fabs(n/(double)(N-1)-.5)-0.38*cos(2*M_PI*n/(N-1));
|
||||||
*overlap = 0.5;
|
*overlap = 0.5;
|
||||||
break;
|
break;
|
||||||
case WFUNC_SINE:
|
case WFUNC_SINE:
|
||||||
|
@ -558,9 +558,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
|
|||||||
ebur128->true_peaks_per_frame[ch] = 0.0;
|
ebur128->true_peaks_per_frame[ch] = 0.0;
|
||||||
for (idx_insample = 0; idx_insample < ret; idx_insample++) {
|
for (idx_insample = 0; idx_insample < ret; idx_insample++) {
|
||||||
for (ch = 0; ch < nb_channels; ch++) {
|
for (ch = 0; ch < nb_channels; ch++) {
|
||||||
ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], FFABS(*swr_samples));
|
ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], fabs(*swr_samples));
|
||||||
ebur128->true_peaks_per_frame[ch] = FFMAX(ebur128->true_peaks_per_frame[ch],
|
ebur128->true_peaks_per_frame[ch] = FFMAX(ebur128->true_peaks_per_frame[ch],
|
||||||
FFABS(*swr_samples));
|
fabs(*swr_samples));
|
||||||
swr_samples++;
|
swr_samples++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -586,7 +586,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
|
|||||||
double bin;
|
double bin;
|
||||||
|
|
||||||
if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
|
if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
|
||||||
ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], FFABS(*samples));
|
ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(*samples));
|
||||||
|
|
||||||
ebur128->x[ch * 3] = *samples++; // set X[i]
|
ebur128->x[ch * 3] = *samples++; // set X[i]
|
||||||
|
|
||||||
|
@ -241,7 +241,7 @@ DEFINE_BLEND8(lighten, FFMAX(A, B))
|
|||||||
DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255)))
|
DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255)))
|
||||||
DEFINE_BLEND8(dodge, DODGE(A, B))
|
DEFINE_BLEND8(dodge, DODGE(A, B))
|
||||||
DEFINE_BLEND8(burn, BURN(A, B))
|
DEFINE_BLEND8(burn, BURN(A, B))
|
||||||
DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - FFABS(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - FFABS(B - 127.5)/255))
|
DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
|
||||||
DEFINE_BLEND8(exclusion, A + B - 2 * A * B / 255)
|
DEFINE_BLEND8(exclusion, A + B - 2 * A * B / 255)
|
||||||
DEFINE_BLEND8(pinlight, (B < 128) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 128)))
|
DEFINE_BLEND8(pinlight, (B < 128) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 128)))
|
||||||
DEFINE_BLEND8(phoenix, FFMIN(A, B) - FFMAX(A, B) + 255)
|
DEFINE_BLEND8(phoenix, FFMIN(A, B) - FFMAX(A, B) + 255)
|
||||||
@ -280,7 +280,7 @@ DEFINE_BLEND16(lighten, FFMAX(A, B))
|
|||||||
DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535)))
|
DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535)))
|
||||||
DEFINE_BLEND16(dodge, DODGE(A, B))
|
DEFINE_BLEND16(dodge, DODGE(A, B))
|
||||||
DEFINE_BLEND16(burn, BURN(A, B))
|
DEFINE_BLEND16(burn, BURN(A, B))
|
||||||
DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - FFABS(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - FFABS(B - 32767.5)/65535))
|
DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))
|
||||||
DEFINE_BLEND16(exclusion, A + B - 2 * A * B / 65535)
|
DEFINE_BLEND16(exclusion, A + B - 2 * A * B / 65535)
|
||||||
DEFINE_BLEND16(pinlight, (B < 32768) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 32768)))
|
DEFINE_BLEND16(pinlight, (B < 32768) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 32768)))
|
||||||
DEFINE_BLEND16(phoenix, FFMIN(A, B) - FFMAX(A, B) + 65535)
|
DEFINE_BLEND16(phoenix, FFMIN(A, B) - FFMAX(A, B) + 65535)
|
||||||
|
@ -367,10 +367,10 @@ static av_always_inline void filter_freq_##bsize(const float *src, int src_lines
|
|||||||
float *b = &tmp_block2[i]; \
|
float *b = &tmp_block2[i]; \
|
||||||
/* frequency filtering */ \
|
/* frequency filtering */ \
|
||||||
if (expr) { \
|
if (expr) { \
|
||||||
var_values[VAR_C] = FFABS(*b); \
|
var_values[VAR_C] = fabsf(*b); \
|
||||||
*b *= av_expr_eval(expr, var_values, NULL); \
|
*b *= av_expr_eval(expr, var_values, NULL); \
|
||||||
} else { \
|
} else { \
|
||||||
if (FFABS(*b) < sigma_th) \
|
if (fabsf(*b) < sigma_th) \
|
||||||
*b = 0; \
|
*b = 0; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
|
@ -223,7 +223,7 @@ static int blend_frames16(AVFilterContext *ctx, float interpolate,
|
|||||||
}
|
}
|
||||||
// decide if the shot-change detection allows us to blend two frames
|
// decide if the shot-change detection allows us to blend two frames
|
||||||
if (interpolate_scene_score < s->scene_score && copy_src2) {
|
if (interpolate_scene_score < s->scene_score && copy_src2) {
|
||||||
uint16_t src2_factor = FFABS(interpolate) * (1 << (s->bitdepth - 8));
|
uint16_t src2_factor = fabsf(interpolate) * (1 << (s->bitdepth - 8));
|
||||||
uint16_t src1_factor = s->max - src2_factor;
|
uint16_t src1_factor = s->max - src2_factor;
|
||||||
const int half = s->max / 2;
|
const int half = s->max / 2;
|
||||||
const int uv = (s->max + 1) * half;
|
const int uv = (s->max + 1) * half;
|
||||||
@ -287,7 +287,7 @@ static int blend_frames8(AVFilterContext *ctx, float interpolate,
|
|||||||
}
|
}
|
||||||
// decide if the shot-change detection allows us to blend two frames
|
// decide if the shot-change detection allows us to blend two frames
|
||||||
if (interpolate_scene_score < s->scene_score && copy_src2) {
|
if (interpolate_scene_score < s->scene_score && copy_src2) {
|
||||||
uint16_t src2_factor = FFABS(interpolate);
|
uint16_t src2_factor = fabsf(interpolate);
|
||||||
uint16_t src1_factor = 256 - src2_factor;
|
uint16_t src1_factor = 256 - src2_factor;
|
||||||
int plane, line, pixel;
|
int plane, line, pixel;
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ static int16_t *precalc_coefs(double dist25, int depth)
|
|||||||
|
|
||||||
for (i = -256<<LUT_BITS; i < 256<<LUT_BITS; i++) {
|
for (i = -256<<LUT_BITS; i < 256<<LUT_BITS; i++) {
|
||||||
double f = ((i<<(9-LUT_BITS)) + (1<<(8-LUT_BITS)) - 1) / 512.0; // midpoint of the bin
|
double f = ((i<<(9-LUT_BITS)) + (1<<(8-LUT_BITS)) - 1) / 512.0; // midpoint of the bin
|
||||||
simil = FFMAX(0, 1.0 - FFABS(f) / 255.0);
|
simil = FFMAX(0, 1.0 - fabs(f) / 255.0);
|
||||||
C = pow(simil, gamma) * 256.0 * f;
|
C = pow(simil, gamma) * 256.0 * f;
|
||||||
ct[(256<<LUT_BITS)+i] = lrint(C);
|
ct[(256<<LUT_BITS)+i] = lrint(C);
|
||||||
}
|
}
|
||||||
|
@ -317,7 +317,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
if (av_cmp_q(st->sample_aspect_ratio, codec->sample_aspect_ratio)
|
if (av_cmp_q(st->sample_aspect_ratio, codec->sample_aspect_ratio)
|
||||||
&& FFABS(av_q2d(st->sample_aspect_ratio) - av_q2d(codec->sample_aspect_ratio)) > 0.004*av_q2d(st->sample_aspect_ratio)
|
&& fabs(av_q2d(st->sample_aspect_ratio) - av_q2d(codec->sample_aspect_ratio)) > 0.004*av_q2d(st->sample_aspect_ratio)
|
||||||
) {
|
) {
|
||||||
if (st->sample_aspect_ratio.num != 0 &&
|
if (st->sample_aspect_ratio.num != 0 &&
|
||||||
st->sample_aspect_ratio.den != 0 &&
|
st->sample_aspect_ratio.den != 0 &&
|
||||||
|
@ -374,7 +374,7 @@ int main(int argc, char **argv){
|
|||||||
sum_aa+= a*a;
|
sum_aa+= a*a;
|
||||||
sum_bb+= b*b;
|
sum_bb+= b*b;
|
||||||
sum_ab+= a*b;
|
sum_ab+= a*b;
|
||||||
maxdiff= FFMAX(maxdiff, FFABS(a-b));
|
maxdiff= FFMAX(maxdiff, fabs(a-b));
|
||||||
}
|
}
|
||||||
sse= sum_aa + sum_bb - 2*sum_ab;
|
sse= sum_aa + sum_bb - 2*sum_ab;
|
||||||
if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
|
if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
|
||||||
@ -404,7 +404,7 @@ int main(int argc, char **argv){
|
|||||||
sum_aa+= a*a;
|
sum_aa+= a*a;
|
||||||
sum_bb+= b*b;
|
sum_bb+= b*b;
|
||||||
sum_ab+= a*b;
|
sum_ab+= a*b;
|
||||||
maxdiff= FFMAX(maxdiff, FFABS(a-b));
|
maxdiff= FFMAX(maxdiff, fabs(a-b));
|
||||||
}
|
}
|
||||||
sse= sum_aa + sum_bb - 2*sum_ab;
|
sse= sum_aa + sum_bb - 2*sum_ab;
|
||||||
if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
|
if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
|
||||||
|
Loading…
Reference in New Issue
Block a user