From c4c5f5386c83bb8d66f8d67cd8533c8697f06d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= Date: Fri, 22 Nov 2013 20:16:09 +0100 Subject: [PATCH] vp9dsp: add DC only versions for idct/idct. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit before: time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null - real 0m11.125s user 0m11.059s sys 0m0.050s time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null - real 0m10.944s user 0m10.819s sys 0m0.064s after: time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null - real 0m8.153s user 0m8.034s sys 0m0.050s time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null - real 0m8.038s user 0m7.980s sys 0m0.039s Signed-off-by: Martin Storsjö --- libavcodec/vp9dsp.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c index 73006fa1e6..7f86394328 100644 --- a/libavcodec/vp9dsp.c +++ b/libavcodec/vp9dsp.c @@ -944,7 +944,7 @@ static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp) #undef init_intra_pred } -#define itxfm_wrapper(type_a, type_b, sz, bits) \ +#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \ static void \ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ ptrdiff_t stride, \ @@ -953,6 +953,22 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ { \ int i, j; \ int16_t tmp[sz * sz], out[sz]; \ + \ + if (has_dconly && eob == 1) { \ + const int t = (((block[0] * 11585 + (1 << 13)) >> 14) \ + * 11585 + (1 << 13)) >> 14; \ + block[0] = 0; \ + for (i = 0; i < sz; i++) { \ + for (j = 0; j < sz; j++) \ + dst[j * stride] = \ + av_clip_uint8(dst[j * stride] + \ + (bits ? (t + (1 << (bits - 1))) >> bits \ + : t)); \ + dst++; \ + } \ + return; \ + } \ + \ for (i = 0; i < sz; i++) \ type_a ## sz ## _1d(tmp + i * sz, block + i, sz, 0); \ memset(block, 0, sz * sz * sizeof(*block)); \ @@ -967,11 +983,11 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ } \ } -#define itxfm_wrap(sz, bits) \ - itxfm_wrapper(idct, idct, sz, bits) \ - itxfm_wrapper(iadst, idct, sz, bits) \ - itxfm_wrapper(idct, iadst, sz, bits) \ - itxfm_wrapper(iadst, iadst, sz, bits) +#define itxfm_wrap(sz, bits) \ + itxfm_wrapper(idct, idct, sz, bits, 1) \ + itxfm_wrapper(iadst, idct, sz, bits, 0) \ + itxfm_wrapper(idct, iadst, sz, bits, 0) \ + itxfm_wrapper(iadst, iadst, sz, bits, 0) #define IN(x) in[x * stride] @@ -1490,7 +1506,7 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in, out[31] = t0 - t31; } -itxfm_wrapper(idct, idct, 32, 6) +itxfm_wrapper(idct, idct, 32, 6, 1) static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in, ptrdiff_t stride, int pass) @@ -1523,7 +1539,7 @@ static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in, out[3] = t3; } -itxfm_wrapper(iwht, iwht, 4, 0) +itxfm_wrapper(iwht, iwht, 4, 0, 0) #undef IN #undef itxfm_wrapper