From ce611a27be7c9201b5920d8232e68209529065c4 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 21 Aug 2007 16:29:40 +0000 Subject: [PATCH] Change rounding of the horizontal DWT to match the vertical one. This allows some simplifications and optimizations and should not have any effect on quality. Originally committed as revision 10172 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/snowdsp_mmx.c | 44 ++++++++++++++--------------------- libavcodec/snow.c | 12 +++++----- libavcodec/snow.h | 4 ++-- tests/ffmpeg.regression.ref | 6 ++--- tests/rotozoom.regression.ref | 6 ++--- tests/seek.regression.ref | 40 +++++++++++++++---------------- 6 files changed, 51 insertions(+), 61 deletions(-) diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c index f2eb14b2b3..03f622b756 100644 --- a/libavcodec/i386/snowdsp_mmx.c +++ b/libavcodec/i386/snowdsp_mmx.c @@ -111,8 +111,7 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){ i = 0; asm volatile( - "pcmpeqd %%xmm7, %%xmm7 \n\t" - "psrad $29, %%xmm7 \n\t" + "pslld $1, %%xmm7 \n\t" ::); for(; i> W_BS); + b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS); asm volatile( - "pcmpeqd %%mm7, %%mm7 \n\t" - "psrld $29, %%mm7 \n\t" + "pslld $1, %%mm7 \n\t" ::); for(; i>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23)) +#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*4*(src) + 4*(ref) + add + 5 + (5<<27))/(5*16) - (1<<23))) if(mirror_left){ dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); dst += dst_step; @@ -1113,8 +1113,8 @@ static void horizontal_decompose97i(DWTELEM *b, int width){ DWTELEM temp[width]; const int w2= (width+1)>>1; - lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0); - liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); + lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); + liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); } @@ -1150,7 +1150,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w #ifdef liftS b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; #else - b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23); + b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); #endif } } @@ -1344,8 +1344,8 @@ void ff_snow_horizontal_compose97i(DWTELEM *b, int width){ lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); - liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO-1, W_BS, 0, 1); - lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1); + liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); + lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); } static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ diff --git a/libavcodec/snow.h b/libavcodec/snow.h index 19df4ad791..9dd66031cb 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -165,11 +165,11 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ for(; i> W_BS); + dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS); } if(width&1){ - dst[w] = src[w] + ((2 * ref[w] + W_BO-1 + 4 * src[w]) >> W_BS); + dst[w] = src[w] + ((2 * ref[w] + W_BO + 4 * src[w]) >> W_BS); } } diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref index 94ad71085e..ddca176149 100644 --- a/tests/ffmpeg.regression.ref +++ b/tests/ffmpeg.regression.ref @@ -141,9 +141,9 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi 2654678 ./tests/data/a-ffv1.avi 799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv stddev: 0.00 PSNR:99.99 bytes:7602176 -9078723c943de5d79490f54b99e6ea9e *./tests/data/a-snow.avi -156656 ./tests/data/a-snow.avi -f2932084b52e2ede167c9ba21eae0656 *./tests/data/out.yuv +958d649d09b7361d5f00b5b3fcccbcd2 *./tests/data/a-snow.avi +156606 ./tests/data/a-snow.avi +b19cb7f9134f922326028c6bb44e96de *./tests/data/out.yuv stddev: 23.14 PSNR:20.83 bytes:7602176 ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi 3519486 ./tests/data/a-snow53.avi diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref index c113e106dc..21fe28bb64 100644 --- a/tests/rotozoom.regression.ref +++ b/tests/rotozoom.regression.ref @@ -141,9 +141,9 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi 3525804 ./tests/data/a-ffv1.avi dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv stddev: 0.00 PSNR:99.99 bytes:7602176 -40a6e938ac2bd92ee12cd57925e86454 *./tests/data/a-snow.avi -68758 ./tests/data/a-snow.avi -1e356854142898c7c4aab4bfedadf235 *./tests/data/out.yuv +2cfa1bdb443d04a890208a83fd239461 *./tests/data/a-snow.avi +68872 ./tests/data/a-snow.avi +64a0495b7ab53509d3b791465262795c *./tests/data/out.yuv stddev: 10.86 PSNR:27.40 bytes:7602176 3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi 2721980 ./tests/data/a-snow53.avi diff --git a/tests/seek.regression.ref b/tests/seek.regression.ref index 798632b239..d9d4b05ff5 100644 --- a/tests/seek.regression.ref +++ b/tests/seek.regression.ref @@ -2046,51 +2046,51 @@ ret: 0 st:-1 ts:-0.645825 flags:1 ret: 0 st: 0 dts:0.040000 pts:0.040000 pos:9610 size:1075 flags:0 ---------------- tests/data/a-snow.avi -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st:-1 ts:-1.000000 flags:0 -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st:-1 ts:1.894167 flags:1 -ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1 +ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1 ret: 0 st: 0 ts:0.800000 flags:0 -ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1 +ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1 ret:-1 st: 0 ts:-0.320000 flags:1 ret:-1 st:-1 ts:2.576668 flags:0 ret: 0 st:-1 ts:1.470835 flags:1 -ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1 +ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1 ret: 0 st: 0 ts:0.360000 flags:0 -ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1 +ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1 ret:-1 st: 0 ts:-0.760000 flags:1 ret:-1 st:-1 ts:2.153336 flags:0 ret: 0 st:-1 ts:1.047503 flags:1 -ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1 +ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1 ret: 0 st: 0 ts:-0.040000 flags:0 -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 ts:2.840000 flags:1 -ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1 +ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1 ret: 0 st:-1 ts:1.730004 flags:0 -ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1 +ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1 ret: 0 st:-1 ts:0.624171 flags:1 -ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1 +ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1 ret: 0 st: 0 ts:-0.480000 flags:0 -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 ts:2.400000 flags:1 -ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1 +ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1 ret: 0 st:-1 ts:1.306672 flags:0 -ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1 +ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1 ret: 0 st:-1 ts:0.200839 flags:1 -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 ts:-0.920000 flags:0 -ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1 +ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 ts:2.000000 flags:1 -ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1 +ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1 ret: 0 st:-1 ts:0.883340 flags:0 -ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1 +ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1 ret:-1 st:-1 ts:-0.222493 flags:1 ret:-1 st: 0 ts:2.680000 flags:0 ret: 0 st: 0 ts:1.560000 flags:1 -ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1 +ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1 ret: 0 st:-1 ts:0.460008 flags:0 -ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1 +ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1 ret:-1 st:-1 ts:-0.645825 flags:1 ---------------- tests/data/a-snow53.avi