From 271593f123caf46d4703f278bc59134c07cd47d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= Date: Mon, 5 Feb 2007 21:16:31 +0000 Subject: [PATCH] fix multichannel decoding Originally committed as revision 7839 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/armv4l/simple_idct_armv6.S | 144 ++++++++++++----- libavcodec/dtsdec.c | 214 +++++++++++--------------- 2 files changed, 192 insertions(+), 166 deletions(-) diff --git a/libavcodec/armv4l/simple_idct_armv6.S b/libavcodec/armv4l/simple_idct_armv6.S index 401e1910df..328e0a9147 100644 --- a/libavcodec/armv4l/simple_idct_armv6.S +++ b/libavcodec/armv4l/simple_idct_armv6.S @@ -47,6 +47,19 @@ w42n: .long W42n w46: .long W46 w57: .long W57 + .macro idct_row_start shift + ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ + ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ + ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ + mov a2, #(1<<(\shift-1)) + smlad v1, a3, ip, a2 + smlsd v4, a3, ip, a2 + ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ + smlad v2, a3, lr, a2 + smlsd v3, a3, lr, a2 + smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ + smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ + .endm /* Compute partial IDCT of single row. shift = left-shift amount @@ -58,17 +71,6 @@ w57: .long W57 Output in registers v1--v8 */ .macro idct_row shift - ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ - mov a2, #(1<<(\shift-1)) - smlad v1, a3, ip, a2 - smlsd v4, a3, ip, a2 - ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ - ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ - smlad v2, a3, lr, a2 - smlsd v3, a3, lr, a2 - - smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ - smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ ldr lr, [a1, #12] /* lr = row[7,5] */ pkhtb a3, ip, v7, asr #16 /* a4 = W7 | (W3 << 16) */ pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */ @@ -99,16 +101,6 @@ w57: .long W57 Output in registers v1--v8 */ .macro idct_row4 shift - ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ - ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ - mov a2, #(1<<(\shift-1)) - smlad v1, a3, ip, a2 - smlsd v4, a3, ip, a2 - ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ - smlad v2, a3, lr, a2 - smlsd v3, a3, lr, a2 - smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ - smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ pkhtb a3, ip, v7, asr #16 /* a4 = W7 | (W3 << 16) */ pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */ smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */ @@ -193,28 +185,100 @@ w57: .long W57 .align .func idct_row_armv6 idct_row_armv6: - str lr, [sp, #-4]! - - ldr lr, [a1, #12] /* lr = row[7,5] */ - ldr ip, [a1, #4] /* ip = row[6,4] */ + ldr fp, [a1, #12] /* fp = row[7,5] */ + ldr v7, [a1, #4] /* v7 = row[6,4] */ ldr a4, [a1, #8] /* a4 = row[3,1] */ ldr a3, [a1] /* a3 = row[2,0] */ - orrs lr, lr, ip - cmpeq lr, a4 - cmpeq lr, a3, lsr #16 + mov ip, #(1<<(ROW_SHIFT-1)) + orrs v5, fp, v7 + cmpeq v5, a4 + cmpeq v5, a3, lsr #16 beq 1f - str a2, [sp, #-4]! - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ - cmp lr, #0 - beq 2f + cmp v5, #0 + stmfd sp!, {a2, lr} + ldr v5, [pc, #(w42-.-8)] /* v5 = W4 | (W2 << 16) */ + ldr v6, [pc, #(w46-.-8)] /* v6 = W4 | (W6 << 16) */ + ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ - idct_row ROW_SHIFT - b 3f + smlad v1, a3, v5, ip + smlsd v4, a3, v5, ip + ldr a2, [pc, #(w13-.-8)] /* a2 = W1 | (W3 << 16) */ + smlad v2, a3, v6, ip + smlsd v3, a3, v6, ip + smusdx lr, a4, v7 /* lr = B3 = W7*row[1] - W5*row[3] */ + smuad v5, a4, a2 /* v5 = B0 = W1*row[1] + W3*row[3] */ -2: idct_row4 ROW_SHIFT + pkhtb a3, a2, v7, asr #16 /* a3 = W7 | (W3 << 16) */ + pkhbt ip, a2, v7, lsl #16 /* ip = W1 | (W5 << 16) */ + smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */ + smusdx a4, a4, ip /* v7 = B2 = W5*row[1] - W1*row[3] */ + beq 3f + + smlad v5, fp, v7, v5 /* B0 += W5*row[5] + W7*row[7] */ + smlad v7, fp, a3, a4 /* B2 += W7*row[5] + W3*row[7] */ + ldr a4, [pc, #(w42n-.-8)] /* a4 = -W4 | (-W2 << 16) */ + ldr a3, [a1, #4] /* a3 = row[6,4] */ + smlsdx lr, fp, a2, lr /* B3 += W3*row[5] - W1*row[7] */ + ldr a2, [pc, #(w46-.-8)] /* a2 = W4 | (W6 << 16) */ + smlad v6, fp, ip, v6 /* B1 -= W1*row[5] + W5*row[7] */ + + smlad v2, a3, a4, v2 /* A1 += -W4*row[4] - W2*row[6] */ + smlsd v3, a3, a4, v3 /* A2 += -W4*row[4] + W2*row[6] */ + smlad v1, a3, a2, v1 /* A0 += W4*row[4] + W6*row[6] */ + smlsd v4, a3, a2, v4 /* A3 += W4*row[4] - W6*row[6] */ + + ldr a2, [sp], #4 + add a4, v1, v5 /* a4 = A0 + B0 */ + sub a3, v1, v5 /* a3 = A0 - B0 */ + mov v1, a4, asr #ROW_SHIFT + mov v5, a3, asr #ROW_SHIFT + + sub a4, v2, v6 /* a4 = A1 + B1 */ + add a3, v2, v6 /* a3 = A1 - B1 */ + mov v2, a4, asr #ROW_SHIFT + mov v6, a3, asr #ROW_SHIFT + + add a4, v3, v7 /* a4 = A2 + B2 */ + sub a3, v3, v7 /* a3 = A2 - B2 */ + mov v3, a4, asr #ROW_SHIFT + mov v7, a3, asr #ROW_SHIFT + + add a4, v4, lr /* a4 = A3 + B3 */ + sub a3, v4, lr /* a3 = A3 - B3 */ + mov v4, a4, asr #ROW_SHIFT + mov fp, a3, asr #ROW_SHIFT + + strh v1, [a2] + strh v2, [a2, #(16*2)] + strh v3, [a2, #(16*4)] + strh v4, [a2, #(16*6)] + strh fp, [a2, #(16*1)] + strh v7, [a2, #(16*3)] + strh v6, [a2, #(16*5)] + strh v5, [a2, #(16*7)] + + ldr pc, [sp], #4 3: ldr a2, [sp], #4 - idct_finish_shift ROW_SHIFT + add v7, v1, v5 /* v7 = A0 + B0 */ + sub a3, v1, v5 /* a3 = A0 - B0 */ + mov v1, v7, asr #ROW_SHIFT + mov v5, a3, asr #ROW_SHIFT + + sub v7, v2, v6 /* v7 = A1 + B1 */ + add a3, v2, v6 /* a3 = A1 - B1 */ + mov v2, v7, asr #ROW_SHIFT + mov v6, a3, asr #ROW_SHIFT + + add v7, v3, a4 /* v7 = A2 + B2 */ + sub a3, v3, a4 /* a3 = A2 - B2 */ + mov v3, v7, asr #ROW_SHIFT + mov v7, a3, asr #ROW_SHIFT + + add a4, v4, lr /* xx = A3 + B3 */ + sub a3, v4, lr /* a3 = A3 - B3 */ + mov v4, a4, asr #ROW_SHIFT + mov fp, a3, asr #ROW_SHIFT strh v1, [a2] strh v2, [a2, #(16*2)] @@ -236,7 +300,7 @@ idct_row_armv6: strh a3, [a2, #(16*3)] strh a3, [a2, #(16*5)] strh a3, [a2, #(16*7)] - ldr pc, [sp], #4 + mov pc, lr .endfunc /* @@ -250,8 +314,8 @@ idct_col_armv6: stmfd sp!, {a2, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldr a2, [sp], #4 idct_finish_shift COL_SHIFT @@ -280,8 +344,8 @@ idct_col_put_armv6: stmfd sp!, {a2, a3, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldmfd sp!, {a2, a3} idct_finish_shift_sat COL_SHIFT @@ -312,8 +376,8 @@ idct_col_add_armv6: stmfd sp!, {a2, a3, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldmfd sp!, {a2, a3} idct_finish diff --git a/libavcodec/dtsdec.c b/libavcodec/dtsdec.c index 6c62f4ac37..6763572ddd 100644 --- a/libavcodec/dtsdec.c +++ b/libavcodec/dtsdec.c @@ -28,13 +28,8 @@ #define BUFFER_SIZE 18726 #define HEADER_SIZE 14 -#ifdef LIBDTS_FIXED -#define CONVERT_LEVEL (1 << 26) -#define CONVERT_BIAS 0 -#else #define CONVERT_LEVEL 1 -#define CONVERT_BIAS 384 -#endif +#define CONVERT_BIAS 0 typedef struct DTSContext { dts_state_t *state; @@ -44,151 +39,120 @@ typedef struct DTSContext { } DTSContext; static inline int16_t -convert(int32_t i) +convert(sample_t s) { -#ifdef LIBDTS_FIXED - i >>= 15; -#else - i -= 0x43c00000; -#endif - return (i > 32767) ? 32767 : ((i < -32768) ? -32768 : i); + return s * 0x7fff; } static void -convert2s16_2(sample_t * _f, int16_t * s16) +convert2s16_multi(sample_t *f, int16_t *s16, int flags) { int i; - int32_t *f = (int32_t *) _f; - for(i = 0; i < 256; i++) { - s16[2 * i] = convert(f[i]); - s16[2 * i + 1] = convert(f[i + 256]); - } -} - -static void -convert2s16_4(sample_t * _f, int16_t * s16) -{ - int i; - int32_t *f = (int32_t *) _f; - - for(i = 0; i < 256; i++) { - s16[4 * i] = convert(f[i]); - s16[4 * i + 1] = convert(f[i + 256]); - s16[4 * i + 2] = convert(f[i + 512]); - s16[4 * i + 3] = convert(f[i + 768]); - } -} - -static void -convert2s16_5(sample_t * _f, int16_t * s16) -{ - int i; - int32_t *f = (int32_t *) _f; - - for(i = 0; i < 256; i++) { - s16[5 * i] = convert(f[i]); - s16[5 * i + 1] = convert(f[i + 256]); - s16[5 * i + 2] = convert(f[i + 512]); - s16[5 * i + 3] = convert(f[i + 768]); - s16[5 * i + 4] = convert(f[i + 1024]); - } -} - -static void -convert2s16_multi(sample_t * _f, int16_t * s16, int flags) -{ - int i; - int32_t *f = (int32_t *) _f; - - switch (flags) { + switch(flags & (DTS_CHANNEL_MASK | DTS_LFE)){ case DTS_MONO: - for(i = 0; i < 256; i++) { - s16[5 * i] = s16[5 * i + 1] = s16[5 * i + 2] = s16[5 * i + 3] = - 0; - s16[5 * i + 4] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert(f[i]); } - break; case DTS_CHANNEL: case DTS_STEREO: case DTS_DOLBY: - convert2s16_2(_f, s16); - break; + for(i = 0; i < 256; i++){ + s16[2*i] = convert(f[i]); + s16[2*i+1] = convert(f[i+256]); + } case DTS_3F: - for(i = 0; i < 256; i++) { - s16[5 * i] = convert(f[i]); - s16[5 * i + 1] = convert(f[i + 512]); - s16[5 * i + 2] = s16[5 * i + 3] = 0; - s16[5 * i + 4] = convert(f[i + 256]); + for(i = 0; i < 256; i++){ + s16[5*i] = convert(f[i+256]); + s16[5*i+1] = convert(f[i+512]); + s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert(f[i]); } - break; case DTS_2F2R: - convert2s16_4(_f, s16); - break; - case DTS_3F2R: - convert2s16_5(_f, s16); - break; - case DTS_MONO | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = s16[6 * i + 1] = s16[6 * i + 2] = s16[6 * i + 3] = - 0; - s16[6 * i + 4] = convert(f[i + 256]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[4*i] = convert(f[i]); + s16[4*i+1] = convert(f[i+256]); + s16[4*i+2] = convert(f[i+512]); + s16[4*i+3] = convert(f[i+768]); + } + case DTS_3F2R: + for(i = 0; i < 256; i++){ + s16[5*i] = convert(f[i+256]); + s16[5*i+1] = convert(f[i+512]); + s16[5*i+2] = convert(f[i+768]); + s16[5*i+3] = convert(f[i+1024]); + s16[5*i+4] = convert(f[i]); + } + case DTS_MONO | DTS_LFE: + for(i = 0; i < 256; i++){ + s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+256]); } - break; case DTS_CHANNEL | DTS_LFE: case DTS_STEREO | DTS_LFE: case DTS_DOLBY | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 512]); - s16[6 * i + 2] = s16[6 * i + 3] = s16[6 * i + 4] = 0; - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i]); + s16[6*i+1] = convert(f[i+256]); + s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; + s16[6*i+5] = convert(f[i+512]); } - break; case DTS_3F | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 768]); - s16[6 * i + 2] = s16[6 * i + 3] = 0; - s16[6 * i + 4] = convert(f[i + 512]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i+256]); + s16[6*i+1] = convert(f[i+512]); + s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+768]); } - break; case DTS_2F2R | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 512]); - s16[6 * i + 2] = convert(f[i + 768]); - s16[6 * i + 3] = convert(f[i + 1024]); - s16[6 * i + 4] = 0; - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i]); + s16[6*i+1] = convert(f[i+256]); + s16[6*i+2] = convert(f[i+512]); + s16[6*i+3] = convert(f[i+768]); + s16[6*i+4] = 0; + s16[6*i+5] = convert(f[i+1024]); } - break; case DTS_3F2R | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 768]); - s16[6 * i + 2] = convert(f[i + 1024]); - s16[6 * i + 3] = convert(f[i + 1280]); - s16[6 * i + 4] = convert(f[i + 512]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i+256]); + s16[6*i+1] = convert(f[i+512]); + s16[6*i+2] = convert(f[i+768]); + s16[6*i+3] = convert(f[i+1024]); + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+1280]); } - break; } } static int channels_multi(int flags) { - if(flags & DTS_LFE) - return 6; - else if(flags & 1) /* center channel */ - return 5; - else if((flags & DTS_CHANNEL_MASK) == DTS_2F2R) - return 4; - else + switch(flags & (DTS_CHANNEL_MASK | DTS_LFE)){ + case DTS_CHANNEL: + case DTS_STEREO: + case DTS_DOLBY: return 2; + case DTS_2F2R: + return 4; + case DTS_MONO: + case DTS_3F: + case DTS_3F2R: + return 5; + case DTS_MONO | DTS_LFE: + case DTS_CHANNEL | DTS_LFE: + case DTS_STEREO | DTS_LFE: + case DTS_DOLBY | DTS_LFE: + case DTS_3F | DTS_LFE: + case DTS_2F2R | DTS_LFE: + case DTS_3F2R | DTS_LFE: + return 6; + } + + return -1; } static int @@ -206,6 +170,7 @@ dts_decode_frame(AVCodecContext * avctx, void *data, int *data_size, int len; level_t level; sample_t bias; + int nblocks; int i; *data_size = 0; @@ -237,7 +202,6 @@ dts_decode_frame(AVCodecContext * avctx, void *data, int *data_size, s->bufpos = s->buf + length; } - flags = 2; /* ???????????? */ level = CONVERT_LEVEL; bias = CONVERT_BIAS; @@ -251,20 +215,18 @@ dts_decode_frame(AVCodecContext * avctx, void *data, int *data_size, avctx->channels = channels_multi(flags); avctx->bit_rate = bit_rate; - for(i = 0; i < dts_blocks_num(s->state); i++) { - int chans; + nblocks = dts_blocks_num(s->state); + for(i = 0; i < nblocks; i++) { if(dts_block(s->state)) { av_log(avctx, AV_LOG_ERROR, "dts_block() failed\n"); goto end; } - chans = channels_multi(flags); - convert2s16_multi(dts_samples(s->state), out_samples, - flags & (DTS_CHANNEL_MASK | DTS_LFE)); + convert2s16_multi(dts_samples(s->state), out_samples, flags); - out_samples += 256 * chans; - *data_size += 256 * sizeof(int16_t) * chans; + out_samples += 256 * avctx->channels; + *data_size += 256 * sizeof(int16_t) * avctx->channels; } end: