Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-01-13 21:28:01 +02:00)
Merge commit 'aeaf268e52fc11c1f64914a319e0edddf1346d6a'
* commit 'aeaf268e52fc11c1f64914a319e0edddf1346d6a':
  vp3: integrate clear_blocks with idct of previous block.
  mpegvideo: fix loop condition in draw_line()
  dvdsubdec: parse the size from the extradata

Conflicts:
  libavcodec/dvdsubdec.c
  libavcodec/mpegvideo.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
cf061a9c3b
@ -108,14 +108,20 @@ endfunc
|
|||||||
|
|
||||||
function vp3_idct_start_neon
|
function vp3_idct_start_neon
|
||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
|
vmov.i16 q4, #0
|
||||||
|
vmov.i16 q5, #0
|
||||||
movrel r3, vp3_idct_constants
|
movrel r3, vp3_idct_constants
|
||||||
vld1.64 {d0-d1}, [r3,:128]
|
vld1.64 {d0-d1}, [r3,:128]
|
||||||
vld1.64 {d16-d19}, [r2,:128]!
|
vld1.64 {d16-d19}, [r2,:128]
|
||||||
vld1.64 {d20-d23}, [r2,:128]!
|
vst1.64 {q4-q5}, [r2,:128]!
|
||||||
vld1.64 {d24-d27}, [r2,:128]!
|
vld1.64 {d20-d23}, [r2,:128]
|
||||||
|
vst1.64 {q4-q5}, [r2,:128]!
|
||||||
|
vld1.64 {d24-d27}, [r2,:128]
|
||||||
|
vst1.64 {q4-q5}, [r2,:128]!
|
||||||
vadd.s16 q1, q8, q12
|
vadd.s16 q1, q8, q12
|
||||||
vsub.s16 q8, q8, q12
|
vsub.s16 q8, q8, q12
|
||||||
vld1.64 {d28-d31}, [r2,:128]!
|
vld1.64 {d28-d31}, [r2,:128]
|
||||||
|
vst1.64 {q4-q5}, [r2,:128]!
|
||||||
|
|
||||||
vp3_idct_core_neon:
|
vp3_idct_core_neon:
|
||||||
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
|
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
|
||||||
@ -345,10 +351,12 @@ function ff_vp3_idct_add_neon, export=1
|
|||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_vp3_idct_dc_add_neon, export=1
|
function ff_vp3_idct_dc_add_neon, export=1
|
||||||
ldrsh r2, [r2]
|
ldrsh r12, [r2]
|
||||||
mov r3, r0
|
mov r3, r0
|
||||||
add r2, r2, #15
|
add r12, r12, #15
|
||||||
vdup.16 q15, r2
|
vdup.16 q15, r12
|
||||||
|
mov r12, 0
|
||||||
|
strh r12, [r2]
|
||||||
vshr.s16 q15, q15, #5
|
vshr.s16 q15, q15, #5
|
||||||
|
|
||||||
vld1.8 {d0}, [r0,:64], r1
|
vld1.8 {d0}, [r0,:64], r1
|
||||||
|
@ -1666,7 +1666,7 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
|
|||||||
buf += sx + sy * stride;
|
buf += sx + sy * stride;
|
||||||
ex -= sx;
|
ex -= sx;
|
||||||
f = ((ey - sy) << 16) / ex;
|
f = ((ey - sy) << 16) / ex;
|
||||||
for(x= 0; x <= ex; x++){
|
for (x = 0; x <= ex; x++) {
|
||||||
y = (x * f) >> 16;
|
y = (x * f) >> 16;
|
||||||
fr = (x * f) & 0xFFFF;
|
fr = (x * f) & 0xFFFF;
|
||||||
buf[y * stride + x] += (color * (0x10000 - fr)) >> 16;
|
buf[y * stride + x] += (color * (0x10000 - fr)) >> 16;
|
||||||
|
@ -140,6 +140,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
|
|||||||
PUT(b5) dst += stride;
|
PUT(b5) dst += stride;
|
||||||
PUT(b6) dst += stride;
|
PUT(b6) dst += stride;
|
||||||
PUT(b7)
|
PUT(b7)
|
||||||
|
memset(block, 0, sizeof(*block) * 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
|
static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
|
||||||
@ -171,6 +172,7 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
|
|||||||
ADD(b5) dst += stride;
|
ADD(b5) dst += stride;
|
||||||
ADD(b6) dst += stride;
|
ADD(b6) dst += stride;
|
||||||
ADD(b7)
|
ADD(b7)
|
||||||
|
memset(block, 0, sizeof(*block) * 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* HAVE_ALTIVEC */
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
@ -138,6 +138,7 @@ typedef struct Vp3DecodeContext {
|
|||||||
DSPContext dsp;
|
DSPContext dsp;
|
||||||
VideoDSPContext vdsp;
|
VideoDSPContext vdsp;
|
||||||
VP3DSPContext vp3dsp;
|
VP3DSPContext vp3dsp;
|
||||||
|
DECLARE_ALIGNED(16, DCTELEM, block)[64];
|
||||||
int flipped_image;
|
int flipped_image;
|
||||||
int last_slice_end;
|
int last_slice_end;
|
||||||
int skip_loop_filter;
|
int skip_loop_filter;
|
||||||
@ -1458,7 +1459,7 @@ static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int
|
|||||||
static void render_slice(Vp3DecodeContext *s, int slice)
|
static void render_slice(Vp3DecodeContext *s, int slice)
|
||||||
{
|
{
|
||||||
int x, y, i, j, fragment;
|
int x, y, i, j, fragment;
|
||||||
LOCAL_ALIGNED_16(DCTELEM, block, [64]);
|
DCTELEM *block = s->block;
|
||||||
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
|
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
|
||||||
int motion_halfpel_index;
|
int motion_halfpel_index;
|
||||||
uint8_t *motion_source;
|
uint8_t *motion_source;
|
||||||
@ -1571,8 +1572,6 @@ static void render_slice(Vp3DecodeContext *s, int slice)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.clear_block(block);
|
|
||||||
|
|
||||||
/* invert DCT and place (or add) in final output */
|
/* invert DCT and place (or add) in final output */
|
||||||
|
|
||||||
if (s->all_fragments[i].coding_method == MODE_INTRA) {
|
if (s->all_fragments[i].coding_method == MODE_INTRA) {
|
||||||
|
@ -215,14 +215,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
|
|||||||
|
|
||||||
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
|
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
|
||||||
idct(dest, line_size, block, 1);
|
idct(dest, line_size, block, 1);
|
||||||
|
memset(block, 0, sizeof(*block) * 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
|
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
|
||||||
idct(dest, line_size, block, 2);
|
idct(dest, line_size, block, 2);
|
||||||
|
memset(block, 0, sizeof(*block) * 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
|
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
|
||||||
const DCTELEM *block/*align 16*/){
|
DCTELEM *block/*align 16*/){
|
||||||
int i, dc = (block[0] + 15) >> 5;
|
int i, dc = (block[0] + 15) >> 5;
|
||||||
|
|
||||||
for(i = 0; i < 8; i++){
|
for(i = 0; i < 8; i++){
|
||||||
@ -236,6 +238,7 @@ static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
|
|||||||
dest[7] = av_clip_uint8(dest[7] + dc);
|
dest[7] = av_clip_uint8(dest[7] + dc);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
}
|
}
|
||||||
|
block[0] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
|
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
typedef struct VP3DSPContext {
|
typedef struct VP3DSPContext {
|
||||||
void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
|
void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
|
void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block);
|
void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
||||||
void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
||||||
|
|
||||||
|
@ -561,6 +561,13 @@ cglobal vp3_idct_put, 3, 4, 9
|
|||||||
movhps [r0+r3 ], m3
|
movhps [r0+r3 ], m3
|
||||||
%endif
|
%endif
|
||||||
%assign %%i %%i+64
|
%assign %%i %%i+64
|
||||||
|
%endrep
|
||||||
|
|
||||||
|
pxor m0, m0
|
||||||
|
%assign %%offset 0
|
||||||
|
%rep 128/mmsize
|
||||||
|
mova [r2+%%offset], m0
|
||||||
|
%assign %%offset %%offset+mmsize
|
||||||
%endrep
|
%endrep
|
||||||
RET
|
RET
|
||||||
|
|
||||||
@ -600,6 +607,11 @@ cglobal vp3_idct_add, 3, 4, 9
|
|||||||
movhps [r0+r1], m0
|
movhps [r0+r1], m0
|
||||||
%endif
|
%endif
|
||||||
lea r0, [r0+r1*2]
|
lea r0, [r0+r1*2]
|
||||||
|
%assign %%offset 0
|
||||||
|
%rep 32/mmsize
|
||||||
|
mova [r2+%%offset], m4
|
||||||
|
%assign %%offset %%offset+mmsize
|
||||||
|
%endrep
|
||||||
add r2, 32
|
add r2, 32
|
||||||
dec r3
|
dec r3
|
||||||
jg .loop
|
jg .loop
|
||||||
@ -620,7 +632,7 @@ vp3_idct_funcs
|
|||||||
paddusb m2, m0
|
paddusb m2, m0
|
||||||
movq m4, [r0+r1*2]
|
movq m4, [r0+r1*2]
|
||||||
paddusb m3, m0
|
paddusb m3, m0
|
||||||
movq m5, [r0+r3 ]
|
movq m5, [r0+r2 ]
|
||||||
paddusb m4, m0
|
paddusb m4, m0
|
||||||
paddusb m5, m0
|
paddusb m5, m0
|
||||||
psubusb m2, m1
|
psubusb m2, m1
|
||||||
@ -630,7 +642,7 @@ vp3_idct_funcs
|
|||||||
movq [r0+r1 ], m3
|
movq [r0+r1 ], m3
|
||||||
psubusb m5, m1
|
psubusb m5, m1
|
||||||
movq [r0+r1*2], m4
|
movq [r0+r1*2], m4
|
||||||
movq [r0+r3 ], m5
|
movq [r0+r2 ], m5
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX mmxext
|
INIT_MMX mmxext
|
||||||
@ -638,11 +650,12 @@ cglobal vp3_idct_dc_add, 3, 4
|
|||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movsxd r1, r1d
|
movsxd r1, r1d
|
||||||
%endif
|
%endif
|
||||||
lea r3, [r1*3]
|
movsx r3, word [r2]
|
||||||
movsx r2, word [r2]
|
mov word [r2], 0
|
||||||
add r2, 15
|
lea r2, [r1*3]
|
||||||
sar r2, 5
|
add r3, 15
|
||||||
movd m0, r2d
|
sar r3, 5
|
||||||
|
movd m0, r3d
|
||||||
pshufw m0, m0, 0x0
|
pshufw m0, m0, 0x0
|
||||||
pxor m1, m1
|
pxor m1, m1
|
||||||
psubw m1, m0
|
psubw m1, m0
|
||||||
|
@ -32,7 +32,7 @@ void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
|
|||||||
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
|
|
||||||
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
|
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
|
||||||
const DCTELEM *block);
|
DCTELEM *block);
|
||||||
|
|
||||||
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
|
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
|
||||||
int *bounding_values);
|
int *bounding_values);
|
||||||
|
Loading…
Reference in New Issue
Block a user