mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
avcodec/x86: modify simple_idct10 macros to add an action parameter
This commit is contained in:
parent
8781330d80
commit
d2597fb0c1
@ -52,7 +52,7 @@ SECTION .text
|
||||
|
||||
%macro idct_fn 0
|
||||
cglobal prores_idct_put_10, 4, 4, 15, pixels, lsize, block, qmat
|
||||
IDCT_FN pw_1, 15, pw_88, 18, pw_4, pw_1019, r3
|
||||
IDCT_FN pw_1, 15, pw_88, 18, "put", pw_4, pw_1019, r3
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
|
@ -69,24 +69,24 @@ SECTION .text
|
||||
|
||||
%macro idct_fn 0
|
||||
cglobal simple_idct10, 1, 1, 16, block
|
||||
IDCT_FN "", 12, "", 19
|
||||
IDCT_FN "", 12, "", 19, "store"
|
||||
RET
|
||||
|
||||
cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block
|
||||
IDCT_FN "", 12, "", 19, 0, pw_1023
|
||||
IDCT_FN "", 12, "", 19, "put", 0, pw_1023
|
||||
RET
|
||||
|
||||
cglobal simple_idct12, 1, 1, 16, block
|
||||
; coeffs are already 15bits, adding the offset would cause
|
||||
; overflow in the input
|
||||
IDCT_FN "", 15, pw_2, 16
|
||||
IDCT_FN "", 15, pw_2, 16, "store"
|
||||
RET
|
||||
|
||||
cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block
|
||||
; range isn't known, so the C simple_idct range is used
|
||||
; Also, using a bias on input overflows, so use the bias
|
||||
; on output of the first butterfly instead
|
||||
IDCT_FN "", 15, pw_2, 16, 0, pw_4095
|
||||
IDCT_FN "", 15, pw_2, 16, "put", 0, pw_4095
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
|
@ -218,11 +218,12 @@
|
||||
; %2 = row bias macro
|
||||
; %3 = column shift
|
||||
; %4 = column bias macro
|
||||
; %5 = min pixel value
|
||||
; %6 = max pixel value
|
||||
; %7 = qmat (for prores)
|
||||
; %5 = final action (nothing, "store", "put", "add")
|
||||
; %6 = min pixel value
|
||||
; %7 = max pixel value
|
||||
; %8 = qmat (for prores)
|
||||
|
||||
%macro IDCT_FN 4-7
|
||||
%macro IDCT_FN 4-8
|
||||
; for (i = 0; i < 8; i++)
|
||||
; idctRowCondDC(block + i*8);
|
||||
mova m10,[blockq+ 0] ; { row[0] }[0-7]
|
||||
@ -230,13 +231,13 @@
|
||||
mova m13,[blockq+64] ; { row[4] }[0-7]
|
||||
mova m12,[blockq+96] ; { row[6] }[0-7]
|
||||
|
||||
%if %0 == 7
|
||||
pmullw m10,[%7+ 0]
|
||||
pmullw m8, [%7+32]
|
||||
pmullw m13,[%7+64]
|
||||
pmullw m12,[%7+96]
|
||||
%if %0 == 8
|
||||
pmullw m10,[%8+ 0]
|
||||
pmullw m8, [%8+32]
|
||||
pmullw m13,[%8+64]
|
||||
pmullw m12,[%8+96]
|
||||
|
||||
IDCT_1D %1, %2, %7
|
||||
IDCT_1D %1, %2, %8
|
||||
%else
|
||||
IDCT_1D %1, %2
|
||||
%endif
|
||||
@ -257,7 +258,8 @@
|
||||
IDCT_1D %3, %4
|
||||
|
||||
; clip/store
|
||||
%if %0 == 4
|
||||
%if %0 >= 5
|
||||
%ifidn %5,"store"
|
||||
; No clamping, means pure idct
|
||||
mova [blockq+ 0], m8
|
||||
mova [blockq+ 16], m0
|
||||
@ -267,13 +269,13 @@
|
||||
mova [blockq+ 80], m11
|
||||
mova [blockq+ 96], m9
|
||||
mova [blockq+112], m10
|
||||
%else
|
||||
%ifidn %5, 0
|
||||
%elifidn %5,"put"
|
||||
%ifidn %6, 0
|
||||
pxor m3, m3
|
||||
%else
|
||||
mova m3, [%5]
|
||||
%endif
|
||||
mova m5, [%6]
|
||||
mova m3, [%6]
|
||||
%endif ; ifidn %6, 0
|
||||
mova m5, [%7]
|
||||
pmaxsw m8, m3
|
||||
pmaxsw m0, m3
|
||||
pmaxsw m1, m3
|
||||
@ -301,7 +303,8 @@
|
||||
mova [r0+r1 ], m11
|
||||
mova [r0+r1*2], m9
|
||||
mova [r0+r2 ], m10
|
||||
%endif
|
||||
%endif ; %5 action
|
||||
%endif; if %0 >= 5
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
Loading…
Reference in New Issue
Block a user