Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-04-14 00:58:38 +02:00)
x86/vf_blend: make all functions work on x86_32
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent 0988c68cf9
commit 02f428051a
@@ -22,7 +22,6 @@
|
|||||||
|
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
|
|
||||||
pw_128: times 8 dw 128
|
pw_128: times 8 dw 128
|
||||||
@@ -34,11 +33,19 @@ pb_255: times 16 db 255
|
|||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%macro BLEND_INIT 2
|
%macro BLEND_INIT 2
|
||||||
cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
|
%if ARCH_X86_64
|
||||||
|
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
|
||||||
|
mov widthd, dword widthm
|
||||||
|
%else
|
||||||
|
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
|
||||||
|
%define dst_linesizeq r5mp
|
||||||
|
%define widthq r6mp
|
||||||
|
%endif
|
||||||
|
mov endd, dword r8m
|
||||||
add topq, widthq
|
add topq, widthq
|
||||||
add bottomq, widthq
|
add bottomq, widthq
|
||||||
add dstq, widthq
|
add dstq, widthq
|
||||||
sub endq, startq
|
sub endd, dword r7m ; start
|
||||||
neg widthq
|
neg widthq
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@@ -54,15 +61,14 @@ REP_RET
|
|||||||
%macro BLEND_SIMPLE 2
|
%macro BLEND_SIMPLE 2
|
||||||
BLEND_INIT %1, 2
|
BLEND_INIT %1, 2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [topq + x]
|
movu m0, [topq + xq]
|
||||||
movu m1, [bottomq + x]
|
movu m1, [bottomq + xq]
|
||||||
p%2 m0, m1
|
p%2 m0, m1
|
||||||
mova [dstq + x], m0
|
mova [dstq + xq], m0
|
||||||
add r10q, mmsize
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
%endmacro
|
%endmacro
|
||||||
@@ -80,38 +86,36 @@ BLEND_INIT difference128, 4
|
|||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
mova m3, [pw_128]
|
mova m3, [pw_128]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + x]
|
movh m0, [topq + xq]
|
||||||
movh m1, [bottomq + x]
|
movh m1, [bottomq + xq]
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
paddw m0, m3
|
paddw m0, m3
|
||||||
psubw m0, m1
|
psubw m0, m1
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
movh [dstq + x], m0
|
movh [dstq + xq], m0
|
||||||
add r10q, mmsize / 2
|
add xq, mmsize / 2
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
BLEND_INIT average, 3
|
BLEND_INIT average, 3
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + x]
|
movh m0, [topq + xq]
|
||||||
movh m1, [bottomq + x]
|
movh m1, [bottomq + xq]
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
paddw m0, m1
|
paddw m0, m1
|
||||||
psrlw m0, 1
|
psrlw m0, 1
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
movh [dstq + x], m0
|
movh [dstq + xq], m0
|
||||||
add r10q, mmsize / 2
|
add xq, mmsize / 2
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
@@ -119,19 +123,18 @@ BLEND_INIT addition128, 4
|
|||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
mova m3, [pw_128]
|
mova m3, [pw_128]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + x]
|
movh m0, [topq + xq]
|
||||||
movh m1, [bottomq + x]
|
movh m1, [bottomq + xq]
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
paddw m0, m1
|
paddw m0, m1
|
||||||
psubw m0, m3
|
psubw m0, m3
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
movh [dstq + x], m0
|
movh [dstq + xq], m0
|
||||||
add r10q, mmsize / 2
|
add xq, mmsize / 2
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
@@ -140,38 +143,36 @@ BLEND_INIT hardmix, 5
|
|||||||
mova m3, [pb_128]
|
mova m3, [pb_128]
|
||||||
mova m4, [pb_127]
|
mova m4, [pb_127]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [topq + x]
|
movu m0, [topq + xq]
|
||||||
movu m1, [bottomq + x]
|
movu m1, [bottomq + xq]
|
||||||
pxor m1, m4
|
pxor m1, m4
|
||||||
pxor m0, m3
|
pxor m0, m3
|
||||||
pcmpgtb m1, m0
|
pcmpgtb m1, m0
|
||||||
pxor m1, m2
|
pxor m1, m2
|
||||||
mova [dstq + x], m1
|
mova [dstq + xq], m1
|
||||||
add r10q, mmsize
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
BLEND_INIT phoenix, 4
|
BLEND_INIT phoenix, 4
|
||||||
mova m3, [pb_255]
|
mova m3, [pb_255]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [topq + x]
|
movu m0, [topq + xq]
|
||||||
movu m1, [bottomq + x]
|
movu m1, [bottomq + xq]
|
||||||
mova m2, m0
|
mova m2, m0
|
||||||
pminub m0, m1
|
pminub m0, m1
|
||||||
pmaxub m1, m2
|
pmaxub m1, m2
|
||||||
mova m2, m3
|
mova m2, m3
|
||||||
psubusb m2, m1
|
psubusb m2, m1
|
||||||
paddusb m2, m0
|
paddusb m2, m0
|
||||||
mova [dstq + x], m2
|
mova [dstq + xq], m2
|
||||||
add r10q, mmsize
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
@@ -179,19 +180,18 @@ INIT_XMM ssse3
|
|||||||
BLEND_INIT difference, 3
|
BLEND_INIT difference, 3
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + x]
|
movh m0, [topq + xq]
|
||||||
movh m1, [bottomq + x]
|
movh m1, [bottomq + xq]
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
psubw m0, m1
|
psubw m0, m1
|
||||||
pabsw m0, m0
|
pabsw m0, m0
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
movh [dstq + x], m0
|
movh [dstq + xq], m0
|
||||||
add r10q, mmsize / 2
|
add xq, mmsize / 2
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
@@ -199,12 +199,11 @@ BLEND_INIT negation, 5
|
|||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
mova m4, [pw_255]
|
mova m4, [pw_255]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov r10q, widthq
|
mov xq, widthq
|
||||||
%define x r10q
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + x]
|
movh m0, [topq + xq]
|
||||||
movh m1, [bottomq + x]
|
movh m1, [bottomq + xq]
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
mova m3, m4
|
mova m3, m4
|
||||||
@@ -214,9 +213,7 @@ BLEND_INIT negation, 5
|
|||||||
mova m0, m4
|
mova m0, m4
|
||||||
psubw m0, m3
|
psubw m0, m3
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
movh [dstq + x], m0
|
movh [dstq + xq], m0
|
||||||
add r10q, mmsize / 2
|
add xq, mmsize / 2
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
%endif
|
|
||||||
|
@@ -49,7 +49,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
|||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
|
if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
|
||||||
switch (param->mode) {
|
switch (param->mode) {
|
||||||
case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
|
case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
|
||||||
case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
|
case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
|
||||||
@@ -65,7 +65,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
|||||||
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
|
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
|
if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
|
||||||
switch (param->mode) {
|
switch (param->mode) {
|
||||||
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
|
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
|
||||||
case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;
|
case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user