You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/x86/h264_qpel_10bit: Remove SSE2 "cache64" duplicates
The horizontal 10-bit MC SSE2 functions are currently duplicated: They exist both in ordinary form and with a "sse2_cache64" suffix. A comment in ff_h264qpel_init_x86() indicates that this is due to older processors not liking accesses that cross cache lines, yet these functions are identical to the non-cache64 functions (apart from the unavoidable changes in the rip-offset). The only difference between these functions and the ordinary ones is that the cache64 ones are created via a special form of the INIT_XMM macro: "INIT_XMM sse2, cache64". This affects the name and apparently defines cpuflags_cache64, yet nothing checks for this, so both versions are identical. So remove the cache64 ones and treat the remaining ones like ordinary SSE2 functions. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -384,13 +384,10 @@ LUMA_MC_4(10, mc33, mmxext)
|
|||||||
|
|
||||||
LUMA_MC_816(10, mc00, sse2)
|
LUMA_MC_816(10, mc00, sse2)
|
||||||
LUMA_MC_816(10, mc10, sse2)
|
LUMA_MC_816(10, mc10, sse2)
|
||||||
LUMA_MC_816(10, mc10, sse2_cache64)
|
|
||||||
LUMA_MC_816(10, mc10, ssse3_cache64)
|
LUMA_MC_816(10, mc10, ssse3_cache64)
|
||||||
LUMA_MC_816(10, mc20, sse2)
|
LUMA_MC_816(10, mc20, sse2)
|
||||||
LUMA_MC_816(10, mc20, sse2_cache64)
|
|
||||||
LUMA_MC_816(10, mc20, ssse3_cache64)
|
LUMA_MC_816(10, mc20, ssse3_cache64)
|
||||||
LUMA_MC_816(10, mc30, sse2)
|
LUMA_MC_816(10, mc30, sse2)
|
||||||
LUMA_MC_816(10, mc30, sse2_cache64)
|
|
||||||
LUMA_MC_816(10, mc30, ssse3_cache64)
|
LUMA_MC_816(10, mc30, ssse3_cache64)
|
||||||
LUMA_MC_816(10, mc01, sse2)
|
LUMA_MC_816(10, mc01, sse2)
|
||||||
LUMA_MC_816(10, mc11, sse2)
|
LUMA_MC_816(10, mc11, sse2)
|
||||||
@@ -488,9 +485,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
|
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
|
||||||
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
|
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
|
||||||
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
|
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
|
||||||
H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
|
H264_QPEL_FUNCS_10(1, 0, sse2);
|
||||||
H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
|
H264_QPEL_FUNCS_10(2, 0, sse2);
|
||||||
H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
|
H264_QPEL_FUNCS_10(3, 0, sse2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -516,17 +513,5 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
|
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
|
||||||
/* AVX implies 64 byte cache lines without the need to avoid unaligned
|
|
||||||
* memory accesses that cross the boundary between two cache lines.
|
|
||||||
* TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
|
|
||||||
* having to treat SSE2 functions with such properties as AVX. */
|
|
||||||
if (bit_depth == 10) {
|
|
||||||
H264_QPEL_FUNCS_10(1, 0, sse2);
|
|
||||||
H264_QPEL_FUNCS_10(2, 0, sse2);
|
|
||||||
H264_QPEL_FUNCS_10(3, 0, sse2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@@ -227,8 +227,6 @@ MC00 avg
|
|||||||
%define OP_MOV mova
|
%define OP_MOV mova
|
||||||
INIT_MMX mmxext
|
INIT_MMX mmxext
|
||||||
%1 put, 4
|
%1 put, 4
|
||||||
INIT_XMM sse2, cache64
|
|
||||||
%1 put, 8
|
|
||||||
INIT_XMM ssse3, cache64
|
INIT_XMM ssse3, cache64
|
||||||
%1 put, 8
|
%1 put, 8
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
@@ -237,8 +235,6 @@ INIT_XMM sse2
|
|||||||
%define OP_MOV AVG_MOV
|
%define OP_MOV AVG_MOV
|
||||||
INIT_MMX mmxext
|
INIT_MMX mmxext
|
||||||
%1 avg, 4
|
%1 avg, 4
|
||||||
INIT_XMM sse2, cache64
|
|
||||||
%1 avg, 8
|
|
||||||
INIT_XMM ssse3, cache64
|
INIT_XMM ssse3, cache64
|
||||||
%1 avg, 8
|
%1 avg, 8
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
|
Reference in New Issue
Block a user