You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-11-23 21:54:53 +02:00
avcodec/x86/simple_idct: Port to SSE2
Before this commit, the (32-bit only) simple IDCT came in three versions: a pure MMX IDCT, and idct-put and idct-add versions which use SSE2 at the put and add stage but still use pure MMX for the actual IDCT.

This commit ports said IDCT to SSE2; this was entirely trivial for the IDCT1-5 and IDCT7 parts (where one can directly use the full register width) and was easy for IDCT6 and IDCT8 (involving a few movhps and pshufds). Unfortunately, DC_COND_INIT and Z_COND_INIT still use only the lower half of the registers.

This saved 4658B here; the benchmarking option of the dct test tool showed a 15% speedup.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -90,7 +90,7 @@ static const struct algo idct_tab_arch[] = {
|
||||
#endif
|
||||
#else
|
||||
#if HAVE_SSE2_EXTERNAL
|
||||
{ "SIMPLE-SSE2", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_SSE2},
|
||||
{ "SIMPLE-SSE2", ff_simple_idct_sse2, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_SSE2},
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -76,7 +76,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
|
||||
(avctx->idct_algo == FF_IDCT_AUTO ||
|
||||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
||||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
|
||||
c->idct = ff_simple_idct_mmx;
|
||||
c->idct = ff_simple_idct_sse2;
|
||||
c->idct_put = ff_simple_idct_put_sse2;
|
||||
c->idct_add = ff_simple_idct_add_sse2;
|
||||
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,10 +22,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_simple_idct_mmx(int16_t *block);
|
||||
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
|
||||
void ff_simple_idct_sse2(int16_t *block);
|
||||
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user