mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-08 13:22:53 +02:00
x86/simple_idct: add explicit sse2 simple_idct_put/add versions.
These versions still use the MMX IDCT, but call the SSE2 put/add_pixels_clamped implementations directly. This way we no longer need to go through the ff_put/add_pixels_clamped function pointers.
This commit is contained in:
parent
2f0591cfa3
commit
e0c205677f
@ -63,27 +63,41 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
|
|||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (INLINE_MMX(cpu_flags)) {
|
|
||||||
if (!high_bit_depth &&
|
|
||||||
avctx->lowres == 0 &&
|
|
||||||
(avctx->idct_algo == FF_IDCT_AUTO ||
|
|
||||||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
|
||||||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
|
|
||||||
c->idct_put = ff_simple_idct_put_mmx;
|
|
||||||
c->idct_add = ff_simple_idct_add_mmx;
|
|
||||||
c->idct = ff_simple_idct_mmx;
|
|
||||||
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (EXTERNAL_MMX(cpu_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
|
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
|
||||||
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
|
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
|
||||||
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
|
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
|
||||||
|
|
||||||
|
if (INLINE_MMX(cpu_flags)) {
|
||||||
|
if (!high_bit_depth &&
|
||||||
|
avctx->lowres == 0 &&
|
||||||
|
(avctx->idct_algo == FF_IDCT_AUTO ||
|
||||||
|
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
||||||
|
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
|
||||||
|
c->idct_put = ff_simple_idct_put_mmx;
|
||||||
|
c->idct_add = ff_simple_idct_add_mmx;
|
||||||
|
c->idct = ff_simple_idct_mmx;
|
||||||
|
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
|
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
|
||||||
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
|
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
|
||||||
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
|
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
|
||||||
|
|
||||||
|
if (INLINE_SSE2(cpu_flags)) {
|
||||||
|
if (!high_bit_depth &&
|
||||||
|
avctx->lowres == 0 &&
|
||||||
|
(avctx->idct_algo == FF_IDCT_AUTO ||
|
||||||
|
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
||||||
|
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
|
||||||
|
c->idct_put = ff_simple_idct_put_sse2;
|
||||||
|
c->idct_add = ff_simple_idct_add_sse2;
|
||||||
|
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ARCH_X86_64 && avctx->lowres == 0) {
|
if (ARCH_X86_64 && avctx->lowres == 0) {
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "libavutil/x86/asm.h"
|
#include "libavutil/x86/asm.h"
|
||||||
|
|
||||||
#include "libavcodec/idctdsp.h"
|
#include "libavcodec/idctdsp.h"
|
||||||
|
#include "libavcodec/x86/idctdsp.h"
|
||||||
|
|
||||||
#include "idctdsp.h"
|
#include "idctdsp.h"
|
||||||
#include "simple_idct.h"
|
#include "simple_idct.h"
|
||||||
@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block)
|
|||||||
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
||||||
{
|
{
|
||||||
idct(block);
|
idct(block);
|
||||||
ff_put_pixels_clamped(block, dest, line_size);
|
ff_put_pixels_clamped_mmx(block, dest, line_size);
|
||||||
}
|
}
|
||||||
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
||||||
{
|
{
|
||||||
idct(block);
|
idct(block);
|
||||||
ff_add_pixels_clamped(block, dest, line_size);
|
ff_add_pixels_clamped_mmx(block, dest, line_size);
|
||||||
|
}
|
||||||
|
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
||||||
|
{
|
||||||
|
idct(block);
|
||||||
|
ff_put_pixels_clamped_sse2(block, dest, line_size);
|
||||||
|
}
|
||||||
|
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
|
||||||
|
{
|
||||||
|
idct(block);
|
||||||
|
ff_add_pixels_clamped_sse2(block, dest, line_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block);
|
|||||||
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
|
|
||||||
|
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
|
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
|
|
||||||
void ff_simple_idct10_sse2(int16_t *block);
|
void ff_simple_idct10_sse2(int16_t *block);
|
||||||
void ff_simple_idct10_avx(int16_t *block);
|
void ff_simple_idct10_avx(int16_t *block);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user