huffyuvencdsp: move functions only used by huffyuv from lossless_videodsp

Signed-off-by: James Almer <jamrial@gmail.com>

parent 5ac1dd8e23
commit 30c1f27299
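In short: the int16 diff and median-prediction helpers that only HuffYUV used move from LLVidDSPContext into HuffYUVEncDSPContext. Because the x86 init now needs the pixel format's bit depth to decide whether the mmxext median-prediction kernel is usable, ff_huffyuvencdsp_init() and ff_huffyuvencdsp_init_x86() gain an AVCodecContext parameter. A minimal before/after sketch of a call site:

    /* before: */ ff_huffyuvencdsp_init(&s->hencdsp);
    /* after:  */ ff_huffyuvencdsp_init(&s->hencdsp, avctx);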
diff --git a/configure b/configure
@@ -2430,7 +2430,7 @@ hap_encoder_deps="libsnappy"
 hap_encoder_select="texturedspenc"
 hevc_decoder_select="bswapdsp cabac golomb videodsp"
 huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
-huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llviddsp"
+huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp"
 iac_decoder_select="imc_decoder"
 imc_decoder_select="bswapdsp fft mdct sinewin"
 indeo3_decoder_select="hpeldsp"
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
@@ -76,7 +76,6 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
     s->flags = avctx->flags;
 
     ff_bswapdsp_init(&s->bdsp);
-    ff_llviddsp_init(&s->llviddsp, avctx);
 
     s->width = avctx->width;
     s->height = avctx->height;
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
@@ -298,6 +298,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return ret;
 
     ff_huffyuvdsp_init(&s->hdsp);
+    ff_llviddsp_init(&s->llviddsp, avctx);
     memset(s->vlc, 0, 4 * sizeof(VLC));
 
     s->interlaced = avctx->height > 288;
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
@@ -43,7 +43,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
     if (s->bps <= 8) {
         s->hencdsp.diff_bytes(dst, src0, src1, w);
     } else {
-        s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
+        s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
     }
 }
 
@@ -84,7 +84,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
             dst16[i] = temp - left;
             left     = temp;
         }
-        s->llviddsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
+        s->hencdsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
         return src16[w-1];
     }
 }
@@ -158,7 +158,7 @@ static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
     if (s->bps <= 8) {
         s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top);
     } else {
-        s->llviddsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
+        s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
     }
 }
 
@@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
     ff_huffyuv_common_init(avctx);
-    ff_huffyuvencdsp_init(&s->hencdsp);
+    ff_huffyuvencdsp_init(&s->hencdsp, avctx);
 
     avctx->extradata = av_mallocz(3*MAX_N + 4);
     if (s->flags&AV_CODEC_FLAG_PASS1) {
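In the bps > 8 paths above, s->n - 1 serves as the sample mask: s->n is the number of possible sample values (1 << bps), so every difference is reduced modulo the sample range. A small standalone illustration (constants chosen for the example, not taken from the codec):

    #include <stdint.h>

    int main(void)
    {
        unsigned mask = (1u << 10) - 1;      /* 10-bit samples: mask = 0x3FF */
        uint16_t a = 5, b = 1000;
        uint16_t d = (a - b) & mask;         /* -995 wraps to 29 (mod 1024) */
        return d == 29 ? 0 : 1;
    }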
diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
@@ -53,6 +53,32 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
         dst[i + 0] = src1[i + 0] - src2[i + 0];
 }
 
+static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
+    long i;
+#if !HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){
+        for(i=0; i+3<w; i+=4){
+            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
+            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
+            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
+            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
+        }
+    }else
+#endif
+    {
+        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
+        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;
+
+        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
+            long a = *(long*)(src1+i);
+            long b = *(long*)(src2+i);
+            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
+        }
+    }
+    for (; i<w; i++)
+        dst[i] = (src1[i] - src2[i]) & mask;
+}
+
 static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
                                    const uint8_t *src2, intptr_t w,
                                    int *left, int *left_top)
@@ -74,11 +100,31 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
     *left_top = lt;
 }
 
-av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
+static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
+    int i;
+    uint16_t l, lt;
+
+    l  = *left;
+    lt = *left_top;
+
+    for(i=0; i<w; i++){
+        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask);
+        lt = src1[i];
+        l  = src2[i];
+        dst[i] = (l - pred) & mask;
+    }
+
+    *left     = l;
+    *left_top = lt;
+}
+
+av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
 {
     c->diff_bytes = diff_bytes_c;
+    c->diff_int16 = diff_int16_c;
     c->sub_hfyu_median_pred = sub_hfyu_median_pred_c;
+    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;
 
     if (ARCH_X86)
-        ff_huffyuvencdsp_init_x86(c);
+        ff_huffyuvencdsp_init_x86(c, avctx);
 }
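The word-at-a-time branch of diff_int16_c() is a SWAR trick: pw_msb replicates the top bit of each mask-wide field into every 16-bit lane, pw_lsb the low bits below it. Setting that top bit in each minuend lane guarantees the per-lane subtraction never borrows across a lane boundary, and the closing XOR restores the correct top bit of each result. A self-contained demonstration, assuming a 64-bit long and 10-bit samples (the packed values are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned mask   = 0x3FF;                                         /* 10-bit samples */
        uint64_t pw_lsb = (uint64_t)(mask >> 1) * 0x0001000100010001ULL; /* 0x01FF per lane */
        uint64_t pw_msb = pw_lsb + 0x0001000100010001ULL;                /* 0x0200 per lane */

        uint64_t a = 0x0005002A03FF0123ULL;   /* lanes:    5,  42, 1023, 291 */
        uint64_t b = 0x03E8000101000100ULL;   /* lanes: 1000,   1,  256, 256 */

        uint64_t d = ((a | pw_msb) - (b & pw_lsb)) ^ ((a ^ b ^ pw_msb) & pw_msb);

        printf("%016llx\n", (unsigned long long)d);  /* 001d002902ff0023: lanes 29, 41, 767, 35 */
        return 0;
    }

Each lane ends up holding (a - b) & mask, e.g. (5 - 1000) mod 1024 = 29, with no borrow leaking into the neighbouring lane.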
diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
@@ -21,11 +21,18 @@
 
 #include <stdint.h>
 
+#include "avcodec.h"
+
 typedef struct HuffYUVEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
                        const uint8_t *src1 /* align 16 */,
                        const uint8_t *src2 /* align 1 */,
                        intptr_t w);
+    void (*diff_int16)(uint16_t *dst /* align 16 */,
+                       const uint16_t *src1 /* align 16 */,
+                       const uint16_t *src2 /* align 1 */,
+                       unsigned mask, int w);
 
     /**
      * Subtract HuffYUV's variant of median prediction.
      * Note, this might read from src1[-1], src2[-1].
@@ -33,9 +40,12 @@ typedef struct HuffYUVEncDSPContext {
     void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
                                  const uint8_t *src2, intptr_t w,
                                  int *left, int *left_top);
+    void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1,
+                                       const uint16_t *src2, unsigned mask,
+                                       int w, int *left, int *left_top);
 } HuffYUVEncDSPContext;
 
-void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
-void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
+void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
+void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
 
 #endif /* AVCODEC_HUFFYUVENCDSP_H */
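Both sub_hfyu_median_pred variants predict each sample as the median of the left neighbour, the top neighbour, and left + top - topleft, then store the masked difference from that prediction. A hedged standalone equivalent of the mid_pred() selection used by the C versions:

    /* Median of three, equivalent in effect to libavutil's mid_pred(). */
    static int median3(int a, int b, int c)
    {
        if (a > b) { int t = a; a = b; b = t; }   /* order so that a <= b */
        if (b > c) b = c;                         /* b = min(b, c)        */
        return a > b ? a : b;                     /* max(a, min(b, c))    */
    }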
diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c
@@ -92,32 +92,6 @@ static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w
         dst[i] = (dst[i] + src[i]) & mask;
 }
 
-static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
-    long i;
-#if !HAVE_FAST_UNALIGNED
-    if((long)src2 & (sizeof(long)-1)){
-        for(i=0; i+3<w; i+=4){
-            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
-            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
-            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
-            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
-        }
-    }else
-#endif
-    {
-        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
-        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;
-
-        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
-            long a = *(long*)(src1+i);
-            long b = *(long*)(src2+i);
-            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
-        }
-    }
-    for (; i<w; i++)
-        dst[i] = (src1[i] - src2[i]) & mask;
-}
-
 static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){
     int i;
     uint16_t l, lt;
@@ -135,24 +109,6 @@ static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, con
     *left_top = lt;
 }
 
-static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
-    int i;
-    uint16_t l, lt;
-
-    l  = *left;
-    lt = *left_top;
-
-    for(i=0; i<w; i++){
-        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask);
-        lt = src1[i];
-        l  = src2[i];
-        dst[i] = (l - pred) & mask;
-    }
-
-    *left     = l;
-    *left_top = lt;
-}
-
 static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){
     int i;
 
@@ -180,10 +136,8 @@ void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx)
     c->add_left_pred = add_left_pred_c;
 
     c->add_int16 = add_int16_c;
-    c->diff_int16= diff_int16_c;
     c->add_hfyu_left_pred_int16 = add_hfyu_left_pred_int16_c;
     c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c;
-    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;
 
     if (ARCH_X86)
         ff_llviddsp_init_x86(c, avctx);
diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h
@@ -35,9 +35,7 @@ typedef struct LLVidDSPContext {
                           intptr_t w, int left);
 
     void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
-    void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
 
-    void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
     void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
     int (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left);
 } LLVidDSPContext;
diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
@@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
-    ff_huffyuvencdsp_init(&s->hdsp);
+    ff_huffyuvencdsp_init(&s->hdsp, avctx);
 
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
@@ -120,7 +120,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
     }
 
     ff_bswapdsp_init(&c->bdsp);
-    ff_huffyuvencdsp_init(&c->hdsp);
+    ff_huffyuvencdsp_init(&c->hdsp, avctx);
 
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
@@ -148,3 +148,116 @@ DIFF_BYTES_PROLOGUE
     DIFF_BYTES_BODY u, u
 %undef i
 %endif
+
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
+    movd    m4, maskd
+    SPLATW  m4, m4
+    add     wd, wd
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+    push  tmpq
+%%.wordloop:
+    sub     wq, 2
+%ifidn %2, add
+    mov     tmpw, [srcq+wq]
+    add     tmpw, [dstq+wq]
+%else
+    mov     tmpw, [src1q+wq]
+    sub     tmpw, [src2q+wq]
+%endif
+    and     tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+    pop tmpq
+%%.tomainloop:
+%ifidn %2, add
+    add     srcq, wq
+%else
+    add     src1q, wq
+    add     src2q, wq
+%endif
+    add     dstq, wq
+    neg     wq
+    jz %%.end
+%%.loop:
+%ifidn %2, add
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
+%else
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
+%endif
+    p%2w    m0, m1
+    p%2w    m2, m3
+    pand    m0, m4
+    pand    m2, m4
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    INT16_LOOP a, sub
+%endif
+
+INIT_XMM sse2
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    test src1q, mmsize-1
+    jnz .unaligned
+    test src2q, mmsize-1
+    jnz .unaligned
+    test dstq, mmsize-1
+    jnz .unaligned
+    INT16_LOOP a, sub
+.unaligned:
+    INT16_LOOP u, sub
+
+INIT_MMX mmxext
+cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
+    add     wd, wd
+    movd    mm7, maskd
+    SPLATW  mm7, mm7
+    movq    mm0, [src1q]
+    movq    mm2, [src2q]
+    psllq   mm0, 16
+    psllq   mm2, 16
+    movd    mm6, [left_topq]
+    por     mm0, mm6
+    movd    mm6, [leftq]
+    por     mm2, mm6
+    xor     maskq, maskq
+.loop:
+    movq    mm1, [src1q + maskq]
+    movq    mm3, [src2q + maskq]
+    movq    mm4, mm2
+    psubw   mm2, mm0
+    paddw   mm2, mm1
+    pand    mm2, mm7
+    movq    mm5, mm4
+    pmaxsw  mm4, mm1
+    pminsw  mm1, mm5
+    pminsw  mm4, mm2
+    pmaxsw  mm4, mm1
+    psubw   mm3, mm4
+    pand    mm3, mm7
+    movq    [dstq + maskq], mm3
+    add     maskq, 8
+    movq    mm0, [src1q + maskq - 2]
+    movq    mm2, [src2q + maskq - 2]
+    cmp     maskq, wq
+    jb .loop
+    movzx   maskd, word [src1q + wq - 2]
+    mov     [left_topq], maskd
+    movzx   maskd, word [src2q + wq - 2]
+    mov     [leftq], maskd
+    RET
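INT16_LOOP first peels trailing words with scalar code until the remaining byte count is a multiple of 2*mmsize, then advances the base pointers and runs the SIMD body with a negative index that counts up toward zero (two registers of psubw/paddw plus pand per iteration). An illustrative C model of that control flow, assuming mmsize = 16 (SSE2):

    #include <stdint.h>

    static void int16_loop_model(uint16_t *dst, const uint16_t *src1,
                                 const uint16_t *src2, unsigned mask, int w)
    {
        const int block = 2 * 16;        /* 2*mmsize bytes per main iteration */
        int bytes = 2 * w;               /* "add wd, wd": switch to a byte count */
        int i;

        while (bytes & (block - 1)) {    /* %%.wordloop: scalar tail, from the end */
            bytes -= 2;
            dst[bytes >> 1] = (src1[bytes >> 1] - src2[bytes >> 1]) & mask;
        }
        for (i = 0; i < bytes >> 1; i++) /* %%.loop: psubw + pand in the asm */
            dst[i] = (src1[i] - src2[i]) & mask;
    }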
diff --git a/libavcodec/x86/huffyuvencdsp_init.c b/libavcodec/x86/huffyuvencdsp_init.c
@@ -24,6 +24,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/huffyuvencdsp.h"
@@ -35,6 +36,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
+                        unsigned mask, int w);
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
+                        unsigned mask, int w);
+void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
+                                          unsigned mask, int w, int *left, int *left_top);
 
 #if HAVE_INLINE_ASM
 
@@ -80,12 +87,14 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
 
 #endif /* HAVE_INLINE_ASM */
 
-av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
+av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_mmx;
+        c->diff_int16 = ff_diff_int16_mmx;
     }
 
 #if HAVE_INLINE_ASM
@@ -94,8 +103,13 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
     }
 #endif /* HAVE_INLINE_ASM */
 
+    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
+        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
+    }
+
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_sse2;
+        c->diff_int16 = ff_diff_int16_sse2;
     }
 
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
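The depth < 16 guard matters because the mmxext kernel builds the median with pmaxsw/pminsw, which compare lanes as signed 16-bit integers; samples using the full 16 bits would be ordered incorrectly, while anything up to 15 bits stays in the signed-positive range. A toy illustration of the failure mode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* pmaxsw-style signed compare: 0x8000 (32768 as a sample value)
         * is -32768 once interpreted as signed, so it loses against 1. */
        int16_t a = (int16_t)0x8000, b = 1;
        printf("signed max(0x8000, 1) = %d\n", a > b ? a : b);  /* prints 1 */
        return 0;
    }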
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
@@ -288,25 +288,6 @@ cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
 .unaligned:
     INT16_LOOP u, add
 
-%if ARCH_X86_32
-INIT_MMX mmx
-cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
-    INT16_LOOP a, sub
-%endif
-
-INIT_XMM sse2
-cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
-    test src1q, mmsize-1
-    jnz .unaligned
-    test src2q, mmsize-1
-    jnz .unaligned
-    test dstq, mmsize-1
-    jnz .unaligned
-    INT16_LOOP a, sub
-.unaligned:
-    INT16_LOOP u, sub
-
-
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
     add     wd, wd
     add     srcq, wq
@@ -443,42 +424,3 @@ cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
     movzx   r2d, word [topq-2]
     mov     [left_topq], r2d
     RET
-
-cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
-    add     wd, wd
-    movd    mm7, maskd
-    SPLATW  mm7, mm7
-    movq    mm0, [src1q]
-    movq    mm2, [src2q]
-    psllq   mm0, 16
-    psllq   mm2, 16
-    movd    mm6, [left_topq]
-    por     mm0, mm6
-    movd    mm6, [leftq]
-    por     mm2, mm6
-    xor     maskq, maskq
-.loop:
-    movq    mm1, [src1q + maskq]
-    movq    mm3, [src2q + maskq]
-    movq    mm4, mm2
-    psubw   mm2, mm0
-    paddw   mm2, mm1
-    pand    mm2, mm7
-    movq    mm5, mm4
-    pmaxsw  mm4, mm1
-    pminsw  mm1, mm5
-    pminsw  mm4, mm2
-    pmaxsw  mm4, mm1
-    psubw   mm3, mm4
-    pand    mm3, mm7
-    movq    [dstq + maskq], mm3
-    add     maskq, 8
-    movq    mm0, [src1q + maskq - 2]
-    movq    mm2, [src2q + maskq - 2]
-    cmp     maskq, wq
-    jb .loop
-    movzx   maskd, word [src1q + wq - 2]
-    mov     [left_topq], maskd
-    movzx   maskd, word [src2q + wq - 2]
-    mov     [leftq], maskd
-    RET
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
@@ -41,12 +41,9 @@ int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src,
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
-void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
-void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
 int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
 int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
 void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
-void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
 
 #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
 static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
@@ -98,9 +95,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
 
     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
         c->add_bytes = ff_add_bytes_mmx;
-
         c->add_int16 = ff_add_int16_mmx;
-        c->diff_int16 = ff_diff_int16_mmx;
     }
 
     if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
@@ -111,7 +106,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
 
     if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
         c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
-        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
@@ -119,7 +113,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
         c->add_median_pred = ff_add_median_pred_sse2;
 
         c->add_int16 = ff_add_int16_sse2;
-        c->diff_int16 = ff_diff_int16_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {