mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
mmx 16-bit ssd. 2.3x faster svq1 encoding.
Originally committed as revision 8559 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
0c0d17f3ce
commit
5900637219
@ -3694,6 +3694,14 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
 * Sum of squared differences between a vector of signed 8-bit values and a
 * vector of signed 16-bit values (plain C reference implementation).
 *
 * @param pix1 first vector, size elements of int8_t
 * @param pix2 second vector, size elements of int16_t
 * @param size number of elements to compare; nothing is read when size <= 0
 * @return sum over all elements of (pix1[k] - pix2[k])^2, accumulated in an int
 */
static int ssd_int8_vs_int16_c(int8_t *pix1, int16_t *pix2, int size){
    int total = 0;
    int k = 0;

    while (k < size) {
        /* both operands are promoted to int, so the difference is exact */
        const int delta = pix1[k] - pix2[k];

        total += delta * delta;
        k++;
    }

    return total;
}
|
||||
|
||||
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
|
||||
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
|
||||
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
|
||||
@ -4076,6 +4084,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->w97[1]= w97_8_c;
|
||||
#endif
|
||||
|
||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
||||
|
||||
c->add_bytes= add_bytes_c;
|
||||
c->diff_bytes= diff_bytes_c;
|
||||
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
|
||||
|
@ -200,6 +200,8 @@ typedef struct DSPContext {
|
||||
me_cmp_func ildct_cmp[5]; //only width 16 used
|
||||
me_cmp_func frame_skip_cmp[5]; //only width 8 used
|
||||
|
||||
int (*ssd_int8_vs_int16)(int8_t *pix1, int16_t *pix2, int size);
|
||||
|
||||
/**
|
||||
* Halfpel motion compensation with rounding (a+b+1)>>1.
|
||||
* this is an array[4][4] of motion compensation functions for 4
|
||||
|
@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
|
||||
|
||||
WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
|
||||
WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
|
||||
|
||||
/**
 * MMX implementation of the sum of squared differences between a vector of
 * signed 8-bit values and a vector of signed 16-bit values.
 *
 * The vectors are consumed from the end towards the start, 8 elements per
 * iteration.
 *
 * @param pix1 first vector, size elements of int8_t
 * @param pix2 second vector, size elements of int16_t
 * @param size number of elements; must be a positive multiple of 8, because
 *             the index is decremented by 8 before the first load and the
 *             loop only stops once it is no longer greater than zero
 * @return sum of (pix2[k] - pix1[k])^2 over all elements
 *
 * @note The per-element difference is formed with psubw and therefore wraps
 *       at 16 bits; results match the C version only while every difference
 *       fits in an int16_t.
 * @note No emms is issued here, so the MMX state is left active on return.
 */
static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){
    int sum;
    /* Pointer-width index: it is combined with pointer base registers in the
     * (%2,%0) / (%3,%0,2) addressing modes, so it must have the same width as
     * a pointer (a plain long is only 32 bits on LLP64 targets). */
    intptr_t i = size;

    /* __asm__ instead of asm so the statement is accepted in strict ISO C
     * modes as well as in GNU modes. */
    __asm__ volatile(
        "pxor %%mm4, %%mm4 \n"          /* mm4 = two 32-bit partial sums = 0 */
        "1: \n"
        "sub $8, %0 \n"                 /* step back 8 elements; sets flags for jg */
        "movq (%2,%0), %%mm2 \n"        /* 8 x int8  from pix1 + i           */
        "movq (%3,%0,2), %%mm0 \n"      /* 4 x int16 from pix2 + i           */
        "movq 8(%3,%0,2), %%mm1 \n"     /* 4 x int16 from pix2 + i + 4       */
        /* The previous contents of mm3 only end up in the low byte of each
         * word and are shifted out by the psraw below, so mm3 is
         * intentionally not initialised. */
        "punpckhbw %%mm2, %%mm3 \n"     /* high 4 bytes -> high byte of each word */
        "punpcklbw %%mm2, %%mm2 \n"     /* low 4 bytes  -> high byte of each word */
        "psraw $8, %%mm3 \n"            /* sign-extend bytes to words        */
        "psraw $8, %%mm2 \n"
        "psubw %%mm3, %%mm1 \n"         /* pix2 - pix1 (elements 4..7)       */
        "psubw %%mm2, %%mm0 \n"         /* pix2 - pix1 (elements 0..3)       */
        "pmaddwd %%mm1, %%mm1 \n"       /* square and add adjacent pairs     */
        "pmaddwd %%mm0, %%mm0 \n"
        "paddd %%mm1, %%mm4 \n"         /* accumulate                        */
        "paddd %%mm0, %%mm4 \n"
        "jg 1b \n"                      /* MMX ops leave the flags from sub intact */
        "movq %%mm4, %%mm3 \n"
        "psrlq $32, %%mm3 \n"
        "paddd %%mm3, %%mm4 \n"         /* fold high dword into low dword    */
        "movd %%mm4, %1 \n"
        :"+r"(i), "=r"(sum)
        :"r"(pix1), "r"(pix2)
        /* The buffers are read through register operands only, so the
         * compiler has to be told that memory is accessed. */
        :"memory"
    );
    return sum;
}
|
||||
|
||||
#endif //CONFIG_ENCODERS
|
||||
|
||||
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
|
||||
@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
}
|
||||
c->add_8x8basis= add_8x8basis_mmx;
|
||||
|
||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
||||
|
||||
#endif //CONFIG_ENCODERS
|
||||
|
||||
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
|
||||
|
@ -992,15 +992,10 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
|
||||
|
||||
for(i=0; i<16; i++){
|
||||
int sum= codebook_sum[stage*16 + i];
|
||||
int sqr=0;
|
||||
int diff, mean, score;
|
||||
int sqr, diff, mean, score;
|
||||
|
||||
vector = codebook + stage*size*16 + i*size;
|
||||
|
||||
for(j=0; j<size; j++){
|
||||
int v= vector[j];
|
||||
sqr += (v - block[stage][j])*(v - block[stage][j]);
|
||||
}
|
||||
sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
|
||||
diff= block_sum[stage] - sum;
|
||||
mean= (diff + (size>>1)) >> (level+3);
|
||||
assert(mean >-300 && mean<300);
|
||||
|
Loading…
Reference in New Issue
Block a user