diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c index 6639b919ff..5e34a11593 100644 --- a/libavcodec/me_cmp.c +++ b/libavcodec/me_cmp.c @@ -628,7 +628,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); - s->pdsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); s->fdsp.fdct(temp); return s->mecc.sum_abs_dctelem(temp); } @@ -668,7 +668,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, int16_t dct[8][8]; int i, sum = 0; - s->pdsp.diff_pixels(dct[0], src1, src2, stride); + s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride); #define SRC(x) dct[i][x] #define DST(x, v) dct[i][x] = v @@ -695,7 +695,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); - s->pdsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); s->fdsp.fdct(temp); for (i = 0; i < 64; i++) @@ -714,7 +714,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); s->mb_intra = 0; - s->pdsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); memcpy(bak, temp, 64 * sizeof(int16_t)); @@ -817,7 +817,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, av_assert2(h == 8); - s->pdsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); s->block_last_index[0 /* FIXME */] = last = diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index 417c944e00..50e1d1d735 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -82,6 +82,7 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) { const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + c->diff_pixels_unaligned = c->diff_pixels = diff_pixels_c; switch (avctx->bits_per_raw_sample) { diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 3ba1596a88..e036700ff0 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -33,6 +33,11 @@ typedef struct PixblockDSPContext { const uint8_t *s1 /* align 8 */, const uint8_t *s2 /* align 8 */, ptrdiff_t stride); + void (*diff_pixels_unaligned)(int16_t *av_restrict block /* align 16 */, + const uint8_t *s1, + const uint8_t *s2, + ptrdiff_t stride); + } PixblockDSPContext; void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c index fa9578a2d3..ade55e01a3 100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@ -39,12 +39,14 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, if (EXTERNAL_MMX(cpu_flags)) { if (!high_bit_depth) c->get_pixels = ff_get_pixels_mmx; + c->diff_pixels_unaligned = c->diff_pixels = ff_diff_pixels_mmx; } if (EXTERNAL_SSE2(cpu_flags)) { if (!high_bit_depth) c->get_pixels = ff_get_pixels_sse2; + c->diff_pixels_unaligned = c->diff_pixels = ff_diff_pixels_sse2; } }