From bff7feb328d8d3fd234f920cb45e0ebdbdd7b407 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Sun, 1 Feb 2015 15:13:45 -0300 Subject: [PATCH] x86: hevc/sao: aligned source buffers Useful for at least band filter, for which: - Band filter call only: 32 64 Before: 16556 54015 After: 16497 52355 - Whole case: 32 64 Before: 37031 103008 After: 32045 93952 --- libavcodec/hevc.c | 6 +++--- libavcodec/x86/hevc_sao.asm | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index 7db32f79a7..f24cd8f119 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -284,12 +284,12 @@ static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps) { int ret, i; - frame->width = s->avctx->coded_width + 2; - frame->height = s->avctx->coded_height + 2; + frame->width = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE); + frame->height = s->avctx->coded_height + 3; if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) return ret; for (i = 0; frame->data[i]; i++) { - int offset = frame->linesize[i] + (1 << sps->pixel_shift); + int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE; frame->data[i] += offset; } frame->width = s->avctx->coded_width; diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm index 7f36fd0a8b..4c11730282 100644 --- a/libavcodec/x86/hevc_sao.asm +++ b/libavcodec/x86/hevc_sao.asm @@ -104,26 +104,26 @@ align 16 %assign i 0 %rep %2 - movu m13, [srcq + i] + mova m13, [srcq + i] punpcklbw m8, m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8 punpckhbw m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13 packuswb m8, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 %assign i i+mmsize %endrep %if %1 == 48 INIT_XMM cpuname - movu m13, [srcq + i] + mova m13, [srcq + i] punpcklbw m8, m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8 punpckhbw m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13 packuswb m8, m13 - movu [dstq + 
i], m8 + mova [dstq + i], m8 %assign i i+16 %endif ; %1 == 48 @@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of align 16 .loop %if %2 == 8 - movu m8, [srcq] + mova m8, [srcq] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq], m8 + mova [dstq], m8 %endif %assign i 0 %rep %3 - movu m8, [srcq + i] + mova m8, [srcq + i] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 - movu m9, [srcq + i + mmsize] + mova m9, [srcq + i + mmsize] HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9 CLIPW m9, m14, m13 - movu [dstq + i + mmsize], m9 + mova [dstq + i + mmsize], m9 %assign i i+mmsize*2 %endrep %if %2 == 48 INIT_XMM cpuname - movu m8, [srcq + i] + mova m8, [srcq + i] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 - movu m9, [srcq + i + mmsize] + mova m9, [srcq + i + mmsize] HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9 CLIPW m9, m14, m13 - movu [dstq + i + mmsize], m9 + mova [dstq + i + mmsize], m9 %assign i i+32 %endif ; %1 == 48