You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
hevcdsp: remove an unneeded variable in the loop filter
beta0 and beta1 will always be the same within a CU.

Signed-off-by: Mickaël Raulet <mraulet@insa-rennes.fr>
cherry picked from commit 4a23d824741a289c7d2d2f2871d1e2621b63fa1b
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
ae2f048fd7
commit
d7e162d46b
@@ -340,7 +340,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
|
|||||||
uint8_t *src;
|
uint8_t *src;
|
||||||
int x, y;
|
int x, y;
|
||||||
int chroma;
|
int chroma;
|
||||||
int c_tc[2], beta[2], tc[2];
|
int c_tc[2], tc[2], beta;
|
||||||
uint8_t no_p[2] = { 0 };
|
uint8_t no_p[2] = { 0 };
|
||||||
uint8_t no_q[2] = { 0 };
|
uint8_t no_q[2] = { 0 };
|
||||||
|
|
||||||
@@ -381,13 +381,11 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
|
|||||||
const int bs0 = s->vertical_bs[(x + y * s->bs_width) >> 2];
|
const int bs0 = s->vertical_bs[(x + y * s->bs_width) >> 2];
|
||||||
const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
|
const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
|
||||||
if (bs0 || bs1) {
|
if (bs0 || bs1) {
|
||||||
const int qp0 = (get_qPy(s, x - 1, y) + get_qPy(s, x, y) + 1) >> 1;
|
const int qp = (get_qPy(s, x - 1, y) + get_qPy(s, x, y) + 1) >> 1;
|
||||||
const int qp1 = (get_qPy(s, x - 1, y + 4) + get_qPy(s, x, y + 4) + 1) >> 1;
|
|
||||||
|
|
||||||
beta[0] = betatable[av_clip(qp0 + beta_offset, 0, MAX_QP)];
|
beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
|
||||||
beta[1] = betatable[av_clip(qp1 + beta_offset, 0, MAX_QP)];
|
tc[0] = bs0 ? TC_CALC(qp, bs0) : 0;
|
||||||
tc[0] = bs0 ? TC_CALC(qp0, bs0) : 0;
|
tc[1] = bs1 ? TC_CALC(qp, bs1) : 0;
|
||||||
tc[1] = bs1 ? TC_CALC(qp1, bs1) : 0;
|
|
||||||
src = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
|
src = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
|
||||||
if (pcmf) {
|
if (pcmf) {
|
||||||
no_p[0] = get_pcm(s, x - 1, y);
|
no_p[0] = get_pcm(s, x - 1, y);
|
||||||
@@ -447,16 +445,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
|
|||||||
const int bs0 = s->horizontal_bs[( x + y * s->bs_width) >> 2];
|
const int bs0 = s->horizontal_bs[( x + y * s->bs_width) >> 2];
|
||||||
const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
|
const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
|
||||||
if (bs0 || bs1) {
|
if (bs0 || bs1) {
|
||||||
const int qp0 = (get_qPy(s, x, y - 1) + get_qPy(s, x, y) + 1) >> 1;
|
const int qp = (get_qPy(s, x, y - 1) + get_qPy(s, x, y) + 1) >> 1;
|
||||||
const int qp1 = (get_qPy(s, x + 4, y - 1) + get_qPy(s, x + 4, y) + 1) >> 1;
|
|
||||||
|
|
||||||
tc_offset = x >= x0 ? cur_tc_offset : left_tc_offset;
|
tc_offset = x >= x0 ? cur_tc_offset : left_tc_offset;
|
||||||
beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
|
beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
|
||||||
|
|
||||||
beta[0] = betatable[av_clip(qp0 + beta_offset, 0, MAX_QP)];
|
beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
|
||||||
beta[1] = betatable[av_clip(qp1 + beta_offset, 0, MAX_QP)];
|
tc[0] = bs0 ? TC_CALC(qp, bs0) : 0;
|
||||||
tc[0] = bs0 ? TC_CALC(qp0, bs0) : 0;
|
tc[1] = bs1 ? TC_CALC(qp, bs1) : 0;
|
||||||
tc[1] = bs1 ? TC_CALC(qp1, bs1) : 0;
|
|
||||||
src = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
|
src = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
|
||||||
if (pcmf) {
|
if (pcmf) {
|
||||||
no_p[0] = get_pcm(s, x, y - 1);
|
no_p[0] = get_pcm(s, x, y - 1);
|
||||||
|
@@ -97,20 +97,20 @@ typedef struct HEVCDSPContext {
|
|||||||
int ox1, intptr_t mx, intptr_t my, int width);
|
int ox1, intptr_t mx, intptr_t my, int width);
|
||||||
|
|
||||||
void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc,
|
int beta, int *tc,
|
||||||
uint8_t *no_p, uint8_t *no_q);
|
uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc,
|
int beta, int *tc,
|
||||||
uint8_t *no_p, uint8_t *no_q);
|
uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *tc, uint8_t *no_p, uint8_t *no_q);
|
int *tc, uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *tc, uint8_t *no_p, uint8_t *no_q);
|
int *tc, uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_h_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_h_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc,
|
int beta, int *tc,
|
||||||
uint8_t *no_p, uint8_t *no_q);
|
uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_v_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_v_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc,
|
int beta, int *tc,
|
||||||
uint8_t *no_p, uint8_t *no_q);
|
uint8_t *no_p, uint8_t *no_q);
|
||||||
void (*hevc_h_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
|
void (*hevc_h_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *tc, uint8_t *no_p,
|
int *tc, uint8_t *no_p,
|
||||||
|
@@ -1564,7 +1564,7 @@ static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uin
|
|||||||
|
|
||||||
static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
|
static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
|
||||||
ptrdiff_t _xstride, ptrdiff_t _ystride,
|
ptrdiff_t _xstride, ptrdiff_t _ystride,
|
||||||
int *_beta, int *_tc,
|
int beta, int *_tc,
|
||||||
uint8_t *_no_p, uint8_t *_no_q)
|
uint8_t *_no_p, uint8_t *_no_q)
|
||||||
{
|
{
|
||||||
int d, j;
|
int d, j;
|
||||||
@@ -1572,6 +1572,8 @@ static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
|
|||||||
ptrdiff_t xstride = _xstride / sizeof(pixel);
|
ptrdiff_t xstride = _xstride / sizeof(pixel);
|
||||||
ptrdiff_t ystride = _ystride / sizeof(pixel);
|
ptrdiff_t ystride = _ystride / sizeof(pixel);
|
||||||
|
|
||||||
|
beta <<= BIT_DEPTH - 8;
|
||||||
|
|
||||||
for (j = 0; j < 2; j++) {
|
for (j = 0; j < 2; j++) {
|
||||||
const int dp0 = abs(P2 - 2 * P1 + P0);
|
const int dp0 = abs(P2 - 2 * P1 + P0);
|
||||||
const int dq0 = abs(Q2 - 2 * Q1 + Q0);
|
const int dq0 = abs(Q2 - 2 * Q1 + Q0);
|
||||||
@@ -1579,7 +1581,6 @@ static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
|
|||||||
const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
|
const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
|
||||||
const int d0 = dp0 + dq0;
|
const int d0 = dp0 + dq0;
|
||||||
const int d3 = dp3 + dq3;
|
const int d3 = dp3 + dq3;
|
||||||
const int beta = _beta[j] << (BIT_DEPTH - 8);
|
|
||||||
const int tc = _tc[j] << (BIT_DEPTH - 8);
|
const int tc = _tc[j] << (BIT_DEPTH - 8);
|
||||||
const int no_p = _no_p[j];
|
const int no_p = _no_p[j];
|
||||||
const int no_q = _no_q[j];
|
const int no_q = _no_q[j];
|
||||||
@@ -1706,7 +1707,7 @@ static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc, uint8_t *no_p,
|
int beta, int *tc, uint8_t *no_p,
|
||||||
uint8_t *no_q)
|
uint8_t *no_q)
|
||||||
{
|
{
|
||||||
FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
|
FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
|
||||||
@@ -1714,7 +1715,7 @@ static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
|
||||||
int *beta, int *tc, uint8_t *no_p,
|
int beta, int *tc, uint8_t *no_p,
|
||||||
uint8_t *no_q)
|
uint8_t *no_q)
|
||||||
{
|
{
|
||||||
FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
|
FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
|
||||||
|
@@ -310,7 +310,7 @@ INIT_XMM sse2
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
; input in m0 ... m3 and tcs in tc (r2). Output in m1 and m2
|
; input in m0 ... m3 and tcs in r2. Output in m1 and m2
|
||||||
%macro CHROMA_DEBLOCK_BODY 1
|
%macro CHROMA_DEBLOCK_BODY 1
|
||||||
psubw m4, m2, m1; q0 - p0
|
psubw m4, m2, m1; q0 - p0
|
||||||
psubw m5, m0, m3; p1 - q1
|
psubw m5, m0, m3; p1 - q1
|
||||||
@@ -339,7 +339,7 @@ ALIGN 16
|
|||||||
psubw m2, m5; q0 - delta0
|
psubw m2, m5; q0 - delta0
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
; input in m0 ... m7, betas in r2 tcs in r3. Output in m1...m6
|
; input in m0 ... m7, beta in r2 tcs in r3. Output in m1...m6
|
||||||
%macro LUMA_DEBLOCK_BODY 2
|
%macro LUMA_DEBLOCK_BODY 2
|
||||||
psllw m9, m2, 1; *2
|
psllw m9, m2, 1; *2
|
||||||
psubw m10, m1, m9
|
psubw m10, m1, m9
|
||||||
@@ -352,20 +352,11 @@ ALIGN 16
|
|||||||
ABS1 m11, m13 ; 0dq0, 0dq3 , 1dq0, 1dq3
|
ABS1 m11, m13 ; 0dq0, 0dq3 , 1dq0, 1dq3
|
||||||
|
|
||||||
;beta calculations
|
;beta calculations
|
||||||
mov r11, [betaq];
|
|
||||||
%if %1 > 8
|
%if %1 > 8
|
||||||
shl r11, %1 - 8
|
shl betaq, %1 - 8
|
||||||
%endif
|
%endif
|
||||||
movd m13, r11d; beta0
|
movd m13, betaq
|
||||||
add betaq, 4;
|
SPLATW m13, m13, 0
|
||||||
punpcklwd m13, m13
|
|
||||||
mov r12, [betaq];
|
|
||||||
%if %1 > 8
|
|
||||||
shl r12, %1 - 8
|
|
||||||
%endif
|
|
||||||
movd m14, r12d; beta1
|
|
||||||
punpcklwd m14, m14
|
|
||||||
pshufd m13, m14, 0; beta0, beta1
|
|
||||||
;end beta calculations
|
;end beta calculations
|
||||||
|
|
||||||
paddw m9, m10, m11; 0d0, 0d3 , 1d0, 1d3
|
paddw m9, m10, m11; 0d0, 0d3 , 1d0, 1d3
|
||||||
@@ -412,31 +403,31 @@ ALIGN 16
|
|||||||
; end calc for weak filter
|
; end calc for weak filter
|
||||||
|
|
||||||
; filtering mask
|
; filtering mask
|
||||||
mov r2, r13
|
mov r11, r13
|
||||||
shr r2, 3
|
shr r11, 3
|
||||||
movd m15, r2d
|
movd m15, r11d
|
||||||
and r13, 1
|
and r13, 1
|
||||||
movd m11, r13d
|
movd m11, r13d
|
||||||
shufps m11, m15, 0
|
shufps m11, m15, 0
|
||||||
shl r2, 1
|
shl r11, 1
|
||||||
or r13, r2
|
or r13, r11
|
||||||
|
|
||||||
pcmpeqd m11, [pd_1]; filtering mask
|
pcmpeqd m11, [pd_1]; filtering mask
|
||||||
|
|
||||||
;decide between strong and weak filtering
|
;decide between strong and weak filtering
|
||||||
;tc25 calculations
|
;tc25 calculations
|
||||||
mov r2d, [tcq];
|
mov r11d, [tcq];
|
||||||
%if %1 > 8
|
%if %1 > 8
|
||||||
shl r2, %1 - 8
|
shl r11, %1 - 8
|
||||||
%endif
|
%endif
|
||||||
movd m8, r2d; tc0
|
movd m8, r11d; tc0
|
||||||
add tcq, 4;
|
add tcq, 4;
|
||||||
mov r3d, [tcq];
|
mov r3d, [tcq];
|
||||||
%if %1 > 8
|
%if %1 > 8
|
||||||
shl r3, %1 - 8
|
shl r3, %1 - 8
|
||||||
%endif
|
%endif
|
||||||
movd m9, r3d; tc1
|
movd m9, r3d; tc1
|
||||||
add r2d, r3d; tc0 + tc1
|
add r11d, r3d; tc0 + tc1
|
||||||
jz .bypassluma
|
jz .bypassluma
|
||||||
punpcklwd m8, m8
|
punpcklwd m8, m8
|
||||||
punpcklwd m9, m9
|
punpcklwd m9, m9
|
||||||
@@ -460,8 +451,8 @@ ALIGN 16
|
|||||||
|
|
||||||
psraw m13, 3; beta >> 3
|
psraw m13, 3; beta >> 3
|
||||||
pcmpgtw m13, m12;
|
pcmpgtw m13, m12;
|
||||||
movmskps r2, m13;
|
movmskps r11, m13;
|
||||||
and r14, r2; strong mask , beta_2 and beta_3 comparisons
|
and r14, r11; strong mask , beta_2 and beta_3 comparisons
|
||||||
;----beta_3 comparison end-----
|
;----beta_3 comparison end-----
|
||||||
;----tc25 comparison---
|
;----tc25 comparison---
|
||||||
psubw m12, m3, m4; p0 - q0
|
psubw m12, m3, m4; p0 - q0
|
||||||
@@ -471,24 +462,24 @@ ALIGN 16
|
|||||||
pshuflw m12, m12, 0xf0 ;0b11110000;
|
pshuflw m12, m12, 0xf0 ;0b11110000;
|
||||||
|
|
||||||
pcmpgtw m8, m12; tc25 comparisons
|
pcmpgtw m8, m12; tc25 comparisons
|
||||||
movmskps r2, m8;
|
movmskps r11, m8;
|
||||||
and r14, r2; strong mask, beta_2, beta_3 and tc25 comparisons
|
and r14, r11; strong mask, beta_2, beta_3 and tc25 comparisons
|
||||||
;----tc25 comparison end---
|
;----tc25 comparison end---
|
||||||
mov r2, r14;
|
mov r11, r14;
|
||||||
shr r2, 1;
|
shr r11, 1;
|
||||||
and r14, r2; strong mask, bits 2 and 0
|
and r14, r11; strong mask, bits 2 and 0
|
||||||
|
|
||||||
pmullw m14, m9, [pw_m2]; -tc * 2
|
pmullw m14, m9, [pw_m2]; -tc * 2
|
||||||
paddw m9, m9
|
paddw m9, m9
|
||||||
|
|
||||||
and r14, 5; 0b101
|
and r14, 5; 0b101
|
||||||
mov r2, r14; strong mask
|
mov r11, r14; strong mask
|
||||||
shr r14, 2;
|
shr r14, 2;
|
||||||
movd m12, r14d; store to xmm for mask generation
|
movd m12, r14d; store to xmm for mask generation
|
||||||
shl r14, 1
|
shl r14, 1
|
||||||
and r2, 1
|
and r11, 1
|
||||||
movd m10, r2d; store to xmm for mask generation
|
movd m10, r11d; store to xmm for mask generation
|
||||||
or r14, r2; final strong mask, bits 1 and 0
|
or r14, r11; final strong mask, bits 1 and 0
|
||||||
jz .weakfilter
|
jz .weakfilter
|
||||||
|
|
||||||
shufps m10, m12, 0
|
shufps m10, m12, 0
|
||||||
@@ -578,23 +569,18 @@ ALIGN 16
|
|||||||
jz .store
|
jz .store
|
||||||
|
|
||||||
; weak filtering mask
|
; weak filtering mask
|
||||||
mov r2, r14
|
mov r11, r14
|
||||||
shr r2, 1
|
shr r11, 1
|
||||||
movd m12, r2d
|
movd m12, r11d
|
||||||
and r14, 1
|
and r14, 1
|
||||||
movd m11, r14d
|
movd m11, r14d
|
||||||
shufps m11, m12, 0
|
shufps m11, m12, 0
|
||||||
pcmpeqd m11, [pd_1]; filtering mask
|
pcmpeqd m11, [pd_1]; filtering mask
|
||||||
|
|
||||||
mov r13, r11; beta0
|
mov r13, betaq
|
||||||
shr r13, 1;
|
shr r13, 1;
|
||||||
add r11, r13
|
add betaq, r13
|
||||||
shr r11, 3; ((beta0+(beta0>>1))>>3))
|
shr betaq, 3; ((beta + (beta >> 1)) >> 3))
|
||||||
|
|
||||||
mov r13, r12; beta1
|
|
||||||
shr r13, 1;
|
|
||||||
add r12, r13
|
|
||||||
shr r12, 3; ((beta1+(beta1>>1))>>3))
|
|
||||||
|
|
||||||
mova m13, [pw_8]
|
mova m13, [pw_8]
|
||||||
psubw m12, m4, m3 ; q0 - p0
|
psubw m12, m4, m3 ; q0 - p0
|
||||||
@@ -633,11 +619,8 @@ ALIGN 16
|
|||||||
paddw m15, m2; p1'
|
paddw m15, m2; p1'
|
||||||
|
|
||||||
;beta calculations
|
;beta calculations
|
||||||
movd m10, r11d; beta0
|
movd m10, betaq
|
||||||
punpcklwd m10, m10
|
SPLATW m10, m10, 0
|
||||||
movd m13, r12d; beta1
|
|
||||||
punpcklwd m13, m13
|
|
||||||
shufps m10, m13, 0; betax0, betax1
|
|
||||||
|
|
||||||
movd m13, r7d; 1dp0 + 1dp3
|
movd m13, r7d; 1dp0 + 1dp3
|
||||||
movd m8, r8d; 0dp0 + 0dp3
|
movd m8, r8d; 0dp0 + 0dp3
|
||||||
|
Reference in New Issue
Block a user