mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
hevcdsp: split the epel functions by width
This should allow for more efficient SIMD.
This commit is contained in:
parent
1f821750f0
commit
818bfe7f0a
@ -1533,7 +1533,7 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
|
||||
*/
|
||||
static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
|
||||
ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
|
||||
int x_off, int y_off, int block_w, int block_h)
|
||||
int x_off, int y_off, int block_w, int block_h, int pred_idx)
|
||||
{
|
||||
HEVCLocalContext *lc = &s->HEVClc;
|
||||
uint8_t *src1 = ref->data[1];
|
||||
@ -1571,8 +1571,8 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
|
||||
|
||||
src1 = lc->edge_emu_buffer + buf_offset1;
|
||||
src1stride = edge_emu_stride;
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
|
||||
block_w, block_h, mx, my, lc->mc_buffer);
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
|
||||
block_h, mx, my, lc->mc_buffer);
|
||||
|
||||
s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
|
||||
edge_emu_stride, src2stride,
|
||||
@ -1583,16 +1583,13 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
|
||||
src2 = lc->edge_emu_buffer + buf_offset2;
|
||||
src2stride = edge_emu_stride;
|
||||
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
|
||||
block_w, block_h, mx, my,
|
||||
lc->mc_buffer);
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
|
||||
block_h, mx, my, lc->mc_buffer);
|
||||
} else {
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
|
||||
block_w, block_h, mx, my,
|
||||
lc->mc_buffer);
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
|
||||
block_w, block_h, mx, my,
|
||||
lc->mc_buffer);
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
|
||||
block_h, mx, my, lc->mc_buffer);
|
||||
s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
|
||||
block_h, mx, my, lc->mc_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1737,7 +1734,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
|
||||
s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
|
||||
}
|
||||
chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
|
||||
&current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
|
||||
&current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
|
||||
|
||||
if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
|
||||
(s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
|
||||
@ -1774,7 +1771,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
|
||||
}
|
||||
|
||||
chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
|
||||
&current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
|
||||
&current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
|
||||
|
||||
if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
|
||||
(s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
|
||||
@ -1816,9 +1813,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
|
||||
}
|
||||
|
||||
chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
|
||||
&current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
|
||||
&current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
|
||||
chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
|
||||
&current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
|
||||
&current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
|
||||
|
||||
if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
|
||||
(s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
|
||||
|
@ -122,6 +122,12 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
|
||||
hevcdsp->put_hevc_qpel[1][0][i] = FUNC(put_hevc_qpel_v_ ## width, depth); \
|
||||
hevcdsp->put_hevc_qpel[1][1][i] = FUNC(put_hevc_qpel_hv_ ## width, depth); \
|
||||
|
||||
#define EPEL_FUNC(i, width, depth) \
|
||||
hevcdsp->put_hevc_epel[0][0][i] = FUNC(put_hevc_epel_pixels_ ## width, depth); \
|
||||
hevcdsp->put_hevc_epel[0][1][i] = FUNC(put_hevc_epel_h_ ## width, depth); \
|
||||
hevcdsp->put_hevc_epel[1][0][i] = FUNC(put_hevc_epel_v_ ## width, depth); \
|
||||
hevcdsp->put_hevc_epel[1][1][i] = FUNC(put_hevc_epel_hv_ ## width, depth); \
|
||||
|
||||
#define HEVC_DSP(depth) \
|
||||
hevcdsp->put_pcm = FUNC(put_pcm, depth); \
|
||||
hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \
|
||||
@ -154,10 +160,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
|
||||
QPEL_FUNC(6, 48, depth); \
|
||||
QPEL_FUNC(7, 64, depth); \
|
||||
\
|
||||
hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth); \
|
||||
hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth); \
|
||||
hevcdsp->put_hevc_epel[1][0] = FUNC(put_hevc_epel_v, depth); \
|
||||
hevcdsp->put_hevc_epel[1][1] = FUNC(put_hevc_epel_hv, depth); \
|
||||
EPEL_FUNC(0, 2, depth); \
|
||||
EPEL_FUNC(1, 4, depth); \
|
||||
EPEL_FUNC(2, 6, depth); \
|
||||
EPEL_FUNC(3, 8, depth); \
|
||||
EPEL_FUNC(4, 12, depth); \
|
||||
EPEL_FUNC(5, 16, depth); \
|
||||
EPEL_FUNC(6, 24, depth); \
|
||||
EPEL_FUNC(7, 32, depth); \
|
||||
\
|
||||
hevcdsp->put_unweighted_pred = FUNC(put_unweighted_pred, depth); \
|
||||
hevcdsp->put_unweighted_pred_avg = FUNC(put_unweighted_pred_avg, depth); \
|
||||
|
@ -61,9 +61,9 @@ typedef struct HEVCDSPContext {
|
||||
void (*put_hevc_qpel[2][2][8])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int height,
|
||||
int mx, int my, int16_t *mcbuffer);
|
||||
void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int width, int height,
|
||||
int mx, int my, int16_t *mcbuffer);
|
||||
void (*put_hevc_epel[2][2][8])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int height,
|
||||
int mx, int my, int16_t *mcbuffer);
|
||||
|
||||
void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
|
||||
ptrdiff_t srcstride, int width, int height);
|
||||
|
@ -982,10 +982,10 @@ QPEL(12)
|
||||
QPEL(8)
|
||||
QPEL(4)
|
||||
|
||||
static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
static inline void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
{
|
||||
int x, y;
|
||||
pixel *src = (pixel *)_src;
|
||||
@ -1005,10 +1005,10 @@ static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
|
||||
filter_2 * src[x + stride] + \
|
||||
filter_3 * src[x + 2 * stride])
|
||||
|
||||
static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
static inline void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
{
|
||||
int x, y;
|
||||
pixel *src = (pixel *)_src;
|
||||
@ -1026,10 +1026,10 @@ static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
|
||||
}
|
||||
}
|
||||
|
||||
static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
static inline void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
{
|
||||
int x, y;
|
||||
pixel *src = (pixel *)_src;
|
||||
@ -1048,10 +1048,10 @@ static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
|
||||
}
|
||||
}
|
||||
|
||||
static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
static inline void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *_src, ptrdiff_t _srcstride,
|
||||
int width, int height, int mx, int my,
|
||||
int16_t* mcbuffer)
|
||||
{
|
||||
int x, y;
|
||||
pixel *src = (pixel *)_src;
|
||||
@ -1087,6 +1087,49 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
|
||||
}
|
||||
}
|
||||
|
||||
#define EPEL(W) \
|
||||
static void FUNC(put_hevc_epel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride, \
|
||||
uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, int mx, int my, \
|
||||
int16_t *mcbuffer) \
|
||||
{ \
|
||||
FUNC(put_hevc_epel_pixels)(dst, dststride, src, srcstride, \
|
||||
W, height, mx, my, mcbuffer); \
|
||||
} \
|
||||
static void FUNC(put_hevc_epel_h_ ## W)(int16_t *dst, ptrdiff_t dststride, \
|
||||
uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, int mx, int my, \
|
||||
int16_t *mcbuffer) \
|
||||
{ \
|
||||
FUNC(put_hevc_epel_h)(dst, dststride, src, srcstride, \
|
||||
W, height, mx, my, mcbuffer); \
|
||||
} \
|
||||
static void FUNC(put_hevc_epel_v_ ## W)(int16_t *dst, ptrdiff_t dststride, \
|
||||
uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, int mx, int my, \
|
||||
int16_t *mcbuffer) \
|
||||
{ \
|
||||
FUNC(put_hevc_epel_v)(dst, dststride, src, srcstride, \
|
||||
W, height, mx, my, mcbuffer); \
|
||||
} \
|
||||
static void FUNC(put_hevc_epel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride, \
|
||||
uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, int mx, int my, \
|
||||
int16_t *mcbuffer) \
|
||||
{ \
|
||||
FUNC(put_hevc_epel_hv)(dst, dststride, src, srcstride, \
|
||||
W, height, mx, my, mcbuffer); \
|
||||
}
|
||||
|
||||
EPEL(32)
|
||||
EPEL(24)
|
||||
EPEL(16)
|
||||
EPEL(12)
|
||||
EPEL(8)
|
||||
EPEL(6)
|
||||
EPEL(4)
|
||||
EPEL(2)
|
||||
|
||||
static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
|
||||
int16_t *src, ptrdiff_t srcstride,
|
||||
int width, int height)
|
||||
|
Loading…
Reference in New Issue
Block a user