commit d0e132bab6

Merge commit '1bd890ad173d79e7906c5e1d06bf0a06cca4519d'

* commit '1bd890ad173d79e7906c5e1d06bf0a06cca4519d':
  hevc: Separate adding residual to prediction from IDCT

This commit should be a noop but isn't because of the following renames:

  - transform_add  → add_residual
  - transform_skip → dequant
  - idct_4x4_luma  → transform_4x4_luma

Merged-by: Clément Bœsch <cboesch@gopro.com>
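Before the diff itself, a note on what the central renamed hook computes: add_residual takes the block of inverse-transformed residuals and adds it onto the pixels already written by intra/inter prediction, clipping each sum back into the pixel range. Below is a minimal standalone C sketch of that operation, assuming 8-bit pixels; add_residual_sketch is a hypothetical name for illustration, while FFmpeg's real version (visible in the template hunks further down) is generated per bit depth and clips with av_clip_pixel.

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the add_residual step for 8-bit pixels: dst holds the
     * predicted block, res holds the size x size residual produced by the
     * inverse transform; each sum is clipped back into the pixel range. */
    static void add_residual_sketch(uint8_t *dst, const int16_t *res,
                                    ptrdiff_t stride, int size)
    {
        for (int y = 0; y < size; y++) {
            for (int x = 0; x < size; x++) {
                int sum = dst[x] + res[x];                      /* prediction + residual */
                dst[x] = sum < 0 ? 0 : (sum > 255 ? 255 : sum); /* clip to [0, 255] */
            }
            res += size;   /* residuals are stored contiguously, row after row */
            dst += stride; /* destination rows are stride bytes apart */
        }
    }

The other two renames are equally mechanical: dequant is the old transform_skip scaling stage (its shift = 15 - BIT_DEPTH - log2_size logic is unchanged in the template hunk), and transform_4x4_luma is the old idct_4x4_luma hook for the dedicated 4x4 intra luma transform.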
@@ -97,7 +97,7 @@ function ff_hevc_idct_32x32_dc_neon_8, export=1
         bx lr
 endfunc
 
-function ff_hevc_transform_add_4x4_neon_8, export=1
+function ff_hevc_add_residual_4x4_neon_8, export=1
         vldm r1, {q0-q1}
         vld1.32 d4[0], [r0], r2
         vld1.32 d4[1], [r0], r2
@@ -117,7 +117,7 @@ function ff_hevc_transform_add_4x4_neon_8, export=1
         bx lr
 endfunc
 
-function ff_hevc_transform_add_8x8_neon_8, export=1
+function ff_hevc_add_residual_8x8_neon_8, export=1
         mov r3, #8
 1:      subs r3, #1
         vld1.16 {q0}, [r1]!
@@ -130,7 +130,7 @@ function ff_hevc_transform_add_8x8_neon_8, export=1
         bx lr
 endfunc
 
-function ff_hevc_transform_add_16x16_neon_8, export=1
+function ff_hevc_add_residual_16x16_neon_8, export=1
         mov r3, #16
 1:      subs r3, #1
         vld1.16 {q0, q1}, [r1]!
@@ -146,7 +146,7 @@ function ff_hevc_transform_add_16x16_neon_8, export=1
         bx lr
 endfunc
 
-function ff_hevc_transform_add_32x32_neon_8, export=1
+function ff_hevc_add_residual_32x32_neon_8, export=1
         mov r3, #32
 1:      subs r3, #1
         vldm r1!, {q0-q3}
@@ -34,14 +34,14 @@ void ff_hevc_idct_8x8_dc_neon_8(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_neon_8(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_neon_8(int16_t *coeffs);
 void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
-void ff_hevc_transform_add_4x4_neon_8(uint8_t *_dst, int16_t *coeffs,
+void ff_hevc_add_residual_4x4_neon_8(uint8_t *_dst, int16_t *coeffs,
                                       ptrdiff_t stride);
-void ff_hevc_transform_add_8x8_neon_8(uint8_t *_dst, int16_t *coeffs,
+void ff_hevc_add_residual_8x8_neon_8(uint8_t *_dst, int16_t *coeffs,
                                       ptrdiff_t stride);
-void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs,
+void ff_hevc_add_residual_16x16_neon_8(uint8_t *_dst, int16_t *coeffs,
                                         ptrdiff_t stride);
-void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs,
+void ff_hevc_add_residual_32x32_neon_8(uint8_t *_dst, int16_t *coeffs,
                                         ptrdiff_t stride);
 
 #define PUT_PIXELS(name) \
     void name(int16_t *dst, uint8_t *src, \
@@ -156,11 +156,11 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth)
         c->idct_dc[1] = ff_hevc_idct_8x8_dc_neon_8;
         c->idct_dc[2] = ff_hevc_idct_16x16_dc_neon_8;
         c->idct_dc[3] = ff_hevc_idct_32x32_dc_neon_8;
-        c->transform_add[0] = ff_hevc_transform_add_4x4_neon_8;
-        c->transform_add[1] = ff_hevc_transform_add_8x8_neon_8;
-        c->transform_add[2] = ff_hevc_transform_add_16x16_neon_8;
-        c->transform_add[3] = ff_hevc_transform_add_32x32_neon_8;
-        c->idct_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
+        c->add_residual[0] = ff_hevc_add_residual_4x4_neon_8;
+        c->add_residual[1] = ff_hevc_add_residual_8x8_neon_8;
+        c->add_residual[2] = ff_hevc_add_residual_16x16_neon_8;
+        c->add_residual[3] = ff_hevc_add_residual_32x32_neon_8;
+        c->transform_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
         put_hevc_qpel_neon[1][0] = ff_hevc_put_qpel_v1_neon_8;
         put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8;
         put_hevc_qpel_neon[3][0] = ff_hevc_put_qpel_v3_neon_8;
@@ -1052,7 +1052,7 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                 for (i = 0; i < (size * size); i++) {
                     coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                 }
-                s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
+                s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
             }
         }
 
@@ -1081,7 +1081,7 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                 for (i = 0; i < (size * size); i++) {
                     coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                 }
-                s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
+                s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
             }
         }
     } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
@@ -1476,7 +1476,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                 FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]);
             }
 
-            s->hevcdsp.transform_skip(coeffs, log2_trafo_size);
+            s->hevcdsp.dequant(coeffs, log2_trafo_size);
 
             if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag &&
                                         lc->cu.pred_mode == MODE_INTRA &&
@@ -1486,7 +1486,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                 s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
             }
         } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) {
-            s->hevcdsp.idct_4x4_luma(coeffs);
+            s->hevcdsp.transform_4x4_luma(coeffs);
         } else {
             int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
             if (max_xy == 0)
@@ -1510,7 +1510,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
             coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
         }
     }
-    s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
+    s->hevcdsp.add_residual[log2_trafo_size-2](dst, coeffs, stride);
 }
 
 void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
@@ -195,13 +195,13 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
 
 #define HEVC_DSP(depth) \
     hevcdsp->put_pcm = FUNC(put_pcm, depth); \
-    hevcdsp->transform_add[0] = FUNC(transform_add4x4, depth); \
-    hevcdsp->transform_add[1] = FUNC(transform_add8x8, depth); \
-    hevcdsp->transform_add[2] = FUNC(transform_add16x16, depth); \
-    hevcdsp->transform_add[3] = FUNC(transform_add32x32, depth); \
-    hevcdsp->transform_skip = FUNC(transform_skip, depth); \
+    hevcdsp->add_residual[0] = FUNC(add_residual4x4, depth); \
+    hevcdsp->add_residual[1] = FUNC(add_residual8x8, depth); \
+    hevcdsp->add_residual[2] = FUNC(add_residual16x16, depth); \
+    hevcdsp->add_residual[3] = FUNC(add_residual32x32, depth); \
+    hevcdsp->dequant = FUNC(dequant, depth); \
     hevcdsp->transform_rdpcm = FUNC(transform_rdpcm, depth); \
-    hevcdsp->idct_4x4_luma = FUNC(transform_4x4_luma, depth); \
+    hevcdsp->transform_4x4_luma = FUNC(transform_4x4_luma, depth); \
     hevcdsp->idct[0] = FUNC(idct_4x4, depth); \
     hevcdsp->idct[1] = FUNC(idct_8x8, depth); \
     hevcdsp->idct[2] = FUNC(idct_16x16, depth); \
@@ -46,13 +46,13 @@ typedef struct HEVCDSPContext {
     void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
                     struct GetBitContext *gb, int pcm_bit_depth);
 
-    void (*transform_add[4])(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride);
+    void (*add_residual[4])(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride);
 
-    void (*transform_skip)(int16_t *coeffs, int16_t log2_size);
+    void (*dequant)(int16_t *coeffs, int16_t log2_size);
 
     void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
 
-    void (*idct_4x4_luma)(int16_t *coeffs);
+    void (*transform_4x4_luma)(int16_t *coeffs);
 
     void (*idct[4])(int16_t *coeffs, int col_limit);
 
@@ -42,8 +42,8 @@ static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height
     }
 }
 
-static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coeffs,
+static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
                                                 ptrdiff_t stride, int size)
 {
     int x, y;
     pixel *dst = (pixel *)_dst;
@@ -52,35 +52,35 @@ static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coe
 
     for (y = 0; y < size; y++) {
         for (x = 0; x < size; x++) {
-            dst[x] = av_clip_pixel(dst[x] + *coeffs);
-            coeffs++;
+            dst[x] = av_clip_pixel(dst[x] + *res);
+            res++;
         }
         dst += stride;
     }
 }
 
-static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
 {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 4);
+    FUNC(add_residual)(_dst, res, stride, 4);
 }
 
-static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
 {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 8);
+    FUNC(add_residual)(_dst, res, stride, 8);
 }
 
-static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
                                      ptrdiff_t stride)
 {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 16);
+    FUNC(add_residual)(_dst, res, stride, 16);
 }
 
-static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
                                      ptrdiff_t stride)
 {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 32);
+    FUNC(add_residual)(_dst, res, stride, 32);
 }
 
 
@@ -106,13 +106,11 @@ static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
     }
 }
 
-static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
+static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
 {
     int shift = 15 - BIT_DEPTH - log2_size;
     int x, y;
     int size = 1 << log2_size;
-    int16_t *coeffs = _coeffs;
-
 
     if (shift > 0) {
         int offset = 1 << (shift - 1);
@@ -134,8 +132,6 @@ static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
 
 #define SET(dst, x) (dst) = (x)
 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
-#define ADD_AND_SCALE(dst, x) \
-    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
 
 #define TR_4x4_LUMA(dst, src, step, assign) \
     do { \
@@ -299,7 +295,6 @@ IDCT_DC(32)
 
 #undef SET
 #undef SCALE
-#undef ADD_AND_SCALE
 
 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
                                   ptrdiff_t stride_dst, ptrdiff_t stride_src,
@@ -437,11 +437,11 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
         c->idct_dc[1] = ff_hevc_idct_dc_8x8_msa;
         c->idct_dc[2] = ff_hevc_idct_dc_16x16_msa;
         c->idct_dc[3] = ff_hevc_idct_dc_32x32_msa;
-        c->transform_add[0] = ff_hevc_addblk_4x4_msa;
-        c->transform_add[1] = ff_hevc_addblk_8x8_msa;
-        c->transform_add[2] = ff_hevc_addblk_16x16_msa;
-        c->transform_add[3] = ff_hevc_addblk_32x32_msa;
-        c->idct_4x4_luma = ff_hevc_idct_luma_4x4_msa;
+        c->add_residual[0] = ff_hevc_addblk_4x4_msa;
+        c->add_residual[1] = ff_hevc_addblk_8x8_msa;
+        c->add_residual[2] = ff_hevc_addblk_16x16_msa;
+        c->add_residual[3] = ff_hevc_addblk_32x32_msa;
+        c->transform_4x4_luma = ff_hevc_idct_luma_4x4_msa;
     }
 }
 #endif // #if HAVE_MSA
@@ -1,5 +1,5 @@
 ; /*
-; * Provide SIMD optimizations for transform_add functions for HEVC decoding
+; * Provide SIMD optimizations for add_residual functions for HEVC decoding
 ; * Copyright (c) 2014 Pierre-Edouard LEPERE
 ; *
 ; * This file is part of FFmpeg.
@@ -52,7 +52,7 @@ cextern pw_1023
 
 INIT_MMX mmxext
 ; void ff_hevc_tranform_add_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add4_8, 3, 4, 6
+cglobal hevc_add_residual4_8, 3, 4, 6
     TR_ADD_MMX_4_8
     add r1, 16
     lea r0, [r0+r2*2]
@@ -135,8 +135,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6
 
 
 %macro TRANSFORM_ADD_8 0
-; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add8_8, 3, 4, 8
+; void ff_hevc_add_residual8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual8_8, 3, 4, 8
     lea r3, [r2*3]
     TR_ADD_SSE_8_8
     add r1, 64
@@ -144,8 +144,8 @@ cglobal hevc_transform_add8_8, 3, 4, 8
     TR_ADD_SSE_8_8
     RET
 
-; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add16_8, 3, 4, 7
+; void ff_hevc_add_residual16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual16_8, 3, 4, 7
     pxor m0, m0
     lea r3, [r2*3]
     TR_ADD_SSE_16_32_8 0, r0, r0+r2
@@ -158,8 +158,8 @@ cglobal hevc_transform_add16_8, 3, 4, 7
 %endrep
     RET
 
-; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add32_8, 3, 4, 7
+; void ff_hevc_add_residual32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual32_8, 3, 4, 7
     pxor m0, m0
     TR_ADD_SSE_16_32_8 0, r0, r0+16
     TR_ADD_SSE_16_32_8 64, r0+r2, r0+r2+16
@@ -179,8 +179,8 @@ TRANSFORM_ADD_8
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
-; void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add32_8, 3, 4, 7
+; void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual32_8, 3, 4, 7
     pxor m0, m0
     lea r3, [r2*3]
     TR_ADD_SSE_16_32_8 0, r0, r0+r2
@@ -195,7 +195,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7
 %endif
 
 ;-----------------------------------------------------------------------------
-; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
+; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro TR_ADD_SSE_8_10 4
     mova m0, [%4]
@@ -310,7 +310,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7
 
 
 INIT_MMX mmxext
-cglobal hevc_transform_add4_10,3,4, 6
+cglobal hevc_add_residual4_10,3,4, 6
     pxor m2, m2
     mova m3, [max_pixels_10]
    TR_ADD_MMX4_10 r0, r2, r1
@@ -320,10 +320,10 @@ cglobal hevc_transform_add4_10,3,4, 6
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
+; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 INIT_XMM sse2
-cglobal hevc_transform_add8_10,3,4,6
+cglobal hevc_add_residual8_10,3,4,6
     pxor m4, m4
     mova m5, [max_pixels_10]
     lea r3, [r2*3]
@@ -334,7 +334,7 @@ cglobal hevc_transform_add8_10,3,4,6
     TR_ADD_SSE_8_10 r0, r2, r3, r1
     RET
 
-cglobal hevc_transform_add16_10,3,4,6
+cglobal hevc_add_residual16_10,3,4,6
     pxor m4, m4
     mova m5, [max_pixels_10]
 
@@ -346,7 +346,7 @@ cglobal hevc_transform_add16_10,3,4,6
 %endrep
     RET
 
-cglobal hevc_transform_add32_10,3,4,6
+cglobal hevc_add_residual32_10,3,4,6
    pxor m4, m4
    mova m5, [max_pixels_10]
 
@@ -361,7 +361,7 @@ cglobal hevc_transform_add32_10,3,4,6
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 
-cglobal hevc_transform_add16_10,3,4,6
+cglobal hevc_add_residual16_10,3,4,6
     pxor m4, m4
     mova m5, [max_pixels_10]
     lea r3, [r2*3]
@@ -374,7 +374,7 @@ cglobal hevc_transform_add16_10,3,4,6
 %endrep
     RET
 
-cglobal hevc_transform_add32_10,3,4,6
+cglobal hevc_add_residual32_10,3,4,6
     pxor m4, m4
     mova m5, [max_pixels_10]
 
@@ -239,23 +239,23 @@ WEIGHTING_PROTOTYPES(12, sse4);
 ///////////////////////////////////////////////////////////////////////////////
 // TRANSFORM_ADD
 ///////////////////////////////////////////////////////////////////////////////
-void ff_hevc_transform_add4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
 
-void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
 
-void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
 
-void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
 
-void ff_hevc_transform_add16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
 
 #endif // AVCODEC_X86_HEVCDSP_H
@@ -700,7 +700,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
             c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
-            c->transform_add[0] = ff_hevc_transform_add4_8_mmxext;
+            c->add_residual[0] = ff_hevc_add_residual4_8_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
@@ -716,9 +716,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
 
-            c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
-            c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
-            c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
+            c->add_residual[1] = ff_hevc_add_residual8_8_sse2;
+            c->add_residual[2] = ff_hevc_add_residual16_8_sse2;
+            c->add_residual[3] = ff_hevc_add_residual32_8_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags)) {
             if(ARCH_X86_64) {
@@ -748,9 +748,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             }
             SAO_BAND_INIT(8, avx);
 
-            c->transform_add[1] = ff_hevc_transform_add8_8_avx;
-            c->transform_add[2] = ff_hevc_transform_add16_8_avx;
-            c->transform_add[3] = ff_hevc_transform_add32_8_avx;
+            c->add_residual[1] = ff_hevc_add_residual8_8_avx;
+            c->add_residual[2] = ff_hevc_add_residual16_8_avx;
+            c->add_residual[3] = ff_hevc_add_residual32_8_avx;
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
@@ -850,11 +850,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
             c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
 
-            c->transform_add[3] = ff_hevc_transform_add32_8_avx2;
+            c->add_residual[3] = ff_hevc_add_residual32_8_avx2;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-            c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;
+            c->add_residual[0] = ff_hevc_add_residual4_10_mmxext;
             c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
             c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
         }
@@ -872,9 +872,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
 
-            c->transform_add[1] = ff_hevc_transform_add8_10_sse2;
-            c->transform_add[2] = ff_hevc_transform_add16_10_sse2;
-            c->transform_add[3] = ff_hevc_transform_add32_10_sse2;
+            c->add_residual[1] = ff_hevc_add_residual8_10_sse2;
+            c->add_residual[2] = ff_hevc_add_residual16_10_sse2;
+            c->add_residual[3] = ff_hevc_add_residual32_10_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
@@ -1053,8 +1053,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(10, avx2);
             SAO_EDGE_INIT(10, avx2);
 
-            c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
-            c->transform_add[3] = ff_hevc_transform_add32_10_avx2;
+            c->add_residual[2] = ff_hevc_add_residual16_10_avx2;
+            c->add_residual[3] = ff_hevc_add_residual32_10_avx2;
 
         }
     } else if (bit_depth == 12) {