mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
h264chroma: Change type of stride parameters to ptrdiff_t
This avoids SIMD-optimized functions having to sign-extend their stride argument manually to be able to do pointer arithmetic.
This commit is contained in:
parent
2ec9fa5ec6
commit
e4a94d8b36
@ -28,18 +28,18 @@
|
|||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
|
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int h, int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
|
av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
|
||||||
|
@ -21,10 +21,9 @@
|
|||||||
|
|
||||||
#include "libavutil/aarch64/asm.S"
|
#include "libavutil/aarch64/asm.S"
|
||||||
|
|
||||||
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
/* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
|
||||||
.macro h264_chroma_mc8 type, codec=h264
|
.macro h264_chroma_mc8 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||||
sxtw x2, w2
|
|
||||||
.ifc \type,avg
|
.ifc \type,avg
|
||||||
mov x8, x0
|
mov x8, x0
|
||||||
.endif
|
.endif
|
||||||
@ -192,10 +191,9 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
|||||||
endfunc
|
endfunc
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
/* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
|
||||||
.macro h264_chroma_mc4 type, codec=h264
|
.macro h264_chroma_mc4 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||||
sxtw x2, w2
|
|
||||||
.ifc \type,avg
|
.ifc \type,avg
|
||||||
mov x8, x0
|
mov x8, x0
|
||||||
.endif
|
.endif
|
||||||
@ -359,7 +357,6 @@ endfunc
|
|||||||
|
|
||||||
.macro h264_chroma_mc2 type
|
.macro h264_chroma_mc2 type
|
||||||
function ff_\type\()_h264_chroma_mc2_neon, export=1
|
function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||||
sxtw x2, w2
|
|
||||||
prfm pldl1strm, [x1]
|
prfm pldl1strm, [x1]
|
||||||
prfm pldl1strm, [x1, x2]
|
prfm pldl1strm, [x1, x2]
|
||||||
orr w7, w4, w5
|
orr w7, w4, w5
|
||||||
|
@ -25,15 +25,15 @@
|
|||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
|
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
|
||||||
{
|
{
|
||||||
|
@ -25,14 +25,14 @@
|
|||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
|
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
|
||||||
{
|
{
|
||||||
|
@ -26,13 +26,19 @@
|
|||||||
#include "libavutil/arm/cpu.h"
|
#include "libavutil/arm/cpu.h"
|
||||||
#include "libavcodec/h264chroma.h"
|
#include "libavcodec/h264chroma.h"
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
int h, int x, int y);
|
||||||
void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
|
int h, int x, int y);
|
||||||
|
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
|
int h, int x, int y);
|
||||||
|
|
||||||
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
|
int h, int x, int y);
|
||||||
|
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
|
int h, int x, int y);
|
||||||
|
|
||||||
av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth)
|
av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth)
|
||||||
{
|
{
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
#include "libavutil/arm/asm.S"
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
/* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
|
||||||
.macro h264_chroma_mc8 type, codec=h264
|
.macro h264_chroma_mc8 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||||
push {r4-r7, lr}
|
push {r4-r7, lr}
|
||||||
@ -195,7 +195,7 @@ T cmp r7, #0
|
|||||||
endfunc
|
endfunc
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
/* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
|
||||||
.macro h264_chroma_mc4 type, codec=h264
|
.macro h264_chroma_mc4 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||||
push {r4-r7, lr}
|
push {r4-r7, lr}
|
||||||
|
@ -72,14 +72,14 @@ void ff_put_vc1_mspel_mc32_neon(uint8_t *dst, const uint8_t *src,
|
|||||||
void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src,
|
void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src,
|
||||||
ptrdiff_t stride, int rnd);
|
ptrdiff_t stride, int rnd);
|
||||||
|
|
||||||
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
|
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
|
||||||
int x, int y);
|
int h, int x, int y);
|
||||||
|
|
||||||
av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
|
av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
|
||||||
{
|
{
|
||||||
|
@ -19,9 +19,10 @@
|
|||||||
#ifndef AVCODEC_H264CHROMA_H
|
#ifndef AVCODEC_H264CHROMA_H
|
||||||
#define AVCODEC_H264CHROMA_H
|
#define AVCODEC_H264CHROMA_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
|
typedef void (*h264_chroma_mc_func)(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, ptrdiff_t srcStride, int h, int x, int y);
|
||||||
|
|
||||||
typedef struct H264ChromaContext {
|
typedef struct H264ChromaContext {
|
||||||
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
|
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
|
||||||
|
@ -20,11 +20,13 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
#include "bit_depth_template.c"
|
#include "bit_depth_template.c"
|
||||||
|
|
||||||
#define H264_CHROMA_MC(OPNAME, OP)\
|
#define H264_CHROMA_MC(OPNAME, OP)\
|
||||||
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\
|
||||||
|
{\
|
||||||
pixel *dst = (pixel*)_dst;\
|
pixel *dst = (pixel*)_dst;\
|
||||||
pixel *src = (pixel*)_src;\
|
pixel *src = (pixel*)_src;\
|
||||||
const int A=(8-x)*(8-y);\
|
const int A=(8-x)*(8-y);\
|
||||||
@ -45,7 +47,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *
|
|||||||
}\
|
}\
|
||||||
} else if (B + C) {\
|
} else if (B + C) {\
|
||||||
const int E= B+C;\
|
const int E= B+C;\
|
||||||
const int step= C ? stride : 1;\
|
const ptrdiff_t step = C ? stride : 1;\
|
||||||
for(i=0; i<h; i++){\
|
for(i=0; i<h; i++){\
|
||||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||||
@ -62,7 +64,8 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *
|
|||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\
|
||||||
|
{\
|
||||||
pixel *dst = (pixel*)_dst;\
|
pixel *dst = (pixel*)_dst;\
|
||||||
pixel *src = (pixel*)_src;\
|
pixel *src = (pixel*)_src;\
|
||||||
const int A=(8-x)*(8-y);\
|
const int A=(8-x)*(8-y);\
|
||||||
@ -85,7 +88,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *
|
|||||||
}\
|
}\
|
||||||
} else if (B + C) {\
|
} else if (B + C) {\
|
||||||
const int E= B+C;\
|
const int E= B+C;\
|
||||||
const int step= C ? stride : 1;\
|
const ptrdiff_t step = C ? stride : 1;\
|
||||||
for(i=0; i<h; i++){\
|
for(i=0; i<h; i++){\
|
||||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||||
@ -106,7 +109,8 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *
|
|||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\
|
||||||
|
{\
|
||||||
pixel *dst = (pixel*)_dst;\
|
pixel *dst = (pixel*)_dst;\
|
||||||
pixel *src = (pixel*)_src;\
|
pixel *src = (pixel*)_src;\
|
||||||
const int A=(8-x)*(8-y);\
|
const int A=(8-x)*(8-y);\
|
||||||
@ -133,7 +137,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *
|
|||||||
}\
|
}\
|
||||||
} else if (B + C) {\
|
} else if (B + C) {\
|
||||||
const int E= B+C;\
|
const int E= B+C;\
|
||||||
const int step= C ? stride : 1;\
|
const ptrdiff_t step = C ? stride : 1;\
|
||||||
for(i=0; i<h; i++){\
|
for(i=0; i<h; i++){\
|
||||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||||
|
@ -72,7 +72,9 @@
|
|||||||
|
|
||||||
#ifdef PREFIX_h264_chroma_mc8_altivec
|
#ifdef PREFIX_h264_chroma_mc8_altivec
|
||||||
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
||||||
int stride, int h, int x, int y) {
|
ptrdiff_t stride, int h,
|
||||||
|
int x, int y)
|
||||||
|
{
|
||||||
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
||||||
{((8 - x) * (8 - y)),
|
{((8 - x) * (8 - y)),
|
||||||
(( x) * (8 - y)),
|
(( x) * (8 - y)),
|
||||||
@ -201,7 +203,10 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
|||||||
|
|
||||||
/* this code assumes that stride % 16 == 0 */
|
/* this code assumes that stride % 16 == 0 */
|
||||||
#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
|
#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
|
||||||
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
|
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, uint8_t *src,
|
||||||
|
ptrdiff_t stride, int h,
|
||||||
|
int x, int y)
|
||||||
|
{
|
||||||
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
||||||
{((8 - x) * (8 - y)),
|
{((8 - x) * (8 - y)),
|
||||||
(( x) * (8 - y)),
|
(( x) * (8 - y)),
|
||||||
|
@ -291,7 +291,10 @@ static const int rv40_bias[4][4] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#define RV40_CHROMA_MC(OPNAME, OP)\
|
#define RV40_CHROMA_MC(OPNAME, OP)\
|
||||||
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
|
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst /*align 8*/,\
|
||||||
|
uint8_t *src /*align 1*/,\
|
||||||
|
ptrdiff_t stride, int h, int x, int y)\
|
||||||
|
{\
|
||||||
const int A = (8-x) * (8-y);\
|
const int A = (8-x) * (8-y);\
|
||||||
const int B = ( x) * (8-y);\
|
const int B = ( x) * (8-y);\
|
||||||
const int C = (8-x) * ( y);\
|
const int C = (8-x) * ( y);\
|
||||||
@ -312,7 +315,7 @@ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
|
|||||||
}\
|
}\
|
||||||
}else{\
|
}else{\
|
||||||
const int E = B + C;\
|
const int E = B + C;\
|
||||||
const int step = C ? stride : 1;\
|
const ptrdiff_t step = C ? stride : 1;\
|
||||||
for(i = 0; i < h; i++){\
|
for(i = 0; i < h; i++){\
|
||||||
OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
|
OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
|
||||||
OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
|
OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
|
||||||
@ -324,7 +327,10 @@ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
|
|||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
|
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/,\
|
||||||
|
uint8_t *src/*align 1*/,\
|
||||||
|
ptrdiff_t stride, int h, int x, int y)\
|
||||||
|
{\
|
||||||
const int A = (8-x) * (8-y);\
|
const int A = (8-x) * (8-y);\
|
||||||
const int B = ( x) * (8-y);\
|
const int B = ( x) * (8-y);\
|
||||||
const int C = (8-x) * ( y);\
|
const int C = (8-x) * ( y);\
|
||||||
@ -349,7 +355,7 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
|
|||||||
}\
|
}\
|
||||||
}else{\
|
}else{\
|
||||||
const int E = B + C;\
|
const int E = B + C;\
|
||||||
const int step = C ? stride : 1;\
|
const ptrdiff_t step = C ? stride : 1;\
|
||||||
for(i = 0; i < h; i++){\
|
for(i = 0; i < h; i++){\
|
||||||
OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
|
OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
|
||||||
OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
|
OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
|
||||||
|
@ -700,7 +700,7 @@ static void avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src,
|
|||||||
C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
|
C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
|
||||||
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
||||||
uint8_t *src /* align 1 */,
|
uint8_t *src /* align 1 */,
|
||||||
int stride, int h, int x, int y)
|
ptrdiff_t stride, int h, int x, int y)
|
||||||
{
|
{
|
||||||
const int A = (8 - x) * (8 - y);
|
const int A = (8 - x) * (8 - y);
|
||||||
const int B = (x) * (8 - y);
|
const int B = (x) * (8 - y);
|
||||||
@ -725,7 +725,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
|
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y)
|
ptrdiff_t stride, int h, int x, int y)
|
||||||
{
|
{
|
||||||
const int A = (8 - x) * (8 - y);
|
const int A = (8 - x) * (8 - y);
|
||||||
const int B = (x) * (8 - y);
|
const int B = (x) * (8 - y);
|
||||||
@ -748,7 +748,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
|
|||||||
#define avg2(a, b) (((a) + (b) + 1) >> 1)
|
#define avg2(a, b) (((a) + (b) + 1) >> 1)
|
||||||
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
||||||
uint8_t *src /* align 1 */,
|
uint8_t *src /* align 1 */,
|
||||||
int stride, int h, int x, int y)
|
ptrdiff_t stride, int h, int x, int y)
|
||||||
{
|
{
|
||||||
const int A = (8 - x) * (8 - y);
|
const int A = (8 - x) * (8 - y);
|
||||||
const int B = (x) * (8 - y);
|
const int B = (x) * (8 - y);
|
||||||
@ -774,7 +774,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
|
|||||||
|
|
||||||
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
|
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
|
||||||
uint8_t *src /* align 1 */,
|
uint8_t *src /* align 1 */,
|
||||||
int stride, int h, int x, int y)
|
ptrdiff_t stride, int h, int x, int y)
|
||||||
{
|
{
|
||||||
const int A = (8 - x) * (8 - y);
|
const int A = (8 - x) * (8 - y);
|
||||||
const int B = ( x) * (8 - y);
|
const int B = ( x) * (8 - y);
|
||||||
|
@ -105,11 +105,8 @@ SECTION .text
|
|||||||
%endif ; rv40
|
%endif ; rv40
|
||||||
; void ff_put/avg_h264_chroma_mc8_*(uint8_t *dst /* align 8 */,
|
; void ff_put/avg_h264_chroma_mc8_*(uint8_t *dst /* align 8 */,
|
||||||
; uint8_t *src /* align 1 */,
|
; uint8_t *src /* align 1 */,
|
||||||
; int stride, int h, int mx, int my)
|
; ptrdiff_t stride, int h, int mx, int my)
|
||||||
cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
|
cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
|
||||||
%if ARCH_X86_64
|
|
||||||
movsxd r2, r2d
|
|
||||||
%endif
|
|
||||||
mov r6d, r5d
|
mov r6d, r5d
|
||||||
or r6d, r4d
|
or r6d, r4d
|
||||||
jne .at_least_one_non_zero
|
jne .at_least_one_non_zero
|
||||||
@ -291,9 +288,6 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
|
|||||||
%endif ; PIC
|
%endif ; PIC
|
||||||
%endif ; rv40
|
%endif ; rv40
|
||||||
cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
|
cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
|
||||||
%if ARCH_X86_64
|
|
||||||
movsxd r2, r2d
|
|
||||||
%endif
|
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
movd m2, r4d ; x
|
movd m2, r4d ; x
|
||||||
movd m3, r5d ; y
|
movd m3, r5d ; y
|
||||||
@ -376,10 +370,6 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
|
|||||||
|
|
||||||
%macro chroma_mc2_mmx_func 2
|
%macro chroma_mc2_mmx_func 2
|
||||||
cglobal %1_%2_chroma_mc2, 6, 7, 0
|
cglobal %1_%2_chroma_mc2, 6, 7, 0
|
||||||
%if ARCH_X86_64
|
|
||||||
movsxd r2, r2d
|
|
||||||
%endif
|
|
||||||
|
|
||||||
mov r6d, r4d
|
mov r6d, r4d
|
||||||
shl r4d, 16
|
shl r4d, 16
|
||||||
sub r4d, r6d
|
sub r4d, r6d
|
||||||
@ -465,9 +455,6 @@ chroma_mc4_mmx_func avg, rv40
|
|||||||
|
|
||||||
%macro chroma_mc8_ssse3_func 2-3
|
%macro chroma_mc8_ssse3_func 2-3
|
||||||
cglobal %1_%2_chroma_mc8%3, 6, 7, 8
|
cglobal %1_%2_chroma_mc8%3, 6, 7, 8
|
||||||
%if ARCH_X86_64
|
|
||||||
movsxd r2, r2d
|
|
||||||
%endif
|
|
||||||
mov r6d, r5d
|
mov r6d, r5d
|
||||||
or r6d, r4d
|
or r6d, r4d
|
||||||
jne .at_least_one_non_zero
|
jne .at_least_one_non_zero
|
||||||
@ -613,9 +600,6 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
|
|||||||
|
|
||||||
%macro chroma_mc4_ssse3_func 2
|
%macro chroma_mc4_ssse3_func 2
|
||||||
cglobal %1_%2_chroma_mc4, 6, 7, 0
|
cglobal %1_%2_chroma_mc4, 6, 7, 0
|
||||||
%if ARCH_X86_64
|
|
||||||
movsxd r2, r2d
|
|
||||||
%endif
|
|
||||||
mov r6, r4
|
mov r6, r4
|
||||||
shl r4d, 8
|
shl r4d, 8
|
||||||
sub r4d, r6d
|
sub r4d, r6d
|
||||||
|
@ -57,12 +57,11 @@ SECTION .text
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h,
|
; void ff_put/avg_h264_chroma_mc8(pixel *dst, pixel *src, ptrdiff_t stride,
|
||||||
; int mx, int my)
|
; int h, int mx, int my)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro CHROMA_MC8 1
|
%macro CHROMA_MC8 1
|
||||||
cglobal %1_h264_chroma_mc8_10, 6,7,8
|
cglobal %1_h264_chroma_mc8_10, 6,7,8
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
mov r6d, r5d
|
mov r6d, r5d
|
||||||
or r6d, r4d
|
or r6d, r4d
|
||||||
jne .at_least_one_non_zero
|
jne .at_least_one_non_zero
|
||||||
@ -149,8 +148,8 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_put/avg_h264_chroma_mc4(pixel *dst, pixel *src, int stride, int h,
|
; void ff_put/avg_h264_chroma_mc4(pixel *dst, pixel *src, ptrdiff_t stride,
|
||||||
; int mx, int my)
|
; int h, int mx, int my)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
;TODO: xmm mc4
|
;TODO: xmm mc4
|
||||||
%macro MC4_OP 2
|
%macro MC4_OP 2
|
||||||
@ -174,7 +173,6 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
|
|||||||
|
|
||||||
%macro CHROMA_MC4 1
|
%macro CHROMA_MC4 1
|
||||||
cglobal %1_h264_chroma_mc4_10, 6,6,7
|
cglobal %1_h264_chroma_mc4_10, 6,6,7
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
movd m2, r4m ; x
|
movd m2, r4m ; x
|
||||||
movd m3, r5m ; y
|
movd m3, r5m ; y
|
||||||
mova m4, [pw_8]
|
mova m4, [pw_8]
|
||||||
@ -200,12 +198,11 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h,
|
; void ff_put/avg_h264_chroma_mc2(pixel *dst, pixel *src, ptrdiff_t stride,
|
||||||
; int mx, int my)
|
; int h, int mx, int my)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro CHROMA_MC2 1
|
%macro CHROMA_MC2 1
|
||||||
cglobal %1_h264_chroma_mc2_10, 6,7
|
cglobal %1_h264_chroma_mc2_10, 6,7
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
mov r6d, r4d
|
mov r6d, r4d
|
||||||
shl r4d, 16
|
shl r4d, 16
|
||||||
sub r4d, r6d
|
sub r4d, r6d
|
||||||
|
@ -25,38 +25,38 @@
|
|||||||
#include "libavcodec/h264chroma.h"
|
#include "libavcodec/h264chroma.h"
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
|
void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
|
void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
|
void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
|
void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
|
void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
|
void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
|
#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
|
||||||
void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
|
void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
|
||||||
(uint8_t *dst, uint8_t *src, \
|
(uint8_t *dst, uint8_t *src, \
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
CHROMA_MC(put, 2, 10, mmxext)
|
CHROMA_MC(put, 2, 10, mmxext)
|
||||||
CHROMA_MC(avg, 2, 10, mmxext)
|
CHROMA_MC(avg, 2, 10, mmxext)
|
||||||
|
@ -34,18 +34,18 @@
|
|||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
|
void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src,
|
void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src,
|
void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
|
void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src,
|
void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
|
void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
#define DECLARE_WEIGHT(opt) \
|
#define DECLARE_WEIGHT(opt) \
|
||||||
void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
|
void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
|
||||||
|
@ -71,15 +71,15 @@ static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
|
|||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
|
||||||
void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src,
|
void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
|
void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src,
|
void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
|
void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
|
void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
|
||||||
int stride, int h, int x, int y);
|
ptrdiff_t stride, int h, int x, int y);
|
||||||
|
|
||||||
|
|
||||||
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
||||||
|
Loading…
Reference in New Issue
Block a user