You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
Merge commit '9a738c27dceb4b975784b23213a46f5cb560d1c2'
* commit '9a738c27dceb4b975784b23213a46f5cb560d1c2':
v210enc: Add SIMD optimised 8-bit and 10-bit encoders
Conflicts:
libavcodec/v210enc.c
libavcodec/v210enc.h
libavcodec/x86/Makefile
libavcodec/x86/v210enc.asm
libavcodec/x86/v210enc_init.c
tests/ref/vsynth/vsynth1-v210
tests/ref/vsynth/vsynth2-v210
See: 36091742d1
Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
@@ -31,7 +31,7 @@
|
|||||||
|
|
||||||
#define WRITE_PIXELS(a, b, c) \
|
#define WRITE_PIXELS(a, b, c) \
|
||||||
do { \
|
do { \
|
||||||
val = CLIP(*a++); \
|
val = CLIP(*a++); \
|
||||||
val |= (CLIP(*b++) << 10) | \
|
val |= (CLIP(*b++) << 10) | \
|
||||||
(CLIP(*c++) << 20); \
|
(CLIP(*c++) << 20); \
|
||||||
AV_WL32(dst, val); \
|
AV_WL32(dst, val); \
|
||||||
@@ -40,21 +40,22 @@
|
|||||||
|
|
||||||
#define WRITE_PIXELS8(a, b, c) \
|
#define WRITE_PIXELS8(a, b, c) \
|
||||||
do { \
|
do { \
|
||||||
val = (CLIP8(*a++) << 2); \
|
val = (CLIP8(*a++) << 2); \
|
||||||
val |= (CLIP8(*b++) << 12) | \
|
val |= (CLIP8(*b++) << 12) | \
|
||||||
(CLIP8(*c++) << 22); \
|
(CLIP8(*c++) << 22); \
|
||||||
AV_WL32(dst, val); \
|
AV_WL32(dst, val); \
|
||||||
dst += 4; \
|
dst += 4; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
|
static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
|
||||||
const uint8_t *v, uint8_t *dst, ptrdiff_t width)
|
const uint8_t *v, uint8_t *dst,
|
||||||
|
ptrdiff_t width)
|
||||||
{
|
{
|
||||||
uint32_t val;
|
uint32_t val;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* unroll this to match the assembly */
|
/* unroll this to match the assembly */
|
||||||
for( i = 0; i < width-11; i += 12 ){
|
for (i = 0; i < width - 11; i += 12) {
|
||||||
WRITE_PIXELS8(u, y, v);
|
WRITE_PIXELS8(u, y, v);
|
||||||
WRITE_PIXELS8(y, u, y);
|
WRITE_PIXELS8(y, u, y);
|
||||||
WRITE_PIXELS8(v, y, u);
|
WRITE_PIXELS8(v, y, u);
|
||||||
@@ -67,12 +68,13 @@ static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u,
|
static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u,
|
||||||
const uint16_t *v, uint8_t *dst, ptrdiff_t width)
|
const uint16_t *v, uint8_t *dst,
|
||||||
|
ptrdiff_t width)
|
||||||
{
|
{
|
||||||
uint32_t val;
|
uint32_t val;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for( i = 0; i < width-5; i += 6 ){
|
for (i = 0; i < width - 5; i += 6) {
|
||||||
WRITE_PIXELS(u, y, v);
|
WRITE_PIXELS(u, y, v);
|
||||||
WRITE_PIXELS(y, u, y);
|
WRITE_PIXELS(y, u, y);
|
||||||
WRITE_PIXELS(v, y, u);
|
WRITE_PIXELS(v, y, u);
|
||||||
@@ -95,8 +97,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
|
avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
|
||||||
|
|
||||||
s->pack_line_8 = v210_planar_pack_8_c;
|
s->pack_line_8 = v210_planar_pack_8_c;
|
||||||
s->pack_line_10 = v210_planar_pack_10_c;
|
s->pack_line_10 = v210_planar_pack_10_c;
|
||||||
|
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
ff_v210enc_init_x86(s);
|
ff_v210enc_init_x86(s);
|
||||||
@@ -108,24 +110,23 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|||||||
const AVFrame *pic, int *got_packet)
|
const AVFrame *pic, int *got_packet)
|
||||||
{
|
{
|
||||||
V210EncContext *s = avctx->priv_data;
|
V210EncContext *s = avctx->priv_data;
|
||||||
|
|
||||||
int aligned_width = ((avctx->width + 47) / 48) * 48;
|
int aligned_width = ((avctx->width + 47) / 48) * 48;
|
||||||
int stride = aligned_width * 8 / 3;
|
int stride = aligned_width * 8 / 3;
|
||||||
int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
|
int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
|
||||||
int h, w, ret;
|
int h, w, ret;
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
|
|
||||||
if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) {
|
ret = ff_alloc_packet(pkt, avctx->height * stride);
|
||||||
|
if (ret < 0) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
|
av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
dst = pkt->data;
|
dst = pkt->data;
|
||||||
|
|
||||||
if (pic->format == AV_PIX_FMT_YUV422P10) {
|
if (pic->format == AV_PIX_FMT_YUV422P10) {
|
||||||
const uint16_t *y = (const uint16_t*)pic->data[0];
|
const uint16_t *y = (const uint16_t *)pic->data[0];
|
||||||
const uint16_t *u = (const uint16_t*)pic->data[1];
|
const uint16_t *u = (const uint16_t *)pic->data[1];
|
||||||
const uint16_t *v = (const uint16_t*)pic->data[2];
|
const uint16_t *v = (const uint16_t *)pic->data[2];
|
||||||
for (h = 0; h < avctx->height; h++) {
|
for (h = 0; h < avctx->height; h++) {
|
||||||
uint32_t val;
|
uint32_t val;
|
||||||
w = (avctx->width / 6) * 6;
|
w = (avctx->width / 6) * 6;
|
||||||
@@ -156,13 +157,11 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|||||||
|
|
||||||
memset(dst, 0, line_padding);
|
memset(dst, 0, line_padding);
|
||||||
dst += line_padding;
|
dst += line_padding;
|
||||||
|
|
||||||
y += pic->linesize[0] / 2 - avctx->width;
|
y += pic->linesize[0] / 2 - avctx->width;
|
||||||
u += pic->linesize[1] / 2 - avctx->width / 2;
|
u += pic->linesize[1] / 2 - avctx->width / 2;
|
||||||
v += pic->linesize[2] / 2 - avctx->width / 2;
|
v += pic->linesize[2] / 2 - avctx->width / 2;
|
||||||
}
|
}
|
||||||
}
|
} else if(pic->format == AV_PIX_FMT_YUV422P) {
|
||||||
else if(pic->format == AV_PIX_FMT_YUV422P) {
|
|
||||||
const uint8_t *y = pic->data[0];
|
const uint8_t *y = pic->data[0];
|
||||||
const uint8_t *u = pic->data[1];
|
const uint8_t *u = pic->data[1];
|
||||||
const uint8_t *v = pic->data[2];
|
const uint8_t *v = pic->data[2];
|
||||||
@@ -176,7 +175,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|||||||
v += w >> 1;
|
v += w >> 1;
|
||||||
dst += (w / 12) * 32;
|
dst += (w / 12) * 32;
|
||||||
|
|
||||||
for( ; w < avctx->width-5; w += 6 ){
|
for (; w < avctx->width - 5; w += 6) {
|
||||||
WRITE_PIXELS8(u, y, v);
|
WRITE_PIXELS8(u, y, v);
|
||||||
WRITE_PIXELS8(y, u, y);
|
WRITE_PIXELS8(y, u, y);
|
||||||
WRITE_PIXELS8(v, y, u);
|
WRITE_PIXELS8(v, y, u);
|
||||||
@@ -200,7 +199,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|||||||
AV_WL32(dst, val);
|
AV_WL32(dst, val);
|
||||||
dst += 4;
|
dst += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(dst, 0, line_padding);
|
memset(dst, 0, line_padding);
|
||||||
dst += line_padding;
|
dst += line_padding;
|
||||||
|
|
||||||
|
@@ -24,8 +24,10 @@
|
|||||||
#include "libavutil/pixfmt.h"
|
#include "libavutil/pixfmt.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
void (*pack_line_8)(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width);
|
void (*pack_line_8)(const uint8_t *y, const uint8_t *u,
|
||||||
void (*pack_line_10)(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width);
|
const uint8_t *v, uint8_t *dst, ptrdiff_t width);
|
||||||
|
void (*pack_line_10)(const uint16_t *y, const uint16_t *u,
|
||||||
|
const uint16_t *v, uint8_t *dst, ptrdiff_t width);
|
||||||
} V210EncContext;
|
} V210EncContext;
|
||||||
|
|
||||||
void ff_v210enc_init_x86(V210EncContext *s);
|
void ff_v210enc_init_x86(V210EncContext *s);
|
||||||
|
@@ -19,19 +19,24 @@
|
|||||||
#include "libavutil/x86/cpu.h"
|
#include "libavutil/x86/cpu.h"
|
||||||
#include "libavcodec/v210enc.h"
|
#include "libavcodec/v210enc.h"
|
||||||
|
|
||||||
void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width);
|
void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u,
|
||||||
void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width);
|
const uint8_t *v, uint8_t *dst,
|
||||||
void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width);
|
ptrdiff_t width);
|
||||||
|
void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u,
|
||||||
|
const uint8_t *v, uint8_t *dst, ptrdiff_t width);
|
||||||
|
void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u,
|
||||||
|
const uint16_t *v, uint8_t *dst,
|
||||||
|
ptrdiff_t width);
|
||||||
|
|
||||||
av_cold void ff_v210enc_init_x86(V210EncContext *s)
|
av_cold void ff_v210enc_init_x86(V210EncContext *s)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if( EXTERNAL_SSSE3(cpu_flags) ) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
s->pack_line_8 = ff_v210_planar_pack_8_ssse3;
|
s->pack_line_8 = ff_v210_planar_pack_8_ssse3;
|
||||||
s->pack_line_10 = ff_v210_planar_pack_10_ssse3;
|
s->pack_line_10 = ff_v210_planar_pack_10_ssse3;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( EXTERNAL_AVX(cpu_flags) )
|
if (EXTERNAL_AVX(cpu_flags))
|
||||||
s->pack_line_8 = ff_v210_planar_pack_8_avx;
|
s->pack_line_8 = ff_v210_planar_pack_8_avx;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user