mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
sws: implement slice threading
This commit is contained in:
parent
22c6fbc847
commit
d6fdc78e91
@ -16,6 +16,7 @@ version <next>:
|
||||
- atilt audio filter
|
||||
- grayworld video filter
|
||||
- AV1 Low overhead bitstream format muxer
|
||||
- swscale slice threading
|
||||
|
||||
|
||||
version 4.4:
|
||||
|
@ -81,6 +81,9 @@ static const AVOption swscale_options[] = {
|
||||
{ "uniform_color", "blend onto a uniform color", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_UNIFORM},INT_MIN, INT_MAX, VE, "alphablend" },
|
||||
{ "checkerboard", "blend onto a checkerboard", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_CHECKERBOARD},INT_MIN, INT_MAX, VE, "alphablend" },
|
||||
|
||||
{ "threads", "number of threads", OFFSET(nb_threads), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, VE, "threads" },
|
||||
{ "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = VE, "threads" },
|
||||
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
|
@ -1113,6 +1113,9 @@ int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
|
||||
|
||||
unsigned int sws_receive_slice_alignment(const struct SwsContext *c)
|
||||
{
|
||||
if (c->slice_ctx)
|
||||
return c->slice_ctx[0]->dst_slice_align;
|
||||
|
||||
return c->dst_slice_align;
|
||||
}
|
||||
|
||||
@ -1136,6 +1139,27 @@ int sws_receive_slice(struct SwsContext *c, unsigned int slice_start,
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
if (c->slicethread) {
|
||||
int nb_jobs = c->slice_ctx[0]->dither == SWS_DITHER_ED ? 1 : c->nb_slice_ctx;
|
||||
int ret = 0;
|
||||
|
||||
c->dst_slice_start = slice_start;
|
||||
c->dst_slice_height = slice_height;
|
||||
|
||||
avpriv_slicethread_execute(c->slicethread, nb_jobs, 0);
|
||||
|
||||
for (int i = 0; i < c->nb_slice_ctx; i++) {
|
||||
if (c->slice_err[i] < 0) {
|
||||
ret = c->slice_err[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memset(c->slice_err, 0, c->nb_slice_ctx * sizeof(*c->slice_err));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(dst) && c->frame_dst->data[i]; i++) {
|
||||
dst[i] = c->frame_dst->data[i] +
|
||||
c->frame_dst->linesize[i] * (slice_start >> c->chrDstVSubSample);
|
||||
@ -1173,6 +1197,41 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
|
||||
int srcSliceH, uint8_t *const dst[],
|
||||
const int dstStride[])
|
||||
{
|
||||
if (c->nb_slice_ctx)
|
||||
c = c->slice_ctx[0];
|
||||
|
||||
return scale_internal(c, srcSlice, srcStride, srcSliceY, srcSliceH,
|
||||
dst, dstStride, 0, c->dstH);
|
||||
}
|
||||
|
||||
void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
|
||||
int nb_jobs, int nb_threads)
|
||||
{
|
||||
SwsContext *parent = priv;
|
||||
SwsContext *c = parent->slice_ctx[threadnr];
|
||||
|
||||
const int slice_height = FFALIGN(FFMAX((parent->dst_slice_height + nb_jobs - 1) / nb_jobs, 1),
|
||||
c->dst_slice_align);
|
||||
const int slice_start = jobnr * slice_height;
|
||||
const int slice_end = FFMIN((jobnr + 1) * slice_height, parent->dst_slice_height);
|
||||
int err = 0;
|
||||
|
||||
if (slice_end > slice_start) {
|
||||
uint8_t *dst[4] = { NULL };
|
||||
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(dst) && parent->frame_dst->data[i]; i++) {
|
||||
const int vshift = (i == 1 || i == 2) ? c->chrDstVSubSample : 0;
|
||||
const ptrdiff_t offset = parent->frame_dst->linesize[i] *
|
||||
((slice_start + parent->dst_slice_start) >> vshift);
|
||||
|
||||
dst[i] = parent->frame_dst->data[i] + offset;
|
||||
}
|
||||
|
||||
err = scale_internal(c, (const uint8_t * const *)parent->frame_src->data,
|
||||
parent->frame_src->linesize, 0, c->srcH,
|
||||
dst, parent->frame_dst->linesize,
|
||||
parent->dst_slice_start + slice_start, slice_end - slice_start);
|
||||
}
|
||||
|
||||
parent->slice_err[threadnr] = err;
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "libavutil/mem_internal.h"
|
||||
#include "libavutil/pixfmt.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "libavutil/slicethread.h"
|
||||
#include "libavutil/ppc/util_altivec.h"
|
||||
|
||||
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
|
||||
@ -300,6 +301,15 @@ typedef struct SwsContext {
|
||||
*/
|
||||
const AVClass *av_class;
|
||||
|
||||
AVSliceThread *slicethread;
|
||||
struct SwsContext **slice_ctx;
|
||||
int *slice_err;
|
||||
int nb_slice_ctx;
|
||||
|
||||
// values passed to current sws_receive_slice() call
|
||||
unsigned int dst_slice_start;
|
||||
unsigned int dst_slice_height;
|
||||
|
||||
/**
|
||||
* Note that src, dst, srcStride, dstStride will be copied in the
|
||||
* sws_scale() wrapper so they can be freely modified here.
|
||||
@ -325,6 +335,7 @@ typedef struct SwsContext {
|
||||
int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
|
||||
int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
|
||||
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
|
||||
int nb_threads; ///< Number of threads used for scaling
|
||||
double param[2]; ///< Input parameters for scaling algorithms that need them.
|
||||
|
||||
AVFrame *frame_src;
|
||||
@ -1082,6 +1093,9 @@ void ff_init_vscale_pfn(SwsContext *c, yuv2planar1_fn yuv2plane1, yuv2planarX_fn
|
||||
yuv2interleavedX_fn yuv2nv12cX, yuv2packed1_fn yuv2packed1, yuv2packed2_fn yuv2packed2,
|
||||
yuv2packedX_fn yuv2packedX, yuv2anyX_fn yuv2anyX, int use_mmx);
|
||||
|
||||
void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
|
||||
int nb_jobs, int nb_threads);
|
||||
|
||||
//number of extra lines to process
|
||||
#define MAX_LINES_AHEAD 4
|
||||
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include "libavutil/mathematics.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "libavutil/slicethread.h"
|
||||
#include "libavutil/thread.h"
|
||||
#include "libavutil/aarch64/cpu.h"
|
||||
#include "libavutil/ppc/cpu.h"
|
||||
@ -871,6 +872,18 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
|
||||
const AVPixFmtDescriptor *desc_src;
|
||||
int need_reinit = 0;
|
||||
|
||||
if (c->nb_slice_ctx) {
|
||||
for (int i = 0; i < c->nb_slice_ctx; i++) {
|
||||
int ret = sws_setColorspaceDetails(c->slice_ctx[i], inv_table,
|
||||
srcRange, table, dstRange,
|
||||
brightness, contrast, saturation);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
handle_formats(c);
|
||||
desc_dst = av_pix_fmt_desc_get(c->dstFormat);
|
||||
desc_src = av_pix_fmt_desc_get(c->srcFormat);
|
||||
@ -1005,6 +1018,12 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
|
||||
if (!c )
|
||||
return -1;
|
||||
|
||||
if (c->nb_slice_ctx) {
|
||||
return sws_getColorspaceDetails(c->slice_ctx[0], inv_table, srcRange,
|
||||
table, dstRange, brightness, contrast,
|
||||
saturation);
|
||||
}
|
||||
|
||||
*inv_table = c->srcColorspaceTable;
|
||||
*table = c->dstColorspaceTable;
|
||||
*srcRange = range_override_needed(c->srcFormat) ? 1 : c->srcRange;
|
||||
@ -1170,6 +1189,58 @@ static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prepare a context for slice-threaded scaling.
 *
 * Creates the slice-thread pool (worker: ff_sws_slice_worker, private data:
 * the context itself), then allocates and initialises one single-threaded
 * child context per thread, each configured with a copy of the parent's
 * options and the same filters. Finally the frame_src / frame_dst holders
 * are allocated.
 *
 * If the pool cannot be created because of AVERROR(ENOSYS), c->nb_threads is
 * reset to 1 and 0 is returned so the caller can continue with a
 * single-threaded init. Otherwise c->nb_threads is set to the value returned
 * by avpriv_slicethread_create().
 *
 * When the first child context ends up using error-diffusion dithering, no
 * further children are created, so nb_slice_ctx stays at 1.
 *
 * Nothing is released here on failure. Every allocated child is counted in
 * nb_slice_ctx immediately, so sws_freeContext() on the parent frees all of
 * them along with slice_ctx, slice_err and the thread pool.
 *
 * @param c          parent context; its options must already be set
 * @param src_filter source filter forwarded to each child's sws_init_context()
 * @param dst_filter destination filter forwarded likewise
 * @return 0 on success, a negative AVERROR code on failure
 */
static int context_init_threaded(SwsContext *c,
                                 SwsFilter *src_filter, SwsFilter *dst_filter)
{
    int ret;

    ret = avpriv_slicethread_create(&c->slicethread, (void*)c,
                                    ff_sws_slice_worker, NULL, c->nb_threads);
    if (ret == AVERROR(ENOSYS)) {
        // threading unavailable: let the caller init as single-threaded
        c->nb_threads = 1;
        return 0;
    } else if (ret < 0)
        return ret;

    c->nb_threads = ret;

    c->slice_ctx = av_mallocz_array(c->nb_threads, sizeof(*c->slice_ctx));
    c->slice_err = av_mallocz_array(c->nb_threads, sizeof(*c->slice_err));
    if (!c->slice_ctx || !c->slice_err)
        return AVERROR(ENOMEM);

    for (int i = 0; i < c->nb_threads; i++) {
        c->slice_ctx[i] = sws_alloc_context();
        if (!c->slice_ctx[i])
            return AVERROR(ENOMEM);

        // Count the child as soon as it exists: sws_freeContext() only frees
        // slice_ctx[0..nb_slice_ctx), so a later failure in this iteration
        // would otherwise leak it.
        c->nb_slice_ctx++;

        ret = av_opt_copy((void*)c->slice_ctx[i], (void*)c);
        if (ret < 0)
            return ret;

        // children must not try to spawn thread pools of their own
        c->slice_ctx[i]->nb_threads = 1;

        ret = sws_init_context(c->slice_ctx[i], src_filter, dst_filter);
        if (ret < 0)
            return ret;

        if (c->slice_ctx[i]->dither == SWS_DITHER_ED) {
            av_log(c, AV_LOG_VERBOSE,
                   "Error-diffusion dither is in use, scaling will be single-threaded.\n");
            break;
        }
    }

    c->frame_src = av_frame_alloc();
    c->frame_dst = av_frame_alloc();
    if (!c->frame_src || !c->frame_dst)
        return AVERROR(ENOMEM);

    return 0;
}
|
||||
|
||||
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
|
||||
SwsFilter *dstFilter)
|
||||
{
|
||||
@ -1192,6 +1263,13 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
|
||||
static const float float_mult = 1.0f / 255.0f;
|
||||
static AVOnce rgb2rgb_once = AV_ONCE_INIT;
|
||||
|
||||
if (c->nb_threads != 1) {
|
||||
ret = context_init_threaded(c, srcFilter, dstFilter);
|
||||
if (ret < 0 || c->nb_threads > 1)
|
||||
return ret;
|
||||
// threading disabled in this build, init as single-threaded
|
||||
}
|
||||
|
||||
cpu_flags = av_get_cpu_flags();
|
||||
flags = c->flags;
|
||||
emms_c();
|
||||
@ -2254,6 +2332,13 @@ void sws_freeContext(SwsContext *c)
|
||||
if (!c)
|
||||
return;
|
||||
|
||||
for (i = 0; i < c->nb_slice_ctx; i++)
|
||||
sws_freeContext(c->slice_ctx[i]);
|
||||
av_freep(&c->slice_ctx);
|
||||
av_freep(&c->slice_err);
|
||||
|
||||
avpriv_slicethread_free(&c->slicethread);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
av_freep(&c->dither_error[i]);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user