You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-11-29 05:57:37 +02:00
swscale/optimizer: add high-level ops optimizer
This is responsible for taking a "naive" ops list and optimizing it as much as possible. Also includes a small analyzer that generates component metadata for use by the optimizer.
This commit is contained in:
771
libswscale/ops_optimizer.c
Normal file
771
libswscale/ops_optimizer.c
Normal file
@@ -0,0 +1,771 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Niklas Haas
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/rational.h"
|
||||
|
||||
#include "ops.h"
|
||||
#include "ops_internal.h"
|
||||
|
||||
/* Evaluates `x` into the enclosing function's local `ret` and, if the result
 * is negative, returns it from that function right away. An assignable `ret`
 * must be in scope wherever this macro is used. */
#define RET(x) \
    do { \
        ret = (x); \
        if (ret < 0) \
            return ret; \
    } while (0)
|
||||
|
||||
/* Returns true for operations that are independent per channel. These can
|
||||
* usually be commuted freely other such operations. */
|
||||
static bool op_type_is_independent(SwsOpType op)
|
||||
{
|
||||
switch (op) {
|
||||
case SWS_OP_SWAP_BYTES:
|
||||
case SWS_OP_LSHIFT:
|
||||
case SWS_OP_RSHIFT:
|
||||
case SWS_OP_CONVERT:
|
||||
case SWS_OP_DITHER:
|
||||
case SWS_OP_MIN:
|
||||
case SWS_OP_MAX:
|
||||
case SWS_OP_SCALE:
|
||||
return true;
|
||||
case SWS_OP_INVALID:
|
||||
case SWS_OP_READ:
|
||||
case SWS_OP_WRITE:
|
||||
case SWS_OP_SWIZZLE:
|
||||
case SWS_OP_CLEAR:
|
||||
case SWS_OP_LINEAR:
|
||||
case SWS_OP_PACK:
|
||||
case SWS_OP_UNPACK:
|
||||
return false;
|
||||
case SWS_OP_TYPE_NB:
|
||||
break;
|
||||
}
|
||||
|
||||
av_unreachable("Invalid operation type!");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* merge_comp_flags() forms a monoid with flags_identity as the null element */
|
||||
static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
|
||||
static unsigned merge_comp_flags(unsigned a, unsigned b)
|
||||
{
|
||||
const unsigned flags_or = SWS_COMP_GARBAGE;
|
||||
const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
|
||||
return ((a & b) & flags_and) | ((a | b) & flags_or);
|
||||
}
|
||||
|
||||
/* Infer + propagate known information about components */
|
||||
void ff_sws_op_list_update_comps(SwsOpList *ops)
|
||||
{
|
||||
SwsComps next = { .unused = {true, true, true, true} };
|
||||
SwsComps prev = { .flags = {
|
||||
SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
|
||||
}};
|
||||
|
||||
/* Forwards pass, propagates knowledge about the incoming pixel values */
|
||||
for (int n = 0; n < ops->num_ops; n++) {
|
||||
SwsOp *op = &ops->ops[n];
|
||||
|
||||
/* Prefill min/max values automatically; may have to be fixed in
|
||||
* special cases */
|
||||
memcpy(op->comps.min, prev.min, sizeof(prev.min));
|
||||
memcpy(op->comps.max, prev.max, sizeof(prev.max));
|
||||
|
||||
if (op->op != SWS_OP_SWAP_BYTES) {
|
||||
ff_sws_apply_op_q(op, op->comps.min);
|
||||
ff_sws_apply_op_q(op, op->comps.max);
|
||||
}
|
||||
|
||||
switch (op->op) {
|
||||
case SWS_OP_READ:
|
||||
for (int i = 0; i < op->rw.elems; i++) {
|
||||
if (ff_sws_pixel_type_is_int(op->type)) {
|
||||
int bits = 8 * ff_sws_pixel_type_size(op->type);
|
||||
if (!op->rw.packed && ops->src.desc) {
|
||||
/* Use legal value range from pixdesc if available;
|
||||
* we don't need to do this for packed formats because
|
||||
* non-byte-aligned packed formats will necessarily go
|
||||
* through SWS_OP_UNPACK anyway */
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (ops->src.desc->comp[c].plane == i) {
|
||||
bits = ops->src.desc->comp[c].depth;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
op->comps.flags[i] = SWS_COMP_EXACT;
|
||||
op->comps.min[i] = Q(0);
|
||||
op->comps.max[i] = Q((1ULL << bits) - 1);
|
||||
}
|
||||
}
|
||||
for (int i = op->rw.elems; i < 4; i++)
|
||||
op->comps.flags[i] = prev.flags[i];
|
||||
break;
|
||||
case SWS_OP_WRITE:
|
||||
for (int i = 0; i < op->rw.elems; i++)
|
||||
av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
|
||||
/* fall through */
|
||||
case SWS_OP_SWAP_BYTES:
|
||||
case SWS_OP_LSHIFT:
|
||||
case SWS_OP_RSHIFT:
|
||||
case SWS_OP_MIN:
|
||||
case SWS_OP_MAX:
|
||||
/* Linearly propagate flags per component */
|
||||
for (int i = 0; i < 4; i++)
|
||||
op->comps.flags[i] = prev.flags[i];
|
||||
break;
|
||||
case SWS_OP_DITHER:
|
||||
/* Strip zero flag because of the nonzero dithering offset */
|
||||
for (int i = 0; i < 4; i++)
|
||||
op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
|
||||
break;
|
||||
case SWS_OP_UNPACK:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->pack.pattern[i])
|
||||
op->comps.flags[i] = prev.flags[0];
|
||||
else
|
||||
op->comps.flags[i] = SWS_COMP_GARBAGE;
|
||||
}
|
||||
break;
|
||||
case SWS_OP_PACK: {
|
||||
unsigned flags = flags_identity;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->pack.pattern[i])
|
||||
flags = merge_comp_flags(flags, prev.flags[i]);
|
||||
if (i > 0) /* clear remaining comps for sanity */
|
||||
op->comps.flags[i] = SWS_COMP_GARBAGE;
|
||||
}
|
||||
op->comps.flags[0] = flags;
|
||||
break;
|
||||
}
|
||||
case SWS_OP_CLEAR:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->c.q4[i].den) {
|
||||
if (op->c.q4[i].num == 0) {
|
||||
op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT;
|
||||
} else if (op->c.q4[i].den == 1) {
|
||||
op->comps.flags[i] = SWS_COMP_EXACT;
|
||||
}
|
||||
} else {
|
||||
op->comps.flags[i] = prev.flags[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SWS_OP_SWIZZLE:
|
||||
for (int i = 0; i < 4; i++)
|
||||
op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
|
||||
break;
|
||||
case SWS_OP_CONVERT:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
op->comps.flags[i] = prev.flags[i];
|
||||
if (ff_sws_pixel_type_is_int(op->convert.to))
|
||||
op->comps.flags[i] |= SWS_COMP_EXACT;
|
||||
}
|
||||
break;
|
||||
case SWS_OP_LINEAR:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
unsigned flags = flags_identity;
|
||||
AVRational min = Q(0), max = Q(0);
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const AVRational k = op->lin.m[i][j];
|
||||
AVRational mink = av_mul_q(prev.min[j], k);
|
||||
AVRational maxk = av_mul_q(prev.max[j], k);
|
||||
if (k.num) {
|
||||
flags = merge_comp_flags(flags, prev.flags[j]);
|
||||
if (k.den != 1) /* fractional coefficient */
|
||||
flags &= ~SWS_COMP_EXACT;
|
||||
if (k.num < 0)
|
||||
FFSWAP(AVRational, mink, maxk);
|
||||
min = av_add_q(min, mink);
|
||||
max = av_add_q(max, maxk);
|
||||
}
|
||||
}
|
||||
if (op->lin.m[i][4].num) { /* nonzero offset */
|
||||
flags &= ~SWS_COMP_ZERO;
|
||||
if (op->lin.m[i][4].den != 1) /* fractional offset */
|
||||
flags &= ~SWS_COMP_EXACT;
|
||||
min = av_add_q(min, op->lin.m[i][4]);
|
||||
max = av_add_q(max, op->lin.m[i][4]);
|
||||
}
|
||||
op->comps.flags[i] = flags;
|
||||
op->comps.min[i] = min;
|
||||
op->comps.max[i] = max;
|
||||
}
|
||||
break;
|
||||
case SWS_OP_SCALE:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
op->comps.flags[i] = prev.flags[i];
|
||||
if (op->c.q.den != 1) /* fractional scale */
|
||||
op->comps.flags[i] &= ~SWS_COMP_EXACT;
|
||||
if (op->c.q.num < 0)
|
||||
FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
|
||||
}
|
||||
break;
|
||||
|
||||
case SWS_OP_INVALID:
|
||||
case SWS_OP_TYPE_NB:
|
||||
av_unreachable("Invalid operation type!");
|
||||
}
|
||||
|
||||
prev = op->comps;
|
||||
}
|
||||
|
||||
/* Backwards pass, solves for component dependencies */
|
||||
for (int n = ops->num_ops - 1; n >= 0; n--) {
|
||||
SwsOp *op = &ops->ops[n];
|
||||
|
||||
switch (op->op) {
|
||||
case SWS_OP_READ:
|
||||
case SWS_OP_WRITE:
|
||||
for (int i = 0; i < op->rw.elems; i++)
|
||||
op->comps.unused[i] = op->op == SWS_OP_READ;
|
||||
for (int i = op->rw.elems; i < 4; i++)
|
||||
op->comps.unused[i] = next.unused[i];
|
||||
break;
|
||||
case SWS_OP_SWAP_BYTES:
|
||||
case SWS_OP_LSHIFT:
|
||||
case SWS_OP_RSHIFT:
|
||||
case SWS_OP_CONVERT:
|
||||
case SWS_OP_DITHER:
|
||||
case SWS_OP_MIN:
|
||||
case SWS_OP_MAX:
|
||||
case SWS_OP_SCALE:
|
||||
for (int i = 0; i < 4; i++)
|
||||
op->comps.unused[i] = next.unused[i];
|
||||
break;
|
||||
case SWS_OP_UNPACK: {
|
||||
bool unused = true;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->pack.pattern[i])
|
||||
unused &= next.unused[i];
|
||||
op->comps.unused[i] = i > 0;
|
||||
}
|
||||
op->comps.unused[0] = unused;
|
||||
break;
|
||||
}
|
||||
case SWS_OP_PACK:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->pack.pattern[i])
|
||||
op->comps.unused[i] = next.unused[0];
|
||||
else
|
||||
op->comps.unused[i] = true;
|
||||
}
|
||||
break;
|
||||
case SWS_OP_CLEAR:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->c.q4[i].den)
|
||||
op->comps.unused[i] = true;
|
||||
else
|
||||
op->comps.unused[i] = next.unused[i];
|
||||
}
|
||||
break;
|
||||
case SWS_OP_SWIZZLE: {
|
||||
bool unused[4] = { true, true, true, true };
|
||||
for (int i = 0; i < 4; i++)
|
||||
unused[op->swizzle.in[i]] &= next.unused[i];
|
||||
for (int i = 0; i < 4; i++)
|
||||
op->comps.unused[i] = unused[i];
|
||||
break;
|
||||
}
|
||||
case SWS_OP_LINEAR:
|
||||
for (int j = 0; j < 4; j++) {
|
||||
bool unused = true;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->lin.m[i][j].num)
|
||||
unused &= next.unused[i];
|
||||
}
|
||||
op->comps.unused[j] = unused;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
next = op->comps;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns log2(x) if x is an exact power of two, and 0 in every other case
 * (x <= 0 included). Note that x == 1 therefore also gives 0. */
static int exact_log2(const int x)
{
    if (x > 0) {
        const int p = av_log2(x);
        if (x == (1 << p))
            return p;
    }

    return 0;
}
|
||||
|
||||
static int exact_log2_q(const AVRational x)
|
||||
{
|
||||
if (x.den == 1)
|
||||
return exact_log2(x.num);
|
||||
else if (x.num == 1)
|
||||
return -exact_log2(x.den);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* If a linear operation can be reduced to a scalar multiplication, returns
|
||||
* the corresponding scaling factor, or 0 otherwise.
|
||||
*/
|
||||
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
|
||||
SwsConst *out_scale)
|
||||
{
|
||||
SwsConst scale = {0};
|
||||
|
||||
/* There are components not on the main diagonal */
|
||||
if (c->mask & ~SWS_MASK_DIAG4)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const AVRational s = c->m[i][i];
|
||||
if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
|
||||
continue;
|
||||
if (scale.q.den && av_cmp_q(s, scale.q))
|
||||
return false;
|
||||
scale.q = s;
|
||||
}
|
||||
|
||||
if (scale.q.den)
|
||||
*out_scale = scale;
|
||||
return scale.q.den;
|
||||
}
|
||||
|
||||
/* Extracts an integer clear operation (subset) from the given linear op. */
|
||||
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
|
||||
SwsConst *out_clear)
|
||||
{
|
||||
SwsConst clear = {0};
|
||||
bool ret = false;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
bool const_row = c->m[i][4].den == 1; /* offset is integer */
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const_row &= c->m[i][j].num == 0 || /* scalar is zero */
|
||||
(prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
|
||||
}
|
||||
if (const_row && (c->mask & SWS_MASK_ROW(i))) {
|
||||
clear.q4[i] = c->m[i][4];
|
||||
for (int j = 0; j < 5; j++)
|
||||
c->m[i][j] = Q(i == j);
|
||||
c->mask &= ~SWS_MASK_ROW(i);
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
*out_clear = clear;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Unswizzle a linear operation by aligning single-input rows with
|
||||
* their corresponding diagonal */
|
||||
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
|
||||
{
|
||||
SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
|
||||
SwsLinearOp c = *op;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int idx = -1;
|
||||
for (int j = 0; j < 4; j++) {
|
||||
if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
|
||||
continue;
|
||||
if (idx >= 0)
|
||||
return false; /* multiple inputs */
|
||||
idx = j;
|
||||
}
|
||||
|
||||
if (idx >= 0 && idx != i) {
|
||||
/* Move coefficient to the diagonal */
|
||||
c.m[i][i] = c.m[i][idx];
|
||||
c.m[i][idx] = Q(0);
|
||||
swiz.in[i] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
|
||||
return false; /* no swizzle was identified */
|
||||
|
||||
c.mask = ff_sws_linear_mask(c);
|
||||
*out_swiz = swiz;
|
||||
*op = c;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Optimize an operation list in place.
 *
 * Walks the list and tries a set of local rewrites on each operation and its
 * immediate neighbours (removing redundant ops, merging adjacent ops of the
 * same kind, reordering ops, splitting linear ops into simpler ones). After
 * every successful rewrite the component metadata is recomputed and the walk
 * restarts from the first operation.
 *
 * @param ops operation list to optimize
 * @return 0 on success, or the negative value returned by
 *         ff_sws_op_list_insert_at() if an insertion fails
 */
int ff_sws_op_list_optimize(SwsOpList *ops)
{
    int ret;

retry:
    ff_sws_op_list_update_comps(ops);

    for (int n = 0; n < ops->num_ops;) {
        /* Zeroed stand-in for the missing neighbour at either end */
        SwsOp blank = {0};
        SwsOp *op   = &ops->ops[n];
        SwsOp *prev = n > 0 ? &ops->ops[n - 1] : &blank;
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &blank;

        /* common helper variable */
        bool noop = true;

        switch (op->op) {
        case SWS_OP_READ:
            /* Optimized further into refcopy / memcpy */
            if (next->op == SWS_OP_WRITE &&
                op->rw.elems  == next->rw.elems &&
                op->rw.packed == next->rw.packed &&
                op->rw.frac   == next->rw.frac)
            {
                ff_sws_op_list_remove_at(ops, n, 2);
                av_assert1(ops->num_ops == 0);
                return 0;
            }

            /* Skip reading extra unneeded components */
            if (!op->rw.packed) {
                int keep = op->rw.elems;
                while (keep > 0 && next->comps.unused[keep - 1])
                    keep--;
                if (keep != op->rw.elems) {
                    op->rw.elems = keep;
                    goto retry;
                }
            }
            break;

        case SWS_OP_SWAP_BYTES:
            /* Redundant (double) swap */
            if (next->op == SWS_OP_SWAP_BYTES) {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_UNPACK: {
            /* Redundant unpack+pack */
            bool same = next->op == SWS_OP_PACK && next->type == op->type;
            for (int i = 0; same && i < 4; i++)
                same = next->pack.pattern[i] == op->pack.pattern[i];

            if (same) {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;
        }

        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
            /* Two shifts in the same direction */
            if (next->op == op->op) {
                op->c.u += next->c.u;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* No-op shift */
            if (!op->c.u) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                const unsigned in_flags = prev->comps.flags[i];

                /* den == 0 marks a component that is not cleared */
                if (!op->c.q4[i].den)
                    continue;

                if ((in_flags & SWS_COMP_ZERO) &&
                    !(in_flags & SWS_COMP_GARBAGE) &&
                    op->c.q4[i].num == 0)
                {
                    /* Redundant clear-to-zero of zero component */
                    op->c.q4[i].den = 0;
                } else if (next->comps.unused[i]) {
                    /* Unnecessary clear of unused component */
                    op->c.q4[i] = (AVRational) {0, 0};
                } else {
                    /* This component still needs to be cleared */
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive clear */
            if (next->op == SWS_OP_CLEAR) {
                for (int i = 0; i < 4; i++) {
                    if (next->c.q4[i].den)
                        op->c.q4[i] = next->c.q4[i];
                }
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Prefer to clear as late as possible, to avoid doing
             * redundant work */
            if (next->op == SWS_OP_SWIZZLE ||
                (op_type_is_independent(next->op) && next->op != SWS_OP_SWAP_BYTES))
            {
                if (next->op == SWS_OP_CONVERT)
                    op->type = next->convert.to;
                /* Run the clear values through the op being skipped over */
                ff_sws_apply_op_q(next, op->c.q4);
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;

        case SWS_OP_SWIZZLE: {
            bool seen[4] = {0};
            bool has_duplicates = false;

            /* Only outputs that are still used afterwards matter */
            for (int i = 0; i < 4; i++) {
                const int src = op->swizzle.in[i];
                if (next->comps.unused[i])
                    continue;
                if (src != i)
                    noop = false;
                has_duplicates |= seen[src];
                seen[src] = true;
            }

            /* Identity swizzle */
            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive swizzle */
            if (next->op == SWS_OP_SWIZZLE) {
                const SwsSwizzleOp first = op->swizzle;
                for (int i = 0; i < 4; i++)
                    op->swizzle.in[i] = first.in[next->swizzle.in[i]];
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Try to push swizzles with duplicates towards the output */
            if (has_duplicates && op_type_is_independent(next->op)) {
                if (next->op == SWS_OP_CONVERT)
                    op->type = next->convert.to;
                if (next->op == SWS_OP_MIN || next->op == SWS_OP_MAX) {
                    /* Un-swizzle the next operation */
                    const SwsConst saved = next->c;
                    for (int i = 0; i < 4; i++) {
                        if (!next->comps.unused[i])
                            next->c.q4[op->swizzle.in[i]] = saved.q4[i];
                    }
                }
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }

            /* Move swizzle out of the way between two converts so that
             * they may be merged */
            if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }

        case SWS_OP_CONVERT:
            /* No-op conversion */
            if (op->type == op->convert.to) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive conversion */
            if (next->op == SWS_OP_CONVERT &&
                next->convert.expand == op->convert.expand)
            {
                av_assert1(op->convert.to == next->type);
                op->convert.to = next->convert.to;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Conversion followed by integer expansion */
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
                !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
            {
                op->convert.expand = true;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_MIN:
            /* The clamp has an effect only if some used component has a
             * limit that compares below the known incoming maximum */
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_MAX:
            /* The clamp has an effect only if some used component has a
             * known incoming minimum that compares below the limit */
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_DITHER:
            /* Dropped when every component is either exact or unused */
            for (int i = 0; i < 4; i++) {
                if (!(prev->comps.flags[i] & SWS_COMP_EXACT) &&
                    !next->comps.unused[i])
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_LINEAR: {
            SwsSwizzleOp swizzle;
            SwsConst c;

            /* No-op (identity) linear operation */
            if (!op->lin.mask) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            if (next->op == SWS_OP_LINEAR) {
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
                const SwsLinearOp a = op->lin;
                const SwsLinearOp b = next->lin;
                for (int i = 0; i < 4; i++) {
                    for (int j = 0; j < 5; j++) {
                        AVRational sum = Q(0);
                        for (int k = 0; k < 4; k++)
                            sum = av_add_q(sum, av_mul_q(b.m[i][k], a.m[k][j]));
                        if (j == 4) /* a.m[4][j] == 1 */
                            sum = av_add_q(sum, b.m[i][4]);
                        op->lin.m[i][j] = sum;
                    }
                }
                op->lin.mask = ff_sws_linear_mask(op->lin);
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Optimize away zero columns */
            for (int j = 0; j < 4; j++) {
                const uint32_t col = SWS_MASK_COL(j);
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
                    continue;
                for (int i = 0; i < 4; i++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~col;
                goto retry;
            }

            /* Optimize away unused rows */
            for (int i = 0; i < 4; i++) {
                const uint32_t row = SWS_MASK_ROW(i);
                if (!next->comps.unused[i] || !(op->lin.mask & row))
                    continue;
                for (int j = 0; j < 5; j++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~row;
                goto retry;
            }

            /* Convert constant rows to explicit clear instruction */
            if (extract_constant_rows(&op->lin, prev->comps, &c)) {
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                    .op    = SWS_OP_CLEAR,
                    .type  = op->type,
                    .comps = op->comps,
                    .c     = c,
                }));
                goto retry;
            }

            /* Multiplication by scalar constant */
            if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
                op->op = SWS_OP_SCALE;
                op->c  = c;
                goto retry;
            }

            /* Swizzle by fixed pattern */
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
                    .op      = SWS_OP_SWIZZLE,
                    .type    = op->type,
                    .swizzle = swizzle,
                }));
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE: {
            /* Nonzero only for an exact power-of-two factor other than 1 */
            const int shift = exact_log2_q(op->c.q);

            /* No-op scaling */
            if (op->c.q.num == 1 && op->c.q.den == 1) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Scaling by integer before conversion to int */
            if (op->c.q.den == 1 &&
                next->op == SWS_OP_CONVERT &&
                ff_sws_pixel_type_is_int(next->convert.to))
            {
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }

            /* Scaling by exact power of two */
            if (shift && ff_sws_pixel_type_is_int(op->type)) {
                op->op  = shift > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
                op->c.u = FFABS(shift);
                goto retry;
            }
            break;
        }
        }

        /* No optimization triggered, move on to next operation */
        n++;
    }

    return 0;
}
|
||||
Reference in New Issue
Block a user