/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavutil/rational.h"

#include "ops.h"
#include "ops_internal.h"

#define RET(x)                                                                 \
    do {                                                                       \
        if ((ret = (x)) < 0)                                                   \
            return ret;                                                        \
    } while (0)

/* Returns true for operations that are independent per channel. These can
 * usually be commuted freely with other such operations. */
static bool op_type_is_independent(SwsOpType op)
{
    switch (op) {
    case SWS_OP_SWAP_BYTES:
    case SWS_OP_LSHIFT:
    case SWS_OP_RSHIFT:
    case SWS_OP_CONVERT:
    case SWS_OP_DITHER:
    case SWS_OP_MIN:
    case SWS_OP_MAX:
    case SWS_OP_SCALE:
        return true;
    case SWS_OP_INVALID:
    case SWS_OP_READ:
    case SWS_OP_WRITE:
    case SWS_OP_SWIZZLE:
    case SWS_OP_CLEAR:
    case SWS_OP_LINEAR:
    case SWS_OP_PACK:
    case SWS_OP_UNPACK:
        return false;
    case SWS_OP_TYPE_NB:
        break;
    }

    av_unreachable("Invalid operation type!");
    return false;
}

/* merge_comp_flags() forms a monoid with flags_identity as the identity element */
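/* For example, merging a (ZERO | EXACT) component with a plain EXACT
 * component yields EXACT, whereas merging anything with a GARBAGE component
 * yields GARBAGE. */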
static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
static unsigned merge_comp_flags(unsigned a, unsigned b)
{
    const unsigned flags_or  = SWS_COMP_GARBAGE;
    const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
    return ((a & b) & flags_and) | ((a | b) & flags_or);
}

/* Infer + propagate known information about components */
void ff_sws_op_list_update_comps(SwsOpList *ops)
{
    SwsComps next = { .unused = {true, true, true, true} };
    SwsComps prev = { .flags = {
        SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
    }};

    /* Forwards pass, propagates knowledge about the incoming pixel values */
    for (int n = 0; n < ops->num_ops; n++) {
        SwsOp *op = &ops->ops[n];

        /* Prefill min/max values automatically; may have to be fixed in
         * special cases */
        memcpy(op->comps.min, prev.min, sizeof(prev.min));
        memcpy(op->comps.max, prev.max, sizeof(prev.max));
        if (op->op != SWS_OP_SWAP_BYTES) {
            ff_sws_apply_op_q(op, op->comps.min);
            ff_sws_apply_op_q(op, op->comps.max);
        }

        switch (op->op) {
        case SWS_OP_READ:
            for (int i = 0; i < op->rw.elems; i++) {
                if (ff_sws_pixel_type_is_int(op->type)) {
                    int bits = 8 * ff_sws_pixel_type_size(op->type);
                    if (!op->rw.packed && ops->src.desc) {
                        /* Use legal value range from pixdesc if available;
                         * we don't need to do this for packed formats because
                         * non-byte-aligned packed formats will necessarily go
                         * through SWS_OP_UNPACK anyway */
                        for (int c = 0; c < 4; c++) {
                            if (ops->src.desc->comp[c].plane == i) {
                                bits = ops->src.desc->comp[c].depth;
                                break;
                            }
                        }
                    }

                    op->comps.flags[i] = SWS_COMP_EXACT;
                    op->comps.min[i] = Q(0);
                    op->comps.max[i] = Q((1ULL << bits) - 1);
                }
            }
            for (int i = op->rw.elems; i < 4; i++)
                op->comps.flags[i] = prev.flags[i];
            break;
        case SWS_OP_WRITE:
            for (int i = 0; i < op->rw.elems; i++)
                av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
            /* fall through */
        case SWS_OP_SWAP_BYTES:
        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
        case SWS_OP_MIN:
        case SWS_OP_MAX:
            /* Linearly propagate flags per component */
            for (int i = 0; i < 4; i++)
                op->comps.flags[i] = prev.flags[i];
            break;
        case SWS_OP_DITHER:
            /* Strip zero flag because of the nonzero dithering offset */
            for (int i = 0; i < 4; i++)
                op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
            break;
        case SWS_OP_UNPACK:
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    op->comps.flags[i] = prev.flags[0];
                else
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
            }
            break;
        case SWS_OP_PACK: {
            unsigned flags = flags_identity;
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    flags = merge_comp_flags(flags, prev.flags[i]);
                if (i > 0) /* clear remaining comps for sanity */
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
            }
            op->comps.flags[0] = flags;
            break;
        }
        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (op->c.q4[i].den) {
                    if (op->c.q4[i].num == 0) {
                        op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT;
                    } else if (op->c.q4[i].den == 1) {
                        op->comps.flags[i] = SWS_COMP_EXACT;
                    }
                } else {
                    op->comps.flags[i] = prev.flags[i];
                }
            }
            break;
        case SWS_OP_SWIZZLE:
            for (int i = 0; i < 4; i++)
                op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
            break;
        case SWS_OP_CONVERT:
            for (int i = 0; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i];
                if (ff_sws_pixel_type_is_int(op->convert.to))
                    op->comps.flags[i] |= SWS_COMP_EXACT;
            }
            break;
        case SWS_OP_LINEAR:
            for (int i = 0; i < 4; i++) {
                unsigned flags = flags_identity;
                AVRational min = Q(0), max = Q(0);
                for (int j = 0; j < 4; j++) {
                    const AVRational k = op->lin.m[i][j];
                    AVRational mink = av_mul_q(prev.min[j], k);
                    AVRational maxk = av_mul_q(prev.max[j], k);
                    if (k.num) {
                        flags = merge_comp_flags(flags, prev.flags[j]);
                        if (k.den != 1) /* fractional coefficient */
                            flags &= ~SWS_COMP_EXACT;
                        if (k.num < 0)
                            FFSWAP(AVRational, mink, maxk);
                        min = av_add_q(min, mink);
                        max = av_add_q(max, maxk);
                    }
                }
                if (op->lin.m[i][4].num) { /* nonzero offset */
                    flags &= ~SWS_COMP_ZERO;
                    if (op->lin.m[i][4].den != 1) /* fractional offset */
                        flags &= ~SWS_COMP_EXACT;
                    min = av_add_q(min, op->lin.m[i][4]);
                    max = av_add_q(max, op->lin.m[i][4]);
                }
                op->comps.flags[i] = flags;
                op->comps.min[i] = min;
                op->comps.max[i] = max;
            }
            break;
        case SWS_OP_SCALE:
            for (int i = 0; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i];
                if (op->c.q.den != 1) /* fractional scale */
                    op->comps.flags[i] &= ~SWS_COMP_EXACT;
                if (op->c.q.num < 0)
                    FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
            }
            break;
        case SWS_OP_INVALID:
        case SWS_OP_TYPE_NB:
            av_unreachable("Invalid operation type!");
        }

        prev = op->comps;
    }

    /* Backwards pass, solves for component dependencies */
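    /* Note: in this pass, comps.unused[i] ends up meaning that the value
     * component i carries *into* the operation is never consumed, neither by
     * the operation itself nor by anything that follows it. */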
    for (int n = ops->num_ops - 1; n >= 0; n--) {
        SwsOp *op = &ops->ops[n];

        switch (op->op) {
        case SWS_OP_READ:
        case SWS_OP_WRITE:
            for (int i = 0; i < op->rw.elems; i++)
                op->comps.unused[i] = op->op == SWS_OP_READ;
            for (int i = op->rw.elems; i < 4; i++)
                op->comps.unused[i] = next.unused[i];
            break;
        case SWS_OP_SWAP_BYTES:
        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
        case SWS_OP_CONVERT:
        case SWS_OP_DITHER:
        case SWS_OP_MIN:
        case SWS_OP_MAX:
        case SWS_OP_SCALE:
            for (int i = 0; i < 4; i++)
                op->comps.unused[i] = next.unused[i];
            break;
        case SWS_OP_UNPACK: {
            bool unused = true;
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    unused &= next.unused[i];
                op->comps.unused[i] = i > 0;
            }
            op->comps.unused[0] = unused;
            break;
        }
        case SWS_OP_PACK:
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    op->comps.unused[i] = next.unused[0];
                else
                    op->comps.unused[i] = true;
            }
            break;
        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (op->c.q4[i].den)
                    op->comps.unused[i] = true;
                else
                    op->comps.unused[i] = next.unused[i];
            }
            break;
        case SWS_OP_SWIZZLE: {
            bool unused[4] = { true, true, true, true };
            for (int i = 0; i < 4; i++)
                unused[op->swizzle.in[i]] &= next.unused[i];
            for (int i = 0; i < 4; i++)
                op->comps.unused[i] = unused[i];
            break;
        }
        case SWS_OP_LINEAR:
            for (int j = 0; j < 4; j++) {
                bool unused = true;
                for (int i = 0; i < 4; i++) {
                    if (op->lin.m[i][j].num)
                        unused &= next.unused[i];
                }
                op->comps.unused[j] = unused;
            }
            break;
        }

        next = op->comps;
    }
}

/* returns log2(x) only if x is a power of two, or 0 otherwise */
static int exact_log2(const int x)
{
    int p;
    if (x <= 0)
        return 0;
    p = av_log2(x);
    return (1 << p) == x ? p : 0;
}

/* Extension of exact_log2() to rationals; the result is negative for
 * fractions of the form 1/2^n */
static int exact_log2_q(const AVRational x)
{
    if (x.den == 1)
        return exact_log2(x.num);
    else if (x.num == 1)
        return -exact_log2(x.den);
    else
        return 0;
}

/**
 * If a linear operation can be reduced to a multiplication by a single
 * scalar constant, extracts that constant into `out_scale` and returns true.
 */
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
                           SwsConst *out_scale)
{
    SwsConst scale = {0};

    /* There are components not on the main diagonal */
    if (c->mask & ~SWS_MASK_DIAG4)
        return false;

    for (int i = 0; i < 4; i++) {
        const AVRational s = c->m[i][i];
        if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
            continue;
        if (scale.q.den && av_cmp_q(s, scale.q))
            return false;
        scale.q = s;
    }

    if (scale.q.den)
        *out_scale = scale;
    return scale.q.den;
}

/* Extracts an integer clear operation (subset) from the given linear op. */
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
                                  SwsConst *out_clear)
{
    SwsConst clear = {0};
    bool ret = false;

    for (int i = 0; i < 4; i++) {
        bool const_row = c->m[i][4].den == 1; /* offset is integer */
        for (int j = 0; j < 4; j++) {
            const_row &= c->m[i][j].num == 0 || /* scalar is zero */
                         (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
        }
        if (const_row && (c->mask & SWS_MASK_ROW(i))) {
            clear.q4[i] = c->m[i][4];
            for (int j = 0; j < 5; j++)
                c->m[i][j] = Q(i == j);
            c->mask &= ~SWS_MASK_ROW(i);
            ret = true;
        }
    }

    if (ret)
        *out_clear = clear;
    return ret;
}

/* Unswizzle a linear operation by aligning single-input rows with
 * their corresponding diagonal */
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
{
    SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
    SwsLinearOp c = *op;

    for (int i = 0; i < 4; i++) {
        int idx = -1;
        for (int j = 0; j < 4; j++) {
            if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
                continue;
            if (idx >= 0)
                return false; /* multiple inputs */
            idx = j;
        }

        if (idx >= 0 && idx != i) {
            /* Move coefficient to the diagonal */
            c.m[i][i] = c.m[i][idx];
            c.m[i][idx] = Q(0);
            swiz.in[i] = idx;
        }
    }

    if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
        return false; /* no swizzle was identified */

    c.mask = ff_sws_linear_mask(c);
    *out_swiz = swiz;
    *op = c;
    return true;
}
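/* Runs a peephole optimization pass over the operation list: local rewrite
 * rules are applied repeatedly until none of them triggers anymore, with the
 * component metadata being recomputed after every successful rewrite.
 * Returns 0 on success or a negative error code. */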
int ff_sws_op_list_optimize(SwsOpList *ops)
{
    int ret;

retry:
    ff_sws_op_list_update_comps(ops);

    for (int n = 0; n < ops->num_ops;) {
        SwsOp dummy = {0};
        SwsOp *op   = &ops->ops[n];
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;

        /* common helper variable */
        bool noop = true;

        switch (op->op) {
        case SWS_OP_READ:
            /* Optimized further into refcopy / memcpy */
            if (next->op == SWS_OP_WRITE &&
                next->rw.elems == op->rw.elems &&
                next->rw.packed == op->rw.packed &&
                next->rw.frac == op->rw.frac)
            {
                ff_sws_op_list_remove_at(ops, n, 2);
                av_assert1(ops->num_ops == 0);
                return 0;
            }

            /* Skip reading extra unneeded components */
            if (!op->rw.packed) {
                int needed = op->rw.elems;
                while (needed > 0 && next->comps.unused[needed - 1])
                    needed--;
                if (op->rw.elems != needed) {
                    op->rw.elems = needed;
                    goto retry;
                }
            }
            break;

        case SWS_OP_SWAP_BYTES:
            /* Redundant (double) swap */
            if (next->op == SWS_OP_SWAP_BYTES) {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_UNPACK:
            /* Redundant unpack+pack */
            if (next->op == SWS_OP_PACK && next->type == op->type &&
                next->pack.pattern[0] == op->pack.pattern[0] &&
                next->pack.pattern[1] == op->pack.pattern[1] &&
                next->pack.pattern[2] == op->pack.pattern[2] &&
                next->pack.pattern[3] == op->pack.pattern[3])
            {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
            /* Two shifts in the same direction */
            if (next->op == op->op) {
                op->c.u += next->c.u;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* No-op shift */
            if (!op->c.u) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!op->c.q4[i].den)
                    continue;

                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
                    op->c.q4[i].num == 0)
                {
                    /* Redundant clear-to-zero of zero component */
                    op->c.q4[i].den = 0;
                } else if (next->comps.unused[i]) {
                    /* Unnecessary clear of unused component */
                    op->c.q4[i] = (AVRational) {0, 0};
                } else if (op->c.q4[i].den) {
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive clear */
            if (next->op == SWS_OP_CLEAR) {
                for (int i = 0; i < 4; i++) {
                    if (next->c.q4[i].den)
                        op->c.q4[i] = next->c.q4[i];
                }
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Prefer to clear as late as possible, to avoid doing
             * redundant work */
            if ((op_type_is_independent(next->op) && next->op != SWS_OP_SWAP_BYTES) ||
                next->op == SWS_OP_SWIZZLE)
            {
                if (next->op == SWS_OP_CONVERT)
                    op->type = next->convert.to;
                ff_sws_apply_op_q(next, op->c.q4);
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;

        case SWS_OP_SWIZZLE: {
            bool seen[4] = {0};
            bool has_duplicates = false;
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i])
                    continue;
                if (op->swizzle.in[i] != i)
                    noop = false;
                has_duplicates |= seen[op->swizzle.in[i]];
                seen[op->swizzle.in[i]] = true;
            }

            /* Identity swizzle */
            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive swizzle */
            if (next->op == SWS_OP_SWIZZLE) {
                const SwsSwizzleOp orig = op->swizzle;
                for (int i = 0; i < 4; i++)
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Try to push swizzles with duplicates towards the output */
            if (has_duplicates && op_type_is_independent(next->op)) {
                if (next->op == SWS_OP_CONVERT)
                    op->type = next->convert.to;
                if (next->op == SWS_OP_MIN || next->op == SWS_OP_MAX) {
                    /* Un-swizzle the next operation */
                    const SwsConst c = next->c;
                    for (int i = 0; i < 4; i++) {
                        if (!next->comps.unused[i])
                            next->c.q4[op->swizzle.in[i]] = c.q4[i];
                    }
                }
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }

            /* Move swizzle out of the way between two converts so that
             * they may be merged */
            if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }

        case SWS_OP_CONVERT:
            /* No-op conversion */
            if (op->type == op->convert.to) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive conversion */
            if (next->op == SWS_OP_CONVERT &&
                op->convert.expand == next->convert.expand)
            {
                av_assert1(op->convert.to == next->type);
                op->convert.to = next->convert.to;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Conversion followed by integer expansion */
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
                !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
            {
                op->convert.expand = true;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_MIN:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_MAX:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_DITHER:
            for (int i = 0; i < 4; i++) {
                noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
                        next->comps.unused[i];
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;
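        /* Linear ops are simplified aggressively: merge consecutive linear
         * ops into one, drop rows and columns that cannot affect the output,
         * and split off cheaper special cases (clear, scale, swizzle)
         * whenever the matrix has the corresponding structure. */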
        case SWS_OP_LINEAR: {
            SwsSwizzleOp swizzle;
            SwsConst c;

            /* No-op (identity) linear operation */
            if (!op->lin.mask) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            if (next->op == SWS_OP_LINEAR) {
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
                const SwsLinearOp m1 = op->lin;
                const SwsLinearOp m2 = next->lin;
                for (int i = 0; i < 4; i++) {
                    for (int j = 0; j < 5; j++) {
                        AVRational sum = Q(0);
                        for (int k = 0; k < 4; k++)
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
                        if (j == 4) /* m1.m[4][j] == 1 */
                            sum = av_add_q(sum, m2.m[i][4]);
                        op->lin.m[i][j] = sum;
                    }
                }
                op->lin.mask = ff_sws_linear_mask(op->lin);
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Optimize away zero columns */
            for (int j = 0; j < 4; j++) {
                const uint32_t col = SWS_MASK_COL(j);
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
                    continue;
                for (int i = 0; i < 4; i++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~col;
                goto retry;
            }

            /* Optimize away unused rows */
            for (int i = 0; i < 4; i++) {
                const uint32_t row = SWS_MASK_ROW(i);
                if (!next->comps.unused[i] || !(op->lin.mask & row))
                    continue;
                for (int j = 0; j < 5; j++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~row;
                goto retry;
            }

            /* Convert constant rows to explicit clear instruction */
            if (extract_constant_rows(&op->lin, prev->comps, &c)) {
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                    .op    = SWS_OP_CLEAR,
                    .type  = op->type,
                    .comps = op->comps,
                    .c     = c,
                }));
                goto retry;
            }

            /* Multiplication by scalar constant */
            if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
                op->op = SWS_OP_SCALE;
                op->c  = c;
                goto retry;
            }

            /* Swizzle by fixed pattern */
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
                    .op      = SWS_OP_SWIZZLE,
                    .type    = op->type,
                    .swizzle = swizzle,
                }));
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE: {
            const int factor2 = exact_log2_q(op->c.q);

            /* No-op scaling */
            if (op->c.q.num == 1 && op->c.q.den == 1) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Scaling by integer before conversion to int */
            if (op->c.q.den == 1 &&
                next->op == SWS_OP_CONVERT &&
                ff_sws_pixel_type_is_int(next->convert.to))
            {
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }

            /* Scaling by exact power of two */
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
                op->op  = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
                op->c.u = FFABS(factor2);
                goto retry;
            }
            break;
        }
        }

        /* No optimization triggered, move on to next operation */
        n++;
    }

    return 0;
}
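/* Attempt to compile the entire operation list into a single byte-level
 * shuffle. On success, fills `shuffle` (with `clear_val` marking bytes that
 * are cleared rather than copied from the input), reports the number of
 * input/output bytes consumed per iteration, and returns the number of pixel
 * groups processed per shuffle; otherwise returns an AVERROR code. */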
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
                         int size, uint8_t clear_val,
                         int *read_bytes, int *write_bytes)
{
    const SwsOp read = ops->ops[0];
    const int read_size = ff_sws_pixel_type_size(read.type);
    uint32_t mask[4] = {0};

    if (!ops->num_ops || read.op != SWS_OP_READ)
        return AVERROR(EINVAL);
    if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
        return AVERROR(ENOTSUP);

    for (int i = 0; i < read.rw.elems; i++)
        mask[i] = 0x01010101 * i * read_size + 0x03020100;

    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
        const SwsOp *op = &ops->ops[opidx];
        switch (op->op) {
        case SWS_OP_SWIZZLE: {
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
            for (int i = 0; i < 4; i++)
                mask[i] = orig[op->swizzle.in[i]];
            break;
        }

        case SWS_OP_SWAP_BYTES:
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 2: mask[i] = av_bswap16(mask[i]); break;
                case 4: mask[i] = av_bswap32(mask[i]); break;
                }
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!op->c.q4[i].den)
                    continue;
                if (op->c.q4[i].num != 0 || !clear_val)
                    return AVERROR(ENOTSUP);
                mask[i] = 0x1010101ul * clear_val;
            }
            break;

        case SWS_OP_CONVERT: {
            if (!op->convert.expand)
                return AVERROR(ENOTSUP);
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
                }
            }
            break;
        }

        case SWS_OP_WRITE: {
            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
                return AVERROR(ENOTSUP);

            /* Initialize to no-op */
            memset(shuffle, clear_val, size);

            const int write_size  = ff_sws_pixel_type_size(op->type);
            const int read_chunk  = read.rw.elems * read_size;
            const int write_chunk = op->rw.elems * write_size;
            const int num_groups  = size / FFMAX(read_chunk, write_chunk);
            for (int n = 0; n < num_groups; n++) {
                const int base_in  = n * read_chunk;
                const int base_out = n * write_chunk;
                for (int i = 0; i < op->rw.elems; i++) {
                    const int offset = base_out + i * write_size;
                    for (int b = 0; b < write_size; b++) {
                        const uint8_t idx = mask[i] >> (b * 8);
                        if (idx != clear_val)
                            shuffle[offset + b] = base_in + idx;
                    }
                }
            }

            *read_bytes  = num_groups * read_chunk;
            *write_bytes = num_groups * write_chunk;
            return num_groups;
        }

        default:
            return AVERROR(ENOTSUP);
        }
    }

    return AVERROR(EINVAL);
}