1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-11-23 21:54:53 +02:00
Files
FFmpeg/libavfilter/vulkan/bwdif.comp
Lynne 779763181f bwdif_vulkan: convert to storage images
texture() uses bilinear scaling; imageLoad() accesses the image directly.
The reason why texture() was used throughout Vulkan filters is that
back when they were written, they were targeting old Intel hardware,
which had a texel cache only for sampled images.

These days, GPUs have a generic cache that doesn't care what source it
gets populated with. Additionally, bypassing the sampling circuitry saves
us some performance.

Finally, all the old texture() code had an issue where unnormalized
coordinates were used, but an offset of 0.5 was not added, hence each
pixel ended up being interpolated. This fixes this.
2025-02-18 10:44:53 +01:00

123 lines
4.6 KiB
Plaintext

/*
* Copyright (c) Lynne
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Filter weights, splatted across all four components. Every weighted sum
 * built from these tables in process_intra() and process_line() is divided
 * by (1 << 13) afterwards.
 */

/* Weights applied to the cur[] samples in the interpolate_all term of process_line(). */
const vec4 coef_lf[2] = vec4[2](vec4(4309.0), vec4(213.0));

/* Weights applied to the prev2[]/next2[] sums in the interpolate_all term of process_line(). */
const vec4 coef_hf[3] = vec4[3](vec4(5570.0), vec4(3801.0), vec4(1016.0));

/* Weights used by process_intra() and by the interpolate_cur term of process_line(). */
const vec4 coef_sp[2] = vec4[2](vec4(5077.0), vec4(981.0));
/*
 * Four-tap interpolation that uses only the samples passed in cur[].
 *
 * The two inner samples (cur[1], cur[2]) are weighted by coef_sp[0]; the two
 * outer samples (cur[0], cur[3]) are weighted by coef_sp[1] and subtracted.
 * The weighted sum is then divided by (1 << 13).
 */
vec4 process_intra(vec4 cur[4])
{
    vec4 inner_sum = cur[1] + cur[2];
    vec4 outer_sum = cur[0] + cur[3];
    vec4 weighted  = coef_sp[0] * inner_sum - coef_sp[1] * outer_sum;

    return weighted / (1 << 13);
}
/*
 * Loads four samples from cur[idx] in the same column as pos, at vertical
 * offsets -3, -1, +1 and +3, runs them through process_intra() and stores
 * the result to dst[idx] at pos.
 */
void process_plane_intra(int idx, ivec2 pos)
{
    vec4 taps[4];

    /* 2 * i - 3 yields the offsets -3, -1, +1, +3 for i = 0..3. */
    for (int i = 0; i < 4; i++)
        taps[i] = imageLoad(cur[idx], pos + ivec2(0, 2 * i - 3));

    imageStore(dst[idx], pos, process_intra(taps));
}
/*
 * Computes one interpolated output value from vertically neighbouring
 * samples (see process_plane() for how the arrays are filled).
 *
 *   prev2, next2: five samples each; index 2 is the centre sample, indices
 *                 1/3 and 0/4 are the nearer and farther neighbours.
 *   prev1, next1: two samples each, compared against cur[1] and cur[2].
 *   cur:          four samples; cur[1]/cur[2] are the inner pair and
 *                 cur[0]/cur[3] the outer pair.
 *
 * Returns the interpolated value clamped to fd +/- diff, or fd itself
 * wherever the temporal difference is exactly zero.
 */
vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5])
{
    vec4 fc = cur[1];
    vec4 fe = cur[2];
    vec4 fs = prev2[2] + next2[2];
    vec4 fd = fs / 2;

    /* Three temporal differences: prev2 vs next2, prev1 vs cur, next1 vs cur. */
    vec4 temp_diff[3];
    temp_diff[0] = abs(prev2[2] - next2[2]);
    temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2;
    /* Must go to slot 2: writing slot 1 again would discard the prev1
     * difference and leave temp_diff[2] uninitialised in the max() below. */
    temp_diff[2] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2;

    vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2]));

    /* Captured before diff is widened below, so it reflects only the
     * temporal differences. */
    bvec4 diff_mask = equal(diff, vec4(0));

    vec4 fbs = prev2[1] + next2[1];
    vec4 ffs = prev2[3] + next2[3];
    vec4 fb = (fbs / 2) - fc;
    vec4 ff = (ffs / 2) - fe;
    vec4 dc = fd - fc;
    vec4 de = fd - fe;

    /* Widen the clamp range using the neighbouring-sample differences. */
    vec4 mmax = max(de, max(dc, min(fb, ff)));
    vec4 mmin = min(de, min(dc, max(fb, ff)));
    diff = max(diff, max(mmin, -mmax));

    /* Candidate using the prev2/next2 samples (coef_hf) plus cur (coef_lf). */
    vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) +
                              coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) +
                            coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13);

    /* Candidate using the cur samples only (coef_sp). */
    vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);

    /* Per component: pick interpolate_all where |fc - fe| exceeds
     * temp_diff[0], otherwise interpolate_cur. */
    bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), temp_diff[0]);
    vec4 interpol = mix(interpolate_cur, interpolate_all, interpolate_cnd1);

    interpol = clamp(interpol, fd - diff, fd + diff);

    /* Where the temporal difference was zero, output fd directly. */
    return mix(interpol, fd, diff_mask);
}
/*
 * Gathers the vertical neighbourhood of pos from the prev/cur/next images
 * of plane idx, runs process_line() on it and stores the result to
 * dst[idx] at pos.
 *
 *   idx:          index into the prev/cur/next/dst image arrays.
 *   pos:          coordinate being written; all loads share its column.
 *   field_parity: when true, the five-sample windows come from prev (as
 *                 prev2) and cur (as next2); when false, from cur (as prev2)
 *                 and next (as next2).
 *   filter_field, is_intra: not referenced in this function body.
 */
void process_plane(int idx, const ivec2 pos, bool filter_field,
                   bool is_intra, bool field_parity)
{
    vec4 cur_taps[4];
    vec4 prev_near[2];
    vec4 next_near[2];
    vec4 prev_far[5];
    vec4 next_far[5];

    /* Current image: rows at offsets -3, -1, +1, +3. */
    for (int i = 0; i < 4; i++)
        cur_taps[i] = imageLoad(cur[idx], pos + ivec2(0, 2 * i - 3));

    /* Previous and next images: rows at offsets -1 and +1. */
    for (int i = 0; i < 2; i++) {
        ivec2 p = pos + ivec2(0, 2 * i - 1);
        prev_near[i] = imageLoad(prev[idx], p);
        next_near[i] = imageLoad(next[idx], p);
    }

    /* Five-sample windows: rows at offsets -4, -2, 0, +2, +4. */
    for (int i = 0; i < 5; i++) {
        ivec2 p = pos + ivec2(0, 2 * i - 4);
        if (field_parity) {
            prev_far[i] = imageLoad(prev[idx], p);
            next_far[i] = imageLoad(cur[idx], p);
        } else {
            prev_far[i] = imageLoad(cur[idx], p);
            next_far[i] = imageLoad(next[idx], p);
        }
    }

    imageStore(dst[idx], pos,
               process_line(prev_far, prev_near, cur_taps, next_near, next_far));
}