1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-12-25 22:17:24 +02:00
Files
FFmpeg/libavcodec/vulkan/dpx_unpack.comp
Ruikai Peng c48b8ebbbb avcodec/vulkan: fix DPX unpack offset
The DPX Vulkan unpack shader computes a word offset as

    uint off = (line_off + pix_off >> 5);

Due to GLSL operator precedence this is evaluated as
line_off + (pix_off >> 5) rather than (line_off + pix_off) >> 5.
Since line_off is in bits while off is a 32-bit word index,
scanlines beyond y=0 use an inflated offset and the shader reads
past the end of the DPX slice buffer.

Parenthesize the expression so that the sum is shifted as intended:

    uint off = (line_off + pix_off) >> 5;

This corrects the unpacked data and removes the CRC mismatch
observed between the software and Vulkan DPX decoders for
mispacked 12-bit DPX samples. The GPU OOB read itself is only
observable indirectly via this corruption since it occurs inside
the shader.

Repro on x86_64 with Vulkan/llvmpipe (531ce713a0):

    ./configure --cc=clang --disable-optimizations --disable-stripping \
        --enable-debug=3 --disable-doc --disable-ffplay \
        --enable-vulkan --enable-libshaderc \
        --enable-hwaccel=dpx_vulkan \
        --extra-cflags='-fsanitize=address -fno-omit-frame-pointer' \
        --extra-ldflags='-fsanitize=address' && make

    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/lvp_icd.json

PoC: packed 12-bit DPX with the packing flag cleared so the unpack
shader runs (4x64 gbrp12le), e.g. poc12_packed0.dpx.

Software decode:

    ./ffmpeg -v error -i poc12_packed0.dpx -f framecrc -
    -> 0, ..., 1536, 0x26cf81c2

Vulkan hwaccel decode:

    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/lvp_icd.json \
    ./ffmpeg -v error -init_hw_device vulkan \
        -hwaccel vulkan -hwaccel_output_format vulkan \
        -i poc12_packed0.dpx \
        -vf hwdownload,format=gbrp12le -f framecrc -
    -> 0, ..., 1536, 0x71e10a51

The only difference between the two runs is the Vulkan unpack
shader, and the stable CRC mismatch indicates that it is reading
past the intended DPX slice region.

Regression since: 531ce713a0
Found-by: Pwno
2025-12-12 20:13:16 +00:00

87 lines
2.4 KiB
Plaintext

/*
* Copyright (c) 2025 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Fetch the 32-bit word at index `off` of the input `data` array.
 * When BIG_ENDIAN is defined the word is passed through reverse4()
 * before being returned; otherwise it is returned unchanged.
 */
uint32_t read_data(uint off)
{
    uint32_t word = data[off];
#ifdef BIG_ENDIAN
    word = reverse4(word);
#endif
    return word;
}
#ifdef PACKED_10BIT
/*
 * Unpack one pixel stored as three 10-bit fields inside a single
 * 32-bit word.
 *
 * pos    - pixel coordinate; the word index is pos.y*stride + pos.x
 * stride - number of 32-bit words per row
 *
 * The fields are taken from the most significant end of the word:
 * bits 31..22, 21..12 and 11..2. The two lowest bits are ignored and
 * the fourth returned component is always 0.
 */
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
{
    uint32_t word = read_data(pos.y*stride + pos.x);

    return i16vec4(int16_t((word >> 22) & 0x3FF),
                   int16_t((word >> 12) & 0x3FF),
                   int16_t((word >>  2) & 0x3FF),
                   int16_t(0));
}
#else
/*
 * Unpack one pixel from a continuously bit-packed stream of
 * BITS_PER_COMP-bit components, COMPONENTS per pixel.
 *
 * pos    - pixel coordinate
 * stride - number of pixels per scanline
 *
 * Returns four consecutive components starting at the pixel's first
 * bit, each masked to BITS_PER_COMP bits. Components past COMPONENTS
 * contain whatever follows in the stream.
 */
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
{
    /* Payload bits per scanline, rounded UP to a whole 32-bit word.
     * (Adding `line_size & 31` only aligns when the remainder is 0 or 16.) */
    uint line_size = stride*BITS_PER_COMP*COMPONENTS;
    line_size = (line_size + 31u) & ~31u;

    /* Extra per-line padding; presumably need_align is a byte count
     * supplied by the host, hence the conversion to bits - confirm
     * against the caller. */
    line_size += need_align << 3;

    uint line_off = pos.y*line_size;
    uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS;

    /* Word index and in-word bit position must both be derived from the
     * same absolute bit address, otherwise they disagree whenever the
     * line pitch is not a multiple of 32 bits. */
    uint bit_addr = line_off + pix_off;
    uint off = bit_addr >> 5;
    uint bit = bit_addr & 0x1f;

    /* Build a 64-bit window (d0 in the low half) and drop the bits that
     * precede this pixel. */
    uint32_t d0 = read_data(off + 0);
    uint32_t d1 = read_data(off + 1);

    uint64_t combined = (uint64_t(d1) << 32) | d0;
    combined >>= bit;

    return i16vec4(combined,
                   combined >> (BITS_PER_COMP*1),
                   combined >> (BITS_PER_COMP*2),
                   combined >> (BITS_PER_COMP*3)) &
           int16_t((1 << BITS_PER_COMP) - 1);
}
#endif
/*
 * Shader entry point. Each invocation handles the pixel at
 * gl_GlobalInvocationID.xy: it unpacks that pixel from the input
 * buffer and stores it to the destination image(s).
 */
void main(void)
{
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    ivec2 extent = imageSize(dst[0]);

    /* Presumably a bounds test against the image size; invocations
     * that fail it do nothing. */
    if (!IS_WITHIN(pos, extent))
        return;

    i16vec4 p = parse_packed_in_32(pos, extent.x);

#if NB_IMAGES == 1
    /* Single image: store all components at once. */
    imageStore(dst[0], pos, p);
#else
    /* Multiple images: component c goes to image plane_of[c]
     * (0->2, 1->0, 2->1, 3->3), or always image 0 when there is a
     * single component. */
    const ivec4 plane_of = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);
    for (uint c = 0; c < COMPONENTS; c++) {
        imageStore(dst[plane_of[c]], pos, i16vec4(p[c]));
    }
#endif
}