mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
dsputil_mmx: fix incorrect assembly code
In ff_put_pixels_clamped_mmx(), there are two assembly code blocks. In the first block (in the unrolled loop), the instructions "movq 8%3, %%mm1 \n\t", and so forth, have problems. From above instruction, it is clear what the programmer wants: a load from p + 8. But this assembly code doesn’t guarantee that. It only works if the compiler puts p in a register to produce an instruction like this: "movq 8(%edi), %mm1". During compiler optimization, it is possible that the compiler will be able to constant propagate into p. Suppose p = &x[10000]. Then operand 3 can become 10000(%edi), where %edi holds &x. And the instruction becomes "movq 810000(%edx)". That is, it will stride by 810000 instead of 8. This will cause a segmentation fault. This error was fixed in the second block of the assembly code, but not in the unrolled loop. How to reproduce: This error is exposed when we build using Intel C++ Compiler, with IPO+PGO optimization enabled. Crashed when decoding an MJPEG video. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
This commit is contained in:
parent
6d1c9945dd
commit
845e92fd6a
@ -245,14 +245,14 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
|
||||
pix = pixels;
|
||||
/* unrolled loop */
|
||||
__asm__ volatile (
|
||||
"movq %3, %%mm0 \n\t"
|
||||
"movq 8%3, %%mm1 \n\t"
|
||||
"movq 16%3, %%mm2 \n\t"
|
||||
"movq 24%3, %%mm3 \n\t"
|
||||
"movq 32%3, %%mm4 \n\t"
|
||||
"movq 40%3, %%mm5 \n\t"
|
||||
"movq 48%3, %%mm6 \n\t"
|
||||
"movq 56%3, %%mm7 \n\t"
|
||||
"movq (%3), %%mm0 \n\t"
|
||||
"movq 8(%3), %%mm1 \n\t"
|
||||
"movq 16(%3), %%mm2 \n\t"
|
||||
"movq 24(%3), %%mm3 \n\t"
|
||||
"movq 32(%3), %%mm4 \n\t"
|
||||
"movq 40(%3), %%mm5 \n\t"
|
||||
"movq 48(%3), %%mm6 \n\t"
|
||||
"movq 56(%3), %%mm7 \n\t"
|
||||
"packuswb %%mm1, %%mm0 \n\t"
|
||||
"packuswb %%mm3, %%mm2 \n\t"
|
||||
"packuswb %%mm5, %%mm4 \n\t"
|
||||
@ -262,7 +262,7 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
|
||||
"movq %%mm4, (%0, %1, 2) \n\t"
|
||||
"movq %%mm6, (%0, %2) \n\t"
|
||||
:: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
|
||||
"m"(*p)
|
||||
"r"(p)
|
||||
: "memory");
|
||||
pix += line_size * 4;
|
||||
p += 32;
|
||||
|
Loading…
Reference in New Issue
Block a user