mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Fix png decoding on x86.
Line sizes are only 8-byte aligned, so use unaligned loads for add_bytes_l2 pointers. Increasing the alignment requirement to 16 seemed a bit extreme (png may be used for rather small sizes). Also fix a mov that had its arguments swapped, leading to add_bytes_l2 being applied on up to 8 bytes too few. Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
This commit is contained in:
parent
da1ba4e88b
commit
58dabf7bf2
@ -26,8 +26,8 @@
|
||||
|
||||
typedef struct PNGDSPContext {
|
||||
void (*add_bytes_l2)(uint8_t *dst /* align 16 */,
|
||||
uint8_t *src1 /* align 16 */,
|
||||
uint8_t *src2 /* align 16 */, int w);
|
||||
uint8_t *src1,
|
||||
uint8_t *src2, int w);
|
||||
|
||||
/* this might write to dst[w] */
|
||||
void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src,
|
||||
|
@ -43,12 +43,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
|
||||
and waq, ~(mmsize*2-1)
|
||||
jmp .end_v
|
||||
.loop_v:
|
||||
mova m0, [src1q+iq]
|
||||
mova m1, [src1q+iq+mmsize]
|
||||
paddb m0, [src2q+iq]
|
||||
paddb m1, [src2q+iq+mmsize]
|
||||
mova [dstq+iq ], m0
|
||||
mova [dstq+iq+mmsize], m1
|
||||
movu m0, [src2q+iq]
|
||||
movu m1, [src2q+iq+mmsize]
|
||||
paddb m0, [src1q+iq]
|
||||
paddb m1, [src1q+iq+mmsize]
|
||||
movu [dstq+iq ], m0
|
||||
movu [dstq+iq+mmsize], m1
|
||||
add iq, mmsize*2
|
||||
.end_v:
|
||||
cmp iq, waq
|
||||
@ -56,12 +56,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
|
||||
|
||||
%if mmsize == 16
|
||||
; vector loop
|
||||
mov wq, waq
|
||||
mov waq, wq
|
||||
and waq, ~7
|
||||
jmp .end_l
|
||||
.loop_l:
|
||||
movq mm0, [src1q+iq]
|
||||
paddb mm0, [src2q+iq]
|
||||
movq mm0, [src2q+iq]
|
||||
paddb mm0, [src1q+iq]
|
||||
movq [dstq+iq ], mm0
|
||||
add iq, 8
|
||||
.end_l:
|
||||
|
Loading…
Reference in New Issue
Block a user