mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
x86: swscale: fix fragile memory accesses
To access data at multiple fixed offsets from a base address, this code uses a single "m" operand and code of the form "32%0", relying on the memory operand instantiation having no displacement, giving a final result of the form "32(%rax)". If the compiler uses a register and displacement, e.g. "64(%rax)", the end result becomes "3264(%rax)", which obviously does not work. Replacing the "m" operands with "r" operands allows safe addition of a displacement. In theory, multiple memory operands could use a shared base register with different index registers, "(%rax,%rbx)", potentially making more efficient use of registers. In the cases at hand, no such sharing is possible since the addresses involved are entirely unrelated. After this change, the code somewhat rudely accesses memory without using a corresponding memory operand, which in some cases can lead to unwanted "optimisations" of surrounding code. However, the original code also accesses memory not covered by a memory operand, so this is not adding any defect not already present. It is also highly unlikely that any such optimisations could be performed here since the memory locations in question are not accessed elsewhere in the same functions. This fixes crashes with suncc. Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
10b83cb653
commit
90540c2d5a
@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
|
||||
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"punpckldq 3%1, %%mm0 \n\t"
|
||||
"movd 6%1, %%mm1 \n\t"
|
||||
"punpckldq 9%1, %%mm1 \n\t"
|
||||
"movd 12%1, %%mm2 \n\t"
|
||||
"punpckldq 15%1, %%mm2 \n\t"
|
||||
"movd 18%1, %%mm3 \n\t"
|
||||
"punpckldq 21%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"punpckldq 3(%1), %%mm0 \n\t"
|
||||
"movd 6(%1), %%mm1 \n\t"
|
||||
"punpckldq 9(%1), %%mm1 \n\t"
|
||||
"movd 12(%1), %%mm2 \n\t"
|
||||
"punpckldq 15(%1), %%mm2 \n\t"
|
||||
"movd 18(%1), %%mm3 \n\t"
|
||||
"punpckldq 21(%1), %%mm3 \n\t"
|
||||
"por %%mm7, %%mm0 \n\t"
|
||||
"por %%mm7, %%mm1 \n\t"
|
||||
"por %%mm7, %%mm2 \n\t"
|
||||
"por %%mm7, %%mm3 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
MOVNTQ" %%mm1, 8%0 \n\t"
|
||||
MOVNTQ" %%mm2, 16%0 \n\t"
|
||||
MOVNTQ" %%mm3, 24%0"
|
||||
:"=m"(*dest)
|
||||
:"m"(*s)
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0)"
|
||||
:: "r"(dest), "r"(s)
|
||||
:"memory");
|
||||
dest += 32;
|
||||
s += 24;
|
||||
@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
|
||||
"por %%mm5, %%mm4 \n\t" \
|
||||
\
|
||||
MOVNTQ" %%mm0, %0 \n\t" \
|
||||
MOVNTQ" %%mm1, 8%0 \n\t" \
|
||||
MOVNTQ" %%mm4, 16%0"
|
||||
MOVNTQ" %%mm0, (%0) \n\t" \
|
||||
MOVNTQ" %%mm1, 8(%0) \n\t" \
|
||||
MOVNTQ" %%mm4, 16(%0)"
|
||||
|
||||
|
||||
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 31;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm1 \n\t"
|
||||
"movq 16%1, %%mm4 \n\t"
|
||||
"movq 24%1, %%mm5 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq 8(%1), %%mm1 \n\t"
|
||||
"movq 16(%1), %%mm4 \n\t"
|
||||
"movq 24(%1), %%mm5 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm4, %%mm6 \n\t"
|
||||
"movq %%mm5, %%mm7 \n\t"
|
||||
STORE_BGR24_MMX
|
||||
:"=m"(*dest)
|
||||
:"m"(*s)
|
||||
:: "r"(dest), "r"(s)
|
||||
:"memory");
|
||||
dest += 24;
|
||||
s += 32;
|
||||
@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 15;
|
||||
while (s<mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq 8(%1), %%mm2 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"pand %%mm4, %%mm0 \n\t"
|
||||
"pand %%mm4, %%mm2 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
MOVNTQ" %%mm2, 8%0"
|
||||
:"=m"(*d)
|
||||
:"m"(*s)
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%0)"
|
||||
:: "r"(d), "r"(s)
|
||||
);
|
||||
d+=16;
|
||||
s+=16;
|
||||
@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 15;
|
||||
while (s<mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq 8(%1), %%mm2 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"psrlq $1, %%mm0 \n\t"
|
||||
@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
"pand %%mm6, %%mm3 \n\t"
|
||||
"por %%mm1, %%mm0 \n\t"
|
||||
"por %%mm3, %%mm2 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
MOVNTQ" %%mm2, 8%0"
|
||||
:"=m"(*d)
|
||||
:"m"(*s)
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%0)"
|
||||
:: "r"(d), "r"(s)
|
||||
);
|
||||
d+=16;
|
||||
s+=16;
|
||||
@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 15;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 4%1, %%mm3 \n\t"
|
||||
"punpckldq 8%1, %%mm0 \n\t"
|
||||
"punpckldq 12%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 4(%1), %%mm3 \n\t"
|
||||
"punpckldq 8(%1), %%mm0 \n\t"
|
||||
"punpckldq 12(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
:: "r"(d),"r"(s),"m"(blue_16mask):"memory");
|
||||
d += 4;
|
||||
s += 16;
|
||||
}
|
||||
@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 15;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 4%1, %%mm3 \n\t"
|
||||
"punpckldq 8%1, %%mm0 \n\t"
|
||||
"punpckldq 12%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 4(%1), %%mm3 \n\t"
|
||||
"punpckldq 8(%1), %%mm0 \n\t"
|
||||
"punpckldq 12(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
|
||||
d += 4;
|
||||
s += 16;
|
||||
}
|
||||
@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 11;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 3%1, %%mm3 \n\t"
|
||||
"punpckldq 6%1, %%mm0 \n\t"
|
||||
"punpckldq 9%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 3(%1), %%mm3 \n\t"
|
||||
"punpckldq 6(%1), %%mm0 \n\t"
|
||||
"punpckldq 9(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
::"r"(d),"r"(s),"m"(blue_16mask):"memory");
|
||||
d += 4;
|
||||
s += 12;
|
||||
}
|
||||
@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 15;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 3%1, %%mm3 \n\t"
|
||||
"punpckldq 6%1, %%mm0 \n\t"
|
||||
"punpckldq 9%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 3(%1), %%mm3 \n\t"
|
||||
"punpckldq 6(%1), %%mm0 \n\t"
|
||||
"punpckldq 9(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
::"r"(d),"r"(s),"m"(blue_16mask):"memory");
|
||||
d += 4;
|
||||
s += 12;
|
||||
}
|
||||
@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 11;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 3%1, %%mm3 \n\t"
|
||||
"punpckldq 6%1, %%mm0 \n\t"
|
||||
"punpckldq 9%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 3(%1), %%mm3 \n\t"
|
||||
"punpckldq 6(%1), %%mm0 \n\t"
|
||||
"punpckldq 9(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
|
||||
d += 4;
|
||||
s += 12;
|
||||
}
|
||||
@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 15;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movd %1, %%mm0 \n\t"
|
||||
"movd 3%1, %%mm3 \n\t"
|
||||
"punpckldq 6%1, %%mm0 \n\t"
|
||||
"punpckldq 9%1, %%mm3 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movd (%1), %%mm0 \n\t"
|
||||
"movd 3(%1), %%mm3 \n\t"
|
||||
"punpckldq 6(%1), %%mm0 \n\t"
|
||||
"punpckldq 9(%1), %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm2 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
"psllq $16, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm0 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
|
||||
MOVNTQ" %%mm0, (%0) \n\t"
|
||||
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
|
||||
d += 4;
|
||||
s += 12;
|
||||
}
|
||||
@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 7;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq %1, %%mm1 \n\t"
|
||||
"movq %1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq (%1), %%mm1 \n\t"
|
||||
"movq (%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"movq %%mm0, %%mm6 \n\t"
|
||||
"movq %%mm3, %%mm7 \n\t"
|
||||
|
||||
"movq 8%1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm1 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
"movq 8(%1), %%mm0 \n\t"
|
||||
"movq 8(%1), %%mm1 \n\t"
|
||||
"movq 8(%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
|
||||
:"=m"(*d)
|
||||
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
|
||||
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
|
||||
:"memory");
|
||||
/* borrowed 32 to 24 */
|
||||
__asm__ volatile(
|
||||
@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
|
||||
STORE_BGR24_MMX
|
||||
|
||||
:"=m"(*d)
|
||||
:"m"(*s)
|
||||
:: "r"(d), "m"(*s)
|
||||
:"memory");
|
||||
d += 24;
|
||||
s += 8;
|
||||
@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
mm_end = end - 7;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq %1, %%mm1 \n\t"
|
||||
"movq %1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq (%1), %%mm1 \n\t"
|
||||
"movq (%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"movq %%mm0, %%mm6 \n\t"
|
||||
"movq %%mm3, %%mm7 \n\t"
|
||||
|
||||
"movq 8%1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm1 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
"movq 8(%1), %%mm0 \n\t"
|
||||
"movq 8(%1), %%mm1 \n\t"
|
||||
"movq 8(%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"por %%mm4, %%mm3 \n\t"
|
||||
"por %%mm5, %%mm3 \n\t"
|
||||
:"=m"(*d)
|
||||
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
|
||||
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
|
||||
:"memory");
|
||||
/* borrowed 32 to 24 */
|
||||
__asm__ volatile(
|
||||
@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
|
||||
STORE_BGR24_MMX
|
||||
|
||||
:"=m"(*d)
|
||||
:"m"(*s)
|
||||
:: "r"(d), "m"(*s)
|
||||
:"memory");
|
||||
d += 24;
|
||||
s += 8;
|
||||
@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
"movq %%mm0, %%mm3 \n\t" \
|
||||
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
|
||||
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
|
||||
MOVNTQ" %%mm0, %0 \n\t" \
|
||||
MOVNTQ" %%mm3, 8%0 \n\t" \
|
||||
MOVNTQ" %%mm0, (%0) \n\t" \
|
||||
MOVNTQ" %%mm3, 8(%0) \n\t" \
|
||||
|
||||
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 3;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq %1, %%mm1 \n\t"
|
||||
"movq %1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq (%1), %%mm1 \n\t"
|
||||
"movq (%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
"psrlq $2, %%mm1 \n\t"
|
||||
"psrlq $7, %%mm2 \n\t"
|
||||
PACK_RGB32
|
||||
:"=m"(*d)
|
||||
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
|
||||
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
|
||||
:"memory");
|
||||
d += 16;
|
||||
s += 4;
|
||||
@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
mm_end = end - 3;
|
||||
while (s < mm_end) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq %1, %%mm1 \n\t"
|
||||
"movq %1, %%mm2 \n\t"
|
||||
PREFETCH" 32(%1) \n\t"
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq (%1), %%mm1 \n\t"
|
||||
"movq (%1), %%mm2 \n\t"
|
||||
"pand %2, %%mm0 \n\t"
|
||||
"pand %3, %%mm1 \n\t"
|
||||
"pand %4, %%mm2 \n\t"
|
||||
@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
|
||||
"psrlq $3, %%mm1 \n\t"
|
||||
"psrlq $8, %%mm2 \n\t"
|
||||
PACK_RGB32
|
||||
:"=m"(*d)
|
||||
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
|
||||
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
|
||||
:"memory");
|
||||
d += 16;
|
||||
s += 4;
|
||||
@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
int srcStride1, int srcStride2,
|
||||
int dstStride1, int dstStride2)
|
||||
{
|
||||
x86_reg y;
|
||||
int x,w,h;
|
||||
x86_reg x, y;
|
||||
int w,h;
|
||||
w=width/2; h=height/2;
|
||||
__asm__ volatile(
|
||||
PREFETCH" %0 \n\t"
|
||||
@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
x=0;
|
||||
for (;x<w-31;x+=32) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
"movq 16%1, %%mm4 \n\t"
|
||||
"movq 24%1, %%mm6 \n\t"
|
||||
PREFETCH" 32(%1,%2) \n\t"
|
||||
"movq (%1,%2), %%mm0 \n\t"
|
||||
"movq 8(%1,%2), %%mm2 \n\t"
|
||||
"movq 16(%1,%2), %%mm4 \n\t"
|
||||
"movq 24(%1,%2), %%mm6 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"movq %%mm4, %%mm5 \n\t"
|
||||
@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
"punpckhbw %%mm5, %%mm5 \n\t"
|
||||
"punpcklbw %%mm6, %%mm6 \n\t"
|
||||
"punpckhbw %%mm7, %%mm7 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
MOVNTQ" %%mm1, 8%0 \n\t"
|
||||
MOVNTQ" %%mm2, 16%0 \n\t"
|
||||
MOVNTQ" %%mm3, 24%0 \n\t"
|
||||
MOVNTQ" %%mm4, 32%0 \n\t"
|
||||
MOVNTQ" %%mm5, 40%0 \n\t"
|
||||
MOVNTQ" %%mm6, 48%0 \n\t"
|
||||
MOVNTQ" %%mm7, 56%0"
|
||||
:"=m"(d[2*x])
|
||||
:"m"(s1[x])
|
||||
MOVNTQ" %%mm0, (%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm7, 56(%0,%2,2)"
|
||||
:: "r"(d), "r"(s1), "r"(x)
|
||||
:"memory");
|
||||
}
|
||||
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
|
||||
@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
x=0;
|
||||
for (;x<w-31;x+=32) {
|
||||
__asm__ volatile(
|
||||
PREFETCH" 32%1 \n\t"
|
||||
"movq %1, %%mm0 \n\t"
|
||||
"movq 8%1, %%mm2 \n\t"
|
||||
"movq 16%1, %%mm4 \n\t"
|
||||
"movq 24%1, %%mm6 \n\t"
|
||||
PREFETCH" 32(%1,%2) \n\t"
|
||||
"movq (%1,%2), %%mm0 \n\t"
|
||||
"movq 8(%1,%2), %%mm2 \n\t"
|
||||
"movq 16(%1,%2), %%mm4 \n\t"
|
||||
"movq 24(%1,%2), %%mm6 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"movq %%mm4, %%mm5 \n\t"
|
||||
@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
"punpckhbw %%mm5, %%mm5 \n\t"
|
||||
"punpcklbw %%mm6, %%mm6 \n\t"
|
||||
"punpckhbw %%mm7, %%mm7 \n\t"
|
||||
MOVNTQ" %%mm0, %0 \n\t"
|
||||
MOVNTQ" %%mm1, 8%0 \n\t"
|
||||
MOVNTQ" %%mm2, 16%0 \n\t"
|
||||
MOVNTQ" %%mm3, 24%0 \n\t"
|
||||
MOVNTQ" %%mm4, 32%0 \n\t"
|
||||
MOVNTQ" %%mm5, 40%0 \n\t"
|
||||
MOVNTQ" %%mm6, 48%0 \n\t"
|
||||
MOVNTQ" %%mm7, 56%0"
|
||||
:"=m"(d[2*x])
|
||||
:"m"(s2[x])
|
||||
MOVNTQ" %%mm0, (%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
|
||||
MOVNTQ" %%mm7, 56(%0,%2,2)"
|
||||
:: "r"(d), "r"(s2), "r"(x)
|
||||
:"memory");
|
||||
}
|
||||
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
|
||||
|
Loading…
Reference in New Issue
Block a user