mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
spelling/grammar/wording overhaul
Originally committed as revision 27190 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
This commit is contained in:
parent
4bdc44c7fe
commit
8a3227968c
@ -2,8 +2,8 @@
|
|||||||
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
||||||
* April 20, 2007
|
* April 20, 2007
|
||||||
*
|
*
|
||||||
* Blackfin Video Color Space Converters Operations
|
* Blackfin video color space converter operations
|
||||||
* convert I420 YV12 to RGB in various formats,
|
* convert I420 YV12 to RGB in various formats
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -24,8 +24,8 @@
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
|
YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
|
||||||
and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts
|
and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits, packed into shorts.
|
||||||
|
|
||||||
|
|
||||||
The following calculation is used for the conversion:
|
The following calculation is used for the conversion:
|
||||||
@ -34,36 +34,36 @@ The following calculation is used for the conversion:
|
|||||||
g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128))
|
g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128))
|
||||||
b = clipz((y-oy)*cy + cbu*(u-128))
|
b = clipz((y-oy)*cy + cbu*(u-128))
|
||||||
|
|
||||||
y,u,v are pre scaled by a factor of 4 i.e. left shifted to gain precision.
|
y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
|
||||||
|
|
||||||
|
|
||||||
New factorization to eliminate the truncation error which was
|
New factorization to eliminate the truncation error which was
|
||||||
occuring due to the byteop3p.
|
occurring due to the byteop3p.
|
||||||
|
|
||||||
|
|
||||||
1) use the bytop16m to subtract quad bytes we use this in U8 this
|
1) Use the byteop16m to subtract quad bytes we use this in U8 this
|
||||||
then so the offsets need to be renormalized to 8bits.
|
then so the offsets need to be renormalized to 8 bits.
|
||||||
|
|
||||||
2) scale operands up by a factor of 4 not 8 because Blackfin
|
2) Scale operands up by a factor of 4 not 8 because Blackfin
|
||||||
multiplies include a shift.
|
multiplies include a shift.
|
||||||
|
|
||||||
3) compute into the accumulators cy*yx0, cy*yx1
|
3) Compute into the accumulators cy*yx0, cy*yx1.
|
||||||
|
|
||||||
4) compute each of the linear equations
|
4) Compute each of the linear equations:
|
||||||
r = clipz((y - oy) * cy + crv * (v - 128))
|
r = clipz((y - oy) * cy + crv * (v - 128))
|
||||||
|
|
||||||
g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128))
|
g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128))
|
||||||
|
|
||||||
b = clipz((y - oy) * cy + cbu * (u - 128))
|
b = clipz((y - oy) * cy + cbu * (u - 128))
|
||||||
|
|
||||||
reuse of the accumulators requires that we actually multiply
|
Reuse of the accumulators requires that we actually multiply
|
||||||
twice once with addition and the second time with a subtaction.
|
twice, once with an addition and the second time with a subtraction.
|
||||||
|
|
||||||
because of this we need to compute the equations in the order R B
|
Because of this we need to compute the equations in the order R B
|
||||||
then G saving the writes for B in the case of 24/32 bit color
|
then G saving the writes for B in the case of 24/32 bit color
|
||||||
formats.
|
formats.
|
||||||
|
|
||||||
api: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
|
API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
|
||||||
int dW, uint32_t *coeffs);
|
int dW, uint32_t *coeffs);
|
||||||
|
|
||||||
A B
|
A B
|
||||||
@ -77,13 +77,13 @@ uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv;
|
|||||||
|
|
||||||
coeffs is a pointer to oy.
|
coeffs is a pointer to oy.
|
||||||
|
|
||||||
the {rgb} masks are only utilized by the 565 packing algorithm. Note the data
|
The {rgb} masks are only utilized by the 565 packing algorithm. Note the data
|
||||||
replication is used to simplify the internal algorithms for the dual mac architecture
|
replication is used to simplify the internal algorithms for the dual MAC
|
||||||
of BlackFin.
|
architecture of Blackfin.
|
||||||
|
|
||||||
All routines are exported with _ff_bfin_ as a symbol prefix
|
All routines are exported with _ff_bfin_ as a symbol prefix.
|
||||||
|
|
||||||
rough performance gain compared against -O3:
|
Rough performance gain compared against -O3:
|
||||||
|
|
||||||
2779809/1484290 187.28%
|
2779809/1484290 187.28%
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
/*
|
/*
|
||||||
* rgb2rgb.c, Software RGB to RGB convertor
|
* software RGB to RGB converter
|
||||||
* pluralize by Software PAL8 to RGB convertor
|
* pluralize by software PAL8 to RGB converter
|
||||||
* Software YUV to YUV convertor
|
* software YUV to YUV converter
|
||||||
* Software YUV to RGB convertor
|
* software YUV to RGB converter
|
||||||
* Written by Nick Kurshev.
|
* Written by Nick Kurshev.
|
||||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -22,8 +22,8 @@
|
|||||||
* along with FFmpeg; if not, write to the Free Software
|
* along with FFmpeg; if not, write to the Free Software
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*
|
*
|
||||||
* the C code (not assembly, mmx, ...) of this file can be used
|
* The C code (not assembly, MMX, ...) of this file can be used
|
||||||
* under the LGPL license too
|
* under the LGPL license.
|
||||||
*/
|
*/
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
@ -33,7 +33,7 @@
|
|||||||
#include "swscale.h"
|
#include "swscale.h"
|
||||||
#include "swscale_internal.h"
|
#include "swscale_internal.h"
|
||||||
|
|
||||||
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
|
#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
|
||||||
|
|
||||||
void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size);
|
void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size);
|
||||||
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
|
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
|
||||||
@ -149,8 +149,8 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
|
|||||||
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
||||||
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
|
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
|
||||||
|
|
||||||
//Note: we have C, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
|
//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
|
||||||
//Plain C versions
|
//plain C versions
|
||||||
#undef HAVE_MMX
|
#undef HAVE_MMX
|
||||||
#undef HAVE_MMX2
|
#undef HAVE_MMX2
|
||||||
#undef HAVE_3DNOW
|
#undef HAVE_3DNOW
|
||||||
@ -190,10 +190,10 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
|
|||||||
#endif //ARCH_X86 || ARCH_X86_64
|
#endif //ARCH_X86 || ARCH_X86_64
|
||||||
|
|
||||||
/*
|
/*
|
||||||
rgb15->rgb16 Original by Strepto/Astral
|
RGB15->RGB16 original by Strepto/Astral
|
||||||
ported to gcc & bugfixed : A'rpi
|
ported to gcc & bugfixed : A'rpi
|
||||||
MMX2, 3DNOW optimization by Nick Kurshev
|
MMX2, 3DNOW optimization by Nick Kurshev
|
||||||
32bit c version, and and&add trick by Michael Niedermayer
|
32-bit C version, and and&add trick by Michael Niedermayer
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void sws_rgb2rgb_init(int flags){
|
void sws_rgb2rgb_init(int flags){
|
||||||
@ -266,7 +266,7 @@ void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
|
|||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
/*
|
/*
|
||||||
writes 1 byte o much and might cause alignment issues on some architectures?
|
Writes 1 byte too much and might cause alignment issues on some architectures?
|
||||||
for (i=0; i<num_pixels; i++)
|
for (i=0; i<num_pixels; i++)
|
||||||
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
|
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
|
||||||
*/
|
*/
|
||||||
@ -284,7 +284,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
|
|||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
/*
|
/*
|
||||||
writes 1 byte o much and might cause alignment issues on some architectures?
|
Writes 1 byte too much and might cause alignment issues on some architectures?
|
||||||
for (i=0; i<num_pixels; i++)
|
for (i=0; i<num_pixels; i++)
|
||||||
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
|
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
|
||||||
*/
|
*/
|
||||||
@ -299,7 +299,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Palette is assumed to contain bgr16, see rgb32to16 to convert the palette
|
* Palette is assumed to contain BGR16, see rgb32to16 to convert the palette.
|
||||||
*/
|
*/
|
||||||
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
|
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
|
||||||
{
|
{
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* rgb2rgb.h, Software RGB to RGB convertor
|
* software RGB to RGB converter
|
||||||
* pluralize by Software PAL8 to RGB convertor
|
* pluralize by Software PAL8 to RGB converter
|
||||||
* Software YUV to YUV convertor
|
* Software YUV to YUV converter
|
||||||
* Software YUV to RGB convertor
|
* Software YUV to RGB converter
|
||||||
* Written by Nick Kurshev.
|
* Written by Nick Kurshev.
|
||||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||||
*
|
*
|
||||||
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
/* A full collection of rgb to rgb(bgr) convertors */
|
/* A full collection of RGB to RGB(BGR) converters */
|
||||||
extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size);
|
extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size);
|
||||||
extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size);
|
extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size);
|
||||||
extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size);
|
extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size);
|
||||||
@ -71,53 +71,49 @@ extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, c
|
|||||||
extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
|
extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* problem for anyone then tell me, and ill fix it)
|
* Chrominance data is only taken from every second line, others are ignored.
|
||||||
* chrominance data is only taken from every secound line others are ignored FIXME write HQ version
|
* FIXME: Write HQ version.
|
||||||
*/
|
*/
|
||||||
//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* problem for anyone then tell me, and ill fix it)
|
|
||||||
*/
|
*/
|
||||||
extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
long lumStride, long chromStride, long dstStride);
|
long lumStride, long chromStride, long dstStride);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Width should be a multiple of 16.
|
||||||
* width should be a multiple of 16
|
|
||||||
*/
|
*/
|
||||||
extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
long lumStride, long chromStride, long dstStride);
|
long lumStride, long chromStride, long dstStride);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* problem for anyone then tell me, and ill fix it)
|
|
||||||
*/
|
*/
|
||||||
extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
long lumStride, long chromStride, long srcStride);
|
long lumStride, long chromStride, long srcStride);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* problem for anyone then tell me, and ill fix it)
|
|
||||||
*/
|
*/
|
||||||
extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
long lumStride, long chromStride, long dstStride);
|
long lumStride, long chromStride, long dstStride);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Height should be a multiple of 2 and width should be a multiple of 2.
|
||||||
* height should be a multiple of 2 and width should be a multiple of 2 (if this is a
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* problem for anyone then tell me, and ill fix it)
|
* Chrominance data is only taken from every second line, others are ignored.
|
||||||
* chrominance data is only taken from every secound line others are ignored FIXME write HQ version
|
* FIXME: Write HQ version.
|
||||||
*/
|
*/
|
||||||
extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
/*
|
/*
|
||||||
* rgb2rgb.c, Software RGB to RGB convertor
|
* software RGB to RGB converter
|
||||||
* pluralize by Software PAL8 to RGB convertor
|
* pluralize by software PAL8 to RGB converter
|
||||||
* Software YUV to YUV convertor
|
* software YUV to YUV converter
|
||||||
* Software YUV to RGB convertor
|
* software YUV to RGB converter
|
||||||
* Written by Nick Kurshev.
|
* Written by Nick Kurshev.
|
||||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||||
* lot of big-endian byteorder fixes by Alex Beregszaszi
|
* lot of big-endian byte order fixes by Alex Beregszaszi
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -23,7 +23,7 @@
|
|||||||
* along with FFmpeg; if not, write to the Free Software
|
* along with FFmpeg; if not, write to the Free Software
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*
|
*
|
||||||
* The C code (not assembly, mmx, ...) of this file can be used
|
* The C code (not assembly, MMX, ...) of this file can be used
|
||||||
* under the LGPL license.
|
* under the LGPL license.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -229,10 +229,10 @@ static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Original by Strepto/Astral
|
original by Strepto/Astral
|
||||||
ported to gcc & bugfixed : A'rpi
|
ported to gcc & bugfixed: A'rpi
|
||||||
MMX2, 3DNOW optimization by Nick Kurshev
|
MMX2, 3DNOW optimization by Nick Kurshev
|
||||||
32 bit C version, and and&add trick by Michael Niedermayer
|
32-bit C version, and and&add trick by Michael Niedermayer
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
|
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
|
||||||
{
|
{
|
||||||
@ -926,9 +926,9 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
|
|||||||
----------------
|
----------------
|
||||||
1 1 0 1 1 1 1 0
|
1 1 0 1 1 1 1 0
|
||||||
|=======| |===|
|
|=======| |===|
|
||||||
| Leftmost Bits Repeated to Fill Open Bits
|
| leftmost bits repeated to fill open bits
|
||||||
|
|
|
|
||||||
Original Bits
|
original bits
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
|
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
|
||||||
{
|
{
|
||||||
@ -1006,7 +1006,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_
|
|||||||
:"=m"(*d)
|
:"=m"(*d)
|
||||||
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
|
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
|
||||||
:"memory");
|
:"memory");
|
||||||
/* Borrowed 32 to 24 */
|
/* borrowed 32 to 24 */
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movq %%mm0, %%mm4 \n\t"
|
"movq %%mm0, %%mm4 \n\t"
|
||||||
"movq %%mm3, %%mm5 \n\t"
|
"movq %%mm3, %%mm5 \n\t"
|
||||||
@ -1147,7 +1147,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_
|
|||||||
:"=m"(*d)
|
:"=m"(*d)
|
||||||
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
|
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
|
||||||
:"memory");
|
:"memory");
|
||||||
/* Borrowed 32 to 24 */
|
/* borrowed 32 to 24 */
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movq %%mm0, %%mm4 \n\t"
|
"movq %%mm0, %%mm4 \n\t"
|
||||||
"movq %%mm3, %%mm5 \n\t"
|
"movq %%mm3, %%mm5 \n\t"
|
||||||
@ -1479,7 +1479,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
|
|||||||
asm volatile(SFENCE:::"memory");
|
asm volatile(SFENCE:::"memory");
|
||||||
asm volatile(EMMS:::"memory");
|
asm volatile(EMMS:::"memory");
|
||||||
|
|
||||||
if (mmx_size==23) return; //finihsed, was multiple of 8
|
if (mmx_size==23) return; //finished, was multiple of 8
|
||||||
|
|
||||||
src+= src_size;
|
src+= src_size;
|
||||||
dst+= src_size;
|
dst+= src_size;
|
||||||
@ -1638,8 +1638,8 @@ asm( EMMS" \n\t"
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Height should be a multiple of 2 and width should be a multiple of 16 (if
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* this is a problem for anyone then tell me, and I will fix it).
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
@ -1720,7 +1720,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
|
|||||||
(vc[0] << 8) + (yc[1] << 0);
|
(vc[0] << 8) + (yc[1] << 0);
|
||||||
#else
|
#else
|
||||||
*idst++ = uc[0] + (yc[0] << 8) +
|
*idst++ = uc[0] + (yc[0] << 8) +
|
||||||
(vc[0] << 16) + (yc[1] << 24);
|
(vc[0] << 16) + (yc[1] << 24);
|
||||||
#endif
|
#endif
|
||||||
yc += 2;
|
yc += 2;
|
||||||
uc++;
|
uc++;
|
||||||
@ -1744,8 +1744,8 @@ asm( EMMS" \n\t"
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Height should be a multiple of 2 and width should be a multiple of 16 (if
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* this is a problem for anyone then tell me, and I will fix it).
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
@ -1766,8 +1766,8 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Height should be a multiple of 2 and width should be a multiple of 16 (if
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* this is a problem for anyone then tell me, and I will fix it).
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
long width, long height,
|
long width, long height,
|
||||||
@ -2002,9 +2002,9 @@ asm volatile( EMMS" \n\t"
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Height should be a multiple of 2 and width should be a multiple of 16 (if
|
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||||
* this is a problem for anyone then tell me, and I will fix it).
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* Chrominance data is only taken from every secound line, others are ignored.
|
* Chrominance data is only taken from every second line, others are ignored.
|
||||||
* FIXME: Write HQ version.
|
* FIXME: Write HQ version.
|
||||||
*/
|
*/
|
||||||
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
@ -2128,9 +2128,9 @@ asm volatile( EMMS" \n\t"
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Height should be a multiple of 2 and width should be a multiple of 2 (if
|
* Height should be a multiple of 2 and width should be a multiple of 2.
|
||||||
* this is a problem for anyone then tell me, and I will fix it).
|
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||||
* Chrominance data is only taken from every secound line,
|
* Chrominance data is only taken from every second line,
|
||||||
* others are ignored in the C version.
|
* others are ignored in the C version.
|
||||||
* FIXME: Write HQ version.
|
* FIXME: Write HQ version.
|
||||||
*/
|
*/
|
||||||
|
@ -245,12 +245,12 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
|
|||||||
src_v = vec_mergeh(src_v, (vector signed short)vzero);
|
src_v = vec_mergeh(src_v, (vector signed short)vzero);
|
||||||
|
|
||||||
filter_v = vec_ld(i << 3, filter);
|
filter_v = vec_ld(i << 3, filter);
|
||||||
// the 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2)
|
// The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
|
||||||
|
|
||||||
// the neat trick : we only care for half the elements,
|
// The neat trick: We only care for half the elements,
|
||||||
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
|
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
|
||||||
// and we're going to use vec_mule, so we chose
|
// and we're going to use vec_mule, so we choose
|
||||||
// carefully how to "unpack" the elements into the even slots
|
// carefully how to "unpack" the elements into the even slots.
|
||||||
if ((i << 3) % 16)
|
if ((i << 3) % 16)
|
||||||
filter_v = vec_mergel(filter_v, (vector signed short)vzero);
|
filter_v = vec_mergel(filter_v, (vector signed short)vzero);
|
||||||
else
|
else
|
||||||
@ -405,12 +405,12 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
|
|||||||
return srcSliceH;
|
return srcSliceH;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this code assume:
|
/* This code assumes:
|
||||||
|
|
||||||
1) dst is 16 bytes-aligned
|
1) dst is 16-byte aligned
|
||||||
2) dstStride is a multiple of 16
|
2) dstStride is a multiple of 16
|
||||||
3) width is a multiple of 16
|
3) width is a multiple of 16
|
||||||
4) lum&chrom stride are multiple of 8
|
4) lum & chrom stride are multiples of 8
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (y=0; y<height; y++) {
|
for (y=0; y<height; y++) {
|
||||||
@ -482,12 +482,12 @@ static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int
|
|||||||
return srcSliceH;
|
return srcSliceH;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this code assume:
|
/* This code assumes:
|
||||||
|
|
||||||
1) dst is 16 bytes-aligned
|
1) dst is 16-byte aligned
|
||||||
2) dstStride is a multiple of 16
|
2) dstStride is a multiple of 16
|
||||||
3) width is a multiple of 16
|
3) width is a multiple of 16
|
||||||
4) lum&chrom stride are multiple of 8
|
4) lum & chrom stride are multiples of 8
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (y=0; y<height; y++) {
|
for (y=0; y<height; y++) {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
||||||
*
|
*
|
||||||
* Blackfin Software Video SCALER Operations
|
* Blackfin software video scaler operations
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
|
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
|
||||||
int srcSliceH, uint8_t* dst[], int dstStride[]);
|
int srcSliceH, uint8_t* dst[], int dstStride[]);
|
||||||
|
|
||||||
/* this struct should be aligned on at least 32-byte boundary */
|
/* This struct should be aligned on at least a 32-byte boundary. */
|
||||||
typedef struct SwsContext{
|
typedef struct SwsContext{
|
||||||
/**
|
/**
|
||||||
* info on struct for av_log
|
* info on struct for av_log
|
||||||
@ -73,7 +73,7 @@ typedef struct SwsContext{
|
|||||||
int16_t *vChrFilter;
|
int16_t *vChrFilter;
|
||||||
int16_t *vChrFilterPos;
|
int16_t *vChrFilterPos;
|
||||||
|
|
||||||
uint8_t formatConvBuffer[VOF]; //FIXME dynamic alloc, but we have to change a lot of code for this to be useful
|
uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
|
||||||
|
|
||||||
int hLumFilterSize;
|
int hLumFilterSize;
|
||||||
int hChrFilterSize;
|
int hChrFilterSize;
|
||||||
@ -122,7 +122,7 @@ typedef struct SwsContext{
|
|||||||
#define V_OFFSET "10*8"
|
#define V_OFFSET "10*8"
|
||||||
#define LUM_MMX_FILTER_OFFSET "11*8"
|
#define LUM_MMX_FILTER_OFFSET "11*8"
|
||||||
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
|
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
|
||||||
#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the asm
|
#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
|
||||||
#define ESP_OFFSET "11*8+4*4*256*2+8"
|
#define ESP_OFFSET "11*8+4*4*256*2+8"
|
||||||
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
|
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
|
||||||
#define U_TEMP "11*8+4*4*256*2+24"
|
#define U_TEMP "11*8+4*4*256*2+24"
|
||||||
|
@ -17,8 +17,8 @@
|
|||||||
* along with FFmpeg; if not, write to the Free Software
|
* along with FFmpeg; if not, write to the Free Software
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*
|
*
|
||||||
* the C code (not assembly, mmx, ...) of this file can be used
|
* The C code (not assembly, MMX, ...) of this file can be used
|
||||||
* under the LGPL license too
|
* under the LGPL license.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#undef REAL_MOVNTQ
|
#undef REAL_MOVNTQ
|
||||||
@ -30,7 +30,7 @@
|
|||||||
#undef SFENCE
|
#undef SFENCE
|
||||||
|
|
||||||
#ifdef HAVE_3DNOW
|
#ifdef HAVE_3DNOW
|
||||||
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
|
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
|
||||||
#define EMMS "femms"
|
#define EMMS "femms"
|
||||||
#else
|
#else
|
||||||
#define EMMS "emms"
|
#define EMMS "emms"
|
||||||
@ -1503,7 +1503,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *
|
|||||||
const int yalpha1=0;
|
const int yalpha1=0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
|
uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
|
||||||
const int yalpha= 4096; //FIXME ...
|
const int yalpha= 4096; //FIXME ...
|
||||||
|
|
||||||
if (flags&SWS_FULL_CHR_H_INT)
|
if (flags&SWS_FULL_CHR_H_INT)
|
||||||
@ -1700,7 +1700,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//FIXME yuy2* can read upto 7 samples to much
|
//FIXME yuy2* can read up to 7 samples too many
|
||||||
|
|
||||||
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
|
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
|
||||||
{
|
{
|
||||||
@ -2297,7 +2297,7 @@ static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bilinear / Bicubic scaling
|
// bilinear / bicubic scaling
|
||||||
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
|
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
|
||||||
int16_t *filter, int16_t *filterPos, long filterSize)
|
int16_t *filter, int16_t *filterPos, long filterSize)
|
||||||
{
|
{
|
||||||
@ -2544,7 +2544,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
// use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one)
|
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
|
||||||
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
|
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
|
||||||
#else
|
#else
|
||||||
if (!(flags&SWS_FAST_BILINEAR))
|
if (!(flags&SWS_FAST_BILINEAR))
|
||||||
@ -2552,7 +2552,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
|
|||||||
{
|
{
|
||||||
RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
|
RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
|
||||||
}
|
}
|
||||||
else // Fast Bilinear upscale / crap downscale
|
else // fast bilinear upscale / crap downscale
|
||||||
{
|
{
|
||||||
#if defined(ARCH_X86)
|
#if defined(ARCH_X86)
|
||||||
#ifdef HAVE_MMX2
|
#ifdef HAVE_MMX2
|
||||||
@ -2761,7 +2761,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
// use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one)
|
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
|
||||||
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
|
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
|
||||||
#else
|
#else
|
||||||
if (!(flags&SWS_FAST_BILINEAR))
|
if (!(flags&SWS_FAST_BILINEAR))
|
||||||
@ -2770,7 +2770,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
|
|||||||
RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||||
RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||||
}
|
}
|
||||||
else // Fast Bilinear upscale / crap downscale
|
else // fast bilinear upscale / crap downscale
|
||||||
{
|
{
|
||||||
#if defined(ARCH_X86)
|
#if defined(ARCH_X86)
|
||||||
#ifdef HAVE_MMX2
|
#ifdef HAVE_MMX2
|
||||||
@ -2890,8 +2890,8 @@ FUNNY_UV_CODE
|
|||||||
"cmp %2, %%"REG_a" \n\t"
|
"cmp %2, %%"REG_a" \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
|
|
||||||
/* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
|
/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
|
||||||
which is needed to support GCC-4.0 */
|
which is needed to support GCC 4.0. */
|
||||||
#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
|
#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
|
||||||
:: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
|
:: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
|
||||||
#else
|
#else
|
||||||
@ -2963,7 +2963,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
|||||||
int lastDstY;
|
int lastDstY;
|
||||||
uint8_t *pal=NULL;
|
uint8_t *pal=NULL;
|
||||||
|
|
||||||
/* vars whch will change and which we need to storw back in the context */
|
/* vars which will change and which we need to store back in the context */
|
||||||
int dstY= c->dstY;
|
int dstY= c->dstY;
|
||||||
int lumBufIndex= c->lumBufIndex;
|
int lumBufIndex= c->lumBufIndex;
|
||||||
int chrBufIndex= c->chrBufIndex;
|
int chrBufIndex= c->chrBufIndex;
|
||||||
@ -3004,13 +3004,14 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
|||||||
if (flags & SWS_PRINT_INFO && firstTime)
|
if (flags & SWS_PRINT_INFO && firstTime)
|
||||||
{
|
{
|
||||||
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
|
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
|
||||||
" ->cannot do aligned memory acesses anymore\n");
|
" ->cannot do aligned memory accesses anymore\n");
|
||||||
firstTime=0;
|
firstTime=0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note the user might start scaling the picture in the middle so this will not get executed
|
/* Note the user might start scaling the picture in the middle so this
|
||||||
this is not really intended but works currently, so ppl might do it */
|
will not get executed. This is not really intended but works
|
||||||
|
currently, so people might do it. */
|
||||||
if (srcSliceY ==0){
|
if (srcSliceY ==0){
|
||||||
lumBufIndex=0;
|
lumBufIndex=0;
|
||||||
chrBufIndex=0;
|
chrBufIndex=0;
|
||||||
@ -3182,7 +3183,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
|||||||
{
|
{
|
||||||
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
|
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
|
||||||
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
|
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
|
||||||
if (vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
|
if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
|
||||||
{
|
{
|
||||||
int16_t *lumBuf = lumPixBuf[0];
|
int16_t *lumBuf = lumPixBuf[0];
|
||||||
int16_t *chrBuf= chrPixBuf[0];
|
int16_t *chrBuf= chrPixBuf[0];
|
||||||
@ -3200,13 +3201,13 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
|||||||
{
|
{
|
||||||
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
|
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
|
||||||
ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
|
ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
|
||||||
if (vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
|
if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB
|
||||||
{
|
{
|
||||||
int chrAlpha= vChrFilter[2*dstY+1];
|
int chrAlpha= vChrFilter[2*dstY+1];
|
||||||
RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
|
RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
|
||||||
dest, dstW, chrAlpha, dstFormat, flags, dstY);
|
dest, dstW, chrAlpha, dstFormat, flags, dstY);
|
||||||
}
|
}
|
||||||
else if (vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
|
else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
|
||||||
{
|
{
|
||||||
int lumAlpha= vLumFilter[2*dstY+1];
|
int lumAlpha= vLumFilter[2*dstY+1];
|
||||||
int chrAlpha= vChrFilter[2*dstY+1];
|
int chrAlpha= vChrFilter[2*dstY+1];
|
||||||
@ -3217,7 +3218,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
|||||||
RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
|
RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
|
||||||
dest, dstW, lumAlpha, chrAlpha, dstY);
|
dest, dstW, lumAlpha, chrAlpha, dstY);
|
||||||
}
|
}
|
||||||
else //General RGB
|
else //general RGB
|
||||||
{
|
{
|
||||||
RENAME(yuv2packedX)(c,
|
RENAME(yuv2packedX)(c,
|
||||||
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
|
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
#include "swscale.h"
|
#include "swscale.h"
|
||||||
#include "swscale_internal.h"
|
#include "swscale_internal.h"
|
||||||
|
|
||||||
#define DITHER1XBPP // only for mmx
|
#define DITHER1XBPP // only for MMX
|
||||||
|
|
||||||
const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
|
const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
|
||||||
{ 1, 3, 1, 3, 1, 3, 1, 3, },
|
{ 1, 3, 1, 3, 1, 3, 1, 3, },
|
||||||
@ -155,8 +155,8 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
|
|||||||
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
|
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
|
||||||
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
|
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
|
||||||
|
|
||||||
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
|
// The volatile is required because gcc otherwise optimizes some writes away
|
||||||
// are read in the asm block
|
// not knowing that these are read in the ASM block.
|
||||||
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
|
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
|
||||||
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
|
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
|
||||||
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
|
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
|
||||||
@ -641,7 +641,7 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found\n");
|
av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
|
||||||
|
|
||||||
switch(c->dstFormat){
|
switch(c->dstFormat){
|
||||||
case PIX_FMT_BGR32:
|
case PIX_FMT_BGR32:
|
||||||
|
@ -21,63 +21,68 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
convert I420 YV12 to RGB in various formats,
|
Convert I420 YV12 to RGB in various formats,
|
||||||
it rejects images that are not in 420 formats
|
it rejects images that are not in 420 formats,
|
||||||
it rejects images that don't have widths of multiples of 16
|
it rejects images that don't have widths of multiples of 16,
|
||||||
it rejects images that don't have heights of multiples of 2
|
it rejects images that don't have heights of multiples of 2.
|
||||||
reject defers to C simulation codes.
|
Reject defers to C simulation code.
|
||||||
|
|
||||||
lots of optimizations to be done here
|
Lots of optimizations to be done here.
|
||||||
|
|
||||||
1. need to fix saturation code, I just couldn't get it to fly with packs and adds.
|
1. Need to fix saturation code. I just couldn't get it to fly with packs
|
||||||
so we currently use max min to clip
|
and adds, so we currently use max/min to clip.
|
||||||
|
|
||||||
2. the inefficient use of chroma loading needs a bit of brushing up
|
2. The inefficient use of chroma loading needs a bit of brushing up.
|
||||||
|
|
||||||
3. analysis of pipeline stalls needs to be done, use shark to identify pipeline stalls
|
3. Analysis of pipeline stalls needs to be done. Use shark to identify
|
||||||
|
pipeline stalls.
|
||||||
|
|
||||||
|
|
||||||
MODIFIED to calculate coeffs from currently selected color space.
|
MODIFIED to calculate coeffs from currently selected color space.
|
||||||
MODIFIED core to be a macro which you spec the output format.
|
MODIFIED core to be a macro where you specify the output format.
|
||||||
ADDED UYVY conversion which is never called due to some thing in SWSCALE.
|
ADDED UYVY conversion which is never called due to some thing in swscale.
|
||||||
CORRECTED algorithm selection to be strict on input formats.
|
CORRECTED algorithm selection to be strict on input formats.
|
||||||
ADDED runtime detection of altivec.
|
ADDED runtime detection of AltiVec.
|
||||||
|
|
||||||
ADDED altivec_yuv2packedX vertical scl + RGB converter
|
ADDED altivec_yuv2packedX vertical scl + RGB converter
|
||||||
|
|
||||||
March 27,2004
|
March 27,2004
|
||||||
PERFORMANCE ANALYSIS
|
PERFORMANCE ANALYSIS
|
||||||
|
|
||||||
The C version use 25% of the processor or ~250Mips for D1 video rawvideo used as test
|
The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
|
||||||
The ALTIVEC version uses 10% of the processor or ~100Mips for D1 video same sequence
|
used as test.
|
||||||
|
The AltiVec version uses 10% of the processor or ~100Mips for D1 video
|
||||||
|
same sequence.
|
||||||
|
|
||||||
720*480*30 ~10MPS
|
720 * 480 * 30 ~10MPS
|
||||||
|
|
||||||
so we have roughly 10clocks per pixel this is too high something has to be wrong.
|
so we have roughly 10 clocks per pixel. This is too high, something has
|
||||||
|
to be wrong.
|
||||||
|
|
||||||
OPTIMIZED clip codes to utilize vec_max and vec_packs removing the need for vec_min.
|
OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
|
||||||
|
need for vec_min.
|
||||||
|
|
||||||
OPTIMIZED DST OUTPUT cache/dma controls. we are pretty much
|
OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to have
|
||||||
guaranteed to have the input video frame it was just decompressed so
|
the input video frame, it was just decompressed so it probably resides in L1
|
||||||
it probably resides in L1 caches. However we are creating the
|
caches. However, we are creating the output video stream. This needs to use the
|
||||||
output video stream this needs to use the DSTST instruction to
|
DSTST instruction to optimize for the cache. We couple this with the fact that
|
||||||
optimize for the cache. We couple this with the fact that we are
|
we are not going to be visiting the input buffer again so we mark it Least
|
||||||
not going to be visiting the input buffer again so we mark it Least
|
Recently Used. This shaves 25% of the processor cycles off.
|
||||||
Recently Used. This shaves 25% of the processor cycles off.
|
|
||||||
|
|
||||||
Now MEMCPY is the largest mips consumer in the system, probably due
|
Now memcpy is the largest mips consumer in the system, probably due
|
||||||
to the inefficient X11 stuff.
|
to the inefficient X11 stuff.
|
||||||
|
|
||||||
GL libraries seem to be very slow on this machine 1.33GHz PB running
|
GL libraries seem to be very slow on this machine 1.33GHz PB running
|
||||||
Jaguar, this is not the case for my 1GHz PB. I thought it might be
|
Jaguar, this is not the case for my 1GHz PB. I thought it might be
|
||||||
a versioning issues, however I have libGL.1.2.dylib for both
|
a versioning issue, however I have libGL.1.2.dylib for both
|
||||||
machines. ((We need to figure this out now))
|
machines. (We need to figure this out now.)
|
||||||
|
|
||||||
GL2 libraries work now with patch for RGB32
|
GL2 libraries work now with patch for RGB32.
|
||||||
|
|
||||||
NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor
|
NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor.
|
||||||
|
|
||||||
Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment.
|
Integrated luma prescaling adjustment for saturation/contrast/brightness
|
||||||
|
adjustment.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
||||||
* April 20, 2007
|
|
||||||
*
|
*
|
||||||
* Blackfin Video Color Space Converters Operations
|
* Blackfin video color space converter operations
|
||||||
* convert I420 YV12 to RGB in various formats,
|
* convert I420 YV12 to RGB in various formats
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -200,7 +199,7 @@ SwsFunc ff_bfin_yuv2rgb_get_func_ptr (SwsContext *c)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
av_log(c, AV_LOG_INFO, "BlackFin Accelerated Color Space Converter %s\n",
|
av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
|
||||||
sws_format_name (c->dstFormat));
|
sws_format_name (c->dstFormat));
|
||||||
|
|
||||||
return f;
|
return f;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* yuv2rgb_mlib.c, Software YUV to RGB converter using mediaLib
|
* software YUV to RGB converter using mediaLib
|
||||||
|
*
|
||||||
* Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
|
* Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology"
|
* yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology"
|
||||||
*
|
*
|
||||||
* Copyright (C) 2000, Silicon Integrated System Corp.
|
* Copyright (C) 2000, Silicon Integrated System Corp.
|
||||||
*
|
*
|
||||||
@ -31,7 +31,7 @@
|
|||||||
#undef SFENCE
|
#undef SFENCE
|
||||||
|
|
||||||
#ifdef HAVE_3DNOW
|
#ifdef HAVE_3DNOW
|
||||||
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
|
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
|
||||||
#define EMMS "femms"
|
#define EMMS "femms"
|
||||||
#else
|
#else
|
||||||
#define EMMS "emms"
|
#define EMMS "emms"
|
||||||
@ -147,8 +147,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr
|
|||||||
g6Dither= ff_dither4[y&1];
|
g6Dither= ff_dither4[y&1];
|
||||||
g5Dither= ff_dither8[y&1];
|
g5Dither= ff_dither8[y&1];
|
||||||
r5Dither= ff_dither8[(y+1)&1];
|
r5Dither= ff_dither8[(y+1)&1];
|
||||||
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
|
/* This MMX assembly code deals with a SINGLE scan line at a time,
|
||||||
pixels in each iteration */
|
* it converts 8 pixels in each iteration. */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
/* load data for start of next scan line */
|
/* load data for start of next scan line */
|
||||||
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
||||||
@ -156,8 +156,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr
|
|||||||
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
||||||
//".balign 16 \n\t"
|
//".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
/* no speed diference on my p3@500 with prefetch,
|
/* No speed difference on my p3@500 with prefetch,
|
||||||
* if it is faster for anyone with -benchmark then tell me
|
* if it is faster for anyone with -benchmark then tell me.
|
||||||
PREFETCH" 64(%0) \n\t"
|
PREFETCH" 64(%0) \n\t"
|
||||||
PREFETCH" 64(%1) \n\t"
|
PREFETCH" 64(%1) \n\t"
|
||||||
PREFETCH" 64(%2) \n\t"
|
PREFETCH" 64(%2) \n\t"
|
||||||
@ -180,7 +180,7 @@ YUV2RGB
|
|||||||
"movq %%mm0, %%mm5;" /* Copy B7-B0 */
|
"movq %%mm0, %%mm5;" /* Copy B7-B0 */
|
||||||
"movq %%mm2, %%mm7;" /* Copy G7-G0 */
|
"movq %%mm2, %%mm7;" /* Copy G7-G0 */
|
||||||
|
|
||||||
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
|
/* convert RGB24 plane to RGB16 pack for pixel 0-3 */
|
||||||
"punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
|
"punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
|
||||||
"punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
"punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
||||||
|
|
||||||
@ -190,7 +190,7 @@ YUV2RGB
|
|||||||
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
||||||
MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
|
MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
|
||||||
|
|
||||||
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
|
/* convert RGB24 plane to RGB16 pack for pixel 4-7 */
|
||||||
"punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
|
"punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
|
||||||
"punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
"punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
||||||
|
|
||||||
@ -242,8 +242,8 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr
|
|||||||
g6Dither= ff_dither4[y&1];
|
g6Dither= ff_dither4[y&1];
|
||||||
g5Dither= ff_dither8[y&1];
|
g5Dither= ff_dither8[y&1];
|
||||||
r5Dither= ff_dither8[(y+1)&1];
|
r5Dither= ff_dither8[(y+1)&1];
|
||||||
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
|
/* This MMX assembly code deals with a SINGLE scan line at a time,
|
||||||
pixels in each iteration */
|
* it converts 8 pixels in each iteration. */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
/* load data for start of next scan line */
|
/* load data for start of next scan line */
|
||||||
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
||||||
@ -271,7 +271,7 @@ YUV2RGB
|
|||||||
"movq %%mm0, %%mm5;" /* Copy B7-B0 */
|
"movq %%mm0, %%mm5;" /* Copy B7-B0 */
|
||||||
"movq %%mm2, %%mm7;" /* Copy G7-G0 */
|
"movq %%mm2, %%mm7;" /* Copy G7-G0 */
|
||||||
|
|
||||||
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
|
/* convert RGB24 plane to RGB15 pack for pixel 0-3 */
|
||||||
"punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
|
"punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
|
||||||
"punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
"punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
||||||
|
|
||||||
@ -281,7 +281,7 @@ YUV2RGB
|
|||||||
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
|
||||||
MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
|
MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
|
||||||
|
|
||||||
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
|
/* convert RGB24 plane to RGB15 pack for pixel 4-7 */
|
||||||
"punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */
|
"punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */
|
||||||
"punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
"punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
|
||||||
|
|
||||||
@ -326,8 +326,8 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr
|
|||||||
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
|
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
|
||||||
long index= -h_size/2;
|
long index= -h_size/2;
|
||||||
|
|
||||||
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
|
/* This MMX assembly code deals with a SINGLE scan line at a time,
|
||||||
pixels in each iteration */
|
* it converts 8 pixels in each iteration. */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
/* load data for start of next scan line */
|
/* load data for start of next scan line */
|
||||||
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
||||||
@ -472,8 +472,8 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr
|
|||||||
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
|
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
|
||||||
long index= -h_size/2;
|
long index= -h_size/2;
|
||||||
|
|
||||||
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
|
/* This MMX assembly code deals with a SINGLE scan line at a time,
|
||||||
pixels in each iteration */
|
* it converts 8 pixels in each iteration. */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
/* load data for start of next scan line */
|
/* load data for start of next scan line */
|
||||||
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
|
||||||
|
Loading…
Reference in New Issue
Block a user