mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-02-09 14:14:39 +02:00
Remove PPC perf counter support
This functionality is better accessed through tools like oprofile. Originally committed as revision 23808 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
a788196e20
commit
2829ce4b40
4
configure
vendored
4
configure
vendored
@ -211,8 +211,6 @@ Advanced options (experts only):
|
|||||||
--arch=ARCH select architecture [$arch]
|
--arch=ARCH select architecture [$arch]
|
||||||
--cpu=CPU select the minimum required CPU (affects
|
--cpu=CPU select the minimum required CPU (affects
|
||||||
instruction selection, may crash on older CPUs)
|
instruction selection, may crash on older CPUs)
|
||||||
--enable-powerpc-perf enable performance report on PPC
|
|
||||||
(requires enabling PMC)
|
|
||||||
--disable-asm disable all assembler optimizations
|
--disable-asm disable all assembler optimizations
|
||||||
--disable-altivec disable AltiVec optimizations
|
--disable-altivec disable AltiVec optimizations
|
||||||
--disable-amd3dnow disable 3DNow! optimizations
|
--disable-amd3dnow disable 3DNow! optimizations
|
||||||
@ -886,7 +884,6 @@ CONFIG_LIST="
|
|||||||
nonfree
|
nonfree
|
||||||
pic
|
pic
|
||||||
postproc
|
postproc
|
||||||
powerpc_perf
|
|
||||||
rdft
|
rdft
|
||||||
runtime_cpudetect
|
runtime_cpudetect
|
||||||
shared
|
shared
|
||||||
@ -2772,7 +2769,6 @@ if enabled ppc; then
|
|||||||
echo "AltiVec enabled ${altivec-no}"
|
echo "AltiVec enabled ${altivec-no}"
|
||||||
echo "PPC 4xx optimizations ${ppc4xx-no}"
|
echo "PPC 4xx optimizations ${ppc4xx-no}"
|
||||||
echo "dcbzl available ${dcbzl-no}"
|
echo "dcbzl available ${dcbzl-no}"
|
||||||
echo "performance report ${powerpc_perf-no}"
|
|
||||||
fi
|
fi
|
||||||
if enabled sparc; then
|
if enabled sparc; then
|
||||||
echo "VIS enabled ${vis-no}"
|
echo "VIS enabled ${vis-no}"
|
||||||
|
@ -1,172 +0,0 @@
|
|||||||
FFmpeg & evaluating performance on the PowerPC Architecture HOWTO
|
|
||||||
|
|
||||||
(c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
I - Introduction
|
|
||||||
|
|
||||||
The PowerPC architecture and its SIMD extension AltiVec offer some
|
|
||||||
interesting tools to evaluate performance and improve the code.
|
|
||||||
This document tries to explain how to use those tools with FFmpeg.
|
|
||||||
|
|
||||||
The architecture itself offers two ways to evaluate the performance of
|
|
||||||
a given piece of code:
|
|
||||||
|
|
||||||
1) The Time Base Registers (TBL)
|
|
||||||
2) The Performance Monitor Counter Registers (PMC)
|
|
||||||
|
|
||||||
The first ones are always available, always active, but they're not very
|
|
||||||
accurate: the registers increment by one every four *bus* cycles. On
|
|
||||||
my 667 MHz tiBook (ppc7450), this means once every twenty *processor*
|
|
||||||
cycles. So we won't use that.
|
|
||||||
|
|
||||||
The PMC are much more useful: not only can they report cycle-accurate
|
|
||||||
timing, but they can also be used to monitor many other parameters,
|
|
||||||
such as the number of AltiVec stalls for every kind of instruction,
|
|
||||||
or instruction cache misses. The downside is that not all processors
|
|
||||||
support the PMC (all G3, all G4 and the 970 do support them), and
|
|
||||||
they're inactive by default - you need to activate them with a
|
|
||||||
dedicated tool. Also, the number of available PMC depends on the
|
|
||||||
processor: the various 604 have 2, the various 75x (aka G3) have 4,
|
|
||||||
and the various 74xx (aka G4) have 6.
|
|
||||||
|
|
||||||
*WARNING*: The PowerPC 970 is not very well documented, and its PMC
|
|
||||||
registers are 64 bits wide. To properly notify the code, you *must*
|
|
||||||
tune for the 970 (using --tune=970), or the code will assume 32 bit
|
|
||||||
registers.
|
|
||||||
|
|
||||||
|
|
||||||
II - Enabling FFmpeg PowerPC performance support
|
|
||||||
|
|
||||||
This needs to be done by hand. First, you need to configure FFmpeg as
|
|
||||||
usual, but add the "--enable-powerpc-perf" option. For instance:
|
|
||||||
|
|
||||||
#####
|
|
||||||
./configure --prefix=/usr/local/ffmpeg-svn --cc=gcc-3.3 --tune=7450 --enable-powerpc-perf
|
|
||||||
#####
|
|
||||||
|
|
||||||
This will configure FFmpeg to install inside /usr/local/ffmpeg-svn,
|
|
||||||
compiling with gcc-3.3 (you should try to use this one or a newer
|
|
||||||
gcc), and tuning for the PowerPC 7450 (i.e. the newer G4; as a rule of
|
|
||||||
thumb, those at 550 MHz and more). It will also enable the PMC.
|
|
||||||
|
|
||||||
You may also edit the file "config.h" to enable the following line:
|
|
||||||
|
|
||||||
#####
|
|
||||||
// #define ALTIVEC_USE_REFERENCE_C_CODE 1
|
|
||||||
#####
|
|
||||||
|
|
||||||
If you enable this line, then the code will not make use of AltiVec,
|
|
||||||
but will use the reference C code instead. This is useful to compare
|
|
||||||
performance between two versions of the code.
|
|
||||||
|
|
||||||
Also, the number of enabled PMC is defined in "libavcodec/ppc/dsputil_ppc.h":
|
|
||||||
|
|
||||||
#####
|
|
||||||
#define POWERPC_NUM_PMC_ENABLED 4
|
|
||||||
#####
|
|
||||||
|
|
||||||
If you have a G4 CPU, you can enable all 6 PMC. DO NOT enable more
|
|
||||||
PMC than available on your CPU!
|
|
||||||
|
|
||||||
Then, simply compile FFmpeg as usual (make && make install).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
III - Using FFmpeg PowerPC performance support
|
|
||||||
|
|
||||||
This FFmpeg can be used exactly as usual. But before exiting, FFmpeg
|
|
||||||
will dump a per-function report that looks like this:
|
|
||||||
|
|
||||||
#####
|
|
||||||
PowerPC performance report
|
|
||||||
Values are from the PMC registers, and represent whatever the
|
|
||||||
registers are set to record.
|
|
||||||
Function "gmc1_altivec" (pmc1):
|
|
||||||
min: 231
|
|
||||||
max: 1339867
|
|
||||||
avg: 558.25 (255302)
|
|
||||||
Function "gmc1_altivec" (pmc2):
|
|
||||||
min: 93
|
|
||||||
max: 2164
|
|
||||||
avg: 267.31 (255302)
|
|
||||||
Function "gmc1_altivec" (pmc3):
|
|
||||||
min: 72
|
|
||||||
max: 1987
|
|
||||||
avg: 276.20 (255302)
|
|
||||||
(...)
|
|
||||||
#####
|
|
||||||
|
|
||||||
In this example, PMC1 was set to record CPU cycles, PMC2 was set to
|
|
||||||
record AltiVec Permute Stall Cycles, and PMC3 was set to record AltiVec
|
|
||||||
Issue Stalls.
|
|
||||||
|
|
||||||
The function "gmc1_altivec" was monitored 255302 times, and the
|
|
||||||
minimum execution time was 231 processor cycles. The max and average
|
|
||||||
aren't much use, as it's very likely the OS interrupted execution for
|
|
||||||
reasons of its own :-(
|
|
||||||
|
|
||||||
With the exact same settings and source file, but using the reference C
|
|
||||||
code we get:
|
|
||||||
|
|
||||||
#####
|
|
||||||
PowerPC performance report
|
|
||||||
Values are from the PMC registers, and represent whatever the
|
|
||||||
registers are set to record.
|
|
||||||
Function "gmc1_altivec" (pmc1):
|
|
||||||
min: 592
|
|
||||||
max: 2532235
|
|
||||||
avg: 962.88 (255302)
|
|
||||||
Function "gmc1_altivec" (pmc2):
|
|
||||||
min: 0
|
|
||||||
max: 33
|
|
||||||
avg: 0.00 (255302)
|
|
||||||
Function "gmc1_altivec" (pmc3):
|
|
||||||
min: 0
|
|
||||||
max: 350
|
|
||||||
avg: 0.03 (255302)
|
|
||||||
(...)
|
|
||||||
#####
|
|
||||||
|
|
||||||
592 cycles, so the fastest AltiVec execution is about 2.5x faster than
|
|
||||||
the fastest C execution in this example. It's not perfect but it's not
|
|
||||||
bad (well I wrote this function so I can't say otherwise :-).
|
|
||||||
|
|
||||||
Once you have that kind of report, you can try to improve things by
|
|
||||||
finding what goes wrong and fixing it; in the example above, one
|
|
||||||
should try to diminish the number of AltiVec stalls, as this *may*
|
|
||||||
improve performance.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
IV) Enabling the PMC in Mac OS X
|
|
||||||
|
|
||||||
This is easy. Use "MONster" and "monster". Those tools come from
|
|
||||||
Apple's CHUD package, and can be found hidden in the developer web
|
|
||||||
site & FTP site. "MONster" is the graphical application, use it to
|
|
||||||
generate a config file specifying what each register should
|
|
||||||
monitor. Then use the command-line application "monster" to use that
|
|
||||||
config file, and enjoy the results.
|
|
||||||
|
|
||||||
Note that "MONster" can be used for many other things, but it's
|
|
||||||
documented by Apple, it's not my subject.
|
|
||||||
|
|
||||||
If you are using CHUD 4.4.2 or later, you'll notice that MONster is
|
|
||||||
no longer available. It's been superseded by Shark, where
|
|
||||||
configuration of PMCs is available as a plugin.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
V) Enabling the PMC on Linux
|
|
||||||
|
|
||||||
On Linux you may use oprofile from http://oprofile.sf.net; depending on the
|
|
||||||
version and the cpu you may need to apply a patch[1] to access a set of the
|
|
||||||
possible counters from the userspace application. You can always define them
|
|
||||||
using the kernel interface /dev/oprofile/* .
|
|
||||||
|
|
||||||
[1] http://dev.gentoo.org/~lu_zero/development/oprofile-g4-20060423.patch
|
|
||||||
|
|
||||||
--
|
|
||||||
Romain Dolbeau <romain@dolbeau.org>
|
|
||||||
Luca Barbato <lu_zero@gentoo.org>
|
|
5
ffmpeg.c
5
ffmpeg.c
@ -618,11 +618,6 @@ static int av_exit(int ret)
|
|||||||
|
|
||||||
av_free(video_standard);
|
av_free(video_standard);
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
void powerpc_display_perf_report(void);
|
|
||||||
powerpc_display_perf_report();
|
|
||||||
#endif /* CONFIG_POWERPC_PERF */
|
|
||||||
|
|
||||||
for (i=0;i<AVMEDIA_TYPE_NB;i++)
|
for (i=0;i<AVMEDIA_TYPE_NB;i++)
|
||||||
av_free(avcodec_opts[i]);
|
av_free(avcodec_opts[i]);
|
||||||
av_free(avformat_opts);
|
av_free(avformat_opts);
|
||||||
|
@ -25,7 +25,6 @@
|
|||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
#endif
|
#endif
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
#include "types_altivec.h"
|
#include "types_altivec.h"
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
@ -610,7 +609,6 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
|
|||||||
/* next one assumes that ((line_size % 16) == 0) */
|
/* next one assumes that ((line_size % 16) == 0) */
|
||||||
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
|
|
||||||
register vector unsigned char pixelsv1, pixelsv2;
|
register vector unsigned char pixelsv1, pixelsv2;
|
||||||
register vector unsigned char pixelsv1B, pixelsv2B;
|
register vector unsigned char pixelsv1B, pixelsv2B;
|
||||||
register vector unsigned char pixelsv1C, pixelsv2C;
|
register vector unsigned char pixelsv1C, pixelsv2C;
|
||||||
@ -622,7 +620,6 @@ POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
|
|||||||
register int line_size_3 = line_size + line_size_2;
|
register int line_size_3 = line_size + line_size_2;
|
||||||
register int line_size_4 = line_size << 2;
|
register int line_size_4 = line_size << 2;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
|
|
||||||
// hand-unrolling the loop by 4 gains about 15%
|
// hand-unrolling the loop by 4 gains about 15%
|
||||||
// minimum execution time goes from 74 to 60 cycles
|
// minimum execution time goes from 74 to 60 cycles
|
||||||
// it's faster than -funroll-loops, but using
|
// it's faster than -funroll-loops, but using
|
||||||
@ -659,20 +656,16 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
|
|||||||
block +=line_size_4;
|
block +=line_size_4;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 16) == 0) */
|
/* next one assumes that ((line_size % 16) == 0) */
|
||||||
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
|
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
|
||||||
void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
|
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
||||||
register vector unsigned char perm = vec_lvsl(0, pixels);
|
register vector unsigned char perm = vec_lvsl(0, pixels);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
|
|
||||||
|
|
||||||
for (i = 0; i < h; i++) {
|
for (i = 0; i < h; i++) {
|
||||||
pixelsv1 = vec_ld( 0, pixels);
|
pixelsv1 = vec_ld( 0, pixels);
|
||||||
pixelsv2 = vec_ld(16,pixels);
|
pixelsv2 = vec_ld(16,pixels);
|
||||||
@ -683,19 +676,14 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
|
|||||||
pixels+=line_size;
|
pixels+=line_size;
|
||||||
block +=line_size;
|
block +=line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 8) == 0) */
|
/* next one assumes that ((line_size % 8) == 0) */
|
||||||
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
|
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
|
|
||||||
|
|
||||||
for (i = 0; i < h; i++) {
|
for (i = 0; i < h; i++) {
|
||||||
/* block is 8 bytes-aligned, so we're either in the
|
/* block is 8 bytes-aligned, so we're either in the
|
||||||
left block (16 bytes-aligned) or in the right block (not) */
|
left block (16 bytes-aligned) or in the right block (not) */
|
||||||
@ -719,14 +707,11 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
|
|||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 8) == 0) */
|
/* next one assumes that ((line_size % 8) == 0) */
|
||||||
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
||||||
register vector unsigned char blockv, temp1, temp2;
|
register vector unsigned char blockv, temp1, temp2;
|
||||||
@ -748,7 +733,6 @@ POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
|
|||||||
(vector unsigned short)pixelsv2);
|
(vector unsigned short)pixelsv2);
|
||||||
pixelssum1 = vec_add(pixelssum1, vctwo);
|
pixelssum1 = vec_add(pixelssum1, vctwo);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
|
||||||
for (i = 0; i < h ; i++) {
|
for (i = 0; i < h ; i++) {
|
||||||
int rightside = ((unsigned long)block & 0x0000000F);
|
int rightside = ((unsigned long)block & 0x0000000F);
|
||||||
blockv = vec_ld(0, block);
|
blockv = vec_ld(0, block);
|
||||||
@ -782,14 +766,11 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
|||||||
block += line_size;
|
block += line_size;
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 8) == 0) */
|
/* next one assumes that ((line_size % 8) == 0) */
|
||||||
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
||||||
register vector unsigned char blockv, temp1, temp2;
|
register vector unsigned char blockv, temp1, temp2;
|
||||||
@ -812,7 +793,6 @@ POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|||||||
(vector unsigned short)pixelsv2);
|
(vector unsigned short)pixelsv2);
|
||||||
pixelssum1 = vec_add(pixelssum1, vcone);
|
pixelssum1 = vec_add(pixelssum1, vcone);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|
||||||
for (i = 0; i < h ; i++) {
|
for (i = 0; i < h ; i++) {
|
||||||
int rightside = ((unsigned long)block & 0x0000000F);
|
int rightside = ((unsigned long)block & 0x0000000F);
|
||||||
blockv = vec_ld(0, block);
|
blockv = vec_ld(0, block);
|
||||||
@ -846,14 +826,11 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|||||||
block += line_size;
|
block += line_size;
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 16) == 0) */
|
/* next one assumes that ((line_size % 16) == 0) */
|
||||||
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
|
||||||
register vector unsigned char blockv, temp1, temp2;
|
register vector unsigned char blockv, temp1, temp2;
|
||||||
@ -862,8 +839,6 @@ POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
|
|||||||
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
||||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
|
||||||
|
|
||||||
temp1 = vec_ld(0, pixels);
|
temp1 = vec_ld(0, pixels);
|
||||||
temp2 = vec_ld(16, pixels);
|
temp2 = vec_ld(16, pixels);
|
||||||
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
||||||
@ -919,14 +894,11 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
|||||||
block += line_size;
|
block += line_size;
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next one assumes that ((line_size % 16) == 0) */
|
/* next one assumes that ((line_size % 16) == 0) */
|
||||||
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
|
||||||
register vector unsigned char blockv, temp1, temp2;
|
register vector unsigned char blockv, temp1, temp2;
|
||||||
@ -936,8 +908,6 @@ POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|||||||
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
||||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
||||||
|
|
||||||
temp1 = vec_ld(0, pixels);
|
temp1 = vec_ld(0, pixels);
|
||||||
temp2 = vec_ld(16, pixels);
|
temp2 = vec_ld(16, pixels);
|
||||||
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
||||||
@ -993,18 +963,14 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|||||||
block += line_size;
|
block += line_size;
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
|
static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
|
||||||
POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
|
|
||||||
int sum;
|
int sum;
|
||||||
register const vector unsigned char vzero =
|
register const vector unsigned char vzero =
|
||||||
(const vector unsigned char)vec_splat_u8(0);
|
(const vector unsigned char)vec_splat_u8(0);
|
||||||
register vector signed short temp0, temp1, temp2, temp3, temp4,
|
register vector signed short temp0, temp1, temp2, temp3, temp4,
|
||||||
temp5, temp6, temp7;
|
temp5, temp6, temp7;
|
||||||
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
|
||||||
{
|
{
|
||||||
register const vector signed short vprod1 =(const vector signed short)
|
register const vector signed short vprod1 =(const vector signed short)
|
||||||
{ 1,-1, 1,-1, 1,-1, 1,-1 };
|
{ 1,-1, 1,-1, 1,-1, 1,-1 };
|
||||||
@ -1100,7 +1066,6 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
|||||||
vsum = vec_splat(vsum, 3);
|
vsum = vec_splat(vsum, 3);
|
||||||
vec_ste(vsum, 0, &sum);
|
vec_ste(vsum, 0, &sum);
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1319,16 +1284,13 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
|
static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
|
||||||
POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
|
|
||||||
int score;
|
int score;
|
||||||
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
|
|
||||||
score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
|
score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
|
||||||
if (h==16) {
|
if (h==16) {
|
||||||
dst += 8*stride;
|
dst += 8*stride;
|
||||||
src += 8*stride;
|
src += 8*stride;
|
||||||
score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
|
score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
|
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1358,7 +1320,6 @@ static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
|
|||||||
/* next one assumes that ((line_size % 8) == 0) */
|
/* next one assumes that ((line_size % 8) == 0) */
|
||||||
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
|
||||||
register vector unsigned char blockv, temp1, temp2, blocktemp;
|
register vector unsigned char blockv, temp1, temp2, blocktemp;
|
||||||
@ -1383,7 +1344,6 @@ POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
|
|||||||
(vector unsigned short)pixelsv2);
|
(vector unsigned short)pixelsv2);
|
||||||
pixelssum1 = vec_add(pixelssum1, vctwo);
|
pixelssum1 = vec_add(pixelssum1, vctwo);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
|
||||||
for (i = 0; i < h ; i++) {
|
for (i = 0; i < h ; i++) {
|
||||||
int rightside = ((unsigned long)block & 0x0000000F);
|
int rightside = ((unsigned long)block & 0x0000000F);
|
||||||
blockv = vec_ld(0, block);
|
blockv = vec_ld(0, block);
|
||||||
@ -1418,8 +1378,6 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
|||||||
block += line_size;
|
block += line_size;
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
|
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
|
||||||
|
@ -21,9 +21,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
|
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
|
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
|
|
||||||
int mm_flags = 0;
|
int mm_flags = 0;
|
||||||
@ -39,63 +36,6 @@ int mm_support(void)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
|
|
||||||
/* list below must match enum in dsputil_ppc.h */
|
|
||||||
static unsigned char* perfname[] = {
|
|
||||||
"ff_fft_calc_altivec",
|
|
||||||
"gmc1_altivec",
|
|
||||||
"dct_unquantize_h263_altivec",
|
|
||||||
"fdct_altivec",
|
|
||||||
"idct_add_altivec",
|
|
||||||
"idct_put_altivec",
|
|
||||||
"put_pixels16_altivec",
|
|
||||||
"avg_pixels16_altivec",
|
|
||||||
"avg_pixels8_altivec",
|
|
||||||
"put_pixels8_xy2_altivec",
|
|
||||||
"put_no_rnd_pixels8_xy2_altivec",
|
|
||||||
"put_pixels16_xy2_altivec",
|
|
||||||
"put_no_rnd_pixels16_xy2_altivec",
|
|
||||||
"hadamard8_diff8x8_altivec",
|
|
||||||
"hadamard8_diff16_altivec",
|
|
||||||
"avg_pixels8_xy2_altivec",
|
|
||||||
"clear_blocks_dcbz32_ppc",
|
|
||||||
"clear_blocks_dcbz128_ppc",
|
|
||||||
"put_h264_chroma_mc8_altivec",
|
|
||||||
"avg_h264_chroma_mc8_altivec",
|
|
||||||
"put_h264_qpel16_h_lowpass_altivec",
|
|
||||||
"avg_h264_qpel16_h_lowpass_altivec",
|
|
||||||
"put_h264_qpel16_v_lowpass_altivec",
|
|
||||||
"avg_h264_qpel16_v_lowpass_altivec",
|
|
||||||
"put_h264_qpel16_hv_lowpass_altivec",
|
|
||||||
"avg_h264_qpel16_hv_lowpass_altivec",
|
|
||||||
""
|
|
||||||
};
|
|
||||||
#include <stdio.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
void powerpc_display_perf_report(void)
|
|
||||||
{
|
|
||||||
int i, j;
|
|
||||||
av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
|
|
||||||
for(i = 0 ; i < powerpc_perf_total ; i++) {
|
|
||||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
|
|
||||||
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
|
||||||
av_log(NULL, AV_LOG_INFO,
|
|
||||||
" Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
|
|
||||||
perfname[i],
|
|
||||||
j+1,
|
|
||||||
perfdata[j][i][powerpc_data_min],
|
|
||||||
perfdata[j][i][powerpc_data_max],
|
|
||||||
(double)perfdata[j][i][powerpc_data_sum] /
|
|
||||||
(double)perfdata[j][i][powerpc_data_num],
|
|
||||||
perfdata[j][i][powerpc_data_num]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_POWERPC_PERF */
|
|
||||||
|
|
||||||
/* ***** WARNING ***** WARNING ***** WARNING ***** */
|
/* ***** WARNING ***** WARNING ***** WARNING ***** */
|
||||||
/*
|
/*
|
||||||
clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
|
clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
|
||||||
@ -118,10 +58,8 @@ and <http://developer.apple.com/technotes/tn/tn2086.html>
|
|||||||
*/
|
*/
|
||||||
static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
|
static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
|
|
||||||
register int misal = ((unsigned long)blocks & 0x00000010);
|
register int misal = ((unsigned long)blocks & 0x00000010);
|
||||||
register int i = 0;
|
register int i = 0;
|
||||||
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
|
||||||
#if 1
|
#if 1
|
||||||
if (misal) {
|
if (misal) {
|
||||||
((unsigned long*)blocks)[0] = 0L;
|
((unsigned long*)blocks)[0] = 0L;
|
||||||
@ -143,7 +81,6 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
|||||||
#else
|
#else
|
||||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||||
#endif
|
#endif
|
||||||
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* same as above, when dcbzl clear a whole 128B cache line
|
/* same as above, when dcbzl clear a whole 128B cache line
|
||||||
@ -151,10 +88,8 @@ POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
|||||||
#if HAVE_DCBZL
|
#if HAVE_DCBZL
|
||||||
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
|
|
||||||
register int misal = ((unsigned long)blocks & 0x0000007f);
|
register int misal = ((unsigned long)blocks & 0x0000007f);
|
||||||
register int i = 0;
|
register int i = 0;
|
||||||
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
|
||||||
#if 1
|
#if 1
|
||||||
if (misal) {
|
if (misal) {
|
||||||
// we could probably also optimize this case,
|
// we could probably also optimize this case,
|
||||||
@ -169,7 +104,6 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
|||||||
#else
|
#else
|
||||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||||
#endif
|
#endif
|
||||||
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
||||||
@ -279,19 +213,6 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
{
|
|
||||||
int i, j;
|
|
||||||
for (i = 0 ; i < powerpc_perf_total ; i++) {
|
|
||||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
|
|
||||||
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
|
|
||||||
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
|
|
||||||
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
|
|
||||||
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_POWERPC_PERF */
|
|
||||||
}
|
}
|
||||||
#endif /* HAVE_ALTIVEC */
|
#endif /* HAVE_ALTIVEC */
|
||||||
}
|
}
|
||||||
|
@ -1,154 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef AVCODEC_PPC_DSPUTIL_PPC_H
|
|
||||||
#define AVCODEC_PPC_DSPUTIL_PPC_H
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
void powerpc_display_perf_report(void);
|
|
||||||
/* the 604* have 2, the G3* have 4, the G4s have 6,
|
|
||||||
and the G5 are completely different (they MUST use
|
|
||||||
ARCH_PPC64, and let's hope all future 64 bis PPC
|
|
||||||
will use the same PMCs... */
|
|
||||||
#define POWERPC_NUM_PMC_ENABLED 6
|
|
||||||
/* if you add to the enum below, also add to the perfname array
|
|
||||||
in dsputil_ppc.c */
|
|
||||||
enum powerpc_perf_index {
|
|
||||||
altivec_fft_num = 0,
|
|
||||||
altivec_gmc1_num,
|
|
||||||
altivec_dct_unquantize_h263_num,
|
|
||||||
altivec_fdct,
|
|
||||||
altivec_idct_add_num,
|
|
||||||
altivec_idct_put_num,
|
|
||||||
altivec_put_pixels16_num,
|
|
||||||
altivec_avg_pixels16_num,
|
|
||||||
altivec_avg_pixels8_num,
|
|
||||||
altivec_put_pixels8_xy2_num,
|
|
||||||
altivec_put_no_rnd_pixels8_xy2_num,
|
|
||||||
altivec_put_pixels16_xy2_num,
|
|
||||||
altivec_put_no_rnd_pixels16_xy2_num,
|
|
||||||
altivec_hadamard8_diff8x8_num,
|
|
||||||
altivec_hadamard8_diff16_num,
|
|
||||||
altivec_avg_pixels8_xy2_num,
|
|
||||||
powerpc_clear_blocks_dcbz32,
|
|
||||||
powerpc_clear_blocks_dcbz128,
|
|
||||||
altivec_put_h264_chroma_mc8_num,
|
|
||||||
altivec_avg_h264_chroma_mc8_num,
|
|
||||||
altivec_put_h264_qpel16_h_lowpass_num,
|
|
||||||
altivec_avg_h264_qpel16_h_lowpass_num,
|
|
||||||
altivec_put_h264_qpel16_v_lowpass_num,
|
|
||||||
altivec_avg_h264_qpel16_v_lowpass_num,
|
|
||||||
altivec_put_h264_qpel16_hv_lowpass_num,
|
|
||||||
altivec_avg_h264_qpel16_hv_lowpass_num,
|
|
||||||
powerpc_perf_total
|
|
||||||
};
|
|
||||||
enum powerpc_data_index {
|
|
||||||
powerpc_data_min = 0,
|
|
||||||
powerpc_data_max,
|
|
||||||
powerpc_data_sum,
|
|
||||||
powerpc_data_num,
|
|
||||||
powerpc_data_total
|
|
||||||
};
|
|
||||||
extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
|
|
||||||
|
|
||||||
#if !ARCH_PPC64
|
|
||||||
#define POWERP_PMC_DATATYPE unsigned long
|
|
||||||
#define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 937" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 938" : "=r" (a))
|
|
||||||
#if (POWERPC_NUM_PMC_ENABLED > 2)
|
|
||||||
#define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 941" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 942" : "=r" (a))
|
|
||||||
#else
|
|
||||||
#define POWERPC_GET_PMC3(a) do {} while (0)
|
|
||||||
#define POWERPC_GET_PMC4(a) do {} while (0)
|
|
||||||
#endif
|
|
||||||
#if (POWERPC_NUM_PMC_ENABLED > 4)
|
|
||||||
#define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 929" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 930" : "=r" (a))
|
|
||||||
#else
|
|
||||||
#define POWERPC_GET_PMC5(a) do {} while (0)
|
|
||||||
#define POWERPC_GET_PMC6(a) do {} while (0)
|
|
||||||
#endif
|
|
||||||
#else /* ARCH_PPC64 */
|
|
||||||
#define POWERP_PMC_DATATYPE unsigned long long
|
|
||||||
#define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 771" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 772" : "=r" (a))
|
|
||||||
#if (POWERPC_NUM_PMC_ENABLED > 2)
|
|
||||||
#define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 773" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 774" : "=r" (a))
|
|
||||||
#else
|
|
||||||
#define POWERPC_GET_PMC3(a) do {} while (0)
|
|
||||||
#define POWERPC_GET_PMC4(a) do {} while (0)
|
|
||||||
#endif
|
|
||||||
#if (POWERPC_NUM_PMC_ENABLED > 4)
|
|
||||||
#define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 775" : "=r" (a))
|
|
||||||
#define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 776" : "=r" (a))
|
|
||||||
#else
|
|
||||||
#define POWERPC_GET_PMC5(a) do {} while (0)
|
|
||||||
#define POWERPC_GET_PMC6(a) do {} while (0)
|
|
||||||
#endif
|
|
||||||
#endif /* ARCH_PPC64 */
|
|
||||||
#define POWERPC_PERF_DECLARE(a, cond) \
|
|
||||||
POWERP_PMC_DATATYPE \
|
|
||||||
pmc_start[POWERPC_NUM_PMC_ENABLED], \
|
|
||||||
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
|
|
||||||
pmc_loop_index;
|
|
||||||
#define POWERPC_PERF_START_COUNT(a, cond) do { \
|
|
||||||
POWERPC_GET_PMC6(pmc_start[5]); \
|
|
||||||
POWERPC_GET_PMC5(pmc_start[4]); \
|
|
||||||
POWERPC_GET_PMC4(pmc_start[3]); \
|
|
||||||
POWERPC_GET_PMC3(pmc_start[2]); \
|
|
||||||
POWERPC_GET_PMC2(pmc_start[1]); \
|
|
||||||
POWERPC_GET_PMC1(pmc_start[0]); \
|
|
||||||
} while (0)
|
|
||||||
#define POWERPC_PERF_STOP_COUNT(a, cond) do { \
|
|
||||||
POWERPC_GET_PMC1(pmc_stop[0]); \
|
|
||||||
POWERPC_GET_PMC2(pmc_stop[1]); \
|
|
||||||
POWERPC_GET_PMC3(pmc_stop[2]); \
|
|
||||||
POWERPC_GET_PMC4(pmc_stop[3]); \
|
|
||||||
POWERPC_GET_PMC5(pmc_stop[4]); \
|
|
||||||
POWERPC_GET_PMC6(pmc_stop[5]); \
|
|
||||||
if (cond) { \
|
|
||||||
for(pmc_loop_index = 0; \
|
|
||||||
pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
|
|
||||||
pmc_loop_index++) { \
|
|
||||||
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) { \
|
|
||||||
POWERP_PMC_DATATYPE diff = \
|
|
||||||
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
|
|
||||||
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
|
|
||||||
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
|
|
||||||
if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
|
|
||||||
perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
|
|
||||||
perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \
|
|
||||||
perfdata[pmc_loop_index][a][powerpc_data_num] ++; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
#else /* CONFIG_POWERPC_PERF */
|
|
||||||
// those are needed to avoid empty statements.
|
|
||||||
#define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
|
|
||||||
#define POWERPC_PERF_START_COUNT(a, cond) do {} while (0)
|
|
||||||
#define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0)
|
|
||||||
#endif /* CONFIG_POWERPC_PERF */
|
|
||||||
|
|
||||||
#endif /* AVCODEC_PPC_DSPUTIL_PPC_H */
|
|
@ -24,7 +24,6 @@
|
|||||||
#endif
|
#endif
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
|
|
||||||
#define vs16(v) ((vector signed short)(v))
|
#define vs16(v) ((vector signed short)(v))
|
||||||
@ -198,7 +197,6 @@ static vector float fdctconsts[3] = {
|
|||||||
|
|
||||||
void fdct_altivec(int16_t *block)
|
void fdct_altivec(int16_t *block)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_fdct, 1);
|
|
||||||
vector signed short *bp;
|
vector signed short *bp;
|
||||||
vector float *cp;
|
vector float *cp;
|
||||||
vector float b00, b10, b20, b30, b40, b50, b60, b70;
|
vector float b00, b10, b20, b30, b40, b50, b60, b70;
|
||||||
@ -206,9 +204,6 @@ POWERPC_PERF_DECLARE(altivec_fdct, 1);
|
|||||||
vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
|
vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
|
||||||
vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_fdct, 1);
|
|
||||||
|
|
||||||
|
|
||||||
/* setup constants {{{ */
|
/* setup constants {{{ */
|
||||||
/* mzero = -0.0 */
|
/* mzero = -0.0 */
|
||||||
mzero = ((vector float)vec_splat_u32(-1));
|
mzero = ((vector float)vec_splat_u32(-1));
|
||||||
@ -487,8 +482,6 @@ POWERPC_PERF_DECLARE(altivec_fdct, 1);
|
|||||||
|
|
||||||
#undef CTS
|
#undef CTS
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_fdct, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* vim:set foldmethod=marker foldlevel=0: */
|
/* vim:set foldmethod=marker foldlevel=0: */
|
||||||
|
@ -21,7 +21,6 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
#include "libavcodec/fft.h"
|
#include "libavcodec/fft.h"
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
|
|
||||||
@ -38,7 +37,6 @@
|
|||||||
*/
|
*/
|
||||||
static void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
|
static void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
|
||||||
register const vector float vczero = (const vector float)vec_splat_u32(0.);
|
register const vector float vczero = (const vector float)vec_splat_u32(0.);
|
||||||
|
|
||||||
int ln = s->nbits;
|
int ln = s->nbits;
|
||||||
@ -48,8 +46,6 @@ POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
|||||||
FFTComplex *cptr, *cptr1;
|
FFTComplex *cptr, *cptr1;
|
||||||
int k;
|
int k;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
|
||||||
|
|
||||||
np = 1 << ln;
|
np = 1 << ln;
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -132,8 +128,6 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
|||||||
nblocks = nblocks >> 1;
|
nblocks = nblocks >> 1;
|
||||||
nloops = nloops << 1;
|
nloops = nloops << 1;
|
||||||
} while (nblocks != 0);
|
} while (nblocks != 0);
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
av_cold void ff_fft_init_altivec(FFTContext *s)
|
av_cold void ff_fft_init_altivec(FFTContext *s)
|
||||||
|
@ -21,7 +21,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
#include "types_altivec.h"
|
#include "types_altivec.h"
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
@ -30,10 +29,8 @@
|
|||||||
altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
|
altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
|
||||||
to preserve proper dst alignment.
|
to preserve proper dst alignment.
|
||||||
*/
|
*/
|
||||||
#define GMC1_PERF_COND (h==8)
|
|
||||||
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
|
||||||
const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
|
const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
|
||||||
const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] =
|
const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] =
|
||||||
{
|
{
|
||||||
@ -51,9 +48,6 @@ POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
|||||||
unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
|
unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
|
||||||
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
|
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
|
||||||
|
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
|
||||||
|
|
||||||
tempA = vec_ld(0, (unsigned short*)ABCD);
|
tempA = vec_ld(0, (unsigned short*)ABCD);
|
||||||
Av = vec_splat(tempA, 0);
|
Av = vec_splat(tempA, 0);
|
||||||
Bv = vec_splat(tempA, 1);
|
Bv = vec_splat(tempA, 1);
|
||||||
@ -133,6 +127,4 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
|||||||
dst += stride;
|
dst += stride;
|
||||||
src += stride;
|
src += stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,6 @@
|
|||||||
#include "libavcodec/h264data.h"
|
#include "libavcodec/h264data.h"
|
||||||
#include "libavcodec/h264dsp.h"
|
#include "libavcodec/h264dsp.h"
|
||||||
|
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
#include "types_altivec.h"
|
#include "types_altivec.h"
|
||||||
|
@ -77,7 +77,6 @@
|
|||||||
|
|
||||||
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
||||||
int stride, int h, int x, int y) {
|
int stride, int h, int x, int y) {
|
||||||
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
|
|
||||||
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
|
||||||
{((8 - x) * (8 - y)),
|
{((8 - x) * (8 - y)),
|
||||||
(( x) * (8 - y)),
|
(( x) * (8 - y)),
|
||||||
@ -103,8 +102,6 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
|||||||
vec_s16 vsrc2ssH, vsrc3ssH, psum;
|
vec_s16 vsrc2ssH, vsrc3ssH, psum;
|
||||||
vec_u8 vdst, ppsum, vfdst, fsum;
|
vec_u8 vdst, ppsum, vfdst, fsum;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
|
|
||||||
|
|
||||||
if (((unsigned long)dst) % 16 == 0) {
|
if (((unsigned long)dst) % 16 == 0) {
|
||||||
fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
|
fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
|
||||||
0x14, 0x15, 0x16, 0x17,
|
0x14, 0x15, 0x16, 0x17,
|
||||||
@ -203,7 +200,6 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this code assume that stride % 16 == 0 */
|
/* this code assume that stride % 16 == 0 */
|
||||||
@ -295,7 +291,6 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, i
|
|||||||
|
|
||||||
/* this code assume stride % 16 == 0 */
|
/* this code assume stride % 16 == 0 */
|
||||||
static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
|
static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
|
||||||
POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
|
|
||||||
LOAD_ZERO;
|
LOAD_ZERO;
|
||||||
@ -323,8 +318,6 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
|
|||||||
|
|
||||||
vec_u8 sum, vdst, fsum;
|
vec_u8 sum, vdst, fsum;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
|
|
||||||
|
|
||||||
for (i = 0 ; i < 16 ; i ++) {
|
for (i = 0 ; i < 16 ; i ++) {
|
||||||
vec_u8 srcR1 = vec_ld(-2, src);
|
vec_u8 srcR1 = vec_ld(-2, src);
|
||||||
vec_u8 srcR2 = vec_ld(14, src);
|
vec_u8 srcR2 = vec_ld(14, src);
|
||||||
@ -433,13 +426,10 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
|
|||||||
src += srcStride;
|
src += srcStride;
|
||||||
dst += dstStride;
|
dst += dstStride;
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this code assume stride % 16 == 0 */
|
/* this code assume stride % 16 == 0 */
|
||||||
static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
|
static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
|
||||||
POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
|
|
||||||
|
|
||||||
register int i;
|
register int i;
|
||||||
|
|
||||||
LOAD_ZERO;
|
LOAD_ZERO;
|
||||||
@ -490,8 +480,6 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
|
|||||||
|
|
||||||
vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3;
|
vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
|
|
||||||
|
|
||||||
for (i = 0 ; i < 16 ; i++) {
|
for (i = 0 ; i < 16 ; i++) {
|
||||||
srcP3a = vec_ld(0, srcbis += srcStride);
|
srcP3a = vec_ld(0, srcbis += srcStride);
|
||||||
srcP3b = vec_ld(16, srcbis);
|
srcP3b = vec_ld(16, srcbis);
|
||||||
@ -544,12 +532,10 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
|
|||||||
|
|
||||||
dst += dstStride;
|
dst += dstStride;
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this code assume stride % 16 == 0 *and* tmp is properly aligned */
|
/* this code assume stride % 16 == 0 *and* tmp is properly aligned */
|
||||||
static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
|
static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
|
||||||
POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
|
|
||||||
register int i;
|
register int i;
|
||||||
LOAD_ZERO;
|
LOAD_ZERO;
|
||||||
const vec_u8 permM2 = vec_lvsl(-2, src);
|
const vec_u8 permM2 = vec_lvsl(-2, src);
|
||||||
@ -589,7 +575,6 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
|
|||||||
vec_u8 fsum, sumv, sum, vdst;
|
vec_u8 fsum, sumv, sum, vdst;
|
||||||
vec_s16 ssume, ssumo;
|
vec_s16 ssume, ssumo;
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
|
|
||||||
src -= (2 * srcStride);
|
src -= (2 * srcStride);
|
||||||
for (i = 0 ; i < 21 ; i ++) {
|
for (i = 0 ; i < 21 ; i ++) {
|
||||||
vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
|
vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
|
||||||
@ -779,5 +764,4 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
|
|||||||
|
|
||||||
dst += dstStride;
|
dst += dstStride;
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
|
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,6 @@
|
|||||||
#endif
|
#endif
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "types_altivec.h"
|
#include "types_altivec.h"
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
|
|
||||||
#define IDCT_HALF \
|
#define IDCT_HALF \
|
||||||
@ -161,13 +160,9 @@ static const vec_s16 constants[5] = {
|
|||||||
|
|
||||||
void idct_put_altivec(uint8_t* dest, int stride, int16_t *blk)
|
void idct_put_altivec(uint8_t* dest, int stride, int16_t *blk)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
|
|
||||||
vec_s16 *block = (vec_s16*)blk;
|
vec_s16 *block = (vec_s16*)blk;
|
||||||
vec_u8 tmp;
|
vec_u8 tmp;
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
|
||||||
#endif
|
|
||||||
IDCT
|
IDCT
|
||||||
|
|
||||||
#define COPY(dest,src) \
|
#define COPY(dest,src) \
|
||||||
@ -183,13 +178,10 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
|||||||
COPY (dest, vx5) dest += stride;
|
COPY (dest, vx5) dest += stride;
|
||||||
COPY (dest, vx6) dest += stride;
|
COPY (dest, vx6) dest += stride;
|
||||||
COPY (dest, vx7)
|
COPY (dest, vx7)
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk)
|
void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
|
|
||||||
vec_s16 *block = (vec_s16*)blk;
|
vec_s16 *block = (vec_s16*)blk;
|
||||||
vec_u8 tmp;
|
vec_u8 tmp;
|
||||||
vec_s16 tmp2, tmp3;
|
vec_s16 tmp2, tmp3;
|
||||||
@ -197,10 +189,6 @@ POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
|
|||||||
vec_u8 perm1;
|
vec_u8 perm1;
|
||||||
vec_u8 p0, p1, p;
|
vec_u8 p0, p1, p;
|
||||||
|
|
||||||
#if CONFIG_POWERPC_PERF
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
IDCT
|
IDCT
|
||||||
|
|
||||||
p0 = vec_lvsl (0, dest);
|
p0 = vec_lvsl (0, dest);
|
||||||
@ -226,7 +214,5 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
|||||||
ADD (dest, vx5, perm1) dest += stride;
|
ADD (dest, vx5, perm1) dest += stride;
|
||||||
ADD (dest, vx6, perm0) dest += stride;
|
ADD (dest, vx6, perm0) dest += stride;
|
||||||
ADD (dest, vx7, perm1)
|
ADD (dest, vx7, perm1)
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "libavcodec/mpegvideo.h"
|
#include "libavcodec/mpegvideo.h"
|
||||||
|
|
||||||
#include "dsputil_ppc.h"
|
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
#include "types_altivec.h"
|
#include "types_altivec.h"
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
@ -479,14 +478,11 @@ static int dct_quantize_altivec(MpegEncContext* s,
|
|||||||
static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
||||||
DCTELEM *block, int n, int qscale)
|
DCTELEM *block, int n, int qscale)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
|
|
||||||
int i, level, qmul, qadd;
|
int i, level, qmul, qadd;
|
||||||
int nCoeffs;
|
int nCoeffs;
|
||||||
|
|
||||||
assert(s->block_last_index[n]>=0);
|
assert(s->block_last_index[n]>=0);
|
||||||
|
|
||||||
POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
|
||||||
|
|
||||||
qadd = (qscale - 1) | 1;
|
qadd = (qscale - 1) | 1;
|
||||||
qmul = qscale << 1;
|
qmul = qscale << 1;
|
||||||
|
|
||||||
@ -569,7 +565,6 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
|||||||
block[0] = backup_0;
|
block[0] = backup_0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user