mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-03 05:10:03 +02:00
Merge replacement of MPEG group reference DCT code.
Originally committed as revision 18492 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
This commit is contained in:
parent
266f6af570
commit
b0b57fa13b
4
LICENSE
4
LICENSE
@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely:
|
||||
|
||||
* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
|
||||
are taken from libjpeg, see the top of the files for licensing details.
|
||||
|
||||
* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
|
||||
Group with all rights reserved. It is only used to create a DCT test program
|
||||
and not compiled into libavcodec.
|
||||
|
1
doc/TODO
1
doc/TODO
@ -81,7 +81,6 @@ unassigned TODO: (unordered)
|
||||
- add support for using mplayers video filters to ffmpeg
|
||||
- H264 encoder
|
||||
- per MB ratecontrol (so VCD and such do work better)
|
||||
- replace/rewrite libavcodec/fdctref.c
|
||||
- write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions
|
||||
- convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc
|
||||
- generic audio mixing API
|
||||
|
@ -533,5 +533,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
|
||||
|
||||
include $(SUBDIR)../subdir.mak
|
||||
|
||||
$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o
|
||||
$(SUBDIR)fft-test$(EXESUF): $(SUBDIR)fdctref.o
|
||||
$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
|
||||
|
@ -46,9 +46,9 @@
|
||||
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
|
||||
|
||||
/* reference fdct/idct */
|
||||
void fdct(DCTELEM *block);
|
||||
void idct(DCTELEM *block);
|
||||
void init_fdct(void);
|
||||
void ff_ref_fdct(DCTELEM *block);
|
||||
void ff_ref_idct(DCTELEM *block);
|
||||
void ff_ref_dct_init(void);
|
||||
|
||||
void ff_mmx_idct(DCTELEM *data);
|
||||
void ff_mmxext_idct(DCTELEM *data);
|
||||
@ -90,57 +90,57 @@ struct algo {
|
||||
static int cpu_flags;
|
||||
|
||||
struct algo algos[] = {
|
||||
{"REF-DBL", 0, fdct, fdct, NO_PERM},
|
||||
{"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
|
||||
{"FAANI", 1, ff_faanidct, idct, NO_PERM},
|
||||
{"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
|
||||
{"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
|
||||
{"REF-DBL", 1, idct, idct, NO_PERM},
|
||||
{"INT", 1, j_rev_dct, idct, MMX_PERM},
|
||||
{"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
|
||||
{"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
|
||||
{"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
|
||||
{"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
|
||||
{"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
|
||||
{"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
|
||||
{"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
|
||||
{"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
|
||||
{"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
|
||||
|
||||
#if HAVE_MMX
|
||||
{"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, FF_MM_MMX},
|
||||
{"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX},
|
||||
#if HAVE_MMX2
|
||||
{"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, FF_MM_MMXEXT},
|
||||
{"SSE2", 0, ff_fdct_sse2, fdct, NO_PERM, FF_MM_SSE2},
|
||||
{"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMXEXT},
|
||||
{"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2},
|
||||
#endif
|
||||
|
||||
#if CONFIG_GPL
|
||||
{"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, FF_MM_MMX},
|
||||
{"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, FF_MM_MMXEXT},
|
||||
{"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX},
|
||||
{"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMXEXT},
|
||||
#endif
|
||||
{"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
|
||||
{"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, FF_MM_MMX},
|
||||
{"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, FF_MM_MMXEXT},
|
||||
{"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, FF_MM_SSE2},
|
||||
{"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
|
||||
{"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX},
|
||||
{"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMXEXT},
|
||||
{"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
|
||||
#endif
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
{"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, FF_MM_ALTIVEC},
|
||||
{"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
|
||||
#endif
|
||||
|
||||
#if ARCH_BFIN
|
||||
{"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
|
||||
{"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
|
||||
{"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
|
||||
{"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
|
||||
#endif
|
||||
|
||||
#if ARCH_ARM
|
||||
{"SIMPLE-ARM", 1, simple_idct_ARM, idct, NO_PERM },
|
||||
{"INT-ARM", 1, j_rev_dct_ARM, idct, MMX_PERM },
|
||||
{"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM },
|
||||
{"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM },
|
||||
#if HAVE_ARMV5TE
|
||||
{"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, idct, NO_PERM },
|
||||
{"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
|
||||
#endif
|
||||
#if HAVE_ARMV6
|
||||
{"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, idct, MMX_PERM },
|
||||
{"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
|
||||
#endif
|
||||
#if HAVE_NEON
|
||||
{"SIMPLE-NEON", 1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
|
||||
{"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
|
||||
#endif
|
||||
#endif /* ARCH_ARM */
|
||||
|
||||
#if ARCH_ALPHA
|
||||
{"SIMPLE-ALPHA", 1, ff_simple_idct_axp, idct, NO_PERM },
|
||||
{"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
|
||||
#endif
|
||||
|
||||
{ 0 }
|
||||
@ -222,7 +222,7 @@ void dct_error(const char *name, int is_idct,
|
||||
for(i=0;i<64;i++)
|
||||
block1[i] = (random() % 512) -256;
|
||||
if (is_idct){
|
||||
fdct(block1);
|
||||
ff_ref_fdct(block1);
|
||||
|
||||
for(i=0;i<64;i++)
|
||||
block1[i]>>=3;
|
||||
@ -336,7 +336,7 @@ void dct_error(const char *name, int is_idct,
|
||||
for(i=0;i<64;i++)
|
||||
block1[i] = (random() % 512) -256;
|
||||
if (is_idct){
|
||||
fdct(block1);
|
||||
ff_ref_fdct(block1);
|
||||
|
||||
for(i=0;i<64;i++)
|
||||
block1[i]>>=3;
|
||||
@ -559,7 +559,7 @@ int main(int argc, char **argv)
|
||||
int test=1;
|
||||
cpu_flags = mm_support();
|
||||
|
||||
init_fdct();
|
||||
ff_ref_dct_init();
|
||||
idct_mmx_init();
|
||||
|
||||
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
|
||||
|
121
libavcodec/dctref.c
Normal file
121
libavcodec/dctref.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* reference discrete cosine transform (double precision)
|
||||
* Copyright (C) 2009 Dylan Yudaken
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file libavcodec/dctref.c
|
||||
* reference discrete cosine transform (double precision)
|
||||
*
|
||||
* @author Dylan Yudaken (dyudaken at gmail)
|
||||
*
|
||||
* @note This file could be optimized a lot, but is for
|
||||
* reference and so readability is better.
|
||||
*/
|
||||
|
||||
#include "libavutil/mathematics.h"
|
||||
/* 8x8 table of DCT basis values, stored row-major (index = 8 * row + column).
 * All zero until ff_ref_dct_init() fills it. */
static double coefficients[8 * 8];
|
||||
|
||||
/**
|
||||
* Initialize the double precision discrete cosine transform
|
||||
* functions fdct & idct.
|
||||
*/
|
||||
av_cold void ff_ref_dct_init(void)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (j = 0; j < 8; ++j) {
|
||||
coefficients[j] = sqrt(0.125);
|
||||
for (i = 8; i < 64; i += 8) {
|
||||
coefficients[i + j] = 0.5 * cos(i * (j + 0.5) * M_PI / 64.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform 8x8 block of data with a double precision forward DCT <br>
|
||||
* This is a reference implementation.
|
||||
*
|
||||
* @param block pointer to 8x8 block of data to transform
|
||||
*/
|
||||
void ff_ref_fdct(short *block)
|
||||
{
|
||||
/* implement the equation: block = coefficients * block * coefficients' */
|
||||
|
||||
unsigned int i, j, k;
|
||||
double out[8 * 8];
|
||||
|
||||
/* out = coefficients * block */
|
||||
for (i = 0; i < 64; i += 8) {
|
||||
for (j = 0; j < 8; ++j) {
|
||||
double tmp = 0;
|
||||
for (k = 0; k < 8; ++k) {
|
||||
tmp += coefficients[i + k] * block[k * 8 + j];
|
||||
}
|
||||
out[i + j] = tmp * 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* block = out * (coefficients') */
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 64; i += 8) {
|
||||
double tmp = 0;
|
||||
for (k = 0; k < 8; ++k) {
|
||||
tmp += out[i + k] * coefficients[j * 8 + k];
|
||||
}
|
||||
block[i + j] = floor(tmp + 0.499999999999);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform 8x8 block of data with a double precision inverse DCT <br>
|
||||
* This is a reference implementation.
|
||||
*
|
||||
* @param block pointer to 8x8 block of data to transform
|
||||
*/
|
||||
void ff_ref_idct(short *block)
|
||||
{
|
||||
/* implement the equation: block = (coefficients') * block * coefficients */
|
||||
|
||||
unsigned int i, j, k;
|
||||
double out[8 * 8];
|
||||
|
||||
/* out = block * coefficients */
|
||||
for (i = 0; i < 64; i += 8) {
|
||||
for (j = 0; j < 8; ++j) {
|
||||
double tmp = 0;
|
||||
for (k = 0; k < 8; ++k) {
|
||||
tmp += block[i + k] * coefficients[k * 8 + j];
|
||||
}
|
||||
out[i + j] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
/* block = (coefficients') * out */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j) {
|
||||
double tmp = 0;
|
||||
for (k = 0; k < 64; k += 8) {
|
||||
tmp += coefficients[k + i] * out[k + j];
|
||||
}
|
||||
block[i * 8 + j] = floor(tmp + 0.5);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,157 +0,0 @@
|
||||
/**
|
||||
* @file libavcodec/fdctref.c
|
||||
* forward discrete cosine transform, double precision.
|
||||
*/
|
||||
|
||||
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
|
||||
|
||||
/*
|
||||
* Disclaimer of Warranty
|
||||
*
|
||||
* These software programs are available to the user without any license fee or
|
||||
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
|
||||
* any and all warranties, whether express, implied, or statuary, including any
|
||||
* implied warranties or merchantability or of fitness for a particular
|
||||
* purpose. In no event shall the copyright-holder be liable for any
|
||||
* incidental, punitive, or consequential damages of any kind whatsoever
|
||||
* arising from the use of these programs.
|
||||
*
|
||||
* This disclaimer of warranty extends to the user of these programs and user's
|
||||
* customers, employees, agents, transferees, successors, and assigns.
|
||||
*
|
||||
* The MPEG Software Simulation Group does not represent or warrant that the
|
||||
* programs furnished hereunder are free of infringement of any third-party
|
||||
* patents.
|
||||
*
|
||||
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
|
||||
* are subject to royalty fees to patent holders. Many of these patents are
|
||||
* general enough such that they are unavoidable regardless of implementation
|
||||
* design.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifndef PI
|
||||
# ifdef M_PI
|
||||
# define PI M_PI
|
||||
# else
|
||||
# define PI 3.14159265358979323846
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* global declarations */
|
||||
void init_fdct (void);
|
||||
void fdct (short *block);
|
||||
|
||||
/* private data */
|
||||
static double c[8][8]; /* transform coefficients */
|
||||
|
||||
void init_fdct(void)
|
||||
{
|
||||
int i, j;
|
||||
double s;
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
{
|
||||
s = (i==0) ? sqrt(0.125) : 0.5;
|
||||
|
||||
for (j=0; j<8; j++)
|
||||
c[i][j] = s * cos((PI/8.0)*i*(j+0.5));
|
||||
}
|
||||
}
|
||||
|
||||
void fdct(block)
|
||||
short *block;
|
||||
{
|
||||
register int i, j;
|
||||
double s;
|
||||
double tmp[64];
|
||||
|
||||
for(i = 0; i < 8; i++)
|
||||
for(j = 0; j < 8; j++)
|
||||
{
|
||||
s = 0.0;
|
||||
|
||||
/*
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[j][k] * block[8 * i + k];
|
||||
*/
|
||||
s += c[j][0] * block[8 * i + 0];
|
||||
s += c[j][1] * block[8 * i + 1];
|
||||
s += c[j][2] * block[8 * i + 2];
|
||||
s += c[j][3] * block[8 * i + 3];
|
||||
s += c[j][4] * block[8 * i + 4];
|
||||
s += c[j][5] * block[8 * i + 5];
|
||||
s += c[j][6] * block[8 * i + 6];
|
||||
s += c[j][7] * block[8 * i + 7];
|
||||
|
||||
tmp[8 * i + j] = s;
|
||||
}
|
||||
|
||||
for(j = 0; j < 8; j++)
|
||||
for(i = 0; i < 8; i++)
|
||||
{
|
||||
s = 0.0;
|
||||
|
||||
/*
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[i][k] * tmp[8 * k + j];
|
||||
*/
|
||||
s += c[i][0] * tmp[8 * 0 + j];
|
||||
s += c[i][1] * tmp[8 * 1 + j];
|
||||
s += c[i][2] * tmp[8 * 2 + j];
|
||||
s += c[i][3] * tmp[8 * 3 + j];
|
||||
s += c[i][4] * tmp[8 * 4 + j];
|
||||
s += c[i][5] * tmp[8 * 5 + j];
|
||||
s += c[i][6] * tmp[8 * 6 + j];
|
||||
s += c[i][7] * tmp[8 * 7 + j];
|
||||
s*=8.0;
|
||||
|
||||
block[8 * i + j] = (short)floor(s + 0.499999);
|
||||
/*
|
||||
* reason for adding 0.499999 instead of 0.5:
|
||||
* s is quite often x.5 (at least for i and/or j = 0 or 4)
|
||||
* and setting the rounding threshold exactly to 0.5 leads to an
|
||||
* extremely high arithmetic implementation dependency of the result;
|
||||
* s being between x.5 and x.500001 (which is now incorrectly rounded
|
||||
* downwards instead of upwards) is assumed to occur less often
|
||||
* (if at all)
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
/* perform IDCT matrix multiply for 8x8 coefficient block */
|
||||
|
||||
void idct(block)
|
||||
short *block;
|
||||
{
|
||||
int i, j, k, v;
|
||||
double partial_product;
|
||||
double tmp[64];
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
for (j=0; j<8; j++)
|
||||
{
|
||||
partial_product = 0.0;
|
||||
|
||||
for (k=0; k<8; k++)
|
||||
partial_product+= c[k][j]*block[8*i+k];
|
||||
|
||||
tmp[8*i+j] = partial_product;
|
||||
}
|
||||
|
||||
/* Transpose operation is integrated into address mapping by switching
|
||||
loop order of i and j */
|
||||
|
||||
for (j=0; j<8; j++)
|
||||
for (i=0; i<8; i++)
|
||||
{
|
||||
partial_product = 0.0;
|
||||
|
||||
for (k=0; k<8; k++)
|
||||
partial_product+= c[k][i]*tmp[8*k+j];
|
||||
|
||||
v = (int) floor(partial_product+0.5);
|
||||
block[8*i+j] = v;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user