diff --git a/LICENSE b/LICENSE index 286cd9016d..7cb34d7597 100644 --- a/LICENSE +++ b/LICENSE @@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely: * The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c are taken from libjpeg, see the top of the files for licensing details. - -* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation - Group with all rights reserved. It is only used to create a DCT test program - and not compiled into libavcodec. diff --git a/doc/TODO b/doc/TODO index 6c0a824107..f03270ec13 100644 --- a/doc/TODO +++ b/doc/TODO @@ -81,7 +81,6 @@ unassigned TODO: (unordered) - add support for using mplayers video filters to ffmpeg - H264 encoder - per MB ratecontrol (so VCD and such do work better) -- replace/rewrite libavcodec/fdctref.c - write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions - convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc - generic audio mixing API diff --git a/libavcodec/Makefile b/libavcodec/Makefile index afa5fac152..5067354da1 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -533,5 +533,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86 include $(SUBDIR)../subdir.mak -$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o -$(SUBDIR)fft-test$(EXESUF): $(SUBDIR)fdctref.o +$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index fef142fe94..48e7c5a091 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -46,9 +46,9 @@ void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);}; /* reference fdct/idct */ -void fdct(DCTELEM *block); -void idct(DCTELEM *block); -void init_fdct(void); +void ff_ref_fdct(DCTELEM *block); +void ff_ref_idct(DCTELEM *block); +void ff_ref_dct_init(void); void ff_mmx_idct(DCTELEM *data); void ff_mmxext_idct(DCTELEM *data); @@ -90,57 +90,57 @@ struct algo { static int cpu_flags; struct algo algos[] = { - {"REF-DBL", 0, fdct, fdct, NO_PERM}, - {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE}, - {"FAANI", 1, ff_faanidct, idct, NO_PERM}, - {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM}, - {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM}, - {"REF-DBL", 1, idct, idct, NO_PERM}, - {"INT", 1, j_rev_dct, idct, MMX_PERM}, - {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM}, + {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM}, + {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE}, + {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM}, + {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM}, + {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM}, + {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM}, + {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM}, + {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM}, #if HAVE_MMX - {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, FF_MM_MMX}, + {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX}, #if HAVE_MMX2 - {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, FF_MM_MMXEXT}, - {"SSE2", 0, ff_fdct_sse2, fdct, NO_PERM, FF_MM_SSE2}, + {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMXEXT}, + {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2}, #endif #if CONFIG_GPL - {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, FF_MM_MMX}, - {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, FF_MM_MMXEXT}, + {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX}, + {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMXEXT}, #endif - {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX}, - {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, FF_MM_MMX}, - {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, FF_MM_MMXEXT}, - {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, FF_MM_SSE2}, + {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX}, + {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX}, + {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMXEXT}, + {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2}, #endif #if HAVE_ALTIVEC - {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, FF_MM_ALTIVEC}, + {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC}, #endif #if ARCH_BFIN - {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM}, - {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM}, + {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM}, + {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM}, #endif #if ARCH_ARM - {"SIMPLE-ARM", 1, simple_idct_ARM, idct, NO_PERM }, - {"INT-ARM", 1, j_rev_dct_ARM, idct, MMX_PERM }, + {"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM }, + {"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM }, #if HAVE_ARMV5TE - {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, idct, NO_PERM }, + {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM }, #endif #if HAVE_ARMV6 - {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, idct, MMX_PERM }, + {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM }, #endif #if HAVE_NEON - {"SIMPLE-NEON", 1, ff_simple_idct_neon, idct, PARTTRANS_PERM }, + {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM }, #endif #endif /* ARCH_ARM */ #if ARCH_ALPHA - {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, idct, NO_PERM }, + {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM }, #endif { 0 } @@ -222,7 +222,7 @@ void dct_error(const char *name, int is_idct, for(i=0;i<64;i++) block1[i] = (random() % 512) -256; if (is_idct){ - fdct(block1); + ff_ref_fdct(block1); for(i=0;i<64;i++) block1[i]>>=3; @@ -336,7 +336,7 @@ void dct_error(const char *name, int is_idct, for(i=0;i<64;i++) block1[i] = (random() % 512) -256; if (is_idct){ - fdct(block1); + ff_ref_fdct(block1); for(i=0;i<64;i++) block1[i]>>=3; @@ -559,7 +559,7 @@ int main(int argc, char **argv) int test=1; cpu_flags = mm_support(); - init_fdct(); + ff_ref_dct_init(); idct_mmx_init(); for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; diff --git a/libavcodec/dctref.c b/libavcodec/dctref.c new file mode 100644 index 0000000000..faad057a9a --- /dev/null +++ b/libavcodec/dctref.c @@ -0,0 +1,121 @@ +/* + * reference discrete cosine transform (double precision) + * Copyright (C) 2009 Dylan Yudaken + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file libavcodec/dctref.c + * reference discrete cosine transform (double precision) + * + * @author Dylan Yudaken (dyudaken at gmail) + * + * @note This file could be optimized a lot, but is for + * reference and so readability is better. + */ + +#include "libavutil/mathematics.h" +static double coefficients[8 * 8]; + +/** + * Initialize the double precision discrete cosine transform + * functions fdct & idct. + */ +av_cold void ff_ref_dct_init(void) +{ + unsigned int i, j; + + for (j = 0; j < 8; ++j) { + coefficients[j] = sqrt(0.125); + for (i = 8; i < 64; i += 8) { + coefficients[i + j] = 0.5 * cos(i * (j + 0.5) * M_PI / 64.0); + } + } +} + +/** + * Transform 8x8 block of data with a double precision forward DCT
+ * This is a reference implementation. + * + * @param block pointer to 8x8 block of data to transform + */ +void ff_ref_fdct(short *block) +{ + /* implement the equation: block = coefficients * block * coefficients' */ + + unsigned int i, j, k; + double out[8 * 8]; + + /* out = coefficients * block */ + for (i = 0; i < 64; i += 8) { + for (j = 0; j < 8; ++j) { + double tmp = 0; + for (k = 0; k < 8; ++k) { + tmp += coefficients[i + k] * block[k * 8 + j]; + } + out[i + j] = tmp * 8; + } + } + + /* block = out * (coefficients') */ + for (j = 0; j < 8; ++j) { + for (i = 0; i < 64; i += 8) { + double tmp = 0; + for (k = 0; k < 8; ++k) { + tmp += out[i + k] * coefficients[j * 8 + k]; + } + block[i + j] = floor(tmp + 0.499999999999); + } + } +} + +/** + * Transform 8x8 block of data with a double precision inverse DCT
+ * This is a reference implementation. + * + * @param block pointer to 8x8 block of data to transform + */ +void ff_ref_idct(short *block) +{ + /* implement the equation: block = (coefficients') * block * coefficients */ + + unsigned int i, j, k; + double out[8 * 8]; + + /* out = block * coefficients */ + for (i = 0; i < 64; i += 8) { + for (j = 0; j < 8; ++j) { + double tmp = 0; + for (k = 0; k < 8; ++k) { + tmp += block[i + k] * coefficients[k * 8 + j]; + } + out[i + j] = tmp; + } + } + + /* block = (coefficients') * out */ + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) { + double tmp = 0; + for (k = 0; k < 64; k += 8) { + tmp += coefficients[k + i] * out[k + j]; + } + block[i * 8 + j] = floor(tmp + 0.5); + } + } +} diff --git a/libavcodec/fdctref.c b/libavcodec/fdctref.c deleted file mode 100644 index 164883dcbc..0000000000 --- a/libavcodec/fdctref.c +++ /dev/null @@ -1,157 +0,0 @@ -/** - * @file libavcodec/fdctref.c - * forward discrete cosine transform, double precision. - */ - -/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ - -/* - * Disclaimer of Warranty - * - * These software programs are available to the user without any license fee or - * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims - * any and all warranties, whether express, implied, or statuary, including any - * implied warranties or merchantability or of fitness for a particular - * purpose. In no event shall the copyright-holder be liable for any - * incidental, punitive, or consequential damages of any kind whatsoever - * arising from the use of these programs. - * - * This disclaimer of warranty extends to the user of these programs and user's - * customers, employees, agents, transferees, successors, and assigns. - * - * The MPEG Software Simulation Group does not represent or warrant that the - * programs furnished hereunder are free of infringement of any third-party - * patents. - * - * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, - * are subject to royalty fees to patent holders. Many of these patents are - * general enough such that they are unavoidable regardless of implementation - * design. - */ - -#include - -#ifndef PI -# ifdef M_PI -# define PI M_PI -# else -# define PI 3.14159265358979323846 -# endif -#endif - -/* global declarations */ -void init_fdct (void); -void fdct (short *block); - -/* private data */ -static double c[8][8]; /* transform coefficients */ - -void init_fdct(void) -{ - int i, j; - double s; - - for (i=0; i<8; i++) - { - s = (i==0) ? sqrt(0.125) : 0.5; - - for (j=0; j<8; j++) - c[i][j] = s * cos((PI/8.0)*i*(j+0.5)); - } -} - -void fdct(block) -short *block; -{ - register int i, j; - double s; - double tmp[64]; - - for(i = 0; i < 8; i++) - for(j = 0; j < 8; j++) - { - s = 0.0; - -/* - * for(k = 0; k < 8; k++) - * s += c[j][k] * block[8 * i + k]; - */ - s += c[j][0] * block[8 * i + 0]; - s += c[j][1] * block[8 * i + 1]; - s += c[j][2] * block[8 * i + 2]; - s += c[j][3] * block[8 * i + 3]; - s += c[j][4] * block[8 * i + 4]; - s += c[j][5] * block[8 * i + 5]; - s += c[j][6] * block[8 * i + 6]; - s += c[j][7] * block[8 * i + 7]; - - tmp[8 * i + j] = s; - } - - for(j = 0; j < 8; j++) - for(i = 0; i < 8; i++) - { - s = 0.0; - -/* - * for(k = 0; k < 8; k++) - * s += c[i][k] * tmp[8 * k + j]; - */ - s += c[i][0] * tmp[8 * 0 + j]; - s += c[i][1] * tmp[8 * 1 + j]; - s += c[i][2] * tmp[8 * 2 + j]; - s += c[i][3] * tmp[8 * 3 + j]; - s += c[i][4] * tmp[8 * 4 + j]; - s += c[i][5] * tmp[8 * 5 + j]; - s += c[i][6] * tmp[8 * 6 + j]; - s += c[i][7] * tmp[8 * 7 + j]; - s*=8.0; - - block[8 * i + j] = (short)floor(s + 0.499999); -/* - * reason for adding 0.499999 instead of 0.5: - * s is quite often x.5 (at least for i and/or j = 0 or 4) - * and setting the rounding threshold exactly to 0.5 leads to an - * extremely high arithmetic implementation dependency of the result; - * s being between x.5 and x.500001 (which is now incorrectly rounded - * downwards instead of upwards) is assumed to occur less often - * (if at all) - */ - } -} - -/* perform IDCT matrix multiply for 8x8 coefficient block */ - -void idct(block) -short *block; -{ - int i, j, k, v; - double partial_product; - double tmp[64]; - - for (i=0; i<8; i++) - for (j=0; j<8; j++) - { - partial_product = 0.0; - - for (k=0; k<8; k++) - partial_product+= c[k][j]*block[8*i+k]; - - tmp[8*i+j] = partial_product; - } - - /* Transpose operation is integrated into address mapping by switching - loop order of i and j */ - - for (j=0; j<8; j++) - for (i=0; i<8; i++) - { - partial_product = 0.0; - - for (k=0; k<8; k++) - partial_product+= c[k][i]*tmp[8*k+j]; - - v = (int) floor(partial_product+0.5); - block[8*i+j] = v; - } -}