Merge replacement of MPEG group reference DCT code.

Originally committed as revision 18492 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
2025-01-03 05:10:03 +02:00 · 2009-04-13 10:23:10 +00:00 · 2009-04-13 10:23:10 +00:00 · b0b57fa13b
commit b0b57fa13b
parent 266f6af570
6 changed files with 154 additions and 196 deletions
--- a/4
+++ b/4
@@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely:

 * The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
  are taken from libjpeg, see the top of the files for licensing details.
-
-* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
-  Group with all rights reserved. It is only used to create a DCT test program
-  and not compiled into libavcodec.
--- a/doc/TODO
+++ b/doc/TODO
@@ -81,7 +81,6 @@ unassigned TODO: (unordered)
 - add support for using mplayers video filters to ffmpeg
 - H264 encoder
 - per MB ratecontrol (so VCD and such do work better)
-- replace/rewrite libavcodec/fdctref.c
 - write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions
 - convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc
 - generic audio mixing API
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -533,5 +533,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86

 include $(SUBDIR)../subdir.mak

-$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o
-$(SUBDIR)fft-test$(EXESUF): $(SUBDIR)fdctref.o
+$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -46,9 +46,9 @@
 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};

 /* reference fdct/idct */
-void fdct(DCTELEM *block);
-void idct(DCTELEM *block);
-void init_fdct(void);
+void ff_ref_fdct(DCTELEM *block);
+void ff_ref_idct(DCTELEM *block);
+void ff_ref_dct_init(void);

 void ff_mmx_idct(DCTELEM *data);
 void ff_mmxext_idct(DCTELEM *data);
@@ -90,57 +90,57 @@ struct algo {
 static int cpu_flags;

 struct algo algos[] = {
-  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
-  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
-  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
-  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
-  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
-  {"REF-DBL",         1, idct,               idct, NO_PERM},
-  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
-  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
+  {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
+  {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
+  {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
+  {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
+  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
+  {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
+  {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
+  {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},

 #if HAVE_MMX
-  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, FF_MM_MMX},
+  {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
 #if HAVE_MMX2
-  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, FF_MM_MMXEXT},
-  {"SSE2",            0, ff_fdct_sse2,       fdct, NO_PERM, FF_MM_SSE2},
+  {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMXEXT},
+  {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
 #endif

 #if CONFIG_GPL
-  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, FF_MM_MMX},
-  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, FF_MM_MMXEXT},
+  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
+  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMXEXT},
 #endif
-  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
-  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, FF_MM_MMX},
-  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, FF_MM_MMXEXT},
-  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, FF_MM_SSE2},
+  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
+  {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
+  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMXEXT},
+  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
 #endif

 #if HAVE_ALTIVEC
-  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, FF_MM_ALTIVEC},
+  {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
 #endif

 #if ARCH_BFIN
-  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
-  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
+  {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
+  {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
 #endif

 #if ARCH_ARM
-  {"SIMPLE-ARM",      1, simple_idct_ARM,    idct, NO_PERM },
-  {"INT-ARM",         1, j_rev_dct_ARM,      idct, MMX_PERM },
+  {"SIMPLE-ARM",      1, simple_idct_ARM,    ff_ref_idct, NO_PERM },
+  {"INT-ARM",         1, j_rev_dct_ARM,      ff_ref_idct, MMX_PERM },
 #if HAVE_ARMV5TE
-  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, idct, NO_PERM },
+  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
 #endif
 #if HAVE_ARMV6
-  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, idct, MMX_PERM },
+  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
 #endif
 #if HAVE_NEON
-  {"SIMPLE-NEON",     1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
+  {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
 #endif
 #endif /* ARCH_ARM */

 #if ARCH_ALPHA
-  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  idct, NO_PERM },
+  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
 #endif

  { 0 }
@@ -222,7 +222,7 @@ void dct_error(const char *name, int is_idct,
            for(i=0;i<64;i++)
                block1[i] = (random() % 512) -256;
            if (is_idct){
-                fdct(block1);
+                ff_ref_fdct(block1);

                for(i=0;i<64;i++)
                    block1[i]>>=3;
@@ -336,7 +336,7 @@ void dct_error(const char *name, int is_idct,
        for(i=0;i<64;i++)
            block1[i] = (random() % 512) -256;
        if (is_idct){
-            fdct(block1);
+            ff_ref_fdct(block1);

            for(i=0;i<64;i++)
                block1[i]>>=3;
@@ -559,7 +559,7 @@ int main(int argc, char **argv)
    int test=1;
    cpu_flags = mm_support();

-    init_fdct();
+    ff_ref_dct_init();
    idct_mmx_init();

    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
--- a/libavcodec/dctref.c
+++ b/libavcodec/dctref.c
@@ -0,0 +1,121 @@
+/*
+ * reference discrete cosine transform (double precision)
+ * Copyright (C) 2009 Dylan Yudaken
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/dctref.c
+ * reference discrete cosine transform (double precision)
+ *
+ * @author Dylan Yudaken (dyudaken at gmail)
+ *
+ * @note This file could be optimized a lot, but it serves as a reference,
+ * so readability is more important than speed.
+ */
+
+#include "libavutil/mathematics.h"
+static double coefficients[8 * 8];
+
+/**
+ * Initialize the coefficient table used by the double precision
+ * reference transforms ff_ref_fdct() & ff_ref_idct().
+ */
+av_cold void ff_ref_dct_init(void)
+{
+    unsigned int i, j;
+
+    for (j = 0; j < 8; ++j) {
+        coefficients[j] = sqrt(0.125);
+        for (i = 8; i < 64; i += 8) {
+            coefficients[i + j] = 0.5 * cos(i * (j + 0.5) * M_PI / 64.0);
+        }
+    }
+}
+
+/**
+ * Transform 8x8 block of data with a double precision forward DCT <br>
+ * This is a reference implementation.
+ *
+ * @param block pointer to 8x8 block of data to transform
+ */
+void ff_ref_fdct(short *block)
+{
+    /* implement the equation: block = 8 * coefficients * block * coefficients' */
+
+    unsigned int i, j, k;
+    double out[8 * 8];
+
+    /* out = 8 * coefficients * block */
+    for (i = 0; i < 64; i += 8) {
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += coefficients[i + k] * block[k * 8 + j];
+            }
+            out[i + j] = tmp * 8;
+        }
+    }
+
+    /* block = out * (coefficients') */
+    for (j = 0; j < 8; ++j) {
+        for (i = 0; i < 64; i += 8) {
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += out[i + k] * coefficients[j * 8 + k];
+            }
+            block[i + j] = floor(tmp + 0.499999999999);
+        }
+    }
+}
+
+/**
+ * Transform 8x8 block of data with a double precision inverse DCT <br>
+ * This is a reference implementation.
+ *
+ * @param block pointer to 8x8 block of data to transform
+ */
+void ff_ref_idct(short *block)
+{
+    /* implement the equation: block = (coefficients') * block * coefficients */
+
+    unsigned int i, j, k;
+    double out[8 * 8];
+
+    /* out = block * coefficients */
+    for (i = 0; i < 64; i += 8) {
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += block[i + k] * coefficients[k * 8 + j];
+            }
+            out[i + j] = tmp;
+        }
+    }
+
+    /* block = (coefficients') * out */
+    for (i = 0; i < 8; ++i) {
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 64; k += 8) {
+                tmp += coefficients[k + i] * out[k + j];
+            }
+            block[i * 8 + j] = floor(tmp + 0.5);
+        }
+    }
+}
--- a/libavcodec/fdctref.c
+++ b/libavcodec/fdctref.c
@@ -1,157 +0,0 @@
-/**
- * @file libavcodec/fdctref.c
- * forward discrete cosine transform, double precision.
- */
-
-/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
-
-/*
- * Disclaimer of Warranty
- *
- * These software programs are available to the user without any license fee or
- * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
- * any and all warranties, whether express, implied, or statuary, including any
- * implied warranties or merchantability or of fitness for a particular
- * purpose.  In no event shall the copyright-holder be liable for any
- * incidental, punitive, or consequential damages of any kind whatsoever
- * arising from the use of these programs.
- *
- * This disclaimer of warranty extends to the user of these programs and user's
- * customers, employees, agents, transferees, successors, and assigns.
- *
- * The MPEG Software Simulation Group does not represent or warrant that the
- * programs furnished hereunder are free of infringement of any third-party
- * patents.
- *
- * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
- * are subject to royalty fees to patent holders.  Many of these patents are
- * general enough such that they are unavoidable regardless of implementation
- * design.
- */
-
-#include <math.h>
-
-#ifndef PI
-# ifdef M_PI
-#  define PI M_PI
-# else
-#  define PI 3.14159265358979323846
-# endif
-#endif
-
-/* global declarations */
-void init_fdct (void);
-void fdct (short *block);
-
-/* private data */
-static double c[8][8]; /* transform coefficients */
-
-void init_fdct(void)
-{
-  int i, j;
-  double s;
-
-  for (i=0; i<8; i++)
-  {
-    s = (i==0) ? sqrt(0.125) : 0.5;
-
-    for (j=0; j<8; j++)
-      c[i][j] = s * cos((PI/8.0)*i*(j+0.5));
-  }
-}
-
-void fdct(block)
-short *block;
-{
-        register int i, j;
-        double s;
-        double tmp[64];
-
-        for(i = 0; i < 8; i++)
-            for(j = 0; j < 8; j++)
-            {
-                    s = 0.0;
-
-/*
- *                     for(k = 0; k < 8; k++)
- *                         s += c[j][k] * block[8 * i + k];
- */
-                s += c[j][0] * block[8 * i + 0];
-                s += c[j][1] * block[8 * i + 1];
-                s += c[j][2] * block[8 * i + 2];
-                s += c[j][3] * block[8 * i + 3];
-                s += c[j][4] * block[8 * i + 4];
-                s += c[j][5] * block[8 * i + 5];
-                s += c[j][6] * block[8 * i + 6];
-                s += c[j][7] * block[8 * i + 7];
-
-                    tmp[8 * i + j] = s;
-            }
-
-        for(j = 0; j < 8; j++)
-            for(i = 0; i < 8; i++)
-            {
-                    s = 0.0;
-
-/*
- *                       for(k = 0; k < 8; k++)
- *                    s += c[i][k] * tmp[8 * k + j];
- */
-                s += c[i][0] * tmp[8 * 0 + j];
-                s += c[i][1] * tmp[8 * 1 + j];
-                s += c[i][2] * tmp[8 * 2 + j];
-                s += c[i][3] * tmp[8 * 3 + j];
-                s += c[i][4] * tmp[8 * 4 + j];
-                s += c[i][5] * tmp[8 * 5 + j];
-                s += c[i][6] * tmp[8 * 6 + j];
-                s += c[i][7] * tmp[8 * 7 + j];
-                s*=8.0;
-
-                    block[8 * i + j] = (short)floor(s + 0.499999);
-/*
- * reason for adding 0.499999 instead of 0.5:
- * s is quite often x.5 (at least for i and/or j = 0 or 4)
- * and setting the rounding threshold exactly to 0.5 leads to an
- * extremely high arithmetic implementation dependency of the result;
- * s being between x.5 and x.500001 (which is now incorrectly rounded
- * downwards instead of upwards) is assumed to occur less often
- * (if at all)
- */
-      }
-}
-
-/* perform IDCT matrix multiply for 8x8 coefficient block */
-
-void idct(block)
-short *block;
-{
-  int i, j, k, v;
-  double partial_product;
-  double tmp[64];
-
-  for (i=0; i<8; i++)
-    for (j=0; j<8; j++)
-    {
-      partial_product = 0.0;
-
-      for (k=0; k<8; k++)
-        partial_product+= c[k][j]*block[8*i+k];
-
-      tmp[8*i+j] = partial_product;
-    }
-
-  /* Transpose operation is integrated into address mapping by switching
-     loop order of i and j */
-
-  for (j=0; j<8; j++)
-    for (i=0; i<8; i++)
-    {
-      partial_product = 0.0;
-
-      for (k=0; k<8; k++)
-        partial_product+= c[k][i]*tmp[8*k+j];
-
-      v = (int) floor(partial_product+0.5);
-      block[8*i+j] = v;
-    }
-}