Remove all Alpha architecture optimizations

Alpha has been end-of-lifed and no more test machines are available.

parent 16381923fb
commit c7f7bfc9e3

libavcodec/alpha/Makefile
@@ -1,9 +0,0 @@
OBJS                     += alpha/dsputil_alpha.o      \
                            alpha/dsputil_alpha_asm.o  \
                            alpha/motion_est_alpha.o   \
                            alpha/motion_est_mvi_asm.o \
                            alpha/simple_idct_alpha.o  \

OBJS-$(CONFIG_HPELDSP)   += alpha/hpeldsp_alpha.o      \
                            alpha/hpeldsp_alpha_asm.o
OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o

libavcodec/alpha/asm.h
@@ -1,186 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_ALPHA_ASM_H
#define AVCODEC_ALPHA_ASM_H

#include <inttypes.h>

#include "libavutil/common.h"

#if AV_GCC_VERSION_AT_LEAST(2,96)
# define likely(x)   __builtin_expect((x) != 0, 1)
# define unlikely(x) __builtin_expect((x) != 0, 0)
#else
# define likely(x)   (x)
# define unlikely(x) (x)
#endif

#define AMASK_BWX (1 << 0)
#define AMASK_FIX (1 << 1)
#define AMASK_CIX (1 << 2)
#define AMASK_MVI (1 << 8)

static inline uint64_t BYTE_VEC(uint64_t x)
{
    x |= x <<  8;
    x |= x << 16;
    x |= x << 32;
    return x;
}
static inline uint64_t WORD_VEC(uint64_t x)
{
    x |= x << 16;
    x |= x << 32;
    return x;
}
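
Aside: BYTE_VEC and WORD_VEC are the usual SWAR "splat" helpers, replicating one byte (or one 16-bit word) into every lane of a 64-bit register so a single integer operation can touch eight pixels or four coefficients at once. A minimal standalone check (my sketch, not part of the deleted header):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t byte_vec(uint64_t x) { x |= x << 8; x |= x << 16; x |= x << 32; return x; }
    static uint64_t word_vec(uint64_t x) { x |= x << 16; x |= x << 32; return x; }

    int main(void)
    {
        assert(byte_vec(0xfe)   == 0xfefefefefefefefeULL); /* 8 byte lanes */
        assert(word_vec(0x8000) == 0x8000800080008000ULL); /* 4 word lanes */
        return 0;
    }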

#define sextw(x) ((int16_t) (x))

#ifdef __GNUC__
#define ldq(p)                                                  \
    (((const union {                                            \
        uint64_t __l;                                           \
        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
    } *) (p))->__l)
#define ldl(p)                                                  \
    (((const union {                                            \
        int32_t __l;                                            \
        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
    } *) (p))->__l)
#define stq(l, p)                                                   \
    do {                                                            \
        (((union {                                                  \
            uint64_t __l;                                           \
            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
        } *) (p))->__l) = l;                                        \
    } while (0)
#define stl(l, p)                                                   \
    do {                                                            \
        (((union {                                                  \
            int32_t __l;                                            \
            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
        } *) (p))->__l) = l;                                        \
    } while (0)
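
Aside: the unions above are not decoration. Loading, say, four int16_t as one uint64_t through a bare pointer cast breaks C's strict-aliasing rules, and GCC may reorder or drop such accesses; going through a union whose members include the element type of p is the form of type punning GCC documents as supported. A sketch of the idea behind ldq (mine, under that assumption):

    #include <stdint.h>

    static uint64_t load4_shorts(const int16_t *p)
    {
        /* The union tells the compiler this 64-bit access may alias int16_t. */
        const union { uint64_t l; int16_t s[4]; } *u = (const void *) p;
        return u->l;   /* one aligned 64-bit load covering four int16_t */
    }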

struct unaligned_long { uint64_t l; } __attribute__((packed));
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
#define uldq(a)  (((const struct unaligned_long *) (a))->l)

#if AV_GCC_VERSION_AT_LEAST(3,3)
#define prefetch(p)     __builtin_prefetch((p), 0, 1)
#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
#define cmpbge  __builtin_alpha_cmpbge
/* Avoid warnings.  */
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
#define zap     __builtin_alpha_zap
#define zapnot  __builtin_alpha_zapnot
#define amask   __builtin_alpha_amask
#define implver __builtin_alpha_implver
#define rpcc    __builtin_alpha_rpcc
#else
#define prefetch(p)     __asm__ volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
#define prefetch_en(p)  __asm__ volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
#define prefetch_m(p)   __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define cmpbge(a, b)  ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extql(a, b)   ({ uint64_t __r; __asm__ ("extql %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extwl(a, b)   ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extqh(a, b)   ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zap(a, b)     ({ uint64_t __r; __asm__ ("zap %r1,%2,%0"    : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zapnot(a, b)  ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define amask(a)      ({ uint64_t __r; __asm__ ("amask %1,%0"      : "=r" (__r) : "rI" (a)); __r; })
#define implver()     ({ uint64_t __r; __asm__ ("implver %0"       : "=r" (__r)); __r; })
#define rpcc()        ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; })
#endif
#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")

#if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__)
#define minub8  __builtin_alpha_minub8
#define minsb8  __builtin_alpha_minsb8
#define minuw4  __builtin_alpha_minuw4
#define minsw4  __builtin_alpha_minsw4
#define maxub8  __builtin_alpha_maxub8
#define maxsb8  __builtin_alpha_maxsb8
#define maxuw4  __builtin_alpha_maxuw4
#define maxsw4  __builtin_alpha_maxsw4
#define perr    __builtin_alpha_perr
#define pklb    __builtin_alpha_pklb
#define pkwb    __builtin_alpha_pkwb
#define unpkbl  __builtin_alpha_unpkbl
#define unpkbw  __builtin_alpha_unpkbw
#else
#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define perr(a, b)   ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0"  : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
#define pklb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
#define pkwb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbl(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
#endif

#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */

#include <c_asm.h>
#define ldq(p)       (*(const uint64_t *) (p))
#define ldl(p)       (*(const int32_t *) (p))
#define stq(l, p)    do { *(uint64_t *) (p) = (l); } while (0)
#define stl(l, p)    do { *(int32_t *) (p) = (l); } while (0)
#define ldq_u(a)     asm ("ldq_u %v0,0(%a0)", a)
#define uldq(a)      (*(const __unaligned uint64_t *) (a))
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
#define extql(a, b)  asm ("extql %a0,%a1,%v0",  a, b)
#define extwl(a, b)  asm ("extwl %a0,%a1,%v0",  a, b)
#define extqh(a, b)  asm ("extqh %a0,%a1,%v0",  a, b)
#define zap(a, b)    asm ("zap %a0,%a1,%v0",    a, b)
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
#define amask(a)     asm ("amask %a0,%v0", a)
#define implver()    asm ("implver %v0")
#define rpcc()       asm ("rpcc %v0")
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b)
#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b)
#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b)
#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b)
#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b)
#define perr(a, b)   asm ("perr %a0,%a1,%v0",   a, b)
#define pklb(a)      asm ("pklb %a0,%v0",   a)
#define pkwb(a)      asm ("pkwb %a0,%v0",   a)
#define unpkbl(a)    asm ("unpkbl %a0,%v0", a)
#define unpkbw(a)    asm ("unpkbw %a0,%v0", a)
#define wh64(a)      asm ("wh64 %a0", a)

#else
#error "Unknown compiler!"
#endif

#endif /* AVCODEC_ALPHA_ASM_H */

libavcodec/alpha/dsputil_alpha.c
@@ -1,157 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/dsputil.h"
#include "dsputil_alpha.h"
#include "asm.h"

void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 int line_size);
void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 int line_size);

#if 0
/* These functions were the base for the optimized assembler routines,
   and remain here for documentation purposes.  */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    int i = 8;
    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */

    do {
        uint64_t shorts0, shorts1;

        shorts0 = ldq(block);
        shorts0 = maxsw4(shorts0, 0);
        shorts0 = minsw4(shorts0, clampmask);
        stl(pkwb(shorts0), pixels);

        shorts1 = ldq(block + 4);
        shorts1 = maxsw4(shorts1, 0);
        shorts1 = minsw4(shorts1, clampmask);
        stl(pkwb(shorts1), pixels + 4);

        pixels += line_size;
        block  += 8;
    } while (--i);
}

void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    int h = 8;
    /* Keep this function a leaf function by generating the constants
       manually (mainly for the hack value ;-).  */
    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    uint64_t signmask  = zap(-1, 0x33);
    signmask ^= signmask >> 1;          /* 0x8000800080008000 */

    do {
        uint64_t shorts0, pix0, signs0;
        uint64_t shorts1, pix1, signs1;

        shorts0 = ldq(block);
        shorts1 = ldq(block + 4);

        pix0    = unpkbw(ldl(pixels));
        /* Signed subword add (MMX paddw).  */
        signs0   = shorts0 & signmask;
        shorts0 &= ~signmask;
        shorts0 += pix0;
        shorts0 ^= signs0;
        /* Clamp. */
        shorts0 = maxsw4(shorts0, 0);
        shorts0 = minsw4(shorts0, clampmask);

        /* Next 4.  */
        pix1     = unpkbw(ldl(pixels + 4));
        signs1   = shorts1 & signmask;
        shorts1 &= ~signmask;
        shorts1 += pix1;
        shorts1 ^= signs1;
        shorts1  = maxsw4(shorts1, 0);
        shorts1  = minsw4(shorts1, clampmask);

        stl(pkwb(shorts0), pixels);
        stl(pkwb(shorts1), pixels + 4);

        pixels += line_size;
        block  += 8;
    } while (--h);
}
#endif
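
Note: the "signed subword add (MMX paddw)" step in the reference code above is worth spelling out. Clearing bit 15 of each 16-bit lane before one 64-bit add means a carry can reach a lane's own sign bit but never spill into the next lane (the unpacked pixel values are at most 255); XOR-ing the saved sign bits back then yields each lane's correct two's-complement sum. A standalone sketch (mine, not from the deleted file):

    #include <stdint.h>

    /* Add four packed 16-bit lanes of pix (each 0..255) to four packed
       signed 16-bit lanes of shorts, with no carry crossing lanes. */
    static uint64_t paddw_swar(uint64_t shorts, uint64_t pix)
    {
        const uint64_t signmask = 0x8000800080008000ULL;
        uint64_t signs = shorts & signmask; /* save each lane's sign bit    */
        shorts &= ~signmask;                /* make room for in-lane carry  */
        shorts += pix;                      /* carries stop at bit 15       */
        return shorts ^ signs;              /* restore signs: sum mod 2^16  */
    }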

static void clear_blocks_axp(int16_t *blocks) {
    uint64_t *p = (uint64_t *) blocks;
    int n = sizeof(int16_t) * 6 * 64;

    do {
        p[0] = 0;
        p[1] = 0;
        p[2] = 0;
        p[3] = 0;
        p[4] = 0;
        p[5] = 0;
        p[6] = 0;
        p[7] = 0;
        p += 8;
        n -= 8 * 8;
    } while (n);
}

av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
{
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    if (!high_bit_depth) {
        c->clear_blocks = clear_blocks_axp;
    }

    /* amask clears all bits that correspond to present features.  */
    if (amask(AMASK_MVI) == 0) {
        c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
        c->add_pixels_clamped = add_pixels_clamped_mvi_asm;

        if (!high_bit_depth)
            c->get_pixels = get_pixels_mvi;
        c->diff_pixels = diff_pixels_mvi;
        c->sad[0]        = pix_abs16x16_mvi_asm;
        c->sad[1]        = pix_abs8x8_mvi;
        c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
        c->pix_abs[1][0] = pix_abs8x8_mvi;
        c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
        c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
        c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
    }

    put_pixels_clamped_axp_p = c->put_pixels_clamped;
    add_pixels_clamped_axp_p = c->add_pixels_clamped;

    if (avctx->bits_per_raw_sample <= 8 &&
        (avctx->idct_algo == FF_IDCT_AUTO ||
         avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
        c->idct_put = ff_simple_idct_put_axp;
        c->idct_add = ff_simple_idct_add_axp;
        c->idct     = ff_simple_idct_axp;
    }
}
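
Note: the feature test reads a little backwards: amask returns its argument with the bits for implemented features cleared, so a result of 0 means "everything asked for is present". The shape of the pattern, as a sketch assuming the amask() wrapper and AMASK_MVI from asm.h above:

    /* Non-zero iff the CPU implements the MVI extension
       (perr, pkwb/unpkbw, minsw4/maxsw4, ...). */
    static int have_mvi(void)
    {
        return amask(AMASK_MVI) == 0;
    }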

libavcodec/alpha/dsputil_alpha.h
@@ -1,49 +0,0 @@
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_ALPHA_DSPUTIL_ALPHA_H
#define AVCODEC_ALPHA_DSPUTIL_ALPHA_H

#include <stddef.h>
#include <stdint.h>

void ff_simple_idct_axp(int16_t *block);
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);

void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                int line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                int line_size);
extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                        int line_size);
extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                        int line_size);

void get_pixels_mvi(int16_t *restrict block,
                    const uint8_t *restrict pixels, int line_size);
void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     int stride);
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);

#endif /* AVCODEC_ALPHA_DSPUTIL_ALPHA_H */

libavcodec/alpha/dsputil_alpha_asm.S
@@ -1,167 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * These functions are scheduled for pca56. They should work
 * reasonably on ev6, though.
 */

#include "regdef.h"

        .set noat
        .set noreorder
        .arch pca56
        .text

/************************************************************************
 * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
 *                                 int line_size)
 */
        .align 6
        .globl put_pixels_clamped_mvi_asm
        .ent put_pixels_clamped_mvi_asm
put_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t8, -1
        lda     t9, 8           # loop counter
        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff

        .align 4
1:      ldq     t0, 0(a0)
        ldq     t1, 8(a0)
        ldq     t2, 16(a0)
        ldq     t3, 24(a0)

        maxsw4  t0, zero, t0
        subq    t9, 2, t9
        maxsw4  t1, zero, t1
        lda     a0, 32(a0)

        maxsw4  t2, zero, t2
        addq    a1, a2, ta
        maxsw4  t3, zero, t3
        minsw4  t0, t8, t0

        minsw4  t1, t8, t1
        minsw4  t2, t8, t2
        minsw4  t3, t8, t3
        pkwb    t0, t0

        pkwb    t1, t1
        pkwb    t2, t2
        pkwb    t3, t3
        stl     t0, 0(a1)

        stl     t1, 4(a1)
        addq    ta, a2, a1
        stl     t2, 0(ta)
        stl     t3, 4(ta)

        bne     t9, 1b
        ret
        .end put_pixels_clamped_mvi_asm

/************************************************************************
 * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
 *                                 int line_size)
 */
        .align 6
        .globl add_pixels_clamped_mvi_asm
        .ent add_pixels_clamped_mvi_asm
add_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t1, -1
        lda     th, 8
        zap     t1, 0x33, tg
        nop

        srl     tg, 1, t0
        xor     tg, t0, tg      # 0x8000800080008000
        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff

        .align 4
1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
        ldl     t4, 4(a1)       # pix1
        addq    a1, a2, te      # pixels += line_size
        ldq     t0, 0(a0)       # shorts0

        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
        ldl     ta, 4(te)       # pix3
        ldq     t3, 8(a0)       # shorts1
        ldq     t6, 16(a0)      # shorts2

        ldq     t9, 24(a0)      # shorts3
        unpkbw  t1, t1          # 0 0 (quarter/op no.)
        and     t0, tg, t2      # 0 1
        unpkbw  t4, t4          # 1 0

        bic     t0, tg, t0      # 0 2
        unpkbw  t7, t7          # 2 0
        and     t3, tg, t5      # 1 1
        addq    t0, t1, t0      # 0 3

        xor     t0, t2, t0      # 0 4
        unpkbw  ta, ta          # 3 0
        and     t6, tg, t8      # 2 1
        maxsw4  t0, zero, t0    # 0 5

        bic     t3, tg, t3      # 1 2
        bic     t6, tg, t6      # 2 2
        minsw4  t0, tf, t0      # 0 6
        addq    t3, t4, t3      # 1 3

        pkwb    t0, t0          # 0 7
        xor     t3, t5, t3      # 1 4
        maxsw4  t3, zero, t3    # 1 5
        addq    t6, t7, t6      # 2 3

        xor     t6, t8, t6      # 2 4
        and     t9, tg, tb      # 3 1
        minsw4  t3, tf, t3      # 1 6
        bic     t9, tg, t9      # 3 2

        maxsw4  t6, zero, t6    # 2 5
        addq    t9, ta, t9      # 3 3
        stl     t0, 0(a1)       # 0 8
        minsw4  t6, tf, t6      # 2 6

        xor     t9, tb, t9      # 3 4
        maxsw4  t9, zero, t9    # 3 5
        lda     a0, 32(a0)      # block += 16;
        pkwb    t3, t3          # 1 7

        minsw4  t9, tf, t9      # 3 6
        subq    th, 2, th
        pkwb    t6, t6          # 2 7
        pkwb    t9, t9          # 3 7

        stl     t3, 4(a1)       # 1 8
        addq    te, a2, a1      # pixels += line_size
        stl     t6, 0(te)       # 2 8
        stl     t9, 4(te)       # 3 8

        bne     th, 1b
        ret
        .end add_pixels_clamped_mvi_asm

libavcodec/alpha/hpeldsp_alpha.c
@@ -1,213 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/hpeldsp.h"
#include "hpeldsp_alpha.h"
#include "asm.h"

static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}
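
Note: both helpers average eight packed bytes per operation, using a + b = 2*(a & b) + (a ^ b) = 2*(a | b) - (a ^ b); masking the XOR with BYTE_VEC(0xfe) before the shift keeps a lane's low bit from leaking into its neighbour. Per byte lane, avg2 is the rounding average (a + b + 1) >> 1 and avg2_no_rnd is (a + b) >> 1. An exhaustive scalar check (mine, not part of the deleted file):

    #include <assert.h>

    int main(void)
    {
        for (int a = 0; a < 256; a++)
            for (int b = 0; b < 256; b++) {
                assert(((a | b) - (((a ^ b) & 0xfe) >> 1)) == (a + b + 1) / 2);
                assert(((a & b) + (((a ^ b) & 0xfe) >> 1)) == (a + b) / 2);
            }
        return 0;
    }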

#if 0
/* The XY2 routines basically utilize this scheme, but reuse parts in
   each iteration.  */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
#endif

#define OP(LOAD, STORE)                         \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        pixels += line_size;                    \
        block  += line_size;                    \
    } while (--h)

#define OP_X2(LOAD, STORE)                                      \
    do {                                                        \
        uint64_t pix1, pix2;                                    \
                                                                \
        pix1 = LOAD(pixels);                                    \
        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
        STORE(AVG2(pix1, pix2), block);                         \
        pixels += line_size;                                    \
        block  += line_size;                                    \
    } while (--h)

#define OP_Y2(LOAD, STORE)                      \
    do {                                        \
        uint64_t pix = LOAD(pixels);            \
        do {                                    \
            uint64_t next_pix;                  \
                                                \
            pixels += line_size;                \
            next_pix = LOAD(pixels);            \
            STORE(AVG2(pix, next_pix), block);  \
            block += line_size;                 \
            pix    = next_pix;                  \
        } while (--h);                          \
    } while (0)

#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t pix1  = LOAD(pixels);                                      \
        uint64_t pix2  = pix1 >> 8 | ((uint64_t) pixels[8] << 56);          \
        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
                       + (pix2 & BYTE_VEC(0x03));                           \
        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
                                                                            \
        do {                                                                \
            uint64_t npix1, npix2;                                          \
            uint64_t npix_l, npix_h;                                        \
            uint64_t avg;                                                   \
                                                                            \
            pixels += line_size;                                            \
            npix1  = LOAD(pixels);                                          \
            npix2  = npix1 >> 8 | ((uint64_t) pixels[8] << 56);             \
            npix_l = (npix1 & BYTE_VEC(0x03))                               \
                   + (npix2 & BYTE_VEC(0x03));                              \
            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
                + pix_h + npix_h;                                           \
            STORE(avg, block);                                              \
                                                                            \
            block += line_size;                                             \
            pix_l = npix_l;                                                 \
            pix_h = npix_h;                                                 \
        } while (--h);                                                      \
    } while (0)
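
Note: OP_X2 gets its "one pixel to the right" operand without a second pass over memory: shifting the quadword right by 8 moves every byte down one pixel position, and pixels[8] is spliced into the top byte. What one row of the _x2 case computes, as a scalar sketch of mine:

    #include <stdint.h>

    /* Half-pel interpolation in x for one 8-pixel row; rnd is 1 for the
       rounding variant (avg2) and 0 for the no-rounding one (avg2_no_rnd). */
    static void put_pixels_x2_row_ref(uint8_t *block, const uint8_t *pixels, int rnd)
    {
        for (int i = 0; i < 8; i++)
            block[i] = (pixels[i] + pixels[i + 1] + rnd) >> 1;
    }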

#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if ((size_t) pixels & 0x7) {                                            \
        OPKIND(uldq, STORE);                                                \
    } else {                                                                \
        OPKIND(ldq, STORE);                                                 \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}

#define PIXOP(OPNAME, STORE)                    \
    MAKE_OP(OPNAME,     , OP,     STORE)        \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)        \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)        \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)

/* Rounding primitives.  */
#define AVG2 avg2
#define AVG4 avg4
#define AVG4_ROUNDER BYTE_VEC(0x02)
#define STORE(l, b) stq(l, b)
PIXOP(put, STORE);

#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
PIXOP(avg, STORE);

/* Not rounding primitives.  */
#undef AVG2
#undef AVG4
#undef AVG4_ROUNDER
#undef STORE
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define AVG4_ROUNDER BYTE_VEC(0x01)
#define STORE(l, b) stq(l, b)
PIXOP(put_no_rnd, STORE);

#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
PIXOP(avg_no_rnd, STORE);

static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    put_pixels_axp_asm(block,     pixels,     line_size, h);
    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
}

av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
{
    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;

    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;

    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;

    c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
    c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
    c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;

    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;

    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;

    c->avg_pixels_tab[1][0] = avg_pixels_axp;
    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
}

libavcodec/alpha/hpeldsp_alpha.h
@@ -1,28 +0,0 @@
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
#define AVCODEC_ALPHA_HPELDSP_ALPHA_H

#include <stdint.h>
#include <stddef.h>

void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);

#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */

libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -1,124 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * These functions are scheduled for pca56. They should work
 * reasonably on ev6, though.
 */

#include "regdef.h"

        .set noat
        .set noreorder
        .arch pca56
        .text

/************************************************************************
 * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
 *                         ptrdiff_t line_size, int h)
 */
        .align 6
        .globl put_pixels_axp_asm
        .ent put_pixels_axp_asm
put_pixels_axp_asm:
        .frame sp, 0, ra
        .prologue 0

        and     a1, 7, t0
        beq     t0, $aligned

        .align 4
$unaligned:
        ldq_u   t0, 0(a1)
        ldq_u   t1, 8(a1)
        addq    a1, a2, a1
        nop

        ldq_u   t2, 0(a1)
        ldq_u   t3, 8(a1)
        addq    a1, a2, a1
        nop

        ldq_u   t4, 0(a1)
        ldq_u   t5, 8(a1)
        addq    a1, a2, a1
        nop

        ldq_u   t6, 0(a1)
        ldq_u   t7, 8(a1)
        extql   t0, a1, t0
        addq    a1, a2, a1

        extqh   t1, a1, t1
        addq    a0, a2, t8
        extql   t2, a1, t2
        addq    t8, a2, t9

        extqh   t3, a1, t3
        addq    t9, a2, ta
        extql   t4, a1, t4
        or      t0, t1, t0

        extqh   t5, a1, t5
        or      t2, t3, t2
        extql   t6, a1, t6
        or      t4, t5, t4

        extqh   t7, a1, t7
        or      t6, t7, t6
        stq     t0, 0(a0)
        stq     t2, 0(t8)

        stq     t4, 0(t9)
        subq    a3, 4, a3
        stq     t6, 0(ta)
        addq    ta, a2, a0

        bne     a3, $unaligned
        ret

        .align 4
$aligned:
        ldq     t0, 0(a1)
        addq    a1, a2, a1
        ldq     t1, 0(a1)
        addq    a1, a2, a1

        ldq     t2, 0(a1)
        addq    a1, a2, a1
        ldq     t3, 0(a1)

        addq    a0, a2, t4
        addq    a1, a2, a1
        addq    t4, a2, t5
        subq    a3, 4, a3

        stq     t0, 0(a0)
        addq    t5, a2, t6
        stq     t1, 0(t4)
        addq    t6, a2, a0

        stq     t2, 0(t5)
        stq     t3, 0(t6)

        bne     a3, $aligned
        ret
        .end put_pixels_axp_asm

libavcodec/alpha/motion_est_alpha.c
@@ -1,345 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dsputil_alpha.h"
#include "asm.h"

void get_pixels_mvi(int16_t *restrict block,
                    const uint8_t *restrict pixels, int line_size)
{
    int h = 8;

    do {
        uint64_t p;

        p = ldq(pixels);
        stq(unpkbw(p),       block);
        stq(unpkbw(p >> 32), block + 4);

        pixels += line_size;
        block  += 8;
    } while (--h);
}

void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     int stride) {
    int h = 8;
    uint64_t mask = 0x4040;

    mask |= mask << 16;
    mask |= mask << 32;
    do {
        uint64_t x, y, c, d, a;
        uint64_t signs;

        x = ldq(s1);
        y = ldq(s2);
        c = cmpbge(x, y);
        d = x - y;
        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
        d += 4 * a;             /* ...so we can use s4addq here.      */
        signs = zap(-1, c);

        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);

        s1    += stride;
        s2    += stride;
        block += 8;
    } while (--h);
}
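
Note: diff_pixels_mvi subtracts whole quadwords and then repairs the damage. Where byte i of s1 is smaller than byte i of s2, the 64-bit subtraction borrows from byte i + 1; cmpbge flags the bytes where no borrow happened, zap leaves 0x40 in the borrowing bytes, and d += 4 * a (one s4addq) adds 0x100 at each such byte, giving the upper neighbour back its borrowed 1. signs then supplies the 0xff high bytes that sign-extend negative differences to 16 bits. A scalar reference for the whole routine (mine, not from the deleted file):

    #include <stdint.h>

    static void diff_pixels_ref(int16_t *block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
    {
        for (int i = 0; i < 8; i++) {
            for (int j = 0; j < 8; j++)
                block[j] = s1[j] - s2[j]; /* widened signed difference */
            s1    += stride;
            s2    += stride;
            block += 8;
        }
    }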

static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
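
Note: avg4 splits each byte into its top six and bottom two bits so that neither partial sum can overflow its lane: the four pre-shifted high parts sum to at most 252, and the four low parts plus the rounder to at most 14, of which only the top two bits are folded back. Per byte lane this equals (l1 + l2 + l3 + l4 + 2) >> 2 exactly; a sampled scalar check of mine:

    #include <assert.h>

    int main(void)
    {
        for (int a = 0; a < 256; a += 3)
            for (int b = 0; b < 256; b += 5)
                for (int c = 0; c < 256; c += 7)
                    for (int d = 0; d < 256; d += 11) {
                        int r1 = (a >> 2) + (b >> 2) + (c >> 2) + (d >> 2);
                        int r2 = (((a & 3) + (b & 3) + (c & 3) + (d & 3) + 2) >> 2) & 3;
                        assert(r1 + r2 == (a + b + c + d + 2) >> 2);
                    }
        return 0;
    }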

int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    if ((size_t) pix2 & 0x7) {
        /* works only when pix2 is actually unaligned */
        do {                    /* do 8 pixel a time */
            uint64_t p1, p2;

            p1 = ldq(pix1);
            p2 = uldq(pix2);
            result += perr(p1, p2);

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else {
        do {
            uint64_t p1, p2;

            p1 = ldq(pix1);
            p2 = ldq(pix2);
            result += perr(p1, p2);

            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    }

    return result;
}

#if 0                           /* now done in assembly */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int result = 0;
    int h = 16;

    if ((size_t) pix2 & 0x7) {
        /* works only when pix2 is actually unaligned */
        do {                    /* do 16 pixel a time */
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t t;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            t    = ldq_u(pix2 + 8);
            p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
            p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
    } else {
        do {
            uint64_t p1_l, p1_r, p2_l, p2_r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            p2_l = ldq(pix2);
            p2_r = ldq(pix2 + 8);
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
    }

    return result;
}
#endif

int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;
    uint64_t disalign = (size_t) pix2 & 0x7;

    switch (disalign) {
    case 0:
        do {
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq(pix2);
            r    = ldq(pix2 + 8);
            p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
            p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
        break;
    case 7:
        /* |.......l|lllllllr|rrrrrrr*|
           This case is special because disalign1 would be 8, which
           gets treated as 0 by extqh.  At least it is a bit faster
           that way :)  */
        do {
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, m, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq_u(pix2);
            m    = ldq_u(pix2 + 8);
            r    = ldq_u(pix2 + 16);
            p2_l = avg2(extql(l, disalign) | extqh(m, disalign), m);
            p2_r = avg2(extql(m, disalign) | extqh(r, disalign), r);
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
        break;
    default:
        do {
            uint64_t disalign1 = disalign + 1;
            uint64_t p1_l, p1_r, p2_l, p2_r;
            uint64_t l, m, r;

            p1_l = ldq(pix1);
            p1_r = ldq(pix1 + 8);
            l    = ldq_u(pix2);
            m    = ldq_u(pix2 + 8);
            r    = ldq_u(pix2 + 16);
            p2_l = avg2(extql(l, disalign) | extqh(m, disalign),
                        extql(l, disalign1) | extqh(m, disalign1));
            p2_r = avg2(extql(m, disalign) | extqh(r, disalign),
                        extql(m, disalign1) | extqh(r, disalign1));
            pix1 += line_size;
            pix2 += line_size;

            result += perr(p1_l, p2_l)
                    + perr(p1_r, p2_r);
        } while (--h);
        break;
    }
    return result;
}

int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    if ((size_t) pix2 & 0x7) {
        uint64_t t, p2_l, p2_r;
        t    = ldq_u(pix2 + 8);
        p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
        p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        do {
            uint64_t p1_l, p1_r, np2_l, np2_r;
            uint64_t t;

            p1_l  = ldq(pix1);
            p1_r  = ldq(pix1 + 8);
            pix2 += line_size;
            t     = ldq_u(pix2 + 8);
            np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
            np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);

            result += perr(p1_l, avg2(p2_l, np2_l))
                    + perr(p1_r, avg2(p2_r, np2_r));

            pix1 += line_size;
            p2_l  = np2_l;
            p2_r  = np2_r;

        } while (--h);
    } else {
        uint64_t p2_l, p2_r;
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        do {
            uint64_t p1_l, p1_r, np2_l, np2_r;

            p1_l  = ldq(pix1);
            p1_r  = ldq(pix1 + 8);
            pix2 += line_size;
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);

            result += perr(p1_l, avg2(p2_l, np2_l))
                    + perr(p1_r, avg2(p2_r, np2_r));

            pix1 += line_size;
            p2_l  = np2_l;
            p2_r  = np2_r;
        } while (--h);
    }
    return result;
}

int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;

    uint64_t p1_l, p1_r;
    uint64_t p2_l, p2_r, p2_x;

    p1_l = ldq(pix1);
    p1_r = ldq(pix1 + 8);

    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
        p2_x = (uint64_t) pix2[16] << 56;
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
        p2_x = ldq(pix2 + 16) << 56;
    }

    do {
        uint64_t np1_l, np1_r;
        uint64_t np2_l, np2_r, np2_x;

        pix1 += line_size;
        pix2 += line_size;

        np1_l = ldq(pix1);
        np1_r = ldq(pix1 + 8);

        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
            np2_x = (uint64_t) pix2[16] << 56;
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
            np2_x = ldq(pix2 + 16) << 56;
        }

        result += perr(p1_l,
                       avg4( p2_l, ( p2_l >> 8) | ((uint64_t)  p2_r << 56),
                            np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
                + perr(p1_r,
                       avg4( p2_r, ( p2_r >> 8) | ((uint64_t)  p2_x),
                            np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));

        p1_l = np1_l;
        p1_r = np1_r;
        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}

libavcodec/alpha/motion_est_mvi_asm.S
@@ -1,179 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0
        clr     v0
        beq     t0, $aligned
        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* calc line 0 */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4, 2, a4       # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4, 4, a4       # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm

libavcodec/alpha/mpegvideo_alpha.c
@@ -1,110 +0,0 @@
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/mpegvideo.h"
#include "asm.h"

static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
                                    uint64_t qscale, uint64_t qadd)
{
    uint64_t qmul = qscale << 1;
    uint64_t correction = WORD_VEC(qmul * 255 >> 8);
    int i;

    qadd = WORD_VEC(qadd);

    for (i = 0; i <= n_coeffs; block += 4, i += 4) {
        uint64_t levels, negmask, zeros, add, sub;

        levels = ldq(block);
        if (levels == 0)
            continue;

#ifdef __alpha_max__
        /* I don't think the speed difference justifies runtime
           detection.  */
        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
        negmask = minsw4(negmask, 0); /* positive -> 0000 (0)  */
#else
        negmask  = cmpbge(WORD_VEC(0x7fff), levels);
        negmask &= (negmask >> 1) | (1 << 7);
        negmask  = zap(-1, negmask);
#endif

        zeros  = cmpbge(0, levels);
        zeros &= zeros >> 1;
        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
           zapping the lower byte suffices.  */

        levels *= qmul;
        levels -= correction & (negmask << 16);

        add = qadd & ~negmask;
        sub = qadd &  negmask;
        /* Set qadd to 0 for levels == 0.  */
        add = zap(add, zeros);
        levels += add;
        levels -= sub;

        stq(levels, block);
    }
}
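
Note: this is the standard H.263 inverse quantisation, level' = qmul * level + sign(level) * qadd with zero coefficients left untouched, done four coefficients per iteration. The correction term handles the genuinely tricky part: multiplying the packed word treats a negative lane as its value plus 0x10000, so each negative lane deposits an excess of qmul - 1 (which is what WORD_VEC(qmul * 255 >> 8) holds per lane) into the lane above it, and `levels -= correction & (negmask << 16)` removes exactly that. A scalar reference (mine, not from the deleted file):

    #include <stdint.h>

    static void dct_unquantize_h263_ref(int16_t *block, int n_coeffs,
                                        int qscale, int qadd)
    {
        int qmul = qscale << 1;

        for (int i = 0; i <= n_coeffs; i++) {
            int level = block[i];
            if (level > 0)
                level = level * qmul + qadd;
            else if (level < 0)
                level = level * qmul - qadd;
            block[i] = level;
        }
    }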

static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block,
                                          int n, int qscale)
{
    int n_coeffs;
    uint64_t qadd;
    int16_t block0 = block[0];

    if (!s->h263_aic) {
        if (n < 4)
            block0 *= s->y_dc_scale;
        else
            block0 *= s->c_dc_scale;
        qadd = (qscale - 1) | 1;
    } else {
        qadd = 0;
    }

    if (s->ac_pred)
        n_coeffs = 63;
    else
        n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];

    dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd);

    block[0] = block0;
}

static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block,
                                          int n, int qscale)
{
    int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
    dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1);
}

av_cold void ff_MPV_common_init_axp(MpegEncContext *s)
{
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp;
}

libavcodec/alpha/regdef.h
@@ -1,77 +0,0 @@
/*
 * Alpha optimized DSP utils
 * copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Some BSDs don't seem to have regdef.h... sigh */
#ifndef AVCODEC_ALPHA_REGDEF_H
#define AVCODEC_ALPHA_REGDEF_H

#define v0      $0      /* function return value */

#define t0      $1      /* temporary registers (caller-saved) */
#define t1      $2
#define t2      $3
#define t3      $4
#define t4      $5
#define t5      $6
#define t6      $7
#define t7      $8

#define s0      $9      /* saved-registers (callee-saved registers) */
#define s1      $10
#define s2      $11
#define s3      $12
#define s4      $13
#define s5      $14
#define s6      $15
#define fp      s6      /* frame-pointer (s6 in frame-less procedures) */

#define a0      $16     /* argument registers (caller-saved) */
#define a1      $17
#define a2      $18
#define a3      $19
#define a4      $20
#define a5      $21

#define t8      $22     /* more temps (caller-saved) */
#define t9      $23
#define t10     $24
#define t11     $25
#define ra      $26     /* return address register */
#define t12     $27

#define pv      t12     /* procedure-variable register */
#define AT      $at     /* assembler temporary */
#define gp      $29     /* global pointer */
#define sp      $30     /* stack pointer */
#define zero    $31     /* reads as zero, writes are noops */

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

#endif /* AVCODEC_ALPHA_REGDEF_H */

libavcodec/alpha/simple_idct_alpha.c
@@ -1,303 +0,0 @@
/*
 * Simple IDCT (Alpha optimized)
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * based upon some outcommented C code from mpeg2dec (idct_mmx.c
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
 *
 * Alpha optimizations by Måns Rullgård <mans@mansr.com>
 *                     and Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dsputil_alpha.h"
#include "asm.h"

// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
// W4 is actually exactly 16384, but using 16383 works around
// accumulating rounding errors for some encoders
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
#define ROW_SHIFT 11
#define COL_SHIFT 20
|
||||
|
||||
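The weights can be re-derived mechanically from the formula in the comment. A minimal verification sketch (an editor's addition, not part of the removed file; assumes a C99 compiler where <math.h> exposes M_PI, e.g. with -D_GNU_SOURCE, and linking with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* round(cos(i * M_PI / 16) * sqrt(2) * (1 << 14)) for i = 1..7 */
    for (int i = 1; i <= 7; i++)
        printf("W%d = %ld\n", i,
               lrint(cos(i * M_PI / 16) * sqrt(2) * (1 << 14)));
    return 0;
}

This prints 22725, 21407, 19266, 16384, 12873, 8867 and 4520; only W4 is then lowered to 16383, for the rounding reason the comment gives.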
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
static inline int idct_row(int16_t *row)
{
    int a0, a1, a2, a3, b0, b1, b2, b3, t;
    uint64_t l, r, t2;

    l = ldq(row);
    r = ldq(row + 4);

    if (l == 0 && r == 0)
        return 0;

    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));

    if (((l & ~0xffffUL) | r) == 0) {
        a0 >>= ROW_SHIFT;
        t2 = (uint16_t) a0;
        t2 |= t2 << 16;
        t2 |= t2 << 32;

        stq(t2, row);
        stq(t2, row + 4);
        return 1;
    }

    a1 = a0;
    a2 = a0;
    a3 = a0;

    t = extwl(l, 4); /* row[2] */
    if (t != 0) {
        t = sextw(t);
        a0 += W2 * t;
        a1 += W6 * t;
        a2 -= W6 * t;
        a3 -= W2 * t;
    }

    t = extwl(r, 0); /* row[4] */
    if (t != 0) {
        t = sextw(t);
        a0 += W4 * t;
        a1 -= W4 * t;
        a2 -= W4 * t;
        a3 += W4 * t;
    }

    t = extwl(r, 4); /* row[6] */
    if (t != 0) {
        t = sextw(t);
        a0 += W6 * t;
        a1 -= W2 * t;
        a2 += W2 * t;
        a3 -= W6 * t;
    }

    t = extwl(l, 2); /* row[1] */
    if (t != 0) {
        t = sextw(t);
        b0 = W1 * t;
        b1 = W3 * t;
        b2 = W5 * t;
        b3 = W7 * t;
    } else {
        b0 = 0;
        b1 = 0;
        b2 = 0;
        b3 = 0;
    }

    t = extwl(l, 6); /* row[3] */
    if (t) {
        t = sextw(t);
        b0 += W3 * t;
        b1 -= W7 * t;
        b2 -= W1 * t;
        b3 -= W5 * t;
    }

    t = extwl(r, 2); /* row[5] */
    if (t) {
        t = sextw(t);
        b0 += W5 * t;
        b1 -= W1 * t;
        b2 += W7 * t;
        b3 += W3 * t;
    }

    t = extwl(r, 6); /* row[7] */
    if (t) {
        t = sextw(t);
        b0 += W7 * t;
        b1 -= W5 * t;
        b2 += W3 * t;
        b3 -= W1 * t;
    }

    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;

    return 2;
}

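idct_row leans on the Alpha word-extract primitive: extwl(x, n) yields the zero-extended 16-bit field of x at byte offset n, so after the two ldq loads every coefficient test runs on registers. A portable stand-in, for illustration only (extwl_c is a hypothetical name, not from the tree):

#include <stdint.h>

/* Portable sketch of the Alpha EXTWL operation as used above: extract the
 * 16-bit word at byte offset n of x, zero-extended (little-endian layout,
 * as on Alpha). */
static inline unsigned extwl_c(uint64_t x, unsigned n)
{
    return (unsigned)((x >> (8 * (n & 7))) & 0xffff);
}

With the row loaded as l and r, extwl_c(l, 2) recovers row[1] and extwl_c(r, 6) recovers row[7], matching the comments in the function body.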
static inline void idct_col(int16_t *col)
{
    int a0, a1, a2, a3, b0, b1, b2, b3;

    col[0] += (1 << (COL_SHIFT - 1)) / W4;

    a0 = W4 * col[8 * 0];
    a1 = W4 * col[8 * 0];
    a2 = W4 * col[8 * 0];
    a3 = W4 * col[8 * 0];

    if (col[8 * 2]) {
        a0 += W2 * col[8 * 2];
        a1 += W6 * col[8 * 2];
        a2 -= W6 * col[8 * 2];
        a3 -= W2 * col[8 * 2];
    }

    if (col[8 * 4]) {
        a0 += W4 * col[8 * 4];
        a1 -= W4 * col[8 * 4];
        a2 -= W4 * col[8 * 4];
        a3 += W4 * col[8 * 4];
    }

    if (col[8 * 6]) {
        a0 += W6 * col[8 * 6];
        a1 -= W2 * col[8 * 6];
        a2 += W2 * col[8 * 6];
        a3 -= W6 * col[8 * 6];
    }

    if (col[8 * 1]) {
        b0 = W1 * col[8 * 1];
        b1 = W3 * col[8 * 1];
        b2 = W5 * col[8 * 1];
        b3 = W7 * col[8 * 1];
    } else {
        b0 = 0;
        b1 = 0;
        b2 = 0;
        b3 = 0;
    }

    if (col[8 * 3]) {
        b0 += W3 * col[8 * 3];
        b1 -= W7 * col[8 * 3];
        b2 -= W1 * col[8 * 3];
        b3 -= W5 * col[8 * 3];
    }

    if (col[8 * 5]) {
        b0 += W5 * col[8 * 5];
        b1 -= W1 * col[8 * 5];
        b2 += W7 * col[8 * 5];
        b3 += W3 * col[8 * 5];
    }

    if (col[8 * 7]) {
        b0 += W7 * col[8 * 7];
        b1 -= W5 * col[8 * 7];
        b2 += W3 * col[8 * 7];
        b3 -= W1 * col[8 * 7];
    }

    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
}

/* If all rows but the first one are zero after row transformation,
   all rows will be identical after column transformation. */
static inline void idct_col2(int16_t *col)
{
    int i;
    uint64_t l, r;

    for (i = 0; i < 8; ++i) {
        int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;

        a0 *= W4;
        col[i] = a0 >> COL_SHIFT;
    }

    l = ldq(col +  0 * 4); r = ldq(col +  1 * 4);
    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
}

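Both column transforms fold the rounding bias into the DC term before the W4 multiply: W4 * (x + (1 << (COL_SHIFT - 1)) / W4) is approximately W4 * x + (1 << (COL_SHIFT - 1)), so the later >> COL_SHIFT rounds to nearest instead of truncating. A throwaway check of that identity (an editor's sketch, not original code):

#include <stdio.h>

#define W4        16383
#define COL_SHIFT 20

int main(void)
{
    int x = 100;  /* an arbitrary DC coefficient */
    /* bias folded in before the multiply, as in idct_col/idct_col2 */
    int pre  = (W4 * (x + (1 << (COL_SHIFT - 1)) / W4)) >> COL_SHIFT;
    /* conventional half-ulp bias added after the multiply */
    int post = (W4 * x + (1 << (COL_SHIFT - 1))) >> COL_SHIFT;
    printf("%d %d\n", pre, post);  /* prints "2 2": the two agree */
    return 0;
}

Folding the bias in early saves an add per column, since the DC term is loaded anyway.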
void ff_simple_idct_axp(int16_t *block)
{
    int i;
    int rowsZero     = 1; /* all rows except row 0 zero */
    int rowsConstant = 1; /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idct_row(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idct_col2(block);
    } else if (rowsConstant) {
        idct_col(block);
        for (i = 0; i < 8; i += 2) {
            uint64_t v = (uint16_t) block[0];
            uint64_t w = (uint16_t) block[8];

            v |= v << 16;
            w |= w << 16;
            v |= v << 32;
            w |= w << 32;
            stq(v, block + 0 * 4);
            stq(v, block + 1 * 4);
            stq(w, block + 2 * 4);
            stq(w, block + 3 * 4);
            block += 4 * 4;
        }
    } else {
        for (i = 0; i < 8; i++)
            idct_col(block + i);
    }
}

void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}

void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}

@ -2442,7 +2442,9 @@ typedef struct AVCodecContext {
#define FF_IDCT_SIMPLEVIS 18
#define FF_IDCT_FAAN 20
#define FF_IDCT_SIMPLENEON 22
#if FF_API_ARCH_ALPHA
#define FF_IDCT_SIMPLEALPHA 23
#endif

    /**
     * bits per sample/pixel from the demuxer (needed for huffyuv).

@ -61,8 +61,6 @@ void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_neon(int16_t *data);

void ff_simple_idct_axp(int16_t *data);

struct algo {
    const char *name;
    void (*func)(int16_t *block);

@ -136,10 +134,6 @@ static const struct algo idct_tab[] = {
    { "SIMPLE-NEON",  ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
#endif

#if ARCH_ALPHA
    { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
#endif

    { 0 }
};

@ -2666,8 +2666,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
    }

    if (ARCH_ALPHA)
        ff_dsputil_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_dsputil_init_arm(c, avctx);
    if (ARCH_BFIN)

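This dispatch hunk (and the hpeldsp and mpegvideo ones below) shows why the removal must delete the call site and the prototype together: the tree uses `if (ARCH_ALPHA)` with a compile-time constant rather than #ifdef, relying on dead-code elimination. A hedged sketch of the idiom, with illustrative names:

/* ARCH_FOO stands in for the configure-defined ARCH_* constants, which are
 * always 0 or 1.  The branch folds away at compile time, yet the call is
 * still type-checked on every platform; the real build relies on the
 * compiler eliminating the call so the symbol need not exist elsewhere. */
#define ARCH_FOO 0              /* set by configure in the real tree */

void init_foo(void);            /* provided only where ARCH_FOO is 1 */

void init_all(void)
{
    if (ARCH_FOO)
        init_foo();             /* dead-code-eliminated when ARCH_FOO is 0 */
}
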
@ -311,7 +311,6 @@ int ff_check_alignment(void);

void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);

void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);

@ -54,8 +54,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
    hpel_funcs(avg, [3], 2);
    hpel_funcs(avg_no_rnd,, 16);

    if (ARCH_ALPHA)
        ff_hpeldsp_init_alpha(c, flags);
    if (ARCH_ARM)
        ff_hpeldsp_init_arm(c, flags);
    if (ARCH_BFIN)

@ -94,7 +94,6 @@ typedef struct HpelDSPContext {

void ff_hpeldsp_init(HpelDSPContext *c, int flags);

void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);

@ -165,8 +165,6 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    if (ARCH_ALPHA)
        ff_MPV_common_init_axp(s);
    if (ARCH_ARM)
        ff_MPV_common_init_arm(s);
    if (ARCH_BFIN)

@ -785,7 +785,6 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
                          const AVFrame *frame, int *got_packet);
void ff_MPV_encode_init_x86(MpegEncContext *s);
void ff_MPV_common_init_x86(MpegEncContext *s);
void ff_MPV_common_init_axp(MpegEncContext *s);
void ff_MPV_common_init_arm(MpegEncContext *s);
void ff_MPV_common_init_bfin(MpegEncContext *s);
void ff_MPV_common_init_ppc(MpegEncContext *s);

@ -242,10 +242,7 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n,
        : "%eax", "%edx"
    );
#else
    /* #elif ARCH_ALPHA */
    /* Divisions are extremely costly on Alpha; optimize the most
       common case. But they are costly everywhere...
     */
    /* Divisions are costly everywhere; optimize the most common case. */
    if (scale == 8) {
        a = (a + (8 >> 1)) / 8;
        b = (b + (8 >> 1)) / 8;

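The hunk above keeps the old Alpha-specific comment and its generic replacement side by side, as this diff renders both versions. The fast path itself survives the removal: (x + d/2) / d is round-to-nearest integer division, and d == 8 lets the compiler avoid a real divide, which Alpha notably had no hardware instruction for. A sketch of the idiom, assuming non-negative operands as in this DC-prediction code:

/* Round-to-nearest division by 8 for non-negative x;
 * equivalent to the (x + (8 >> 1)) / 8 form used above. */
static inline int div8_round(int x)
{
    return (x + 4) / 8;
}
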
@ -193,7 +193,9 @@ static const AVOption avcodec_options[] = {
    {"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
    {"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
    {"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
#if FF_API_ARCH_ALPHA
    {"simplealpha", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
#endif
    {"ipp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
    {"xvidmmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
    {"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},

@ -94,5 +94,8 @@
#ifndef FF_API_CODEC_PKT
#define FF_API_CODEC_PKT (LIBAVCODEC_VERSION_MAJOR < 56)
#endif
#ifndef FF_API_ARCH_ALPHA
#define FF_API_ARCH_ALPHA (LIBAVCODEC_VERSION_MAJOR < 56)
#endif
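The FF_API_ARCH_ALPHA gate added here follows the tree's standard deprecation pattern: the macro stays true until the next major bump, keeping FF_IDCT_SIMPLEALPHA visible for API compatibility, and then compiles away. A hedged sketch of how such a gate is typically consumed (the function and its name are illustrative, not from this commit):

#include "version.h"

static int idct_algo_is_deprecated(int algo)
{
#if FF_API_ARCH_ALPHA
    /* removed for real once LIBAVCODEC_VERSION_MAJOR reaches 56 */
    if (algo == FF_IDCT_SIMPLEALPHA)
        return 1;
#endif
    return 0;
}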

#endif /* AVCODEC_VERSION_H */