1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00
FFmpeg/libavutil/intreadwrite.h
Alexander Strange f6d0390657 Add macros for 64- and 128-bit write-combining optimization to intreadwrite.h.
Add x86 implementation using MMX/SSE.

Originally committed as revision 21281 to svn://svn.ffmpeg.org/ffmpeg/trunk
2010-01-18 10:24:33 +00:00

444 lines
13 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_INTREADWRITE_H
#define AVUTIL_INTREADWRITE_H
#include <stdint.h>
#include "config.h"
#include "bswap.h"
/*
* Arch-specific headers can provide any combination of
* AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
* Preprocessor symbols must be defined, even if these are implemented
* as inline functions.
*/
#if ARCH_ARM
# include "arm/intreadwrite.h"
#elif ARCH_AVR32
# include "avr32/intreadwrite.h"
#elif ARCH_MIPS
# include "mips/intreadwrite.h"
#elif ARCH_PPC
# include "ppc/intreadwrite.h"
#elif ARCH_X86
# include "x86/intreadwrite.h"
#endif
/*
* Map AV_RNXX <-> AV_R[BL]XX for all variants provided by per-arch headers.
*/
#if HAVE_BIGENDIAN
# if defined(AV_RN16) && !defined(AV_RB16)
# define AV_RB16(p) AV_RN16(p)
# elif !defined(AV_RN16) && defined(AV_RB16)
# define AV_RN16(p) AV_RB16(p)
# endif
# if defined(AV_WN16) && !defined(AV_WB16)
# define AV_WB16(p, v) AV_WN16(p, v)
# elif !defined(AV_WN16) && defined(AV_WB16)
# define AV_WN16(p, v) AV_WB16(p, v)
# endif
# if defined(AV_RN24) && !defined(AV_RB24)
# define AV_RB24(p) AV_RN24(p)
# elif !defined(AV_RN24) && defined(AV_RB24)
# define AV_RN24(p) AV_RB24(p)
# endif
# if defined(AV_WN24) && !defined(AV_WB24)
# define AV_WB24(p, v) AV_WN24(p, v)
# elif !defined(AV_WN24) && defined(AV_WB24)
# define AV_WN24(p, v) AV_WB24(p, v)
# endif
# if defined(AV_RN32) && !defined(AV_RB32)
# define AV_RB32(p) AV_RN32(p)
# elif !defined(AV_RN32) && defined(AV_RB32)
# define AV_RN32(p) AV_RB32(p)
# endif
# if defined(AV_WN32) && !defined(AV_WB32)
# define AV_WB32(p, v) AV_WN32(p, v)
# elif !defined(AV_WN32) && defined(AV_WB32)
# define AV_WN32(p, v) AV_WB32(p, v)
# endif
# if defined(AV_RN64) && !defined(AV_RB64)
# define AV_RB64(p) AV_RN64(p)
# elif !defined(AV_RN64) && defined(AV_RB64)
# define AV_RN64(p) AV_RB64(p)
# endif
# if defined(AV_WN64) && !defined(AV_WB64)
# define AV_WB64(p, v) AV_WN64(p, v)
# elif !defined(AV_WN64) && defined(AV_WB64)
# define AV_WN64(p, v) AV_WB64(p, v)
# endif
#else /* HAVE_BIGENDIAN */
# if defined(AV_RN16) && !defined(AV_RL16)
# define AV_RL16(p) AV_RN16(p)
# elif !defined(AV_RN16) && defined(AV_RL16)
# define AV_RN16(p) AV_RL16(p)
# endif
# if defined(AV_WN16) && !defined(AV_WL16)
# define AV_WL16(p, v) AV_WN16(p, v)
# elif !defined(AV_WN16) && defined(AV_WL16)
# define AV_WN16(p, v) AV_WL16(p, v)
# endif
# if defined(AV_RN24) && !defined(AV_RL24)
# define AV_RL24(p) AV_RN24(p)
# elif !defined(AV_RN24) && defined(AV_RL24)
# define AV_RN24(p) AV_RL24(p)
# endif
# if defined(AV_WN24) && !defined(AV_WL24)
# define AV_WL24(p, v) AV_WN24(p, v)
# elif !defined(AV_WN24) && defined(AV_WL24)
# define AV_WN24(p, v) AV_WL24(p, v)
# endif
# if defined(AV_RN32) && !defined(AV_RL32)
# define AV_RL32(p) AV_RN32(p)
# elif !defined(AV_RN32) && defined(AV_RL32)
# define AV_RN32(p) AV_RL32(p)
# endif
# if defined(AV_WN32) && !defined(AV_WL32)
# define AV_WL32(p, v) AV_WN32(p, v)
# elif !defined(AV_WN32) && defined(AV_WL32)
# define AV_WN32(p, v) AV_WL32(p, v)
# endif
# if defined(AV_RN64) && !defined(AV_RL64)
# define AV_RL64(p) AV_RN64(p)
# elif !defined(AV_RN64) && defined(AV_RL64)
# define AV_RN64(p) AV_RL64(p)
# endif
# if defined(AV_WN64) && !defined(AV_WL64)
# define AV_WL64(p, v) AV_WN64(p, v)
# elif !defined(AV_WN64) && defined(AV_WL64)
# define AV_WN64(p, v) AV_WL64(p, v)
# endif
#endif /* !HAVE_BIGENDIAN */
/*
* Define AV_[RW]N helper macros to simplify definitions not provided
* by per-arch headers.
*/
#if HAVE_ATTRIBUTE_PACKED
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));
struct unaligned_16 { uint16_t l; } __attribute__((packed));
# define AV_RN(s, p) (((const struct unaligned_##s *) (p))->l)
# define AV_WN(s, p, v) ((((struct unaligned_##s *) (p))->l) = (v))
#elif defined(__DECC)
# define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
# define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
#elif HAVE_FAST_UNALIGNED
# define AV_RN(s, p) (*((const uint##s##_t*)(p)))
# define AV_WN(s, p, v) (*((uint##s##_t*)(p)) = (v))
#else
#ifndef AV_RB16
# define AV_RB16(x) \
((((const uint8_t*)(x))[0] << 8) | \
((const uint8_t*)(x))[1])
#endif
#ifndef AV_WB16
# define AV_WB16(p, d) do { \
((uint8_t*)(p))[1] = (d); \
((uint8_t*)(p))[0] = (d)>>8; \
} while(0)
#endif
#ifndef AV_RL16
# define AV_RL16(x) \
((((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL16
# define AV_WL16(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
} while(0)
#endif
#ifndef AV_RB32
# define AV_RB32(x) \
((((const uint8_t*)(x))[0] << 24) | \
(((const uint8_t*)(x))[1] << 16) | \
(((const uint8_t*)(x))[2] << 8) | \
((const uint8_t*)(x))[3])
#endif
#ifndef AV_WB32
# define AV_WB32(p, d) do { \
((uint8_t*)(p))[3] = (d); \
((uint8_t*)(p))[2] = (d)>>8; \
((uint8_t*)(p))[1] = (d)>>16; \
((uint8_t*)(p))[0] = (d)>>24; \
} while(0)
#endif
#ifndef AV_RL32
# define AV_RL32(x) \
((((const uint8_t*)(x))[3] << 24) | \
(((const uint8_t*)(x))[2] << 16) | \
(((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL32
# define AV_WL32(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
((uint8_t*)(p))[3] = (d)>>24; \
} while(0)
#endif
#ifndef AV_RB64
# define AV_RB64(x) \
(((uint64_t)((const uint8_t*)(x))[0] << 56) | \
((uint64_t)((const uint8_t*)(x))[1] << 48) | \
((uint64_t)((const uint8_t*)(x))[2] << 40) | \
((uint64_t)((const uint8_t*)(x))[3] << 32) | \
((uint64_t)((const uint8_t*)(x))[4] << 24) | \
((uint64_t)((const uint8_t*)(x))[5] << 16) | \
((uint64_t)((const uint8_t*)(x))[6] << 8) | \
(uint64_t)((const uint8_t*)(x))[7])
#endif
#ifndef AV_WB64
# define AV_WB64(p, d) do { \
((uint8_t*)(p))[7] = (d); \
((uint8_t*)(p))[6] = (d)>>8; \
((uint8_t*)(p))[5] = (d)>>16; \
((uint8_t*)(p))[4] = (d)>>24; \
((uint8_t*)(p))[3] = (d)>>32; \
((uint8_t*)(p))[2] = (d)>>40; \
((uint8_t*)(p))[1] = (d)>>48; \
((uint8_t*)(p))[0] = (d)>>56; \
} while(0)
#endif
#ifndef AV_RL64
# define AV_RL64(x) \
(((uint64_t)((const uint8_t*)(x))[7] << 56) | \
((uint64_t)((const uint8_t*)(x))[6] << 48) | \
((uint64_t)((const uint8_t*)(x))[5] << 40) | \
((uint64_t)((const uint8_t*)(x))[4] << 32) | \
((uint64_t)((const uint8_t*)(x))[3] << 24) | \
((uint64_t)((const uint8_t*)(x))[2] << 16) | \
((uint64_t)((const uint8_t*)(x))[1] << 8) | \
(uint64_t)((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL64
# define AV_WL64(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
((uint8_t*)(p))[3] = (d)>>24; \
((uint8_t*)(p))[4] = (d)>>32; \
((uint8_t*)(p))[5] = (d)>>40; \
((uint8_t*)(p))[6] = (d)>>48; \
((uint8_t*)(p))[7] = (d)>>56; \
} while(0)
#endif
#if HAVE_BIGENDIAN
# define AV_RN(s, p) AV_RB##s(p)
# define AV_WN(s, p, v) AV_WB##s(p, v)
#else
# define AV_RN(s, p) AV_RL##s(p)
# define AV_WN(s, p, v) AV_WL##s(p, v)
#endif
#endif /* HAVE_FAST_UNALIGNED */
#ifndef AV_RN16
# define AV_RN16(p) AV_RN(16, p)
#endif
#ifndef AV_RN32
# define AV_RN32(p) AV_RN(32, p)
#endif
#ifndef AV_RN64
# define AV_RN64(p) AV_RN(64, p)
#endif
#ifndef AV_WN16
# define AV_WN16(p, v) AV_WN(16, p, v)
#endif
#ifndef AV_WN32
# define AV_WN32(p, v) AV_WN(32, p, v)
#endif
#ifndef AV_WN64
# define AV_WN64(p, v) AV_WN(64, p, v)
#endif
#if HAVE_BIGENDIAN
# define AV_RB(s, p) AV_RN##s(p)
# define AV_WB(s, p, v) AV_WN##s(p, v)
# define AV_RL(s, p) bswap_##s(AV_RN##s(p))
# define AV_WL(s, p, v) AV_WN##s(p, bswap_##s(v))
#else
# define AV_RB(s, p) bswap_##s(AV_RN##s(p))
# define AV_WB(s, p, v) AV_WN##s(p, bswap_##s(v))
# define AV_RL(s, p) AV_RN##s(p)
# define AV_WL(s, p, v) AV_WN##s(p, v)
#endif
#define AV_RB8(x) (((const uint8_t*)(x))[0])
#define AV_WB8(p, d) do { ((uint8_t*)(p))[0] = (d); } while(0)
#define AV_RL8(x) AV_RB8(x)
#define AV_WL8(p, d) AV_WB8(p, d)
#ifndef AV_RB16
# define AV_RB16(p) AV_RB(16, p)
#endif
#ifndef AV_WB16
# define AV_WB16(p, v) AV_WB(16, p, v)
#endif
#ifndef AV_RL16
# define AV_RL16(p) AV_RL(16, p)
#endif
#ifndef AV_WL16
# define AV_WL16(p, v) AV_WL(16, p, v)
#endif
#ifndef AV_RB32
# define AV_RB32(p) AV_RB(32, p)
#endif
#ifndef AV_WB32
# define AV_WB32(p, v) AV_WB(32, p, v)
#endif
#ifndef AV_RL32
# define AV_RL32(p) AV_RL(32, p)
#endif
#ifndef AV_WL32
# define AV_WL32(p, v) AV_WL(32, p, v)
#endif
#ifndef AV_RB64
# define AV_RB64(p) AV_RB(64, p)
#endif
#ifndef AV_WB64
# define AV_WB64(p, v) AV_WB(64, p, v)
#endif
#ifndef AV_RL64
# define AV_RL64(p) AV_RL(64, p)
#endif
#ifndef AV_WL64
# define AV_WL64(p, v) AV_WL(64, p, v)
#endif
#ifndef AV_RB24
# define AV_RB24(x) \
((((const uint8_t*)(x))[0] << 16) | \
(((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[2])
#endif
#ifndef AV_WB24
# define AV_WB24(p, d) do { \
((uint8_t*)(p))[2] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[0] = (d)>>16; \
} while(0)
#endif
#ifndef AV_RL24
# define AV_RL24(x) \
((((const uint8_t*)(x))[2] << 16) | \
(((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL24
# define AV_WL24(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
} while(0)
#endif
/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
* naturally aligned. They may be implemented using MMX,
* so emms_c() must be called before using any float code
* afterwards.
*/
#define AV_COPY(n, d, s) (*(uint##n##_t*)(d) = *(const uint##n##_t*)(s))
#ifndef AV_COPY64
# define AV_COPY64(d, s) AV_COPY(64, d, s)
#endif
#ifndef AV_COPY128
# define AV_COPY128(d, s) \
do { \
AV_COPY64(d, s); \
AV_COPY64((char*)(d)+8, (char*)(s)+8); \
} while(0)
#endif
#define AV_SWAP(n, a, b) FFSWAP(uint##n##_t, *(uint##n##_t*)(a), *(uint##n##_t*)(b))
#ifndef AV_SWAP64
# define AV_SWAP64(a, b) AV_SWAP(64, a, b)
#endif
#define AV_ZERO(n, d) (*(uint##n##_t*)(d) = 0)
#ifndef AV_ZERO64
# define AV_ZERO64(d) AV_ZERO(64, d)
#endif
#ifndef AV_ZERO128
# define AV_ZERO128(d) \
do { \
AV_ZERO64(d); \
AV_ZERO64((char*)(d)+8); \
} while(0)
#endif
#endif /* AVUTIL_INTREADWRITE_H */