
Merge remote branch 'qatar/master'

* qatar/master: (32 commits)
  10-bit H.264 x86 chroma v loopfilter asm
  Port SMPTE S302M audio decoder from FFmbc 0.3. [Copyright headers corrected]
  Fix crash of interlaced MPEG2 decoding
  h264pred: fix one more aliasing violation.
  doc/APIchanges: fill in missing hashes and dates.
  flacenc: use proper initializers for AVOption default values.
  lavc: deprecate named constants for deprecated antialias_algo.
  aac: workaround for compilation on cygwin
  swscale: extend YUV422p support to 10bits depth
  tiff: add support for inverted FillOrder for uncompressed data
  Remove unused softfloat implementation.
  h264pred: fix aliasing violations.
  rotozoom: Eliminate French variable name.
  rotozoom: Check return value of fread().
  rotozoom: Return an error value instead of calling exit().
  rotozoom: Make init_demo() return int and check for errors on invocation.
  rotozoom: Drop silly UINT8 typedef.
  rotozoom: Drop some unnecessary parentheses.
  rotozoom: K&R coding style cosmetics
  rtsp: Only do keepalive using GET_PARAMETER if the server supports it
  ...

Conflicts:
	Changelog
	cmdutils.c
	doc/APIchanges
	doc/general.texi
	ffmpeg.c
	ffplay.c
	libavcodec/h264pred_template.c
	libavcodec/resample.c
	libavutil/pixfmt.h
	libavutil/softfloat.c
	libavutil/softfloat.h
	tests/rotozoom.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Michael Niedermayer 2011-05-12 04:51:24 +02:00
commit 612122b187
26 changed files with 1613 additions and 433 deletions

View File

@ -12,6 +12,8 @@ version <next>:
 - Lots of deprecated API cruft removed
 - fft and imdct optimizations for AVX (Sandy Bridge) processors
 - showinfo filter added
+- DPX image encoder
+- SMPTE 302M AES3 audio decoder
 version 0.7_beta1:

View File

@ -13,7 +13,7 @@ libavutil: 2011-04-18
 API changes, most recent first:
-2011-05-10 - xxxxxxx - lavc 53.3.0 - avcodec.h
+2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h
 Deprecate AVLPCType and the following fields in
 AVCodecContext: lpc_coeff_precision, prediction_order_method,
 min_partition_order, max_partition_order, lpc_type, lpc_passes.
@ -43,15 +43,15 @@ API changes, most recent first:
 Add av_dynarray_add function for adding
 an element to a dynamic array.
-2011-04-XX - bebe72f - lavu 51.1.0 - avutil.h
+2011-04-26 - bebe72f - lavu 51.1.0 - avutil.h
 Add AVPictureType enum and av_get_picture_type_char(), deprecate
 FF_*_TYPE defines and av_get_pict_type_char() defined in
 libavcodec/avcodec.h.
-2011-04-xx - 10d3940 - lavfi 2.3.0 - avfilter.h
+2011-04-26 - 10d3940 - lavfi 2.3.0 - avfilter.h
 Add pict_type and key_frame fields to AVFilterBufferRefVideo.
-2011-04-xx - 7a11c82 - lavfi 2.2.0 - vsrc_buffer
+2011-04-26 - 7a11c82 - lavfi 2.2.0 - vsrc_buffer
 Add sample_aspect_ratio fields to vsrc_buffer arguments
 2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h

View File

@ -19,7 +19,6 @@ integer.c 128bit integer math
 lls.c
 mathematics.c greatest common divisor, integer sqrt, integer log2, ...
 mem.c memory allocation routines with guaranteed alignment
-softfloat.c
 Headers:
 bswap.h big/little/native-endian conversion code

View File

@ -677,6 +677,7 @@ following image formats are supported:
 @item Sierra VMD audio @tab @tab X
 @tab Used in Sierra VMD files.
 @item Smacker audio @tab @tab X
+@item SMPTE 302M AES3 audio @tab @tab X
 @item Sonic @tab X @tab X
 @tab experimental codec
 @item Sonic lossless @tab X @tab X

View File

@ -663,11 +663,11 @@ static void choose_pixel_fmt(AVStream *st, AVCodec *codec)
 }
 if (*p == -1) {
 if(st->codec->pix_fmt != PIX_FMT_NONE)
 av_log(NULL, AV_LOG_WARNING,
 "Incompatible pixel format '%s' for codec '%s', auto-selecting format '%s'\n",
 av_pix_fmt_descriptors[st->codec->pix_fmt].name,
 codec->name,
 av_pix_fmt_descriptors[codec->pix_fmts[0]].name);
 st->codec->pix_fmt = codec->pix_fmts[0];
 }
 }
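For context, choose_pixel_fmt() walks the codec's PIX_FMT_NONE-terminated pix_fmts list and only falls back to the codec's first pixel format when no match is found. A minimal sketch of that selection logic follows; the helper name and loop are assumptions based on the visible *p == -1 terminator check, not a copy of ffmpeg.c:

#include "libavutil/pixfmt.h"

/* Hypothetical helper: pick a pixel format the codec supports. */
static enum PixelFormat pick_pix_fmt(const enum PixelFormat *codec_fmts,
                                     enum PixelFormat wanted)
{
    const enum PixelFormat *p = codec_fmts;
    while (*p != PIX_FMT_NONE && *p != wanted)  /* PIX_FMT_NONE == -1 */
        p++;
    if (*p == PIX_FMT_NONE)   /* no match: warn and auto-select, as above */
        return codec_fmts[0];
    return wanted;
}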

View File

@ -329,6 +329,7 @@ OBJS-$(CONFIG_RV30_DECODER) += rv30.o rv34.o rv30dsp.o \
 mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o rv40dsp.o \
 mpegvideo.o error_resilience.o
+OBJS-$(CONFIG_S302M_DECODER) += s302m.o
 OBJS-$(CONFIG_SGI_DECODER) += sgidec.o
 OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o
 OBJS-$(CONFIG_SHORTEN_DECODER) += shorten.o

View File

@ -30,6 +30,8 @@
 * add sane pulse detection
 ***********************************/
+#include "libavutil/libm.h" // brought forward to work around cygwin header breakage
 #include <float.h>
 #include <math.h>
 #include "avcodec.h"
@ -37,7 +39,6 @@
 #include "aac.h"
 #include "aacenc.h"
 #include "aactab.h"
-#include "libavutil/libm.h"
 /** bits needed to code codebook run value for long windows */
 static const uint8_t run_value_bits_long[64] = {

View File

@ -184,6 +184,7 @@ void avcodec_register_all(void)
 REGISTER_ENCDEC (RV20, rv20);
 REGISTER_DECODER (RV30, rv30);
 REGISTER_DECODER (RV40, rv40);
+REGISTER_DECODER (S302M, s302m);
 REGISTER_ENCDEC (SGI, sgi);
 REGISTER_DECODER (SMACKER, smacker);
 REGISTER_DECODER (SMC, smc);

View File

@ -232,6 +232,7 @@ enum CodecID {
 CODEC_ID_PCM_F64LE,
 CODEC_ID_PCM_BLURAY,
 CODEC_ID_PCM_LXF,
+CODEC_ID_S302M,
 /* various ADPCM codecs */
 CODEC_ID_ADPCM_IMA_QT= 0x11000,

View File

@ -1352,22 +1352,22 @@ static av_cold int flac_encode_close(AVCodecContext *avctx)
 #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption options[] = {
-{ "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, 15, 0, MAX_LPC_PRECISION, FLAGS },
+{ "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = 15 }, 0, MAX_LPC_PRECISION, FLAGS },
-{ "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" },
+{ "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, {.dbl = FF_LPC_TYPE_DEFAULT }, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" },
-{ "none", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_NONE, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "none", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_NONE }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
-{ "fixed", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_FIXED, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "fixed", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_FIXED }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
-{ "levinson", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "levinson", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
-{ "cholesky", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "cholesky", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
-{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, FLAGS },
+{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, FLAGS },
-{ "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS },
+{ "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, MAX_PARTITION_ORDER, FLAGS },
-{ "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS },
+{ "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, MAX_PARTITION_ORDER, FLAGS },
-{ "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, -1, -1, ORDER_METHOD_LOG, FLAGS, "predm" },
+{ "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, ORDER_METHOD_LOG, FLAGS, "predm" },
-{ "estimation", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_EST, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "estimation", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_EST }, INT_MIN, INT_MAX, FLAGS, "predm" },
-{ "2level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_2LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "2level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_2LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" },
-{ "4level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_4LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "4level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_4LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" },
-{ "8level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_8LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "8level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_8LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" },
-{ "search", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_SEARCH, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "search", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_SEARCH }, INT_MIN, INT_MAX, FLAGS, "predm" },
-{ "log", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_LOG, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "log", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_LOG }, INT_MIN, INT_MAX, FLAGS, "predm" },
 { NULL },
 };
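The change above switches every FLAC encoder option to the AVOption default_val union initializer: with this API revision the default is stored in a union, so even integer defaults are written through the .dbl member. A minimal, hypothetical option table illustrating the idiom (ExampleContext and the "level" option are invented for illustration, not part of this commit):

#include <stddef.h>
#include "libavutil/opt.h"

typedef struct ExampleContext {
    const AVClass *class;
    int level;                 /* field backing the option below */
} ExampleContext;

static const AVOption example_options[] = {
    /* note the {.dbl = ...} union initializer for an integer default,
     * mirroring the flacenc table above */
    { "level", "example integer option", offsetof(ExampleContext, level),
      FF_OPT_TYPE_INT, {.dbl = 3 }, 0, 10,
      AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
    { NULL },
};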

View File

@ -305,11 +305,11 @@ static const AVOption options[]={
 {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #if FF_API_ANTIALIAS_ALGO
 {"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D, "aa"},
-#endif
 {"auto", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_AUTO }, INT_MIN, INT_MAX, V|D, "aa"},
 {"fastint", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FASTINT }, INT_MIN, INT_MAX, V|D, "aa"},
 {"int", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_INT }, INT_MIN, INT_MAX, V|D, "aa"},
 {"float", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FLOAT }, INT_MIN, INT_MAX, V|D, "aa"},
+#endif
 {"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E|D},
 {"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
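Both this hunk and the deprecation entries in the commit list rely on libavcodec's FF_API_* removal guards: each deprecated field or named constant is wrapped in a macro that stays true until the next major version bump. Roughly (the exact version threshold below is illustrative, not quoted from the tree):

/* Sketch of the deprecation-guard pattern; 54 is an assumed threshold. */
#ifndef FF_API_ANTIALIAS_ALGO
#define FF_API_ANTIALIAS_ALGO (LIBAVCODEC_VERSION_MAJOR < 54)
#endif

#if FF_API_ANTIALIAS_ALGO
/* the deprecated option and its named constants live here until the bump */
#endif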

View File

@ -29,6 +29,8 @@
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/samplefmt.h" #include "libavutil/samplefmt.h"
#define MAX_CHANNELS 8
struct AVResampleContext; struct AVResampleContext;
static const char *context_to_name(void *ptr) static const char *context_to_name(void *ptr)
@ -37,20 +39,22 @@ static const char *context_to_name(void *ptr)
} }
static const AVOption options[] = {{NULL}}; static const AVOption options[] = {{NULL}};
static const AVClass audioresample_context_class = { "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT }; static const AVClass audioresample_context_class = {
"ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT
};
struct ReSampleContext { struct ReSampleContext {
struct AVResampleContext *resample_context; struct AVResampleContext *resample_context;
short *temp[2]; short *temp[MAX_CHANNELS];
int temp_len; int temp_len;
float ratio; float ratio;
/* channel convert */ /* channel convert */
int input_channels, output_channels, filter_channels; int input_channels, output_channels, filter_channels;
AVAudioConvert *convert_ctx[2]; AVAudioConvert *convert_ctx[2];
enum AVSampleFormat sample_fmt[2]; ///< input and output sample format enum AVSampleFormat sample_fmt[2]; ///< input and output sample format
unsigned sample_size[2]; ///< size of one sample in sample_fmt unsigned sample_size[2]; ///< size of one sample in sample_fmt
short *buffer[2]; ///< buffers used for conversion to S16 short *buffer[2]; ///< buffers used for conversion to S16
unsigned buffer_size[2]; ///< sizes of allocated buffers unsigned buffer_size[2]; ///< sizes of allocated buffers
}; };
/* n1: number of samples */ /* n1: number of samples */
@ -104,41 +108,42 @@ static void mono_to_stereo(short *output, short *input, int n1)
} }
} }
/* XXX: should use more abstract 'N' channels system */ static void deinterleave(short **output, short *input, int channels, int samples)
static void stereo_split(short *output1, short *output2, short *input, int n)
{ {
int i; int i, j;
for(i=0;i<n;i++) { for (i = 0; i < samples; i++) {
*output1++ = *input++; for (j = 0; j < channels; j++) {
*output2++ = *input++; *output[j]++ = *input++;
}
} }
} }
static void stereo_mux(short *output, short *input1, short *input2, int n) static void interleave(short *output, short **input, int channels, int samples)
{ {
int i; int i, j;
for(i=0;i<n;i++) { for (i = 0; i < samples; i++) {
*output++ = *input1++; for (j = 0; j < channels; j++) {
*output++ = *input2++; *output++ = *input[j]++;
}
} }
} }
static void ac3_5p1_mux(short *output, short *input1, short *input2, int n) static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
{ {
int i; int i;
short l,r; short l, r;
for(i=0;i<n;i++) { for (i = 0; i < n; i++) {
l=*input1++; l = *input1++;
r=*input2++; r = *input2++;
*output++ = l; /* left */ *output++ = l; /* left */
*output++ = (l/2)+(r/2); /* center */ *output++ = (l / 2) + (r / 2); /* center */
*output++ = r; /* right */ *output++ = r; /* right */
*output++ = 0; /* left surround */ *output++ = 0; /* left surround */
*output++ = 0; /* right surroud */ *output++ = 0; /* right surroud */
*output++ = 0; /* low freq */ *output++ = 0; /* low freq */
} }
} }
@ -151,18 +156,25 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
{ {
ReSampleContext *s; ReSampleContext *s;
if ( input_channels > 2) if (input_channels > MAX_CHANNELS) {
{ av_log(NULL, AV_LOG_ERROR,
av_log(NULL, AV_LOG_ERROR, "Resampling with input channels greater than 2 unsupported.\n"); "Resampling with input channels greater than %d is unsupported.\n",
MAX_CHANNELS);
return NULL; return NULL;
} }
if (output_channels > 2 &&
!(output_channels == 6 && input_channels == 2) &&
output_channels != input_channels) {
av_log(NULL, AV_LOG_ERROR,
"Resampling output channel count must be 1 or 2 for mono input; 1, 2 or 6 for stereo input; or N for N channel input.\n");
return NULL;
}
s = av_mallocz(sizeof(ReSampleContext)); s = av_mallocz(sizeof(ReSampleContext));
if (!s) if (!s) {
{
av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n"); av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n");
return NULL; return NULL;
} }
s->ratio = (float)output_rate / (float)input_rate; s->ratio = (float)output_rate / (float)input_rate;
@ -173,10 +185,10 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
if (s->output_channels < s->filter_channels) if (s->output_channels < s->filter_channels)
s->filter_channels = s->output_channels; s->filter_channels = s->output_channels;
s->sample_fmt [0] = sample_fmt_in; s->sample_fmt[0] = sample_fmt_in;
s->sample_fmt [1] = sample_fmt_out; s->sample_fmt[1] = sample_fmt_out;
s->sample_size[0] = av_get_bits_per_sample_fmt(s->sample_fmt[0])>>3; s->sample_size[0] = av_get_bits_per_sample_fmt(s->sample_fmt[0]) >> 3;
s->sample_size[1] = av_get_bits_per_sample_fmt(s->sample_fmt[1])>>3; s->sample_size[1] = av_get_bits_per_sample_fmt(s->sample_fmt[1]) >> 3;
if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) { if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1, if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1,
@ -201,17 +213,10 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
} }
} }
/*
* AC-3 output is the only case where filter_channels could be greater than 2.
* input channels can't be greater than 2, so resample the 2 channels and then
* expand to 6 channels after the resampling.
*/
if(s->filter_channels>2)
s->filter_channels = 2;
#define TAPS 16 #define TAPS 16
s->resample_context= av_resample_init(output_rate, input_rate, s->resample_context = av_resample_init(output_rate, input_rate,
filter_length, log2_phase_count, linear, cutoff); filter_length, log2_phase_count,
linear, cutoff);
*(const AVClass**)s->resample_context = &audioresample_context_class; *(const AVClass**)s->resample_context = &audioresample_context_class;
@ -223,9 +228,9 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples) int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
{ {
int i, nb_samples1; int i, nb_samples1;
short *bufin[2]; short *bufin[MAX_CHANNELS];
short *bufout[2]; short *bufout[MAX_CHANNELS];
short *buftmp2[2], *buftmp3[2]; short *buftmp2[MAX_CHANNELS], *buftmp3[MAX_CHANNELS];
short *output_bak = NULL; short *output_bak = NULL;
int lenout; int lenout;
@ -240,7 +245,7 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
int ostride[1] = { 2 }; int ostride[1] = { 2 };
const void *ibuf[1] = { input }; const void *ibuf[1] = { input };
void *obuf[1]; void *obuf[1];
unsigned input_size = nb_samples*s->input_channels*2; unsigned input_size = nb_samples * s->input_channels * 2;
if (!s->buffer_size[0] || s->buffer_size[0] < input_size) { if (!s->buffer_size[0] || s->buffer_size[0] < input_size) {
av_free(s->buffer[0]); av_free(s->buffer[0]);
@ -255,12 +260,13 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
obuf[0] = s->buffer[0]; obuf[0] = s->buffer[0];
if (av_audio_convert(s->convert_ctx[0], obuf, ostride, if (av_audio_convert(s->convert_ctx[0], obuf, ostride,
ibuf, istride, nb_samples*s->input_channels) < 0) { ibuf, istride, nb_samples * s->input_channels) < 0) {
av_log(s->resample_context, AV_LOG_ERROR, "Audio sample format conversion failed\n"); av_log(s->resample_context, AV_LOG_ERROR,
"Audio sample format conversion failed\n");
return 0; return 0;
} }
input = s->buffer[0]; input = s->buffer[0];
} }
lenout= 2*s->output_channels*nb_samples * s->ratio + 16; lenout= 2*s->output_channels*nb_samples * s->ratio + 16;
@ -282,52 +288,50 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
} }
/* XXX: move those malloc to resample init code */ /* XXX: move those malloc to resample init code */
for(i=0; i<s->filter_channels; i++){ for (i = 0; i < s->filter_channels; i++) {
bufin[i]= av_malloc( (nb_samples + s->temp_len) * sizeof(short) ); bufin[i] = av_malloc((nb_samples + s->temp_len) * sizeof(short));
memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short)); memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short));
buftmp2[i] = bufin[i] + s->temp_len; buftmp2[i] = bufin[i] + s->temp_len;
bufout[i] = av_malloc(lenout * sizeof(short));
} }
/* make some zoom to avoid round pb */ if (s->input_channels == 2 && s->output_channels == 1) {
bufout[0]= av_malloc( lenout * sizeof(short) );
bufout[1]= av_malloc( lenout * sizeof(short) );
if (s->input_channels == 2 &&
s->output_channels == 1) {
buftmp3[0] = output; buftmp3[0] = output;
stereo_to_mono(buftmp2[0], input, nb_samples); stereo_to_mono(buftmp2[0], input, nb_samples);
} else if (s->output_channels >= 2 && s->input_channels == 1) { } else if (s->output_channels >= 2 && s->input_channels == 1) {
buftmp3[0] = bufout[0]; buftmp3[0] = bufout[0];
memcpy(buftmp2[0], input, nb_samples*sizeof(short)); memcpy(buftmp2[0], input, nb_samples * sizeof(short));
} else if (s->output_channels >= 2) { } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) {
buftmp3[0] = bufout[0]; for (i = 0; i < s->input_channels; i++) {
buftmp3[1] = bufout[1]; buftmp3[i] = bufout[i];
stereo_split(buftmp2[0], buftmp2[1], input, nb_samples); }
deinterleave(buftmp2, input, s->input_channels, nb_samples);
} else { } else {
buftmp3[0] = output; buftmp3[0] = output;
memcpy(buftmp2[0], input, nb_samples*sizeof(short)); memcpy(buftmp2[0], input, nb_samples * sizeof(short));
} }
nb_samples += s->temp_len; nb_samples += s->temp_len;
/* resample each channel */ /* resample each channel */
nb_samples1 = 0; /* avoid warning */ nb_samples1 = 0; /* avoid warning */
for(i=0;i<s->filter_channels;i++) { for (i = 0; i < s->filter_channels; i++) {
int consumed; int consumed;
int is_last= i+1 == s->filter_channels; int is_last = i + 1 == s->filter_channels;
nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i], &consumed, nb_samples, lenout, is_last); nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i],
s->temp_len= nb_samples - consumed; &consumed, nb_samples, lenout, is_last);
s->temp[i]= av_realloc(s->temp[i], s->temp_len*sizeof(short)); s->temp_len = nb_samples - consumed;
memcpy(s->temp[i], bufin[i] + consumed, s->temp_len*sizeof(short)); s->temp[i] = av_realloc(s->temp[i], s->temp_len * sizeof(short));
memcpy(s->temp[i], bufin[i] + consumed, s->temp_len * sizeof(short));
} }
if (s->output_channels == 2 && s->input_channels == 1) { if (s->output_channels == 2 && s->input_channels == 1) {
mono_to_stereo(output, buftmp3[0], nb_samples1); mono_to_stereo(output, buftmp3[0], nb_samples1);
} else if (s->output_channels == 2) { } else if (s->output_channels == 6 && s->input_channels == 2) {
stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
} else if (s->output_channels == 6) {
ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1); ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
} else if (s->output_channels == s->input_channels && s->input_channels >= 2) {
interleave(output, buftmp3, s->output_channels, nb_samples1);
} }
if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) { if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
@ -337,25 +341,27 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
void *obuf[1] = { output_bak }; void *obuf[1] = { output_bak };
if (av_audio_convert(s->convert_ctx[1], obuf, ostride, if (av_audio_convert(s->convert_ctx[1], obuf, ostride,
ibuf, istride, nb_samples1*s->output_channels) < 0) { ibuf, istride, nb_samples1 * s->output_channels) < 0) {
av_log(s->resample_context, AV_LOG_ERROR, "Audio sample format convertion failed\n"); av_log(s->resample_context, AV_LOG_ERROR,
"Audio sample format convertion failed\n");
return 0; return 0;
} }
} }
for(i=0; i<s->filter_channels; i++) for (i = 0; i < s->filter_channels; i++) {
av_free(bufin[i]); av_free(bufin[i]);
av_free(bufout[i]);
}
av_free(bufout[0]);
av_free(bufout[1]);
return nb_samples1; return nb_samples1;
} }
void audio_resample_close(ReSampleContext *s) void audio_resample_close(ReSampleContext *s)
{ {
int i;
av_resample_close(s->resample_context); av_resample_close(s->resample_context);
av_freep(&s->temp[0]); for (i = 0; i < s->filter_channels; i++)
av_freep(&s->temp[1]); av_freep(&s->temp[i]);
av_freep(&s->buffer[0]); av_freep(&s->buffer[0]);
av_freep(&s->buffer[1]); av_freep(&s->buffer[1]);
av_audio_convert_free(s->convert_ctx[0]); av_audio_convert_free(s->convert_ctx[0]);
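The rewritten resampler above generalizes the old stereo-only paths (stereo_split/stereo_mux) to deinterleave/interleave over up to MAX_CHANNELS channels, so N-to-N channel resampling now works. A hedged usage sketch of the public entry points visible in this hunk; parameter values (filter length 16, phase count 10, cutoff 0.8) are typical choices, not mandated by the API:

#include "libavcodec/avcodec.h"   /* ReSampleContext, av_audio_resample_init() */

/* Resample 6-channel interleaved S16 audio from 48 kHz to 44.1 kHz; the
 * channel count is preserved, which the new interleave()/deinterleave()
 * paths allow.  out must hold roughly in_samples * 44100/48000 * 6 shorts
 * plus a little slack. */
static int resample_6ch(short *out, short *in, int in_samples)
{
    ReSampleContext *rs = av_audio_resample_init(6, 6, 44100, 48000,
                                                 AV_SAMPLE_FMT_S16,
                                                 AV_SAMPLE_FMT_S16,
                                                 16, 10, 0, 0.8);
    int out_samples;

    if (!rs)
        return -1;
    out_samples = audio_resample(rs, out, in, in_samples);
    audio_resample_close(rs);
    return out_samples;   /* output samples per channel */
}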

libavcodec/s302m.c (new file, 141 lines)
View File

@ -0,0 +1,141 @@
/*
* SMPTE 302M decoder
* Copyright (c) 2008 Laurent Aimar <fenrir@videolan.org>
* Copyright (c) 2009 Baptiste Coudurier <baptiste.coudurier@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
#define AES3_HEADER_LEN 4
static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
int buf_size)
{
uint32_t h;
int frame_size, channels, id, bits;
if (buf_size <= AES3_HEADER_LEN) {
av_log(avctx, AV_LOG_ERROR, "frame is too short\n");
return AVERROR_INVALIDDATA;
}
/*
* AES3 header :
* size: 16
* number channels 2
* channel_id 8
* bits per samples 2
* alignments 4
*/
h = AV_RB32(buf);
frame_size = (h >> 16) & 0xffff;
channels = ((h >> 14) & 0x0003) * 2 + 2;
id = (h >> 6) & 0x00ff;
bits = ((h >> 4) & 0x0003) * 4 + 16;
if (AES3_HEADER_LEN + frame_size != buf_size || bits > 24) {
av_log(avctx, AV_LOG_ERROR, "frame has invalid header\n");
return AVERROR_INVALIDDATA;
}
/* Set output properties */
avctx->bits_per_coded_sample = bits;
if (bits > 16)
avctx->sample_fmt = SAMPLE_FMT_S32;
else
avctx->sample_fmt = SAMPLE_FMT_S16;
avctx->channels = channels;
avctx->sample_rate = 48000;
avctx->bit_rate = 48000 * avctx->channels * (avctx->bits_per_coded_sample + 4) +
32 * (48000 / (buf_size * 8 /
(avctx->channels *
(avctx->bits_per_coded_sample + 4))));
return frame_size;
}
static int s302m_decode_frame(AVCodecContext *avctx, void *data,
int *data_size, AVPacket *avpkt)
{
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
int frame_size = s302m_parse_frame_header(avctx, buf, buf_size);
if (frame_size < 0)
return frame_size;
buf_size -= AES3_HEADER_LEN;
buf += AES3_HEADER_LEN;
if (*data_size < 4 * buf_size * 8 / (avctx->bits_per_coded_sample + 4))
return -1;
if (avctx->bits_per_coded_sample == 24) {
uint32_t *o = data;
for (; buf_size > 6; buf_size -= 7) {
*o++ = (av_reverse[buf[2]] << 24) |
(av_reverse[buf[1]] << 16) |
(av_reverse[buf[0]] << 8);
*o++ = (av_reverse[buf[6] & 0xf0] << 28) |
(av_reverse[buf[5]] << 20) |
(av_reverse[buf[4]] << 12) |
(av_reverse[buf[3] & 0x0f] << 8);
buf += 7;
}
*data_size = (uint8_t*) o - (uint8_t*) data;
} else if (avctx->bits_per_coded_sample == 20) {
uint32_t *o = data;
for (; buf_size > 5; buf_size -= 6) {
*o++ = (av_reverse[buf[2] & 0xf0] << 28) |
(av_reverse[buf[1]] << 20) |
(av_reverse[buf[0]] << 12);
*o++ = (av_reverse[buf[5] & 0xf0] << 28) |
(av_reverse[buf[4]] << 20) |
(av_reverse[buf[3]] << 12);
buf += 6;
}
*data_size = (uint8_t*) o - (uint8_t*) data;
} else {
uint16_t *o = data;
for (; buf_size > 4; buf_size -= 5) {
*o++ = (av_reverse[buf[1]] << 8) |
av_reverse[buf[0]];
*o++ = (av_reverse[buf[4] & 0xf0] << 12) |
(av_reverse[buf[3]] << 4) |
av_reverse[buf[2] & 0x0f];
buf += 5;
}
*data_size = (uint8_t*) o - (uint8_t*) data;
}
return buf - avpkt->data;
}
AVCodec ff_s302m_decoder = {
.name = "s302m",
.type = AVMEDIA_TYPE_AUDIO,
.id = CODEC_ID_S302M,
.priv_data_size = 0,
.decode = s302m_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"),
};
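As a worked example of the header parse in s302m_parse_frame_header() above (the packet itself is hypothetical): a 1444-byte S302M packet carrying 16-bit stereo has a 4-byte AES3 header of 0x05A00000, and the field extraction yields:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t h     = 0x05A00000;                    /* hypothetical AES3 header */
    int frame_size = (h >> 16) & 0xffff;            /* -> 1440 payload bytes    */
    int channels   = ((h >> 14) & 0x0003) * 2 + 2;  /* -> 2 channels            */
    int id         = (h >>  6) & 0x00ff;            /* -> channel id 0          */
    int bits       = ((h >>  4) & 0x0003) * 4 + 16; /* -> 16 bits per sample    */

    /* 1440 bytes / 5 bytes per 16-bit stereo pair = 288 samples = 6 ms @ 48 kHz */
    printf("%d bytes, %d channels, id %d, %d bits\n", frame_size, channels, id, bits);
    return 0;
}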

View File

@ -168,7 +168,13 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
 }
 switch(s->compr){
 case TIFF_RAW:
-memcpy(dst, src, width);
+if (!s->fill_order) {
+    memcpy(dst, src, width);
+} else {
+    int i;
+    for (i = 0; i < width; i++)
+        dst[i] = av_reverse[src[i]];
+}
 src += width;
 break;
 case TIFF_PACKBITS:
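TIFF FillOrder=2 stores the bits of each byte LSB-first, so the new branch mirrors every byte through the av_reverse[] lookup table before copying. A scalar sketch of that byte reversal (equivalent in effect to the table lookup; the table contents are not reproduced here):

/* Reverse the bit order within one byte (what av_reverse[b] looks up). */
static unsigned char reverse8(unsigned char b)
{
    b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;  /* swap nibbles        */
    b = (b & 0xCC) >> 2 | (b & 0x33) << 2;  /* swap bit pairs      */
    b = (b & 0xAA) >> 1 | (b & 0x55) << 1;  /* swap adjacent bits  */
    return b;
}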

View File

@ -9,6 +9,7 @@ YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \
 MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o
 YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
+x86/h264_deblock_10bit.o \
 x86/h264_weight.o \
 x86/h264_idct.o \

View File

@ -43,6 +43,7 @@ DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
 {0x8000000080000000ULL, 0x8000000080000000ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_1 ) = 0x0001000100010001ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2 ) = {0x0002000200020002ULL, 0x0002000200020002ULL};
 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL};
 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};

View File

@ -1,10 +1,11 @@
;***************************************************************************** ;*****************************************************************************
;* MMX/SSE2-optimized H.264 deblocking code ;* MMX/SSE2/AVX-optimized H.264 deblocking code
;***************************************************************************** ;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project ;* Copyright (C) 2005-2011 x264 project
;* ;*
;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Jason Garrett-Glaser <darkshikari@gmail.com> ;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* Oskar Arvidsson <oskar@irock.se>
;* ;*
;* This file is part of FFmpeg. ;* This file is part of FFmpeg.
;* ;*
@ -26,96 +27,94 @@
%include "x86inc.asm" %include "x86inc.asm"
%include "x86util.asm" %include "x86util.asm"
SECTION_RODATA SECTION .text
cextern pb_0 cextern pb_0
cextern pb_1 cextern pb_1
cextern pb_3 cextern pb_3
cextern pb_A1 cextern pb_A1
SECTION .text
; expands to [base],...,[base+7*stride] ; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \ %define PASS8ROWS(base, base3, stride, stride3) \
[base], [base+stride], [base+stride*2], [base3], \ [base], [base+stride], [base+stride*2], [base3], \
[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] [base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4]
; in: 8 rows of 4 bytes in %1..%8 %define PASS8ROWS(base, base3, stride, stride3, offset) \
PASS8ROWS(base+offset, base3+offset, stride, stride3)
; in: 8 rows of 4 bytes in %4..%11
; out: 4 rows of 8 bytes in m0..m3 ; out: 4 rows of 8 bytes in m0..m3
%macro TRANSPOSE4x8_LOAD 8 %macro TRANSPOSE4x8_LOAD 11
movd m0, %1 movh m0, %4
movd m2, %2 movh m2, %5
movd m1, %3 movh m1, %6
movd m3, %4 movh m3, %7
punpcklbw m0, m2 punpckl%1 m0, m2
punpcklbw m1, m3 punpckl%1 m1, m3
movq m2, m0 mova m2, m0
punpcklwd m0, m1 punpckl%2 m0, m1
punpckhwd m2, m1 punpckh%2 m2, m1
movd m4, %5 movh m4, %8
movd m6, %6 movh m6, %9
movd m5, %7 movh m5, %10
movd m7, %8 movh m7, %11
punpcklbw m4, m6 punpckl%1 m4, m6
punpcklbw m5, m7 punpckl%1 m5, m7
movq m6, m4 mova m6, m4
punpcklwd m4, m5 punpckl%2 m4, m5
punpckhwd m6, m5 punpckh%2 m6, m5
movq m1, m0 punpckh%3 m1, m0, m4
movq m3, m2 punpckh%3 m3, m2, m6
punpckldq m0, m4 punpckl%3 m0, m4
punpckhdq m1, m4 punpckl%3 m2, m6
punpckldq m2, m6
punpckhdq m3, m6
%endmacro %endmacro
; in: 4 rows of 8 bytes in m0..m3 ; in: 4 rows of 8 bytes in m0..m3
; out: 8 rows of 4 bytes in %1..%8 ; out: 8 rows of 4 bytes in %1..%8
%macro TRANSPOSE8x4_STORE 8 %macro TRANSPOSE8x4B_STORE 8
movq m4, m0 punpckhdq m4, m0, m0
movq m5, m1 punpckhdq m5, m1, m1
movq m6, m2 punpckhdq m6, m2, m2
punpckhdq m4, m4
punpckhdq m5, m5
punpckhdq m6, m6
punpcklbw m0, m1 punpcklbw m0, m1
punpcklbw m2, m3 punpcklbw m2, m3
movq m1, m0 punpcklwd m1, m0, m2
punpcklwd m0, m2 punpckhwd m0, m2
punpckhwd m1, m2 movh %1, m1
movd %1, m0
punpckhdq m0, m0
movd %2, m0
movd %3, m1
punpckhdq m1, m1 punpckhdq m1, m1
movd %4, m1 movh %2, m1
movh %3, m0
punpckhdq m0, m0
movh %4, m0
punpckhdq m3, m3 punpckhdq m3, m3
punpcklbw m4, m5 punpcklbw m4, m5
punpcklbw m6, m3 punpcklbw m6, m3
movq m5, m4 punpcklwd m5, m4, m6
punpcklwd m4, m6 punpckhwd m4, m6
punpckhwd m5, m6 movh %5, m5
movd %5, m4
punpckhdq m4, m4
movd %6, m4
movd %7, m5
punpckhdq m5, m5 punpckhdq m5, m5
movd %8, m5 movh %6, m5
movh %7, m4
punpckhdq m4, m4
movh %8, m4
%endmacro
%macro TRANSPOSE4x8B_LOAD 8
TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
%endmacro %endmacro
%macro SBUTTERFLY3 4 %macro SBUTTERFLY3 4
movq %4, %2 punpckh%1 %4, %2, %3
punpckl%1 %2, %3 punpckl%1 %2, %3
punpckh%1 %4, %3
%endmacro %endmacro
; in: 8 rows of 8 (only the middle 6 pels are used) in %1..%8 ; in: 8 rows of 8 (only the middle 6 pels are used) in %1..%8
; out: 6 rows of 8 in [%9+0*16] .. [%9+5*16] ; out: 6 rows of 8 in [%9+0*16] .. [%9+5*16]
%macro TRANSPOSE6x8_MEM 9 %macro TRANSPOSE6x8_MEM 9
RESET_MM_PERMUTATION
movq m0, %1 movq m0, %1
movq m1, %2 movq m1, %2
movq m2, %3 movq m2, %3
@ -123,30 +122,32 @@ SECTION .text
movq m4, %5 movq m4, %5
movq m5, %6 movq m5, %6
movq m6, %7 movq m6, %7
SBUTTERFLY3 bw, m0, m1, m7 SBUTTERFLY bw, 0, 1, 7
SBUTTERFLY3 bw, m2, m3, m1 SBUTTERFLY bw, 2, 3, 7
SBUTTERFLY3 bw, m4, m5, m3 SBUTTERFLY bw, 4, 5, 7
movq [%9+0x10], m1 movq [%9+0x10], m3
SBUTTERFLY3 bw, m6, %8, m5 SBUTTERFLY3 bw, m6, %8, m7
SBUTTERFLY3 wd, m0, m2, m1 SBUTTERFLY wd, 0, 2, 3
SBUTTERFLY3 wd, m4, m6, m2 SBUTTERFLY wd, 4, 6, 3
punpckhdq m0, m4 punpckhdq m0, m4
movq [%9+0x00], m0 movq [%9+0x00], m0
SBUTTERFLY3 wd, m7, [%9+0x10], m6 SBUTTERFLY3 wd, m1, [%9+0x10], m3
SBUTTERFLY3 wd, m3, m5, m4 SBUTTERFLY wd, 5, 7, 0
SBUTTERFLY3 dq, m7, m3, m0 SBUTTERFLY dq, 1, 5, 0
SBUTTERFLY3 dq, m1, m2, m5 SBUTTERFLY dq, 2, 6, 0
punpckldq m6, m4 punpckldq m3, m7
movq [%9+0x10], m1 movq [%9+0x10], m2
movq [%9+0x20], m5 movq [%9+0x20], m6
movq [%9+0x30], m7 movq [%9+0x30], m1
movq [%9+0x40], m0 movq [%9+0x40], m5
movq [%9+0x50], m6 movq [%9+0x50], m3
RESET_MM_PERMUTATION
%endmacro %endmacro
; in: 8 rows of 8 in %1..%8 ; in: 8 rows of 8 in %1..%8
; out: 8 rows of 8 in %9..%16 ; out: 8 rows of 8 in %9..%16
%macro TRANSPOSE8x8_MEM 16 %macro TRANSPOSE8x8_MEM 16
RESET_MM_PERMUTATION
movq m0, %1 movq m0, %1
movq m1, %2 movq m1, %2
movq m2, %3 movq m2, %3
@ -154,38 +155,44 @@ SECTION .text
movq m4, %5 movq m4, %5
movq m5, %6 movq m5, %6
movq m6, %7 movq m6, %7
SBUTTERFLY3 bw, m0, m1, m7 SBUTTERFLY bw, 0, 1, 7
SBUTTERFLY3 bw, m2, m3, m1 SBUTTERFLY bw, 2, 3, 7
SBUTTERFLY3 bw, m4, m5, m3 SBUTTERFLY bw, 4, 5, 7
SBUTTERFLY3 bw, m6, %8, m5 SBUTTERFLY3 bw, m6, %8, m7
movq %9, m3 movq %9, m5
SBUTTERFLY3 wd, m0, m2, m3 SBUTTERFLY wd, 0, 2, 5
SBUTTERFLY3 wd, m4, m6, m2 SBUTTERFLY wd, 4, 6, 5
SBUTTERFLY3 wd, m7, m1, m6 SBUTTERFLY wd, 1, 3, 5
movq %11, m2 movq %11, m6
movq m2, %9 movq m6, %9
SBUTTERFLY3 wd, m2, m5, m1 SBUTTERFLY wd, 6, 7, 5
SBUTTERFLY3 dq, m0, m4, m5 SBUTTERFLY dq, 0, 4, 5
SBUTTERFLY3 dq, m7, m2, m4 SBUTTERFLY dq, 1, 6, 5
movq %9, m0 movq %9, m0
movq %10, m5 movq %10, m4
movq %13, m7 movq %13, m1
movq %14, m4 movq %14, m6
SBUTTERFLY3 dq, m3, %11, m0 SBUTTERFLY3 dq, m2, %11, m0
SBUTTERFLY3 dq, m6, m1, m5 SBUTTERFLY dq, 3, 7, 4
movq %11, m3 movq %11, m2
movq %12, m0 movq %12, m0
movq %15, m6 movq %15, m3
movq %16, m5 movq %16, m7
RESET_MM_PERMUTATION
%endmacro %endmacro
; out: %4 = |%1-%2|>%3 ; out: %4 = |%1-%2|>%3
; clobbers: %5 ; clobbers: %5
%macro DIFF_GT 5 %macro DIFF_GT 5
%if avx_enabled == 0
mova %5, %2 mova %5, %2
mova %4, %1 mova %4, %1
psubusb %5, %1 psubusb %5, %1
psubusb %4, %2 psubusb %4, %2
%else
psubusb %5, %2, %1
psubusb %4, %1, %2
%endif
por %4, %5 por %4, %5
psubusb %4, %3 psubusb %4, %3
%endmacro %endmacro
@ -193,32 +200,28 @@ SECTION .text
; out: %4 = |%1-%2|>%3 ; out: %4 = |%1-%2|>%3
; clobbers: %5 ; clobbers: %5
%macro DIFF_GT2 5 %macro DIFF_GT2 5
%ifdef ARCH_X86_64
psubusb %5, %2, %1
psubusb %4, %1, %2
%else
mova %5, %2 mova %5, %2
mova %4, %1 mova %4, %1
psubusb %5, %1 psubusb %5, %1
psubusb %4, %2 psubusb %4, %2
%endif
psubusb %5, %3 psubusb %5, %3
psubusb %4, %3 psubusb %4, %3
pcmpeqb %4, %5 pcmpeqb %4, %5
%endmacro %endmacro
%macro SPLATW 1
%ifidn m0, xmm0
pshuflw %1, %1, 0
punpcklqdq %1, %1
%else
pshufw %1, %1, 0
%endif
%endmacro
; in: m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1 ; in: m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1
; out: m5=beta-1, m7=mask, %3=alpha-1 ; out: m5=beta-1, m7=mask, %3=alpha-1
; clobbers: m4,m6 ; clobbers: m4,m6
%macro LOAD_MASK 2-3 %macro LOAD_MASK 2-3
movd m4, %1 movd m4, %1
movd m5, %2 movd m5, %2
SPLATW m4 SPLATW m4, m4
SPLATW m5 SPLATW m5, m5
packuswb m4, m4 ; 16x alpha-1 packuswb m4, m4 ; 16x alpha-1
packuswb m5, m5 ; 16x beta-1 packuswb m5, m5 ; 16x beta-1
%if %0>2 %if %0>2
@ -237,8 +240,7 @@ SECTION .text
; out: m1=p0' m2=q0' ; out: m1=p0' m2=q0'
; clobbers: m0,3-6 ; clobbers: m0,3-6
%macro DEBLOCK_P0_Q0 0 %macro DEBLOCK_P0_Q0 0
mova m5, m1 pxor m5, m1, m2 ; p0^q0
pxor m5, m2 ; p0^q0
pand m5, [pb_1] ; (p0^q0)&1 pand m5, [pb_1] ; (p0^q0)&1
pcmpeqb m4, m4 pcmpeqb m4, m4
pxor m3, m4 pxor m3, m4
@ -264,14 +266,12 @@ SECTION .text
; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ) ; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
; clobbers: q2, tmp, tc0 ; clobbers: q2, tmp, tc0
%macro LUMA_Q1 6 %macro LUMA_Q1 6
mova %6, m1 pavgb %6, m1, m2
pavgb %6, m2
pavgb %2, %6 ; avg(p2,avg(p0,q0)) pavgb %2, %6 ; avg(p2,avg(p0,q0))
pxor %6, %3 pxor %6, %3
pand %6, [pb_1] ; (p2^avg(p0,q0))&1 pand %6, [pb_1] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1 psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
mova %6, %1 psubusb %6, %1, %5
psubusb %6, %5
paddusb %5, %1 paddusb %5, %1
pmaxub %2, %6 pmaxub %2, %6
pminub %2, %5 pminub %2, %5
@ -280,10 +280,10 @@ SECTION .text
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_XMM %macro DEBLOCK_LUMA 1
cglobal x264_deblock_v_luma_sse2, 5,5,10 cglobal deblock_v_luma_8_%1, 5,5,10
movd m8, [r4] ; tc0 movd m8, [r4] ; tc0
lea r4, [r1*3] lea r4, [r1*3]
dec r2d ; alpha-1 dec r2d ; alpha-1
@ -307,8 +307,7 @@ cglobal x264_deblock_v_luma_sse2, 5,5,10
movdqa m3, [r4] ; p2 movdqa m3, [r4] ; p2
DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
pand m6, m9 pand m6, m9
mova m7, m8 psubb m7, m8, m6
psubb m7, m6
pand m6, m8 pand m6, m8
LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@ -326,10 +325,10 @@ cglobal x264_deblock_v_luma_sse2, 5,5,10
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX INIT_MMX
cglobal x264_deblock_h_luma_sse2, 5,7 cglobal deblock_h_luma_8_%1, 5,7
movsxd r10, r1d movsxd r10, r1d
lea r11, [r10+r10*2] lea r11, [r10+r10*2]
lea r6, [r0-4] lea r6, [r0-4]
@ -350,13 +349,13 @@ cglobal x264_deblock_h_luma_sse2, 5,7
; vertical filter ; vertical filter
; alpha, beta, tc0 are still in r2d, r3d, r4 ; alpha, beta, tc0 are still in r2d, r3d, r4
; don't backup r6, r5, r10, r11 because x264_deblock_v_luma_sse2 doesn't use them ; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them
lea r0, [pix_tmp+0x30] lea r0, [pix_tmp+0x30]
mov r1d, 0x10 mov r1d, 0x10
%ifdef WIN64 %ifdef WIN64
mov [rsp+0x20], r4 mov [rsp+0x20], r4
%endif %endif
call x264_deblock_v_luma_sse2 call deblock_v_luma_8_%1
; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
add r6, 2 add r6, 2
@ -365,7 +364,7 @@ cglobal x264_deblock_h_luma_sse2, 5,7
movq m1, [pix_tmp+0x28] movq m1, [pix_tmp+0x28]
movq m2, [pix_tmp+0x38] movq m2, [pix_tmp+0x38]
movq m3, [pix_tmp+0x48] movq m3, [pix_tmp+0x48]
TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11)
shl r10, 3 shl r10, 3
sub r6, r10 sub r6, r10
@ -375,7 +374,7 @@ cglobal x264_deblock_h_luma_sse2, 5,7
movq m1, [pix_tmp+0x20] movq m1, [pix_tmp+0x20]
movq m2, [pix_tmp+0x30] movq m2, [pix_tmp+0x30]
movq m3, [pix_tmp+0x40] movq m3, [pix_tmp+0x40]
TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11)
%ifdef WIN64 %ifdef WIN64
add rsp, 0x98 add rsp, 0x98
@ -383,14 +382,20 @@ cglobal x264_deblock_h_luma_sse2, 5,7
add rsp, 0x68 add rsp, 0x68
%endif %endif
RET RET
%endmacro
INIT_XMM
DEBLOCK_LUMA sse2
INIT_AVX
DEBLOCK_LUMA avx
%else %else
%macro DEBLOCK_LUMA 3 %macro DEBLOCK_LUMA 3
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_%2_luma_%1, 5,5 cglobal deblock_%2_luma_8_%1, 5,5
lea r4, [r1*3] lea r4, [r1*3]
dec r2 ; alpha-1 dec r2 ; alpha-1
neg r4 neg r4
@ -419,8 +424,7 @@ cglobal x264_deblock_%2_luma_%1, 5,5
DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
pand m6, m4 pand m6, m4
pand m4, [esp+%3] ; tc pand m4, [esp+%3] ; tc
mova m7, m4 psubb m7, m4, m6
psubb m7, m6
pand m6, m4 pand m6, m4
LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@ -441,10 +445,10 @@ cglobal x264_deblock_%2_luma_%1, 5,5
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX INIT_MMX
cglobal x264_deblock_h_luma_%1, 0,5 cglobal deblock_h_luma_8_%1, 0,5
mov r0, r0mp mov r0, r0mp
mov r3, r1m mov r3, r1m
lea r4, [r3*3] lea r4, [r3*3]
@ -467,11 +471,11 @@ cglobal x264_deblock_h_luma_%1, 0,5
PUSH dword r2m PUSH dword r2m
PUSH dword 16 PUSH dword 16
PUSH dword r0 PUSH dword r0
call x264_deblock_%2_luma_%1 call deblock_%2_luma_8_%1
%ifidn %2, v8 %ifidn %2, v8
add dword [esp ], 8 ; pix_tmp+0x38 add dword [esp ], 8 ; pix_tmp+0x38
add dword [esp+16], 2 ; tc0+2 add dword [esp+16], 2 ; tc0+2
call x264_deblock_%2_luma_%1 call deblock_%2_luma_8_%1
%endif %endif
ADD esp, 20 ADD esp, 20
@ -484,7 +488,7 @@ cglobal x264_deblock_h_luma_%1, 0,5
movq m1, [pix_tmp+0x20] movq m1, [pix_tmp+0x20]
movq m2, [pix_tmp+0x30] movq m2, [pix_tmp+0x30]
movq m3, [pix_tmp+0x40] movq m3, [pix_tmp+0x40]
TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
lea r0, [r0+r3*8] lea r0, [r0+r3*8]
lea r1, [r1+r3*8] lea r1, [r1+r3*8]
@ -492,7 +496,7 @@ cglobal x264_deblock_h_luma_%1, 0,5
movq m1, [pix_tmp+0x28] movq m1, [pix_tmp+0x28]
movq m2, [pix_tmp+0x38] movq m2, [pix_tmp+0x38]
movq m3, [pix_tmp+0x48] movq m3, [pix_tmp+0x48]
TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
ADD esp, pad ADD esp, pad
RET RET
@ -502,22 +506,34 @@ INIT_MMX
DEBLOCK_LUMA mmxext, v8, 8 DEBLOCK_LUMA mmxext, v8, 8
INIT_XMM INIT_XMM
DEBLOCK_LUMA sse2, v, 16 DEBLOCK_LUMA sse2, v, 16
INIT_AVX
DEBLOCK_LUMA avx, v, 16
%endif ; ARCH %endif ; ARCH
%macro LUMA_INTRA_P012 4 ; p0..p3 in memory %macro LUMA_INTRA_P012 4 ; p0..p3 in memory
%ifdef ARCH_X86_64
pavgb t0, p2, p1
pavgb t1, p0, q0
%else
mova t0, p2 mova t0, p2
mova t1, p0 mova t1, p0
pavgb t0, p1 pavgb t0, p1
pavgb t1, q0 pavgb t1, q0
%endif
pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
mova t5, t1 mova t5, t1
%ifdef ARCH_X86_64
paddb t2, p2, p1
paddb t3, p0, q0
%else
mova t2, p2 mova t2, p2
mova t3, p0 mova t3, p0
paddb t2, p1 paddb t2, p1
paddb t3, q0 paddb t3, q0
%endif
paddb t2, t3 paddb t2, t3
mova t3, t2 mova t3, t2
mova t4, t2 mova t4, t2
@ -527,10 +543,15 @@ DEBLOCK_LUMA sse2, v, 16
pand t2, mpb_1 pand t2, mpb_1
psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
%ifdef ARCH_X86_64
pavgb t1, p2, q1
psubb t2, p2, q1
%else
mova t1, p2 mova t1, p2
mova t2, p2 mova t2, p2
pavgb t1, q1 pavgb t1, q1
psubb t2, q1 psubb t2, q1
%endif
paddb t3, t3 paddb t3, t3
psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1 psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1
pand t2, mpb_1 pand t2, mpb_1
@ -543,10 +564,8 @@ DEBLOCK_LUMA sse2, v, 16
pand t3, mpb_1 pand t3, mpb_1
psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8 psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8
mova t3, p0 pxor t3, p0, q1
mova t2, p0 pavgb t2, p0, q1
pxor t3, q1
pavgb t2, q1
pand t3, mpb_1 pand t3, mpb_1
psubb t2, t3 psubb t2, t3
pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4 pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4
@ -560,9 +579,8 @@ DEBLOCK_LUMA sse2, v, 16
mova %1, t1 ; store p0 mova %1, t1 ; store p0
mova t1, %4 ; p3 mova t1, %4 ; p3
mova t2, t1 paddb t2, t1, p2
pavgb t1, p2 pavgb t1, p2
paddb t2, p2
pavgb t1, t0 ; (p3+p2+1)/2 + (p2+p1+p0+q0+2)/4 pavgb t1, t0 ; (p3+p2+1)/2 + (p2+p1+p0+q0+2)/4
paddb t2, t2 paddb t2, t2
paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0 paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0
@ -624,9 +642,9 @@ DEBLOCK_LUMA sse2, v, 16
%endif %endif
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_%2_luma_intra_%1, 4,6,16 cglobal deblock_%2_luma_intra_8_%1, 4,6,16
%ifndef ARCH_X86_64 %ifndef ARCH_X86_64
sub esp, 0x60 sub esp, 0x60
%endif %endif
@ -686,9 +704,9 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16
INIT_MMX INIT_MMX
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_h_luma_intra_%1, 4,7 cglobal deblock_h_luma_intra_8_%1, 4,7
movsxd r10, r1d movsxd r10, r1d
lea r11, [r10*3] lea r11, [r10*3]
lea r6, [r0-4] lea r6, [r0-4]
@ -704,7 +722,7 @@ cglobal x264_deblock_h_luma_intra_%1, 4,7
lea r0, [pix_tmp+0x40] lea r0, [pix_tmp+0x40]
mov r1, 0x10 mov r1, 0x10
call x264_deblock_v_luma_intra_%1 call deblock_v_luma_intra_8_%1
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
lea r5, [r6+r11] lea r5, [r6+r11]
@ -717,7 +735,7 @@ cglobal x264_deblock_h_luma_intra_%1, 4,7
add rsp, 0x88 add rsp, 0x88
RET RET
%else %else
cglobal x264_deblock_h_luma_intra_%1, 2,4 cglobal deblock_h_luma_intra_8_%1, 2,4
lea r3, [r1*3] lea r3, [r1*3]
sub r0, 4 sub r0, 4
lea r2, [r0+r3] lea r2, [r0+r3]
@ -736,10 +754,10 @@ cglobal x264_deblock_h_luma_intra_%1, 2,4
PUSH dword r2m PUSH dword r2m
PUSH dword 16 PUSH dword 16
PUSH r0 PUSH r0
call x264_deblock_%2_luma_intra_%1 call deblock_%2_luma_intra_8_%1
%ifidn %2, v8 %ifidn %2, v8
add dword [rsp], 8 ; pix_tmp+8 add dword [rsp], 8 ; pix_tmp+8
call x264_deblock_%2_luma_intra_%1 call deblock_%2_luma_intra_8_%1
%endif %endif
ADD esp, 16 ADD esp, 16
@ -760,13 +778,13 @@ cglobal x264_deblock_h_luma_intra_%1, 2,4
INIT_XMM INIT_XMM
DEBLOCK_LUMA_INTRA sse2, v DEBLOCK_LUMA_INTRA sse2, v
INIT_AVX
DEBLOCK_LUMA_INTRA avx , v
%ifndef ARCH_X86_64 %ifndef ARCH_X86_64
INIT_MMX INIT_MMX
DEBLOCK_LUMA_INTRA mmxext, v8 DEBLOCK_LUMA_INTRA mmxext, v8
%endif %endif
INIT_MMX INIT_MMX
%macro CHROMA_V_START 0 %macro CHROMA_V_START 0
@ -790,23 +808,23 @@ INIT_MMX
%define t6 r6 %define t6 r6
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_v_chroma_mmxext, 5,6 cglobal deblock_v_chroma_8_mmxext, 5,6
CHROMA_V_START CHROMA_V_START
movq m0, [t5] movq m0, [t5]
movq m1, [t5+r1] movq m1, [t5+r1]
movq m2, [r0] movq m2, [r0]
movq m3, [r0+r1] movq m3, [r0+r1]
call x264_chroma_inter_body_mmxext call ff_chroma_inter_body_mmxext
movq [t5+r1], m1 movq [t5+r1], m1
movq [r0], m2 movq [r0], m2
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_h_chroma_mmxext, 5,7 cglobal deblock_h_chroma_8_mmxext, 5,7
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
%define buf0 [rsp-24] %define buf0 [rsp-24]
%define buf1 [rsp-16] %define buf1 [rsp-16]
@ -815,17 +833,17 @@ cglobal x264_deblock_h_chroma_mmxext, 5,7
%define buf1 r2m %define buf1 r2m
%endif %endif
CHROMA_H_START CHROMA_H_START
TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6) TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
movq buf0, m0 movq buf0, m0
movq buf1, m3 movq buf1, m3
call x264_chroma_inter_body_mmxext call ff_chroma_inter_body_mmxext
movq m0, buf0 movq m0, buf0
movq m3, buf1 movq m3, buf1
TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6) TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
RET RET
ALIGN 16 ALIGN 16
x264_chroma_inter_body_mmxext: ff_chroma_inter_body_mmxext:
LOAD_MASK r2d, r3d LOAD_MASK r2d, r3d
movd m6, [r4] ; tc0 movd m6, [r4] ; tc0
punpcklbw m6, m6 punpcklbw m6, m6
@ -850,31 +868,31 @@ x264_chroma_inter_body_mmxext:
%define t6 r5 %define t6 r5
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) ; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_v_chroma_intra_mmxext, 4,5 cglobal deblock_v_chroma_intra_8_mmxext, 4,5
CHROMA_V_START CHROMA_V_START
movq m0, [t5] movq m0, [t5]
movq m1, [t5+r1] movq m1, [t5+r1]
movq m2, [r0] movq m2, [r0]
movq m3, [r0+r1] movq m3, [r0+r1]
call x264_chroma_intra_body_mmxext call ff_chroma_intra_body_mmxext
movq [t5+r1], m1 movq [t5+r1], m1
movq [r0], m2 movq [r0], m2
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) ; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal x264_deblock_h_chroma_intra_mmxext, 4,6 cglobal deblock_h_chroma_intra_8_mmxext, 4,6
CHROMA_H_START CHROMA_H_START
TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6) TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
call x264_chroma_intra_body_mmxext call ff_chroma_intra_body_mmxext
TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6) TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
RET RET
ALIGN 16 ALIGN 16
x264_chroma_intra_body_mmxext: ff_chroma_intra_body_mmxext:
LOAD_MASK r2d, r3d LOAD_MASK r2d, r3d
movq m5, m1 movq m5, m1
movq m6, m2 movq m6, m2

View File

@ -0,0 +1,910 @@
;*****************************************************************************
;* MMX/SSE2/AVX-optimized 10-bit H.264 deblocking code
;*****************************************************************************
;* Copyright (C) 2005-2011 x264 project
;*
;* Authors: Oskar Arvidsson <oskar@irock.se>
;* Loren Merritt <lorenm@u.washington.edu>
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION_RODATA
pw_pixel_max: times 8 dw ((1 << 10)-1)
SECTION .text
cextern pw_2
cextern pw_3
cextern pw_4
; out: %4 = |%1-%2|-%3
; clobbers: %5
%macro ABS_SUB 5
psubusw %5, %2, %1
psubusw %4, %1, %2
por %4, %5
psubw %4, %3
%endmacro
; out: %4 = |%1-%2|<%3
%macro DIFF_LT 5
psubusw %4, %2, %1
psubusw %5, %1, %2
por %5, %4 ; |%1-%2|
pxor %4, %4
psubw %5, %3 ; |%1-%2|-%3
pcmpgtw %4, %5 ; 0 > |%1-%2|-%3
%endmacro
%macro LOAD_AB 4
movd %1, %3
movd %2, %4
SPLATW %1, %1
SPLATW %2, %2
%endmacro
; in: %2=tc reg
; out: %1=splatted tc
%macro LOAD_TC 2
movd %1, [%2]
punpcklbw %1, %1
%if mmsize == 8
pshufw %1, %1, 0
%else
pshuflw %1, %1, 01010000b
pshufd %1, %1, 01010000b
%endif
psraw %1, 6
%endmacro
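; In C terms (a sketch), each lane of the result is the int8_t tc0 table value
; widened to a word and scaled by 4 for 10-bit samples; the byte duplication
; plus psraw 6 works out to tc0*4 for the small non-negative table values and
; keeps tc0 == -1 (the "don't filter" sentinel) negative:
;
;    static int16_t load_tc_lane(int8_t tc0)
;    {
;        return tc0 < 0 ? -1 : tc0 << 2;
;    }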
; in: %1=p1, %2=p0, %3=q0, %4=q1
; %5=alpha, %6=beta, %7-%9=tmp
; out: %7=mask
%macro LOAD_MASK 9
ABS_SUB %2, %3, %5, %8, %7 ; |p0-q0| - alpha
ABS_SUB %1, %2, %6, %9, %7 ; |p1-p0| - beta
pand %8, %9
ABS_SUB %3, %4, %6, %9, %7 ; |q1-q0| - beta
pxor %7, %7
pand %8, %9
pcmpgtw %7, %8
%endmacro
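; Per lane this builds the standard H.264 edge test; a C sketch of the
; condition a lane must satisfy for its mask to be set (alpha and beta are
; already scaled by 4 for 10-bit by the callers):
;
;    static int filter_edge(int p1, int p0, int q0, int q1,
;                           int alpha, int beta)
;    {
;        return abs(p0 - q0) < alpha &&
;               abs(p1 - p0) < beta  &&
;               abs(q1 - q0) < beta;
;    }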
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro DEBLOCK_P0_Q0 7
psubw %3, %4
pxor %7, %7
paddw %3, [pw_4]
psubw %7, %5
psubw %6, %2, %1
psllw %6, 2
paddw %3, %6
psraw %3, 3
mova %6, [pw_pixel_max]
CLIPW %3, %7, %5
pxor %7, %7
paddw %1, %3
psubw %2, %3
CLIPW %1, %7, %6
CLIPW %2, %7, %6
%endmacro
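; This is the normal (tc-based) p0/q0 update on 16-bit lanes, with %5 holding
; the already-masked tc.  A C sketch of one lane (clip3 is an illustrative
; helper; pw_pixel_max is 1023 here):
;
;    static int clip3(int x, int lo, int hi)
;    {
;        return x < lo ? lo : x > hi ? hi : x;
;    }
;
;    static void deblock_p0_q0(int *p0, int *q0, int p1, int q1, int tc)
;    {
;        int d = clip3((((*q0 - *p0) << 2) + (p1 - q1) + 4) >> 3, -tc, tc);
;        *p0 = clip3(*p0 + d, 0, 1023);
;        *q0 = clip3(*q0 - d, 0, 1023);
;    }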
; in: %1=x2, %2=x1, %3=p0, %4=q0 %5=mask&tc, %6=tmp
%macro LUMA_Q1 6
pavgw %6, %3, %4 ; (p0+q0+1)>>1
paddw %1, %6
pxor %6, %6
psraw %1, 1
psubw %6, %5
psubw %1, %2
CLIPW %1, %6, %5
paddw %1, %2
%endmacro
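; LUMA_Q1 produces the p1' (or, mirrored, q1') update; %5 is tc already masked
; by the per-lane conditions.  One lane in C, roughly:
;
;    static int luma_q1(int x2, int x1, int p0, int q0, int tc)
;    {
;        int avg = (p0 + q0 + 1) >> 1;          /* pavgw */
;        int d   = ((x2 + avg) >> 1) - x1;
;        if (d < -tc) d = -tc;
;        if (d >  tc) d =  tc;
;        return x1 + d;
;    }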
%macro LUMA_DEBLOCK_ONE 3
DIFF_LT m5, %1, bm, m4, m6
pxor m6, m6
mova %3, m4
pcmpgtw m6, tcm
pand m4, tcm
pandn m6, m7
pand m4, m6
LUMA_Q1 m5, %2, m1, m2, m4, m6
%endmacro
%macro LUMA_H_STORE 2
%if mmsize == 8
movq [r0-4], m0
movq [r0+r1-4], m1
movq [r0+r1*2-4], m2
movq [r0+%2-4], m3
%else
movq [r0-4], m0
movhps [r0+r1-4], m0
movq [r0+r1*2-4], m1
movhps [%1-4], m1
movq [%1+r1-4], m2
movhps [%1+r1*2-4], m2
movq [%1+%2-4], m3
movhps [%1+r1*4-4], m3
%endif
%endmacro
%macro DEBLOCK_LUMA 1
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
%assign pad 5*mmsize+12-(stack_offset&15)
%define tcm [rsp]
%define ms1 [rsp+mmsize]
%define ms2 [rsp+mmsize*2]
%define am [rsp+mmsize*3]
%define bm [rsp+mmsize*4]
SUB rsp, pad
shl r2d, 2
shl r3d, 2
LOAD_AB m4, m5, r2, r3
mov r3, 32/mmsize
mov r2, r0
sub r0, r1
mova am, m4
sub r0, r1
mova bm, m5
sub r0, r1
.loop:
mova m0, [r0+r1]
mova m1, [r0+r1*2]
mova m2, [r2]
mova m3, [r2+r1]
LOAD_MASK m0, m1, m2, m3, am, bm, m7, m4, m6
LOAD_TC m6, r4
mova tcm, m6
mova m5, [r0]
LUMA_DEBLOCK_ONE m1, m0, ms1
mova [r0+r1], m5
mova m5, [r2+r1*2]
LUMA_DEBLOCK_ONE m2, m3, ms2
mova [r2+r1], m5
pxor m5, m5
mova m6, tcm
pcmpgtw m5, tcm
psubw m6, ms1
pandn m5, m7
psubw m6, ms2
pand m5, m6
DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6
mova [r0+r1*2], m1
mova [r2], m2
add r0, mmsize
add r2, mmsize
add r4, mmsize/8
dec r3
jg .loop
ADD rsp, pad
RET
cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
%assign pad 7*mmsize+12-(stack_offset&15)
%define tcm [rsp]
%define ms1 [rsp+mmsize]
%define ms2 [rsp+mmsize*2]
%define p1m [rsp+mmsize*3]
%define p2m [rsp+mmsize*4]
%define am [rsp+mmsize*5]
%define bm [rsp+mmsize*6]
SUB rsp, pad
shl r2d, 2
shl r3d, 2
LOAD_AB m4, m5, r2, r3
mov r3, r1
mova am, m4
add r3, r1
mov r5, 32/mmsize
mova bm, m5
add r3, r1
%if mmsize == 16
mov r2, r0
add r2, r3
%endif
.loop:
%if mmsize == 8
movq m2, [r0-8] ; y q2 q1 q0
movq m7, [r0+0]
movq m5, [r0+r1-8]
movq m3, [r0+r1+0]
movq m0, [r0+r1*2-8]
movq m6, [r0+r1*2+0]
movq m1, [r0+r3-8]
TRANSPOSE4x4W 2, 5, 0, 1, 4
SWAP 2, 7
movq m7, [r0+r3]
TRANSPOSE4x4W 2, 3, 6, 7, 4
%else
movu m5, [r0-8] ; y q2 q1 q0 p0 p1 p2 x
movu m0, [r0+r1-8]
movu m2, [r0+r1*2-8]
movu m3, [r2-8]
TRANSPOSE4x4W 5, 0, 2, 3, 6
mova tcm, m3
movu m4, [r2+r1-8]
movu m1, [r2+r1*2-8]
movu m3, [r2+r3-8]
movu m7, [r2+r1*4-8]
TRANSPOSE4x4W 4, 1, 3, 7, 6
mova m6, tcm
punpcklqdq m6, m7
punpckhqdq m5, m4
SBUTTERFLY qdq, 0, 1, 7
SBUTTERFLY qdq, 2, 3, 7
%endif
mova p2m, m6
LOAD_MASK m0, m1, m2, m3, am, bm, m7, m4, m6
LOAD_TC m6, r4
mova tcm, m6
LUMA_DEBLOCK_ONE m1, m0, ms1
mova p1m, m5
mova m5, p2m
LUMA_DEBLOCK_ONE m2, m3, ms2
mova p2m, m5
pxor m5, m5
mova m6, tcm
pcmpgtw m5, tcm
psubw m6, ms1
pandn m5, m7
psubw m6, ms2
pand m5, m6
DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6
mova m0, p1m
mova m3, p2m
TRANSPOSE4x4W 0, 1, 2, 3, 4
LUMA_H_STORE r2, r3
add r4, mmsize/8
lea r0, [r0+r1*(mmsize/2)]
lea r2, [r2+r1*(mmsize/2)]
dec r5
jg .loop
ADD rsp, pad
RET
%endmacro
INIT_XMM
%ifdef ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
; out: m0=p1', m3=q1', m1=p0', m2=q0'
; clobbers: m4, m5, m6, m7, m10, m11, m14
%macro DEBLOCK_LUMA_INTER_SSE2 0
LOAD_MASK m0, m1, m2, m3, m12, m13, m7, m4, m6
LOAD_TC m6, r4
DIFF_LT m8, m1, m13, m10, m4
DIFF_LT m9, m2, m13, m11, m4
pand m6, m7
mova m14, m6
pxor m4, m4
pcmpgtw m6, m4
pand m6, m14
mova m5, m10
pand m5, m6
LUMA_Q1 m8, m0, m1, m2, m5, m4
mova m5, m11
pand m5, m6
LUMA_Q1 m9, m3, m1, m2, m5, m4
pxor m4, m4
psubw m6, m10
pcmpgtw m4, m14
pandn m4, m7
psubw m6, m11
pand m4, m6
DEBLOCK_P0_Q0 m1, m2, m0, m3, m4, m5, m6
SWAP 0, 8
SWAP 3, 9
%endmacro
%macro DEBLOCK_LUMA_64 1
cglobal deblock_v_luma_10_%1, 5,5,15
%define p2 m8
%define p1 m0
%define p0 m1
%define q0 m2
%define q1 m3
%define q2 m9
%define mask0 m7
%define mask1 m10
%define mask2 m11
shl r2d, 2
shl r3d, 2
LOAD_AB m12, m13, r2, r3
mov r2, r0
sub r0, r1
sub r0, r1
sub r0, r1
mov r3, 2
.loop:
mova p2, [r0]
mova p1, [r0+r1]
mova p0, [r0+r1*2]
mova q0, [r2]
mova q1, [r2+r1]
mova q2, [r2+r1*2]
DEBLOCK_LUMA_INTER_SSE2
mova [r0+r1], p1
mova [r0+r1*2], p0
mova [r2], q0
mova [r2+r1], q1
add r0, mmsize
add r2, mmsize
add r4, 2
dec r3
jg .loop
REP_RET
cglobal deblock_h_luma_10_%1, 5,7,15
shl r2d, 2
shl r3d, 2
LOAD_AB m12, m13, r2, r3
mov r2, r1
add r2, r1
add r2, r1
mov r5, r0
add r5, r2
mov r6, 2
.loop:
movu m8, [r0-8] ; y q2 q1 q0 p0 p1 p2 x
movu m0, [r0+r1-8]
movu m2, [r0+r1*2-8]
movu m9, [r5-8]
movu m5, [r5+r1-8]
movu m1, [r5+r1*2-8]
movu m3, [r5+r2-8]
movu m7, [r5+r1*4-8]
TRANSPOSE4x4W 8, 0, 2, 9, 10
TRANSPOSE4x4W 5, 1, 3, 7, 10
punpckhqdq m8, m5
SBUTTERFLY qdq, 0, 1, 10
SBUTTERFLY qdq, 2, 3, 10
punpcklqdq m9, m7
DEBLOCK_LUMA_INTER_SSE2
TRANSPOSE4x4W 0, 1, 2, 3, 4
LUMA_H_STORE r5, r2
add r4, 2
lea r0, [r0+r1*8]
lea r5, [r5+r1*8]
dec r6
jg .loop
REP_RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_64 sse2
INIT_AVX
DEBLOCK_LUMA_64 avx
%endif
%macro SWAPMOVA 2
%ifid %1
SWAP %1, %2
%else
mova %1, %2
%endif
%endmacro
; in: t0-t2: tmp registers
; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
%ifdef ARCH_X86_64
paddw t0, %3, %2
mova t2, %4
paddw t2, %3
%else
mova t0, %3
mova t2, %4
paddw t0, %2
paddw t2, %3
%endif
paddw t0, %1
paddw t2, t2
paddw t0, %5
paddw t2, %9
paddw t0, %9 ; (p2 + p1 + p0 + q0 + 2)
paddw t2, t0 ; (2*p3 + 3*p2 + p1 + p0 + q0 + 4)
psrlw t2, 3
psrlw t1, t0, 2
psubw t2, %3
psubw t1, %2
pand t2, %8
pand t1, %8
paddw t2, %3
paddw t1, %2
SWAPMOVA %11, t1
psubw t1, t0, %3
paddw t0, t0
psubw t1, %5
psubw t0, %3
paddw t1, %6
paddw t1, %2
paddw t0, %6
psrlw t1, 2 ; (2*p1 + p0 + q1 + 2)/4
psrlw t0, 3 ; (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
pxor t0, t1
pxor t1, %1
pand t0, %8
pand t1, %7
pxor t0, t1
pxor t0, %1
SWAPMOVA %10, t0
SWAPMOVA %12, t2
%endmacro
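; One side of the strong intra filter.  Where mask1p holds (the basic edge
; test, |p0-q0| < alpha/4 + 2, and |p2-p0| < beta) the longer spec averages are
; written; otherwise p0 falls back to the weak average under mask0 and p1/p2
; are left alone.  The final pxor/pand/pxor sequence is a branchless select
; between the two p0 candidates.  A C sketch of one lane (p side; the q side
; is mirrored):
;
;    static void luma_intra_p012(int p3, int p2, int p1, int p0, int q0,
;                                int q1, int mask0, int mask1p,
;                                int *np0, int *np1, int *np2)
;    {
;        if (mask1p) {
;            *np0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3;
;            *np1 = (p2 + p1 + p0 + q0 + 2) >> 2;
;            *np2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3;
;        } else {
;            *np0 = mask0 ? (2*p1 + p0 + q1 + 2) >> 2 : p0;
;            *np1 = p1;
;            *np2 = p2;
;        }
;    }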
%macro LUMA_INTRA_INIT 1
%xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15)
%define t0 m4
%define t1 m5
%define t2 m6
%define t3 m7
%assign i 4
%rep %1
CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
%assign i i+1
%endrep
SUB rsp, pad
%endmacro
; in: %1-%3=tmp, %4=p2, %5=q2
%macro LUMA_INTRA_INTER 5
LOAD_AB t0, t1, r2d, r3d
mova %1, t0
LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3
%ifdef ARCH_X86_64
mova %2, t0 ; mask0
psrlw t3, %1, 2
%else
mova t3, %1
mova %2, t0 ; mask0
psrlw t3, 2
%endif
paddw t3, [pw_2] ; alpha/4+2
DIFF_LT m1, m2, t3, t2, t0 ; t2 = |p0-q0| < alpha/4+2
pand t2, %2
mova t3, %5 ; q2
mova %1, t2 ; mask1
DIFF_LT t3, m2, t1, t2, t0 ; t2 = |q2-q0| < beta
pand t2, %1
mova t3, %4 ; p2
mova %3, t2 ; mask1q
DIFF_LT t3, m1, t1, t2, t0 ; t2 = |p2-p0| < beta
pand t2, %1
mova %1, t2 ; mask1p
%endmacro
%macro LUMA_H_INTRA_LOAD 0
%if mmsize == 8
movu t0, [r0-8]
movu t1, [r0+r1-8]
movu m0, [r0+r1*2-8]
movu m1, [r0+r4-8]
TRANSPOSE4x4W 4, 5, 0, 1, 2
mova t4, t0 ; p3
mova t5, t1 ; p2
movu m2, [r0]
movu m3, [r0+r1]
movu t0, [r0+r1*2]
movu t1, [r0+r4]
TRANSPOSE4x4W 2, 3, 4, 5, 6
mova t6, t0 ; q2
mova t7, t1 ; q3
%else
movu t0, [r0-8]
movu t1, [r0+r1-8]
movu m0, [r0+r1*2-8]
movu m1, [r0+r5-8]
movu m2, [r4-8]
movu m3, [r4+r1-8]
movu t2, [r4+r1*2-8]
movu t3, [r4+r5-8]
TRANSPOSE8x8W 4, 5, 0, 1, 2, 3, 6, 7, t4, t5
mova t4, t0 ; p3
mova t5, t1 ; p2
mova t6, t2 ; q2
mova t7, t3 ; q3
%endif
%endmacro
; in: %1=q3 %2=q2' %3=q1' %4=q0' %5=p0' %6=p1' %7=p2' %8=p3 %9=tmp
%macro LUMA_H_INTRA_STORE 9
%if mmsize == 8
TRANSPOSE4x4W %1, %2, %3, %4, %9
movq [r0-8], m%1
movq [r0+r1-8], m%2
movq [r0+r1*2-8], m%3
movq [r0+r4-8], m%4
movq m%1, %8
TRANSPOSE4x4W %5, %6, %7, %1, %9
movq [r0], m%5
movq [r0+r1], m%6
movq [r0+r1*2], m%7
movq [r0+r4], m%1
%else
TRANSPOSE2x4x4W %1, %2, %3, %4, %9
movq [r0-8], m%1
movq [r0+r1-8], m%2
movq [r0+r1*2-8], m%3
movq [r0+r5-8], m%4
movhps [r4-8], m%1
movhps [r4+r1-8], m%2
movhps [r4+r1*2-8], m%3
movhps [r4+r5-8], m%4
%ifnum %8
SWAP %1, %8
%else
mova m%1, %8
%endif
TRANSPOSE2x4x4W %5, %6, %7, %1, %9
movq [r0], m%5
movq [r0+r1], m%6
movq [r0+r1*2], m%7
movq [r0+r5], m%1
movhps [r4], m%5
movhps [r4+r1], m%6
movhps [r4+r1*2], m%7
movhps [r4+r5], m%1
%endif
%endmacro
%ifdef ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA_INTRA_64 1
cglobal deblock_v_luma_intra_10_%1, 4,7,16
%define t0 m1
%define t1 m2
%define t2 m4
%define p2 m8
%define p1 m9
%define p0 m10
%define q0 m11
%define q1 m12
%define q2 m13
%define aa m5
%define bb m14
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
neg r4
add r4, r0 ; pix-4*stride
mov r6, 2
mova m0, [pw_2]
shl r2d, 2
shl r3d, 2
LOAD_AB aa, bb, r2d, r3d
.loop:
mova p2, [r4+r1]
mova p1, [r4+2*r1]
mova p0, [r4+r5]
mova q0, [r0]
mova q1, [r0+r1]
mova q2, [r0+2*r1]
LOAD_MASK p1, p0, q0, q1, aa, bb, m3, t0, t1
mova t2, aa
psrlw t2, 2
paddw t2, m0 ; alpha/4+2
DIFF_LT p0, q0, t2, m6, t0 ; m6 = |p0-q0| < alpha/4+2
DIFF_LT p2, p0, bb, t1, t0 ; t1 = |p2-p0| < beta
DIFF_LT q2, q0, bb, m7, t0 ; m7 = |q2-q0| < beta
pand m6, m3
pand m7, m6
pand m6, t1
LUMA_INTRA_P012 p0, p1, p2, [r4], q0, q1, m3, m6, m0, [r4+r5], [r4+2*r1], [r4+r1]
LUMA_INTRA_P012 q0, q1, q2, [r0+r5], p0, p1, m3, m7, m0, [r0], [r0+r1], [r0+2*r1]
add r0, mmsize
add r4, mmsize
dec r6
jg .loop
REP_RET
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10_%1, 4,7,16
%define t0 m15
%define t1 m14
%define t2 m2
%define q3 m5
%define q2 m8
%define q1 m9
%define q0 m10
%define p0 m11
%define p1 m12
%define p2 m13
%define p3 m4
%define spill [rsp]
%assign pad 24-(stack_offset&15)
SUB rsp, pad
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
add r4, r0 ; pix+4*stride
mov r6, 2
mova m0, [pw_2]
shl r2d, 2
shl r3d, 2
.loop:
movu q3, [r0-8]
movu q2, [r0+r1-8]
movu q1, [r0+r1*2-8]
movu q0, [r0+r5-8]
movu p0, [r4-8]
movu p1, [r4+r1-8]
movu p2, [r4+r1*2-8]
movu p3, [r4+r5-8]
TRANSPOSE8x8W 5, 8, 9, 10, 11, 12, 13, 4, 1
LOAD_AB m1, m2, r2d, r3d
LOAD_MASK q1, q0, p0, p1, m1, m2, m3, t0, t1
psrlw m1, 2
paddw m1, m0 ; alpha/4+2
DIFF_LT p0, q0, m1, m6, t0 ; m6 = |p0-q0| < alpha/4+2
DIFF_LT q2, q0, m2, t1, t0 ; t1 = |q2-q0| < beta
DIFF_LT p0, p2, m2, m7, t0 ; m7 = |p2-p0| < beta
pand m6, m3
pand m7, m6
pand m6, t1
mova spill, q3
LUMA_INTRA_P012 q0, q1, q2, q3, p0, p1, m3, m6, m0, m5, m1, q2
LUMA_INTRA_P012 p0, p1, p2, p3, q0, q1, m3, m7, m0, p0, m6, p2
mova m7, spill
LUMA_H_INTRA_STORE 7, 8, 1, 5, 11, 6, 13, 4, 14
lea r0, [r0+r1*8]
lea r4, [r4+r1*8]
dec r6
jg .loop
ADD rsp, pad
RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_INTRA_64 sse2
INIT_AVX
DEBLOCK_LUMA_INTRA_64 avx
%endif
%macro DEBLOCK_LUMA_INTRA 1
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
LUMA_INTRA_INIT 3
lea r4, [r1*4]
lea r5, [r1*3]
neg r4
add r4, r0
mov r6, 32/mmsize
shl r2d, 2
shl r3d, 2
.loop:
mova m0, [r4+r1*2] ; p1
mova m1, [r4+r5] ; p0
mova m2, [r0] ; q0
mova m3, [r0+r1] ; q1
LUMA_INTRA_INTER t4, t5, t6, [r4+r1], [r0+r1*2]
LUMA_INTRA_P012 m1, m0, t3, [r4], m2, m3, t5, t4, [pw_2], [r4+r5], [r4+2*r1], [r4+r1]
mova t3, [r0+r1*2] ; q2
LUMA_INTRA_P012 m2, m3, t3, [r0+r5], m1, m0, t5, t6, [pw_2], [r0], [r0+r1], [r0+2*r1]
add r0, mmsize
add r4, mmsize
dec r6
jg .loop
ADD rsp, pad
RET
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
LUMA_INTRA_INIT 8
%if mmsize == 8
lea r4, [r1*3]
mov r5, 32/mmsize
%else
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
add r4, r0 ; pix+4*stride
mov r6, 32/mmsize
%endif
shl r2d, 2
shl r3d, 2
.loop:
LUMA_H_INTRA_LOAD
LUMA_INTRA_INTER t8, t9, t10, t5, t6
LUMA_INTRA_P012 m1, m0, t3, t4, m2, m3, t9, t8, [pw_2], t8, t5, t11
mova t3, t6 ; q2
LUMA_INTRA_P012 m2, m3, t3, t7, m1, m0, t9, t10, [pw_2], m4, t6, m5
mova m2, t4
mova m0, t11
mova m1, t5
mova m3, t8
mova m6, t6
LUMA_H_INTRA_STORE 2, 0, 1, 3, 4, 6, 5, t7, 7
lea r0, [r0+r1*(mmsize/2)]
%if mmsize == 8
dec r5
%else
lea r4, [r4+r1*(mmsize/2)]
dec r6
%endif
jg .loop
ADD rsp, pad
RET
%endmacro
%ifndef ARCH_X86_64
INIT_MMX
DEBLOCK_LUMA mmxext
DEBLOCK_LUMA_INTRA mmxext
INIT_XMM
DEBLOCK_LUMA sse2
DEBLOCK_LUMA_INTRA sse2
INIT_AVX
DEBLOCK_LUMA avx
DEBLOCK_LUMA_INTRA avx
%endif
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
mova %6, [pw_2]
paddw %6, %3
paddw %6, %4
paddw %7, %6, %2
paddw %6, %1
paddw %6, %3
paddw %7, %4
psraw %6, 2
psraw %7, 2
psubw %6, %1
psubw %7, %2
pand %6, %5
pand %7, %5
paddw %1, %6
paddw %2, %7
%endmacro
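; The chroma intra filter only rewrites p0/q0, using the simple 2:1:1 average
; from the spec; lanes whose mask is clear are left untouched.  One lane in C,
; roughly:
;
;    static void chroma_intra_p0_q0(int *p0, int *q0, int p1, int q1,
;                                   int filter)
;    {
;        if (filter) {
;            int np0 = (2*p1 + *p0 + q1 + 2) >> 2;
;            int nq0 = (2*q1 + *q0 + p1 + 2) >> 2;
;            *p0 = np0;
;            *q0 = nq0;
;        }
;    }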
%macro CHROMA_V_LOAD 1
mova m0, [r0] ; p1
mova m1, [r0+r1] ; p0
mova m2, [%1] ; q0
mova m3, [%1+r1] ; q1
%endmacro
%macro CHROMA_V_STORE 0
mova [r0+1*r1], m1
mova [r0+2*r1], m2
%endmacro
%macro DEBLOCK_CHROMA 1
;-----------------------------------------------------------------------------
; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
mov r5, r0
sub r0, r1
sub r0, r1
shl r2d, 2
shl r3d, 2
%if mmsize < 16
mov r6, 16/mmsize
.loop:
%endif
CHROMA_V_LOAD r5
LOAD_AB m4, m5, r2, r3
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4
pxor m4, m4
LOAD_TC m6, r4
psubw m6, [pw_3]
pmaxsw m6, m4
pand m7, m6
DEBLOCK_P0_Q0 m1, m2, m0, m3, m7, m5, m6
CHROMA_V_STORE
%if mmsize < 16
add r0, mmsize
add r5, mmsize
add r4, mmsize/8
dec r6
jg .loop
REP_RET
%else
RET
%endif
;-----------------------------------------------------------------------------
; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
mov r4, r0
sub r0, r1
sub r0, r1
shl r2d, 2
shl r3d, 2
%if mmsize < 16
mov r5, 16/mmsize
.loop:
%endif
CHROMA_V_LOAD r4
LOAD_AB m4, m5, r2, r3
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4
CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
CHROMA_V_STORE
%if mmsize < 16
add r0, mmsize
add r4, mmsize
dec r5
jg .loop
REP_RET
%else
RET
%endif
%endmacro
%ifndef ARCH_X86_64
INIT_MMX
DEBLOCK_CHROMA mmxext
%endif
INIT_XMM
DEBLOCK_CHROMA sse2
INIT_AVX
DEBLOCK_CHROMA avx


@ -218,41 +218,57 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
); );
} }
#define LF_FUNC(DIR, TYPE, OPT) \ #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
int alpha, int beta, int8_t *tc0); int alpha, int beta, int8_t *tc0);
#define LF_IFUNC(DIR, TYPE, OPT) \ #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
int alpha, int beta); int alpha, int beta);
LF_FUNC (h, chroma, mmxext) #define LF_FUNCS(type, depth)\
LF_IFUNC(h, chroma_intra, mmxext) LF_FUNC (h, chroma, depth, mmxext)\
LF_FUNC (v, chroma, mmxext) LF_IFUNC(h, chroma_intra, depth, mmxext)\
LF_IFUNC(v, chroma_intra, mmxext) LF_FUNC (v, chroma, depth, mmxext)\
LF_IFUNC(v, chroma_intra, depth, mmxext)\
LF_FUNC (h, luma, depth, mmxext)\
LF_IFUNC(h, luma_intra, depth, mmxext)\
LF_FUNC (h, luma, depth, sse2)\
LF_IFUNC(h, luma_intra, depth, sse2)\
LF_FUNC (v, luma, depth, sse2)\
LF_IFUNC(v, luma_intra, depth, sse2)\
LF_FUNC (h, chroma, depth, sse2)\
LF_IFUNC(h, chroma_intra, depth, sse2)\
LF_FUNC (v, chroma, depth, sse2)\
LF_IFUNC(v, chroma_intra, depth, sse2)\
LF_FUNC (h, luma, depth, avx)\
LF_IFUNC(h, luma_intra, depth, avx)\
LF_FUNC (v, luma, depth, avx)\
LF_IFUNC(v, luma_intra, depth, avx)\
LF_FUNC (h, chroma, depth, avx)\
LF_IFUNC(h, chroma_intra, depth, avx)\
LF_FUNC (v, chroma, depth, avx)\
LF_IFUNC(v, chroma_intra, depth, avx)
LF_FUNC (h, luma, mmxext) LF_FUNCS( uint8_t, 8)
LF_IFUNC(h, luma_intra, mmxext) LF_FUNCS(uint16_t, 10)
#if HAVE_YASM && ARCH_X86_32
LF_FUNC (v8, luma, mmxext) LF_FUNC (v8, luma, 8, mmxext)
static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{ {
if((tc0[0] & tc0[1]) >= 0) if((tc0[0] & tc0[1]) >= 0)
ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0); ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
if((tc0[2] & tc0[3]) >= 0) if((tc0[2] & tc0[3]) >= 0)
ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2); ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
} }
LF_IFUNC(v8, luma_intra, mmxext) LF_IFUNC(v8, luma_intra, 8, mmxext)
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{ {
ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
} }
#endif
LF_FUNC (h, luma, sse2) LF_FUNC (v, luma, 10, mmxext)
LF_IFUNC(h, luma_intra, sse2) LF_IFUNC(v, luma_intra, 10, mmxext)
LF_FUNC (v, luma, sse2)
LF_IFUNC(v, luma_intra, sse2)
/***********************************/ /***********************************/
/* weighted prediction */ /* weighted prediction */
@ -314,15 +330,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct_add8 = ff_h264_idct_add8_mmx2; c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext; c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext; c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
#if ARCH_X86_32 #if ARCH_X86_32
c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext; c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext; c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif #endif
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
@ -360,10 +376,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2; c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif #endif
c->h264_idct_add16 = ff_h264_idct_add16_sse2; c->h264_idct_add16 = ff_h264_idct_add16_sse2;
@ -377,6 +393,49 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3; c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
} }
if (mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif
}
}
}
#endif
} else if (bit_depth == 10) {
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
#if ARCH_X86_32
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext;
c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
#endif
if (mm_flags&AV_CPU_FLAG_SSE2) {
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif
}
if (mm_flags&AV_CPU_FLAG_AVX) {
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif
}
} }
} }
#endif #endif
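/* All of the pointers set above are consumed through H264DSPContext, so the
 * decoder itself never names a specific implementation.  A minimal,
 * hypothetical call site (assumes the H264DSPContext definition from
 * h264dsp.h is visible; pix, stride, alpha, beta and tc0 come from the
 * caller).  For bit_depth == 10 with SSE2 this ends up in
 * ff_deblock_v_luma_10_sse2: */
static void filter_one_luma_edge(H264DSPContext *c, uint8_t *pix, int stride,
                                 int alpha, int beta, int8_t *tc0)
{
    c->h264_v_loop_filter_luma(pix, stride, alpha, beta, tc0);
}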


@ -24,16 +24,20 @@
;****************************************************************************** ;******************************************************************************
%macro SBUTTERFLY 4 %macro SBUTTERFLY 4
%if avx_enabled == 0
mova m%4, m%2 mova m%4, m%2
punpckl%1 m%2, m%3 punpckl%1 m%2, m%3
punpckh%1 m%4, m%3 punpckh%1 m%4, m%3
%else
punpckh%1 m%4, m%2, m%3
punpckl%1 m%2, m%3
%endif
SWAP %3, %4 SWAP %3, %4
%endmacro %endmacro
%macro SBUTTERFLY2 4 %macro SBUTTERFLY2 4
mova m%4, m%2 punpckl%1 m%4, m%2, m%3
punpckh%1 m%2, m%3 punpckh%1 m%2, m%2, m%3
punpckl%1 m%4, m%3
SWAP %2, %4, %3 SWAP %2, %4, %3
%endmacro %endmacro
@ -444,3 +448,17 @@
%macro PMINUB_MMXEXT 3 ; dst, src, ignored %macro PMINUB_MMXEXT 3 ; dst, src, ignored
pminub %1, %2 pminub %1, %2
%endmacro %endmacro
%macro SPLATW 2-3 0
%if mmsize == 16
pshuflw %1, %2, (%3)*0x55
punpcklqdq %1, %1
%else
pshufw %1, %2, (%3)*0x55
%endif
%endmacro
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
pminsw %1, %3
%endmacro
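; Scalar sketch of what these two helpers do per 16-bit lane: SPLATW
; broadcasts the word selected by the optional third argument to every lane,
; and CLIPW clamps each lane into [min, max]:
;
;    static int16_t clipw(int16_t x, int16_t lo, int16_t hi)
;    {
;        if (x < lo) x = lo;   /* pmaxsw */
;        if (x > hi) x = hi;   /* pminsw */
;        return x;
;    }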


@ -524,6 +524,7 @@ static const StreamType MISC_types[] = {
static const StreamType REGD_types[] = { static const StreamType REGD_types[] = {
{ MKTAG('d','r','a','c'), AVMEDIA_TYPE_VIDEO, CODEC_ID_DIRAC }, { MKTAG('d','r','a','c'), AVMEDIA_TYPE_VIDEO, CODEC_ID_DIRAC },
{ MKTAG('A','C','-','3'), AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3 }, { MKTAG('A','C','-','3'), AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3 },
{ MKTAG('B','S','S','D'), AVMEDIA_TYPE_AUDIO, CODEC_ID_S302M },
{ 0 }, { 0 },
}; };


@ -808,6 +808,10 @@ void ff_rtsp_parse_line(RTSPMessageHeader *reply, const char *buf,
p += strspn(p, SPACE_CHARS); p += strspn(p, SPACE_CHARS);
if (method && !strcmp(method, "PLAY")) if (method && !strcmp(method, "PLAY"))
rtsp_parse_rtp_info(rt, p); rtsp_parse_rtp_info(rt, p);
} else if (av_stristart(p, "Public:", &p) && rt) {
if (strstr(p, "GET_PARAMETER") &&
method && !strcmp(method, "OPTIONS"))
rt->get_parameter_supported = 1;
} }
} }


@ -331,6 +331,11 @@ typedef struct RTSPState {
* Polling array for udp * Polling array for udp
*/ */
struct pollfd *p; struct pollfd *p;
/**
* Whether the server supports the GET_PARAMETER method.
*/
int get_parameter_supported;
} RTSPState; } RTSPState;
/** /**


@ -341,7 +341,9 @@ retry:
/* send dummy request to keep TCP connection alive */ /* send dummy request to keep TCP connection alive */
if ((av_gettime() - rt->last_cmd_time) / 1000000 >= rt->timeout / 2) { if ((av_gettime() - rt->last_cmd_time) / 1000000 >= rt->timeout / 2) {
if (rt->server_type != RTSP_SERVER_REAL) { if (rt->server_type == RTSP_SERVER_WMS ||
(rt->server_type != RTSP_SERVER_REAL &&
rt->get_parameter_supported)) {
ff_rtsp_send_cmd_async(s, "GET_PARAMETER", rt->control_uri, NULL); ff_rtsp_send_cmd_async(s, "GET_PARAMETER", rt->control_uri, NULL);
} else { } else {
ff_rtsp_send_cmd_async(s, "OPTIONS", "*", NULL); ff_rtsp_send_cmd_async(s, "OPTIONS", "*", NULL);


@ -75,7 +75,7 @@ OBJS-$(ARCH_ARM) += arm/cpu.o
OBJS-$(ARCH_PPC) += ppc/cpu.o OBJS-$(ARCH_PPC) += ppc/cpu.o
OBJS-$(ARCH_X86) += x86/cpu.o OBJS-$(ARCH_X86) += x86/cpu.o
TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha softfloat tree TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha tree
TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
DIRS = arm bfin sh4 x86 DIRS = arm bfin sh4 x86


@ -24,47 +24,52 @@
#include <stdio.h> #include <stdio.h>
#include <inttypes.h> #include <inttypes.h>
#define FIXP (1<<16) #define FIXP (1 << 16)
#define MY_PI 205887 //(M_PI*FIX) #define MY_PI 205887 //(M_PI * FIX)
static int64_t int_pow(int64_t a, int p){ static int64_t int_pow(int64_t a, int p)
int64_t v= FIXP; {
int64_t v = FIXP;
for(; p; p--){ for (; p; p--) {
v*= a; v *= a;
v/= FIXP; v /= FIXP;
} }
return v; return v;
} }
static int64_t int_sin(int64_t a){ static int64_t int_sin(int64_t a)
if(a<0) a= MY_PI-a; // 0..inf {
a %= 2*MY_PI; // 0..2PI if (a < 0)
a = MY_PI - a; // 0..inf
a %= 2 * MY_PI; // 0..2PI
if(a>=MY_PI*3/2) a -= 2*MY_PI; // -PI/2 .. 3PI/2 if (a >= MY_PI * 3 / 2)
if(a>=MY_PI/2 ) a = MY_PI - a; // -PI/2 .. PI/2 a -= 2 * MY_PI; // -PI / 2 .. 3PI / 2
if (a >= MY_PI /2)
a = MY_PI - a; // -PI / 2 .. PI / 2
return a - int_pow(a, 3)/6 + int_pow(a, 5)/120 - int_pow(a, 7)/5040; return a - int_pow(a, 3) / 6 + int_pow(a, 5) / 120 - int_pow(a, 7) / 5040;
} }
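/* int_sin() reduces its argument towards [-PI/2, PI/2] and then evaluates the
 * Taylor polynomial sin(a) ~ a - a^3/6 + a^5/120 - a^7/5040 in 16.16 fixed
 * point (FIXP == 1 << 16, MY_PI ~ pi * FIXP).  A small usage sketch, assuming
 * the definitions above are in scope, mirroring what init_demo() does for its
 * angle tables: */
static int64_t fix_sin_deg(int deg)
{
    int64_t radian = 2 * deg * MY_PI / 360; /* degrees -> 16.16 radians */
    return int_sin(radian);                 /* roughly in [-FIXP, FIXP] */
}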
#define SCALEBITS 8 #define SCALEBITS 8
#define ONE_HALF (1 << (SCALEBITS - 1)) #define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5)) #define FIX(x) ((int) ((x) * (1L << SCALEBITS) + 0.5))
typedef unsigned char UINT8;
static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, static void rgb24_to_yuv420p(unsigned char *lum, unsigned char *cb,
UINT8 *src, int width, int height) unsigned char *cr, unsigned char *src,
int width, int height)
{ {
int wrap, wrap3, x, y; int wrap, wrap3, x, y;
int r, g, b, r1, g1, b1; int r, g, b, r1, g1, b1;
UINT8 *p; unsigned char *p;
wrap = width; wrap = width;
wrap3 = width * 3; wrap3 = width * 3;
p = src; p = src;
for(y=0;y<height;y+=2) { for (y = 0; y < height; y += 2) {
for(x=0;x<width;x+=2) { for (x = 0; x < width; x += 2) {
r = p[0]; r = p[0];
g = p[1]; g = p[1];
b = p[2]; b = p[2];
@ -81,7 +86,7 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
b1 += b; b1 += b;
lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
p += wrap3; p += wrap3;
lum += wrap; lum += wrap;
r = p[0]; r = p[0];
@ -104,14 +109,14 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
cb++; cb++;
cr++; cr++;
p += -wrap3 + 2 * 3; p += -wrap3 + 2 * 3;
lum += -wrap + 2; lum += -wrap + 2;
} }
p += wrap3; p += wrap3;
lum += wrap; lum += wrap;
} }
} }
@ -119,7 +124,7 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
/* cif format */ /* cif format */
#define DEFAULT_WIDTH 352 #define DEFAULT_WIDTH 352
#define DEFAULT_HEIGHT 288 #define DEFAULT_HEIGHT 288
#define DEFAULT_NB_PICT 50 #define DEFAULT_NB_PICT 50
static void pgmyuv_save(const char *filename, int w, int h, static void pgmyuv_save(const char *filename, int w, int h,
unsigned char *rgb_tab) unsigned char *rgb_tab)
@ -130,19 +135,19 @@ static void pgmyuv_save(const char *filename, int w, int h,
unsigned char *lum_tab, *cb_tab, *cr_tab; unsigned char *lum_tab, *cb_tab, *cr_tab;
lum_tab = malloc(w * h); lum_tab = malloc(w * h);
cb_tab = malloc((w * h) / 4); cb_tab = malloc(w * h / 4);
cr_tab = malloc((w * h) / 4); cr_tab = malloc(w * h / 4);
rgb24_to_yuv420p(lum_tab, cb_tab, cr_tab, rgb_tab, w, h); rgb24_to_yuv420p(lum_tab, cb_tab, cr_tab, rgb_tab, w, h);
f = fopen(filename,"wb"); f = fopen(filename, "wb");
fprintf(f, "P5\n%d %d\n%d\n", w, (h * 3) / 2, 255); fprintf(f, "P5\n%d %d\n%d\n", w, h * 3 / 2, 255);
fwrite(lum_tab, 1, w * h, f); fwrite(lum_tab, 1, w * h, f);
h2 = h / 2; h2 = h / 2;
w2 = w / 2; w2 = w / 2;
cb = cb_tab; cb = cb_tab;
cr = cr_tab; cr = cr_tab;
for(i=0;i<h2;i++) { for (i = 0; i < h2; i++) {
fwrite(cb, 1, w2, f); fwrite(cb, 1, w2, f);
fwrite(cr, 1, w2, f); fwrite(cr, 1, w2, f);
cb += w2; cb += w2;
@ -172,104 +177,100 @@ static void put_pixel(int x, int y, int r, int g, int b)
p[2] = b; p[2] = b;
} }
unsigned char tab_r[256*256]; unsigned char tab_r[256 * 256];
unsigned char tab_g[256*256]; unsigned char tab_g[256 * 256];
unsigned char tab_b[256*256]; unsigned char tab_b[256 * 256];
int h_cos [360]; int h_cos [360];
int h_sin [360]; int h_sin [360];
static int ipol(uint8_t *src, int x, int y){ static int ipol(uint8_t *src, int x, int y)
int int_x= x>>16; {
int int_y= y>>16; int int_x = x >> 16;
int frac_x= x&0xFFFF; int int_y = y >> 16;
int frac_y= y&0xFFFF; int frac_x = x & 0xFFFF;
int s00= src[ ( int_x &255) + 256*( int_y &255) ]; int frac_y = y & 0xFFFF;
int s01= src[ ((int_x+1)&255) + 256*( int_y &255) ]; int s00 = src[( int_x & 255) + 256 * ( int_y & 255)];
int s10= src[ ( int_x &255) + 256*((int_y+1)&255) ]; int s01 = src[((int_x + 1) & 255) + 256 * ( int_y & 255)];
int s11= src[ ((int_x+1)&255) + 256*((int_y+1)&255) ]; int s10 = src[( int_x & 255) + 256 * ((int_y + 1) & 255)];
int s0= (((1<<16) - frac_x)*s00 + frac_x*s01)>>8; int s11 = src[((int_x + 1) & 255) + 256 * ((int_y + 1) & 255)];
int s1= (((1<<16) - frac_x)*s10 + frac_x*s11)>>8; int s0 = (((1 << 16) - frac_x) * s00 + frac_x * s01) >> 8;
int s1 = (((1 << 16) - frac_x) * s10 + frac_x * s11) >> 8;
return (((1<<16) - frac_y)*s0 + frac_y*s1)>>24; return (((1 << 16) - frac_y) * s0 + frac_y * s1) >> 24;
} }
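/* ipol() is a bilinear fetch from a 256x256 wrap-around texture addressed
 * with 16.16 fixed-point coordinates; the two >> 8 and the final >> 24 cancel
 * the 2^16-scaled interpolation weights.  A floating-point reference (a
 * sketch, for non-negative coordinates only): */
static double ipol_ref(const unsigned char *src, double x, double y)
{
    int ix = (int)x & 255, iy = (int)y & 255;
    double fx = x - (int)x, fy = y - (int)y;
    double s00 = src[ix + 256 * iy];
    double s01 = src[((ix + 1) & 255) + 256 * iy];
    double s10 = src[ix + 256 * ((iy + 1) & 255)];
    double s11 = src[((ix + 1) & 255) + 256 * ((iy + 1) & 255)];
    return (1 - fy) * ((1 - fx) * s00 + fx * s01) +
                fy  * ((1 - fx) * s10 + fx * s11);
}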
static void gen_image(int num, int w, int h) static void gen_image(int num, int w, int h)
{ {
const int c = h_cos [num % 360]; const int c = h_cos [num % 360];
const int s = h_sin [num % 360]; const int s = h_sin [num % 360];
const int xi = -(w/2) * c; const int xi = -(w / 2) * c;
const int yi = (w/2) * s; const int yi = (w / 2) * s;
const int xj = -(h/2) * s; const int xj = -(h / 2) * s;
const int yj = -(h/2) * c; const int yj = -(h / 2) * c;
int i,j; int i, j;
int x,y; int x, y;
int xprime = xj; int xprime = xj;
int yprime = yj; int yprime = yj;
for (j = 0; j < h; j++) {
x = xprime + xi + FIXP * w / 2;
xprime += s;
for (j=0;j<h;j++) { y = yprime + yi + FIXP * h / 2;
yprime += c;
x = xprime + xi + FIXP*w/2; for (i = 0; i < w; i++ ) {
xprime += s; x += c;
y -= s;
y = yprime + yi + FIXP*h/2; put_pixel(i, j, ipol(tab_r, x, y), ipol(tab_g, x, y), ipol(tab_b, x, y));
yprime += c; }
for ( i=0 ; i<w ; i++ ) {
x += c;
y -= s;
#if 1
put_pixel(i, j, ipol(tab_r, x, y), ipol(tab_g, x, y), ipol(tab_b, x, y));
#else
{
unsigned dep;
dep = ((x>>16)&255) + (((y>>16)&255)<<8);
put_pixel(i, j, tab_r[dep], tab_g[dep], tab_b[dep]);
}
#endif
} }
}
} }
#define W 256 #define W 256
#define H 256 #define H 256
static void init_demo(const char *filename) { static int init_demo(const char *filename)
int i,j; {
int h; int i, j;
int radian; int h;
char line[3 * W]; int radian;
char line[3 * W];
FILE *fichier; FILE *input_file;
fichier = fopen(filename,"rb"); input_file = fopen(filename, "rb");
if (!fichier) { if (!input_file) {
perror(filename); perror(filename);
exit(1); return 1;
}
fread(line, 1, 15, fichier);
for (i=0;i<H;i++) {
fread(line,1,3*W,fichier);
for (j=0;j<W;j++) {
tab_r[W*i+j] = line[3*j ];
tab_g[W*i+j] = line[3*j + 1];
tab_b[W*i+j] = line[3*j + 2];
} }
}
fclose(fichier);
/* tables sin/cos */ if (fread(line, 1, 15, input_file) != 15)
for (i=0;i<360;i++) { return 1;
radian = 2*i*MY_PI/360; for (i = 0; i < H; i++) {
h = 2*FIXP + int_sin (radian); if (fread(line, 1, 3 * W, input_file) != 3 * W)
h_cos[i] = ( h * int_sin (radian + MY_PI/2) )/2/FIXP; return 1;
h_sin[i] = ( h * int_sin (radian ) )/2/FIXP; for (j = 0; j < W; j++) {
} tab_r[W * i + j] = line[3 * j ];
tab_g[W * i + j] = line[3 * j + 1];
tab_b[W * i + j] = line[3 * j + 2];
}
}
fclose(input_file);
/* tables sin/cos */
for (i = 0; i < 360; i++) {
radian = 2 * i * MY_PI / 360;
h = 2 * FIXP + int_sin (radian);
h_cos[i] = h * int_sin(radian + MY_PI / 2) / 2 / FIXP;
h_sin[i] = h * int_sin(radian) / 2 / FIXP;
}
return 0;
} }
int main(int argc, char **argv) int main(int argc, char **argv)
@ -280,20 +281,21 @@ int main(int argc, char **argv)
if (argc != 3) { if (argc != 3) {
printf("usage: %s directory/ image.pnm\n" printf("usage: %s directory/ image.pnm\n"
"generate a test video stream\n", argv[0]); "generate a test video stream\n", argv[0]);
exit(1); return 1;
} }
w = DEFAULT_WIDTH; w = DEFAULT_WIDTH;
h = DEFAULT_HEIGHT; h = DEFAULT_HEIGHT;
rgb_tab = malloc(w * h * 3); rgb_tab = malloc(w * h * 3);
wrap = w * 3; wrap = w * 3;
width = w; width = w;
height = h; height = h;
init_demo(argv[2]); if (init_demo(argv[2]))
return 1;
for(i=0;i<DEFAULT_NB_PICT;i++) { for (i = 0; i < DEFAULT_NB_PICT; i++) {
snprintf(buf, sizeof(buf), "%s%02d.pgm", argv[1], i); snprintf(buf, sizeof(buf), "%s%02d.pgm", argv[1], i);
gen_image(i, w, h); gen_image(i, w, h);
pgmyuv_save(buf, w, h, rgb_tab); pgmyuv_save(buf, w, h, rgb_tab);