From e727bca392995e4fec0104b7f75f89a66618c120 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Tue, 1 Feb 2011 12:34:23 +0100 Subject: [PATCH 01/25] lavfi: cleanup avfilter_get_audio_buffer() and pals. Remove AVFilterBufferRefAudioProps.size, and use nb_samples in its place everywhere. This is required as the size in the audio buffer may be aligned, so it may not contain a well defined number of samples. Also remove the useless planar parameter, which can be deduced from the sample format. This is technically an API and ABI break, but since the audio part of lavfi is not usable now, this should not be a problem in practice. Signed-off-by: Anton Khirnov --- libavfilter/avfilter.c | 22 ++++++++++++---------- libavfilter/avfilter.h | 20 ++++++++++---------- libavfilter/defaults.c | 23 +++++++++++------------ 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index e535bdab64..9f81be7a8d 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -265,10 +265,9 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_get_picture_type_char(ref->video->pict_type)); } if (ref->audio) { - av_dlog(ctx, " cl:%"PRId64"d sn:%d s:%d sr:%d p:%d", + av_dlog(ctx, " cl:%"PRId64"d n:%d r:%d p:%d", ref->audio->channel_layout, ref->audio->nb_samples, - ref->audio->size, ref->audio->sample_rate, ref->audio->planar); } @@ -368,16 +367,16 @@ fail: } AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { AVFilterBufferRef *ret = NULL; if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -585,6 +584,9 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) /* prepare to copy the samples if the buffer has insufficient permissions */ if ((dst->min_perms & samplesref->perms) != dst->min_perms || dst->rej_perms & samplesref->perms) { + int i, planar = av_sample_fmt_is_planar(samplesref->format); + int planes = !planar ? 
1: + av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); av_log(link->dst, AV_LOG_DEBUG, "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", @@ -592,14 +594,14 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, samplesref->format, - samplesref->audio->size, - samplesref->audio->channel_layout, - samplesref->audio->planar); + samplesref->audio->nb_samples, + samplesref->audio->channel_layout); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; /* Copy actual data into new samples buffer */ - memcpy(link->cur_buf->data[0], samplesref->data[0], samplesref->audio->size); + for (i = 0; i < planes; i++) + memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); avfilter_unref_buffer(samplesref); } else diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 6555744f12..19ac057ed4 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -109,7 +109,6 @@ typedef struct AVFilterBuffer { typedef struct AVFilterBufferRefAudioProps { uint64_t channel_layout; ///< channel layout of audio buffer int nb_samples; ///< number of audio samples - int size; ///< audio buffer size uint32_t sample_rate; ///< audio buffer sample rate int planar; ///< audio buffer - planar or packed } AVFilterBufferRefAudioProps; @@ -388,8 +387,8 @@ struct AVFilterPad { * Input audio pads only. */ AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Callback called after the slices of a frame are completely sent. If @@ -474,8 +473,9 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, /** default handler for get_audio_buffer() for audio inputs */ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, + int nb_samples, + uint64_t channel_layout); /** * A helper for query_formats() which sets all links to the same list of @@ -505,8 +505,8 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, /** get_audio_buffer() handler for filters which simply pass audio along */ AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Filter definition. This defines the pads a filter contains, and all the @@ -690,15 +690,15 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int * be requested * @param perms the required access permissions * @param sample_fmt the format of each sample in the buffer to allocate - * @param size the buffer size in bytes + * @param nb_samples the number of samples per channel * @param channel_layout the number and type of channels per sample in the buffer to allocate * @param planar audio data layout - planar or packed * @return A reference to the samples. This must be unreferenced with * avfilter_unref_buffer when you are finished with it. 
*/ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Create an audio buffer reference wrapped around an already diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index 086fcc0b4c..fcb29e3b8d 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -58,12 +58,13 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per } AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); AVFilterBufferRef *ref = NULL; int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0; + int planar = av_sample_fmt_is_planar(sample_fmt); char *buf; if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef)))) @@ -77,7 +78,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per goto fail; ref->audio->channel_layout = channel_layout; - ref->audio->size = size; + ref->audio->nb_samples = nb_samples; ref->audio->planar = planar; /* make sure the buffer gets read permission or it's useless for output */ @@ -89,8 +90,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per sample_size = av_get_bytes_per_sample(sample_fmt); chans_nb = av_get_channel_layout_nb_channels(channel_layout); - per_channel_size = size/chans_nb; - ref->audio->nb_samples = per_channel_size/sample_size; + per_channel_size = nb_samples * sample_size; /* Set the number of bytes to traverse to reach next sample of a particular channel: * For planar, this is simply the sample size. 
@@ -101,7 +101,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0])); /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - bufsize = (size + 15)&~15; + bufsize = (nb_samples * chans_nb * sample_size + 15)&~15; buf = av_malloc(bufsize); if (!buf) goto fail; @@ -189,9 +189,8 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa if (outlink) { outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format, - samplesref->audio->size, - samplesref->audio->channel_layout, - samplesref->audio->planar); + samplesref->audio->nb_samples, + samplesref->audio->channel_layout); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); @@ -293,9 +292,9 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, } AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int packed) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { return avfilter_get_audio_buffer(link->dst->outputs[0], perms, sample_fmt, - size, channel_layout, packed); + nb_samples, channel_layout); } From 6735534f19369a914d795aa84cd3faa4c57729ce Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Sat, 15 Jan 2011 18:48:37 +0100 Subject: [PATCH 02/25] lavfi: use avfilter_get_audio_buffer_ref_from_arrays() in avfilter_default_get_audio_buffer --- libavfilter/defaults.c | 80 +++++++++--------------------------------- 1 file changed, 17 insertions(+), 63 deletions(-) diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index fcb29e3b8d..7c75ab9c4b 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -61,78 +61,32 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per enum AVSampleFormat sample_fmt, int nb_samples, uint64_t channel_layout) { - AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); - AVFilterBufferRef *ref = NULL; - int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0; - int planar = av_sample_fmt_is_planar(sample_fmt); - char *buf; + AVFilterBufferRef *samplesref = NULL; + uint8_t **data; + int planar = av_sample_fmt_is_planar(sample_fmt); + int nb_channels = av_get_channel_layout_nb_channels(channel_layout); + int planes = planar ? 
nb_channels : 1; + int linesize; - if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef)))) + if (!(data = av_mallocz(sizeof(*data) * planes))) goto fail; - ref->buf = samples; - ref->format = sample_fmt; - - ref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)); - if (!ref->audio) + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, sample_fmt, 0) < 0) goto fail; - ref->audio->channel_layout = channel_layout; - ref->audio->nb_samples = nb_samples; - ref->audio->planar = planar; - - /* make sure the buffer gets read permission or it's useless for output */ - ref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samples->free = ff_avfilter_default_free_buffer; - - sample_size = av_get_bytes_per_sample(sample_fmt); - chans_nb = av_get_channel_layout_nb_channels(channel_layout); - - per_channel_size = nb_samples * sample_size; - - /* Set the number of bytes to traverse to reach next sample of a particular channel: - * For planar, this is simply the sample size. - * For packed, this is the number of samples * sample_size. - */ - for (i = 0; i < chans_nb; i++) - samples->linesize[i] = planar > 0 ? per_channel_size : sample_size; - memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0])); - - /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - bufsize = (nb_samples * chans_nb * sample_size + 15)&~15; - buf = av_malloc(bufsize); - if (!buf) + samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, sample_fmt, + channel_layout); + if (!samplesref) goto fail; - /* For planar, set the start point of each channel's data within the buffer - * For packed, set the start point of the entire buffer only - */ - samples->data[0] = buf; - if (buf && planar) { - for (i = 1; i < chans_nb; i++) { - step_size += per_channel_size; - samples->data[i] = buf + step_size; - } - } else { - for (i = 1; i < chans_nb; i++) - samples->data[i] = buf; - } - - memset(&samples->data[chans_nb], 0, (8-chans_nb) * sizeof(samples->data[0])); - - memcpy(ref->data, samples->data, sizeof(ref->data)); - memcpy(ref->linesize, samples->linesize, sizeof(ref->linesize)); - - return ref; + av_freep(&data); fail: - if (ref) - av_free(ref->audio); - av_free(ref); - av_free(samples); - return NULL; + if (data) + av_freep(&data[0]); + av_freep(&data); + return samplesref; } void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) From 7ef0adcc2e800cb1357d4d5d7ce878c0f9a36c01 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Tue, 30 Aug 2011 23:22:29 +0200 Subject: [PATCH 03/25] lavfi: simplify signature for avfilter_get_audio_buffer() and friends The additional parameters are just complicating the function interface. Assume that a requested samples buffer will *always* have the format specified in the requested link. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
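For illustration, a caller of the simplified entry point now passes only the link, the permissions and a sample count; format and channel layout are taken from the link itself. A minimal hypothetical sketch (filter code not part of this patch; the 1024-sample request size is arbitrary):

    static int request_frame(AVFilterLink *outlink)
    {
        /* format/channel_layout come from outlink, so only nb_samples is passed */
        AVFilterBufferRef *buf = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, 1024);
        if (!buf)
            return AVERROR(ENOMEM);
        /* ... fill buf->extended_data[] with 1024 samples per channel ... */
        avfilter_filter_samples(outlink, buf);
        return 0;
    }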
Signed-off-by: Anton Khirnov --- libavfilter/avfilter.c | 11 ++++------- libavfilter/avfilter.h | 16 ++++------------ libavfilter/defaults.c | 24 ++++++++++-------------- 3 files changed, 18 insertions(+), 33 deletions(-) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 9f81be7a8d..e301ddb37b 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -367,16 +367,15 @@ fail: } AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { AVFilterBufferRef *ret = NULL; if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); + ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); + ret = avfilter_default_get_audio_buffer(link, perms, nb_samples); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -593,9 +592,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, - samplesref->format, - samplesref->audio->nb_samples, - samplesref->audio->channel_layout); + samplesref->audio->nb_samples); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 19ac057ed4..cf95b4bb16 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -387,8 +387,7 @@ struct AVFilterPad { * Input audio pads only. */ AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Callback called after the slices of a frame are completely sent. If @@ -473,9 +472,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, /** default handler for get_audio_buffer() for audio inputs */ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, - int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * A helper for query_formats() which sets all links to the same list of @@ -505,8 +502,7 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, /** get_audio_buffer() handler for filters which simply pass audio along */ AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Filter definition. This defines the pads a filter contains, and all the @@ -689,16 +685,12 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int * @param link the output link to the filter from which the buffer will * be requested * @param perms the required access permissions - * @param sample_fmt the format of each sample in the buffer to allocate * @param nb_samples the number of samples per channel - * @param channel_layout the number and type of channels per sample in the buffer to allocate - * @param planar audio data layout - planar or packed * @return A reference to the samples. This must be unreferenced with * avfilter_unref_buffer when you are finished with it. 
*/ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Create an audio buffer reference wrapped around an already diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index 7c75ab9c4b..df05c06d63 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -58,25 +58,24 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per } AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { AVFilterBufferRef *samplesref = NULL; uint8_t **data; - int planar = av_sample_fmt_is_planar(sample_fmt); - int nb_channels = av_get_channel_layout_nb_channels(channel_layout); + int planar = av_sample_fmt_is_planar(link->format); + int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); int planes = planar ? nb_channels : 1; int linesize; if (!(data = av_mallocz(sizeof(*data) * planes))) goto fail; - if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, sample_fmt, 0) < 0) + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) goto fail; samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, - nb_samples, sample_fmt, - channel_layout); + nb_samples, link->format, + link->channel_layout); if (!samplesref) goto fail; @@ -142,9 +141,8 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa outlink = inlink->dst->outputs[0]; if (outlink) { - outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format, - samplesref->audio->nb_samples, - samplesref->audio->channel_layout); + outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); @@ -246,9 +244,7 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, } AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { - return avfilter_get_audio_buffer(link->dst->outputs[0], perms, sample_fmt, - nb_samples, channel_layout); + return avfilter_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); } From a6bdfc2a92a46aa7ee2d95a40f43b848ef94ec13 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 6 May 2012 10:40:11 +0200 Subject: [PATCH 04/25] lavfi: change AVFilterBufferRefAudioProps.sample_rate from uint32_t to int There's no reason for it to be explicitly 32 bits. It's declared as a plain int in all other places in Libav. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
--- libavfilter/avfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index cf95b4bb16..357ce34555 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -109,7 +109,7 @@ typedef struct AVFilterBuffer { typedef struct AVFilterBufferRefAudioProps { uint64_t channel_layout; ///< channel layout of audio buffer int nb_samples; ///< number of audio samples - uint32_t sample_rate; ///< audio buffer sample rate + int sample_rate; ///< audio buffer sample rate int planar; ///< audio buffer - planar or packed } AVFilterBufferRefAudioProps; From f20ab492acd2ab49f859dcd6d310029fb8c09dc4 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 9 May 2012 08:43:07 +0200 Subject: [PATCH 05/25] lavfi: change AVFilterLink.sample_rate from int64_t to int on next bump There is no real reason for it to be 64bit, it's just a plain int in the rest of Libav. --- libavfilter/avfilter.h | 4 ++++ libavfilter/version.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 357ce34555..69ada1b8be 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -596,7 +596,11 @@ struct AVFilterLink { AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio /* These two parameters apply only to audio */ uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/audioconvert.h) +#if FF_API_SAMPLERATE64 int64_t sample_rate; ///< samples per second +#else + int sample_rate; ///< samples per second +#endif int format; ///< agreed upon media format diff --git a/libavfilter/version.h b/libavfilter/version.h index 718ed7812e..71928f3f3b 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -47,5 +47,8 @@ #ifndef FF_API_GRAPH_AVCLASS #define FF_API_GRAPH_AVCLASS (LIBAVFILTER_VERSION_MAJOR > 2) #endif +#ifndef FF_API_SAMPLERATE64 +#define FF_API_SAMPLERATE64 (LIBAVFILTER_VERSION_MAJOR < 3) +#endif #endif // AVFILTER_VERSION_H From 472fb3bbfaf6fddb33d45688046184e7684c9f71 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 7 May 2012 10:51:23 +0200 Subject: [PATCH 06/25] lavfi: remove some audio-related function from public API. Those functions are only useful inside filters. It is better to not support user filters until the API is more stable. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
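As a concrete illustration of the internal API after this change (a sketch modeled on the af_anull update below; the filter itself is hypothetical), an in-tree audio filter now includes "audio.h" and wires the ff_-prefixed handlers into its pads:

    #include "audio.h"
    #include "avfilter.h"

    /* hypothetical pass-through audio filter using the internal handlers */
    AVFilter avfilter_af_example = {
        .name    = "example",
        .inputs  = (AVFilterPad[]) {{ .name             = "default",
                                      .type             = AVMEDIA_TYPE_AUDIO,
                                      .get_audio_buffer = ff_null_get_audio_buffer,
                                      .filter_samples   = ff_null_filter_samples },
                                    { .name = NULL }},
        .outputs = (AVFilterPad[]) {{ .name = "default",
                                      .type = AVMEDIA_TYPE_AUDIO },
                                    { .name = NULL }},
    };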
--- libavfilter/af_anull.c | 5 ++-- libavfilter/audio.h | 61 ++++++++++++++++++++++++++++++++++++++++++ libavfilter/avfilter.c | 16 ++++++----- libavfilter/avfilter.h | 37 ------------------------- libavfilter/defaults.c | 24 +++++++++-------- 5 files changed, 86 insertions(+), 57 deletions(-) create mode 100644 libavfilter/audio.h diff --git a/libavfilter/af_anull.c b/libavfilter/af_anull.c index e2bed36f0a..59b275c767 100644 --- a/libavfilter/af_anull.c +++ b/libavfilter/af_anull.c @@ -21,6 +21,7 @@ * null audio filter */ +#include "audio.h" #include "avfilter.h" AVFilter avfilter_af_anull = { @@ -31,8 +32,8 @@ AVFilter avfilter_af_anull = { .inputs = (AVFilterPad[]) {{ .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, - .filter_samples = avfilter_null_filter_samples }, + .get_audio_buffer = ff_null_get_audio_buffer, + .filter_samples = ff_null_filter_samples }, { .name = NULL}}, .outputs = (AVFilterPad[]) {{ .name = "default", diff --git a/libavfilter/audio.h b/libavfilter/audio.h new file mode 100644 index 0000000000..935bec5c43 --- /dev/null +++ b/libavfilter/audio.h @@ -0,0 +1,61 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_AUDIO_H +#define AVFILTER_AUDIO_H + +#include "avfilter.h" + +/** default handler for get_audio_buffer() for audio inputs */ +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** get_audio_buffer() handler for filters which simply pass audio along */ +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** + * Request an audio samples buffer with a specific set of permissions. + * + * @param link the output link to the filter from which the buffer will + * be requested + * @param perms the required access permissions + * @param nb_samples the number of samples per channel + * @return A reference to the samples. This must be unreferenced with + * avfilter_unref_buffer when you are finished with it. + */ +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** default handler for filter_samples() for audio inputs */ +void ff_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** filter_samples() handler for filters which simply pass audio along */ +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** + * Send a buffer of audio samples to the next filter. + * + * @param link the output link over which the audio samples are being sent + * @param samplesref a reference to the buffer of audio samples being sent. The + * receiving filter will free this reference when it no longer + * needs it or pass it on to the next filter. 
+ */ +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +#endif /* AVFILTER_AUDIO_H */ diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index e301ddb37b..6a530f8fd6 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -26,6 +26,8 @@ #include "libavutil/audioconvert.h" #include "libavutil/imgutils.h" #include "libavcodec/avcodec.h" + +#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -366,8 +368,8 @@ fail: return NULL; } -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { AVFilterBufferRef *ret = NULL; @@ -375,7 +377,7 @@ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, nb_samples); + ret = ff_default_get_audio_buffer(link, perms, nb_samples); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -570,7 +572,7 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) draw_slice(link, y, h, slice_dir); } -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) { void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); AVFilterPad *dst = link->dstpad; @@ -578,7 +580,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); if (!(filter_samples = dst->filter_samples)) - filter_samples = avfilter_default_filter_samples; + filter_samples = ff_default_filter_samples; /* prepare to copy the samples if the buffer has insufficient permissions */ if ((dst->min_perms & samplesref->perms) != dst->min_perms || @@ -591,8 +593,8 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); - link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, - samplesref->audio->nb_samples); + link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, + samplesref->audio->nb_samples); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 69ada1b8be..fd996db94b 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -460,9 +460,6 @@ void avfilter_default_draw_slice(AVFilterLink *link, int y, int h, int slice_dir /** default handler for end_frame() for video inputs */ void avfilter_default_end_frame(AVFilterLink *link); -/** default handler for filter_samples() for audio inputs */ -void avfilter_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** default handler for config_props() for audio/video outputs */ int avfilter_default_config_output_link(AVFilterLink *link); @@ -470,10 +467,6 @@ int avfilter_default_config_output_link(AVFilterLink *link); AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** default handler for get_audio_buffer() for audio inputs */ -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * A helper for query_formats() which sets all links to the same list of * formats. 
If there are no links hooked to this filter, the list of formats is @@ -493,17 +486,10 @@ void avfilter_null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); /** end_frame() handler for filters which simply pass video along */ void avfilter_null_end_frame(AVFilterLink *link); -/** filter_samples() handler for filters which simply pass audio along */ -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** get_video_buffer() handler for filters which simply pass video along */ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** get_audio_buffer() handler for filters which simply pass audio along */ -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * Filter definition. This defines the pads a filter contains, and all the * callback functions used to interact with the filter. @@ -683,19 +669,6 @@ AVFilterBufferRef * avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms, int w, int h, enum PixelFormat format); -/** - * Request an audio samples buffer with a specific set of permissions. - * - * @param link the output link to the filter from which the buffer will - * be requested - * @param perms the required access permissions - * @param nb_samples the number of samples per channel - * @return A reference to the samples. This must be unreferenced with - * avfilter_unref_buffer when you are finished with it. - */ -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * Create an audio buffer reference wrapped around an already * allocated samples buffer. @@ -766,16 +739,6 @@ void avfilter_end_frame(AVFilterLink *link); */ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); -/** - * Send a buffer of audio samples to the next filter. - * - * @param link the output link over which the audio samples are being sent - * @param samplesref a reference to the buffer of audio samples being sent. The - * receiving filter will free this reference when it no longer - * needs it or pass it on to the next filter. - */ -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** Initialize the filter system. Register all builtin filters. */ void avfilter_register_all(void); diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index df05c06d63..c25d37f8b3 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -22,6 +22,8 @@ #include "libavutil/audioconvert.h" #include "libavutil/imgutils.h" #include "libavutil/samplefmt.h" + +#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -57,8 +59,8 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per return picref; } -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { AVFilterBufferRef *samplesref = NULL; uint8_t **data; @@ -133,7 +135,7 @@ void avfilter_default_end_frame(AVFilterLink *inlink) } /* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. 
*/ -void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) { AVFilterLink *outlink = NULL; @@ -141,11 +143,11 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa outlink = inlink->dst->outputs[0]; if (outlink) { - outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, - samplesref->audio->nb_samples); + outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; - avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); + ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); avfilter_unref_buffer(outlink->out_buf); outlink->out_buf = NULL; } @@ -233,9 +235,9 @@ void avfilter_null_end_frame(AVFilterLink *link) avfilter_end_frame(link->dst->outputs[0]); } -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) { - avfilter_filter_samples(link->dst->outputs[0], samplesref); + ff_filter_samples(link->dst->outputs[0], samplesref); } AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h) @@ -243,8 +245,8 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, return avfilter_get_video_buffer(link->dst->outputs[0], perms, w, h); } -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { - return avfilter_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); + return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); } From 0b45334a5880d6e2a4b3642adcd5feab8a27a150 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 7 May 2012 11:21:38 +0200 Subject: [PATCH 07/25] lavfi: move audio-related functions to a separate file. This is easier to follow than having them randomly scattered in avfilter.c and defaults.c. --- libavfilter/Makefile | 1 + libavfilter/audio.c | 209 +++++++++++++++++++++++++++++++++++++++++ libavfilter/avfilter.c | 127 +------------------------ libavfilter/defaults.c | 64 ------------- libavfilter/internal.h | 4 + 5 files changed, 215 insertions(+), 190 deletions(-) create mode 100644 libavfilter/audio.c diff --git a/libavfilter/Makefile b/libavfilter/Makefile index e786b6d2fe..49a47d3e1b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -10,6 +10,7 @@ HEADERS = avfilter.h \ vsrc_buffer.h \ OBJS = allfilters.o \ + audio.o \ avfilter.o \ avfiltergraph.o \ buffersink.o \ diff --git a/libavfilter/audio.c b/libavfilter/audio.c new file mode 100644 index 0000000000..3e12c697ce --- /dev/null +++ b/libavfilter/audio.c @@ -0,0 +1,209 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/audioconvert.h" + +#include "audio.h" +#include "avfilter.h" +#include "internal.h" + +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); +} + +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *samplesref = NULL; + uint8_t **data; + int planar = av_sample_fmt_is_planar(link->format); + int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); + int planes = planar ? nb_channels : 1; + int linesize; + + if (!(data = av_mallocz(sizeof(*data) * planes))) + goto fail; + + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) + goto fail; + + samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, link->format, + link->channel_layout); + if (!samplesref) + goto fail; + + av_freep(&data); + +fail: + if (data) + av_freep(&data[0]); + av_freep(&data); + return samplesref; +} + +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *ret = NULL; + + if (link->dstpad->get_audio_buffer) + ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); + + if (!ret) + ret = ff_default_get_audio_buffer(link, perms, nb_samples); + + if (ret) + ret->type = AVMEDIA_TYPE_AUDIO; + + return ret; +} + +AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data, + int linesize,int perms, + int nb_samples, + enum AVSampleFormat sample_fmt, + uint64_t channel_layout) +{ + int planes; + AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); + AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); + + if (!samples || !samplesref) + goto fail; + + samplesref->buf = samples; + samplesref->buf->free = ff_avfilter_default_free_buffer; + if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) + goto fail; + + samplesref->audio->nb_samples = nb_samples; + samplesref->audio->channel_layout = channel_layout; + samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); + + planes = samplesref->audio->planar ? 
av_get_channel_layout_nb_channels(channel_layout) : 1; + + /* make sure the buffer gets read permission or it's useless for output */ + samplesref->perms = perms | AV_PERM_READ; + + samples->refcount = 1; + samplesref->type = AVMEDIA_TYPE_AUDIO; + samplesref->format = sample_fmt; + + memcpy(samples->data, data, + FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); + memcpy(samplesref->data, samples->data, sizeof(samples->data)); + + samples->linesize[0] = samplesref->linesize[0] = linesize; + + if (planes > FF_ARRAY_ELEMS(samples->data)) { + samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * + planes); + samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * + planes); + + if (!samples->extended_data || !samplesref->extended_data) + goto fail; + + memcpy(samples-> extended_data, data, sizeof(*data)*planes); + memcpy(samplesref->extended_data, data, sizeof(*data)*planes); + } else { + samples->extended_data = samples->data; + samplesref->extended_data = samplesref->data; + } + + return samplesref; + +fail: + if (samples && samples->extended_data != samples->data) + av_freep(&samples->extended_data); + if (samplesref) { + av_freep(&samplesref->audio); + if (samplesref->extended_data != samplesref->data) + av_freep(&samplesref->extended_data); + } + av_freep(&samplesref); + av_freep(&samples); + return NULL; +} + +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + ff_filter_samples(link->dst->outputs[0], samplesref); +} + +/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. */ +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) +{ + AVFilterLink *outlink = NULL; + + if (inlink->dst->output_count) + outlink = inlink->dst->outputs[0]; + + if (outlink) { + outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); + outlink->out_buf->pts = samplesref->pts; + outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; + ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); + avfilter_unref_buffer(outlink->out_buf); + outlink->out_buf = NULL; + } + avfilter_unref_buffer(samplesref); + inlink->cur_buf = NULL; +} + +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); + AVFilterPad *dst = link->dstpad; + + FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); + + if (!(filter_samples = dst->filter_samples)) + filter_samples = ff_default_filter_samples; + + /* prepare to copy the samples if the buffer has insufficient permissions */ + if ((dst->min_perms & samplesref->perms) != dst->min_perms || + dst->rej_perms & samplesref->perms) { + int i, planar = av_sample_fmt_is_planar(samplesref->format); + int planes = !planar ? 
1: + av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); + + av_log(link->dst, AV_LOG_DEBUG, + "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", + samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); + + link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, + samplesref->audio->nb_samples); + link->cur_buf->pts = samplesref->pts; + link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; + + /* Copy actual data into new samples buffer */ + for (i = 0; i < planes; i++) + memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); + + avfilter_unref_buffer(samplesref); + } else + link->cur_buf = samplesref; + + filter_samples(link, link->cur_buf); +} + diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 6a530f8fd6..bd898e37ab 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -27,7 +27,6 @@ #include "libavutil/imgutils.h" #include "libavcodec/avcodec.h" -#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -277,7 +276,7 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_dlog(ctx, "]%s", end ? "\n" : ""); } -static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) +void ff_dlog_link(void *ctx, AVFilterLink *link, int end) { if (link->type == AVMEDIA_TYPE_VIDEO) { av_dlog(ctx, @@ -301,8 +300,6 @@ static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) } } -#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) - AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { AVFilterBufferRef *ret = NULL; @@ -368,91 +365,6 @@ fail: return NULL; } -AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - AVFilterBufferRef *ret = NULL; - - if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); - - if (!ret) - ret = ff_default_get_audio_buffer(link, perms, nb_samples); - - if (ret) - ret->type = AVMEDIA_TYPE_AUDIO; - - return ret; -} - -AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data, - int linesize, int perms, - int nb_samples, - enum AVSampleFormat sample_fmt, - uint64_t channel_layout) -{ - int planes; - AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); - AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); - - if (!samples || !samplesref) - goto fail; - - samplesref->buf = samples; - samplesref->buf->free = ff_avfilter_default_free_buffer; - if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) - goto fail; - - samplesref->audio->nb_samples = nb_samples; - samplesref->audio->channel_layout = channel_layout; - samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); - - planes = samplesref->audio->planar ? 
av_get_channel_layout_nb_channels(channel_layout) : 1; - - /* make sure the buffer gets read permission or it's useless for output */ - samplesref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samplesref->type = AVMEDIA_TYPE_AUDIO; - samplesref->format = sample_fmt; - - memcpy(samples->data, data, - FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); - memcpy(samplesref->data, samples->data, sizeof(samples->data)); - - samples->linesize[0] = samplesref->linesize[0] = linesize; - - if (planes > FF_ARRAY_ELEMS(samples->data)) { - samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * - planes); - samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * - planes); - - if (!samples->extended_data || !samplesref->extended_data) - goto fail; - - memcpy(samples-> extended_data, data, sizeof(*data)*planes); - memcpy(samplesref->extended_data, data, sizeof(*data)*planes); - } else { - samples->extended_data = samples->data; - samplesref->extended_data = samplesref->data; - } - - return samplesref; - -fail: - if (samples && samples->extended_data != samples->data) - av_freep(&samples->extended_data); - if (samplesref) { - av_freep(&samplesref->audio); - if (samplesref->extended_data != samplesref->data) - av_freep(&samplesref->extended_data); - } - av_freep(&samplesref); - av_freep(&samples); - return NULL; -} - int avfilter_request_frame(AVFilterLink *link) { FF_DPRINTF_START(NULL, request_frame); ff_dlog_link(NULL, link, 1); @@ -572,43 +484,6 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) draw_slice(link, y, h, slice_dir); } -void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); - AVFilterPad *dst = link->dstpad; - - FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); - - if (!(filter_samples = dst->filter_samples)) - filter_samples = ff_default_filter_samples; - - /* prepare to copy the samples if the buffer has insufficient permissions */ - if ((dst->min_perms & samplesref->perms) != dst->min_perms || - dst->rej_perms & samplesref->perms) { - int i, planar = av_sample_fmt_is_planar(samplesref->format); - int planes = !planar ? 
1: - av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); - - av_log(link->dst, AV_LOG_DEBUG, - "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", - samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); - - link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, - samplesref->audio->nb_samples); - link->cur_buf->pts = samplesref->pts; - link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; - - /* Copy actual data into new samples buffer */ - for (i = 0; i < planes; i++) - memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); - - avfilter_unref_buffer(samplesref); - } else - link->cur_buf = samplesref; - - filter_samples(link, link->cur_buf); -} - #define MAX_REGISTERED_AVFILTERS_NB 64 static AVFilter *registered_avfilters[MAX_REGISTERED_AVFILTERS_NB + 1]; diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index c25d37f8b3..caf6442974 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -23,7 +23,6 @@ #include "libavutil/imgutils.h" #include "libavutil/samplefmt.h" -#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -59,37 +58,6 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per return picref; } -AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - AVFilterBufferRef *samplesref = NULL; - uint8_t **data; - int planar = av_sample_fmt_is_planar(link->format); - int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); - int planes = planar ? nb_channels : 1; - int linesize; - - if (!(data = av_mallocz(sizeof(*data) * planes))) - goto fail; - - if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) - goto fail; - - samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, - nb_samples, link->format, - link->channel_layout); - if (!samplesref) - goto fail; - - av_freep(&data); - -fail: - if (data) - av_freep(&data[0]); - av_freep(&data); - return samplesref; -} - void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) { AVFilterLink *outlink = NULL; @@ -134,27 +102,6 @@ void avfilter_default_end_frame(AVFilterLink *inlink) } } -/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. 
*/ -void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) -{ - AVFilterLink *outlink = NULL; - - if (inlink->dst->output_count) - outlink = inlink->dst->outputs[0]; - - if (outlink) { - outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, - samplesref->audio->nb_samples); - outlink->out_buf->pts = samplesref->pts; - outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; - ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); - avfilter_unref_buffer(outlink->out_buf); - outlink->out_buf = NULL; - } - avfilter_unref_buffer(samplesref); - inlink->cur_buf = NULL; -} - /** * default config_link() implementation for output video links to simplify * the implementation of one input one output video filters */ @@ -235,18 +182,7 @@ void avfilter_null_end_frame(AVFilterLink *link) avfilter_end_frame(link->dst->outputs[0]); } -void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - ff_filter_samples(link->dst->outputs[0], samplesref); -} - AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { return avfilter_get_video_buffer(link->dst->outputs[0], perms, w, h); } - -AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); -} diff --git a/libavfilter/internal.h b/libavfilter/internal.h index 0630e9b7d6..a5b3f788da 100644 --- a/libavfilter/internal.h +++ b/libavfilter/internal.h @@ -55,4 +55,8 @@ void ff_avfilter_default_free_buffer(AVFilterBuffer *buf); /** Tell is a format is contained in the provided list terminated by -1. */ int ff_fmt_is_in(int fmt, const int *fmts); +#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) + +void ff_dlog_link(void *ctx, AVFilterLink *link, int end); + #endif /* AVFILTER_INTERNAL_H */ From 5cc6d5244d4ec89b3ac855abff4a3d19caee22f1 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Thu, 3 May 2012 15:23:32 -0400 Subject: [PATCH 08/25] lavr: replace the SSE version of ff_conv_fltp_to_flt_6ch() with SSE4 and AVX The current SSE version is slower than the MMX version on Athlon64 and Sandy Bridge, but the SSE4 and AVX versions are faster on Sandy Bridge. 
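For orientation, a scalar C equivalent of the conversion these SIMD routines implement (a sketch, not part of the patch; the signature is modeled on the existing ff_conv_fltp_to_flt_6ch_* prototypes):

    /* interleave 6 planar float channels into one packed float stream */
    static void conv_fltp_to_flt_6ch_c(float *dst, float *const *src, int len)
    {
        int i, ch;
        for (i = 0; i < len; i++)
            for (ch = 0; ch < 6; ch++)
                *dst++ = src[ch][i];
    }

The SSE4 speedup comes mainly from blendps, which replaces the movaps+shufps pairs used in the old SSE path.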
--- libavresample/x86/audio_convert.asm | 30 ++++++++++++++------------ libavresample/x86/audio_convert_init.c | 13 +++++++---- libavutil/x86/x86util.asm | 7 +++--- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 809c5d1378..ba59f3314f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -54,26 +54,24 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len mova m3, [srcq+src3q] mova m4, [srcq+src4q] mova m5, [srcq+src5q] -%if cpuflag(sse) +%if cpuflag(sse4) SBUTTERFLYPS 0, 1, 6 SBUTTERFLYPS 2, 3, 6 SBUTTERFLYPS 4, 5, 6 - movaps m6, m4 - shufps m4, m0, q3210 + blendps m6, m4, m0, 1100b movlhps m0, m2 - movhlps m6, m2 - movaps [dstq ], m0 - movaps [dstq+16], m4 - movaps [dstq+32], m6 - - movaps m6, m5 - shufps m5, m1, q3210 + movhlps m4, m2 + blendps m2, m5, m1, 1100b movlhps m1, m3 - movhlps m6, m3 + movhlps m5, m3 + + movaps [dstq ], m0 + movaps [dstq+16], m6 + movaps [dstq+32], m4 movaps [dstq+48], m1 - movaps [dstq+64], m5 - movaps [dstq+80], m6 + movaps [dstq+64], m2 + movaps [dstq+80], m5 %else ; mmx SBUTTERFLY dq, 0, 1, 6 SBUTTERFLY dq, 2, 3, 6 @@ -100,5 +98,9 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len INIT_MMX mmx CONV_FLTP_TO_FLT_6CH -INIT_XMM sse +INIT_XMM sse4 CONV_FLTP_TO_FLT_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLTP_TO_FLT_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6883f10a21..206aede751 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -22,8 +22,9 @@ #include "libavutil/cpu.h" #include "libavresample/audio_convert.h" -extern void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int len); -extern void ff_conv_fltp_to_flt_6ch_sse(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len); av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { @@ -34,9 +35,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } - if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE", ff_conv_fltp_to_flt_6ch_sse); + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); + } + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 55f4a936e2..508f24e2b5 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -42,10 +42,9 @@ %endmacro %macro SBUTTERFLYPS 3 - movaps m%3, m%1 - unpcklps m%1, m%2 - unpckhps m%3, m%2 - SWAP %2, %3 + unpcklps m%3, m%1, m%2 + unpckhps m%1, m%1, m%2 + SWAP %1, %3, %2 %endmacro %macro TRANSPOSE4x4B 5 From 59cbc4eee2edcfd0a89086237cd7a54e47f7c73b Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 23 Feb 2012 11:34:28 +0100 Subject: [PATCH 09/25] mov: make one comment slightly more specific --- libavformat/mov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/libavformat/mov.c b/libavformat/mov.c index ad1340a79d..29f01c3f72 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -994,7 +994,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR_INVALIDDATA; if (atom.size >= 10) { - // Broken files created by legacy versions of Libav and FFmpeg will + // Broken files created by legacy versions of libavformat will // wrap a whole fiel atom inside of a glbl atom. unsigned size = avio_rb32(pb); unsigned type = avio_rl32(pb); From db1e403cfbfdba00826c458fa80c4cd83d5499ec Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 8 Apr 2012 13:05:17 +0200 Subject: [PATCH 10/25] vcr1: cosmetics: K&R prettyprinting, typos, parentheses, dead code, comments --- libavcodec/vcr1.c | 142 +++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 23b06479ae..681150d80e 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -21,92 +21,88 @@ /** * @file - * ati vcr1 codec. + * ATI VCR1 codec */ #include "avcodec.h" #include "dsputil.h" -//#undef NDEBUG -//#include - /* Disable the encoder. */ #undef CONFIG_VCR1_ENCODER #define CONFIG_VCR1_ENCODER 0 -typedef struct VCR1Context{ +typedef struct VCR1Context { AVCodecContext *avctx; AVFrame picture; int delta[16]; int offset[4]; } VCR1Context; -static int decode_frame(AVCodecContext *avctx, - void *data, int *data_size, - AVPacket *avpkt) +static int decode_frame(AVCodecContext *avctx, void *data, + int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - VCR1Context * const a = avctx->priv_data; - AVFrame *picture = data; - AVFrame * const p = &a->picture; - const uint8_t *bytestream= buf; + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + VCR1Context *const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame *const p = &a->picture; + const uint8_t *bytestream = buf; int i, x, y; - if(p->data[0]) + if (p->data[0]) avctx->release_buffer(avctx, p); - p->reference= 0; - if(avctx->get_buffer(avctx, p) < 0){ + p->reference = 0; + if (avctx->get_buffer(avctx, p) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; - for(i=0; i<16; i++){ - a->delta[i]= *(bytestream++); + for (i = 0; i < 16; i++) { + a->delta[i] = *bytestream++; bytestream++; } - for(y=0; yheight; y++){ + for (y = 0; y < avctx->height; y++) { int offset; - uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ]; + uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]]; - if((y&3) == 0){ - uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ]; - uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ]; + if ((y & 3) == 0) { + uint8_t *cb = &a->picture.data[1][(y >> 2) * a->picture.linesize[1]]; + uint8_t *cr = &a->picture.data[2][(y >> 2) * a->picture.linesize[2]]; - for(i=0; i<4; i++) - a->offset[i]= *(bytestream++); + for (i = 0; i < 4; i++) + a->offset[i] = *bytestream++; - offset= a->offset[0] - a->delta[ bytestream[2]&0xF ]; - for(x=0; xwidth; x+=4){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[0]>>4 ]); - luma += 4; + offset = a->offset[0] - a->delta[bytestream[2] & 0xF]; + for (x = 0; x < avctx->width; x += 4) { + 
luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[0] & 0xF]; + luma[3] = offset += a->delta[bytestream[0] >> 4]; + luma += 4; - *(cb++) = bytestream[3]; - *(cr++) = bytestream[1]; + *cb++ = bytestream[3]; + *cr++ = bytestream[1]; - bytestream+= 4; + bytestream += 4; } - }else{ - offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ]; + } else { + offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF]; - for(x=0; xwidth; x+=8){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[3]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[3]>>4 ]); - luma[4]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[5]=( offset += a->delta[ bytestream[0]>>4 ]); - luma[6]=( offset += a->delta[ bytestream[1]&0xF ]); - luma[7]=( offset += a->delta[ bytestream[1]>>4 ]); - luma += 8; - bytestream+= 4; + for (x = 0; x < avctx->width; x += 8) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[3] & 0xF]; + luma[3] = offset += a->delta[bytestream[3] >> 4]; + luma[4] = offset += a->delta[bytestream[0] & 0xF]; + luma[5] = offset += a->delta[bytestream[0] >> 4]; + luma[6] = offset += a->delta[bytestream[1] & 0xF]; + luma[7] = offset += a->delta[bytestream[1] >> 4]; + luma += 8; + bytestream += 4; } } } @@ -118,43 +114,47 @@ static int decode_frame(AVCodecContext *avctx, } #if CONFIG_VCR1_ENCODER -static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ - VCR1Context * const a = avctx->priv_data; - AVFrame *pict = data; - AVFrame * const p = &a->picture; +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, + int buf_size, void *data) +{ + VCR1Context *const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame *const p = &a->picture; int size; - *p = *pict; - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + *p = *pict; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; avpriv_align_put_bits(&a->pb); - while(get_bit_count(&a->pb)&31) + while (get_bit_count(&a->pb) & 31) put_bits(&a->pb, 8, 0); - size= get_bit_count(&a->pb)/32; + size = get_bit_count(&a->pb) / 32; - return size*4; + return size * 4; } #endif -static av_cold void common_init(AVCodecContext *avctx){ - VCR1Context * const a = avctx->priv_data; +static av_cold void common_init(AVCodecContext *avctx) +{ + VCR1Context *const a = avctx->priv_data; avctx->coded_frame = &a->picture; - a->avctx= avctx; + a->avctx = avctx; } -static av_cold int decode_init(AVCodecContext *avctx){ - +static av_cold int decode_init(AVCodecContext *avctx) +{ common_init(avctx); - avctx->pix_fmt= PIX_FMT_YUV410P; + avctx->pix_fmt = PIX_FMT_YUV410P; return 0; } -static av_cold int decode_end(AVCodecContext *avctx){ +static av_cold int decode_end(AVCodecContext *avctx) +{ VCR1Context *s = avctx->priv_data; if (s->picture.data[0]) @@ -164,8 +164,8 @@ static av_cold int decode_end(AVCodecContext *avctx){ } #if CONFIG_VCR1_ENCODER -static av_cold int encode_init(AVCodecContext *avctx){ - +static av_cold int encode_init(AVCodecContext *avctx) +{ common_init(avctx); return 0; @@ -194,4 +194,4 @@ AVCodec ff_vcr1_encoder = { .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; -#endif +#endif /* CONFIG_VCR1_ENCODER */ From 51c4d870936976039807bbc881850cf6491fc89a Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 9 
Apr 2012 18:11:35 +0200 Subject: [PATCH 11/25] vcr1: group encoder code together to save #ifdefs --- libavcodec/vcr1.c | 90 ++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 681150d80e..a745e58300 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -27,10 +27,6 @@ #include "avcodec.h" #include "dsputil.h" -/* Disable the encoder. */ -#undef CONFIG_VCR1_ENCODER -#define CONFIG_VCR1_ENCODER 0 - typedef struct VCR1Context { AVCodecContext *avctx; AVFrame picture; @@ -38,6 +34,33 @@ typedef struct VCR1Context { int offset[4]; } VCR1Context; +static av_cold void common_init(AVCodecContext *avctx) +{ + VCR1Context *const a = avctx->priv_data; + + avctx->coded_frame = &a->picture; + a->avctx = avctx; +} + +static av_cold int decode_init(AVCodecContext *avctx) +{ + common_init(avctx); + + avctx->pix_fmt = PIX_FMT_YUV410P; + + return 0; +} + +static av_cold int decode_end(AVCodecContext *avctx) +{ + VCR1Context *s = avctx->priv_data; + + if (s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + return 0; +} + static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { @@ -113,6 +136,22 @@ static int decode_frame(AVCodecContext *avctx, void *data, return buf_size; } +AVCodec ff_vcr1_decoder = { + .name = "vcr1", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_VCR1, + .priv_data_size = sizeof(VCR1Context), + .init = decode_init, + .close = decode_end, + .decode = decode_frame, + .capabilities = CODEC_CAP_DR1, + .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), +}; + +/* Disable the encoder. */ +#undef CONFIG_VCR1_ENCODER +#define CONFIG_VCR1_ENCODER 0 + #if CONFIG_VCR1_ENCODER static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) @@ -134,57 +173,14 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, return size * 4; } -#endif -static av_cold void common_init(AVCodecContext *avctx) -{ - VCR1Context *const a = avctx->priv_data; - - avctx->coded_frame = &a->picture; - a->avctx = avctx; -} - -static av_cold int decode_init(AVCodecContext *avctx) -{ - common_init(avctx); - - avctx->pix_fmt = PIX_FMT_YUV410P; - - return 0; -} - -static av_cold int decode_end(AVCodecContext *avctx) -{ - VCR1Context *s = avctx->priv_data; - - if (s->picture.data[0]) - avctx->release_buffer(avctx, &s->picture); - - return 0; -} - -#if CONFIG_VCR1_ENCODER static av_cold int encode_init(AVCodecContext *avctx) { common_init(avctx); return 0; } -#endif -AVCodec ff_vcr1_decoder = { - .name = "vcr1", - .type = AVMEDIA_TYPE_VIDEO, - .id = CODEC_ID_VCR1, - .priv_data_size = sizeof(VCR1Context), - .init = decode_init, - .close = decode_end, - .decode = decode_frame, - .capabilities = CODEC_CAP_DR1, - .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), -}; - -#if CONFIG_VCR1_ENCODER AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, From eeeefd500124a1ec3a7ca75d9c071c5517c3d153 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 12 Apr 2012 18:52:16 +0200 Subject: [PATCH 12/25] vcr1: drop pointless write-only AVCodecContext member from VCR1Context --- libavcodec/vcr1.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index a745e58300..e3e45cc6a5 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -28,7 +28,6 @@ #include "dsputil.h" typedef struct VCR1Context { - AVCodecContext *avctx; AVFrame picture; int delta[16]; int offset[4]; @@ -39,7 +38,6 @@ static 
av_cold void common_init(AVCodecContext *avctx) VCR1Context *const a = avctx->priv_data; avctx->coded_frame = &a->picture; - a->avctx = avctx; } static av_cold int decode_init(AVCodecContext *avctx) From 8ae19143277e8c740e1cdeb280cfdf4c47a3eb23 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 12 Apr 2012 18:55:25 +0200 Subject: [PATCH 13/25] vcr1enc: drop pointless empty encode_init() wrapper function --- libavcodec/vcr1.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index e3e45cc6a5..7edd801b8f 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -172,19 +172,12 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, return size * 4; } -static av_cold int encode_init(AVCodecContext *avctx) -{ - common_init(avctx); - - return 0; -} - AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_VCR1, .priv_data_size = sizeof(VCR1Context), - .init = encode_init, + .init = common_init, .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; From 779222dbfe19ebe731dcdff460e1b1807b9285a7 Mon Sep 17 00:00:00 2001 From: Sean McGovern Date: Wed, 9 May 2012 02:13:15 -0400 Subject: [PATCH 14/25] configure: Add _XOPEN_SOURCE=600 to Solaris preprocessor flags. This is needed to expose some networking APIs. Signed-off-by: Diego Biurrun --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 4b1e55169f..79b948e4f3 100755 --- a/configure +++ b/configure @@ -2452,7 +2452,7 @@ case $target_os in SHFLAGS='-shared -Wl,-h,$$(@F)' enabled x86 && SHFLAGS="-mimpure-text $SHFLAGS" network_extralibs="-lsocket -lnsl" - add_cppflags -D__EXTENSIONS__ + add_cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600 # When using suncc to build, the Solaris linker will mark # an executable with each instruction set encountered by # the Solaris assembler. As our libraries contain their own From ded69c5e21758e2e4a9a7e6ae0fec2d5ad312ba9 Mon Sep 17 00:00:00 2001 From: Sean McGovern Date: Wed, 9 May 2012 02:13:16 -0400 Subject: [PATCH 15/25] sctp: be consistent with socket option level Replace SOL_SCTP by the more portable IPPROTO_SCTP. Signed-off-by: Diego Biurrun --- libavformat/sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/sctp.c b/libavformat/sctp.c index 3823e03ebe..817b0049a9 100644 --- a/libavformat/sctp.c +++ b/libavformat/sctp.c @@ -227,7 +227,7 @@ static int sctp_open(URLContext *h, const char *uri, int flags) if (s->max_streams) { initparams.sinit_max_instreams = s->max_streams; initparams.sinit_num_ostreams = s->max_streams; - if (setsockopt(fd, SOL_SCTP, SCTP_INITMSG, &initparams, + if (setsockopt(fd, IPPROTO_SCTP, SCTP_INITMSG, &initparams, sizeof(initparams)) < 0) av_log(h, AV_LOG_ERROR, "SCTP ERROR: Unable to initialize socket max streams %d\n", From 7cf78b3476d77888caa059398078640fb821170e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 11 Apr 2012 10:31:02 +0200 Subject: [PATCH 16/25] cmdutils: Add fallback case to switch in check_stream_specifier(). 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the warning: cmdutils.c:897: warning: ‘type’ may be used uninitialized in this function --- cmdutils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmdutils.c b/cmdutils.c index 3cd11ca241..bd07d2ad89 100644 --- a/cmdutils.c +++ b/cmdutils.c @@ -34,6 +34,7 @@ #include "libavdevice/avdevice.h" #include "libavresample/avresample.h" #include "libswscale/swscale.h" +#include "libavutil/avassert.h" #include "libavutil/avstring.h" #include "libavutil/mathematics.h" #include "libavutil/parseutils.h" @@ -905,6 +906,7 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec) case 's': type = AVMEDIA_TYPE_SUBTITLE; break; case 'd': type = AVMEDIA_TYPE_DATA; break; case 't': type = AVMEDIA_TYPE_ATTACHMENT; break; + default: av_assert0(0); } if (type != st->codec->codec_type) return 0; From 55c9320e0638349dbea2f8a658ecd3f48d1a80f1 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:14 +0200 Subject: [PATCH 17/25] rtmp: Support 'rtmp_tcurl', an option which overrides the URL of the target stream. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Storsjö --- libavformat/rtmpproto.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 9c82b462ff..5d7f4d185d 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -44,6 +44,7 @@ #define APP_MAX_LENGTH 128 #define PLAYPATH_MAX_LENGTH 256 +#define TCURL_MAX_LENGTH 512 /** RTMP protocol handler state */ typedef enum { @@ -82,6 +83,7 @@ typedef struct RTMPContext { int flv_header_bytes; ///< number of initialized bytes in flv_header int nb_invokes; ///< keeps track of invoke messages int create_stream_invoke; ///< invoke id for the create stream command + char* tcurl; ///< url of the target stream } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -110,17 +112,14 @@ static const uint8_t rtmp_server_key[] = { /** * Generate 'connect' call and send it to the server. 
*/ -static void gen_connect(URLContext *s, RTMPContext *rt, const char *proto, - const char *host, int port) +static void gen_connect(URLContext *s, RTMPContext *rt) { RTMPPacket pkt; uint8_t ver[64], *p; - char tcurl[512]; ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 4096); p = pkt.data; - ff_url_join(tcurl, sizeof(tcurl), proto, NULL, host, port, "/%s", rt->app); ff_amf_write_string(&p, "connect"); ff_amf_write_number(&p, ++rt->nb_invokes); ff_amf_write_object_start(&p); @@ -138,7 +137,7 @@ static void gen_connect(URLContext *s, RTMPContext *rt, const char *proto, ff_amf_write_field_name(&p, "flashVer"); ff_amf_write_string(&p, ver); ff_amf_write_field_name(&p, "tcUrl"); - ff_amf_write_string(&p, tcurl); + ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { ff_amf_write_field_name(&p, "fpad"); ff_amf_write_bool(&p, 0); @@ -910,13 +909,19 @@ static int rtmp_open(URLContext *s, const char *uri, int flags) strncat(rt->playpath, fname, PLAYPATH_MAX_LENGTH - 5); } + if (!rt->tcurl) { + rt->tcurl = av_malloc(TCURL_MAX_LENGTH); + ff_url_join(rt->tcurl, TCURL_MAX_LENGTH, proto, NULL, hostname, + port, "/%s", rt->app); + } + rt->client_report_size = 1048576; rt->bytes_read = 0; rt->last_bytes_read = 0; av_log(s, AV_LOG_DEBUG, "Proto = %s, path = %s, app = %s, fname = %s\n", proto, path, rt->app, rt->playpath); - gen_connect(s, rt, proto, hostname, port); + gen_connect(s, rt); do { ret = get_packet(s, 1); @@ -1057,6 +1062,7 @@ static const AVOption rtmp_options[] = { {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, {"recorded", "recorded stream", 0, AV_OPT_TYPE_CONST, {0}, 0, 0, DEC, "rtmp_live"}, {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_tcurl", "URL of the target stream. Defaults to rtmp://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, { NULL }, }; From e64673e4f4f7acefe5f60f35fb3a196ccf5e9490 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:15 +0200 Subject: [PATCH 18/25] rtmp: Support 'rtmp_flashver', an option which overrides the version of the Flash plugin. 
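A minimal usage sketch, not part of this patch: rtmp_tcurl (added in the previous
commit) and rtmp_flashver (added here) are private options of the RTMP protocol
handler, so one plausible way to set them is through the options dictionary
accepted by avio_open2(). The URLs and option values below are placeholders, and
the option-forwarding path through avio_open2() is an assumption about the
surrounding library, not something introduced by this patch.

    #include "libavformat/avformat.h"   /* avio_open2(), av_register_all() */
    #include "libavutil/dict.h"         /* av_dict_set(), av_dict_free() */

    static int open_rtmp_with_overrides(AVIOContext **pb)
    {
        AVDictionary *opts = NULL;
        int ret;

        av_register_all();  /* registers demuxers and protocols in this era of the library */

        /* Override the tcUrl and flashVer fields sent in the 'connect' invoke;
         * both values are placeholders chosen for illustration only. */
        av_dict_set(&opts, "rtmp_tcurl", "rtmp://example.com:1935/myapp", 0);
        av_dict_set(&opts, "rtmp_flashver", "FMLE/3.0 (compatible; example)", 0);

        ret = avio_open2(pb, "rtmp://example.com:1935/myapp/mystream",
                         AVIO_FLAG_READ, NULL, &opts);
        av_dict_free(&opts);  /* drop any entries the protocol did not consume */
        return ret;
    }

When neither option is set, the defaults built in rtmp_open() are used, so
existing callers keep their current behaviour.
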
--- libavformat/rtmpproto.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 5d7f4d185d..11caad6270 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -45,6 +45,7 @@ #define APP_MAX_LENGTH 128 #define PLAYPATH_MAX_LENGTH 256 #define TCURL_MAX_LENGTH 512 +#define FLASHVER_MAX_LENGTH 64 /** RTMP protocol handler state */ typedef enum { @@ -84,6 +85,7 @@ typedef struct RTMPContext { int nb_invokes; ///< keeps track of invoke messages int create_stream_invoke; ///< invoke id for the create stream command char* tcurl; ///< url of the target stream + char* flashver; ///< version of the flash plugin } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -115,7 +117,7 @@ static const uint8_t rtmp_server_key[] = { static void gen_connect(URLContext *s, RTMPContext *rt) { RTMPPacket pkt; - uint8_t ver[64], *p; + uint8_t *p; ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 4096); p = pkt.data; @@ -126,16 +128,12 @@ static void gen_connect(URLContext *s, RTMPContext *rt) ff_amf_write_field_name(&p, "app"); ff_amf_write_string(&p, rt->app); - if (rt->is_input) { - snprintf(ver, sizeof(ver), "%s %d,%d,%d,%d", RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, - RTMP_CLIENT_VER2, RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); - } else { - snprintf(ver, sizeof(ver), "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + if (!rt->is_input) { ff_amf_write_field_name(&p, "type"); ff_amf_write_string(&p, "nonprivate"); } ff_amf_write_field_name(&p, "flashVer"); - ff_amf_write_string(&p, ver); + ff_amf_write_string(&p, rt->flashver); ff_amf_write_field_name(&p, "tcUrl"); ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { @@ -915,6 +913,18 @@ static int rtmp_open(URLContext *s, const char *uri, int flags) port, "/%s", rt->app); } + if (!rt->flashver) { + rt->flashver = av_malloc(FLASHVER_MAX_LENGTH); + if (rt->is_input) { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, "%s %d,%d,%d,%d", + RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, RTMP_CLIENT_VER2, + RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); + } else { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, + "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + } + } + rt->client_report_size = 1048576; rt->bytes_read = 0; rt->last_bytes_read = 0; @@ -1057,6 +1067,7 @@ static int rtmp_write(URLContext *s, const uint8_t *buf, int size) static const AVOption rtmp_options[] = { {"rtmp_app", "Name of application to connect to on the RTMP server", OFFSET(app), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_flashver", "Version of the Flash plugin used to run the SWF player.", OFFSET(flashver), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, {"rtmp_live", "Specify that the media is a live stream.", OFFSET(live), AV_OPT_TYPE_INT, {-2}, INT_MIN, INT_MAX, DEC, "rtmp_live"}, {"any", "both", 0, AV_OPT_TYPE_CONST, {-2}, 0, 0, DEC, "rtmp_live"}, {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, From 05945db9ce3c6708e62d05bfb040db10d73eade0 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:16 +0200 Subject: [PATCH 19/25] rtmp: Support 'rtmp_swfurl', an option which specifies the URL of the SWF player. 
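Usage would follow the same dictionary pattern sketched for rtmp_tcurl and
rtmp_flashver in the previous commits; a hypothetical entry (the player URL is
a placeholder):

    av_dict_set(&opts, "rtmp_swfurl", "http://example.com/player.swf", 0);

Unlike flashVer, the swfUrl field is only written into the 'connect' invoke
when the option is actually set, so connections that never supplied it are
unaffected.
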
--- libavformat/rtmpproto.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 11caad6270..13ef719ab2 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -86,6 +86,7 @@ typedef struct RTMPContext { int create_stream_invoke; ///< invoke id for the create stream command char* tcurl; ///< url of the target stream char* flashver; ///< version of the flash plugin + char* swfurl; ///< url of the swf player } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -134,6 +135,12 @@ static void gen_connect(URLContext *s, RTMPContext *rt) } ff_amf_write_field_name(&p, "flashVer"); ff_amf_write_string(&p, rt->flashver); + + if (rt->swfurl) { + ff_amf_write_field_name(&p, "swfUrl"); + ff_amf_write_string(&p, rt->swfurl); + } + ff_amf_write_field_name(&p, "tcUrl"); ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { @@ -1073,6 +1080,7 @@ static const AVOption rtmp_options[] = { {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, {"recorded", "recorded stream", 0, AV_OPT_TYPE_CONST, {0}, 0, 0, DEC, "rtmp_live"}, {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_swfurl", "URL of the SWF player. By default no value will be sent", OFFSET(swfurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, {"rtmp_tcurl", "URL of the target stream. Defaults to rtmp://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, { NULL }, }; From d55961fa82d34c1783f525b05608694d2b2dea1c Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 00:58:09 +0200 Subject: [PATCH 20/25] rtmp: Implement check bandwidth notification. According to the behaviour of librtmp, it is recommended to send this message to the server after receiving the 'onBWDone' callback in order to do bandwidth checking and improve compatibility with some servers. --- libavformat/rtmpproto.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 13ef719ab2..427655c27e 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -372,6 +372,25 @@ static void gen_server_bw(URLContext *s, RTMPContext *rt) ff_rtmp_packet_destroy(&pkt); } +/** + * Generate check bandwidth message and send it to the server. + */ +static void gen_check_bw(URLContext *s, RTMPContext *rt) +{ + RTMPPacket pkt; + uint8_t *p; + + ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 21); + + p = pkt.data; + ff_amf_write_string(&p, "_checkbw"); + ff_amf_write_number(&p, ++rt->nb_invokes); + ff_amf_write_null(&p); + + ff_rtmp_packet_write(rt->stream, &pkt, rt->chunk_size, rt->prev_pkt[1]); + ff_rtmp_packet_destroy(&pkt); +} + /** * Generate report on bytes read so far and send it to the server. 
*/ @@ -691,6 +710,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt) if (!t && !strcmp(tmpstr, "NetStream.Play.Stop")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Play.UnpublishNotify")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Publish.Start")) rt->state = STATE_PUBLISHING; + } else if (!memcmp(pkt->data, "\002\000\010onBWDone", 11)) { + gen_check_bw(s, rt); } break; } From be545b8a34cb7934bddc6c76aa783bee0b90c361 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 7 May 2012 14:13:23 +0200 Subject: [PATCH 21/25] h264: K&R formatting cosmetics for header files (part I/II) --- libavcodec/h264.h | 497 ++++++++++++++++++++++-------------------- libavcodec/h264data.h | 371 +++++++++++++++---------------- libavcodec/h264pred.h | 81 ++++--- 3 files changed, 491 insertions(+), 458 deletions(-) diff --git a/libavcodec/h264.h b/libavcodec/h264.h index ce06f613cd..570ce2ffae 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -37,14 +37,14 @@ #include "rectangle.h" #define interlaced_dct interlaced_dct_is_a_bad_name -#define mb_intra mb_intra_is_not_initialized_see_mb_type +#define mb_intra mb_intra_is_not_initialized_see_mb_type -#define MAX_SPS_COUNT 32 -#define MAX_PPS_COUNT 256 +#define MAX_SPS_COUNT 32 +#define MAX_PPS_COUNT 256 -#define MAX_MMCO_COUNT 66 +#define MAX_MMCO_COUNT 66 -#define MAX_DELAYED_PIC_COUNT 16 +#define MAX_DELAYED_PIC_COUNT 16 /* Compiling in interlaced support reduces the speed * of progressive decoding by about 2%. */ @@ -59,25 +59,25 @@ #define MAX_SLICES 16 #ifdef ALLOW_INTERLACE -#define MB_MBAFF h->mb_mbaff -#define MB_FIELD h->mb_field_decoding_flag +#define MB_MBAFF h->mb_mbaff +#define MB_FIELD h->mb_field_decoding_flag #define FRAME_MBAFF h->mb_aff_frame #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) #define LEFT_MBS 2 -#define LTOP 0 -#define LBOT 1 -#define LEFT(i) (i) +#define LTOP 0 +#define LBOT 1 +#define LEFT(i) (i) #else -#define MB_MBAFF 0 -#define MB_FIELD 0 -#define FRAME_MBAFF 0 +#define MB_MBAFF 0 +#define MB_FIELD 0 +#define FRAME_MBAFF 0 #define FIELD_PICTURE 0 #undef IS_INTERLACED #define IS_INTERLACED(mb_type) 0 #define LEFT_MBS 1 -#define LTOP 0 -#define LBOT 0 -#define LEFT(i) 0 +#define LTOP 0 +#define LBOT 0 +#define LEFT(i) 0 #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) @@ -88,9 +88,9 @@ #define CHROMA422 (h->sps.chroma_format_idc == 2) #define CHROMA444 (h->sps.chroma_format_idc == 3) -#define EXTENDED_SAR 255 +#define EXTENDED_SAR 255 -#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit +#define MB_TYPE_REF0 MB_TYPE_ACPRED // dirty but it fits in 16 bit #define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a) & MB_TYPE_REF0) #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT) @@ -101,11 +101,11 @@ */ #define DELAYED_PIC_REF 4 -#define QP_MAX_NUM (51 + 2*6) // The maximum supported qp +#define QP_MAX_NUM (51 + 2 * 6) // The maximum supported qp /* NAL unit types */ enum { - NAL_SLICE=1, + NAL_SLICE = 1, NAL_DPA, NAL_DPB, NAL_DPC, @@ -118,17 +118,17 @@ enum { NAL_END_STREAM, NAL_FILLER_DATA, NAL_SPS_EXT, - NAL_AUXILIARY_SLICE=19 + NAL_AUXILIARY_SLICE = 19 }; /** * SEI message types */ typedef enum { - SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) - SEI_TYPE_PIC_TIMING = 1, ///< picture timing - SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data - SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) + SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) + 
SEI_TYPE_PIC_TIMING = 1, ///< picture timing + SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data + SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) } SEI_Type; /** @@ -149,8 +149,7 @@ typedef enum { /** * Sequence parameter set */ -typedef struct SPS{ - +typedef struct SPS { int profile_idc; int level_idc; int chroma_format_idc; @@ -167,9 +166,9 @@ typedef struct SPS{ int mb_width; ///< pic_width_in_mbs_minus1 + 1 int mb_height; ///< pic_height_in_map_units_minus1 + 1 int frame_mbs_only_flag; - int mb_aff; ///b4_stride + int b_stride; // FIXME use s->b4_stride - int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff + int mb_linesize; ///< may be equal to s->linesize or s->linesize * 2, for mbaff int mb_uvlinesize; int emu_edge_width; @@ -335,32 +334,32 @@ typedef struct H264Context{ /** * current pps */ - PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? + PPS pps; // FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; - uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[6])[64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down? + uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64]; + uint32_t(*dequant4_coeff[6])[16]; + uint32_t(*dequant8_coeff[6])[64]; int slice_num; - uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 + uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 int slice_type; - int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) + int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) int slice_type_fixed; - //interlacing specific flags + // interlacing specific flags int mb_aff_frame; int mb_field_decoding_flag; - int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag + int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - //Weighted pred stuff + // Weighted pred stuff int use_weight; int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; - //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss int luma_weight[48][2][2]; int chroma_weight[48][2][2][2]; int implicit_weight[48][48][2]; @@ -370,48 +369,48 @@ typedef struct H264Context{ int col_fieldoff; int dist_scale_factor[16]; int dist_scale_factor_field[2][32]; - int map_col_to_list0[2][16+32]; - int map_col_to_list0_field[2][2][16+32]; + int map_col_to_list0[2][16 + 32]; + int map_col_to_list0_field[2][2][16 + 32]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ - unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode + unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode unsigned int list_count; - uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type - Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. - Reordered version of default_ref_list - according to picture reordering in slice header */ - int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 + uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type + Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. 
+ * Reordered version of default_ref_list + * according to picture reordering in slice header */ + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 - //data partitioning + // data partitioning GetBitContext intra_gb; GetBitContext inter_gb; GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. - DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; - DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb + DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2]; + DCTELEM mb_padding[256 * 2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[1024]; + uint8_t cabac_state[1024]; - /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ - uint16_t *cbp_table; + /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */ + uint16_t *cbp_table; int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ - uint8_t *chroma_pred_mode_table; - int last_qscale_diff; - uint8_t (*mvd_table[2])[2]; - DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; - uint8_t *direct_table; - uint8_t direct_cache[5*8]; + uint8_t *chroma_pred_mode_table; + int last_qscale_diff; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2]; + uint8_t *direct_table; + uint8_t direct_cache[5 * 8]; uint8_t zigzag_scan[16]; uint8_t zigzag_scan8x8[64]; @@ -432,13 +431,13 @@ typedef struct H264Context{ int is_complex; - //deblock - int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 + // deblock + int deblocking_filter; ///< disable_deblocking_filter_idc with 1 <-> 0 int slice_alpha_c0_offset; int slice_beta_offset; -//============================================================= - //Things below are not used in the MB or more inner code + // ============================================================= + // Things below are not used in the MB or more inner code int nal_ref_idc; int nal_unit_type; @@ -448,37 +447,36 @@ typedef struct H264Context{ /** * Used to parse AVC variant of h264 */ - int is_avc; ///< this flag is != 0 if codec is avc1 - int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) - int got_first; ///< this flag is != 0 if we've parsed a frame + int is_avc; ///< this flag is != 0 if codec is avc1 + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + int got_first; ///< this flag is != 0 if we've parsed a frame SPS *sps_buffers[MAX_SPS_COUNT]; PPS *pps_buffers[MAX_PPS_COUNT]; - int dequant_coeff_pps; ///< reinit tables when pps changes + int dequant_coeff_pps; ///< reinit tables when pps changes uint16_t *slice_table_base; - - //POC stuff + // POC stuff int poc_lsb; int poc_msb; int delta_poc_bottom; int delta_poc[2]; int frame_num; - int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 - int prev_poc_lsb; ///< poc_lsb of the last reference pic 
for POC type 0 - int frame_num_offset; ///< for POC type 2 - int prev_frame_num_offset; ///< for POC type 2 - int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 + int frame_num_offset; ///< for POC type 2 + int prev_frame_num_offset; ///< for POC type 2 + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 /** - * frame_num for frames or 2*frame_num+1 for field pics. + * frame_num for frames or 2 * frame_num + 1 for field pics. */ int curr_pic_num; /** - * max_frame_num or 2*max_frame_num for field pics. + * max_frame_num or 2 * max_frame_num for field pics. */ int max_pic_num; @@ -487,7 +485,7 @@ typedef struct H264Context{ Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture - Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size? int last_pocs[MAX_DELAYED_PIC_COUNT]; Picture *next_output_pic; int outputed_poc; @@ -500,10 +498,10 @@ typedef struct H264Context{ int mmco_index; int mmco_reset; - int long_ref_count; ///< number of actual long term references - int short_ref_count; ///< number of actual short term references + int long_ref_count; ///< number of actual long term references + int short_ref_count; ///< number of actual short term references - int cabac_init_idc; + int cabac_init_idc; /** * @name Members for slice based multithreading @@ -572,18 +570,17 @@ typedef struct H264Context{ */ int sei_recovery_frame_cnt; - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff - int sei_buffering_period_present; ///< Buffering period SEI flag - int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs + int sei_buffering_period_present; ///< Buffering period SEI flag + int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int cur_chroma_format_idc; -}H264Context; +} H264Context; - -extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). +extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM + 1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). extern const uint16_t ff_h264_mb_sizes[4]; /** @@ -610,13 +607,16 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); * Decode a network abstraction layer unit. * @param consumed is the number of bytes used as input * @param length is the length of the array - * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? + * @param dst_length is the number of decoded bytes FIXME here + * or a decode rbsp tailing? * @return decoded bytes, might be src+1 if no escapes */ -const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); +const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, + int *dst_length, int *consumed, int length); /** - * Free any data that may have been allocated in the H264 context like SPS, PPS etc. + * Free any data that may have been allocated in the H264 context + * like SPS, PPS etc. 
*/ av_cold void ff_h264_free_context(H264Context *h); @@ -649,14 +649,15 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); void ff_generate_sliding_window_mmcos(H264Context *h); - /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra4x4_pred_mode(H264Context *h); /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); @@ -668,24 +669,28 @@ av_cold void ff_h264_decode_init_vlc(void); /** * Decode a macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cavlc(H264Context *h); /** * Decode a CABAC coded macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cabac(H264Context *h); void ff_h264_init_cabac_states(H264Context *h); -void ff_h264_direct_dist_scale_factor(H264Context * const h); -void ff_h264_direct_ref_list_init(H264Context * const h); -void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); +void ff_h264_direct_dist_scale_factor(H264Context *const h); +void ff_h264_direct_ref_list_init(H264Context *const h); +void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type); -void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); /** * Reset SEI values at the beginning of the frame. 
@@ -694,16 +699,15 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint */ void ff_h264_reset_sei(H264Context *h); - /* -o-o o-o - / / / -o-o o-o - ,---' -o-o o-o - / / / -o-o o-o -*/ + * o-o o-o + * / / / + * o-o o-o + * ,---' + * o-o o-o + * / / / + * o-o o-o + */ /* Scan8 organization: * 0 1 2 3 4 5 6 7 @@ -728,156 +732,173 @@ o-o o-o #define LUMA_DC_BLOCK_INDEX 48 #define CHROMA_DC_BLOCK_INDEX 49 -//This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16*3 + 3]={ - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, - 4+11*8, 5+11*8, 4+12*8, 5+12*8, - 6+11*8, 7+11*8, 6+12*8, 7+12*8, - 4+13*8, 5+13*8, 4+14*8, 5+14*8, - 6+13*8, 7+13*8, 6+14*8, 7+14*8, - 0+ 0*8, 0+ 5*8, 0+10*8 +// This table must be here because scan8[constant] must be known at compiletime +static const uint8_t scan8[16 * 3 + 3] = { + 4 + 1 * 8, 5 + 1 * 8, 4 + 2 * 8, 5 + 2 * 8, + 6 + 1 * 8, 7 + 1 * 8, 6 + 2 * 8, 7 + 2 * 8, + 4 + 3 * 8, 5 + 3 * 8, 4 + 4 * 8, 5 + 4 * 8, + 6 + 3 * 8, 7 + 3 * 8, 6 + 4 * 8, 7 + 4 * 8, + 4 + 6 * 8, 5 + 6 * 8, 4 + 7 * 8, 5 + 7 * 8, + 6 + 6 * 8, 7 + 6 * 8, 6 + 7 * 8, 7 + 7 * 8, + 4 + 8 * 8, 5 + 8 * 8, 4 + 9 * 8, 5 + 9 * 8, + 6 + 8 * 8, 7 + 8 * 8, 6 + 9 * 8, 7 + 9 * 8, + 4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8, + 6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8, + 4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8, + 6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8, + 0 + 0 * 8, 0 + 5 * 8, 0 + 10 * 8 }; -static av_always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFFFF) + (a<<16); + return (b & 0xFFFF) + (a << 16); #else - return (a&0xFFFF) + (b<<16); + return (a & 0xFFFF) + (b << 16); #endif } -static av_always_inline uint16_t pack8to16(int a, int b){ +static av_always_inline uint16_t pack8to16(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFF) + (a<<8); + return (b & 0xFF) + (a << 8); #else - return (a&0xFF) + (b<<8); + return (a & 0xFF) + (b << 8); #endif } /** * Get the chroma qp. */ -static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){ +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale) +{ return h->pps.chroma_qp_table[t][qscale]; } /** * Get the predicted intra4x4 prediction mode. 
*/ -static av_always_inline int pred_intra_mode(H264Context *h, int n){ - const int index8= scan8[n]; - const int left= h->intra4x4_pred_mode_cache[index8 - 1]; - const int top = h->intra4x4_pred_mode_cache[index8 - 8]; - const int min= FFMIN(left, top); +static av_always_inline int pred_intra_mode(H264Context *h, int n) +{ + const int index8 = scan8[n]; + const int left = h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min = FFMIN(left, top); - tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); + tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min); - if(min<0) return DC_PRED; - else return min; + if (min < 0) + return DC_PRED; + else + return min; } -static av_always_inline void write_back_intra_pred_mode(H264Context *h){ - int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; - int8_t *i4x4_cache= h->intra4x4_pred_mode_cache; +static av_always_inline void write_back_intra_pred_mode(H264Context *h) +{ + int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + int8_t *i4x4_cache = h->intra4x4_pred_mode_cache; - AV_COPY32(i4x4, i4x4_cache + 4 + 8*4); - i4x4[4]= i4x4_cache[7+8*3]; - i4x4[5]= i4x4_cache[7+8*2]; - i4x4[6]= i4x4_cache[7+8*1]; + AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4); + i4x4[4] = i4x4_cache[7 + 8 * 3]; + i4x4[5] = i4x4_cache[7 + 8 * 2]; + i4x4[6] = i4x4_cache[7 + 8 * 1]; } -static av_always_inline void write_back_non_zero_count(H264Context *h){ - const int mb_xy= h->mb_xy; - uint8_t *nnz = h->non_zero_count[mb_xy]; +static av_always_inline void write_back_non_zero_count(H264Context *h) +{ + const int mb_xy = h->mb_xy; + uint8_t *nnz = h->non_zero_count[mb_xy]; uint8_t *nnz_cache = h->non_zero_count_cache; - AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]); - AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]); - AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]); - AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]); - AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]); - AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]); - AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); - AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); + AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]); + AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]); + AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]); + AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]); + AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]); + AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]); + AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]); + AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]); - if(!h->s.chroma_y_shift){ - AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); - AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); - AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); - AV_COPY32(&nnz[44], &nnz_cache[4+8*14]); + if (!h->s.chroma_y_shift) { + AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]); + AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]); + AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]); + AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]); } } -static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride, - int b_xy, int b8_xy, int mb_type, int list ) +static av_always_inline void write_back_motion_list(H264Context *h, + MpegEncContext *const s, + int b_stride, + int b_xy, int b8_xy, + int mb_type, int list) { - int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; - int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; - AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); - AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1); - AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2); - AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3); - if( CABAC ) { - uint8_t (*mvd_dst)[2] = 
&h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; - uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; - if(IS_SKIP(mb_type)) + int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; + int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]]; + AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0); + AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1); + AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2); + AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3); + if (CABAC) { + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy + : h->mb2br_xy[h->mb_xy]]; + uint8_t(*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + if (IS_SKIP(mb_type)) { AV_ZERO128(mvd_dst); - else{ - AV_COPY64(mvd_dst, mvd_src + 8*3); - AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); - AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); - AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); + } else { + AV_COPY64(mvd_dst, mvd_src + 8 * 3); + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0); + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1); + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2); } } { int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy]; int8_t *ref_cache = h->ref_cache[list]; - ref_index[0+0*2]= ref_cache[scan8[0]]; - ref_index[1+0*2]= ref_cache[scan8[4]]; - ref_index[0+1*2]= ref_cache[scan8[8]]; - ref_index[1+1*2]= ref_cache[scan8[12]]; + ref_index[0 + 0 * 2] = ref_cache[scan8[0]]; + ref_index[1 + 0 * 2] = ref_cache[scan8[4]]; + ref_index[0 + 1 * 2] = ref_cache[scan8[8]]; + ref_index[1 + 1 * 2] = ref_cache[scan8[12]]; } } -static av_always_inline void write_back_motion(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int b_stride = h->b_stride; - const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy - const int b8_xy= 4*h->mb_xy; +static av_always_inline void write_back_motion(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int b_stride = h->b_stride; + const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy + const int b8_xy = 4 * h->mb_xy; - if(USES_LIST(mb_type, 0)){ + if (USES_LIST(mb_type, 0)) { write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0); - }else{ + } else { fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); } - if(USES_LIST(mb_type, 1)){ + if (USES_LIST(mb_type, 1)) write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1); - } - if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){ - if(IS_8X8(mb_type)){ - uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; - direct_table[1] = h->sub_mb_type[1]>>1; - direct_table[2] = h->sub_mb_type[2]>>1; - direct_table[3] = h->sub_mb_type[3]>>1; + if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) { + if (IS_8X8(mb_type)) { + uint8_t *direct_table = &h->direct_table[4 * h->mb_xy]; + direct_table[1] = h->sub_mb_type[1] >> 1; + direct_table[2] = h->sub_mb_type[2] >> 1; + direct_table[3] = h->sub_mb_type[3] >> 1; } } } -static av_always_inline int get_dct8x8_allowed(H264Context *h){ - if(h->sps.direct_8x8_inference_flag) - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); +static av_always_inline int get_dct8x8_allowed(H264Context *h) +{ + if (h->sps.direct_8x8_inference_flag) + return !(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) * + 0x0001000100010001ULL)); else - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); + return 
!(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) * + 0x0001000100010001ULL)); } #endif /* AVCODEC_H264_H */ diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index 2cfa548624..5311c21be9 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -30,240 +30,243 @@ #define AVCODEC_H264DATA_H #include + #include "libavutil/rational.h" #include "mpegvideo.h" #include "h264.h" - -static const uint8_t golomb_to_pict_type[5]= -{AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI}; - -static const uint8_t golomb_to_intra4x4_cbp[48]={ - 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, - 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, - 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 +static const uint8_t golomb_to_pict_type[5] = { + AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, + AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI }; -static const uint8_t golomb_to_inter_cbp[48]={ - 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, - 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, - 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 +static const uint8_t golomb_to_intra4x4_cbp[48] = { + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 }; -static const uint8_t zigzag_scan[16]={ - 0+0*4, 1+0*4, 0+1*4, 0+2*4, - 1+1*4, 2+0*4, 3+0*4, 2+1*4, - 1+2*4, 0+3*4, 1+3*4, 2+2*4, - 3+1*4, 3+2*4, 2+3*4, 3+3*4, +static const uint8_t golomb_to_inter_cbp[48] = { + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 }; -static const uint8_t field_scan[16]={ - 0+0*4, 0+1*4, 1+0*4, 0+2*4, - 0+3*4, 1+1*4, 1+2*4, 1+3*4, - 2+0*4, 2+1*4, 2+2*4, 2+3*4, - 3+0*4, 3+1*4, 3+2*4, 3+3*4, +static const uint8_t zigzag_scan[16] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_zigzag_scan[16]={ - 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, - 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, - 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, - 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64, +static const uint8_t field_scan[16] = { + 0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4, + 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4, + 2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4, + 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_field_scan[16]={ - 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, - 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, - 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, - 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64, +static const uint8_t luma_dc_zigzag_scan[16] = { + 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64, + 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64, + 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma_dc_scan[4]={ - (0+0*2)*16, (1+0*2)*16, - (0+1*2)*16, (1+1*2)*16, +static const uint8_t luma_dc_field_scan[16] = { + 0 * 16 + 0 * 64, 2 * 16 + 0 * 64, 1 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 2 * 16 + 
2 * 64, 3 * 16 + 0 * 64, 1 * 16 + 2 * 64, 3 * 16 + 2 * 64, + 0 * 16 + 1 * 64, 2 * 16 + 1 * 64, 0 * 16 + 3 * 64, 2 * 16 + 3 * 64, + 1 * 16 + 1 * 64, 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma422_dc_scan[8]={ - (0+0*2)*16, (0+1*2)*16, - (1+0*2)*16, (0+2*2)*16, - (0+3*2)*16, (1+1*2)*16, - (1+2*2)*16, (1+3*2)*16, +static const uint8_t chroma_dc_scan[4] = { + (0 + 0 * 2) * 16, (1 + 0 * 2) * 16, + (0 + 1 * 2) * 16, (1 + 1 * 2) * 16, +}; + +static const uint8_t chroma422_dc_scan[8] = { + (0 + 0 * 2) * 16, (0 + 1 * 2) * 16, + (1 + 0 * 2) * 16, (0 + 2 * 2) * 16, + (0 + 3 * 2) * 16, (1 + 1 * 2) * 16, + (1 + 2 * 2) * 16, (1 + 3 * 2) * 16, }; // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] -static const uint8_t zigzag_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 1+2*8, 2+2*8, - 4+1*8, 0+5*8, 3+3*8, 7+0*8, - 3+4*8, 1+7*8, 5+3*8, 6+3*8, - 2+7*8, 6+4*8, 5+6*8, 7+5*8, - 1+0*8, 2+0*8, 0+3*8, 3+1*8, - 3+2*8, 0+6*8, 4+2*8, 6+1*8, - 2+5*8, 2+6*8, 6+2*8, 5+4*8, - 3+7*8, 7+3*8, 4+7*8, 7+6*8, - 0+1*8, 3+0*8, 0+4*8, 4+0*8, - 2+3*8, 1+5*8, 5+1*8, 5+2*8, - 1+6*8, 3+5*8, 7+1*8, 4+5*8, - 4+6*8, 7+4*8, 5+7*8, 6+7*8, - 0+2*8, 2+1*8, 1+3*8, 5+0*8, - 1+4*8, 2+4*8, 6+0*8, 4+3*8, - 0+7*8, 4+4*8, 7+2*8, 3+6*8, - 5+5*8, 6+5*8, 6+6*8, 7+7*8, +static const uint8_t zigzag_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8, + 4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8, + 3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8, + 2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8, + 1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8, + 3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8, + 2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8, + 3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8, + 0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8, + 2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8, + 1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8, + 4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8, + 0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8, + 1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8, + 0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8, + 5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8[64]={ - 0+0*8, 0+1*8, 0+2*8, 1+0*8, - 1+1*8, 0+3*8, 0+4*8, 1+2*8, - 2+0*8, 1+3*8, 0+5*8, 0+6*8, - 0+7*8, 1+4*8, 2+1*8, 3+0*8, - 2+2*8, 1+5*8, 1+6*8, 1+7*8, - 2+3*8, 3+1*8, 4+0*8, 3+2*8, - 2+4*8, 2+5*8, 2+6*8, 2+7*8, - 3+3*8, 4+1*8, 5+0*8, 4+2*8, - 3+4*8, 3+5*8, 3+6*8, 3+7*8, - 4+3*8, 5+1*8, 6+0*8, 5+2*8, - 4+4*8, 4+5*8, 4+6*8, 4+7*8, - 5+3*8, 6+1*8, 6+2*8, 5+4*8, - 5+5*8, 5+6*8, 5+7*8, 6+3*8, - 7+0*8, 7+1*8, 6+4*8, 6+5*8, - 6+6*8, 6+7*8, 7+2*8, 7+3*8, - 7+4*8, 7+5*8, 7+6*8, 7+7*8, +static const uint8_t field_scan8x8[64] = { + 0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8, + 1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8, + 2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8, + 0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8, + 2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8, + 2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8, + 2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8, + 3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8, + 3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8, + 4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8, + 4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8, + 5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8, + 5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8, + 7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8, + 6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8, + 7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 2+0*8, 0+7*8, - 2+2*8, 2+3*8, 2+4*8, 3+3*8, - 3+4*8, 4+3*8, 4+4*8, 5+3*8, - 5+5*8, 7+0*8, 6+6*8, 7+4*8, - 0+1*8, 0+3*8, 
1+3*8, 1+4*8, - 1+5*8, 3+1*8, 2+5*8, 4+1*8, - 3+5*8, 5+1*8, 4+5*8, 6+1*8, - 5+6*8, 7+1*8, 6+7*8, 7+5*8, - 0+2*8, 0+4*8, 0+5*8, 2+1*8, - 1+6*8, 4+0*8, 2+6*8, 5+0*8, - 3+6*8, 6+0*8, 4+6*8, 6+2*8, - 5+7*8, 6+4*8, 7+2*8, 7+6*8, - 1+0*8, 1+2*8, 0+6*8, 3+0*8, - 1+7*8, 3+2*8, 2+7*8, 4+2*8, - 3+7*8, 5+2*8, 4+7*8, 5+4*8, - 6+3*8, 6+5*8, 7+3*8, 7+7*8, +static const uint8_t field_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8, + 2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8, + 3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8, + 5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8, + 0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8, + 1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8, + 3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8, + 5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8, + 0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8, + 1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8, + 3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8, + 5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8, + 1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8, + 1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8, + 3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8, + 6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8, }; -typedef struct IMbInfo{ +typedef struct IMbInfo { uint16_t type; uint8_t pred_mode; uint8_t cbp; } IMbInfo; -static const IMbInfo i_mb_type_info[26]={ -{MB_TYPE_INTRA4x4 , -1, -1}, -{MB_TYPE_INTRA16x16, 2, 0}, -{MB_TYPE_INTRA16x16, 1, 0}, -{MB_TYPE_INTRA16x16, 0, 0}, -{MB_TYPE_INTRA16x16, 3, 0}, -{MB_TYPE_INTRA16x16, 2, 16}, -{MB_TYPE_INTRA16x16, 1, 16}, -{MB_TYPE_INTRA16x16, 0, 16}, -{MB_TYPE_INTRA16x16, 3, 16}, -{MB_TYPE_INTRA16x16, 2, 32}, -{MB_TYPE_INTRA16x16, 1, 32}, -{MB_TYPE_INTRA16x16, 0, 32}, -{MB_TYPE_INTRA16x16, 3, 32}, -{MB_TYPE_INTRA16x16, 2, 15+0}, -{MB_TYPE_INTRA16x16, 1, 15+0}, -{MB_TYPE_INTRA16x16, 0, 15+0}, -{MB_TYPE_INTRA16x16, 3, 15+0}, -{MB_TYPE_INTRA16x16, 2, 15+16}, -{MB_TYPE_INTRA16x16, 1, 15+16}, -{MB_TYPE_INTRA16x16, 0, 15+16}, -{MB_TYPE_INTRA16x16, 3, 15+16}, -{MB_TYPE_INTRA16x16, 2, 15+32}, -{MB_TYPE_INTRA16x16, 1, 15+32}, -{MB_TYPE_INTRA16x16, 0, 15+32}, -{MB_TYPE_INTRA16x16, 3, 15+32}, -{MB_TYPE_INTRA_PCM , -1, -1}, +static const IMbInfo i_mb_type_info[26] = { + { MB_TYPE_INTRA4x4, -1, -1 }, + { MB_TYPE_INTRA16x16, 2, 0 }, + { MB_TYPE_INTRA16x16, 1, 0 }, + { MB_TYPE_INTRA16x16, 0, 0 }, + { MB_TYPE_INTRA16x16, 3, 0 }, + { MB_TYPE_INTRA16x16, 2, 16 }, + { MB_TYPE_INTRA16x16, 1, 16 }, + { MB_TYPE_INTRA16x16, 0, 16 }, + { MB_TYPE_INTRA16x16, 3, 16 }, + { MB_TYPE_INTRA16x16, 2, 32 }, + { MB_TYPE_INTRA16x16, 1, 32 }, + { MB_TYPE_INTRA16x16, 0, 32 }, + { MB_TYPE_INTRA16x16, 3, 32 }, + { MB_TYPE_INTRA16x16, 2, 15 + 0 }, + { MB_TYPE_INTRA16x16, 1, 15 + 0 }, + { MB_TYPE_INTRA16x16, 0, 15 + 0 }, + { MB_TYPE_INTRA16x16, 3, 15 + 0 }, + { MB_TYPE_INTRA16x16, 2, 15 + 16 }, + { MB_TYPE_INTRA16x16, 1, 15 + 16 }, + { MB_TYPE_INTRA16x16, 0, 15 + 16 }, + { MB_TYPE_INTRA16x16, 3, 15 + 16 }, + { MB_TYPE_INTRA16x16, 2, 15 + 32 }, + { MB_TYPE_INTRA16x16, 1, 15 + 32 }, + { MB_TYPE_INTRA16x16, 0, 15 + 32 }, + { MB_TYPE_INTRA16x16, 3, 15 + 32 }, + { MB_TYPE_INTRA_PCM, -1, -1 }, }; -typedef struct PMbInfo{ +typedef struct PMbInfo { uint16_t type; uint8_t partition_count; } PMbInfo; -static const PMbInfo p_mb_type_info[5]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, +static const PMbInfo p_mb_type_info[5] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | 
MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_REF0, 4 }, }; -static const PMbInfo p_sub_mb_type_info[4]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0 , 4}, +static const PMbInfo p_sub_mb_type_info[4] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0, 4 }, }; -static const PMbInfo b_mb_type_info[23]={ -{MB_TYPE_DIRECT2|MB_TYPE_L0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_mb_type_info[23] = { + { MB_TYPE_DIRECT2 | MB_TYPE_L0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const PMbInfo b_sub_mb_type_info[13]={ -{MB_TYPE_DIRECT2 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, 
-{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_sub_mb_type_info[13] = { + { MB_TYPE_DIRECT2, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const uint8_t dequant4_coeff_init[6][3]={ - {10,13,16}, - {11,14,18}, - {13,16,20}, - {14,18,23}, - {16,20,25}, - {18,23,29}, +static const uint8_t dequant4_coeff_init[6][3] = { + { 10, 13, 16 }, + { 11, 14, 18 }, + { 13, 16, 20 }, + { 14, 18, 23 }, + { 16, 20, 25 }, + { 18, 23, 29 }, }; static const uint8_t dequant8_coeff_init_scan[16] = { - 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 + 0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1 }; -static const uint8_t dequant8_coeff_init[6][6]={ - {20,18,32,19,25,24}, - {22,19,35,21,28,26}, - {26,23,42,24,33,31}, - {28,25,45,26,35,33}, - {32,28,51,30,40,38}, - {36,32,58,34,46,43}, + +static const uint8_t dequant8_coeff_init[6][6] = { + { 20, 18, 32, 19, 25, 24 }, + { 22, 19, 35, 21, 28, 26 }, + { 26, 23, 42, 24, 33, 31 }, + { 28, 25, 45, 26, 35, 33 }, + { 32, 28, 51, 30, 40, 38 }, + { 36, 32, 58, 34, 46, 43 }, }; #endif /* AVCODEC_H264DATA_H */ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index b880446121..a964ae394b 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -35,18 +35,18 @@ * Prediction types */ //@{ -#define VERT_PRED 0 -#define HOR_PRED 1 -#define DC_PRED 2 -#define DIAG_DOWN_LEFT_PRED 3 -#define DIAG_DOWN_RIGHT_PRED 4 -#define VERT_RIGHT_PRED 5 -#define HOR_DOWN_PRED 6 -#define VERT_LEFT_PRED 7 -#define HOR_UP_PRED 8 +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 // DC edge (not for VP8) -#define LEFT_DC_PRED 9 +#define LEFT_DC_PRED 9 #define TOP_DC_PRED 10 #define DC_128_PRED 11 @@ -56,7 +56,7 @@ #define VERT_LEFT_PRED_RV40_NODOWN 14 // VP8 specific -#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane +#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane #define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of ///< (left col+cur col x2+right col) / 4; ///< this is the "unaveraged" one @@ -65,44 +65,53 @@ #define DC_127_PRED 12 #define DC_129_PRED 13 -#define DC_PRED8x8 0 -#define HOR_PRED8x8 1 -#define VERT_PRED8x8 2 -#define PLANE_PRED8x8 3 +#define DC_PRED8x8 0 
+#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 // DC edge -#define LEFT_DC_PRED8x8 4 -#define TOP_DC_PRED8x8 5 -#define DC_128_PRED8x8 6 +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 // H264/SVQ3 (8x8) specific -#define ALZHEIMER_DC_L0T_PRED8x8 7 -#define ALZHEIMER_DC_0LT_PRED8x8 8 -#define ALZHEIMER_DC_L00_PRED8x8 9 +#define ALZHEIMER_DC_L0T_PRED8x8 7 +#define ALZHEIMER_DC_0LT_PRED8x8 8 +#define ALZHEIMER_DC_L00_PRED8x8 9 #define ALZHEIMER_DC_0L0_PRED8x8 10 // VP8 specific -#define DC_127_PRED8x8 7 -#define DC_129_PRED8x8 8 +#define DC_127_PRED8x8 7 +#define DC_129_PRED8x8 8 //@} /** * Context for storing H.264 prediction functions */ -typedef struct H264PredContext{ - void (*pred4x4 [9+3+3])(uint8_t *src, const uint8_t *topright, int stride);//FIXME move to dsp? - void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); - void (*pred8x8 [4+3+4])(uint8_t *src, int stride); - void (*pred16x16[4+3+2])(uint8_t *src, int stride); +typedef struct H264PredContext { + void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? + void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); + void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); + void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); - void (*pred4x4_add [2])(uint8_t *pix/*align 4*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8l_add [2])(uint8_t *pix/*align 8*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8_add [3])(uint8_t *pix/*align 8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); - void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); -}H264PredContext; + void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); + void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); +} H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ From 1de53d006b754c8ecab2f31a223acfaea15924f4 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 7 May 2012 14:13:57 +0200 Subject: [PATCH 22/25] h264: K&R formatting cosmetics for header files (part II/II) --- libavcodec/h264_mvpred.h | 995 ++++++++++++++++++++------------------- libavcodec/h264dsp.h | 96 ++-- 2 files changed, 584 insertions(+), 507 deletions(-) diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h index 2bd4458f0b..5244c290f2 100644 --- a/libavcodec/h264_mvpred.h +++ b/libavcodec/h264_mvpred.h @@ -35,53 +35,53 @@ 
//#undef NDEBUG #include -static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ - const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; - MpegEncContext *s = &h->s; +static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, + int i, int list, int part_width) +{ + const int topright_ref = h->ref_cache[list][i - 8 + part_width]; + MpegEncContext *s = &h->s; /* there is no consistent mapping of mvs to neighboring locations that will * make mbaff happy, so we can't move all this logic to fill_caches */ - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { +#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4) \ + const int xy = XY, y4 = Y4; \ + const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride]; \ + if (!USES_LIST(mb_type, list)) \ + return LIST_NOT_USED; \ + mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \ + h->mv_cache[list][scan8[0] - 2][0] = mv[0]; \ + h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP; \ + return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP; -#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ - const int xy = XY, y4 = Y4;\ - const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ - if(!USES_LIST(mb_type,list))\ - return LIST_NOT_USED;\ - mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4*h->b_stride];\ - h->mv_cache[list][scan8[0]-2][0] = mv[0];\ - h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ - return s->current_picture_ptr->f.ref_index[list][4*xy + 1 + (y4 & ~1)] REF_OP; - - if(topright_ref == PART_NOT_AVAILABLE - && i >= scan8[0]+8 && (i&7)==4 - && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ + if (topright_ref == PART_NOT_AVAILABLE + && i >= scan8[0] + 8 && (i & 7) == 4 + && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) { const uint32_t *mb_types = s->current_picture_ptr->f.mb_type; const int16_t *mv; - AV_ZERO32(h->mv_cache[list][scan8[0]-2]); - *C = h->mv_cache[list][scan8[0]-2]; + AV_ZERO32(h->mv_cache[list][scan8[0] - 2]); + *C = h->mv_cache[list][scan8[0] - 2]; - if(!MB_FIELD - && IS_INTERLACED(h->left_type[0])){ - SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); + if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) { + SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride, + (s->mb_y & 1) * 2 + (i >> 5)); } - if(MB_FIELD - && !IS_INTERLACED(h->left_type[0])){ + if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) { // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. 
- SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); + SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3); } } #undef SET_DIAG_MV } - if(topright_ref != PART_NOT_AVAILABLE){ - *C= h->mv_cache[list][ i - 8 + part_width ]; + if (topright_ref != PART_NOT_AVAILABLE) { + *C = h->mv_cache[list][i - 8 + part_width]; return topright_ref; - }else{ + } else { tprintf(s->avctx, "topright MV not available\n"); - *C= h->mv_cache[list][ i - 8 - 1 ]; - return h->ref_cache[list][ i - 8 - 1 ]; + *C = h->mv_cache[list][i - 8 - 1]; + return h->ref_cache[list][i - 8 - 1]; } } @@ -92,53 +92,61 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ - const int index8= scan8[n]; - const int top_ref= h->ref_cache[list][ index8 - 8 ]; - const int left_ref= h->ref_cache[list][ index8 - 1 ]; - const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; - const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; - const int16_t * C; +static av_always_inline void pred_motion(H264Context *const h, int n, + int part_width, int list, int ref, + int *const mx, int *const my) +{ + const int index8 = scan8[n]; + const int top_ref = h->ref_cache[list][index8 - 8]; + const int left_ref = h->ref_cache[list][index8 - 1]; + const int16_t *const A = h->mv_cache[list][index8 - 1]; + const int16_t *const B = h->mv_cache[list][index8 - 8]; + const int16_t *C; int diagonal_ref, match_count; - assert(part_width==1 || part_width==2 || part_width==4); + assert(part_width == 1 || part_width == 2 || part_width == 4); /* mv_cache - B . . A T T T T - U . . L . . , . - U . . L . . . . - U . . L . . , . - . . . L . . . . -*/ + * B . . A T T T T + * U . . L . . , . + * U . . L . . . . + * U . . L . . , . + * . . . L . . . . 
+ */ - diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); - match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width); + match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref); tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); - if(match_count > 1){ //most common - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(left_ref==ref){ - *mx= A[0]; - *my= A[1]; - }else if(top_ref==ref){ - *mx= B[0]; - *my= B[1]; - }else{ - *mx= C[0]; - *my= C[1]; + if (match_count > 1) { //most common + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); + } else if (match_count == 1) { + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; + } else if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; + } else { + *mx = C[0]; + *my = C[1]; } - }else{ - if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ - *mx= A[0]; - *my= A[1]; - }else{ - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); + } else { + if (top_ref == PART_NOT_AVAILABLE && + diagonal_ref == PART_NOT_AVAILABLE && + left_ref != PART_NOT_AVAILABLE) { + *mx = A[0]; + *my = A[1]; + } else { + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); } } - tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, + "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, + A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); } /** @@ -147,27 +155,32 @@ static av_always_inline void pred_motion(H264Context * const h, int n, int part_ * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; - const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; +static av_always_inline void pred_16x8_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int top_ref = h->ref_cache[list][scan8[0] - 8]; + const int16_t *const B = h->mv_cache[list][scan8[0] - 8]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); - if(top_ref == ref){ - *mx= B[0]; - *my= B[1]; + if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; return; } - }else{ - const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; + } else { + const int left_ref = h->ref_cache[list][scan8[8] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[8] - 1]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if 
(left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } } @@ -182,29 +195,34 @@ static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; +static av_always_inline void pred_8x16_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int left_ref = h->ref_cache[list][scan8[0] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[0] - 1]; - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } - }else{ - const int16_t * C; + } else { + const int16_t *C; int diagonal_ref; - diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); + diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2); - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); - if(diagonal_ref == ref){ - *mx= C[0]; - *my= C[1]; + if (diagonal_ref == ref) { + *mx = C[0]; + *my = C[1]; return; } } @@ -213,168 +231,174 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int pred_motion(h, n, 2, list, ref, mx, my); } -#define FIX_MV_MBAFF(type, refn, mvn, idx)\ - if(FRAME_MBAFF){\ - if(MB_FIELD){\ - if(!IS_INTERLACED(type)){\ - refn <<= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] /= 2;\ - mvn = mvbuf[idx];\ - }\ - }else{\ - if(IS_INTERLACED(type)){\ - refn >>= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] <<= 1;\ - mvn = mvbuf[idx];\ - }\ - }\ +#define FIX_MV_MBAFF(type, refn, mvn, idx) \ + if (FRAME_MBAFF) { \ + if (MB_FIELD) { \ + if (!IS_INTERLACED(type)) { \ + refn <<= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] /= 2; \ + mvn = mvbuf[idx]; \ + } \ + } else { \ + if (IS_INTERLACED(type)) { \ + refn >>= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] <<= 1; \ + mvn = mvbuf[idx]; \ + } \ + } \ } -static av_always_inline void pred_pskip_motion(H264Context * const h){ - DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0}; +static av_always_inline void pred_pskip_motion(H264Context *const h) +{ + DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 }; DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2]; - MpegEncContext * const s = &h->s; - int8_t *ref = s->current_picture.f.ref_index[0]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[0]; + MpegEncContext *const s = &h->s; + int8_t *ref = s->current_picture.f.ref_index[0]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[0]; int top_ref, left_ref, diagonal_ref, match_count, mx, my; const int16_t *A, *B, *C; int b_stride = h->b_stride; fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. 
- * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's - * faster this way. Is there a way to avoid this duplication? + /* To avoid doing an entire fill_decode_caches, we inline the relevant + * parts here. + * FIXME: this is a partial duplicate of the logic in fill_decode_caches, + * but it's faster this way. Is there a way to avoid this duplication? */ - if(USES_LIST(h->left_type[LTOP], 0)){ - left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)]; - A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]]; + if (USES_LIST(h->left_type[LTOP], 0)) { + left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)]; + A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]]; FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0); - if(!(left_ref | AV_RN32A(A))){ + if (!(left_ref | AV_RN32A(A))) goto zeromv; - } - }else if(h->left_type[LTOP]){ + } else if (h->left_type[LTOP]) { left_ref = LIST_NOT_USED; - A = zeromv; - }else{ + A = zeromv; + } else { goto zeromv; } - if(USES_LIST(h->top_type, 0)){ - top_ref = ref[4*h->top_mb_xy + 2]; - B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride]; + if (USES_LIST(h->top_type, 0)) { + top_ref = ref[4 * h->top_mb_xy + 2]; + B = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->top_type, top_ref, B, 1); - if(!(top_ref | AV_RN32A(B))){ + if (!(top_ref | AV_RN32A(B))) goto zeromv; - } - }else if(h->top_type){ + } else if (h->top_type) { top_ref = LIST_NOT_USED; - B = zeromv; - }else{ + B = zeromv; + } else { goto zeromv; } - tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); + tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", + top_ref, left_ref, h->s.mb_x, h->s.mb_y); - if(USES_LIST(h->topright_type, 0)){ - diagonal_ref = ref[4*h->topright_mb_xy + 2]; - C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride]; + if (USES_LIST(h->topright_type, 0)) { + diagonal_ref = ref[4 * h->topright_mb_xy + 2]; + C = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2); - }else if(h->topright_type){ + } else if (h->topright_type) { diagonal_ref = LIST_NOT_USED; C = zeromv; - }else{ - if(USES_LIST(h->topleft_type, 0)){ - diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)]; - C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)]; + } else { + if (USES_LIST(h->topleft_type, 0)) { + diagonal_ref = ref[4 * h->topleft_mb_xy + 1 + + (h->topleft_partition & 2)]; + C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride)]; FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2); - }else if(h->topleft_type){ + } else if (h->topleft_type) { diagonal_ref = LIST_NOT_USED; - C = zeromv; - }else{ + C = zeromv; + } else { diagonal_ref = PART_NOT_AVAILABLE; - C = zeromv; + C = zeromv; } } - match_count= !diagonal_ref + !top_ref + !left_ref; + match_count = !diagonal_ref + !top_ref + !left_ref; tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count); - if(match_count > 1){ + if (match_count > 1) { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(!left_ref){ + } else if (match_count == 1) { + if (!left_ref) { mx = A[0]; my = A[1]; - }else if(!top_ref){ + } else if (!top_ref) { mx = B[0]; my = B[1]; - }else{ + } else { mx = C[0]; my = C[1]; } - }else{ + } else { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); } - fill_rectangle( 
h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4); return; + zeromv: - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); return; } -static void fill_decode_neighbors(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; +static void fill_decode_neighbors(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; - static const uint8_t left_block_options[4][32]={ - {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, - {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, - {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}, - {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4} + static const uint8_t left_block_options[4][32] = { + { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }, + { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 } }; - h->topleft_partition= -1; + h->topleft_partition = -1; - top_xy = mb_xy - (s->mb_stride << MB_FIELD); + top_xy = mb_xy - (s->mb_stride << MB_FIELD); /* Wow, what a mess, why didn't they simplify the interlacing & intra * stuff, I can't imagine that these complex rules are worth it. 
*/ - topleft_xy = top_xy - 1; - topright_xy= top_xy + 1; - left_xy[LBOT] = left_xy[LTOP] = mb_xy-1; + topleft_xy = top_xy - 1; + topright_xy = top_xy + 1; + left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1; h->left_block = left_block_options[0]; - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]); const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ + if (s->mb_y & 1) { if (left_mb_field_flag != curr_mb_field_flag) { left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1; if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { topleft_xy += s->mb_stride; - // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition + /* take top left mv from the middle of the mb, as opposed + * to all other modes which use the bottom right partition */ h->topleft_partition = 0; - h->left_block = left_block_options[1]; + h->left_block = left_block_options[1]; } } - }else{ - if(curr_mb_field_flag){ + } else { + if (curr_mb_field_flag) { topleft_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1); topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1); - top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy ] >> 7) & 1) - 1); + top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1); } if (left_mb_field_flag != curr_mb_field_flag) { if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { h->left_block = left_block_options[2]; } @@ -382,9 +406,9 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ } } - h->topleft_mb_xy = topleft_xy; - h->top_mb_xy = top_xy; - h->topright_mb_xy= topright_xy; + h->topleft_mb_xy = topleft_xy; + h->top_mb_xy = top_xy; + h->topright_mb_xy = topright_xy; h->left_mb_xy[LTOP] = left_xy[LTOP]; h->left_mb_xy[LBOT] = left_xy[LBOT]; //FIXME do we need all in the context? 
@@ -395,351 +419,372 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]]; h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]]; - if(FMO){ - if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; - }else{ - if(h->slice_table[topleft_xy ] != h->slice_num){ + if (FMO) { + if (h->slice_table[topleft_xy] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; + } else { + if (h->slice_table[topleft_xy] != h->slice_num) { + h->topleft_type = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; } } - if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; + if (h->slice_table[topright_xy] != h->slice_num) + h->topright_type = 0; } -static void fill_decode_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; +static void fill_decode_caches(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; int topleft_type, top_type, topright_type, left_type[LEFT_MBS]; - const uint8_t * left_block= h->left_block; + const uint8_t *left_block = h->left_block; int i; uint8_t *nnz; uint8_t *nnz_cache; - topleft_xy = h->topleft_mb_xy; - top_xy = h->top_mb_xy; - topright_xy = h->topright_mb_xy; - left_xy[LTOP] = h->left_mb_xy[LTOP]; - left_xy[LBOT] = h->left_mb_xy[LBOT]; - topleft_type = h->topleft_type; - top_type = h->top_type; - topright_type = h->topright_type; - left_type[LTOP]= h->left_type[LTOP]; - left_type[LBOT]= h->left_type[LBOT]; + topleft_xy = h->topleft_mb_xy; + top_xy = h->top_mb_xy; + topright_xy = h->topright_mb_xy; + left_xy[LTOP] = h->left_mb_xy[LTOP]; + left_xy[LBOT] = h->left_mb_xy[LBOT]; + topleft_type = h->topleft_type; + top_type = h->top_type; + topright_type = h->topright_type; + left_type[LTOP] = h->left_type[LTOP]; + left_type[LBOT] = h->left_type[LBOT]; - if(!IS_SKIP(mb_type)){ - if(IS_INTRA(mb_type)){ - int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; - h->topleft_samples_available= - h->top_samples_available= - h->left_samples_available= 0xFFFF; - h->topright_samples_available= 0xEEEA; + if (!IS_SKIP(mb_type)) { + if (IS_INTRA(mb_type)) { + int type_mask = h->pps.constrained_intra_pred ? 
IS_INTRA(-1) : -1; + h->topleft_samples_available = + h->top_samples_available = + h->left_samples_available = 0xFFFF; + h->topright_samples_available = 0xEEEA; - if(!(top_type & type_mask)){ - h->topleft_samples_available= 0xB3FF; - h->top_samples_available= 0x33FF; - h->topright_samples_available= 0x26EA; + if (!(top_type & type_mask)) { + h->topleft_samples_available = 0xB3FF; + h->top_samples_available = 0x33FF; + h->topright_samples_available = 0x26EA; } - if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){ - if(IS_INTERLACED(mb_type)){ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDFFF; - h->left_samples_available&= 0x5FFF; + if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) { + if (IS_INTERLACED(mb_type)) { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDFFF; + h->left_samples_available &= 0x5FFF; } - if(!(left_type[LBOT] & type_mask)){ - h->topleft_samples_available&= 0xFF5F; - h->left_samples_available&= 0xFF5F; + if (!(left_type[LBOT] & type_mask)) { + h->topleft_samples_available &= 0xFF5F; + h->left_samples_available &= 0xFF5F; } - }else{ + } else { int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride]; assert(left_xy[LTOP] == left_xy[LBOT]); - if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - }else{ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + } else { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - if(!(topleft_type & type_mask)) - h->topleft_samples_available&= 0x7FFF; + if (!(topleft_type & type_mask)) + h->topleft_samples_available &= 0x7FFF; - if(!(topright_type & type_mask)) - h->topright_samples_available&= 0xFBFF; + if (!(topright_type & type_mask)) + h->topright_samples_available &= 0xFBFF; - if(IS_INTRA4x4(mb_type)){ - if(IS_INTRA4x4(top_type)){ - AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); - }else{ - h->intra4x4_pred_mode_cache[4+8*0]= - h->intra4x4_pred_mode_cache[5+8*0]= - h->intra4x4_pred_mode_cache[6+8*0]= - h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); + if (IS_INTRA4x4(mb_type)) { + if (IS_INTRA4x4(top_type)) { + AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); + } else { + h->intra4x4_pred_mode_cache[4 + 8 * 0] = + h->intra4x4_pred_mode_cache[5 + 8 * 0] = + h->intra4x4_pred_mode_cache[6 + 8 * 0] = + h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask); } - for(i=0; i<2; i++){ - if(IS_INTRA4x4(left_type[LEFT(i)])){ - int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; - }else{ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask); + for (i = 0; i < 2; i++) { + if (IS_INTRA4x4(left_type[LEFT(i)])) { + int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]]; + h->intra4x4_pred_mode_cache[3 + 8 * 2 
+ 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]]; + } else { + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = + h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask); } } } } - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - nnz_cache = h->non_zero_count_cache; - if(top_type){ - nnz = h->non_zero_count[top_xy]; - AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); - if(!s->chroma_y_shift){ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); - }else{ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]); - } - }else{ - uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; - AV_WN32A(&nnz_cache[4+8* 0], top_empty); - AV_WN32A(&nnz_cache[4+8* 5], top_empty); - AV_WN32A(&nnz_cache[4+8*10], top_empty); - } - - for (i=0; i<2; i++) { - if(left_type[LEFT(i)]){ - nnz = h->non_zero_count[left_xy[LEFT(i)]]; - nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]]; - nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]]; - if(CHROMA444){ - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; - }else if(CHROMA422) { - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; - }else{ - nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; - nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; + /* + * 0 . T T. T T T T + * 1 L . .L . . . . + * 2 L . .L . . . . + * 3 . T TL . . . . + * 4 L . .L . . . . + * 5 L . .. . . . . + */ + /* FIXME: constraint_intra_pred & partitioning & nnz + * (let us hope this is just a typo in the spec) */ + nnz_cache = h->non_zero_count_cache; + if (top_type) { + nnz = h->non_zero_count[top_xy]; + AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]); + if (!s->chroma_y_shift) { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]); + } else { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]); + } + } else { + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 
0 : 0x40404040; + AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty); + } + + for (i = 0; i < 2; i++) { + if (left_type[LEFT(i)]) { + nnz = h->non_zero_count[left_xy[LEFT(i)]]; + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]]; + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]]; + if (CHROMA444) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4]; + } else if (CHROMA422) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4]; + } else { + nnz_cache[3 + 8 * 6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]]; + nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]]; + } + } else { + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64; + } + } + + if (CABAC) { + // top_cbp + if (top_type) + h->top_cbp = h->cbp_table[top_xy]; + else + h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + // left_cbp + if (left_type[LTOP]) { + h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) | + ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) | + (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2); + } else { + h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; } - }else{ - nnz_cache[3+8* 1 + 2*8*i]= - nnz_cache[3+8* 2 + 2*8*i]= - nnz_cache[3+8* 6 + 2*8*i]= - nnz_cache[3+8* 7 + 2*8*i]= - nnz_cache[3+8*11 + 2*8*i]= - nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; } } - if( CABAC ) { - // top_cbp - if(top_type) { - h->top_cbp = h->cbp_table[top_xy]; - } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; - } - // left_cbp - if (left_type[LTOP]) { - h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) - | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2) - | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2); - } else { - h->left_cbp = IS_INTRA(mb_type) ? 
0x7CF : 0x00F; - } - } - } - - if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ + if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) { int list; int b_stride = h->b_stride; - for(list=0; listlist_count; list++){ + for (list = 0; list < h->list_count; list++) { int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; int8_t *ref = s->current_picture.f.ref_index[list]; - int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[list]; - if(!USES_LIST(mb_type, list)){ + int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[list]; + if (!USES_LIST(mb_type, list)) continue; - } assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; - AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]); - ref_cache[0 - 1*8]= - ref_cache[1 - 1*8]= ref[4*top_xy + 2]; - ref_cache[2 - 1*8]= - ref_cache[3 - 1*8]= ref[4*top_xy + 3]; - }else{ - AV_ZERO128(mv_cache[0 - 1*8]); - AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u); + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; + AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]); + ref_cache[0 - 1 * 8] = + ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2]; + ref_cache[2 - 1 * 8] = + ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3]; + } else { + AV_ZERO128(mv_cache[0 - 1 * 8]); + AV_WN32A(&ref_cache[0 - 1 * 8], + ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u); } - if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ - for(i=0; i<2; i++){ - int cache_idx = -1 + i*2*8; - if(USES_LIST(left_type[LEFT(i)], list)){ - const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3; - const int b8_xy= 4*left_xy[LEFT(i)] + 1; - AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]); - AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]); - ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)]; - ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)]; - }else{ - AV_ZERO32(mv_cache[cache_idx ]); - AV_ZERO32(mv_cache[cache_idx+8]); - ref_cache[cache_idx ]= - ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) { + for (i = 0; i < 2; i++) { + int cache_idx = -1 + i * 2 * 8; + if (USES_LIST(left_type[LEFT(i)], list)) { + const int b_xy = h->mb2b_xy[left_xy[LEFT(i)]] + 3; + const int b8_xy = 4 * left_xy[LEFT(i)] + 1; + AV_COPY32(mv_cache[cache_idx], + mv[b_xy + b_stride * left_block[0 + i * 2]]); + AV_COPY32(mv_cache[cache_idx + 8], + mv[b_xy + b_stride * left_block[1 + i * 2]]); + ref_cache[cache_idx] = ref[b8_xy + (left_block[0 + i * 2] & ~1)]; + ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)]; + } else { + AV_ZERO32(mv_cache[cache_idx]); + AV_ZERO32(mv_cache[cache_idx + 8]); + ref_cache[cache_idx] = + ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; + } } - } - }else{ - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; - const int b8_xy= 4*left_xy[LTOP] + 1; - AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]); - ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)]; - }else{ + } else { + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; + const int b8_xy = 4 * left_xy[LTOP] + 1; + AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]); + ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)]; + } else { AV_ZERO32(mv_cache[-1]); - ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if(USES_LIST(topright_type, list)){ - const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride; - AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]); - ref_cache[4 - 1*8]= ref[4*topright_xy + 2]; - }else{ - AV_ZERO32(mv_cache[4 - 1*8]); - ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topright_type, list)) { + const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride; + AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]); + ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2]; + } else { + AV_ZERO32(mv_cache[4 - 1 * 8]); + ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } - if(ref_cache[4 - 1*8] < 0){ - if(USES_LIST(topleft_type, list)){ - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride); - const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); - AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]); - ref_cache[-1 - 1*8]= ref[b8_xy]; - }else{ - AV_ZERO32(mv_cache[-1 - 1*8]); - ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (ref_cache[4 - 1 * 8] < 0) { + if (USES_LIST(topleft_type, list)) { + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride); + const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2); + AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]); + ref_cache[-1 - 1 * 8] = ref[b8_xy]; + } else { + AV_ZERO32(mv_cache[-1 - 1 * 8]); + ref_cache[-1 - 1 * 8] = topleft_type ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) + if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF) continue; - if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){ - uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; - uint8_t (*mvd)[2] = h->mvd_table[list]; - ref_cache[2+8*0] = - ref_cache[2+8*2] = PART_NOT_AVAILABLE; - AV_ZERO32(mv_cache[2+8*0]); - AV_ZERO32(mv_cache[2+8*2]); + if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) { + uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t(*mvd)[2] = h->mvd_table[list]; + ref_cache[2 + 8 * 0] = + ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE; + AV_ZERO32(mv_cache[2 + 8 * 0]); + AV_ZERO32(mv_cache[2 + 8 * 2]); - if( CABAC ) { - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2br_xy[top_xy]; - AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]); - }else{ - AV_ZERO64(mvd_cache[0 - 1*8]); + if (CABAC) { + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2br_xy[top_xy]; + AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]); + } else { + AV_ZERO64(mvd_cache[0 - 1 * 8]); } - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6; - AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]); - AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 0*8]); - AV_ZERO16(mvd_cache[-1 + 1*8]); + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6; + AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]); + AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]); + } else { + AV_ZERO16(mvd_cache[-1 + 0 * 8]); + AV_ZERO16(mvd_cache[-1 + 1 * 8]); } - if(USES_LIST(left_type[LBOT], list)){ - const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6; - AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]); - AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 2*8]); - AV_ZERO16(mvd_cache[-1 + 3*8]); + if (USES_LIST(left_type[LBOT], list)) { + const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6; + AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]); + AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]); + } else { + AV_ZERO16(mvd_cache[-1 + 2 * 8]); + AV_ZERO16(mvd_cache[-1 + 3 * 8]); } - AV_ZERO16(mvd_cache[2+8*0]); - AV_ZERO16(mvd_cache[2+8*2]); - if(h->slice_type_nos == AV_PICTURE_TYPE_B){ + AV_ZERO16(mvd_cache[2 + 8 * 0]); + AV_ZERO16(mvd_cache[2 + 8 * 2]); + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { uint8_t *direct_cache = &h->direct_cache[scan8[0]]; uint8_t *direct_table = h->direct_table; - fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1); + fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1); - if(IS_DIRECT(top_type)){ - AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1)); - }else if(IS_8X8(top_type)){ - int b8_xy = 4*top_xy; - direct_cache[0 - 1*8]= direct_table[b8_xy + 2]; - direct_cache[2 - 1*8]= direct_table[b8_xy + 3]; - }else{ - AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1)); + if (IS_DIRECT(top_type)) { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101u * (MB_TYPE_DIRECT2 >> 1)); + } else if (IS_8X8(top_type)) { + int b8_xy = 4 * top_xy; + direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2]; + direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3]; + } else { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101 * (MB_TYPE_16x16 >> 1)); } - if(IS_DIRECT(left_type[LTOP])) - direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LTOP])) - direct_cache[-1 + 
0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)]; + if (IS_DIRECT(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)]; else - direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1; - if(IS_DIRECT(left_type[LBOT])) - direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LBOT])) - direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)]; + if (IS_DIRECT(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)]; else - direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1; } } } - if(FRAME_MBAFF){ -#define MAP_MVS\ - MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ - MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ - MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\ - MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT]) - if(MB_FIELD){ -#define MAP_F2F(idx, mb_type)\ - if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] <<= 1;\ - h->mv_cache[list][idx][1] /= 2;\ - h->mvd_cache[list][idx][1] >>=1;\ - } + +#define MAP_MVS \ + MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type) \ + MAP_F2F(scan8[0] + 0 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 1 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 2 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 3 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type) \ + MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT]) \ + MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT]) + + if (FRAME_MBAFF) { + if (MB_FIELD) { + +#define MAP_F2F(idx, mb_type) \ + if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] <<= 1; \ + h->mv_cache[list][idx][1] /= 2; \ + h->mvd_cache[list][idx][1] >>= 1; \ + } + MAP_MVS + } else { + #undef MAP_F2F - }else{ -#define MAP_F2F(idx, mb_type)\ - if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] >>= 1;\ - h->mv_cache[list][idx][1] <<= 1;\ - h->mvd_cache[list][idx][1] <<= 1;\ - } +#define MAP_F2F(idx, mb_type) \ + if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] >>= 1; \ + h->mv_cache[list][idx][1] <<= 1; \ + h->mvd_cache[list][idx][1] <<= 1; \ + } + MAP_MVS #undef MAP_F2F } @@ -747,36 +792,34 @@ static void fill_decode_caches(H264Context *h, int mb_type){ } } - h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); + h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); } /** * decodes a P_SKIP or B_SKIP macroblock */ -static void av_unused decode_mb_skip(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int mb_type=0; +static void av_unused decode_mb_skip(H264Context *h) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; + int mb_type = 0; memset(h->non_zero_count[mb_xy], 0, 48); - if(MB_FIELD) - mb_type|= MB_TYPE_INTERLACED; + if (MB_FIELD) + mb_type |= MB_TYPE_INTERLACED; - if( 
h->slice_type_nos == AV_PICTURE_TYPE_B ) - { + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; - if(h->direct_spatial_mv_pred){ + mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP; + if (h->direct_spatial_mv_pred) { fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... + fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... } ff_h264_pred_direct_motion(h, &mb_type); - mb_type|= MB_TYPE_SKIP; - } - else - { - mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + mb_type |= MB_TYPE_SKIP; + } else { + mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP; fill_decode_neighbors(h, mb_type); pred_pskip_motion(h); @@ -785,8 +828,8 @@ static void av_unused decode_mb_skip(H264Context *h){ write_back_motion(h, mb_type); s->current_picture.f.mb_type[mb_xy] = mb_type; s->current_picture.f.qscale_table[mb_xy] = s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - h->prev_mb_skipped= 1; + h->slice_table[mb_xy] = h->slice_num; + h->prev_mb_skipped = 1; } #endif /* AVCODEC_H264_MVPRED_H */ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 08a25a54c9..248c7d0e08 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -28,56 +28,90 @@ #define AVCODEC_H264DSP_H #include + #include "dsputil.h" typedef void (*h264_weight_func)(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, - int log2_denom, int weightd, int weights, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, + int stride, int height, int log2_denom, + int weightd, int weights, int offset); /** * Context for storing H.264 DSP functions */ -typedef struct H264DSPContext{ +typedef struct H264DSPContext { /* weighted MC */ h264_weight_func weight_h264_pixels_tab[4]; h264_biweight_func biweight_h264_pixels_tab[4]; /* loop filter */ - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void 
(*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, + int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta, + int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], + int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, + int mask_mv0, int mask_mv1, int field); /* IDCT */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); + void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, 
const int *blockoffset, + DCTELEM *block /*align 16*/, + int stride, const uint8_t nnzc[15 * 8]); + void (*h264_luma_dc_dequant_idct)(DCTELEM *output, + DCTELEM *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); -}H264DSPContext; +} H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); #endif /* AVCODEC_H264DSP_H */ From cbc7d60afa0c56f8e50131830278fd32a89aed9d Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 10 May 2012 00:55:18 +0100 Subject: [PATCH 23/25] arm: dsputil: fix overreads in put/avg_pixels functions The vertically interpolating variants of these functions read ahead one line to optimise the loop. On the last line processed, this might be outside the buffer. Fix these invalid reads by processing the last line outside the loop. Signed-off-by: Mans Rullgard --- libavcodec/arm/dsputil_neon.S | 92 +++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index d49aedd6c4..4bdcd95061 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -95,6 +95,7 @@ endfunc .endm .macro pixels16_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 1: subs r3, r3, #2 @@ -114,10 +115,25 @@ endfunc vst1.64 {q2}, [r0,:128], r2 vst1.64 {q3}, [r0,:128], r2 bne 1b + + avg q2, q0, q1 + vld1.64 {q0}, [r1], r2 + avg q3, q0, q1 + .if \avg + vld1.8 {q8}, [r0,:128], r2 + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q2, q2, q8 + vrhadd.u8 q3, q3, q9 + sub r0, r0, r2 + .endif + vst1.64 {q2}, [r0,:128], r2 + vst1.64 {q3}, [r0,:128], r2 + bx lr .endm .macro pixels16_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0-d2}, [r1], r2 vld1.64 {d4-d6}, [r1], r2 .ifeq \rnd @@ -173,6 +189,42 @@ endfunc vaddl.u8 q11, d3, d5 vst1.64 {q15}, [r0,:128], r2 bgt 1b + + vld1.64 {d0-d2}, [r1], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vext.8 q15, q0, q1, #1 + vadd.u16 q1 , q10, q11 + shrn d28, q12, #2 + .ifeq \rnd + vadd.u16 q1, q1, q13 + .endif + shrn d29, q1, #2 + .if \avg + vld1.8 {q8}, [r0,:128] + vrhadd.u8 q14, q14, q8 + .endif + vaddl.u8 q8, d0, d30 + vaddl.u8 q10, d1, d31 + vst1.64 {q14}, [r0,:128], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vadd.u16 q0, q10, q11 + shrn d30, q12, #2 + .ifeq \rnd + vadd.u16 q0, q0, q13 + .endif + shrn d31, q0, #2 + .if \avg + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q15, q15, q9 + .endif + vst1.64 {q15}, [r0,:128], r2 + bx lr .endm @@ -228,6 +280,7 @@ endfunc .endm .macro pixels8_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0}, [r1], r2 vld1.64 {d1}, [r1], r2 1: subs r3, r3, #2 @@ -246,10 +299,24 @@ endfunc vst1.64 {d4}, [r0,:64], r2 vst1.64 {d5}, [r0,:64], r2 bne 1b + + avg d4, d0, d1 + vld1.64 {d0}, [r1], r2 + avg d5, d0, d1 + .if \avg + vld1.8 
{d2}, [r0,:64], r2 + vld1.8 {d3}, [r0,:64] + vrhadd.u8 q2, q2, q1 + sub r0, r0, r2 + .endif + vst1.64 {d4}, [r0,:64], r2 + vst1.64 {d5}, [r0,:64], r2 + bx lr .endm .macro pixels8_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 .ifeq \rnd @@ -291,6 +358,31 @@ endfunc vaddl.u8 q9, d2, d6 vst1.64 {d7}, [r0,:64], r2 bgt 1b + + vld1.64 {q0}, [r1], r2 + vadd.u16 q10, q8, q9 + vext.8 d4, d0, d1, #1 + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vaddl.u8 q8, d0, d4 + shrn d5, q10, #2 + vadd.u16 q10, q8, q9 + .if \avg + vld1.8 {d7}, [r0,:64] + vrhadd.u8 d5, d5, d7 + .endif + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vst1.64 {d5}, [r0,:64], r2 + shrn d7, q10, #2 + .if \avg + vld1.8 {d5}, [r0,:64] + vrhadd.u8 d7, d7, d5 + .endif + vst1.64 {d7}, [r0,:64], r2 + bx lr .endm From 706b998cdcea97c50fad2228f67488de0e06b2a2 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Tue, 28 Feb 2012 17:42:12 +0100 Subject: [PATCH 24/25] ape: Use unsigned integer maths This involves a division that should be a shift. Signed-off-by: Diego Biurrun --- libavcodec/apedec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index e41f555e31..b07f3a090b 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -393,7 +393,7 @@ static inline int range_get_symbol(APEContext *ctx, } /** @} */ // group rangecoder -static inline void update_rice(APERice *rice, int x) +static inline void update_rice(APERice *rice, unsigned int x) { int lim = rice->k ? (1 << (rice->k + 4)) : 0; rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); @@ -406,7 +406,7 @@ static inline void update_rice(APERice *rice, int x) static inline int ape_decode_value(APEContext *ctx, APERice *rice) { - int x, overflow; + unsigned int x, overflow; if (ctx->fileversion < 3990) { int tmpk; From 110d0cdc9d1ec414a658f841a3fbefbf6f796d61 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Thu, 19 Apr 2012 22:36:17 +0200 Subject: [PATCH 25/25] rv40dsp x86: MMX/MMX2/3DNow/SSE2/SSSE3 implementations of MC Code mostly inspired by vp8's MC, however: - its MMX2 horizontal filter is worse because it can't take advantage of the coefficient redundancy - that same coefficient redundancy allows better code for non-SSSE3 versions Benchmark (rounded to tens of unit): V8x8 H8x8 2D8x8 V16x16 H16x16 2D16x16 C 445 358 985 1785 1559 3280 MMX* 219 271 478 714 929 1443 SSE2 131 158 294 425 515 892 SSSE3 120 122 248 387 390 763 End result is overall around a 15% speedup for SSSE3 version (on 6 sequences); all loop filter functions now take around 55% of decoding time, while luma MC dsp functions are around 6%, chroma ones are 1.3% and biweight around 2.3%. 
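A rough scalar equivalent of the vertical six-tap filter that the SIMD code
below implements may help when reading the asm. The coefficients are those of
the first entry of the sixtap_filter tables (one of the three qpel phases);
the function name, loop bounds and clipping here are purely illustrative:

#include <stdint.h>

static void rv40_qpel_v_ref(uint8_t *dst, int dststride,
                            const uint8_t *src, int srcstride,
                            int width, int height)
{
    /* Taps 0/5 and 1/4 share a coefficient; only the two inner taps differ,
     * which is the coefficient redundancy mentioned above. */
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const uint8_t *s = src + x;
            int sum =  1 * (s[-2 * srcstride] + s[3 * srcstride])
                     - 5 * (s[-1 * srcstride] + s[2 * srcstride])
                    + 52 * s[0]
                    + 20 * s[srcstride]
                    + 32;                /* rounding; the taps sum to 64 */
            sum >>= 6;
            dst[x] = sum < 0 ? 0 : sum > 255 ? 255 : sum;
        }
        src += srcstride;
        dst += dststride;
    }
}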
Signed-off-by: Diego Biurrun --- libavcodec/x86/dsputil_mmx.c | 16 ++ libavcodec/x86/dsputil_mmx.h | 5 + libavcodec/x86/rv40dsp.asm | 316 +++++++++++++++++++++++++++++++++- libavcodec/x86/rv40dsp_init.c | 146 ++++++++++++++++ 4 files changed, 480 insertions(+), 3 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3ef19c5d13..6377a73555 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1791,6 +1791,22 @@ QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) QPEL_2TAP(avg_, 8, 3dnow) +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels16_xy2_mmx(dst, src, stride, 16); +} +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels16_xy2_mmx(dst, src, stride, 16); +} #if HAVE_YASM typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src, diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 097739cf98..37f4581b9c 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -199,6 +199,11 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); + void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index 721d3df094..e0213f40b9 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -1,5 +1,7 @@ ;****************************************************************************** ;* MMX/SSE2-optimized functions for the RV40 decoder +;* Copyright (c) 2010 Ronald S. Bultje +;* Copyright (c) 2010 Jason Garrett-Glaser ;* Copyright (C) 2012 Christophe Gisquet ;* ;* This file is part of Libav. 
@@ -25,11 +27,319 @@ SECTION_RODATA align 16 -shift_round: times 8 dw 1 << (16 - 6) -cextern pw_16 +pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024 + +sixtap_filter_hb_m: times 8 db 1, -5 + times 8 db 52, 20 + ; multiplied by 2 to have the same shift + times 8 db 2, -10 + times 8 db 40, 40 + ; back to normal + times 8 db 1, -5 + times 8 db 20, 52 + +sixtap_filter_v_m: times 8 dw 1 + times 8 dw -5 + times 8 dw 52 + times 8 dw 20 + ; multiplied by 2 to have the same shift + times 8 dw 2 + times 8 dw -10 + times 8 dw 40 + times 8 dw 40 + ; back to normal + times 8 dw 1 + times 8 dw -5 + times 8 dw 20 + times 8 dw 52 + +%ifdef PIC +%define sixtap_filter_hw picregq +%define sixtap_filter_hb picregq +%define sixtap_filter_v picregq +%define npicregs 1 +%else +%define sixtap_filter_hw sixtap_filter_hw_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define sixtap_filter_v sixtap_filter_v_m +%define npicregs 0 +%endif + +filter_h6_shuf1: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +filter_h6_shuf2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11 + +cextern pw_32 +cextern pw_16 +cextern pw_512 SECTION .text +;----------------------------------------------------------------------------- +; subpel MC functions: +; +; void [put|rv40]_rv40_qpel_[h|v]_(uint8_t *dst, int deststride, +; uint8_t *src, int srcstride, +; int len, int m); +;---------------------------------------------------------------------- +%macro LOAD 2 +%if WIN64 + movsxd %1q, %1d +%endif +%ifdef PIC + add %1q, picregq +%else + add %1q, %2 +%endif +%endmacro + +%macro STORE 3 +%ifidn %3, avg + movh %2, [dstq] +%endif + packuswb %1, %1 +%ifidn %3, avg +%if cpuflag(3dnow) + pavgusb %1, %2 +%else + pavgb %1, %2 +%endif +%endif + movh [dstq], %1 +%endmacro + +%macro FILTER_V 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD my, sixtap_filter_v + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + +%ifdef m8 + mova m8, [myq+ 0] + mova m9, [myq+16] + mova m10, [myq+32] + mova m11, [myq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [myq+ 0] +%define COEFF14 [myq+16] +%define COEFF2 [myq+32] +%define COEFF3 [myq+48] +%endif +.nextrow: + mova m6, m1 + movh m5, [srcq+2*srcstrideq] ; read new row + paddw m6, m4 + punpcklbw m5, m7 + pmullw m6, COEFF14 + paddw m0, m5 + pmullw m0, COEFF05 + paddw m6, m0 + mova m0, m1 + paddw m6, [pw_32] + mova m1, m2 + pmullw m2, COEFF2 + paddw m6, m2 + mova m2, m3 + pmullw m3, COEFF3 + paddw m6, m3 + + ; round/clip/store + mova m3, m4 + psraw m6, 6 + mova m4, m5 + STORE m6, m5, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%macro FILTER_H 1 +cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD mx, sixtap_filter_v + mova m6, [pw_32] +%ifdef m8 + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define 
COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [mxq+ 0] +%define COEFF14 [mxq+16] +%define COEFF2 [mxq+32] +%define COEFF3 [mxq+48] +%endif +.nextrow: + movq m0, [srcq-2] + movq m5, [srcq+3] + movq m1, [srcq-1] + movq m4, [srcq+2] + punpcklbw m0, m7 + punpcklbw m5, m7 + punpcklbw m1, m7 + punpcklbw m4, m7 + movq m2, [srcq-0] + movq m3, [srcq+1] + paddw m0, m5 + paddw m1, m4 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, COEFF05 + pmullw m1, COEFF14 + pmullw m2, COEFF2 + pmullw m3, COEFF3 + paddw m0, m6 + paddw m1, m2 + paddw m0, m3 + paddw m0, m1 + psraw m0, 6 + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +FILTER_V put +FILTER_H put + +INIT_MMX mmx2 +FILTER_V avg +FILTER_H avg + +INIT_MMX 3dnow +FILTER_V avg +FILTER_H avg +%endif + +INIT_XMM sse2 +FILTER_H put +FILTER_H avg +FILTER_V put +FILTER_V avg + +%macro FILTER_SSSE3 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + + ; read 5 lines + sub srcq, srcstrideq + LOAD my, sixtap_filter_hb + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + mova m5, [myq] + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + lea srcq, [srcq+2*srcstrideq] + +.nextrow: + mova m6, m2 + punpcklbw m0, m1 + punpcklbw m6, m3 + pmaddubsw m0, m5 + pmaddubsw m6, [myq+16] + movh m7, [srcq] ; read new row + paddw m6, m0 + mova m0, m1 + mova m1, m2 + mova m2, m3 + mova m3, m4 + mova m4, m7 + punpcklbw m7, m3 + pmaddubsw m7, m5 + paddw m6, m7 + pmulhrsw m6, [pw_512] + STORE m6, m7, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + mova m3, [filter_h6_shuf2] + mova m4, [filter_h6_shuf3] + LOAD mx, sixtap_filter_hb + mova m5, [mxq] ; set up 6tap filter in bytes + mova m6, [mxq+16] + mova m7, [filter_h6_shuf1] + +.nextrow: + movu m0, [srcq-2] + mova m1, m0 + mova m2, m0 + pshufb m0, m7 + pshufb m1, m3 + pshufb m2, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + pmaddubsw m2, m5 + paddw m0, m1 + paddw m0, m2 + pmulhrsw m0, [pw_512] + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_XMM ssse3 +FILTER_SSSE3 put +FILTER_SSSE3 avg + ; %1=5bits weights?, %2=dst %3=src1 %4=src3 %5=stride if sse2 %macro RV40_WCORE 4-5 movh m4, [%3 + r6 + 0] @@ -143,7 +453,7 @@ SECTION .text %macro RV40_WEIGHT 3 cglobal rv40_weight_func_%1_%2, 6, 7, 8 %if cpuflag(ssse3) - mova m1, [shift_round] + mova m1, [pw_1024] %else mova m1, [pw_16] %endif diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index df468aa9e5..3f42363e4e 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -22,8 +22,11 @@ /** * @file * RV40 decoder motion compensation functions x86-optimised + * 2,0 and 0,2 have h264 equivalents. 
+ * 3,3 is bugged in the rv40 format and maps to _xy2 version */ +#include "libavcodec/x86/dsputil_mmx.h" #include "libavcodec/rv34dsp.h" void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, @@ -53,6 +56,132 @@ DECLARE_WEIGHT(mmx) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) +/** @{ */ +/** + * Define one qpel function. + * LOOPSIZE must be already set to the number of pixels processed per + * iteration in the inner loop of the called functions. + * COFF(x) must be already defined so as to provide the offset into any + * array of coeffs used by the called function for the qpel position x. + */ +#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \ +static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ + uint8_t *src, \ + int stride) \ +{ \ + int i; \ + if (PH && PV) { \ + DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)]; \ + uint8_t *tmpptr = tmp + SIZE * 2; \ + src -= stride * 2; \ + \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride, \ + SIZE + 5, HCOFF(PH)); \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i, \ + SIZE, SIZE, VCOFF(PV)); \ + } else if (PV) { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i, \ + stride, SIZE, VCOFF(PV)); \ + } else { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i, \ + stride, SIZE, HCOFF(PH)); \ + } \ +}; + +/** Declare functions for sizes 8 and 16 and given operations + * and qpel position. */ +#define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 16, PH, PV, OPT) + +/** Declare all functions for all sizes and qpel positions */ +#define QPEL_MC_DECL(OP, OPT) \ +void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +QPEL_FUNCS_DECL(OP, 0, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 0, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 2, OPT) +/** @} */ + +#define LOOPSIZE 8 +#define HCOFF(x) (32 * (x - 1)) +#define VCOFF(x) (32 * (x - 1)) +QPEL_MC_DECL(put_, _ssse3) +QPEL_MC_DECL(avg_, _ssse3) + +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 8 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) +QPEL_MC_DECL(put_, _sse2) +QPEL_MC_DECL(avg_, _sse2) + +#if ARCH_X86_32 +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 4 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) + +QPEL_MC_DECL(put_, _mmx) + +#define ff_put_rv40_qpel_h_mmx2 ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_mmx2 ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _mmx2) + +#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _3dnow) +#endif + +/** @{ */ +/** Set one function */ +#define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT) \ + c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT; + +/** Set functions put and avg for sizes 8 and 16 and a given qpel 
position */ +#define QPEL_FUNCS_SET(OP, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 16, PH, PV, OPT) + +/** Set all functions for all sizes and qpel positions */ +#define QPEL_MC_SET(OP, OPT) \ +QPEL_FUNCS_SET (OP, 0, 1, OPT) \ +QPEL_FUNCS_SET (OP, 0, 3, OPT) \ +QPEL_FUNCS_SET (OP, 1, 0, OPT) \ +QPEL_FUNCS_SET (OP, 1, 1, OPT) \ +QPEL_FUNCS_SET (OP, 1, 2, OPT) \ +QPEL_FUNCS_SET (OP, 1, 3, OPT) \ +QPEL_FUNCS_SET (OP, 2, 1, OPT) \ +QPEL_FUNCS_SET (OP, 2, 2, OPT) \ +QPEL_FUNCS_SET (OP, 2, 3, OPT) \ +QPEL_FUNCS_SET (OP, 3, 0, OPT) \ +QPEL_FUNCS_SET (OP, 3, 1, OPT) \ +QPEL_FUNCS_SET (OP, 3, 2, OPT) +/** @} */ + void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) { #if HAVE_YASM @@ -65,25 +194,42 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx; +#if ARCH_X86_32 + QPEL_MC_SET(put_, _mmx) +#endif } if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _mmx2) +#endif } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _3dnow) +#endif } if (mm_flags & AV_CPU_FLAG_SSE2) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2; + QPEL_MC_SET(put_, _sse2) + QPEL_MC_SET(avg_, _sse2) } if (mm_flags & AV_CPU_FLAG_SSSE3) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3; + QPEL_MC_SET(put_, _ssse3) + QPEL_MC_SET(avg_, _ssse3) } #endif }
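
As a usage sketch (the helper below is hypothetical and not part of the patch;
its indexing simply mirrors the QPEL_FUNC_SET macro in rv40dsp_init.c above),
selecting one of the MC functions installed by ff_rv40dsp_init_x86() could
look like this:

#include <stdint.h>
#include "libavcodec/rv34dsp.h"

/* size is 16 or 8; mx/my are the quarter-pel phases in 0..3, matching the
 * [2 - SIZE / 8][4 * PV + PH] layout used by QPEL_FUNC_SET. */
static void mc_rv40_luma(RV34DSPContext *dsp, uint8_t *dst, uint8_t *src,
                         int stride, int size, int mx, int my, int avg)
{
    int sidx = 2 - size / 8;      /* 16x16 -> 0, 8x8 -> 1 */
    int midx = 4 * my + mx;       /* vertical phase selects the row of four */

    if (avg)
        dsp->avg_pixels_tab[sidx][midx](dst, src, stride);
    else
        dsp->put_pixels_tab[sidx][midx](dst, src, stride);
}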