mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Merge remote-tracking branch 'qatar/master'
* qatar/master: libx264: fix indentation. vorbis: fix overflows in floor1[] vector and inverse db table index. win64: add a XMM clobber test configure option. movdec: Parse the dvc1 atom ARM: ac3: fix ac3_bit_alloc_calc_bap_armv6 swscale: K&R formatting cosmetics for Blackfin code frwu: lowercase the FRWU codec name movdec: fix dts generation in fragmented files fate: make acodec-ac3_fixed test output raw AC3 APIchanges: add missing commit hashes swscale: implement MMX, SSE2 and AVX functions for RGB32 input. ra144enc: drop pointless "encoder" from .long_name bethsoftvideo: fix palette reading. mpc7: use av_fast_padded_malloc() mpc7: simplify handling of packet sizes that are not a multiple of 4 bytes doc: decoding Forward Uncompressed is supported Fix a typo in the x86 asm version of ff_vector_clip_int32() pcmenc: Do not set avpkt->size. ff_alloc_packet: modify the size of the packet to match the requested size Conflicts: doc/APIchanges libavcodec/libx264.c libavcodec/mpc7.c libavformat/isom.h libswscale/Makefile libswscale/bfin/yuv2rgb_bfin.c tests/ref/fate/bethsoft-vid tests/ref/seek/ac3_ac3 Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
d77294c5e4
16
configure
vendored
16
configure
vendored
@ -276,6 +276,8 @@ Developer options (useful when working on FFmpeg itself):
|
|||||||
Cannot be combined with --target-exec
|
Cannot be combined with --target-exec
|
||||||
--samples=PATH location of test samples for FATE, if not set use
|
--samples=PATH location of test samples for FATE, if not set use
|
||||||
\$FATE_SAMPLES at make invocation time.
|
\$FATE_SAMPLES at make invocation time.
|
||||||
|
--enable-xmm-clobber-test check XMM registers for clobbering (Win64-only;
|
||||||
|
should be used only for debugging purposes)
|
||||||
|
|
||||||
NOTE: Object files are built at the place where configure is launched.
|
NOTE: Object files are built at the place where configure is launched.
|
||||||
EOF
|
EOF
|
||||||
@ -1085,6 +1087,7 @@ CONFIG_LIST="
|
|||||||
vda
|
vda
|
||||||
vdpau
|
vdpau
|
||||||
version3
|
version3
|
||||||
|
xmm_clobber_test
|
||||||
x11grab
|
x11grab
|
||||||
zlib
|
zlib
|
||||||
"
|
"
|
||||||
@ -1779,7 +1782,7 @@ test_deps _muxer _demuxer \
|
|||||||
wav \
|
wav \
|
||||||
yuv4mpegpipe=yuv4mpeg \
|
yuv4mpegpipe=yuv4mpeg \
|
||||||
|
|
||||||
ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder rm_muxer rm_demuxer"
|
ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder"
|
||||||
mpg_test_deps="mpeg1system_muxer mpegps_demuxer"
|
mpg_test_deps="mpeg1system_muxer mpegps_demuxer"
|
||||||
|
|
||||||
# default parameters
|
# default parameters
|
||||||
@ -3304,6 +3307,17 @@ check_ldflags -Wl,--warn-common
|
|||||||
check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil
|
check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil
|
||||||
test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
|
test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
|
||||||
|
|
||||||
|
enabled xmm_clobber_test && \
|
||||||
|
check_ldflags -Wl,--wrap,avcodec_open2 \
|
||||||
|
-Wl,--wrap,avcodec_decode_audio4 \
|
||||||
|
-Wl,--wrap,avcodec_decode_video2 \
|
||||||
|
-Wl,--wrap,avcodec_decode_subtitle2 \
|
||||||
|
-Wl,--wrap,avcodec_encode_audio2 \
|
||||||
|
-Wl,--wrap,avcodec_encode_video \
|
||||||
|
-Wl,--wrap,avcodec_encode_subtitle \
|
||||||
|
-Wl,--wrap,sws_scale || \
|
||||||
|
disable xmm_clobber_test
|
||||||
|
|
||||||
echo "X{};" > $TMPV
|
echo "X{};" > $TMPV
|
||||||
if test_ldflags -Wl,--version-script,$TMPV; then
|
if test_ldflags -Wl,--version-script,$TMPV; then
|
||||||
append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver'
|
append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver'
|
||||||
|
@ -19,18 +19,18 @@ API changes, most recent first:
|
|||||||
2012-01-24 - xxxxxxx - lavfi 2.60.100
|
2012-01-24 - xxxxxxx - lavfi 2.60.100
|
||||||
Add avfilter_graph_dump.
|
Add avfilter_graph_dump.
|
||||||
|
|
||||||
2012-02-01 - xxxxxxx - lavc 54.01.0
|
2012-02-01 - 316fc74 - lavc 54.01.0
|
||||||
Add av_fast_padded_malloc() as alternative for av_realloc() when aligned
|
Add av_fast_padded_malloc() as alternative for av_realloc() when aligned
|
||||||
memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE
|
memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE
|
||||||
zero-padded bytes at the end.
|
zero-padded bytes at the end.
|
||||||
|
|
||||||
2012-01-31 - xxxxxxx - lavf 54.01.0
|
2012-01-31 - dd6d3b0 - lavf 54.01.0
|
||||||
Add avformat_get_riff_video_tags() and avformat_get_riff_audio_tags().
|
Add avformat_get_riff_video_tags() and avformat_get_riff_audio_tags().
|
||||||
|
|
||||||
2012-01-31 - xxxxxxx - lavc 54.01.0
|
2012-01-31 - af08d9a - lavc 54.01.0
|
||||||
Add avcodec_is_open() function.
|
Add avcodec_is_open() function.
|
||||||
|
|
||||||
2012-01-30 - xxxxxxx - lavu 51.22.0 - intfloat.h
|
2012-01-30 - 8b93312 - lavu 51.22.0 - intfloat.h
|
||||||
Add a new installed header libavutil/intfloat.h with int/float punning
|
Add a new installed header libavutil/intfloat.h with int/float punning
|
||||||
functions.
|
functions.
|
||||||
|
|
||||||
|
@ -497,6 +497,7 @@ following image formats are supported:
|
|||||||
@item Flash Screen Video v2 @tab X @tab X
|
@item Flash Screen Video v2 @tab X @tab X
|
||||||
@item Flash Video (FLV) @tab X @tab X
|
@item Flash Video (FLV) @tab X @tab X
|
||||||
@tab Sorenson H.263 used in Flash
|
@tab Sorenson H.263 used in Flash
|
||||||
|
@item Forward Uncompressed @tab @tab X
|
||||||
@item Fraps @tab @tab X
|
@item Fraps @tab @tab X
|
||||||
@item H.261 @tab X @tab X
|
@item H.261 @tab X @tab X
|
||||||
@item H.263 / H.263-1996 @tab X @tab X
|
@item H.263 / H.263-1996 @tab X @tab X
|
||||||
|
@ -34,24 +34,23 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1
|
|||||||
add r0, r0, r4, lsl #1 @ mask + band
|
add r0, r0, r4, lsl #1 @ mask + band
|
||||||
add r4, lr, r4
|
add r4, lr, r4
|
||||||
add r7, r7, r2 @ bap + start
|
add r7, r7, r2 @ bap + start
|
||||||
ldrb r10, [r4], #1
|
|
||||||
1:
|
1:
|
||||||
ldrsh r9, [r0], #2 @ mask[band]
|
ldrsh r9, [r0], #2 @ mask[band]
|
||||||
mov r8, #0xff0
|
mov r8, #0xff0
|
||||||
sub r9, r9, r12 @ - snr_offset
|
sub r9, r9, r12 @ - snr_offset
|
||||||
mov r11, r10
|
ldrb r10, [r4, #1]! @ band_start_tab[++band]
|
||||||
ldrb r10, [r4], #1 @ band_start_tab[band++]
|
|
||||||
subs r9, r9, r5 @ - floor
|
subs r9, r9, r5 @ - floor
|
||||||
it lt
|
it lt
|
||||||
movlt r9, #0
|
movlt r9, #0
|
||||||
cmp r10, r3 @ - end
|
cmp r10, r3 @ - end
|
||||||
and r9, r9, r8, lsl #1 @ & 0x1fe0
|
and r9, r9, r8, lsl #1 @ & 0x1fe0
|
||||||
ite gt
|
ite gt
|
||||||
subgt r8, r3, r11
|
subgt r8, r3, r2
|
||||||
suble r8, r10, r11
|
suble r8, r10, r2
|
||||||
|
mov r2, r10
|
||||||
add r9, r9, r5 @ + floor => m
|
add r9, r9, r5 @ + floor => m
|
||||||
tst r8, #1
|
tst r8, #1
|
||||||
add r2, r7, r8
|
add r11, r7, r8
|
||||||
bne 3f
|
bne 3f
|
||||||
b 5f
|
b 5f
|
||||||
2:
|
2:
|
||||||
@ -65,9 +64,9 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1
|
|||||||
ldrb lr, [r6, lr]
|
ldrb lr, [r6, lr]
|
||||||
strb r8, [r7], #1 @ bap[bin]
|
strb r8, [r7], #1 @ bap[bin]
|
||||||
strb lr, [r7], #1
|
strb lr, [r7], #1
|
||||||
5: cmp r7, r2
|
5: cmp r7, r11
|
||||||
blo 2b
|
blo 2b
|
||||||
cmp r3, r11
|
cmp r3, r10
|
||||||
bgt 1b
|
bgt 1b
|
||||||
pop {r4-r11,pc}
|
pop {r4-r11,pc}
|
||||||
3:
|
3:
|
||||||
|
@ -61,7 +61,7 @@ static int set_palette(BethsoftvidContext *ctx)
|
|||||||
palette[a] |= palette[a] >> 6 & 0x30303;
|
palette[a] |= palette[a] >> 6 & 0x30303;
|
||||||
}
|
}
|
||||||
ctx->frame.palette_has_changed = 1;
|
ctx->frame.palette_has_changed = 1;
|
||||||
return 256*3;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bethsoftvid_decode_frame(AVCodecContext *avctx,
|
static int bethsoftvid_decode_frame(AVCodecContext *avctx,
|
||||||
@ -88,7 +88,13 @@ static int bethsoftvid_decode_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
switch(block_type = bytestream2_get_byte(&vid->g)){
|
switch(block_type = bytestream2_get_byte(&vid->g)){
|
||||||
case PALETTE_BLOCK: {
|
case PALETTE_BLOCK: {
|
||||||
return set_palette(vid);
|
int ret;
|
||||||
|
*data_size = 0;
|
||||||
|
if ((ret = set_palette(vid)) < 0) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "error reading palette\n");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
return bytestream2_tell(&vid->g);
|
||||||
}
|
}
|
||||||
case VIDEO_YOFF_P_FRAME:
|
case VIDEO_YOFF_P_FRAME:
|
||||||
yoffset = bytestream2_get_le16(&vid->g);
|
yoffset = bytestream2_get_le16(&vid->g);
|
||||||
|
@ -130,6 +130,7 @@ int avpriv_unlock_avformat(void);
|
|||||||
* If avpkt->data is already set, avpkt->size is checked
|
* If avpkt->data is already set, avpkt->size is checked
|
||||||
* to ensure it is large enough.
|
* to ensure it is large enough.
|
||||||
* If avpkt->data is NULL, a new buffer is allocated.
|
* If avpkt->data is NULL, a new buffer is allocated.
|
||||||
|
* avpkt->size is set to the specified size.
|
||||||
* All other AVPacket fields will be reset with av_init_packet().
|
* All other AVPacket fields will be reset with av_init_packet().
|
||||||
* @param size the minimum required packet size
|
* @param size the minimum required packet size
|
||||||
* @return 0 on success, negative error code on failure
|
* @return 0 on success, negative error code on failure
|
||||||
|
@ -188,12 +188,12 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
bufsize = orig_bufsize;
|
bufsize = orig_bufsize;
|
||||||
if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
|
if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0);
|
bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0);
|
||||||
if (bufsize < 0)
|
if (bufsize < 0)
|
||||||
return -1;
|
return -1;
|
||||||
} while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc));
|
} while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc));
|
||||||
|
|
||||||
/* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */
|
/* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */
|
||||||
|
@ -66,8 +66,6 @@ typedef struct {
|
|||||||
int buf_size;
|
int buf_size;
|
||||||
AVLFG rnd;
|
AVLFG rnd;
|
||||||
int frames_to_skip;
|
int frames_to_skip;
|
||||||
uint8_t *buffer;
|
|
||||||
int buffer_size;
|
|
||||||
/* for synthesis */
|
/* for synthesis */
|
||||||
DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
|
DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
|
||||||
int synth_buf_offset[MPA_MAX_CHANNELS];
|
int synth_buf_offset[MPA_MAX_CHANNELS];
|
||||||
|
@ -200,34 +200,46 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
|
|||||||
int *got_frame_ptr, AVPacket *avpkt)
|
int *got_frame_ptr, AVPacket *avpkt)
|
||||||
{
|
{
|
||||||
const uint8_t *buf = avpkt->data;
|
const uint8_t *buf = avpkt->data;
|
||||||
int buf_size = avpkt->size;
|
int buf_size;
|
||||||
MPCContext *c = avctx->priv_data;
|
MPCContext *c = avctx->priv_data;
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
int i, ch;
|
int i, ch;
|
||||||
int mb = -1;
|
int mb = -1;
|
||||||
Band *bands = c->bands;
|
Band *bands = c->bands;
|
||||||
int off, ret;
|
int off, ret, last_frame, skip;
|
||||||
int bits_used, bits_avail;
|
int bits_used, bits_avail;
|
||||||
|
|
||||||
memset(bands, 0, sizeof(*bands) * (c->maxbands + 1));
|
memset(bands, 0, sizeof(*bands) * (c->maxbands + 1));
|
||||||
if(buf_size <= 4){
|
|
||||||
av_log(avctx, AV_LOG_ERROR, "Too small buffer passed (%i bytes)\n", buf_size);
|
buf_size = avpkt->size & ~3;
|
||||||
return AVERROR(EINVAL);
|
if (buf_size <= 0) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "packet size is too small (%i bytes)\n",
|
||||||
|
avpkt->size);
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
|
}
|
||||||
|
if (buf_size != avpkt->size) {
|
||||||
|
av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. "
|
||||||
|
"extra bytes at the end will be skipped.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
skip = buf[0];
|
||||||
|
last_frame = buf[1];
|
||||||
|
buf += 4;
|
||||||
|
buf_size -= 4;
|
||||||
|
|
||||||
/* get output buffer */
|
/* get output buffer */
|
||||||
c->frame.nb_samples = buf[1] ? c->lastframelen : MPC_FRAME_SIZE;
|
c->frame.nb_samples = last_frame ? c->lastframelen : MPC_FRAME_SIZE;
|
||||||
if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) {
|
if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
av_fast_padded_malloc(&c->buffer, &c->buffer_size, FFALIGN(buf_size - 1, 4));
|
av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size);
|
||||||
if (!c->buffer)
|
if (!c->bits)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
c->dsp.bswap_buf((uint32_t*)c->buffer, (const uint32_t*)(buf + 4), (buf_size - 4) >> 2);
|
c->dsp.bswap_buf((uint32_t *)c->bits, (const uint32_t *)buf, buf_size >> 2);
|
||||||
init_get_bits(&gb, c->buffer, (buf_size - 4)* 8);
|
init_get_bits(&gb, c->bits, buf_size * 8);
|
||||||
skip_bits_long(&gb, buf[0]);
|
skip_bits_long(&gb, skip);
|
||||||
|
|
||||||
/* read subband indexes */
|
/* read subband indexes */
|
||||||
for(i = 0; i <= c->maxbands; i++){
|
for(i = 0; i <= c->maxbands; i++){
|
||||||
@ -284,21 +296,21 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
|
|||||||
ff_mpc_dequantize_and_synth(c, mb, c->frame.data[0], 2);
|
ff_mpc_dequantize_and_synth(c, mb, c->frame.data[0], 2);
|
||||||
|
|
||||||
bits_used = get_bits_count(&gb);
|
bits_used = get_bits_count(&gb);
|
||||||
bits_avail = (buf_size - 4) * 8;
|
bits_avail = buf_size * 8;
|
||||||
if(!buf[1] && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))){
|
if (!last_frame && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))) {
|
||||||
av_log(NULL,0, "Error decoding frame: used %i of %i bits\n", bits_used, bits_avail);
|
av_log(NULL,0, "Error decoding frame: used %i of %i bits\n", bits_used, bits_avail);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if(c->frames_to_skip){
|
if(c->frames_to_skip){
|
||||||
c->frames_to_skip--;
|
c->frames_to_skip--;
|
||||||
*got_frame_ptr = 0;
|
*got_frame_ptr = 0;
|
||||||
return buf_size;
|
return avpkt->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
*got_frame_ptr = 1;
|
*got_frame_ptr = 1;
|
||||||
*(AVFrame *)data = c->frame;
|
*(AVFrame *)data = c->frame;
|
||||||
|
|
||||||
return buf_size;
|
return avpkt->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mpc7_decode_flush(AVCodecContext *avctx)
|
static void mpc7_decode_flush(AVCodecContext *avctx)
|
||||||
@ -312,8 +324,8 @@ static void mpc7_decode_flush(AVCodecContext *avctx)
|
|||||||
static av_cold int mpc7_decode_close(AVCodecContext *avctx)
|
static av_cold int mpc7_decode_close(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
MPCContext *c = avctx->priv_data;
|
MPCContext *c = avctx->priv_data;
|
||||||
av_freep(&c->buffer);
|
av_freep(&c->bits);
|
||||||
c->buffer_size = 0;
|
c->buf_size = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,7 +194,6 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
avpkt->size = frame->nb_samples * avctx->channels * sample_size;
|
|
||||||
*got_packet_ptr = 1;
|
*got_packet_ptr = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -521,5 +521,5 @@ AVCodec ff_ra_144_encoder = {
|
|||||||
.close = ra144_encode_close,
|
.close = ra144_encode_close,
|
||||||
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
|
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
|
||||||
AV_SAMPLE_FMT_NONE },
|
AV_SAMPLE_FMT_NONE },
|
||||||
.long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K) encoder"),
|
.long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"),
|
||||||
};
|
};
|
||||||
|
@ -919,16 +919,14 @@ int ff_alloc_packet(AVPacket *avpkt, int size)
|
|||||||
|
|
||||||
if (avpkt->data) {
|
if (avpkt->data) {
|
||||||
uint8_t *pkt_data;
|
uint8_t *pkt_data;
|
||||||
int pkt_size;
|
|
||||||
|
|
||||||
if (avpkt->size < size)
|
if (avpkt->size < size)
|
||||||
return AVERROR(EINVAL);
|
return AVERROR(EINVAL);
|
||||||
|
|
||||||
pkt_data = avpkt->data;
|
pkt_data = avpkt->data;
|
||||||
pkt_size = avpkt->size;
|
|
||||||
av_init_packet(avpkt);
|
av_init_packet(avpkt);
|
||||||
avpkt->data = pkt_data;
|
avpkt->data = pkt_data;
|
||||||
avpkt->size = pkt_size;
|
avpkt->size = size;
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
return av_new_packet(avpkt, size);
|
return av_new_packet(avpkt, size);
|
||||||
|
@ -156,7 +156,7 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1,
|
static inline void render_line_unrolled(intptr_t x, int y, int x1,
|
||||||
intptr_t sy, int ady, int adx,
|
intptr_t sy, int ady, int adx,
|
||||||
float *buf)
|
float *buf)
|
||||||
{
|
{
|
||||||
@ -168,30 +168,30 @@ static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1,
|
|||||||
if (err >= 0) {
|
if (err >= 0) {
|
||||||
err += ady - adx;
|
err += ady - adx;
|
||||||
y += sy;
|
y += sy;
|
||||||
buf[x++] = ff_vorbis_floor1_inverse_db_table[y];
|
buf[x++] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)];
|
||||||
}
|
}
|
||||||
buf[x] = ff_vorbis_floor1_inverse_db_table[y];
|
buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)];
|
||||||
}
|
}
|
||||||
if (x <= 0) {
|
if (x <= 0) {
|
||||||
if (err + ady >= 0)
|
if (err + ady >= 0)
|
||||||
y += sy;
|
y += sy;
|
||||||
buf[x] = ff_vorbis_floor1_inverse_db_table[y];
|
buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf)
|
static void render_line(int x0, int y0, int x1, int y1, float *buf)
|
||||||
{
|
{
|
||||||
int dy = y1 - y0;
|
int dy = y1 - y0;
|
||||||
int adx = x1 - x0;
|
int adx = x1 - x0;
|
||||||
int ady = FFABS(dy);
|
int ady = FFABS(dy);
|
||||||
int sy = dy < 0 ? -1 : 1;
|
int sy = dy < 0 ? -1 : 1;
|
||||||
buf[x0] = ff_vorbis_floor1_inverse_db_table[y0];
|
buf[x0] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y0)];
|
||||||
if (ady*2 <= adx) { // optimized common case
|
if (ady*2 <= adx) { // optimized common case
|
||||||
render_line_unrolled(x0, y0, x1, sy, ady, adx, buf);
|
render_line_unrolled(x0, y0, x1, sy, ady, adx, buf);
|
||||||
} else {
|
} else {
|
||||||
int base = dy / adx;
|
int base = dy / adx;
|
||||||
int x = x0;
|
int x = x0;
|
||||||
uint8_t y = y0;
|
int y = y0;
|
||||||
int err = -adx;
|
int err = -adx;
|
||||||
ady -= FFABS(base) * adx;
|
ady -= FFABS(base) * adx;
|
||||||
while (++x < x1) {
|
while (++x < x1) {
|
||||||
@ -201,7 +201,7 @@ static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf)
|
|||||||
err -= adx;
|
err -= adx;
|
||||||
y += sy;
|
y += sy;
|
||||||
}
|
}
|
||||||
buf[x] = ff_vorbis_floor1_inverse_db_table[y];
|
buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -210,8 +210,7 @@ void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values,
|
|||||||
uint16_t *y_list, int *flag,
|
uint16_t *y_list, int *flag,
|
||||||
int multiplier, float *out, int samples)
|
int multiplier, float *out, int samples)
|
||||||
{
|
{
|
||||||
int lx, i;
|
int lx, ly, i;
|
||||||
uint8_t ly;
|
|
||||||
lx = 0;
|
lx = 0;
|
||||||
ly = y_list[0] * multiplier;
|
ly = y_list[0] * multiplier;
|
||||||
for (i = 1; i < values; i++) {
|
for (i = 1; i < values; i++) {
|
||||||
|
@ -1256,20 +1256,20 @@ static int vorbis_floor1_decode(vorbis_context *vc,
|
|||||||
floor1_flag[i] = 1;
|
floor1_flag[i] = 1;
|
||||||
if (val >= room) {
|
if (val >= room) {
|
||||||
if (highroom > lowroom) {
|
if (highroom > lowroom) {
|
||||||
floor1_Y_final[i] = val - lowroom + predicted;
|
floor1_Y_final[i] = av_clip_uint16(val - lowroom + predicted);
|
||||||
} else {
|
} else {
|
||||||
floor1_Y_final[i] = predicted - val + highroom - 1;
|
floor1_Y_final[i] = av_clip_uint16(predicted - val + highroom - 1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (val & 1) {
|
if (val & 1) {
|
||||||
floor1_Y_final[i] = predicted - (val + 1) / 2;
|
floor1_Y_final[i] = av_clip_uint16(predicted - (val + 1) / 2);
|
||||||
} else {
|
} else {
|
||||||
floor1_Y_final[i] = predicted + val / 2;
|
floor1_Y_final[i] = av_clip_uint16(predicted + val / 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
floor1_flag[i] = 0;
|
floor1_flag[i] = 0;
|
||||||
floor1_Y_final[i] = predicted;
|
floor1_Y_final[i] = av_clip_uint16(predicted);
|
||||||
}
|
}
|
||||||
|
|
||||||
av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n",
|
av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n",
|
||||||
|
@ -83,3 +83,4 @@ OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \
|
|||||||
x86/mpegvideo_mmx.o \
|
x86/mpegvideo_mmx.o \
|
||||||
x86/simple_idct_mmx.o \
|
x86/simple_idct_mmx.o \
|
||||||
|
|
||||||
|
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
|
||||||
|
@ -1063,7 +1063,7 @@ emu_edge mmx
|
|||||||
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
|
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
|
||||||
; %5 = suffix
|
; %5 = suffix
|
||||||
%macro VECTOR_CLIP_INT32 4-5
|
%macro VECTOR_CLIP_INT32 4-5
|
||||||
cglobal vector_clip_int32%5, 5,5,%2, dst, src, min, max, len
|
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
|
||||||
%if %4
|
%if %4
|
||||||
cvtsi2ss m4, minm
|
cvtsi2ss m4, minm
|
||||||
cvtsi2ss m5, maxm
|
cvtsi2ss m5, maxm
|
||||||
|
80
libavcodec/x86/w64xmmtest.c
Normal file
80
libavcodec/x86/w64xmmtest.c
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* check XMM registers for clobbers on Win64
|
||||||
|
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavutil/x86/w64xmmtest.h"
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_open2(): wrap() names this function
 * __wrap_avcodec_open2, and testxmmclobbers() forwards the arguments to
 * __real_avcodec_open2, compares xmm6-xmm15 before and after the call,
 * aborts if they differ, and otherwise returns the call's result. */
wrap(avcodec_open2(AVCodecContext *avctx,
|
||||||
|
AVCodec *codec,
|
||||||
|
AVDictionary **options))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_open2, avctx, codec, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_decode_audio4(): forwards to
 * __real_avcodec_decode_audio4 through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_decode_audio4(AVCodecContext *avctx,
|
||||||
|
AVFrame *frame,
|
||||||
|
int *got_frame_ptr,
|
||||||
|
AVPacket *avpkt))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_decode_audio4, avctx, frame,
|
||||||
|
got_frame_ptr, avpkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_decode_video2(): forwards to
 * __real_avcodec_decode_video2 through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_decode_video2(AVCodecContext *avctx,
|
||||||
|
AVFrame *picture,
|
||||||
|
int *got_picture_ptr,
|
||||||
|
AVPacket *avpkt))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_decode_video2, avctx, picture,
|
||||||
|
got_picture_ptr, avpkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_decode_subtitle2(): forwards to
 * __real_avcodec_decode_subtitle2 through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_decode_subtitle2(AVCodecContext *avctx,
|
||||||
|
AVSubtitle *sub,
|
||||||
|
int *got_sub_ptr,
|
||||||
|
AVPacket *avpkt))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_decode_subtitle2, avctx, sub,
|
||||||
|
got_sub_ptr, avpkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_encode_audio2(): forwards to
 * __real_avcodec_encode_audio2 through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_encode_audio2(AVCodecContext *avctx,
|
||||||
|
AVPacket *avpkt,
|
||||||
|
const AVFrame *frame,
|
||||||
|
int *got_packet_ptr))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_encode_audio2, avctx, avpkt, frame,
|
||||||
|
got_packet_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_encode_video(): forwards to
 * __real_avcodec_encode_video through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_encode_video(AVCodecContext *avctx,
|
||||||
|
uint8_t *buf, int buf_size,
|
||||||
|
const AVFrame *pict))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for avcodec_encode_subtitle(): forwards to
 * __real_avcodec_encode_subtitle through testxmmclobbers(), which checks
 * that xmm6-xmm15 are unchanged across the call and aborts otherwise. */
wrap(avcodec_encode_subtitle(AVCodecContext *avctx,
|
||||||
|
uint8_t *buf, int buf_size,
|
||||||
|
const AVSubtitle *sub))
|
||||||
|
{
|
||||||
|
testxmmclobbers(avcodec_encode_subtitle, avctx, buf, buf_size, sub);
|
||||||
|
}
|
@ -129,6 +129,7 @@ typedef struct MOVStreamContext {
|
|||||||
int has_palette;
|
int has_palette;
|
||||||
int64_t data_size;
|
int64_t data_size;
|
||||||
uint32_t tmcd_flags; ///< tmcd track flags
|
uint32_t tmcd_flags; ///< tmcd track flags
|
||||||
|
int64_t track_end; ///< used for dts generation in fragmented movie files
|
||||||
} MOVStreamContext;
|
} MOVStreamContext;
|
||||||
|
|
||||||
typedef struct MOVContext {
|
typedef struct MOVContext {
|
||||||
|
@ -1012,6 +1012,32 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parse a 'dvc1' atom and store its trailing payload as the codec
 * extradata of the most recently added stream.
 *
 * Payload layout as consumed here: 1 byte profile/level, 6 bytes that are
 * skipped, then (atom.size - 7) bytes copied into st->codec->extradata.
 *
 * @param c    demuxer context; the last stream of c->fc is updated
 * @param pb   I/O context positioned at the start of the atom payload
 * @param atom atom header; atom.size must lie in [7, 1<<28)
 * @return 0 on success or when the atom is ignored,
 *         AVERROR_INVALIDDATA if atom.size is out of range,
 *         AVERROR(ENOMEM) if the extradata buffer cannot be allocated
 */
static int mov_read_dvc1(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    AVStream *st;
    uint8_t profile_level;

    if (c->fc->nb_streams < 1)
        return 0;
    st = c->fc->streams[c->fc->nb_streams - 1];

    if (atom.size >= (1 << 28) || atom.size < 7)
        return AVERROR_INVALIDDATA;

    profile_level = avio_r8(pb);
    /* '!=' binds tighter than '&', so the mask must be parenthesized;
     * without the parentheses this tested (profile_level & 1) instead of
     * the high nibble.
     * NOTE(review): 0xC in the high nibble is presumably the VC-1
     * advanced-profile marker -- confirm against the dvc1 box definition. */
    if ((profile_level & 0xf0) != 0xc0)
        return 0;

    av_free(st->codec->extradata);
    /* Keep pointer and size consistent if the allocation below fails. */
    st->codec->extradata_size = 0;
    st->codec->extradata = av_mallocz(atom.size - 7 + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!st->codec->extradata)
        return AVERROR(ENOMEM);
    st->codec->extradata_size = atom.size - 7;

    /* Skip the 6 bytes between the profile/level byte and the payload. */
    avio_seek(pb, 6, SEEK_CUR);
    avio_read(pb, st->codec->extradata, st->codec->extradata_size);
    return 0;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself,
|
* An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself,
|
||||||
* but can have extradata appended at the end after the 40 bytes belonging
|
* but can have extradata appended at the end after the 40 bytes belonging
|
||||||
@ -1706,6 +1732,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
|
|||||||
st->nb_frames= total_sample_count;
|
st->nb_frames= total_sample_count;
|
||||||
if (duration)
|
if (duration)
|
||||||
st->duration= duration;
|
st->duration= duration;
|
||||||
|
sc->track_end = duration;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2326,7 +2353,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
|
|||||||
|
|
||||||
if (flags & 0x001) data_offset = avio_rb32(pb);
|
if (flags & 0x001) data_offset = avio_rb32(pb);
|
||||||
if (flags & 0x004) first_sample_flags = avio_rb32(pb);
|
if (flags & 0x004) first_sample_flags = avio_rb32(pb);
|
||||||
dts = st->duration - sc->time_offset;
|
dts = sc->track_end - sc->time_offset;
|
||||||
offset = frag->base_data_offset + data_offset;
|
offset = frag->base_data_offset + data_offset;
|
||||||
distance = 0;
|
distance = 0;
|
||||||
av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags);
|
av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags);
|
||||||
@ -2356,7 +2383,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
|
|||||||
sc->data_size += sample_size;
|
sc->data_size += sample_size;
|
||||||
}
|
}
|
||||||
frag->moof_offset = offset;
|
frag->moof_offset = offset;
|
||||||
st->duration = dts + sc->time_offset;
|
st->duration = sc->track_end = dts + sc->time_offset;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2538,6 +2565,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
|
|||||||
{ MKTAG('w','f','e','x'), mov_read_wfex },
|
{ MKTAG('w','f','e','x'), mov_read_wfex },
|
||||||
{ MKTAG('c','m','o','v'), mov_read_cmov },
|
{ MKTAG('c','m','o','v'), mov_read_cmov },
|
||||||
{ MKTAG('c','h','a','n'), mov_read_chan }, /* channel layout */
|
{ MKTAG('c','h','a','n'), mov_read_chan }, /* channel layout */
|
||||||
|
{ MKTAG('d','v','c','1'), mov_read_dvc1 },
|
||||||
{ 0, NULL }
|
{ 0, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
71
libavutil/x86/w64xmmtest.h
Normal file
71
libavutil/x86/w64xmmtest.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
* check XMM registers for clobbers on Win64
|
||||||
|
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#include "libavutil/bswap.h"
|
||||||
|
|
||||||
|
/* storexmmregs(mem): store xmm6..xmm15 to the buffer at mem using
 * unaligned stores (movups), 16 bytes per register at offsets 0x00..0x90,
 * so mem must provide at least 0xA0 bytes. mem is passed in a general
 * register; the "memory" clobber tells the compiler that the asm writes
 * memory. */
#define storexmmregs(mem) \
|
||||||
|
__asm__ volatile( \
|
||||||
|
"movups %%xmm6 , 0x00(%0)\n\t" \
|
||||||
|
"movups %%xmm7 , 0x10(%0)\n\t" \
|
||||||
|
"movups %%xmm8 , 0x20(%0)\n\t" \
|
||||||
|
"movups %%xmm9 , 0x30(%0)\n\t" \
|
||||||
|
"movups %%xmm10, 0x40(%0)\n\t" \
|
||||||
|
"movups %%xmm11, 0x50(%0)\n\t" \
|
||||||
|
"movups %%xmm12, 0x60(%0)\n\t" \
|
||||||
|
"movups %%xmm13, 0x70(%0)\n\t" \
|
||||||
|
"movups %%xmm14, 0x80(%0)\n\t" \
|
||||||
|
"movups %%xmm15, 0x90(%0)\n\t" \
|
||||||
|
:: "r"(mem) : "memory")
|
||||||
|
|
||||||
|
/* testxmmclobbers(func, ctx, ...): function-body macro.
 *
 * Snapshots xmm6..xmm15 with storexmmregs(), calls
 * __real_<func>(ctx, ...), snapshots the registers again and compares the
 * two snapshots. If they differ, it logs the function name and the
 * before/after value of every changed register, then calls abort().
 * Otherwise it returns the int result of the real call.
 *
 * The expansion declares the locals xmm and ret and ends in "return ret"
 * without a semicolon, so it is meant to be the tail of a function body,
 * with the caller supplying the trailing ';'.
 *
 * NOTE(review): the expansion uses memcmp(), PRIx64 and av_log(), none of
 * which are declared by the headers this file includes itself; the
 * including translation unit must provide them. */
#define testxmmclobbers(func, ctx, ...) \
|
||||||
|
uint64_t xmm[2][10][2]; \
|
||||||
|
int ret; \
|
||||||
|
storexmmregs(xmm[0]); \
|
||||||
|
ret = __real_ ## func(ctx, __VA_ARGS__); \
|
||||||
|
storexmmregs(xmm[1]); \
|
||||||
|
if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \
|
||||||
|
int i; \
|
||||||
|
av_log(ctx, AV_LOG_ERROR, \
|
||||||
|
"XMM REGS CLOBBERED IN %s!\n", #func); \
|
||||||
|
for (i = 0; i < 10; i ++) \
|
||||||
|
if (xmm[0][i][0] != xmm[1][i][0] || \
|
||||||
|
xmm[0][i][1] != xmm[1][i][1]) { \
|
||||||
|
av_log(ctx, AV_LOG_ERROR, \
|
||||||
|
"xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \
|
||||||
|
6 + i, av_bswap64(xmm[0][i][0]), \
|
||||||
|
av_bswap64(xmm[0][i][1])); \
|
||||||
|
av_log(ctx, AV_LOG_ERROR, \
|
||||||
|
" -> %016"PRIx64"%016"PRIx64"\n", \
|
||||||
|
av_bswap64(xmm[1][i][0]), \
|
||||||
|
av_bswap64(xmm[1][i][1])); \
|
||||||
|
} \
|
||||||
|
abort(); \
|
||||||
|
} \
|
||||||
|
return ret
|
||||||
|
|
||||||
|
#define wrap(func) \
|
||||||
|
int __real_ ## func; \
|
||||||
|
int __wrap_ ## func; \
|
||||||
|
int __wrap_ ## func
|
@ -25,6 +25,8 @@ MMX-OBJS-$(HAVE_YASM) += x86/input.o \
|
|||||||
|
|
||||||
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
|
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
|
||||||
|
|
||||||
|
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
|
||||||
|
|
||||||
TESTPROGS = colorspace swscale
|
TESTPROGS = colorspace swscale
|
||||||
|
|
||||||
DIRS = bfin mlib ppc sparc x86
|
DIRS = bfin mlib ppc sparc x86
|
||||||
|
@ -30,11 +30,11 @@ and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts.
|
|||||||
|
|
||||||
The following calculation is used for the conversion:
|
The following calculation is used for the conversion:
|
||||||
|
|
||||||
r = clipz((y-oy)*cy + crv*(v-128))
|
r = clipz((y - oy) * cy + crv * (v - 128))
|
||||||
g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128))
|
g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128))
|
||||||
b = clipz((y-oy)*cy + cbu*(u-128))
|
b = clipz((y - oy) * cy + cbu * (u - 128))
|
||||||
|
|
||||||
y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
|
y, u, v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
|
||||||
|
|
||||||
|
|
||||||
New factorization to eliminate the truncation error which was
|
New factorization to eliminate the truncation error which was
|
||||||
@ -47,7 +47,7 @@ occurring due to the byteop3p.
|
|||||||
2) Scale operands up by a factor of 4 not 8 because Blackfin
|
2) Scale operands up by a factor of 4 not 8 because Blackfin
|
||||||
multiplies include a shift.
|
multiplies include a shift.
|
||||||
|
|
||||||
3) Compute into the accumulators cy*yx0, cy*yx1.
|
3) Compute into the accumulators cy * yx0, cy * yx1.
|
||||||
|
|
||||||
4) Compute each of the linear equations:
|
4) Compute each of the linear equations:
|
||||||
r = clipz((y - oy) * cy + crv * (v - 128))
|
r = clipz((y - oy) * cy + crv * (v - 128))
|
||||||
@ -73,7 +73,7 @@ occurring due to the byteop3p.
|
|||||||
|
|
||||||
Where coeffs have the following layout in memory.
|
Where coeffs have the following layout in memory.
|
||||||
|
|
||||||
uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv;
|
uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv;
|
||||||
|
|
||||||
coeffs is a pointer to oy.
|
coeffs is a pointer to oy.
|
||||||
|
|
||||||
|
@ -27,32 +27,34 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "libswscale/rgb2rgb.h"
|
#include "libswscale/rgb2rgb.h"
|
||||||
#include "libswscale/swscale.h"
|
#include "libswscale/swscale.h"
|
||||||
#include "libswscale/swscale_internal.h"
|
#include "libswscale/swscale_internal.h"
|
||||||
|
|
||||||
#if defined (__FDPIC__) && CONFIG_SRAM
|
#if defined (__FDPIC__) && CONFIG_SRAM
|
||||||
#define L1CODE __attribute__ ((l1_text))
|
#define L1CODE __attribute__((l1_text))
|
||||||
#else
|
#else
|
||||||
#define L1CODE
|
#define L1CODE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
|
||||||
int width, int height,
|
uint8_t *vdst, int width, int height,
|
||||||
int lumStride, int chromStride, int srcStride) L1CODE;
|
int lumStride, int chromStride, int srcStride) L1CODE;
|
||||||
|
|
||||||
int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
|
||||||
int width, int height,
|
uint8_t *vdst, int width, int height,
|
||||||
int lumStride, int chromStride, int srcStride) L1CODE;
|
int lumStride, int chromStride, int srcStride) L1CODE;
|
||||||
|
|
||||||
static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
|
static int uyvytoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[],
|
||||||
int srcSliceH, uint8_t* dst[], int dstStride[])
|
int srcSliceY, int srcSliceH, uint8_t *dst[],
|
||||||
|
int dstStride[])
|
||||||
{
|
{
|
||||||
uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY;
|
uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY;
|
||||||
uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2;
|
uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2;
|
||||||
uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2;
|
uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2;
|
||||||
uint8_t *ip = src[0] + srcStride[0]*srcSliceY;
|
uint8_t *ip = src[0] + srcStride[0] * srcSliceY;
|
||||||
int w = dstStride[0];
|
int w = dstStride[0];
|
||||||
|
|
||||||
ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH,
|
ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH,
|
||||||
dstStride[0], dstStride[1], srcStride[0]);
|
dstStride[0], dstStride[1], srcStride[0]);
|
||||||
@ -60,14 +62,15 @@ static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
|
|||||||
return srcSliceH;
|
return srcSliceH;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
|
static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[],
|
||||||
int srcSliceH, uint8_t* dst[], int dstStride[])
|
int srcSliceY, int srcSliceH, uint8_t *dst[],
|
||||||
|
int dstStride[])
|
||||||
{
|
{
|
||||||
uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY;
|
uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY;
|
||||||
uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2;
|
uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2;
|
||||||
uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2;
|
uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2;
|
||||||
uint8_t *ip = src[0] + srcStride[0]*srcSliceY;
|
uint8_t *ip = src[0] + srcStride[0] * srcSliceY;
|
||||||
int w = dstStride[0];
|
int w = dstStride[0];
|
||||||
|
|
||||||
ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH,
|
ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH,
|
||||||
dstStride[0], dstStride[1], srcStride[0]);
|
dstStride[0], dstStride[1], srcStride[0]);
|
||||||
@ -75,15 +78,16 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
|
|||||||
return srcSliceH;
|
return srcSliceH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ff_bfin_get_unscaled_swscale(SwsContext *c)
|
void ff_bfin_get_unscaled_swscale(SwsContext *c)
|
||||||
{
|
{
|
||||||
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) {
|
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) {
|
||||||
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
|
av_log(NULL, AV_LOG_VERBOSE,
|
||||||
|
"selecting Blackfin optimized uyvytoyv12_unscaled\n");
|
||||||
c->swScale = uyvytoyv12_unscaled;
|
c->swScale = uyvytoyv12_unscaled;
|
||||||
}
|
}
|
||||||
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) {
|
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) {
|
||||||
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
|
av_log(NULL, AV_LOG_VERBOSE,
|
||||||
|
"selecting Blackfin optimized yuyvtoyv12_unscaled\n");
|
||||||
c->swScale = yuyvtoyv12_unscaled;
|
c->swScale = yuyvtoyv12_unscaled;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,15 +26,16 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "config.h"
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include "libavutil/pixdesc.h"
|
#include "libavutil/pixdesc.h"
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
#include "libswscale/rgb2rgb.h"
|
#include "libswscale/rgb2rgb.h"
|
||||||
#include "libswscale/swscale.h"
|
#include "libswscale/swscale.h"
|
||||||
#include "libswscale/swscale_internal.h"
|
#include "libswscale/swscale_internal.h"
|
||||||
|
|
||||||
#if defined(__FDPIC__) && CONFIG_SRAM
|
#if defined(__FDPIC__) && CONFIG_SRAM
|
||||||
#define L1CODE __attribute__ ((l1_text))
|
#define L1CODE __attribute__((l1_text))
|
||||||
#else
|
#else
|
||||||
#define L1CODE
|
#define L1CODE
|
||||||
#endif
|
#endif
|
||||||
@ -48,21 +49,20 @@ void ff_bfin_yuv2rgb565_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
|
|||||||
void ff_bfin_yuv2rgb24_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
|
void ff_bfin_yuv2rgb24_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
|
||||||
int w, uint32_t *coeffs) L1CODE;
|
int w, uint32_t *coeffs) L1CODE;
|
||||||
|
|
||||||
typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
|
typedef void (*ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
|
||||||
int w, uint32_t *coeffs);
|
int w, uint32_t *coeffs);
|
||||||
|
|
||||||
|
|
||||||
static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks)
|
static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks)
|
||||||
{
|
{
|
||||||
int oy;
|
int oy;
|
||||||
oy = c->yOffset&0xffff;
|
oy = c->yOffset & 0xffff;
|
||||||
oy = oy >> 3; // keep everything U8.0 for offset calculation
|
oy = oy >> 3; // keep everything U8.0 for offset calculation
|
||||||
|
|
||||||
c->oc = 128*0x01010101U;
|
c->oc = 128 * 0x01010101U;
|
||||||
c->oy = oy*0x01010101U;
|
c->oy = oy * 0x01010101U;
|
||||||
|
|
||||||
/* copy 64bit vector coeffs down to 32bit vector coeffs */
|
/* copy 64bit vector coeffs down to 32bit vector coeffs */
|
||||||
c->cy = c->yCoeff;
|
c->cy = c->yCoeff;
|
||||||
c->zero = 0;
|
c->zero = 0;
|
||||||
|
|
||||||
if (rgb) {
|
if (rgb) {
|
||||||
@ -77,7 +77,6 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks)
|
|||||||
c->cgv = c->ugCoeff;
|
c->cgv = c->ugCoeff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (masks == 555) {
|
if (masks == 555) {
|
||||||
c->rmask = 0x001f * 0x00010001U;
|
c->rmask = 0x001f * 0x00010001U;
|
||||||
c->gmask = 0x03e0 * 0x00010001U;
|
c->gmask = 0x03e0 * 0x00010001U;
|
||||||
@ -89,27 +88,25 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int core_yuv420_rgb(SwsContext *c,
|
static int core_yuv420_rgb(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
int srcSliceY, int srcSliceH, uint8_t **oplanes,
|
||||||
int srcSliceY, int srcSliceH,
|
int *outstrides, ltransform lcscf,
|
||||||
uint8_t **oplanes, int *outstrides,
|
int rgb, int masks)
|
||||||
ltransform lcscf, int rgb, int masks)
|
|
||||||
{
|
{
|
||||||
uint8_t *py,*pu,*pv,*op;
|
uint8_t *py, *pu, *pv, *op;
|
||||||
int w = instrides[0];
|
int w = instrides[0];
|
||||||
int h2 = srcSliceH>>1;
|
int h2 = srcSliceH >> 1;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
bfin_prepare_coefficients(c, rgb, masks);
|
bfin_prepare_coefficients(c, rgb, masks);
|
||||||
|
|
||||||
py = in[0];
|
py = in[0];
|
||||||
pu = in[1+(1^rgb)];
|
pu = in[1 + (1 ^ rgb)];
|
||||||
pv = in[1+(0^rgb)];
|
pv = in[1 + (0 ^ rgb)];
|
||||||
|
|
||||||
op = oplanes[0] + srcSliceY*outstrides[0];
|
op = oplanes[0] + srcSliceY * outstrides[0];
|
||||||
|
|
||||||
for (i=0;i<h2;i++) {
|
|
||||||
|
|
||||||
|
for (i = 0; i < h2; i++) {
|
||||||
lcscf(py, pu, pv, op, w, &c->oy);
|
lcscf(py, pu, pv, op, w, &c->oy);
|
||||||
|
|
||||||
py += instrides[0];
|
py += instrides[0];
|
||||||
@ -126,9 +123,7 @@ static int core_yuv420_rgb(SwsContext *c,
|
|||||||
return srcSliceH;
|
return srcSliceH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bfin_yuv420_rgb555(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
static int bfin_yuv420_rgb555(SwsContext *c,
|
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -136,8 +131,7 @@ static int bfin_yuv420_rgb555(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb555_line, 1, 555);
|
outstrides, ff_bfin_yuv2rgb555_line, 1, 555);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bfin_yuv420_bgr555(SwsContext *c,
|
static int bfin_yuv420_bgr555(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -145,8 +139,7 @@ static int bfin_yuv420_bgr555(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb555_line, 0, 555);
|
outstrides, ff_bfin_yuv2rgb555_line, 0, 555);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bfin_yuv420_rgb24(SwsContext *c,
|
static int bfin_yuv420_rgb24(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -154,8 +147,7 @@ static int bfin_yuv420_rgb24(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb24_line, 1, 888);
|
outstrides, ff_bfin_yuv2rgb24_line, 1, 888);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bfin_yuv420_bgr24(SwsContext *c,
|
static int bfin_yuv420_bgr24(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -163,8 +155,7 @@ static int bfin_yuv420_bgr24(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb24_line, 0, 888);
|
outstrides, ff_bfin_yuv2rgb24_line, 0, 888);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bfin_yuv420_rgb565(SwsContext *c,
|
static int bfin_yuv420_rgb565(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -172,8 +163,7 @@ static int bfin_yuv420_rgb565(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb565_line, 1, 565);
|
outstrides, ff_bfin_yuv2rgb565_line, 1, 565);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bfin_yuv420_bgr565(SwsContext *c,
|
static int bfin_yuv420_bgr565(SwsContext *c, uint8_t **in, int *instrides,
|
||||||
uint8_t **in, int *instrides,
|
|
||||||
int srcSliceY, int srcSliceH,
|
int srcSliceY, int srcSliceH,
|
||||||
uint8_t **oplanes, int *outstrides)
|
uint8_t **oplanes, int *outstrides)
|
||||||
{
|
{
|
||||||
@ -181,18 +171,29 @@ static int bfin_yuv420_bgr565(SwsContext *c,
|
|||||||
outstrides, ff_bfin_yuv2rgb565_line, 0, 565);
|
outstrides, ff_bfin_yuv2rgb565_line, 0, 565);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c)
|
SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c)
|
||||||
{
|
{
|
||||||
SwsFunc f;
|
SwsFunc f;
|
||||||
|
|
||||||
switch(c->dstFormat) {
|
switch (c->dstFormat) {
|
||||||
case PIX_FMT_RGB555: f = bfin_yuv420_rgb555; break;
|
case PIX_FMT_RGB555:
|
||||||
case PIX_FMT_BGR555: f = bfin_yuv420_bgr555; break;
|
f = bfin_yuv420_rgb555;
|
||||||
case PIX_FMT_RGB565: f = bfin_yuv420_rgb565; break;
|
break;
|
||||||
case PIX_FMT_BGR565: f = bfin_yuv420_bgr565; break;
|
case PIX_FMT_BGR555:
|
||||||
case PIX_FMT_RGB24: f = bfin_yuv420_rgb24; break;
|
f = bfin_yuv420_bgr555;
|
||||||
case PIX_FMT_BGR24: f = bfin_yuv420_bgr24; break;
|
break;
|
||||||
|
case PIX_FMT_RGB565:
|
||||||
|
f = bfin_yuv420_rgb565;
|
||||||
|
break;
|
||||||
|
case PIX_FMT_BGR565:
|
||||||
|
f = bfin_yuv420_bgr565;
|
||||||
|
break;
|
||||||
|
case PIX_FMT_RGB24:
|
||||||
|
f = bfin_yuv420_rgb24;
|
||||||
|
break;
|
||||||
|
case PIX_FMT_BGR24:
|
||||||
|
f = bfin_yuv420_bgr24;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,19 @@ bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
|
|||||||
rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
|
rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
|
||||||
rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
|
rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
|
||||||
|
|
||||||
|
rgba_Ycoeff_rb: times 4 dw RY, BY
|
||||||
|
rgba_Ycoeff_br: times 4 dw BY, RY
|
||||||
|
rgba_Ycoeff_ga: times 4 dw GY, 0
|
||||||
|
rgba_Ycoeff_ag: times 4 dw 0, GY
|
||||||
|
rgba_Ucoeff_rb: times 4 dw RU, BU
|
||||||
|
rgba_Ucoeff_br: times 4 dw BU, RU
|
||||||
|
rgba_Ucoeff_ga: times 4 dw GU, 0
|
||||||
|
rgba_Ucoeff_ag: times 4 dw 0, GU
|
||||||
|
rgba_Vcoeff_rb: times 4 dw RV, BV
|
||||||
|
rgba_Vcoeff_br: times 4 dw BV, RV
|
||||||
|
rgba_Vcoeff_ga: times 4 dw GV, 0
|
||||||
|
rgba_Vcoeff_ag: times 4 dw 0, GV
|
||||||
|
|
||||||
shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \
|
shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \
|
||||||
6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80
|
6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80
|
||||||
shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \
|
shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \
|
||||||
@ -294,6 +307,150 @@ RGB24_FUNCS 11, 13
|
|||||||
INIT_XMM avx
|
INIT_XMM avx
|
||||||
RGB24_FUNCS 11, 13
|
RGB24_FUNCS 11, 13
|
||||||
|
|
||||||
|
; %1 = nr. of XMM registers
|
||||||
|
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
|
||||||
|
%macro RGB32_TO_Y_FN 5-6
|
||||||
|
cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
|
||||||
|
mova m5, [rgba_Ycoeff_%2%4]
|
||||||
|
mova m6, [rgba_Ycoeff_%3%5]
|
||||||
|
%if %0 == 6
|
||||||
|
jmp mangle(program_name %+ _ %+ %6 %+ ToY %+ SUFFIX).body
|
||||||
|
%else ; %0 == 6
|
||||||
|
.body:
|
||||||
|
%if ARCH_X86_64
|
||||||
|
movsxd wq, wd
|
||||||
|
%endif
|
||||||
|
lea srcq, [srcq+wq*4]
|
||||||
|
add wq, wq
|
||||||
|
add dstq, wq
|
||||||
|
neg wq
|
||||||
|
mova m4, [rgb_Yrnd]
|
||||||
|
pcmpeqb m7, m7
|
||||||
|
psrlw m7, 8 ; (word) { 0x00ff } x4
|
||||||
|
.loop:
|
||||||
|
; FIXME check alignment and use mova
|
||||||
|
movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
|
||||||
|
movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
|
||||||
|
DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
|
||||||
|
pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3]
|
||||||
|
pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3]
|
||||||
|
pmaddwd m3, m5 ; (dword) { Bx*BY + Rx*RY }[4-7]
|
||||||
|
pmaddwd m2, m6 ; (dword) { Gx*GY }[4-7]
|
||||||
|
paddd m0, m4 ; += rgb_Yrnd
|
||||||
|
paddd m2, m4 ; += rgb_Yrnd
|
||||||
|
paddd m0, m1 ; (dword) { Y[0-3] }
|
||||||
|
paddd m2, m3 ; (dword) { Y[4-7] }
|
||||||
|
psrad m0, 9
|
||||||
|
psrad m2, 9
|
||||||
|
packssdw m0, m2 ; (word) { Y[0-7] }
|
||||||
|
mova [dstq+wq], m0
|
||||||
|
add wq, mmsize
|
||||||
|
jl .loop
|
||||||
|
REP_RET
|
||||||
|
%endif ; %0 == 6
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; %1 = nr. of XMM registers
|
||||||
|
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
|
||||||
|
%macro RGB32_TO_UV_FN 5-6
|
||||||
|
cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
|
||||||
|
%if ARCH_X86_64
|
||||||
|
mova m8, [rgba_Ucoeff_%2%4]
|
||||||
|
mova m9, [rgba_Ucoeff_%3%5]
|
||||||
|
mova m10, [rgba_Vcoeff_%2%4]
|
||||||
|
mova m11, [rgba_Vcoeff_%3%5]
|
||||||
|
%define coeffU1 m8
|
||||||
|
%define coeffU2 m9
|
||||||
|
%define coeffV1 m10
|
||||||
|
%define coeffV2 m11
|
||||||
|
%else ; x86-32
|
||||||
|
%define coeffU1 [rgba_Ucoeff_%2%4]
|
||||||
|
%define coeffU2 [rgba_Ucoeff_%3%5]
|
||||||
|
%define coeffV1 [rgba_Vcoeff_%2%4]
|
||||||
|
%define coeffV2 [rgba_Vcoeff_%3%5]
|
||||||
|
%endif ; x86-64/32
|
||||||
|
%if ARCH_X86_64 && %0 == 6
|
||||||
|
jmp mangle(program_name %+ _ %+ %6 %+ ToUV %+ SUFFIX).body
|
||||||
|
%else ; ARCH_X86_64 && %0 == 6
|
||||||
|
.body:
|
||||||
|
%if ARCH_X86_64
|
||||||
|
movsxd wq, dword r5m
|
||||||
|
%else ; x86-32
|
||||||
|
mov wq, r5m
|
||||||
|
%endif
|
||||||
|
add wq, wq
|
||||||
|
add dstUq, wq
|
||||||
|
add dstVq, wq
|
||||||
|
lea srcq, [srcq+wq*2]
|
||||||
|
neg wq
|
||||||
|
pcmpeqb m7, m7
|
||||||
|
psrlw m7, 8 ; (word) { 0x00ff } x4
|
||||||
|
mova m6, [rgb_UVrnd]
|
||||||
|
.loop:
|
||||||
|
; FIXME check alignment and use mova
|
||||||
|
movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
|
||||||
|
movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
|
||||||
|
DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
|
||||||
|
pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3]
|
||||||
|
pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3]
|
||||||
|
pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3]
|
||||||
|
pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3]
|
||||||
|
paddd m3, m6 ; += rgb_UVrnd
|
||||||
|
paddd m1, m6 ; += rgb_UVrnd
|
||||||
|
paddd m2, m3 ; (dword) { V[0-3] }
|
||||||
|
paddd m0, m1 ; (dword) { U[0-3] }
|
||||||
|
pmaddwd m3, m5, coeffV1 ; (dword) { Bx*BV + Rx*RV }[4-7]
|
||||||
|
pmaddwd m1, m4, coeffV2 ; (dword) { Gx*GV }[4-7]
|
||||||
|
pmaddwd m5, coeffU1 ; (dword) { Bx*BU + Rx*RU }[4-7]
|
||||||
|
pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7]
|
||||||
|
paddd m3, m6 ; += rgb_UVrnd
|
||||||
|
paddd m5, m6 ; += rgb_UVrnd
|
||||||
|
psrad m0, 9
|
||||||
|
paddd m1, m3 ; (dword) { V[4-7] }
|
||||||
|
paddd m4, m5 ; (dword) { U[4-7] }
|
||||||
|
psrad m2, 9
|
||||||
|
psrad m4, 9
|
||||||
|
psrad m1, 9
|
||||||
|
packssdw m0, m4 ; (word) { U[0-7] }
|
||||||
|
packssdw m2, m1 ; (word) { V[0-7] }
|
||||||
|
%if mmsize == 8
|
||||||
|
mova [dstUq+wq], m0
|
||||||
|
mova [dstVq+wq], m2
|
||||||
|
%else ; mmsize == 16
|
||||||
|
mova [dstUq+wq], m0
|
||||||
|
mova [dstVq+wq], m2
|
||||||
|
%endif ; mmsize == 8/16
|
||||||
|
add wq, mmsize
|
||||||
|
jl .loop
|
||||||
|
REP_RET
|
||||||
|
%endif ; ARCH_X86_64 && %0 == 6
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; %1 = nr. of XMM registers for rgb-to-Y func
|
||||||
|
; %2 = nr. of XMM registers for rgb-to-UV func
|
||||||
|
%macro RGB32_FUNCS 2
|
||||||
|
RGB32_TO_Y_FN %1, r, g, b, a
|
||||||
|
RGB32_TO_Y_FN %1, b, g, r, a, rgba
|
||||||
|
RGB32_TO_Y_FN %1, a, r, g, b, rgba
|
||||||
|
RGB32_TO_Y_FN %1, a, b, g, r, rgba
|
||||||
|
|
||||||
|
RGB32_TO_UV_FN %2, r, g, b, a
|
||||||
|
RGB32_TO_UV_FN %2, b, g, r, a, rgba
|
||||||
|
RGB32_TO_UV_FN %2, a, r, g, b, rgba
|
||||||
|
RGB32_TO_UV_FN %2, a, b, g, r, rgba
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%if ARCH_X86_32
|
||||||
|
INIT_MMX mmx
|
||||||
|
RGB32_FUNCS 0, 0
|
||||||
|
%endif
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
|
RGB32_FUNCS 8, 12
|
||||||
|
|
||||||
|
INIT_XMM avx
|
||||||
|
RGB32_FUNCS 8, 12
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
|
; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
|
||||||
;
|
;
|
||||||
|
@ -308,6 +308,10 @@ extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
|
|||||||
INPUT_FUNC(yuyv, opt); \
|
INPUT_FUNC(yuyv, opt); \
|
||||||
INPUT_UV_FUNC(nv12, opt); \
|
INPUT_UV_FUNC(nv12, opt); \
|
||||||
INPUT_UV_FUNC(nv21, opt); \
|
INPUT_UV_FUNC(nv21, opt); \
|
||||||
|
INPUT_FUNC(rgba, opt); \
|
||||||
|
INPUT_FUNC(bgra, opt); \
|
||||||
|
INPUT_FUNC(argb, opt); \
|
||||||
|
INPUT_FUNC(abgr, opt); \
|
||||||
INPUT_FUNC(rgb24, opt); \
|
INPUT_FUNC(rgb24, opt); \
|
||||||
INPUT_FUNC(bgr24, opt)
|
INPUT_FUNC(bgr24, opt)
|
||||||
|
|
||||||
@ -406,6 +410,10 @@ switch(c->dstBpc){ \
|
|||||||
break;
|
break;
|
||||||
case_rgb(rgb24, RGB24, mmx);
|
case_rgb(rgb24, RGB24, mmx);
|
||||||
case_rgb(bgr24, BGR24, mmx);
|
case_rgb(bgr24, BGR24, mmx);
|
||||||
|
case_rgb(bgra, BGRA, mmx);
|
||||||
|
case_rgb(rgba, RGBA, mmx);
|
||||||
|
case_rgb(abgr, ABGR, mmx);
|
||||||
|
case_rgb(argb, ARGB, mmx);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -450,6 +458,10 @@ switch(c->dstBpc){ \
|
|||||||
break;
|
break;
|
||||||
case_rgb(rgb24, RGB24, sse2);
|
case_rgb(rgb24, RGB24, sse2);
|
||||||
case_rgb(bgr24, BGR24, sse2);
|
case_rgb(bgr24, BGR24, sse2);
|
||||||
|
case_rgb(bgra, BGRA, sse2);
|
||||||
|
case_rgb(rgba, RGBA, sse2);
|
||||||
|
case_rgb(abgr, ABGR, sse2);
|
||||||
|
case_rgb(argb, ARGB, sse2);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -493,6 +505,10 @@ switch(c->dstBpc){ \
|
|||||||
break;
|
break;
|
||||||
case_rgb(rgb24, RGB24, avx);
|
case_rgb(rgb24, RGB24, avx);
|
||||||
case_rgb(bgr24, BGR24, avx);
|
case_rgb(bgr24, BGR24, avx);
|
||||||
|
case_rgb(bgra, BGRA, avx);
|
||||||
|
case_rgb(rgba, RGBA, avx);
|
||||||
|
case_rgb(abgr, ABGR, avx);
|
||||||
|
case_rgb(argb, ARGB, avx);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
31
libswscale/x86/w64xmmtest.c
Normal file
31
libswscale/x86/w64xmmtest.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* check XMM registers for clobbers on Win64
|
||||||
|
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/x86/w64xmmtest.h"
|
||||||
|
#include "libswscale/swscale.h"
|
||||||
|
|
||||||
|
wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
|
||||||
|
const int srcStride[], int srcSliceY, int srcSliceH,
|
||||||
|
uint8_t *const dst[], const int dstStride[]))
|
||||||
|
{
|
||||||
|
testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY,
|
||||||
|
srcSliceH, dst, dstStride);
|
||||||
|
}
|
@ -368,7 +368,7 @@ $tiny_psnr $pcm_dst $pcm_ref 2 1924
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "$do_ac3_fixed" ] ; then
|
if [ -n "$do_ac3_fixed" ] ; then
|
||||||
do_audio_encoding ac3.rm "-vn -acodec ac3_fixed"
|
do_audio_encoding ac3.ac3 "-vn -acodec ac3_fixed"
|
||||||
# binaries configured with --disable-sse decode ac3 differently
|
# binaries configured with --disable-sse decode ac3 differently
|
||||||
#do_audio_decoding
|
#do_audio_decoding
|
||||||
#$tiny_psnr $pcm_dst $pcm_ref 2 1024
|
#$tiny_psnr $pcm_dst $pcm_ref 2 1024
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
e7fa185030a56d9db8663ad9e38c6c94 *./tests/data/acodec/ac3.rm
|
a1d1fc116463b771abf5aef7ed37d7b1 *./tests/data/acodec/ac3.ac3
|
||||||
98751 ./tests/data/acodec/ac3.rm
|
96408 ./tests/data/acodec/ac3.ac3
|
||||||
|
@ -117,4 +117,3 @@
|
|||||||
0, 438750, 37440, 0xf0fe8c1c
|
0, 438750, 37440, 0xf0fe8c1c
|
||||||
0, 442500, 37440, 0xc0036222
|
0, 442500, 37440, 0xc0036222
|
||||||
0, 446250, 37440, 0x3058385c
|
0, 446250, 37440, 0x3058385c
|
||||||
0, 450000, 37440, 0x68141016
|
|
||||||
|
49
tests/ref/seek/ac3_ac3
Normal file
49
tests/ref/seek/ac3_ac3
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556
|
||||||
|
ret: 0 st:-1 flags:0 ts:-1.000000
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556
|
||||||
|
ret: 0 st:-1 flags:1 ts: 1.894167
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.880400 pts: 1.880400 pos: 30092 size: 558
|
||||||
|
ret: 0 st: 0 flags:0 ts: 0.788333
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.800911 pts: 0.800911 pos: 12818 size: 556
|
||||||
|
ret:-1 st: 0 flags:1 ts:-0.317500
|
||||||
|
ret: 0 st:-1 flags:0 ts: 2.576668
|
||||||
|
ret: 0 st: 0 flags:1 dts: 2.576844 pts: 2.576844 pos: 41238 size: 558
|
||||||
|
ret: 0 st:-1 flags:1 ts: 1.470835
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.462533 pts: 1.462533 pos: 23406 size: 556
|
||||||
|
ret: 0 st: 0 flags:0 ts: 0.365000
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.383044 pts: 0.383044 pos: 6130 size: 558
|
||||||
|
ret:-1 st: 0 flags:1 ts:-0.740833
|
||||||
|
ret: 0 st:-1 flags:0 ts: 2.153336
|
||||||
|
ret: 0 st: 0 flags:1 dts: 2.158978 pts: 2.158978 pos: 34552 size: 556
|
||||||
|
ret: 0 st:-1 flags:1 ts: 1.047503
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.044667 pts: 1.044667 pos: 16718 size: 558
|
||||||
|
ret: 0 st: 0 flags:0 ts:-0.058333
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556
|
||||||
|
ret: 0 st: 0 flags:1 ts: 2.835833
|
||||||
|
ret: 0 st: 0 flags:1 dts: 2.820600 pts: 2.820600 pos: 45140 size: 556
|
||||||
|
ret: 0 st:-1 flags:0 ts: 1.730004
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.741111 pts: 1.741111 pos: 27864 size: 556
|
||||||
|
ret: 0 st:-1 flags:1 ts: 0.624171
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.591978 pts: 0.591978 pos: 9474 size: 556
|
||||||
|
ret: 0 st: 0 flags:0 ts:-0.481667
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556
|
||||||
|
ret: 0 st: 0 flags:1 ts: 2.412500
|
||||||
|
ret: 0 st: 0 flags:1 dts: 2.402733 pts: 2.402733 pos: 38452 size: 558
|
||||||
|
ret: 0 st:-1 flags:0 ts: 1.306672
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.323244 pts: 1.323244 pos: 21176 size: 558
|
||||||
|
ret: 0 st:-1 flags:1 ts: 0.200839
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.174111 pts: 0.174111 pos: 2786 size: 558
|
||||||
|
ret: 0 st: 0 flags:0 ts:-0.904989
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556
|
||||||
|
ret: 0 st: 0 flags:1 ts: 1.989178
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.984867 pts: 1.984867 pos: 31764 size: 558
|
||||||
|
ret: 0 st:-1 flags:0 ts: 0.883340
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.905378 pts: 0.905378 pos: 14488 size: 558
|
||||||
|
ret:-1 st:-1 flags:1 ts:-0.222493
|
||||||
|
ret: 0 st: 0 flags:0 ts: 2.671678
|
||||||
|
ret: 0 st: 0 flags:1 dts: 2.681311 pts: 2.681311 pos: 42910 size: 558
|
||||||
|
ret: 0 st: 0 flags:1 ts: 1.565844
|
||||||
|
ret: 0 st: 0 flags:1 dts: 1.532178 pts: 1.532178 pos: 24520 size: 558
|
||||||
|
ret: 0 st:-1 flags:0 ts: 0.460008
|
||||||
|
ret: 0 st: 0 flags:1 dts: 0.487511 pts: 0.487511 pos: 7802 size: 556
|
||||||
|
ret:-1 st:-1 flags:1 ts:-0.645825
|
@ -1,41 +0,0 @@
|
|||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret: 0 st:-1 flags:0 ts:-1.000000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret:-1 st:-1 flags:1 ts: 1.894167
|
|
||||||
ret:-1 st: 0 flags:0 ts: 0.788000
|
|
||||||
ret: 0 st: 0 flags:1 ts:-0.317000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret: 0 st:-1 flags:0 ts: 2.576668
|
|
||||||
ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558
|
|
||||||
ret:-1 st:-1 flags:1 ts: 1.470835
|
|
||||||
ret:-1 st: 0 flags:0 ts: 0.365000
|
|
||||||
ret: 0 st: 0 flags:1 ts:-0.741000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret: 0 st:-1 flags:0 ts: 2.153336
|
|
||||||
ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558
|
|
||||||
ret:-1 st:-1 flags:1 ts: 1.047503
|
|
||||||
ret: 0 st: 0 flags:0 ts:-0.058000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret: 0 st: 0 flags:1 ts: 2.836000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558
|
|
||||||
ret:-1 st:-1 flags:0 ts: 1.730004
|
|
||||||
ret:-1 st:-1 flags:1 ts: 0.624171
|
|
||||||
ret: 0 st: 0 flags:0 ts:-0.482000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret: 0 st: 0 flags:1 ts: 2.413000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558
|
|
||||||
ret: 0 st:-1 flags:0 ts: 1.306672
|
|
||||||
ret: 0 st: 0 flags:1 dts:65.537000 pts:65.537000 pos: 87488 size: 6132
|
|
||||||
ret:-1 st:-1 flags:1 ts: 0.200839
|
|
||||||
ret: 0 st: 0 flags:0 ts:-0.905000
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret:-1 st: 0 flags:1 ts: 1.989000
|
|
||||||
ret:-1 st:-1 flags:0 ts: 0.883340
|
|
||||||
ret: 0 st:-1 flags:1 ts:-0.222493
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
||||||
ret:-1 st: 0 flags:0 ts: 2.672000
|
|
||||||
ret:-1 st: 0 flags:1 ts: 1.566000
|
|
||||||
ret: 0 st:-1 flags:0 ts: 0.460008
|
|
||||||
ret: 0 st: 0 flags:1 dts: 1.567000 pts: 1.567000 pos: 25889 size: 556
|
|
||||||
ret: 0 st:-1 flags:1 ts:-0.645825
|
|
||||||
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
|
|
Loading…
Reference in New Issue
Block a user