mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avcodec/dfa: Optimize output reshuffle loop
18035 -> 4018 decicycles (tested with LOGOS.DFA, gcc 7, 3950X). Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
d1bf5b7d96
commit
18bc612f2f
@ -388,9 +388,17 @@ static int dfa_decode_frame(AVCodecContext *avctx,
|
||||
for (i = 0; i < avctx->height; i++) {
|
||||
if(version == 0x100) {
|
||||
int j;
|
||||
for(j = 0; j < avctx->width; j++) {
|
||||
dst[j] = buf[ (i&3)*(avctx->width /4) + (j/4) +
|
||||
((j&3)*(avctx->height/4) + (i/4))*avctx->width];
|
||||
const uint8_t *buf1 = buf + (i&3)*(avctx->width/4) + (i/4)*avctx->width;
|
||||
int stride = (avctx->height/4)*avctx->width;
|
||||
for(j = 0; j < avctx->width/4; j++) {
|
||||
dst[4*j+0] = buf1[j + 0*stride];
|
||||
dst[4*j+1] = buf1[j + 1*stride];
|
||||
dst[4*j+2] = buf1[j + 2*stride];
|
||||
dst[4*j+3] = buf1[j + 3*stride];
|
||||
}
|
||||
j *= 4;
|
||||
for(; j < avctx->width; j++) {
|
||||
dst[j] = buf1[(j/4) + (j&3)*stride];
|
||||
}
|
||||
} else {
|
||||
memcpy(dst, buf, avctx->width);
|
||||
|
Loading…
Reference in New Issue
Block a user