You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Move add/diff_int16 to lossless_videodsp
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		
							
								
								
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -1683,6 +1683,7 @@ CONFIG_EXTRA=" | ||||
|     huffman | ||||
|     intrax8 | ||||
|     lgplv3 | ||||
|     llviddsp | ||||
|     lpc | ||||
|     mpegaudio | ||||
|     mpegaudiodsp | ||||
| @@ -1904,8 +1905,8 @@ eatqi_decoder_select="aandcttables error_resilience mpegvideo" | ||||
| exr_decoder_select="zlib" | ||||
| ffv1_decoder_select="dsputil golomb rangecoder" | ||||
| ffv1_encoder_select="dsputil rangecoder" | ||||
| ffvhuff_decoder_select="dsputil" | ||||
| ffvhuff_encoder_select="dsputil huffman" | ||||
| ffvhuff_decoder_select="dsputil llviddsp" | ||||
| ffvhuff_encoder_select="dsputil huffman llviddsp" | ||||
| flac_decoder_select="golomb" | ||||
| flac_encoder_select="dsputil golomb lpc" | ||||
| flashsv_decoder_select="zlib" | ||||
| @@ -1927,8 +1928,8 @@ h263p_encoder_select="h263_encoder" | ||||
| h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel videodsp" | ||||
| h264_decoder_suggest="error_resilience" | ||||
| hevc_decoder_select="dsputil golomb videodsp" | ||||
| huffyuv_decoder_select="dsputil" | ||||
| huffyuv_encoder_select="dsputil huffman" | ||||
| huffyuv_decoder_select="dsputil llviddsp" | ||||
| huffyuv_encoder_select="dsputil huffman llviddsp" | ||||
| iac_decoder_select="dsputil fft mdct sinewin" | ||||
| imc_decoder_select="dsputil fft mdct sinewin" | ||||
| indeo3_decoder_select="hpeldsp" | ||||
|   | ||||
| @@ -57,6 +57,7 @@ OBJS-$(CONFIG_HPELDSP)                 += hpeldsp.o | ||||
| OBJS-$(CONFIG_HUFFMAN)                 += huffman.o | ||||
| OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o | ||||
| OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o | ||||
| OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o | ||||
| OBJS-$(CONFIG_LPC)                     += lpc.o | ||||
| OBJS-$(CONFIG_LSP)                     += lsp.o | ||||
| OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o | ||||
|   | ||||
| @@ -1888,45 +1888,6 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
|         dst[i+0] = src1[i+0]-src2[i+0]; | ||||
| } | ||||
|  | ||||
| static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ | ||||
|     long i; | ||||
|     unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; | ||||
|     unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; | ||||
|     for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { | ||||
|         long a = *(long*)(src+i); | ||||
|         long b = *(long*)(dst+i); | ||||
|         *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); | ||||
|     } | ||||
|     for(; i<w; i++) | ||||
|         dst[i] = (dst[i] + src[i]) & mask; | ||||
| } | ||||
|  | ||||
| static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ | ||||
|     long i; | ||||
| #if !HAVE_FAST_UNALIGNED | ||||
|     if((long)src2 & (sizeof(long)-1)){ | ||||
|         for(i=0; i+7<w; i+=8){ | ||||
|             dst[i+0] = (src1[i+0]-src2[i+0]) & mask; | ||||
|             dst[i+1] = (src1[i+1]-src2[i+1]) & mask; | ||||
|             dst[i+2] = (src1[i+2]-src2[i+2]) & mask; | ||||
|             dst[i+3] = (src1[i+3]-src2[i+3]) & mask; | ||||
|         } | ||||
|     }else | ||||
| #endif | ||||
|     { | ||||
|         unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; | ||||
|         unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; | ||||
|  | ||||
|         for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { | ||||
|             long a = *(long*)(src1+i); | ||||
|             long b = *(long*)(src2+i); | ||||
|             *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); | ||||
|         } | ||||
|     } | ||||
|     for (; i<w; i++) | ||||
|         dst[i] = (src1[i] - src2[i]) & mask; | ||||
| } | ||||
|  | ||||
| static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){ | ||||
|     int i; | ||||
|     uint8_t l, lt; | ||||
| @@ -2812,8 +2773,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||||
|  | ||||
|     c->add_bytes= add_bytes_c; | ||||
|     c->diff_bytes= diff_bytes_c; | ||||
|     c->add_int16 = add_int16_c; | ||||
|     c->diff_int16= diff_int16_c; | ||||
|     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; | ||||
|     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; | ||||
|     c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c; | ||||
|   | ||||
| @@ -193,8 +193,6 @@ typedef struct DSPContext { | ||||
|     /* huffyuv specific */ | ||||
|     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | ||||
|     void (*diff_bytes)(uint8_t *dst/*align 16*/, const uint8_t *src1/*align 16*/, const uint8_t *src2/*align 1*/,int w); | ||||
|     void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w); | ||||
|     void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w); | ||||
|     /** | ||||
|      * subtract huffyuv's variant of median prediction | ||||
|      * note, this might read from src1[-1], src2[-1] | ||||
|   | ||||
| @@ -81,6 +81,7 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx) | ||||
|     s->flags = avctx->flags; | ||||
|  | ||||
|     ff_dsputil_init(&s->dsp, avctx); | ||||
|     ff_llviddsp_init(&s->llviddsp); | ||||
|  | ||||
|     s->width = avctx->width; | ||||
|     s->height = avctx->height; | ||||
|   | ||||
| @@ -35,6 +35,7 @@ | ||||
| #include "dsputil.h" | ||||
| #include "get_bits.h" | ||||
| #include "put_bits.h" | ||||
| #include "lossless_videodsp.h" | ||||
|  | ||||
| #define VLC_BITS 11 | ||||
|  | ||||
| @@ -92,6 +93,7 @@ typedef struct HYuvContext { | ||||
|     uint8_t *bitstream_buffer; | ||||
|     unsigned int bitstream_buffer_size; | ||||
|     DSPContext dsp; | ||||
|     LLVidDSPContext llviddsp; | ||||
| } HYuvContext; | ||||
|  | ||||
| void ff_huffyuv_common_init(AVCodecContext *s); | ||||
|   | ||||
| @@ -708,7 +708,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w) | ||||
|     if (s->bps <= 8) { | ||||
|         s->dsp.add_bytes(dst, src, w); | ||||
|     } else { | ||||
|         s->dsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w); | ||||
|         s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -41,7 +41,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst, | ||||
|     if (s->bps <= 8) { | ||||
|         s->dsp.diff_bytes(dst, src0, src1, w); | ||||
|     } else { | ||||
|         s->dsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); | ||||
|         s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										69
									
								
								libavcodec/lossless_videodsp.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								libavcodec/lossless_videodsp.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| /* | ||||
|  * Lossless video DSP utils | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| #include "avcodec.h" | ||||
| #include "lossless_videodsp.h" | ||||
| /* | ||||
|  * Masked element-wise addition of 16-bit samples, done in place: | ||||
|  *     dst[i] = (dst[i] + src[i]) & mask   for 0 <= i < w | ||||
|  * | ||||
|  * The first loop handles sizeof(long)/2 samples per iteration by reading | ||||
|  * them as one long; the second loop finishes the leftover samples one by one. | ||||
|  * | ||||
|  * NOTE(review): the packed constants are built from mask >> 1, so the packed | ||||
|  * loop presumably only agrees with the scalar loop when mask has the form | ||||
|  * 2^n - 1 -- confirm against the callers. | ||||
|  * NOTE(review): src and dst are dereferenced through long*, which assumes | ||||
|  * both are suitably aligned for long -- confirm. | ||||
|  */ | ||||
| static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ | ||||
|     long i; | ||||
|     unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* mask >> 1 copied into each 16-bit lane */ | ||||
|     unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; /* pw_lsb plus one in each 16-bit lane */ | ||||
|     for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { | ||||
|         long a = *(long*)(src+i); | ||||
|         long b = *(long*)(dst+i); | ||||
|         *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); /* low bits are added, the pw_msb bit is combined with XOR */ | ||||
|     } | ||||
|     for(; i<w; i++) /* samples left over after the packed loop */ | ||||
|         dst[i] = (dst[i] + src[i]) & mask; | ||||
| } | ||||
|  | ||||
/**
 * Masked element-wise difference of 16-bit samples:
 *     dst[i] = (src1[i] - src2[i]) & mask   for 0 <= i < w
 *
 * @param dst  output samples
 * @param src1 minuend samples
 * @param src2 subtrahend samples; this is the only pointer whose alignment
 *             is checked before it is read a long at a time
 * @param mask bit mask applied to every result; the packed path derives its
 *             constants from mask >> 1 and therefore expects a mask of the
 *             form 2^n - 1
 * @param w    number of samples to process
 *
 * NOTE(review): the packed path reads and writes the uint16_t buffers
 * through long*, so it relies on dst and src1 being aligned for long.
 */
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((uintptr_t)src2 & (sizeof(long)-1)){
        /* src2 is not aligned for long loads: use scalar code unrolled by
         * four. The loop bound and stride must match the four stores below,
         * otherwise samples are skipped and left unwritten. */
        for(i=0; i+3<w; i+=4){
            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
        }
    }else
#endif
    {
        /* mask >> 1 in every 16-bit lane, and that value plus one per lane */
        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;

        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
            long a = *(long*)(src1+i);
            long b = *(long*)(src2+i);
            /* Force the pw_msb bit on in a so the per-lane subtraction
             * cannot borrow from the neighbouring lane, then rebuild that
             * bit of the result with XOR. */
            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
        }
    }
    /* samples left over after either loop above */
    for (; i<w; i++)
        dst[i] = (src1[i] - src2[i]) & mask;
}
| /* | ||||
|  * Fill an LLVidDSPContext with the C implementations of add_int16 and | ||||
|  * diff_int16 defined in this file, then call ff_llviddsp_init_x86() when | ||||
|  * ARCH_X86 is nonzero. | ||||
|  */ | ||||
| void ff_llviddsp_init(LLVidDSPContext *c) | ||||
| { | ||||
|     c->add_int16 = add_int16_c; | ||||
|     c->diff_int16= diff_int16_c; | ||||
|  | ||||
|     if (ARCH_X86) /* the x86 init receives the already filled context */ | ||||
|         ff_llviddsp_init_x86(c); | ||||
| } | ||||
							
								
								
									
										36
									
								
								libavcodec/lossless_videodsp.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								libavcodec/lossless_videodsp.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| /* | ||||
|  * Lossless video DSP utils | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
|  | ||||
| #ifndef AVCODEC_LOSSLESS_VIDEODSP_H | ||||
| #define AVCODEC_LOSSLESS_VIDEODSP_H | ||||
|  | ||||
| #include "avcodec.h" | ||||
| #include "libavutil/cpu.h" | ||||
| /* | ||||
|  * Function pointer table for the lossless video DSP routines. | ||||
|  * | ||||
|  * add_int16:  takes dst, src, a mask and a count w; dst is both input and | ||||
|  *             output. | ||||
|  * diff_int16: takes dst, two sources, a mask and a count w. | ||||
|  * The expected pointer alignments are given next to each parameter. | ||||
|  */ | ||||
| typedef struct LLVidDSPContext { | ||||
|     void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w); | ||||
|     void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w); | ||||
| } LLVidDSPContext; | ||||
|  | ||||
| void ff_llviddsp_init(LLVidDSPContext *llviddsp); | ||||
| void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp); | ||||
|  | ||||
| #endif //AVCODEC_LOSSLESS_VIDEODSP_H | ||||
| @@ -18,6 +18,7 @@ OBJS-$(CONFIG_H264DSP)                 += x86/h264dsp_init.o | ||||
| OBJS-$(CONFIG_H264PRED)                += x86/h264_intrapred_init.o | ||||
| OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o | ||||
| OBJS-$(CONFIG_LLVIDDSP)                += x86/lossless_videodsp_init.o | ||||
| OBJS-$(CONFIG_LPC)                     += x86/lpc.o | ||||
| OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o | ||||
| OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o | ||||
| @@ -86,6 +87,7 @@ YASM-OBJS-$(CONFIG_H264QPEL)           += x86/h264_qpel_8bit.o          \ | ||||
|                                           x86/qpel.o | ||||
| YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \ | ||||
|                                           x86/hpeldsp.o | ||||
| YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o | ||||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o | ||||
| YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o | ||||
| YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o | ||||
|   | ||||
| @@ -466,70 +466,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | ||||
|     ADD_HFYU_LEFT_LOOP 0, 0 | ||||
|  | ||||
|  | ||||
| %macro ADD_INT16_LOOP 1 ; %1 = is_aligned | ||||
|     movd      m4, maskq | ||||
|     punpcklwd m4, m4 | ||||
|     punpcklwd m4, m4 | ||||
|     punpcklwd m4, m4 | ||||
|     add     wq, wq | ||||
|     test    wq, 2*mmsize - 1 | ||||
|     jz %%.tomainloop | ||||
| %%.wordloop: | ||||
|     sub     wq, 2 | ||||
|     mov     ax, [srcq+wq] | ||||
|     add     ax, [dstq+wq] | ||||
|     and     ax, maskw | ||||
|     mov     [dstq+wq], ax | ||||
|     test    wq, 2*mmsize - 1 | ||||
|     jnz %%.wordloop | ||||
| %%.tomainloop: | ||||
|     add     srcq, wq | ||||
|     add     dstq, wq | ||||
|     neg     wq | ||||
|     jz      %%.end | ||||
| %%.loop: | ||||
| %if %1 | ||||
|     mova    m0, [srcq+wq] | ||||
|     mova    m1, [dstq+wq] | ||||
|     mova    m2, [srcq+wq+mmsize] | ||||
|     mova    m3, [dstq+wq+mmsize] | ||||
| %else | ||||
|     movu    m0, [srcq+wq] | ||||
|     movu    m1, [dstq+wq] | ||||
|     movu    m2, [srcq+wq+mmsize] | ||||
|     movu    m3, [dstq+wq+mmsize] | ||||
| %endif | ||||
|     paddw   m0, m1 | ||||
|     paddw   m2, m3 | ||||
|     pand    m0, m4 | ||||
|     pand    m2, m4 | ||||
| %if %1 | ||||
|     mova    [dstq+wq]       , m0 | ||||
|     mova    [dstq+wq+mmsize], m2 | ||||
| %else | ||||
|     movu    [dstq+wq]       , m0 | ||||
|     movu    [dstq+wq+mmsize], m2 | ||||
| %endif | ||||
|     add     wq, 2*mmsize | ||||
|     jl %%.loop | ||||
| %%.end: | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmx | ||||
| cglobal add_int16, 4,4,5, dst, src, mask, w | ||||
|     ADD_INT16_LOOP 1 | ||||
|  | ||||
| INIT_XMM sse2 | ||||
| cglobal add_int16, 4,4,5, dst, src, mask, w | ||||
|     test srcq, mmsize-1 | ||||
|     jnz .unaligned | ||||
|     test dstq, mmsize-1 | ||||
|     jnz .unaligned | ||||
|     ADD_INT16_LOOP 1 | ||||
| .unaligned: | ||||
|     ADD_INT16_LOOP 0 | ||||
|  | ||||
| ;----------------------------------------------------------------------------- | ||||
| ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | ||||
| ;                           int32_t max, unsigned int len) | ||||
|   | ||||
| @@ -542,7 +542,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | ||||
| #endif /* HAVE_MMX_INLINE */ | ||||
|  | ||||
| #if HAVE_MMX_EXTERNAL | ||||
|     c->add_int16 = ff_add_int16_mmx; | ||||
|     c->vector_clip_int32 = ff_vector_clip_int32_mmx; | ||||
| #endif /* HAVE_MMX_EXTERNAL */ | ||||
| } | ||||
| @@ -626,8 +625,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | ||||
|         c->vector_clip_int32 = ff_vector_clip_int32_sse2; | ||||
|     } | ||||
|     c->bswap_buf = ff_bswap32_buf_sse2; | ||||
|  | ||||
|     c->add_int16 = ff_add_int16_sse2; | ||||
| #endif /* HAVE_SSE2_EXTERNAL */ | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -116,8 +116,6 @@ void ff_clear_blocks_mmx(int16_t *blocks); | ||||
| void ff_clear_blocks_sse(int16_t *blocks); | ||||
|  | ||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | ||||
| void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
| void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
|  | ||||
| void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | ||||
|                                         const uint8_t *diff, int w, | ||||
|   | ||||
							
								
								
									
										88
									
								
								libavcodec/x86/lossless_videodsp.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								libavcodec/x86/lossless_videodsp.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| ;****************************************************************************** | ||||
| ;* SIMD lossless video DSP utils | ||||
| ;* Copyright (c) 2014 Michael Niedermayer | ||||
| ;* | ||||
| ;* This file is part of FFmpeg. | ||||
| ;* | ||||
| ;* FFmpeg is free software; you can redistribute it and/or | ||||
| ;* modify it under the terms of the GNU Lesser General Public | ||||
| ;* License as published by the Free Software Foundation; either | ||||
| ;* version 2.1 of the License, or (at your option) any later version. | ||||
| ;* | ||||
| ;* FFmpeg is distributed in the hope that it will be useful, | ||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
| ;* Lesser General Public License for more details. | ||||
| ;* | ||||
| ;* You should have received a copy of the GNU Lesser General Public | ||||
| ;* License along with FFmpeg; if not, write to the Free Software | ||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| ;****************************************************************************** | ||||
|  | ||||
| %include "libavutil/x86/x86util.asm" | ||||
|  | ||||
| SECTION_TEXT | ||||
|  | ||||
;------------------------------------------------------------------------------
; void ff_add_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
;
; dst[i] = (dst[i] + src[i]) & mask   for 0 <= i < w
;------------------------------------------------------------------------------

; ADD_INT16_LOOP is_aligned
;   %1 != 0: the main loop uses aligned loads/stores (mova)
;   %1 == 0: the main loop uses unaligned loads/stores (movu)
; Expects the named registers dst, src, mask, w and tmp to be defined by the
; enclosing cglobal. The macro ends with RET.
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
    movd      m4, maskd             ; mask is a 32-bit unsigned argument
    punpcklwd m4, m4
    punpcklwd m4, m4
    punpcklwd m4, m4                ; low word of mask now fills every word of m4
    add     wd, wd                  ; samples -> bytes; w is a 32-bit int, and the
                                    ; 32-bit write also clears the upper half of wq,
                                    ; which the caller is not required to initialise
    test    wq, 2*mmsize - 1
    jz %%.tomainloop
    ; tmp lies outside the 4 GPRs reserved by cglobal, so preserve it by hand.
    ; A fixed ax must not be used as scratch: on x86-32 x86inc assigns eax to
    ; the first argument (dst), and writing ax would corrupt that pointer.
    push    tmpq
%%.wordloop:                        ; peel single words off the end until the
    sub     wq, 2                   ; remaining byte count is a multiple of 2*mmsize
    mov     tmpw, [srcq+wq]
    add     tmpw, [dstq+wq]
    and     tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop
    pop     tmpq                    ; pop leaves the flags untouched
%%.tomainloop:
    add     srcq, wq                ; point both buffers at the end of the remaining
    add     dstq, wq                ; region and count wq up from -size towards 0
    neg     wq
    jz      %%.end
%%.loop:
%if %1
    mova    m0, [srcq+wq]
    mova    m1, [dstq+wq]
    mova    m2, [srcq+wq+mmsize]
    mova    m3, [dstq+wq+mmsize]
%else
    movu    m0, [srcq+wq]
    movu    m1, [dstq+wq]
    movu    m2, [srcq+wq+mmsize]
    movu    m3, [dstq+wq+mmsize]
%endif
    paddw   m0, m1
    paddw   m2, m3
    pand    m0, m4
    pand    m2, m4
%if %1
    mova    [dstq+wq]       , m0
    mova    [dstq+wq+mmsize], m2
%else
    movu    [dstq+wq]       , m0
    movu    [dstq+wq+mmsize], m2
%endif
    add     wq, 2*mmsize            ; two vector registers per iteration
    jl %%.loop
%%.end:
    RET
%endmacro

INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    ADD_INT16_LOOP 1

INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    ; take the aligned variant only when both pointers are mmsize-aligned
    test srcq, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
    ADD_INT16_LOOP 1
.unaligned:
    ADD_INT16_LOOP 0
							
								
								
									
										38
									
								
								libavcodec/x86/lossless_videodsp_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								libavcodec/x86/lossless_videodsp_init.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| /* | ||||
|  * Lossless video DSP utils | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "../lossless_videodsp.h" | ||||
| #include "libavutil/x86/cpu.h" | ||||
|  | ||||
| void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
| void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
| /* | ||||
|  * Replace the add_int16 pointer of the context with an assembly version, | ||||
|  * chosen from the flags returned by av_get_cpu_flags(). The SSE2 check comes | ||||
|  * after the MMX check, so the SSE2 version is the one kept when both tests | ||||
|  * succeed. diff_int16 is not modified here. | ||||
|  */ | ||||
| void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
| void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); | ||||
|  | ||||
| void ff_llviddsp_init_x86(LLVidDSPContext *c) | ||||
| { | ||||
|     int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
|     if (EXTERNAL_MMX(cpu_flags)) { | ||||
|         c->add_int16 = ff_add_int16_mmx; | ||||
|     } | ||||
|  | ||||
|     if (EXTERNAL_SSE2(cpu_flags)) { /* checked last: overrides the MMX choice */ | ||||
|         c->add_int16 = ff_add_int16_sse2; | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user