You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	huffyuvencdsp: move shared functions to a new lossless_videoencdsp context
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
							
								
								
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -2115,6 +2115,7 @@ CONFIG_EXTRA=" | ||||
|     libx262 | ||||
|     llauddsp | ||||
|     llviddsp | ||||
|     llvidencdsp | ||||
|     lpc | ||||
|     lzf | ||||
|     me_cmp | ||||
| @@ -2366,7 +2367,7 @@ amv_decoder_select="sp5x_decoder exif" | ||||
| amv_encoder_select="aandcttables jpegtables mpegvideoenc" | ||||
| ape_decoder_select="bswapdsp llauddsp" | ||||
| apng_decoder_select="zlib" | ||||
| apng_encoder_select="huffyuvencdsp zlib" | ||||
| apng_encoder_select="llvidencdsp zlib" | ||||
| asv1_decoder_select="blockdsp bswapdsp idctdsp" | ||||
| asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" | ||||
| asv2_decoder_select="blockdsp bswapdsp idctdsp" | ||||
| @@ -2430,7 +2431,7 @@ hap_encoder_deps="libsnappy" | ||||
| hap_encoder_select="texturedspenc" | ||||
| hevc_decoder_select="bswapdsp cabac golomb videodsp" | ||||
| huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp" | ||||
| huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp" | ||||
| huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp" | ||||
| iac_decoder_select="imc_decoder" | ||||
| imc_decoder_select="bswapdsp fft mdct sinewin" | ||||
| indeo3_decoder_select="hpeldsp" | ||||
| @@ -2491,7 +2492,7 @@ on2avc_decoder_select="mdct" | ||||
| opus_decoder_deps="swresample" | ||||
| opus_decoder_select="imdct15" | ||||
| png_decoder_select="zlib" | ||||
| png_encoder_select="huffyuvencdsp zlib" | ||||
| png_encoder_select="llvidencdsp zlib" | ||||
| prores_decoder_select="blockdsp idctdsp" | ||||
| prores_encoder_select="fdctdsp" | ||||
| qcelp_decoder_select="lsp" | ||||
| @@ -2534,7 +2535,7 @@ tscc_decoder_select="zlib" | ||||
| twinvq_decoder_select="mdct lsp sinewin" | ||||
| txd_decoder_select="texturedsp" | ||||
| utvideo_decoder_select="bswapdsp llviddsp" | ||||
| utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" | ||||
| utvideo_encoder_select="bswapdsp huffman llvidencdsp" | ||||
| vble_decoder_select="llviddsp" | ||||
| vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp" | ||||
| vc1_qsv_decoder_deps="libmfx" | ||||
|   | ||||
| @@ -91,6 +91,7 @@ OBJS-$(CONFIG_JPEGTABLES)              += jpegtables.o | ||||
| OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o | ||||
| OBJS-$(CONFIG_LLAUDDSP)                += lossless_audiodsp.o | ||||
| OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o | ||||
| OBJS-$(CONFIG_LLVIDENCDSP)             += lossless_videoencdsp.o | ||||
| OBJS-$(CONFIG_LPC)                     += lpc.o | ||||
| OBJS-$(CONFIG_LSP)                     += lsp.o | ||||
| OBJS-$(CONFIG_LZF)                     += lzf.o | ||||
|   | ||||
| @@ -38,6 +38,7 @@ | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "put_bits.h" | ||||
| #include "lossless_videodsp.h" | ||||
| #include "lossless_videoencdsp.h" | ||||
|  | ||||
| #define VLC_BITS 12 | ||||
|  | ||||
| @@ -89,6 +90,7 @@ typedef struct HYuvContext { | ||||
|     HuffYUVDSPContext hdsp; | ||||
|     HuffYUVEncDSPContext hencdsp; | ||||
|     LLVidDSPContext llviddsp; | ||||
|     LLVidEncDSPContext llvidencdsp; | ||||
|     int non_determ; // non-deterministic, multi-threaded encoder allowed | ||||
| } HYuvContext; | ||||
|  | ||||
|   | ||||
| @@ -33,6 +33,7 @@ | ||||
| #include "huffman.h" | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "internal.h" | ||||
| #include "lossless_videoencdsp.h" | ||||
| #include "put_bits.h" | ||||
| #include "libavutil/opt.h" | ||||
| #include "libavutil/pixdesc.h" | ||||
| @@ -41,7 +42,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst, | ||||
|                               const uint8_t *src0, const uint8_t *src1, int w) | ||||
| { | ||||
|     if (s->bps <= 8) { | ||||
|         s->hencdsp.diff_bytes(dst, src0, src1, w); | ||||
|         s->llvidencdsp.diff_bytes(dst, src0, src1, w); | ||||
|     } else { | ||||
|         s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); | ||||
|     } | ||||
| @@ -65,7 +66,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, | ||||
|                 dst[i] = temp - left; | ||||
|                 left   = temp; | ||||
|             } | ||||
|             s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | ||||
|             s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | ||||
|             return src[w-1]; | ||||
|         } | ||||
|     } else { | ||||
| @@ -117,7 +118,7 @@ static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, | ||||
|         a = at; | ||||
|     } | ||||
|  | ||||
|     s->hencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | ||||
|     s->llvidencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | ||||
|  | ||||
|     *red   = src[(w - 1) * 4 + R]; | ||||
|     *green = src[(w - 1) * 4 + G]; | ||||
| @@ -146,7 +147,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | ||||
|         b = bt; | ||||
|     } | ||||
|  | ||||
|     s->hencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | ||||
|     s->llvidencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | ||||
|  | ||||
|     *red   = src[(w - 1) * 3 + 0]; | ||||
|     *green = src[(w - 1) * 3 + 1]; | ||||
| @@ -156,7 +157,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | ||||
| static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) | ||||
| { | ||||
|     if (s->bps <= 8) { | ||||
|         s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top); | ||||
|         s->llvidencdsp.sub_median_pred(dst, src1, src2, w , left, left_top); | ||||
|     } else { | ||||
|         s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top); | ||||
|     } | ||||
| @@ -218,6 +219,7 @@ static av_cold int encode_init(AVCodecContext *avctx) | ||||
|  | ||||
|     ff_huffyuv_common_init(avctx); | ||||
|     ff_huffyuvencdsp_init(&s->hencdsp, avctx); | ||||
|     ff_llvidencdsp_init(&s->llvidencdsp); | ||||
|  | ||||
|     avctx->extradata = av_mallocz(3*MAX_N + 4); | ||||
|     if (s->flags&AV_CODEC_FLAG_PASS1) { | ||||
| @@ -823,9 +825,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|             lefttopy = p->data[0][3]; | ||||
|             lefttopu = p->data[1][1]; | ||||
|             lefttopv = p->data[2][1]; | ||||
|             s->hencdsp.sub_hfyu_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width  - 4, &lefty, &lefttopy); | ||||
|             s->hencdsp.sub_hfyu_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | ||||
|             s->hencdsp.sub_hfyu_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | ||||
|             s->llvidencdsp.sub_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width  - 4, &lefty, &lefttopy); | ||||
|             s->llvidencdsp.sub_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | ||||
|             s->llvidencdsp.sub_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | ||||
|             encode_422_bitstream(s, 0, width - 4); | ||||
|             y++; cy++; | ||||
|  | ||||
| @@ -835,7 +837,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|                 if (s->bitstream_bpp == 12) { | ||||
|                     while (2 * cy > y) { | ||||
|                         ydst = p->data[0] + p->linesize[0] * y; | ||||
|                         s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | ||||
|                         s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | ||||
|                         encode_gray_bitstream(s, width); | ||||
|                         y++; | ||||
|                     } | ||||
| @@ -845,9 +847,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|                 udst = p->data[1] + p->linesize[1] * cy; | ||||
|                 vdst = p->data[2] + p->linesize[2] * cy; | ||||
|  | ||||
|                 s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width,  &lefty, &lefttopy); | ||||
|                 s->hencdsp.sub_hfyu_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | ||||
|                 s->hencdsp.sub_hfyu_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | ||||
|                 s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width,  &lefty, &lefttopy); | ||||
|                 s->llvidencdsp.sub_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | ||||
|                 s->llvidencdsp.sub_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | ||||
|  | ||||
|                 encode_422_bitstream(s, 0, width); | ||||
|             } | ||||
| @@ -860,7 +862,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|                     ydst = p->data[0] + p->linesize[0] * y; | ||||
|  | ||||
|                     if (s->predictor == PLANE && s->interlaced < y) { | ||||
|                         s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | ||||
|                         s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | ||||
|  | ||||
|                         lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | ||||
|                     } else { | ||||
| @@ -876,9 +878,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|                 vdst = p->data[2] + p->linesize[2] * cy; | ||||
|  | ||||
|                 if (s->predictor == PLANE && s->interlaced < cy) { | ||||
|                     s->hencdsp.diff_bytes(s->temp[1],          ydst, ydst - fake_ystride, width); | ||||
|                     s->hencdsp.diff_bytes(s->temp[2],          udst, udst - fake_ustride, width2); | ||||
|                     s->hencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | ||||
|                     s->llvidencdsp.diff_bytes(s->temp[1],          ydst, ydst - fake_ystride, width); | ||||
|                     s->llvidencdsp.diff_bytes(s->temp[2],          udst, udst - fake_ustride, width2); | ||||
|                     s->llvidencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | ||||
|  | ||||
|                     lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | ||||
|                     leftu = sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); | ||||
| @@ -911,7 +913,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|         for (y = 1; y < s->height; y++) { | ||||
|             uint8_t *dst = data + y*stride; | ||||
|             if (s->predictor == PLANE && s->interlaced < y) { | ||||
|                 s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | ||||
|                 s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | ||||
|                 sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, | ||||
|                                           &leftr, &leftg, &leftb, &lefta); | ||||
|             } else { | ||||
| @@ -939,7 +941,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | ||||
|         for (y = 1; y < s->height; y++) { | ||||
|             uint8_t *dst = data + y * stride; | ||||
|             if (s->predictor == PLANE && s->interlaced < y) { | ||||
|                 s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | ||||
|                 s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | ||||
|                                       width * 3); | ||||
|                 sub_left_prediction_rgb24(s, s->temp[0], s->temp[1], width, | ||||
|                                           &leftr, &leftg, &leftb); | ||||
|   | ||||
| @@ -21,38 +21,6 @@ | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "mathops.h" | ||||
|  | ||||
| // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | ||||
| #define pb_7f (~0UL / 255 * 0x7f) | ||||
| #define pb_80 (~0UL / 255 * 0x80) | ||||
|  | ||||
| static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) | ||||
| { | ||||
|     long i; | ||||
|  | ||||
| #if !HAVE_FAST_UNALIGNED | ||||
|     if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { | ||||
|         for (i = 0; i + 7 < w; i += 8) { | ||||
|             dst[i + 0] = src1[i + 0] - src2[i + 0]; | ||||
|             dst[i + 1] = src1[i + 1] - src2[i + 1]; | ||||
|             dst[i + 2] = src1[i + 2] - src2[i + 2]; | ||||
|             dst[i + 3] = src1[i + 3] - src2[i + 3]; | ||||
|             dst[i + 4] = src1[i + 4] - src2[i + 4]; | ||||
|             dst[i + 5] = src1[i + 5] - src2[i + 5]; | ||||
|             dst[i + 6] = src1[i + 6] - src2[i + 6]; | ||||
|             dst[i + 7] = src1[i + 7] - src2[i + 7]; | ||||
|         } | ||||
|     } else | ||||
| #endif | ||||
|     for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { | ||||
|         long a = *(long *) (src1 + i); | ||||
|         long b = *(long *) (src2 + i); | ||||
|         *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ | ||||
|                               ((a ^ b ^ pb_80) & pb_80); | ||||
|     } | ||||
|     for (; i < w; i++) | ||||
|         dst[i + 0] = src1[i + 0] - src2[i + 0]; | ||||
| } | ||||
|  | ||||
| static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ | ||||
|     long i; | ||||
| #if !HAVE_FAST_UNALIGNED | ||||
| @@ -79,27 +47,6 @@ static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *sr | ||||
|         dst[i] = (src1[i] - src2[i]) & mask; | ||||
| } | ||||
|  | ||||
| static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1, | ||||
|                                    const uint8_t *src2, intptr_t w, | ||||
|                                    int *left, int *left_top) | ||||
| { | ||||
|     int i; | ||||
|     uint8_t l, lt; | ||||
|  | ||||
|     l  = *left; | ||||
|     lt = *left_top; | ||||
|  | ||||
|     for (i = 0; i < w; i++) { | ||||
|         const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); | ||||
|         lt     = src1[i]; | ||||
|         l      = src2[i]; | ||||
|         dst[i] = l - pred; | ||||
|     } | ||||
|  | ||||
|     *left     = l; | ||||
|     *left_top = lt; | ||||
| } | ||||
|  | ||||
| static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ | ||||
|     int i; | ||||
|     uint16_t l, lt; | ||||
| @@ -120,9 +67,7 @@ static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, co | ||||
|  | ||||
| av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | ||||
| { | ||||
|     c->diff_bytes           = diff_bytes_c; | ||||
|     c->diff_int16           = diff_int16_c; | ||||
|     c->sub_hfyu_median_pred = sub_hfyu_median_pred_c; | ||||
|     c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; | ||||
|  | ||||
|     if (ARCH_X86) | ||||
|   | ||||
| @@ -24,22 +24,11 @@ | ||||
| #include "avcodec.h" | ||||
|  | ||||
| typedef struct HuffYUVEncDSPContext { | ||||
|     void (*diff_bytes)(uint8_t *dst /* align 16 */, | ||||
|                        const uint8_t *src1 /* align 16 */, | ||||
|                        const uint8_t *src2 /* align 1 */, | ||||
|                        intptr_t w); | ||||
|     void (*diff_int16)(uint16_t *dst /* align 16 */, | ||||
|                        const uint16_t *src1 /* align 16 */, | ||||
|                        const uint16_t *src2 /* align 1 */, | ||||
|                        unsigned mask, int w); | ||||
|  | ||||
|     /** | ||||
|      * Subtract HuffYUV's variant of median prediction. | ||||
|      * Note, this might read from src1[-1], src2[-1]. | ||||
|      */ | ||||
|     void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1, | ||||
|                                  const uint8_t *src2, intptr_t w, | ||||
|                                  int *left, int *left_top); | ||||
|     void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, | ||||
|                                        const uint16_t *src2, unsigned mask, | ||||
|                                        int w, int *left, int *left_top); | ||||
|   | ||||
							
								
								
									
										84
									
								
								libavcodec/lossless_videoencdsp.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								libavcodec/lossless_videoencdsp.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "config.h" | ||||
| #include "libavutil/attributes.h" | ||||
| #include "lossless_videoencdsp.h" | ||||
| #include "mathops.h" | ||||
|  | ||||
| // pb_7f / pb_80: 0x7f / 0x80 replicated into every byte of an unsigned long, i.e. 0x7f7f7f7f when long is 32 bits and 0x7f7f7f7f7f7f7f7f when it is 64 bits | ||||
| #define pb_7f (~0UL / 255 * 0x7f) | ||||
| #define pb_80 (~0UL / 255 * 0x80) | ||||
| /** | ||||
|  * Byte-wise subtraction: dst[i] = src1[i] - src2[i] for 0 <= i < w, with | ||||
|  * every result truncated to 8 bits (it is stored into a uint8_t). | ||||
|  * | ||||
|  * @param dst  output buffer, w bytes are written | ||||
|  * @param src1 minuend bytes | ||||
|  * @param src2 subtrahend bytes | ||||
|  * @param w    number of bytes to process | ||||
|  */ | ||||
| static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) | ||||
| { | ||||
|     long i; | ||||
|  | ||||
| #if !HAVE_FAST_UNALIGNED /* build-time switch; when it is 0 the source alignment is tested at run time */ | ||||
|     if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { /* src1 or src2 is not long-aligned: plain byte ops, 8 per iteration */ | ||||
|         for (i = 0; i + 7 < w; i += 8) { | ||||
|             dst[i + 0] = src1[i + 0] - src2[i + 0]; | ||||
|             dst[i + 1] = src1[i + 1] - src2[i + 1]; | ||||
|             dst[i + 2] = src1[i + 2] - src2[i + 2]; | ||||
|             dst[i + 3] = src1[i + 3] - src2[i + 3]; | ||||
|             dst[i + 4] = src1[i + 4] - src2[i + 4]; | ||||
|             dst[i + 5] = src1[i + 5] - src2[i + 5]; | ||||
|             dst[i + 6] = src1[i + 6] - src2[i + 6]; | ||||
|             dst[i + 7] = src1[i + 7] - src2[i + 7]; | ||||
|         } | ||||
|     } else /* both sources long-aligned: use the word-at-a-time loop that follows the #endif */ | ||||
| #endif /* !HAVE_FAST_UNALIGNED */ | ||||
|     for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { /* sizeof(long) bytes per iteration. | ||||
|          * Setting bit 7 of every byte of a and clearing it in every byte of b | ||||
|          * guarantees that no per-byte subtraction borrows from the neighbouring | ||||
|          * byte; the XOR term then puts the correct bit 7 back into every byte. */ | ||||
|         long a = *(long *) (src1 + i); | ||||
|         long b = *(long *) (src2 + i); | ||||
|         *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ | ||||
|                               ((a ^ b ^ pb_80) & pb_80); /* NOTE(review): only src1/src2 alignment is tested above, dst is | ||||
|                                                           * also written through a long pointer - presumably callers keep | ||||
|                                                           * dst aligned; confirm */ | ||||
|     } | ||||
|     for (; i < w; i++) /* tail: bytes left over by whichever loop ran above */ | ||||
|         dst[i + 0] = src1[i + 0] - src2[i + 0]; | ||||
| } | ||||
| /** | ||||
|  * Write the median-prediction residual of src2 into dst for w bytes. | ||||
|  * | ||||
|  * For each i the predictor is mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF), | ||||
|  * where l is the src2 byte consumed in the previous iteration and lt the src1 | ||||
|  * byte consumed in the previous iteration; dst[i] = src2[i] - predictor, | ||||
|  * truncated to 8 bits. mid_pred() is declared elsewhere (mathops.h is included | ||||
|  * by this file); presumably it returns the median of its three arguments - | ||||
|  * confirm. | ||||
|  * The callers visible in this commit pass a previous row as src1 and the | ||||
|  * current row as src2. | ||||
|  * | ||||
|  * @param dst      output residuals, w bytes are written | ||||
|  * @param src1     first source row (supplies the "top" and "top-left" samples) | ||||
|  * @param src2     second source row (the samples being predicted) | ||||
|  * @param w        number of bytes to process | ||||
|  * @param left     in: initial l; out: last src2 byte consumed | ||||
|  * @param left_top in: initial lt; out: last src1 byte consumed | ||||
|  */ | ||||
| static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1, | ||||
|                               const uint8_t *src2, intptr_t w, | ||||
|                               int *left, int *left_top) | ||||
| { | ||||
|     int i; | ||||
|     uint8_t l, lt; | ||||
|  | ||||
|     l  = *left; /* incoming state is truncated to 8 bits by the uint8_t locals */ | ||||
|     lt = *left_top; | ||||
|  | ||||
|     for (i = 0; i < w; i++) { | ||||
|         const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); /* third candidate is the gradient l + top - lt, wrapped to 8 bits */ | ||||
|         lt     = src1[i]; /* current src1 sample becomes lt for the next column */ | ||||
|         l      = src2[i]; /* current src2 sample becomes l for the next column */ | ||||
|         dst[i] = l - pred; | ||||
|     } | ||||
|  | ||||
|     *left     = l; /* hand the running state back so the caller can continue with the next call */ | ||||
|     *left_top = lt; | ||||
| } | ||||
| /** | ||||
|  * Fill an LLVidEncDSPContext with the C implementations defined in this file, | ||||
|  * then give the x86 init function a chance to run when ARCH_X86 is nonzero. | ||||
|  * | ||||
|  * @param c context to initialize; both function pointers are overwritten | ||||
|  */ | ||||
| av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c) | ||||
| { | ||||
|     c->diff_bytes      = diff_bytes_c; | ||||
|     c->sub_median_pred = sub_median_pred_c; | ||||
|  | ||||
|     if (ARCH_X86) /* presumably a 0/1 build-time constant from config.h - confirm */ | ||||
|         ff_llvidencdsp_init_x86(c); /* defined elsewhere; presumably replaces the pointers with optimized versions - confirm */ | ||||
| } | ||||
							
								
								
									
										41
									
								
								libavcodec/lossless_videoencdsp.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								libavcodec/lossless_videoencdsp.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #ifndef AVCODEC_LOSSLESS_VIDEOENCDSP_H | ||||
| #define AVCODEC_LOSSLESS_VIDEOENCDSP_H | ||||
|  | ||||
| #include <stdint.h> | ||||
| /** | ||||
|  * Function-pointer table for the 8-bit helpers that this commit moves out of | ||||
|  * HuffYUVEncDSPContext so that several lossless video encoders can share them. | ||||
|  * Initialize it with ff_llvidencdsp_init() before calling through it. | ||||
|  * | ||||
|  * diff_bytes:      dst[i] = src1[i] - src2[i] for w bytes, 8-bit wraparound. | ||||
|  * sub_median_pred: median-prediction residual, see the comment on the member. | ||||
|  */ | ||||
| typedef struct LLVidEncDSPContext { | ||||
|     void (*diff_bytes)(uint8_t *dst /* align 16 */, | ||||
|                        const uint8_t *src1 /* align 16 */, | ||||
|                        const uint8_t *src2 /* align 1 */, | ||||
|                        intptr_t w); | ||||
|     /** | ||||
|      * Subtract HuffYUV's variant of median prediction. | ||||
|      * Note, this might read from src1[-1], src2[-1]. | ||||
|      * | ||||
|      * Writes w residual bytes to dst. left and left_top carry the running | ||||
|      * "left" and "top-left" samples into the call and receive the updated | ||||
|      * values on return, so consecutive calls can continue where the | ||||
|      * previous one stopped. | ||||
|      * NOTE(review): the C implementation takes its neighbours from *left and | ||||
|      * *left_top only; presumably the src[-1] warning concerns the optimized | ||||
|      * implementations - confirm. | ||||
|      */ | ||||
|     void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1, | ||||
|                             const uint8_t *src2, intptr_t w, | ||||
|                             int *left, int *left_top); | ||||
| } LLVidEncDSPContext; | ||||
| /** | ||||
|  * ff_llvidencdsp_init() installs the C implementations into c and, on x86 | ||||
|  * builds, calls ff_llvidencdsp_init_x86() on the same context afterwards. | ||||
|  * ff_llvidencdsp_init_x86() is defined outside this header's own .c file; | ||||
|  * presumably it is not meant to be called directly - confirm. | ||||
|  */ | ||||
| void ff_llvidencdsp_init(LLVidEncDSPContext *c); | ||||
| void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c); | ||||
|  | ||||
| #endif /* AVCODEC_LOSSLESS_VIDEOENCDSP_H */ | ||||
| @@ -22,7 +22,7 @@ | ||||
| #include "avcodec.h" | ||||
| #include "internal.h" | ||||
| #include "bytestream.h" | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "lossless_videoencdsp.h" | ||||
| #include "png.h" | ||||
| #include "apng.h" | ||||
|  | ||||
| @@ -47,7 +47,7 @@ typedef struct APNGFctlChunk { | ||||
|  | ||||
| typedef struct PNGEncContext { | ||||
|     AVClass *class; | ||||
|     HuffYUVEncDSPContext hdsp; | ||||
|     LLVidEncDSPContext llvidencdsp; | ||||
|  | ||||
|     uint8_t *bytestream; | ||||
|     uint8_t *bytestream_start; | ||||
| @@ -159,7 +159,7 @@ static void sub_left_prediction(PNGEncContext *c, uint8_t *dst, const uint8_t *s | ||||
|     for (x = 0; x < unaligned_w; x++) | ||||
|         *dst++ = *src1++ - *src2++; | ||||
|     size -= unaligned_w; | ||||
|     c->hdsp.diff_bytes(dst, src1, src2, size); | ||||
|     c->llvidencdsp.diff_bytes(dst, src1, src2, size); | ||||
| } | ||||
|  | ||||
| static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | ||||
| @@ -175,7 +175,7 @@ static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | ||||
|         sub_left_prediction(c, dst, src, bpp, size); | ||||
|         break; | ||||
|     case PNG_FILTER_VALUE_UP: | ||||
|         c->hdsp.diff_bytes(dst, src, top, size); | ||||
|         c->llvidencdsp.diff_bytes(dst, src, top, size); | ||||
|         break; | ||||
|     case PNG_FILTER_VALUE_AVG: | ||||
|         for (i = 0; i < bpp; i++) | ||||
| @@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS | ||||
| FF_ENABLE_DEPRECATION_WARNINGS | ||||
| #endif | ||||
|  | ||||
|     ff_huffyuvencdsp_init(&s->hdsp, avctx); | ||||
|     ff_llvidencdsp_init(&s->llvidencdsp); | ||||
|  | ||||
| #if FF_API_PRIVATE_OPT | ||||
| FF_DISABLE_DEPRECATION_WARNINGS | ||||
|   | ||||
| @@ -30,8 +30,8 @@ | ||||
| #include "libavutil/common.h" | ||||
| #include "avcodec.h" | ||||
| #include "bswapdsp.h" | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "lossless_videodsp.h" | ||||
| #include "lossless_videoencdsp.h" | ||||
|  | ||||
| enum { | ||||
|     PRED_NONE = 0, | ||||
| @@ -70,8 +70,8 @@ typedef struct UtvideoContext { | ||||
|     const AVClass *class; | ||||
|     AVCodecContext *avctx; | ||||
|     BswapDSPContext bdsp; | ||||
|     HuffYUVEncDSPContext hdsp; | ||||
|     LLVidDSPContext llviddsp; | ||||
|     LLVidEncDSPContext llvidencdsp; | ||||
|  | ||||
|     uint32_t frame_info_size, flags, frame_info; | ||||
|     int      planes; | ||||
|   | ||||
| @@ -33,7 +33,6 @@ | ||||
| #include "bswapdsp.h" | ||||
| #include "bytestream.h" | ||||
| #include "put_bits.h" | ||||
| #include "huffyuvencdsp.h" | ||||
| #include "mathops.h" | ||||
| #include "utvideo.h" | ||||
| #include "huffman.h" | ||||
| @@ -120,7 +119,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) | ||||
|     } | ||||
|  | ||||
|     ff_bswapdsp_init(&c->bdsp); | ||||
|     ff_huffyuvencdsp_init(&c->hdsp, avctx); | ||||
|     ff_llvidencdsp_init(&c->llvidencdsp); | ||||
|  | ||||
| #if FF_API_PRIVATE_OPT | ||||
| FF_DISABLE_DEPRECATION_WARNINGS | ||||
| @@ -324,7 +323,7 @@ static void median_predict(UtvideoContext *c, uint8_t *src, uint8_t *dst, int st | ||||
|  | ||||
|     /* Rest of the coded part uses median prediction */ | ||||
|     for (j = 1; j < height; j++) { | ||||
|         c->hdsp.sub_hfyu_median_pred(dst, src - stride, src, width, &A, &B); | ||||
|         c->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &A, &B); | ||||
|         dst += width; | ||||
|         src += stride; | ||||
|     } | ||||
|   | ||||
| @@ -20,8 +20,9 @@ OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o | ||||
| OBJS-$(CONFIG_LLAUDDSP)                += x86/lossless_audiodsp_init.o | ||||
| OBJS-$(CONFIG_LLVIDDSP)                += x86/lossless_videodsp_init.o | ||||
| OBJS-$(CONFIG_LLVIDENCDSP)             += x86/lossless_videoencdsp_init.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP)              += x86/huffyuvdsp_init.o | ||||
| OBJS-$(CONFIG_HUFFYUVENCDSP)           += x86/huffyuvencdsp_mmx.o | ||||
| OBJS-$(CONFIG_HUFFYUVENCDSP)           += x86/huffyuvencdsp_init.o | ||||
| OBJS-$(CONFIG_IDCTDSP)                 += x86/idctdsp_init.o | ||||
| OBJS-$(CONFIG_LPC)                     += x86/lpc.o | ||||
| OBJS-$(CONFIG_ME_CMP)                  += x86/me_cmp_init.o | ||||
| @@ -114,6 +115,7 @@ YASM-OBJS-$(CONFIG_HUFFYUVENCDSP)      += x86/huffyuvencdsp.o | ||||
| YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/idctdsp.o | ||||
| YASM-OBJS-$(CONFIG_LLAUDDSP)           += x86/lossless_audiodsp.o | ||||
| YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o | ||||
| YASM-OBJS-$(CONFIG_LLVIDENCDSP)        += x86/lossless_videoencdsp.o | ||||
| YASM-OBJS-$(CONFIG_ME_CMP)             += x86/me_cmp.o | ||||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o | ||||
| YASM-OBJS-$(CONFIG_MPEGVIDEOENC)       += x86/mpegvideoencdsp.o | ||||
|   | ||||
| @@ -27,128 +27,8 @@ | ||||
|  | ||||
| section .text | ||||
|  | ||||
| ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
| ;                    intptr_t w); | ||||
| %macro DIFF_BYTES_PROLOGUE 0 | ||||
| %if ARCH_X86_32 | ||||
| cglobal diff_bytes, 3,5,2, dst, src1, src2 | ||||
| %define wq r4q | ||||
|     DECLARE_REG_TMP 3 | ||||
|     mov               wq, r3mp | ||||
| %else | ||||
| cglobal diff_bytes, 4,5,2, dst, src1, src2, w | ||||
|     DECLARE_REG_TMP 4 | ||||
| %endif ; ARCH_X86_32 | ||||
| %define i t0q | ||||
| %endmacro | ||||
|  | ||||
| ; label to jump to if w < regsize | ||||
| %macro DIFF_BYTES_LOOP_PREP 1 | ||||
|     mov                i, wq | ||||
|     and                i, -2 * regsize | ||||
|         jz            %1 | ||||
|     add             dstq, i | ||||
|     add            src1q, i | ||||
|     add            src2q, i | ||||
|     neg                i | ||||
| %endmacro | ||||
|  | ||||
| ; mov type used for src1q, dstq, first reg, second reg | ||||
| %macro DIFF_BYTES_LOOP_CORE 4 | ||||
| %if mmsize != 16 | ||||
|     mov%1             %3, [src1q + i] | ||||
|     mov%1             %4, [src1q + i + regsize] | ||||
|     psubb             %3, [src2q + i] | ||||
|     psubb             %4, [src2q + i + regsize] | ||||
|     mov%2           [dstq + i], %3 | ||||
|     mov%2 [regsize + dstq + i], %4 | ||||
| %else | ||||
|     ; SSE enforces alignment of psubb operand | ||||
|     mov%1             %3, [src1q + i] | ||||
|     movu              %4, [src2q + i] | ||||
|     psubb             %3, %4 | ||||
|     mov%2     [dstq + i], %3 | ||||
|     mov%1             %3, [src1q + i + regsize] | ||||
|     movu              %4, [src2q + i + regsize] | ||||
|     psubb             %3, %4 | ||||
|     mov%2 [regsize + dstq + i], %3 | ||||
| %endif | ||||
| %endmacro | ||||
|  | ||||
| %macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | ||||
|     %define regsize mmsize | ||||
| .loop_%1%2: | ||||
|     DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | ||||
|     add                i, 2 * regsize | ||||
|         jl    .loop_%1%2 | ||||
| .skip_main_%1%2: | ||||
|     and               wq, 2 * regsize - 1 | ||||
|         jz     .end_%1%2 | ||||
| %if mmsize > 16 | ||||
|     ; fall back to narrower xmm | ||||
|     %define regsize mmsize / 2 | ||||
|     DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | ||||
| .loop2_%1%2: | ||||
|     DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | ||||
|     add                i, 2 * regsize | ||||
|         jl   .loop2_%1%2 | ||||
| .setup_loop_gpr_%1%2: | ||||
|     and               wq, 2 * regsize - 1 | ||||
|         jz     .end_%1%2 | ||||
| %endif | ||||
|     add             dstq, wq | ||||
|     add            src1q, wq | ||||
|     add            src2q, wq | ||||
|     neg               wq | ||||
| .loop_gpr_%1%2: | ||||
|     mov              t0b, [src1q + wq] | ||||
|     sub              t0b, [src2q + wq] | ||||
|     mov      [dstq + wq], t0b | ||||
|     inc               wq | ||||
|         jl .loop_gpr_%1%2 | ||||
| .end_%1%2: | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| %if ARCH_X86_32 | ||||
| INIT_MMX mmx | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_aa | ||||
|     DIFF_BYTES_BODY    a, a | ||||
| %undef i | ||||
| %endif | ||||
|  | ||||
| INIT_XMM sse2 | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_aa | ||||
|     test            dstq, regsize - 1 | ||||
|         jnz     .loop_uu | ||||
|     test           src1q, regsize - 1 | ||||
|         jnz     .loop_ua | ||||
|     DIFF_BYTES_BODY    a, a | ||||
|     DIFF_BYTES_BODY    u, a | ||||
|     DIFF_BYTES_BODY    u, u | ||||
| %undef i | ||||
|  | ||||
| %if HAVE_AVX2_EXTERNAL | ||||
| INIT_YMM avx2 | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     ; Directly using unaligned SSE2 version is marginally faster than | ||||
|     ; branching based on arguments. | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_uu | ||||
|     test            dstq, regsize - 1 | ||||
|         jnz     .loop_uu | ||||
|     test           src1q, regsize - 1 | ||||
|         jnz     .loop_ua | ||||
|     DIFF_BYTES_BODY    a, a | ||||
|     DIFF_BYTES_BODY    u, a | ||||
|     DIFF_BYTES_BODY    u, u | ||||
| %undef i | ||||
| %endif | ||||
|  | ||||
| ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
| ;                    unsigned mask, int w); | ||||
| %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub | ||||
|     movd    m4, maskd | ||||
|     SPLATW  m4, m4 | ||||
|   | ||||
							
								
								
									
										54
									
								
								libavcodec/x86/huffyuvencdsp_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								libavcodec/x86/huffyuvencdsp_init.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| /* | ||||
|  * SIMD-optimized HuffYUV encoding functions | ||||
|  * Copyright (c) 2000, 2001 Fabrice Bellard | ||||
|  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||||
|  * | ||||
|  * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavutil/pixdesc.h" | ||||
| #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/huffyuvencdsp.h" | ||||
|  | ||||
| void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                         unsigned mask, int w); | ||||
| void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                         unsigned mask, int w); | ||||
| void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                                           unsigned mask, int w, int *left, int *left_top); | ||||
|  | ||||
| av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | ||||
| { /* Install x86 SIMD routines into c, chosen from the runtime CPU flags and avctx->pix_fmt. */ | ||||
|     av_unused int cpu_flags = av_get_cpu_flags(); | ||||
|     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); | ||||
| /* Checks run in order and later ones overwrite earlier assignments, so SSE2 wins over MMX for diff_int16. */ | ||||
|     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { /* MMX variant is only selected when ARCH_X86_32 is set */ | ||||
|         c->diff_int16 = ff_diff_int16_mmx; | ||||
|     } | ||||
| /* The MMXEXT median predictor is installed only if the descriptor lookup succeeded and component 0 is narrower than 16 bits. */ | ||||
|     if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { | ||||
|         c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; | ||||
|     } | ||||
|  | ||||
|     if (EXTERNAL_SSE2(cpu_flags)) { | ||||
|         c->diff_int16 = ff_diff_int16_sse2; | ||||
|     } | ||||
| } | ||||
							
								
								
									
										150
									
								
								libavcodec/x86/lossless_videoencdsp.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								libavcodec/x86/lossless_videoencdsp.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | ||||
| ;************************************************************************ | ||||
| ;* SIMD-optimized lossless video encoding functions | ||||
| ;* Copyright (c) 2000, 2001 Fabrice Bellard | ||||
| ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||||
| ;* | ||||
| ;* MMX optimization by Nick Kurshev <nickols_k@mail.ru> | ||||
| ;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com> | ||||
| ;* | ||||
| ;* This file is part of FFmpeg. | ||||
| ;* | ||||
| ;* FFmpeg is free software; you can redistribute it and/or | ||||
| ;* modify it under the terms of the GNU Lesser General Public | ||||
| ;* License as published by the Free Software Foundation; either | ||||
| ;* version 2.1 of the License, or (at your option) any later version. | ||||
| ;* | ||||
| ;* FFmpeg is distributed in the hope that it will be useful, | ||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
| ;* Lesser General Public License for more details. | ||||
| ;* | ||||
| ;* You should have received a copy of the GNU Lesser General Public | ||||
| ;* License along with FFmpeg; if not, write to the Free Software | ||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| ;****************************************************************************** | ||||
|  | ||||
| %include "libavutil/x86/x86util.asm" | ||||
|  | ||||
| section .text | ||||
|  | ||||
| ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
| ;                    intptr_t w); | ||||
| %macro DIFF_BYTES_PROLOGUE 0 | ||||
| %if ARCH_X86_32 | ||||
| cglobal diff_bytes, 3,5,2, dst, src1, src2 | ||||
| %define wq r4q | ||||
|     DECLARE_REG_TMP 3 | ||||
|     mov               wq, r3mp | ||||
| %else | ||||
| cglobal diff_bytes, 4,5,2, dst, src1, src2, w | ||||
|     DECLARE_REG_TMP 4 | ||||
| %endif ; ARCH_X86_32 | ||||
| %define i t0q | ||||
| %endmacro | ||||
|  | ||||
| ; label to jump to if w < regsize | ||||
| %macro DIFF_BYTES_LOOP_PREP 1 | ||||
|     mov                i, wq | ||||
|     and                i, -2 * regsize | ||||
|         jz            %1 | ||||
|     add             dstq, i | ||||
|     add            src1q, i | ||||
|     add            src2q, i | ||||
|     neg                i | ||||
| %endmacro | ||||
|  | ||||
| ; mov type used for src1q, dstq, first reg, second reg | ||||
| %macro DIFF_BYTES_LOOP_CORE 4 | ||||
| %if mmsize != 16 | ||||
|     mov%1             %3, [src1q + i] | ||||
|     mov%1             %4, [src1q + i + regsize] | ||||
|     psubb             %3, [src2q + i] | ||||
|     psubb             %4, [src2q + i + regsize] | ||||
|     mov%2           [dstq + i], %3 | ||||
|     mov%2 [regsize + dstq + i], %4 | ||||
| %else | ||||
|     ; SSE enforces alignment of psubb operand | ||||
|     mov%1             %3, [src1q + i] | ||||
|     movu              %4, [src2q + i] | ||||
|     psubb             %3, %4 | ||||
|     mov%2     [dstq + i], %3 | ||||
|     mov%1             %3, [src1q + i + regsize] | ||||
|     movu              %4, [src2q + i + regsize] | ||||
|     psubb             %3, %4 | ||||
|     mov%2 [regsize + dstq + i], %3 | ||||
| %endif | ||||
| %endmacro | ||||
|  | ||||
| %macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | ||||
|     %define regsize mmsize | ||||
| .loop_%1%2: | ||||
|     DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | ||||
|     add                i, 2 * regsize | ||||
|         jl    .loop_%1%2 | ||||
| .skip_main_%1%2: | ||||
|     and               wq, 2 * regsize - 1 | ||||
|         jz     .end_%1%2 | ||||
| %if mmsize > 16 | ||||
|     ; fall back to narrower xmm | ||||
|     %define regsize mmsize / 2 | ||||
|     DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | ||||
| .loop2_%1%2: | ||||
|     DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | ||||
|     add                i, 2 * regsize | ||||
|         jl   .loop2_%1%2 | ||||
| .setup_loop_gpr_%1%2: | ||||
|     and               wq, 2 * regsize - 1 | ||||
|         jz     .end_%1%2 | ||||
| %endif | ||||
|     add             dstq, wq | ||||
|     add            src1q, wq | ||||
|     add            src2q, wq | ||||
|     neg               wq | ||||
| .loop_gpr_%1%2: | ||||
|     mov              t0b, [src1q + wq] | ||||
|     sub              t0b, [src2q + wq] | ||||
|     mov      [dstq + wq], t0b | ||||
|     inc               wq | ||||
|         jl .loop_gpr_%1%2 | ||||
| .end_%1%2: | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| %if ARCH_X86_32 | ||||
| INIT_MMX mmx | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_aa | ||||
|     DIFF_BYTES_BODY    a, a | ||||
| %undef i | ||||
| %endif | ||||
|  | ||||
| INIT_XMM sse2 | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_aa | ||||
|     test            dstq, regsize - 1 | ||||
|         jnz     .loop_uu | ||||
|     test           src1q, regsize - 1 | ||||
|         jnz     .loop_ua | ||||
|     DIFF_BYTES_BODY    a, a | ||||
|     DIFF_BYTES_BODY    u, a | ||||
|     DIFF_BYTES_BODY    u, u | ||||
| %undef i | ||||
|  | ||||
| %if HAVE_AVX2_EXTERNAL | ||||
| INIT_YMM avx2 | ||||
| DIFF_BYTES_PROLOGUE | ||||
|     %define regsize mmsize | ||||
|     ; Directly using unaligned SSE2 version is marginally faster than | ||||
|     ; branching based on arguments. | ||||
|     DIFF_BYTES_LOOP_PREP .skip_main_uu | ||||
|     test            dstq, regsize - 1 | ||||
|         jnz     .loop_uu | ||||
|     test           src1q, regsize - 1 | ||||
|         jnz     .loop_ua | ||||
|     DIFF_BYTES_BODY    a, a | ||||
|     DIFF_BYTES_BODY    u, a | ||||
|     DIFF_BYTES_BODY    u, u | ||||
| %undef i | ||||
| %endif | ||||
| @@ -1,5 +1,5 @@ | ||||
| /*
 | ||||
|  * SIMD-optimized HuffYUV encoding functions | ||||
|  * SIMD-optimized lossless video encoding functions | ||||
|  * Copyright (c) 2000, 2001 Fabrice Bellard | ||||
|  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||||
|  * | ||||
| @@ -24,10 +24,9 @@ | ||||
| 
 | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavutil/pixdesc.h" | ||||
| #include "libavutil/x86/asm.h" | ||||
| #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/huffyuvencdsp.h" | ||||
| #include "libavcodec/lossless_videoencdsp.h" | ||||
| #include "libavcodec/mathops.h" | ||||
| 
 | ||||
| void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
| @@ -36,18 +35,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
|                         intptr_t w); | ||||
| void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | ||||
|                         intptr_t w); | ||||
| void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                         unsigned mask, int w); | ||||
| void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                         unsigned mask, int w); | ||||
| void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | ||||
|                                           unsigned mask, int w, int *left, int *left_top); | ||||
| 
 | ||||
| #if HAVE_INLINE_ASM | ||||
| 
 | ||||
| static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | ||||
|                                         const uint8_t *src2, intptr_t w, | ||||
|                                         int *left, int *left_top) | ||||
| static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | ||||
|                                    const uint8_t *src2, intptr_t w, | ||||
|                                    int *left, int *left_top) | ||||
| { | ||||
|     x86_reg i = 0; | ||||
|     uint8_t l, lt; | ||||
| @@ -87,29 +80,22 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | ||||
| 
 | ||||
| #endif /* HAVE_INLINE_ASM */ | ||||
| 
 | ||||
| av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | ||||
| av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) | ||||
| { | ||||
|     av_unused int cpu_flags = av_get_cpu_flags(); | ||||
|     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); | ||||
| 
 | ||||
|     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { | ||||
|         c->diff_bytes = ff_diff_bytes_mmx; | ||||
|         c->diff_int16 = ff_diff_int16_mmx; | ||||
|     } | ||||
| 
 | ||||
| #if HAVE_INLINE_ASM | ||||
|     if (INLINE_MMXEXT(cpu_flags)) { | ||||
|         c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext; | ||||
|         c->sub_median_pred = sub_median_pred_mmxext; | ||||
|     } | ||||
| #endif /* HAVE_INLINE_ASM */ | ||||
| 
 | ||||
|     if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { | ||||
|         c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; | ||||
|     } | ||||
| 
 | ||||
|     if (EXTERNAL_SSE2(cpu_flags)) { | ||||
|         c->diff_bytes = ff_diff_bytes_sse2; | ||||
|         c->diff_int16 = ff_diff_int16_sse2; | ||||
|     } | ||||
| 
 | ||||
|     if (EXTERNAL_AVX2_FAST(cpu_flags)) { | ||||
		Reference in New Issue
	
	Block a user