You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	diracdsp: add dequantization SIMD
Currently unused; it will be used in the following commits.

Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
This commit is contained in:
		
				
					committed by
					
						 Rostislav Pehlivanov
						Rostislav Pehlivanov
					
				
			
			
				
	
			
			
			
						parent
						
							244d22452c
						
					
				
				
					commit
					80721cc1ff
				
			| @@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride, | ||||
|     } | ||||
| } | ||||
|  | ||||
/**
 * Generate the C dequantizer for subband coefficients of type PX.
 *
 * Expands to dequant_subband_<PX>_c(), which reads tot_v rows of tot_h
 * coefficients from src, where the rows are packed back to back, and
 * writes the results to dst, where consecutive rows are stride bytes apart.
 * Every non-zero coefficient c becomes sign(c) * ((|c| * qf + qs) >> 2);
 * a zero coefficient stays zero.
 *
 * The magnitude is computed in unsigned arithmetic, so an oversized
 * product wraps around instead of being signed-overflow undefined
 * behaviour, and the shift by 2 is a logical shift.
 */
#define DEQUANT_SUBBAND(PX)                                                                \
static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride,     \
                                         const int qf, const int qs, int tot_v, int tot_h) \
{                                                                                          \
    int i, y;                                                                              \
    for (y = 0; y < tot_v; y++) {                                                          \
        PX c, *src_r = (PX *)src, *dst_r = (PX *)dst;                                      \
        for (i = 0; i < tot_h; i++) {                                                      \
            c = *src_r++;                                                                  \
            /* scale the magnitude as unsigned, then put the sign back */                  \
            if (c < 0)                                                                     \
                c = -((-(unsigned)c * qf + qs) >> 2);                                      \
            else if (c > 0)                                                                \
                c =  (( (unsigned)c * qf + qs) >> 2);                                      \
            *dst_r++ = c;                                                                  \
        }                                                                                  \
        src += tot_h * (ptrdiff_t)sizeof(PX); /* source rows are tightly packed */         \
        dst += stride;                        /* destination rows use the byte pitch */    \
    }                                                                                      \
}

DEQUANT_SUBBAND(int16_t)
DEQUANT_SUBBAND(int32_t)
|  | ||||
| #define PIXFUNC(PFX, WIDTH)                                             \ | ||||
|     c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \ | ||||
|     c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \ | ||||
| @@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c) | ||||
|     c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c; | ||||
|     c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c; | ||||
|  | ||||
|     c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c; | ||||
|     c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c; | ||||
|  | ||||
|     PIXFUNC(put, 8); | ||||
|     PIXFUNC(put, 16); | ||||
|     PIXFUNC(put, 32); | ||||
|   | ||||
| @@ -22,6 +22,7 @@ | ||||
| #define AVCODEC_DIRACDSP_H | ||||
|  | ||||
| #include <stdint.h> | ||||
| #include <stddef.h> | ||||
|  | ||||
| typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h); | ||||
| typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h); | ||||
| @@ -46,6 +47,9 @@ typedef struct { | ||||
|     void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/); | ||||
|     void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); | ||||
|  | ||||
|     /* 0-1: int16_t and int32_t asm/c, 2-3: int16 and int32_t, C only */ | ||||
|     void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); | ||||
|  | ||||
|     dirac_weight_func weight_dirac_pixels_tab[3]; | ||||
|     dirac_biweight_func biweight_dirac_pixels_tab[3]; | ||||
| } DiracDSPContext; | ||||
|   | ||||
| @@ -263,3 +263,40 @@ ADD_RECT sse2 | ||||
| HPEL_FILTER sse2 | ||||
| ADD_OBMC 32, sse2 | ||||
| ADD_OBMC 16, sse2 | ||||
|  | ||||
| INIT_XMM sse4 | ||||
|  | ||||
; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
;
; Dequantizes tot_v rows of tot_h 32-bit coefficients, four per iteration:
;     dst = sign(src) * ((|src| * qf + qs) >> 2)
; Source rows are consumed back to back (tot_h dwords per row); each
; destination row starts stride bytes after the previous one.
;
; NOTE(review): the last iteration of a row still loads and stores a whole
; vector, so when tot_h is not a multiple of 4 up to 3 dwords past the row
; are read from src and written to dst - confirm callers pad their buffers.
cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
    movd   m2, qfd
    movd   m3, qsd
    SPLATD m2                   ; m2 = qf in every lane
    SPLATD m3                   ; m3 = qs in every lane
    mov    r4d, tot_hd          ; r4 (qs, already in m3) now holds the row width;
                                ; the 32-bit move leaves a clean pointer-width count
    mov    r3, dstq             ; r3 (qf, already in m2) tracks the start of the dst row

    .loop_v:
    mov    tot_hq, r4           ; reload the per-row element counter
    mov    dstq,   r3

    .loop_h:
    movu   m0, [srcq]

    pabsd  m1, m0               ; |c|
    pmulld m1, m2               ; |c| * qf
    paddd  m1, m3               ; + qs
    psrld  m1,  2               ; >> 2
    psignd m1, m0               ; restore the sign; lanes where c == 0 become 0

    movu   [dstq], m1

    add    srcq, mmsize
    add    dstq, mmsize
    sub    tot_hq, 4
    jg     .loop_h

    ; tot_hq is now 0, -1, -2 or -3: the number of dwords the last vector
    ; ran past the end of the row. Rewind src so the next row starts right
    ; after the tot_h coefficients of this one.
    lea    srcq, [srcq + 4*tot_hq]

    add    r3, strideq
    dec    tot_vd
    jg     .loop_v

    RET
|   | ||||
| @@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, | ||||
| void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | ||||
| void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | ||||
|  | ||||
| void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); | ||||
|  | ||||
| #if HAVE_YASM | ||||
|  | ||||
| #define HPEL_FILTER(MMSIZE, EXT)                                                             \ | ||||
| @@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) | ||||
|         c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; | ||||
|         c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; | ||||
|     } | ||||
|  | ||||
|     if (EXTERNAL_SSE4(mm_flags)) { | ||||
|         c->dequant_subband[1]         = ff_dequant_subband_32_sse4; | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user