You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	arm: Implement a NEON version of 422 h264_h_loop_filter_chroma
Previously, the 4:2:0 version was used even for 4:2:2 content. Using a dedicated 4:2:2 version fixes occasional checkasm failures. Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
		| @@ -33,6 +33,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, | ||||
|                                        int beta, int8_t *tc0); | ||||
| void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, | ||||
|                                        int beta, int8_t *tc0); | ||||
| void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha, | ||||
|                                           int beta, int8_t *tc0); | ||||
|  | ||||
| void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, | ||||
|                                    int log2_den, int weight, int offset); | ||||
| @@ -76,7 +78,11 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth, | ||||
|         c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon; | ||||
|         c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon; | ||||
|         c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; | ||||
|         c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; | ||||
|  | ||||
|         if (chroma_format_idc <= 1) | ||||
|             c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; | ||||
|         else | ||||
|             c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; | ||||
|  | ||||
|         c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; | ||||
|         c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; | ||||
|   | ||||
| @@ -237,6 +237,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 | ||||
|         h264_loop_filter_start | ||||
|  | ||||
|         sub             r0,  r0,  #2 | ||||
| h_loop_filter_chroma420: | ||||
|         vld1.32         {d18[0]}, [r0], r1 | ||||
|         vld1.32         {d16[0]}, [r0], r1 | ||||
|         vld1.32         {d0[0]},  [r0], r1 | ||||
| @@ -271,6 +272,24 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
|         @ 4:2:2 variant of the horizontal chroma loop filter. It runs the shared | ||||
|         @ h_loop_filter_chroma420 body twice with a doubled stride: once starting | ||||
|         @ at the first row and once starting at the second row, so the two passes | ||||
|         @ cover interleaved rows. | ||||
|         @ C prototype: (uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0). | ||||
|         @ NOTE(review): assumes the standard ARM calling convention (r0 = pix, | ||||
|         @ r1 = stride, tc0 passed on the stack) - confirm. | ||||
| function ff_h264_h_loop_filter_chroma422_neon, export=1 | ||||
|         h264_loop_filter_start | ||||
|         @ Save r4 (used below to remember the second start address) and lr | ||||
|         @ (overwritten by the bl calls). This moves sp down by 8 bytes. | ||||
|         push            {r4, lr} | ||||
|         @ r4 = pix + stride: start of the second row, kept for the second pass. | ||||
|         add             r4,  r0,  r1 | ||||
|         @ Double the stride so each pass steps over every other row. | ||||
|         add             r1,  r1,  r1 | ||||
|         @ Step back 2 bytes, as the 420 entry point does before the shared label. | ||||
|         sub             r0,  r0,  #2 | ||||
|  | ||||
|         @ First pass: starts at the first row. | ||||
|         bl              h_loop_filter_chroma420 | ||||
|  | ||||
|         @ Reload the 32-bit word behind the pointer stored at [sp, #8] into d24[0]. | ||||
|         @ Because of the 8-byte push above, this is the word that was at [sp] on | ||||
|         @ entry - presumably the tc0 argument; confirm against the prototype. | ||||
|         @ NOTE(review): presumably needed because the first pass modifies d24; | ||||
|         @ the macro and the full 420 body are not visible here - confirm. | ||||
|         ldr             r12, [sp, #8] | ||||
|         ldr             r12, [r12] | ||||
|         vmov.32         d24[0], r12 | ||||
|         @ Second pass starts at the second row (pix + stride), again 2 bytes back. | ||||
|         sub             r0,  r4,  #2 | ||||
|  | ||||
|         bl              h_loop_filter_chroma420 | ||||
|         @ Restore r4 and return by popping the saved lr straight into pc. | ||||
|         pop             {r4, pc} | ||||
| endfunc | ||||
|  | ||||
| @ Biweighted prediction | ||||
|  | ||||
| .macro  biweight_16     macs, macd | ||||
|   | ||||
		Reference in New Issue
	
	Block a user