You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	ARM: allow building in Thumb2 mode
Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
		
							
								
								
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -967,6 +967,7 @@ CONFIG_LIST=" | ||||
|     static | ||||
|     swscale | ||||
|     swscale_alpha | ||||
|     thumb | ||||
|     vaapi | ||||
|     vdpau | ||||
|     version3 | ||||
| @@ -2607,7 +2608,7 @@ if enabled alpha; then | ||||
|  | ||||
| elif enabled arm; then | ||||
|  | ||||
|     check_cflags -marm | ||||
|     enabled thumb && check_cflags -mthumb || check_cflags -marm | ||||
|     nogas=die | ||||
|  | ||||
|     if     check_cpp_condition stddef.h "defined __ARM_PCS_VFP"; then | ||||
|   | ||||
| @@ -114,12 +114,15 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx, | ||||
|              "vmov     d1,  %2,  %3          \n\t" | ||||
|              "lsls     %6,  %6,  #1          \n\t" | ||||
|              "and      %0,  %5,  #1<<31      \n\t" | ||||
|              "it       cs                    \n\t" | ||||
|              "lslcs    %5,  %5,  #1          \n\t" | ||||
|              "lsls     %6,  %6,  #1          \n\t" | ||||
|              "and      %1,  %5,  #1<<31      \n\t" | ||||
|              "it       cs                    \n\t" | ||||
|              "lslcs    %5,  %5,  #1          \n\t" | ||||
|              "lsls     %6,  %6,  #1          \n\t" | ||||
|              "and      %2,  %5,  #1<<31      \n\t" | ||||
|              "it       cs                    \n\t" | ||||
|              "lslcs    %5,  %5,  #1          \n\t" | ||||
|              "vmov     d4,  %0,  %1          \n\t" | ||||
|              "and      %3,  %5,  #1<<31      \n\t" | ||||
|   | ||||
| @@ -27,6 +27,7 @@ function ff_ac3_update_bap_counts_arm, export=1 | ||||
|         lsl             r3,  lr,  #1 | ||||
|         ldrh            r12, [r0, r3] | ||||
|         subs            r2,  r2,  #1 | ||||
|         it              gt | ||||
|         ldrbgt          lr,  [r1], #1 | ||||
|         add             r12, r12, #1 | ||||
|         strh            r12, [r0, r3] | ||||
|   | ||||
| @@ -42,9 +42,11 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 | ||||
|         mov             r11, r10 | ||||
|         ldrb            r10, [r4], #1                   @ band_start_tab[band++] | ||||
|         subs            r9,  r9,  r5                    @   - floor | ||||
|         it              lt | ||||
|         movlt           r9,  #0 | ||||
|         cmp             r10, r3                         @   - end | ||||
|         and             r9,  r9,  r8                    @   & 0x1fe0 | ||||
|         ite             gt | ||||
|         subgt           r8,  r3,  r11 | ||||
|         suble           r8,  r10, r11 | ||||
|         add             r9,  r9,  r5                    @   + floor => m | ||||
|   | ||||
| @@ -41,6 +41,7 @@ endfunc | ||||
|  | ||||
| function ff_ac3_exponent_min_neon, export=1 | ||||
|         cmp             r1,  #0 | ||||
|         it              eq | ||||
|         bxeq            lr | ||||
|         push            {lr} | ||||
|         mov             r12, #256 | ||||
|   | ||||
| @@ -24,9 +24,18 @@ | ||||
| #   define ELF | ||||
| #else | ||||
| #   define ELF @ | ||||
| #endif | ||||
| /* Mode-select line prefixes: a line starting with A is assembled only in ARM builds, a line starting with T only when CONFIG_THUMB is set; the inactive prefix expands to the '@' comment character. */ | ||||
| #if CONFIG_THUMB | ||||
| #   define A @ | ||||
| #   define T | ||||
| #else | ||||
| #   define A | ||||
| #   define T @ | ||||
| #endif | ||||
|  | ||||
|         .syntax unified | ||||
| T       .thumb | ||||
|  | ||||
| .macro  require8 val=1 | ||||
| ELF     .eabi_attribute 24, \val | ||||
| @@ -82,6 +91,90 @@ ELF     .size   \name, . - \name | ||||
| #endif | ||||
| .endm | ||||
| /* ldr_pre: add rm to rn, then load rt from the updated [rn]. ARM builds use one pre-indexed writeback ldr; Thumb builds split it into add + ldr (presumably because the register-offset writeback form is unavailable there -- confirm). */ | ||||
| .macro  ldr_pre         rt,  rn,  rm:vararg | ||||
| A       ldr             \rt, [\rn, \rm]! | ||||
| T       add             \rn, \rn, \rm | ||||
| T       ldr             \rt, [\rn] | ||||
| .endm | ||||
| /* ldr_post: load rt from [rn], then add rm to rn. */ | ||||
| .macro  ldr_post        rt,  rn,  rm:vararg | ||||
| A       ldr             \rt, [\rn], \rm | ||||
| T       ldr             \rt, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* ldrd_reg: load the pair rt/rt2 from [rn + rm]; rn is left unchanged. The Thumb form computes the address into rt first, using it as scratch before the load overwrites it. */ | ||||
| .macro  ldrd_reg        rt,  rt2, rn,  rm | ||||
| A       ldrd            \rt, \rt2, [\rn, \rm] | ||||
| T       add             \rt, \rn, \rm | ||||
| T       ldrd            \rt, \rt2, [\rt] | ||||
| .endm | ||||
| /* ldrd_post: load the pair rt/rt2 from [rn], then add rm to rn. */ | ||||
| .macro  ldrd_post       rt,  rt2, rn,  rm | ||||
| A       ldrd            \rt, \rt2, [\rn], \rm | ||||
| T       ldrd            \rt, \rt2, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* ldrh_pre: add rm to rn, then load a halfword into rt from the updated [rn]. */ | ||||
| .macro  ldrh_pre        rt,  rn,  rm | ||||
| A       ldrh            \rt, [\rn, \rm]! | ||||
| T       add             \rn, \rn, \rm | ||||
| T       ldrh            \rt, [\rn] | ||||
| .endm | ||||
| /* ldrh_dpre: subtract rm from rn, then load a halfword into rt from the updated [rn]. */ | ||||
| .macro  ldrh_dpre       rt,  rn,  rm | ||||
| A       ldrh            \rt, [\rn, -\rm]! | ||||
| T       sub             \rn, \rn, \rm | ||||
| T       ldrh            \rt, [\rn] | ||||
| .endm | ||||
| /* ldrh_post: load a halfword into rt from [rn], then add rm to rn. */ | ||||
| .macro  ldrh_post       rt,  rn,  rm | ||||
| A       ldrh            \rt, [\rn], \rm | ||||
| T       ldrh            \rt, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* str_post: store rt to [rn], then add rm to rn. */ | ||||
| .macro  str_post       rt,  rn,  rm:vararg | ||||
| A       str             \rt, [\rn], \rm | ||||
| T       str             \rt, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* strb_post: store the low byte of rt to [rn], then add rm to rn. */ | ||||
| .macro  strb_post       rt,  rn,  rm:vararg | ||||
| A       strb            \rt, [\rn], \rm | ||||
| T       strb            \rt, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* strd_post: store the pair rt/rt2 to [rn], then add rm to rn. */ | ||||
| .macro  strd_post       rt,  rt2, rn,  rm | ||||
| A       strd            \rt, \rt2, [\rn], \rm | ||||
| T       strd            \rt, \rt2, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* strh_pre: add rm to rn, then store a halfword from rt to the updated [rn]. */ | ||||
| .macro  strh_pre        rt,  rn,  rm | ||||
| A       strh            \rt, [\rn, \rm]! | ||||
| T       add             \rn, \rn, \rm | ||||
| T       strh            \rt, [\rn] | ||||
| .endm | ||||
| /* strh_dpre: subtract rm from rn, then store a halfword from rt to the updated [rn]. */ | ||||
| .macro  strh_dpre       rt,  rn,  rm | ||||
| A       strh            \rt, [\rn, -\rm]! | ||||
| T       sub             \rn, \rn, \rm | ||||
| T       strh            \rt, [\rn] | ||||
| .endm | ||||
| /* strh_post: store a halfword from rt to [rn], then add rm to rn. */ | ||||
| .macro  strh_post       rt,  rn,  rm | ||||
| A       strh            \rt, [\rn], \rm | ||||
| T       strh            \rt, [\rn] | ||||
| T       add             \rn, \rn, \rm | ||||
| .endm | ||||
| /* strh_dpost: store a halfword from rt to [rn], then subtract rm from rn. */ | ||||
| .macro  strh_dpost       rt,  rn,  rm | ||||
| A       strh            \rt, [\rn], -\rm | ||||
| T       strh            \rt, [\rn] | ||||
| T       sub             \rn, \rn, \rm | ||||
| .endm | ||||
|  | ||||
| #if HAVE_VFP_ARGS | ||||
|         .eabi_attribute 28, 1 | ||||
| #   define VFP | ||||
|   | ||||
| @@ -27,6 +27,7 @@ function ff_dca_lfe_fir_neon, export=1 | ||||
|         add             r5,  r2,  #256*4-16     @ cf1 | ||||
|         sub             r1,  r1,  #12 | ||||
|         cmp             r3,  #32 | ||||
|         ite             eq | ||||
|         moveq           r6,  #256/32 | ||||
|         movne           r6,  #256/64 | ||||
| NOVFP   vldr            s0,  [sp, #16]          @ scale | ||||
|   | ||||
| @@ -554,10 +554,12 @@ endfunc | ||||
|         and             r9,  r5,  r14 | ||||
|         and             r10, r6,  r14 | ||||
|         and             r11, r7,  r14 | ||||
|         it              eq | ||||
|         andeq           r14, r14, r14, \rnd #1 | ||||
|         add             r8,  r8,  r10 | ||||
|         add             r9,  r9,  r11 | ||||
|         ldr             r12, =0xfcfcfcfc >> 2 | ||||
|         itt             eq | ||||
|         addeq           r8,  r8,  r14 | ||||
|         addeq           r9,  r9,  r14 | ||||
|         and             r4,  r12, r4,  lsr #2 | ||||
| @@ -638,8 +640,10 @@ function ff_add_pixels_clamped_arm, export=1 | ||||
|         mvn             r5,  r5 | ||||
|         mvn             r7,  r7 | ||||
|         tst             r6,  #0x100 | ||||
|         it              ne | ||||
|         movne           r6,  r5,  lsr #24 | ||||
|         tst             r8,  #0x100 | ||||
|         it              ne | ||||
|         movne           r8,  r7,  lsr #24 | ||||
|         mov             r9,  r6 | ||||
|         ldrsh           r5,  [r0, #4]           /* moved from [A] */ | ||||
| @@ -654,8 +658,10 @@ function ff_add_pixels_clamped_arm, export=1 | ||||
|         mvn             r5,  r5 | ||||
|         mvn             r7,  r7 | ||||
|         tst             r6,  #0x100 | ||||
|         it              ne | ||||
|         movne           r6,  r5,  lsr #24 | ||||
|         tst             r8,  #0x100 | ||||
|         it              ne | ||||
|         movne           r8,  r7,  lsr #24 | ||||
|         orr             r9,  r9,  r6,  lsl #16 | ||||
|         ldr             r4,  [r1, #4]           /* moved from [B] */ | ||||
| @@ -676,8 +682,10 @@ function ff_add_pixels_clamped_arm, export=1 | ||||
|         mvn             r5,  r5 | ||||
|         mvn             r7,  r7 | ||||
|         tst             r6,  #0x100 | ||||
|         it              ne | ||||
|         movne           r6,  r5,  lsr #24 | ||||
|         tst             r8,  #0x100 | ||||
|         it              ne | ||||
|         movne           r8,  r7,  lsr #24 | ||||
|         mov             r9,  r6 | ||||
|         ldrsh           r5,  [r0, #12]          /* moved from [D] */ | ||||
| @@ -692,8 +700,10 @@ function ff_add_pixels_clamped_arm, export=1 | ||||
|         mvn             r5,  r5 | ||||
|         mvn             r7,  r7 | ||||
|         tst             r6,  #0x100 | ||||
|         it              ne | ||||
|         movne           r6,  r5,  lsr #24 | ||||
|         tst             r8,  #0x100 | ||||
|         it              ne | ||||
|         movne           r8,  r7,  lsr #24 | ||||
|         orr             r9,  r9,  r6,  lsl #16 | ||||
|         add             r0,  r0,  #16           /* moved from [E] */ | ||||
|   | ||||
| @@ -47,16 +47,16 @@ function ff_put_pixels16_armv6, export=1 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, #8] | ||||
|         ldr             r7,  [r1, #12] | ||||
|         ldr             r4,  [r1], r2 | ||||
|         ldr_post        r4,  r1,  r2 | ||||
|         strd            r6,  r7,  [r0, #8] | ||||
|         ldr             r9,  [r1, #4] | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         strd_post       r4,  r5,  r0,  r2 | ||||
|         ldr             r10, [r1, #8] | ||||
|         ldr             r11, [r1, #12] | ||||
|         ldr             r8,  [r1], r2 | ||||
|         ldr_post        r8,  r1,  r2 | ||||
|         strd            r10, r11, [r0, #8] | ||||
|         subs            r3,  r3,  #2 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         strd_post       r8,  r9,  r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11} | ||||
| @@ -67,12 +67,12 @@ function ff_put_pixels8_armv6, export=1 | ||||
|         push            {r4-r7} | ||||
| 1: | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r4,  [r1], r2 | ||||
|         ldr_post        r4,  r1,  r2 | ||||
|         ldr             r7,  [r1, #4] | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         ldr             r6,  [r1], r2 | ||||
|         strd_post       r4,  r5,  r0,  r2 | ||||
|         ldr_post        r6,  r1,  r2 | ||||
|         subs            r3,  r3,  #2 | ||||
|         strd            r6,  r7,  [r0],  r2 | ||||
|         strd_post       r6,  r7,  r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r7} | ||||
| @@ -90,7 +90,7 @@ function ff_put_pixels8_x2_armv6, export=1 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r7,  [r1, #5] | ||||
|         lsr             r6,  r4,  #8 | ||||
|         ldr             r8,  [r1, r2]! | ||||
|         ldr_pre         r8,  r1,  r2 | ||||
|         orr             r6,  r6,  r5,  lsl #24 | ||||
|         ldr             r9,  [r1, #4] | ||||
|         ldr             r11, [r1, #5] | ||||
| @@ -112,9 +112,9 @@ function ff_put_pixels8_x2_armv6, export=1 | ||||
|         uhadd8          r9,  r9,  r11 | ||||
|         and             r6,  r6,  r12 | ||||
|         uadd8           r8,  r8,  r14 | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         strd_post       r4,  r5,  r0,  r2 | ||||
|         uadd8           r9,  r9,  r6 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         strd_post       r8,  r9,  r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11, pc} | ||||
| @@ -127,7 +127,7 @@ function ff_put_pixels8_y2_armv6, export=1 | ||||
|         orr             r12, r12, r12, lsl #16 | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr_pre         r6,  r1,  r2 | ||||
|         ldr             r7,  [r1, #4] | ||||
| 1: | ||||
|         subs            r3,  r3,  #2 | ||||
| @@ -136,7 +136,7 @@ function ff_put_pixels8_y2_armv6, export=1 | ||||
|         uhadd8          r9,  r5,  r7 | ||||
|         eor             r11, r5,  r7 | ||||
|         and             r10, r10, r12 | ||||
|         ldr             r4,  [r1, r2]! | ||||
|         ldr_pre         r4,  r1,  r2 | ||||
|         uadd8           r8,  r8,  r10 | ||||
|         and             r11, r11, r12 | ||||
|         uadd8           r9,  r9,  r11 | ||||
| @@ -148,11 +148,11 @@ function ff_put_pixels8_y2_armv6, export=1 | ||||
|         eor             r7,  r5,  r7 | ||||
|         uadd8           r10, r10, r6 | ||||
|         and             r7,  r7,  r12 | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr_pre         r6,  r1,  r2 | ||||
|         uadd8           r11, r11, r7 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         strd_post       r8,  r9,  r0,  r2 | ||||
|         ldr             r7,  [r1, #4] | ||||
|         strd            r10, r11, [r0],  r2 | ||||
|         strd_post       r10, r11, r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11} | ||||
| @@ -166,7 +166,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1 | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r7,  [r1, #5] | ||||
|         ldr             r8,  [r1, r2]! | ||||
|         ldr_pre         r8,  r1,  r2 | ||||
|         ldr             r9,  [r1, #4] | ||||
|         ldr             r14, [r1, #5] | ||||
|         add             r1,  r1,  r2 | ||||
| @@ -191,16 +191,16 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1 | ||||
|         push            {r4-r9, lr} | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr_pre         r6,  r1,  r2 | ||||
|         ldr             r7,  [r1, #4] | ||||
| 1: | ||||
|         subs            r3,  r3,  #2 | ||||
|         uhadd8          r8,  r4,  r6 | ||||
|         ldr             r4,  [r1, r2]! | ||||
|         ldr_pre         r4,  r1,  r2 | ||||
|         uhadd8          r9,  r5,  r7 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         uhadd8          r12, r4,  r6 | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr_pre         r6,  r1,  r2 | ||||
|         uhadd8          r14, r5,  r7 | ||||
|         ldr             r7,  [r1, #4] | ||||
|         stm             r0,  {r8,r9} | ||||
| @@ -220,44 +220,44 @@ function ff_avg_pixels8_armv6, export=1 | ||||
|         orr             lr,  lr,  lr,  lsl #16 | ||||
|         ldrd            r4,  r5,  [r0] | ||||
|         ldr             r10, [r1, #4] | ||||
|         ldr             r9,  [r1], r2 | ||||
|         ldr_post        r9,  r1,  r2 | ||||
|         subs            r3,  r3,  #2 | ||||
| 1: | ||||
|         pld             [r1, r2] | ||||
|         eor             r8,  r4,  r9 | ||||
|         uhadd8          r4,  r4,  r9 | ||||
|         eor             r12, r5,  r10 | ||||
|         ldrd            r6,  r7,  [r0, r2] | ||||
|         ldrd_reg        r6,  r7,  r0,  r2 | ||||
|         uhadd8          r5,  r5,  r10 | ||||
|         and             r8,  r8,  lr | ||||
|         ldr             r10, [r1, #4] | ||||
|         and             r12, r12, lr | ||||
|         uadd8           r4,  r4,  r8 | ||||
|         ldr             r9,  [r1], r2 | ||||
|         ldr_post        r9,  r1,  r2 | ||||
|         eor             r8,  r6,  r9 | ||||
|         uadd8           r5,  r5,  r12 | ||||
|         pld             [r1, r2,  lsl #1] | ||||
|         eor             r12, r7,  r10 | ||||
|         uhadd8          r6,  r6,  r9 | ||||
|         strd            r4,  r5,  [r0], r2 | ||||
|         strd_post       r4,  r5,  r0,  r2 | ||||
|         uhadd8          r7,  r7,  r10 | ||||
|         beq             2f | ||||
|         and             r8,  r8,  lr | ||||
|         ldrd            r4,  r5,  [r0, r2] | ||||
|         ldrd_reg        r4,  r5,  r0,  r2 | ||||
|         uadd8           r6,  r6,  r8 | ||||
|         ldr             r10, [r1, #4] | ||||
|         and             r12, r12, lr | ||||
|         subs            r3,  r3,  #2 | ||||
|         uadd8           r7,  r7,  r12 | ||||
|         ldr             r9,  [r1], r2 | ||||
|         strd            r6,  r7,  [r0], r2 | ||||
|         ldr_post        r9,  r1,  r2 | ||||
|         strd_post       r6,  r7,  r0,  r2 | ||||
|         b               1b | ||||
| 2: | ||||
|         and             r8,  r8,  lr | ||||
|         and             r12, r12, lr | ||||
|         uadd8           r6,  r6,  r8 | ||||
|         uadd8           r7,  r7,  r12 | ||||
|         strd            r6,  r7,  [r0], r2 | ||||
|         strd_post       r6,  r7,  r0,  r2 | ||||
|  | ||||
|         pop             {r4-r10, pc} | ||||
| endfunc | ||||
| @@ -284,7 +284,7 @@ function ff_add_pixels_clamped_armv6, export=1 | ||||
|         orr             r6,  r8,  r5,  lsl #8 | ||||
|         orr             r7,  r4,  lr,  lsl #8 | ||||
|         subs            r3,  r3,  #1 | ||||
|         strd            r6,  r7,  [r1],  r2 | ||||
|         strd_post       r6,  r7,  r1,  r2 | ||||
|         bgt             1b | ||||
|         pop             {r4-r8,pc} | ||||
| endfunc | ||||
| @@ -294,7 +294,7 @@ function ff_get_pixels_armv6, export=1 | ||||
|         push            {r4-r8, lr} | ||||
|         mov             lr,  #8 | ||||
| 1: | ||||
|         ldrd            r4,  r5,  [r1],  r2 | ||||
|         ldrd_post       r4,  r5,  r1,  r2 | ||||
|         subs            lr,  lr,  #1 | ||||
|         uxtb16          r6,  r4 | ||||
|         uxtb16          r4,  r4,  ror #8 | ||||
| @@ -317,8 +317,8 @@ function ff_diff_pixels_armv6, export=1 | ||||
|         push            {r4-r9, lr} | ||||
|         mov             lr,  #8 | ||||
| 1: | ||||
|         ldrd            r4,  r5,  [r1],  r3 | ||||
|         ldrd            r6,  r7,  [r2],  r3 | ||||
|         ldrd_post       r4,  r5,  r1,  r3 | ||||
|         ldrd_post       r6,  r7,  r2,  r3 | ||||
|         uxtb16          r8,  r4 | ||||
|         uxtb16          r4,  r4,  ror #8 | ||||
|         uxtb16          r9,  r6 | ||||
| @@ -492,19 +492,19 @@ function ff_pix_abs8_armv6, export=1 | ||||
|         push            {r4-r9, lr} | ||||
|         mov             r0,  #0 | ||||
|         mov             lr,  #0 | ||||
|         ldrd            r4,  r5,  [r1], r3 | ||||
|         ldrd_post       r4,  r5,  r1,  r3 | ||||
| 1: | ||||
|         subs            r12, r12, #2 | ||||
|         ldr             r7,  [r2, #4] | ||||
|         ldr             r6,  [r2], r3 | ||||
|         ldrd            r8,  r9,  [r1], r3 | ||||
|         ldr_post        r6,  r2,  r3 | ||||
|         ldrd_post       r8,  r9,  r1,  r3 | ||||
|         usada8          r0,  r4,  r6,  r0 | ||||
|         pld             [r2, r3] | ||||
|         usada8          lr,  r5,  r7,  lr | ||||
|         ldr             r7,  [r2, #4] | ||||
|         ldr             r6,  [r2], r3 | ||||
|         ldr_post        r6,  r2,  r3 | ||||
|         beq             2f | ||||
|         ldrd            r4,  r5,  [r1], r3 | ||||
|         ldrd_post       r4,  r5,  r1,  r3 | ||||
|         usada8          r0,  r8,  r6,  r0 | ||||
|         pld             [r2, r3] | ||||
|         usada8          lr,  r9,  r7,  lr | ||||
| @@ -613,7 +613,7 @@ function ff_pix_sum_armv6, export=1 | ||||
|         ldr             r7,  [r0, #12] | ||||
|         usada8          r2,  r6,  lr,  r2 | ||||
|         beq             2f | ||||
|         ldr             r4,  [r0, r1]! | ||||
|         ldr_pre         r4,  r0,  r1 | ||||
|         usada8          r3,  r7,  lr,  r3 | ||||
|         bgt             1b | ||||
| 2: | ||||
|   | ||||
| @@ -531,6 +531,7 @@ function ff_vorbis_inverse_coupling_neon, export=1 | ||||
|  | ||||
| 2:      vst1.32         {d2-d3},  [r3, :128]! | ||||
|         vst1.32         {d0-d1},  [r12,:128]! | ||||
|         it              lt | ||||
|         bxlt            lr | ||||
|  | ||||
| 3:      vld1.32         {d2-d3},  [r1,:128] | ||||
| @@ -575,6 +576,7 @@ NOVFP   vdup.32         q8,  r2 | ||||
| 2:      vst1.32         {q2},[r0,:128]! | ||||
|         vst1.32         {q3},[r0,:128]! | ||||
|         ands            len, len, #15 | ||||
|         it              eq | ||||
|         bxeq            lr | ||||
| 3:      vld1.32         {q0},[r1,:128]! | ||||
|         vmul.f32        q0,  q0,  q8 | ||||
| @@ -638,6 +640,7 @@ NOVFP   ldr             r3,  [sp] | ||||
| 2:      vst1.32         {q8},[r0,:128]! | ||||
|         vst1.32         {q9},[r0,:128]! | ||||
|         ands            r3,  r3,  #7 | ||||
|         it              eq | ||||
|         popeq           {pc} | ||||
| 3:      vld1.32         {q0},[r1,:128]! | ||||
|         ldr             r12, [r2], #4 | ||||
|   | ||||
| @@ -55,18 +55,23 @@ function ff_vector_fmul_vfp, export=1 | ||||
| 1: | ||||
|         subs            r3,  r3,  #16 | ||||
|         vmul.f32        s12, s4,  s12 | ||||
|         itttt           ge | ||||
|         vldmiage        r1!, {s16-s19} | ||||
|         vldmiage        r2!, {s24-s27} | ||||
|         vldmiage        r1!, {s20-s23} | ||||
|         vldmiage        r2!, {s28-s31} | ||||
|         it              ge | ||||
|         vmulge.f32      s24, s16, s24 | ||||
|         vstmia          r0!, {s8-s11} | ||||
|         vstmia          r0!, {s12-s15} | ||||
|         it              ge | ||||
|         vmulge.f32      s28, s20, s28 | ||||
|         itttt           gt | ||||
|         vldmiagt        r1!, {s0-s3} | ||||
|         vldmiagt        r2!, {s8-s11} | ||||
|         vldmiagt        r1!, {s4-s7} | ||||
|         vldmiagt        r2!, {s12-s15} | ||||
|         ittt            ge | ||||
|         vmulge.f32      s8,  s0,  s8 | ||||
|         vstmiage        r0!, {s24-s27} | ||||
|         vstmiage        r0!, {s28-s31} | ||||
| @@ -97,33 +102,49 @@ function ff_vector_fmul_reverse_vfp, export=1 | ||||
|         vmul.f32        s11, s0,  s11 | ||||
| 1: | ||||
|         subs            r3,  r3,  #16 | ||||
|         it              ge | ||||
|         vldmdbge        r2!, {s16-s19} | ||||
|         vmul.f32        s12, s7,  s12 | ||||
|         it              ge | ||||
|         vldmiage        r1!, {s24-s27} | ||||
|         vmul.f32        s13, s6,  s13 | ||||
|         it              ge | ||||
|         vldmdbge        r2!, {s20-s23} | ||||
|         vmul.f32        s14, s5,  s14 | ||||
|         it              ge | ||||
|         vldmiage        r1!, {s28-s31} | ||||
|         vmul.f32        s15, s4,  s15 | ||||
|         it              ge | ||||
|         vmulge.f32      s24, s19, s24 | ||||
|         it              gt | ||||
|         vldmdbgt        r2!, {s0-s3} | ||||
|         it              ge | ||||
|         vmulge.f32      s25, s18, s25 | ||||
|         vstmia          r0!, {s8-s13} | ||||
|         it              ge | ||||
|         vmulge.f32      s26, s17, s26 | ||||
|         it              gt | ||||
|         vldmiagt        r1!, {s8-s11} | ||||
|         itt             ge | ||||
|         vmulge.f32      s27, s16, s27 | ||||
|         vmulge.f32      s28, s23, s28 | ||||
|         it              gt | ||||
|         vldmdbgt        r2!, {s4-s7} | ||||
|         it              ge | ||||
|         vmulge.f32      s29, s22, s29 | ||||
|         vstmia          r0!, {s14-s15} | ||||
|         ittt            ge | ||||
|         vmulge.f32      s30, s21, s30 | ||||
|         vmulge.f32      s31, s20, s31 | ||||
|         vmulge.f32      s8,  s3,  s8 | ||||
|         it              gt | ||||
|         vldmiagt        r1!, {s12-s15} | ||||
|         itttt           ge | ||||
|         vmulge.f32      s9,  s2,  s9 | ||||
|         vmulge.f32      s10, s1,  s10 | ||||
|         vstmiage        r0!, {s24-s27} | ||||
|         vmulge.f32      s11, s0,  s11 | ||||
|         it              ge | ||||
|         vstmiage        r0!, {s28-s31} | ||||
|         bgt             1b | ||||
|  | ||||
|   | ||||
| @@ -71,6 +71,7 @@ endfunc | ||||
|  | ||||
| function ff_float_to_int16_interleave_neon, export=1 | ||||
|         cmp             r3, #2 | ||||
|         itt             lt | ||||
|         ldrlt           r1, [r1] | ||||
|         blt             ff_float_to_int16_neon | ||||
|         bne             4f | ||||
| @@ -196,6 +197,7 @@ function ff_float_to_int16_interleave_neon, export=1 | ||||
|         vst1.64         {d3},     [r8], ip | ||||
|         vst1.64         {d7},     [r8], ip | ||||
|         subs            r3,  r3,  #4 | ||||
|         it              eq | ||||
|         popeq           {r4-r8,pc} | ||||
|         cmp             r3,  #4 | ||||
|         add             r0,  r0,  #8 | ||||
| @@ -305,6 +307,7 @@ function ff_float_to_int16_interleave_neon, export=1 | ||||
|         vst1.32         {d23[1]}, [r8], ip | ||||
| 8:      subs            r3,  r3,  #2 | ||||
|         add             r0,  r0,  #4 | ||||
|         it              eq | ||||
|         popeq           {r4-r8,pc} | ||||
|  | ||||
|         @ 1 channel | ||||
| @@ -354,6 +357,7 @@ function ff_float_to_int16_interleave_neon, export=1 | ||||
|         vst1.16         {d2[3]},  [r5,:16], ip | ||||
|         vst1.16         {d3[1]},  [r5,:16], ip | ||||
|         vst1.16         {d3[3]},  [r5,:16], ip | ||||
|         it              eq | ||||
|         popeq           {r4-r8,pc} | ||||
|         vld1.64         {d0-d1},  [r4,:128]! | ||||
|         vcvt.s32.f32    q0,  q0,  #16 | ||||
|   | ||||
| @@ -46,6 +46,7 @@ function ff_float_to_int16_vfp, export=1 | ||||
|         vmov            r5,  r6,  s2, s3 | ||||
|         vmov            r7,  r8,  s4, s5 | ||||
|         vmov            ip,  lr,  s6, s7 | ||||
|         it              gt | ||||
|         vldmiagt        r1!, {s16-s23} | ||||
|         ssat            r4,  #16, r4 | ||||
|         ssat            r3,  #16, r3 | ||||
| @@ -53,10 +54,12 @@ function ff_float_to_int16_vfp, export=1 | ||||
|         ssat            r5,  #16, r5 | ||||
|         pkhbt           r3,  r3,  r4, lsl #16 | ||||
|         pkhbt           r4,  r5,  r6, lsl #16 | ||||
|         itttt           gt | ||||
|         vcvtgt.s32.f32  s0,  s16 | ||||
|         vcvtgt.s32.f32  s1,  s17 | ||||
|         vcvtgt.s32.f32  s2,  s18 | ||||
|         vcvtgt.s32.f32  s3,  s19 | ||||
|         itttt           gt | ||||
|         vcvtgt.s32.f32  s4,  s20 | ||||
|         vcvtgt.s32.f32  s5,  s21 | ||||
|         vcvtgt.s32.f32  s6,  s22 | ||||
|   | ||||
| @@ -71,7 +71,9 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1 | ||||
|         pld             [r1] | ||||
|         pld             [r1, r2] | ||||
|  | ||||
|         muls            r7,  r4,  r5 | ||||
| A       muls            r7,  r4,  r5 | ||||
| T       mul             r7,  r4,  r5 | ||||
| T       cmp             r7,  #0 | ||||
|         rsb             r6,  r7,  r5,  lsl #3 | ||||
|         rsb             ip,  r7,  r4,  lsl #3 | ||||
|         sub             r4,  r7,  r4,  lsl #3 | ||||
| @@ -197,7 +199,9 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 | ||||
|         pld             [r1] | ||||
|         pld             [r1, r2] | ||||
|  | ||||
|         muls            r7,  r4,  r5 | ||||
| A       muls            r7,  r4,  r5 | ||||
| T       mul             r7,  r4,  r5 | ||||
| T       cmp             r7,  #0 | ||||
|         rsb             r6,  r7,  r5,  lsl #3 | ||||
|         rsb             ip,  r7,  r4,  lsl #3 | ||||
|         sub             r4,  r7,  r4,  lsl #3 | ||||
| @@ -368,10 +372,10 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1 | ||||
|         pop             {r4-r6, pc} | ||||
| 2: | ||||
| .ifc \type,put | ||||
|         ldrh            r5,  [r1], r2 | ||||
|         strh            r5,  [r0], r2 | ||||
|         ldrh            r6,  [r1], r2 | ||||
|         strh            r6,  [r0], r2 | ||||
|         ldrh_post       r5,  r1,  r2 | ||||
|         strh_post       r5,  r0,  r2 | ||||
|         ldrh_post       r6,  r1,  r2 | ||||
|         strh_post       r6,  r0,  r2 | ||||
| .else | ||||
|         vld1.16         {d16[0]}, [r1], r2 | ||||
|         vld1.16         {d16[1]}, [r1], r2 | ||||
| @@ -404,28 +408,17 @@ endfunc | ||||
|         ldr             ip,  [sp] | ||||
|         tst             r2,  r2 | ||||
|         ldr             ip,  [ip] | ||||
|         it              ne | ||||
|         tstne           r3,  r3 | ||||
|         vmov.32         d24[0], ip | ||||
|         and             ip,  ip,  ip, lsl #16 | ||||
|         it              eq | ||||
|         bxeq            lr | ||||
|         ands            ip,  ip,  ip, lsl #8 | ||||
|         it              lt | ||||
|         bxlt            lr | ||||
|         .endm | ||||
|  | ||||
|         .macro align_push_regs | ||||
|         and             ip,  sp,  #15 | ||||
|         add             ip,  ip,  #32 | ||||
|         sub             sp,  sp,  ip | ||||
|         vst1.64         {d12-d15}, [sp,:128] | ||||
|         sub             sp,  sp,  #32 | ||||
|         vst1.64         {d8-d11},  [sp,:128] | ||||
|         .endm | ||||
|  | ||||
|         .macro align_pop_regs | ||||
|         vld1.64         {d8-d11},  [sp,:128]! | ||||
|         vld1.64         {d12-d15}, [sp,:128], ip | ||||
|         .endm | ||||
|  | ||||
|         .macro h264_loop_filter_luma | ||||
|         vdup.8          q11, r2         @ alpha | ||||
|         vmovl.u8        q12, d24 | ||||
| @@ -506,7 +499,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1 | ||||
|         vld1.64         {d18,d19}, [r0,:128], r1 | ||||
|         vld1.64         {d16,d17}, [r0,:128], r1 | ||||
|  | ||||
|         align_push_regs | ||||
|         vpush           {d8-d15} | ||||
|  | ||||
|         h264_loop_filter_luma | ||||
|  | ||||
| @@ -516,7 +509,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1 | ||||
|         vst1.64         {d0, d1},  [r0,:128], r1 | ||||
|         vst1.64         {d10,d11}, [r0,:128] | ||||
|  | ||||
|         align_pop_regs | ||||
|         vpop            {d8-d15} | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| @@ -543,7 +536,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1 | ||||
|  | ||||
|         transpose_8x8   q3, q10, q9, q8, q0, q1, q2, q13 | ||||
|  | ||||
|         align_push_regs | ||||
|         vpush           {d8-d15} | ||||
|  | ||||
|         h264_loop_filter_luma | ||||
|  | ||||
| @@ -568,7 +561,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1 | ||||
|         vst1.32         {d1[1]},  [r0], r1 | ||||
|         vst1.32         {d11[1]}, [r0], r1 | ||||
|  | ||||
|         align_pop_regs | ||||
|         vpop            {d8-d15} | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| @@ -1116,6 +1109,7 @@ function \type\()_h264_qpel8_hv_lowpass_neon | ||||
|         vrhadd.u8       d11, d11, d7 | ||||
|         sub             r0,  r0,  r2,  lsl #3 | ||||
| .endif | ||||
|  | ||||
|         vst1.64         {d12},     [r0,:64], r2 | ||||
|         vst1.64         {d13},     [r0,:64], r2 | ||||
|         vst1.64         {d14},     [r0,:64], r2 | ||||
| @@ -1263,7 +1257,9 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1 | ||||
| \type\()_h264_qpel8_mc11: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #64 | ||||
|         mov             r0,  sp | ||||
|         sub             r1,  r1,  #2 | ||||
| @@ -1271,14 +1267,14 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1 | ||||
|         mov             ip,  #8 | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel8_h_lowpass_neon | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         mov             r3,  r2 | ||||
|         add             ip,  sp,  #64 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         mov             r2,  #8 | ||||
|         bl              \type\()_h264_qpel8_v_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11, #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r11, pc} | ||||
| endfunc | ||||
|  | ||||
| @@ -1287,7 +1283,9 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1 | ||||
| \type\()_h264_qpel8_mc21: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #(8*8+16*12) | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  #8 | ||||
| @@ -1296,14 +1294,14 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1 | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel8_h_lowpass_neon | ||||
|         mov             r4,  r0 | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  r2 | ||||
|         sub             r2,  r4,  #64 | ||||
|         bl              \type\()_h264_qpel8_hv_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11,  #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r4, r10, r11, pc} | ||||
| endfunc | ||||
|  | ||||
| @@ -1330,7 +1328,9 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1 | ||||
| \type\()_h264_qpel8_mc12: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #(8*8+16*12) | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         mov             r3,  r2 | ||||
| @@ -1339,20 +1339,22 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1 | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel8_v_lowpass_neon | ||||
|         mov             r4,  r0 | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         sub             r1,  r1,  r3, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         sub             r2,  r4,  #64 | ||||
|         bl              \type\()_h264_qpel8_hv_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11,  #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r4, r10, r11, pc} | ||||
| endfunc | ||||
|  | ||||
| function ff_\type\()_h264_qpel8_mc22_neon, export=1 | ||||
|         push            {r4, r10, r11, lr} | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r4,  r11, #15 | ||||
| T       mov             sp,  r4 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  r2 | ||||
| @@ -1441,21 +1443,23 @@ function ff_\type\()_h264_qpel16_mc11_neon, export=1 | ||||
| \type\()_h264_qpel16_mc11: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #256 | ||||
|         mov             r0,  sp | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  #16 | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel16_h_lowpass_neon | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         mov             r3,  r2 | ||||
|         add             ip,  sp,  #64 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         mov             r2,  #16 | ||||
|         bl              \type\()_h264_qpel16_v_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11, #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r4, r11, pc} | ||||
| endfunc | ||||
|  | ||||
| @@ -1464,20 +1468,22 @@ function ff_\type\()_h264_qpel16_mc21_neon, export=1 | ||||
| \type\()_h264_qpel16_mc21: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #(16*16+16*12) | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r0,  sp | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel16_h_lowpass_neon_packed | ||||
|         mov             r4,  r0 | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  r2 | ||||
|         bl              \type\()_h264_qpel16_hv_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11,  #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r4-r5, r9-r11, pc} | ||||
| endfunc | ||||
|  | ||||
| @@ -1504,7 +1510,9 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1 | ||||
| \type\()_h264_qpel16_mc12: | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r0,  r11, #15 | ||||
| T       mov             sp,  r0 | ||||
|         sub             sp,  sp,  #(16*16+16*12) | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         mov             r0,  sp | ||||
| @@ -1512,13 +1520,13 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1 | ||||
|         vpush           {d8-d15} | ||||
|         bl              put_h264_qpel16_v_lowpass_neon_packed | ||||
|         mov             r4,  r0 | ||||
|         ldrd            r0,  [r11] | ||||
|         ldrd            r0,  [r11], #8 | ||||
|         sub             r1,  r1,  r3, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r2,  r3 | ||||
|         bl              \type\()_h264_qpel16_hv_lowpass_l2_neon | ||||
|         vpop            {d8-d15} | ||||
|         add             sp,  r11,  #8 | ||||
|         mov             sp,  r11 | ||||
|         pop             {r4-r5, r9-r11, pc} | ||||
| endfunc | ||||
|  | ||||
| @@ -1526,7 +1534,9 @@ function ff_\type\()_h264_qpel16_mc22_neon, export=1 | ||||
|         push            {r4, r9-r11, lr} | ||||
|         lowpass_const   r3 | ||||
|         mov             r11, sp | ||||
|         bic             sp,  sp,  #15 | ||||
| A       bic             sp,  sp,  #15 | ||||
| T       bic             r4,  r11, #15 | ||||
| T       mov             sp,  r4 | ||||
|         sub             r1,  r1,  r2, lsl #1 | ||||
|         sub             r1,  r1,  #2 | ||||
|         mov             r3,  r2 | ||||
|   | ||||
| @@ -106,10 +106,12 @@ function ff_h264_idct_add16_neon, export=1 | ||||
|         blt             2f | ||||
|         ldrsh           lr,  [r1] | ||||
|         add             r0,  r0,  r4 | ||||
|         it              ne | ||||
|         movne           lr,  #0 | ||||
|         cmp             lr,  #0 | ||||
|         adrne           lr,  ff_h264_idct_dc_add_neon | ||||
|         adreq           lr,  ff_h264_idct_add_neon | ||||
|         ite             ne | ||||
|         adrne           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB | ||||
|         adreq           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB | ||||
|         blx             lr | ||||
| 2:      subs            ip,  ip,  #1 | ||||
|         add             r1,  r1,  #32 | ||||
| @@ -132,8 +134,9 @@ function ff_h264_idct_add16intra_neon, export=1 | ||||
|         add             r0,  r0,  r4 | ||||
|         cmp             r8,  #0 | ||||
|         ldrsh           r8,  [r1] | ||||
|         adrne           lr,  ff_h264_idct_add_neon | ||||
|         adreq           lr,  ff_h264_idct_dc_add_neon | ||||
|         iteet           ne | ||||
|         adrne           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB | ||||
|         adreq           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB | ||||
|         cmpeq           r8,  #0 | ||||
|         blxne           lr | ||||
|         subs            ip,  ip,  #1 | ||||
| @@ -159,12 +162,14 @@ function ff_h264_idct_add8_neon, export=1 | ||||
|         add             r1,  r3,  r12, lsl #5 | ||||
|         cmp             r8,  #0 | ||||
|         ldrsh           r8,  [r1] | ||||
|         adrne           lr,  ff_h264_idct_add_neon | ||||
|         adreq           lr,  ff_h264_idct_dc_add_neon | ||||
|         iteet           ne | ||||
|         adrne           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB | ||||
|         adreq           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB | ||||
|         cmpeq           r8,  #0 | ||||
|         blxne           lr | ||||
|         add             r12, r12, #1 | ||||
|         cmp             r12, #4 | ||||
|         itt             eq | ||||
|         moveq           r12, #16 | ||||
|         moveq           r4,  r9 | ||||
|         cmp             r12, #20 | ||||
| @@ -365,10 +370,12 @@ function ff_h264_idct8_add4_neon, export=1 | ||||
|         blt             2f | ||||
|         ldrsh           lr,  [r1] | ||||
|         add             r0,  r0,  r4 | ||||
|         it              ne | ||||
|         movne           lr,  #0 | ||||
|         cmp             lr,  #0 | ||||
|         adrne           lr,  ff_h264_idct8_dc_add_neon | ||||
|         adreq           lr,  ff_h264_idct8_add_neon | ||||
|         ite             ne | ||||
|         adrne           lr,  ff_h264_idct8_dc_add_neon + CONFIG_THUMB | ||||
|         adreq           lr,  ff_h264_idct8_add_neon    + CONFIG_THUMB | ||||
|         blx             lr | ||||
| 2:      subs            r12, r12, #4 | ||||
|         add             r1,  r1,  #128 | ||||
|   | ||||
| @@ -64,11 +64,14 @@ static inline av_const int mid_pred(int a, int b, int c) | ||||
|     __asm__ ( | ||||
|         "mov   %0, %2  \n\t" | ||||
|         "cmp   %1, %2  \n\t" | ||||
|         "itt   gt      \n\t" | ||||
|         "movgt %0, %1  \n\t" | ||||
|         "movgt %1, %2  \n\t" | ||||
|         "cmp   %1, %3  \n\t" | ||||
|         "it    le      \n\t" | ||||
|         "movle %1, %3  \n\t" | ||||
|         "cmp   %0, %1  \n\t" | ||||
|         "it    gt      \n\t" | ||||
|         "movgt %0, %1  \n\t" | ||||
|         : "=&r"(m), "+r"(a) | ||||
|         : "r"(b), "r"(c) | ||||
|   | ||||
| @@ -191,7 +191,9 @@ function ff_mdct_calc_neon, export=1 | ||||
|         vadd.f32        d17, d17, d3            @ in2u+in1d     -I | ||||
| 1: | ||||
|         vmul.f32        d7,  d0,  d21           @  I*s | ||||
|         ldr             r10, [r3, lr, lsr #1] | ||||
| A       ldr             r10, [r3, lr, lsr #1] | ||||
| T       lsr             r10, lr,  #1 | ||||
| T       ldr             r10, [r3, r10] | ||||
|         vmul.f32        d6,  d1,  d20           @ -R*c | ||||
|         ldr             r6,  [r3, #4]! | ||||
|         vmul.f32        d4,  d1,  d21           @ -R*s | ||||
|   | ||||
| @@ -75,7 +75,7 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1 | ||||
|         sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr | ||||
|         sum8            r8,  r9,  r1,  r2,  r10, r11, r12, lr, rsb, 32 | ||||
|         round           r10, r8,  r9 | ||||
|         strh            r10, [r3], r4 | ||||
|         strh_post       r10, r3,  r4 | ||||
|  | ||||
|         mov             lr,  #15 | ||||
| 1: | ||||
| @@ -127,10 +127,10 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1 | ||||
|         round           r10, r8,  r9 | ||||
|         adds            r8,  r8,  r4 | ||||
|         adc             r9,  r9,  r7 | ||||
|         strh            r10, [r3], r12 | ||||
|         strh_post       r10, r3,  r12 | ||||
|         round           r11, r8,  r9 | ||||
|         subs            lr,  lr,  #1 | ||||
|         strh            r11, [r5], -r12 | ||||
|         strh_dpost      r11, r5, r12 | ||||
|         bgt             1b | ||||
|  | ||||
|         sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr, rsb, 33 | ||||
|   | ||||
| @@ -38,15 +38,21 @@ | ||||
|  | ||||
| /* dequant_t: conditionally rescale the top halfword of \src. | ||||
|  * Computes \tmp = (\src asr 16) - ip and, from the resulting flags: | ||||
|  *   > 0 : \tmp =  \add | ||||
|  *   < 0 : \tmp = -\add | ||||
|  *   != 0: \dst = top16(\src) * bottom16(\mul) + \tmp | ||||
|  *   == 0: \dst is left untouched (\tmp holds 0). | ||||
|  * NOTE(review): ip is set outside this macro; presumably 0 so that the | ||||
|  * test is on the sign of the coefficient -- confirm at the call sites. | ||||
|  * Each conditional instruction carries its own IT prefix so the sequence | ||||
|  * also assembles for Thumb-2. */ | ||||
| .macro  dequant_t       dst, src, mul, add, tmp | ||||
|         /* sets the flags tested by all three conditionals below */ | ||||
|         rsbs            \tmp, ip, \src, asr #16 | ||||
|         it              gt | ||||
|         addgt           \tmp, \add, #0 | ||||
|         it              lt | ||||
|         rsblt           \tmp, \add, #0 | ||||
|         it              ne | ||||
|         smlatbne        \dst, \src, \mul, \tmp | ||||
| .endm | ||||
|  | ||||
| /* dequant_b: conditionally rescale the bottom halfword of \src. | ||||
|  * Computes \tmp = (\src lsl 16) - ip and, from the resulting flags: | ||||
|  *   > 0 : \tmp =  \add | ||||
|  *   < 0 : \tmp = -\add | ||||
|  *   != 0: \dst = bottom16(\src) * bottom16(\mul) + \tmp | ||||
|  *   == 0: \dst is left untouched (\tmp holds 0). | ||||
|  * NOTE(review): ip is set outside this macro; presumably 0 so that the | ||||
|  * test is on the sign of the coefficient -- confirm at the call sites. | ||||
|  * Each conditional instruction carries its own IT prefix so the sequence | ||||
|  * also assembles for Thumb-2. */ | ||||
| .macro  dequant_b       dst, src, mul, add, tmp | ||||
|         /* sets the flags tested by all three conditionals below */ | ||||
|         rsbs            \tmp, ip, \src, lsl #16 | ||||
|         it              gt | ||||
|         addgt           \tmp, \add, #0 | ||||
|         it              lt | ||||
|         rsblt           \tmp, \add, #0 | ||||
|         it              ne | ||||
|         smlabbne        \dst, \src, \mul, \tmp | ||||
| .endm | ||||
|  | ||||
| @@ -80,21 +86,27 @@ function ff_dct_unquantize_h263_armv5te, export=1 | ||||
|         strh            lr, [r0], #2 | ||||
|  | ||||
|         subs            r3, r3, #8 | ||||
|         it              gt | ||||
|         ldrdgt          r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ | ||||
|         bgt             1b | ||||
|  | ||||
|         adds            r3, r3, #2 | ||||
|         it              le | ||||
|         pople           {r4-r9,pc} | ||||
| 2: | ||||
|         ldrsh           r9, [r0, #0] | ||||
|         ldrsh           lr, [r0, #2] | ||||
|         mov             r8, r2 | ||||
|         cmp             r9, #0 | ||||
|         it              lt | ||||
|         rsblt           r8, r2, #0 | ||||
|         it              ne | ||||
|         smlabbne        r9, r9, r1, r8 | ||||
|         mov             r8, r2 | ||||
|         cmp             lr, #0 | ||||
|         it              lt | ||||
|         rsblt           r8, r2, #0 | ||||
|         it              ne | ||||
|         smlabbne        lr, lr, r1, r8 | ||||
|         strh            r9, [r0], #2 | ||||
|         strh            lr, [r0], #2 | ||||
|   | ||||
| @@ -57,6 +57,7 @@ function ff_dct_unquantize_h263_neon, export=1 | ||||
|         subs            r3,  r3,  #16 | ||||
|         vst1.16         {q0},     [r1,:128]! | ||||
|         vst1.16         {q8},     [r1,:128]! | ||||
|         it              le | ||||
|         bxle            lr | ||||
|         cmp             r3,  #8 | ||||
|         bgt             1b | ||||
| @@ -78,6 +79,7 @@ function ff_dct_unquantize_h263_intra_neon, export=1 | ||||
|         ldr             r6,  [r0, #AC_PRED] | ||||
|         add             lr,  r0,  #INTER_SCANTAB_RASTER_END | ||||
|         cmp             r6,  #0 | ||||
|         it              ne | ||||
|         movne           r12, #63 | ||||
|         bne             1f | ||||
|         ldr             r12, [r12, r2, lsl #2] | ||||
| @@ -86,9 +88,11 @@ function ff_dct_unquantize_h263_intra_neon, export=1 | ||||
|         ldrsh           r4,  [r1] | ||||
|         cmp             r5,  #0 | ||||
|         mov             r5,  r1 | ||||
|         it              ne | ||||
|         movne           r2,  #0 | ||||
|         bne             2f | ||||
|         cmp             r2,  #4 | ||||
|         it              ge | ||||
|         addge           r0,  r0,  #4 | ||||
|         sub             r2,  r3,  #1 | ||||
|         ldr             r6,  [r0, #Y_DC_SCALE] | ||||
|   | ||||
| @@ -137,6 +137,7 @@ function ff_rdft_calc_neon, export=1 | ||||
|         vst1.32         {d22},    [r5,:64] | ||||
|  | ||||
|         cmp             r6,  #0 | ||||
|         it              eq | ||||
|         popeq           {r4-r8,pc} | ||||
|  | ||||
|         vmul.f32        d22, d22, d18 | ||||
|   | ||||
| @@ -121,11 +121,13 @@ __b_evaluation: | ||||
|         ldr r11, [r12, #offW7]   @ R11=W7 | ||||
|         mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | ||||
|                 teq r2, #0               @ if null avoid muls | ||||
|                 mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         teq r2, #0               @ if null avoid muls | ||||
|         itttt ne | ||||
|         mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         rsbne r2, r2, #0         @ R2=-ROWr16[3] | ||||
|         mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         it    ne | ||||
|         mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|  | ||||
|         @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3], | ||||
| @@ -148,19 +150,23 @@ __b_evaluation: | ||||
|         @@ MAC16(b3, -W1, row[7]); | ||||
|         @@ MAC16(b1, -W5, row[7]); | ||||
|         mov r3, r3, asr #16      @ R3=ROWr16[5] | ||||
|                 teq r3, #0               @ if null avoid muls | ||||
|         teq r3, #0               @ if null avoid muls | ||||
|         it    ne | ||||
|         mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5]=b0 | ||||
|         mov r4, r4, asr #16      @ R4=ROWr16[7] | ||||
|         itttt ne | ||||
|         mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5]=b2 | ||||
|         mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5]=b3 | ||||
|         rsbne r3, r3, #0         @ R3=-ROWr16[5] | ||||
|         mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5]=b1 | ||||
|         @@ R3 is free now | ||||
|                 teq r4, #0               @ if null avoid muls | ||||
|         teq r4, #0               @ if null avoid muls | ||||
|         itttt ne | ||||
|         mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7]=b0 | ||||
|         mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7]=b2 | ||||
|         rsbne r4, r4, #0         @ R4=-ROWr16[7] | ||||
|         mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7]=b3 | ||||
|         it    ne | ||||
|         mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7]=b1 | ||||
|         @@ R4 is free now | ||||
| __end_b_evaluation: | ||||
| @@ -204,16 +210,19 @@ __a_evaluation: | ||||
|         @@ a2 -= W4*row[4] | ||||
|         @@ a3 += W4*row[4] | ||||
|         ldrsh r11, [r14, #8]     @ R11=ROWr16[4] | ||||
|                 teq r11, #0              @ if null avoid muls | ||||
|         teq r11, #0              @ if null avoid muls | ||||
|         it    ne | ||||
|         mulne r11, r9, r11       @ R11=W4*ROWr16[4] | ||||
|         @@ R9 is free now | ||||
|         ldrsh r9, [r14, #12]     @ R9=ROWr16[6] | ||||
|         itttt ne | ||||
|         addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0) | ||||
|         subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1) | ||||
|         subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2) | ||||
|         addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3) | ||||
|         @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | ||||
|                 teq r9, #0               @ if null avoid muls | ||||
|         teq r9, #0               @ if null avoid muls | ||||
|         itttt ne | ||||
|         mulne r11, r10, r9       @ R11=W6*ROWr16[6] | ||||
|         addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0) | ||||
|         mulne r10, r8, r9        @ R10=W2*ROWr16[6] | ||||
| @@ -222,6 +231,7 @@ __a_evaluation: | ||||
|         @@ a1 -= W2*row[6]; | ||||
|         @@ a2 += W2*row[6]; | ||||
|         subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3) | ||||
|         itt   ne | ||||
|         subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1) | ||||
|         addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2) | ||||
|  | ||||
| @@ -323,10 +333,12 @@ __b_evaluation2: | ||||
|         ldrsh r2, [r14, #48] | ||||
|         mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         teq r2, #0               @ if 0, then avoid muls | ||||
|         itttt ne | ||||
|         mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         rsbne r2, r2, #0         @ R2=-ROWr16[3] | ||||
|         mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|         it    ne | ||||
|         mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
|  | ||||
|         @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free), | ||||
| @@ -342,18 +354,22 @@ __b_evaluation2: | ||||
|         @@ MAC16(b1, -W5, col[7x8]); | ||||
|         ldrsh r3, [r14, #80]     @ R3=COLr16[5x8] | ||||
|         teq r3, #0               @ if 0 then avoid muls | ||||
|         itttt ne | ||||
|         mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0 | ||||
|         mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2 | ||||
|         mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3 | ||||
|         rsbne r3, r3, #0         @ R3=-ROWr16[5x8] | ||||
|         ldrsh r4, [r14, #112]    @ R4=COLr16[7x8] | ||||
|         it    ne | ||||
|         mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5x8]=b1 | ||||
|         @@ R3 is free now | ||||
|         teq r4, #0               @ if 0 then avoid muls | ||||
|         itttt ne | ||||
|         mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0 | ||||
|         mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2 | ||||
|         rsbne r4, r4, #0         @ R4=-ROWr16[7x8] | ||||
|         mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3 | ||||
|         it    ne | ||||
|         mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1 | ||||
|         @@ R4 is free now | ||||
| __end_b_evaluation2: | ||||
| @@ -390,15 +406,18 @@ __a_evaluation2: | ||||
|         @@ a3 += W4*row[4] | ||||
|         ldrsh r11, [r14, #64]    @ R11=ROWr16[4] | ||||
|         teq r11, #0              @ if null avoid muls | ||||
|         itttt ne | ||||
|         mulne r11, r9, r11       @ R11=W4*ROWr16[4] | ||||
|         @@ R9 is free now | ||||
|         addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0) | ||||
|         subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1) | ||||
|         subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2) | ||||
|         ldrsh r9, [r14, #96]     @ R9=ROWr16[6] | ||||
|         it    ne | ||||
|         addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3) | ||||
|         @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | ||||
|         teq r9, #0               @ if null avoid muls | ||||
|         itttt ne | ||||
|         mulne r11, r10, r9       @ R11=W6*ROWr16[6] | ||||
|         addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0) | ||||
|         mulne r10, r8, r9        @ R10=W2*ROWr16[6] | ||||
| @@ -407,6 +426,7 @@ __a_evaluation2: | ||||
|         @@ a1 -= W2*row[6]; | ||||
|         @@ a2 += W2*row[6]; | ||||
|         subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3) | ||||
|         itt   ne | ||||
|         subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1) | ||||
|         addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2) | ||||
| __end_a_evaluation2: | ||||
|   | ||||
| @@ -49,6 +49,7 @@ function idct_row_armv5te | ||||
|         ldrd   v1, [a1, #8] | ||||
|         ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */ | ||||
|         orrs   v1, v1, v2 | ||||
|         itt    eq | ||||
|         cmpeq  v1, a4 | ||||
|         cmpeq  v1, a3, lsr #16 | ||||
|         beq    row_dc_only | ||||
| @@ -269,6 +270,7 @@ function idct_col_armv5te | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         adds   a2, a3, v1 | ||||
|         mov    a2, a2, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         add    ip, a4, v2 | ||||
|         mov    ip, ip, asr #20 | ||||
| @@ -276,6 +278,7 @@ function idct_col_armv5te | ||||
|         str    a2, [a1] | ||||
|         subs   a3, a3, v1 | ||||
|         mov    a2, a3, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         sub    a4, a4, v2 | ||||
|         mov    a4, a4, asr #20 | ||||
| @@ -285,6 +288,7 @@ function idct_col_armv5te | ||||
|  | ||||
|         subs   a2, a3, v3 | ||||
|         mov    a2, a2, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         sub    ip, a4, v4 | ||||
|         mov    ip, ip, asr #20 | ||||
| @@ -292,6 +296,7 @@ function idct_col_armv5te | ||||
|         str    a2, [a1, #(16*1)] | ||||
|         adds   a3, a3, v3 | ||||
|         mov    a2, a3, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         add    a4, a4, v4 | ||||
|         mov    a4, a4, asr #20 | ||||
| @@ -301,6 +306,7 @@ function idct_col_armv5te | ||||
|  | ||||
|         adds   a2, a3, v5 | ||||
|         mov    a2, a2, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         add    ip, a4, v6 | ||||
|         mov    ip, ip, asr #20 | ||||
| @@ -308,6 +314,7 @@ function idct_col_armv5te | ||||
|         str    a2, [a1, #(16*2)] | ||||
|         subs   a3, a3, v5 | ||||
|         mov    a2, a3, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         sub    a4, a4, v6 | ||||
|         mov    a4, a4, asr #20 | ||||
| @@ -317,6 +324,7 @@ function idct_col_armv5te | ||||
|  | ||||
|         adds   a2, a3, v7 | ||||
|         mov    a2, a2, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         add    ip, a4, fp | ||||
|         mov    ip, ip, asr #20 | ||||
| @@ -324,6 +332,7 @@ function idct_col_armv5te | ||||
|         str    a2, [a1, #(16*3)] | ||||
|         subs   a3, a3, v7 | ||||
|         mov    a2, a3, lsr #20 | ||||
|         it     mi | ||||
|         orrmi  a2, a2, #0xf000 | ||||
|         sub    a4, a4, fp | ||||
|         mov    a4, a4, asr #20 | ||||
| @@ -335,15 +344,19 @@ endfunc | ||||
|  | ||||
| /* clip: \dst = \src clamped to the range 0..255. | ||||
|  * \src is a vararg, so it may carry a shifted operand | ||||
|  * (e.g. "clip a2, a2, asr #20"). Modifies the condition flags. */ | ||||
| .macro  clip   dst, src:vararg | ||||
|         /* copy and set N so a negative value can be detected */ | ||||
|         movs   \dst, \src | ||||
|         it     mi | ||||
|         movmi  \dst, #0 | ||||
|         /* upper bound */ | ||||
|         cmp    \dst, #255 | ||||
|         it     gt | ||||
|         movgt  \dst, #255 | ||||
| .endm | ||||
|  | ||||
| /* aclip: add-and-clip. \dst = sum of the operands given in \src, | ||||
|  * clamped to the range 0..255. \src is a vararg holding both addends, | ||||
|  * the second optionally shifted (e.g. "aclip a3, a2, a3, asr #20"). | ||||
|  * Modifies the condition flags. */ | ||||
| .macro  aclip  dst, src:vararg | ||||
|         /* add and set N so a negative sum can be detected */ | ||||
|         adds   \dst, \src | ||||
|         it     mi | ||||
|         movmi  \dst, #0 | ||||
|         /* upper bound */ | ||||
|         cmp    \dst, #255 | ||||
|         it     gt | ||||
|         movgt  \dst, #255 | ||||
| .endm | ||||
|  | ||||
| @@ -370,35 +383,35 @@ function idct_col_put_armv5te | ||||
|         orr    a2, a3, a4, lsl #8 | ||||
|         rsb    v2, lr, lr, lsl #3 | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         strh   a2, [v2, v1]! | ||||
|         strh_pre a2, v2, v1 | ||||
|  | ||||
|         sub    a2, a3, v3 | ||||
|         clip   a2, a2, asr #20 | ||||
|         sub    ip, a4, v4 | ||||
|         clip   ip, ip, asr #20 | ||||
|         orr    a2, a2, ip, lsl #8 | ||||
|         strh   a2, [v1, lr]! | ||||
|         strh_pre a2, v1, lr | ||||
|         add    a3, a3, v3 | ||||
|         clip   a2, a3, asr #20 | ||||
|         add    a4, a4, v4 | ||||
|         clip   a4, a4, asr #20 | ||||
|         orr    a2, a2, a4, lsl #8 | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         strh   a2, [v2, -lr]! | ||||
|         strh_dpre a2, v2, lr | ||||
|  | ||||
|         add    a2, a3, v5 | ||||
|         clip   a2, a2, asr #20 | ||||
|         add    ip, a4, v6 | ||||
|         clip   ip, ip, asr #20 | ||||
|         orr    a2, a2, ip, lsl #8 | ||||
|         strh   a2, [v1, lr]! | ||||
|         strh_pre a2, v1, lr | ||||
|         sub    a3, a3, v5 | ||||
|         clip   a2, a3, asr #20 | ||||
|         sub    a4, a4, v6 | ||||
|         clip   a4, a4, asr #20 | ||||
|         orr    a2, a2, a4, lsl #8 | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         strh   a2, [v2, -lr]! | ||||
|         strh_dpre a2, v2, lr | ||||
|  | ||||
|         add    a2, a3, v7 | ||||
|         clip   a2, a2, asr #20 | ||||
| @@ -411,7 +424,7 @@ function idct_col_put_armv5te | ||||
|         sub    a4, a4, fp | ||||
|         clip   a4, a4, asr #20 | ||||
|         orr    a2, a2, a4, lsl #8 | ||||
|         strh   a2, [v2, -lr] | ||||
|         strh_dpre a2, v2, lr | ||||
|  | ||||
|         ldr    pc, [sp], #4 | ||||
| endfunc | ||||
| @@ -436,7 +449,7 @@ function idct_col_add_armv5te | ||||
|         ldr    v1, [sp, #32] | ||||
|         sub    a4, a4, v2 | ||||
|         rsb    v2, v1, v1, lsl #3 | ||||
|         ldrh   ip, [v2, lr]! | ||||
|         ldrh_pre ip, v2, lr | ||||
|         strh   a2, [lr] | ||||
|         and    a2, ip, #255 | ||||
|         aclip  a3, a2, a3, asr #20 | ||||
| @@ -448,7 +461,7 @@ function idct_col_add_armv5te | ||||
|         strh   a2, [v2] | ||||
|  | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         ldrh   ip, [lr, v1]! | ||||
|         ldrh_pre ip, lr, v1 | ||||
|         sub    a2, a3, v3 | ||||
|         add    a3, a3, v3 | ||||
|         and    v3, ip, #255 | ||||
| @@ -458,7 +471,7 @@ function idct_col_add_armv5te | ||||
|         aclip  v3, v3, ip, lsr #8 | ||||
|         orr    a2, a2, v3, lsl #8 | ||||
|         add    a4, a4, v4 | ||||
|         ldrh   ip, [v2, -v1]! | ||||
|         ldrh_dpre ip, v2, v1 | ||||
|         strh   a2, [lr] | ||||
|         and    a2, ip, #255 | ||||
|         aclip  a3, a2, a3, asr #20 | ||||
| @@ -468,7 +481,7 @@ function idct_col_add_armv5te | ||||
|         strh   a2, [v2] | ||||
|  | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         ldrh   ip, [lr, v1]! | ||||
|         ldrh_pre ip, lr, v1 | ||||
|         add    a2, a3, v5 | ||||
|         sub    a3, a3, v5 | ||||
|         and    v3, ip, #255 | ||||
| @@ -478,7 +491,7 @@ function idct_col_add_armv5te | ||||
|         aclip  v3, v3, ip, lsr #8 | ||||
|         orr    a2, a2, v3, lsl #8 | ||||
|         sub    a4, a4, v6 | ||||
|         ldrh   ip, [v2, -v1]! | ||||
|         ldrh_dpre ip, v2, v1 | ||||
|         strh   a2, [lr] | ||||
|         and    a2, ip, #255 | ||||
|         aclip  a3, a2, a3, asr #20 | ||||
| @@ -488,7 +501,7 @@ function idct_col_add_armv5te | ||||
|         strh   a2, [v2] | ||||
|  | ||||
|         ldmfd  sp!, {a3, a4} | ||||
|         ldrh   ip, [lr, v1]! | ||||
|         ldrh_pre ip, lr, v1 | ||||
|         add    a2, a3, v7 | ||||
|         sub    a3, a3, v7 | ||||
|         and    v3, ip, #255 | ||||
| @@ -498,7 +511,7 @@ function idct_col_add_armv5te | ||||
|         aclip  v3, v3, ip, lsr #8 | ||||
|         orr    a2, a2, v3, lsl #8 | ||||
|         sub    a4, a4, fp | ||||
|         ldrh   ip, [v2, -v1]! | ||||
|         ldrh_dpre ip, v2, v1 | ||||
|         strh   a2, [lr] | ||||
|         and    a2, ip, #255 | ||||
|         aclip  a3, a2, a3, asr #20 | ||||
|   | ||||
| @@ -200,6 +200,7 @@ function idct_row_armv6 | ||||
|         ldr    r3, [r0, #8]          /* r3 = row[3,1] */ | ||||
|         ldr    r2, [r0]              /* r2 = row[2,0] */ | ||||
|         orrs   lr, lr, ip | ||||
|         itt    eq | ||||
|         cmpeq  lr, r3 | ||||
|         cmpeq  lr, r2, lsr #16 | ||||
|         beq    1f | ||||
| @@ -282,14 +283,14 @@ function idct_col_put_armv6 | ||||
|         pop    {r1, r2} | ||||
|         idct_finish_shift_sat COL_SHIFT | ||||
|  | ||||
|         strb   r4, [r1], r2 | ||||
|         strb   r5, [r1], r2 | ||||
|         strb   r6, [r1], r2 | ||||
|         strb   r7, [r1], r2 | ||||
|         strb   r11,[r1], r2 | ||||
|         strb   r10,[r1], r2 | ||||
|         strb   r9, [r1], r2 | ||||
|         strb   r8, [r1], r2 | ||||
|         strb_post r4, r1, r2 | ||||
|         strb_post r5, r1, r2 | ||||
|         strb_post r6, r1, r2 | ||||
|         strb_post r7, r1, r2 | ||||
|         strb_post r11,r1, r2 | ||||
|         strb_post r10,r1, r2 | ||||
|         strb_post r9, r1, r2 | ||||
|         strb_post r8, r1, r2 | ||||
|  | ||||
|         sub    r1, r1, r2, lsl #3 | ||||
|  | ||||
| @@ -318,16 +319,16 @@ function idct_col_add_armv6 | ||||
|         add    ip, r3, ip, asr #COL_SHIFT | ||||
|         usat   ip, #8, ip | ||||
|         add    r4, r7, r4, asr #COL_SHIFT | ||||
|         strb   ip, [r1], r2 | ||||
|         strb_post ip, r1, r2 | ||||
|         ldrb   ip, [r1, r2] | ||||
|         usat   r4, #8, r4 | ||||
|         ldrb   r11,[r1, r2, lsl #2] | ||||
|         add    r5, ip, r5, asr #COL_SHIFT | ||||
|         usat   r5, #8, r5 | ||||
|         strb   r4, [r1], r2 | ||||
|         strb_post r4, r1, r2 | ||||
|         ldrb   r3, [r1, r2] | ||||
|         ldrb   ip, [r1, r2, lsl #2] | ||||
|         strb   r5, [r1], r2 | ||||
|         strb_post r5, r1, r2 | ||||
|         ldrb   r7, [r1, r2] | ||||
|         ldrb   r4, [r1, r2, lsl #2] | ||||
|         add    r6, r3, r6, asr #COL_SHIFT | ||||
| @@ -340,11 +341,11 @@ function idct_col_add_armv6 | ||||
|         usat   r8, #8, r8 | ||||
|         add    lr, r4, lr, asr #COL_SHIFT | ||||
|         usat   lr, #8, lr | ||||
|         strb   r6, [r1], r2 | ||||
|         strb   r10,[r1], r2 | ||||
|         strb   r9, [r1], r2 | ||||
|         strb   r8, [r1], r2 | ||||
|         strb   lr, [r1], r2 | ||||
|         strb_post r6, r1, r2 | ||||
|         strb_post r10,r1, r2 | ||||
|         strb_post r9, r1, r2 | ||||
|         strb_post r8, r1, r2 | ||||
|         strb_post lr, r1, r2 | ||||
|  | ||||
|         sub    r1, r1, r2, lsl #3 | ||||
|  | ||||
|   | ||||
| @@ -71,7 +71,7 @@ function idct_row4_pld_neon | ||||
|         add             r3,  r0,  r1,  lsl #2 | ||||
|         pld             [r0, r1] | ||||
|         pld             [r0, r1, lsl #1] | ||||
|         pld             [r3, -r1] | ||||
| A       pld             [r3, -r1] | ||||
|         pld             [r3] | ||||
|         pld             [r3, r1] | ||||
|         add             r3,  r3,  r1,  lsl #1 | ||||
| @@ -164,6 +164,7 @@ function idct_col4_neon | ||||
|         orrs            r4,  r4,  r5 | ||||
|  | ||||
|         idct_col4_top | ||||
|         it              eq | ||||
|         addeq           r2,  r2,  #16 | ||||
|         beq             1f | ||||
|  | ||||
| @@ -176,6 +177,7 @@ function idct_col4_neon | ||||
|  | ||||
| 1:      orrs            r6,  r6,  r7 | ||||
|         ldrd            r4,  [r2, #16] | ||||
|         it              eq | ||||
|         addeq           r2,  r2,  #16 | ||||
|         beq             2f | ||||
|  | ||||
| @@ -187,6 +189,7 @@ function idct_col4_neon | ||||
|  | ||||
| 2:      orrs            r4,  r4,  r5 | ||||
|         ldrd            r4,  [r2, #16] | ||||
|         it              eq | ||||
|         addeq           r2,  r2,  #16 | ||||
|         beq             3f | ||||
|  | ||||
| @@ -199,6 +202,7 @@ function idct_col4_neon | ||||
|         vadd.i32        q13, q13, q8 | ||||
|  | ||||
| 3:      orrs            r4,  r4,  r5 | ||||
|         it              eq | ||||
|         addeq           r2,  r2,  #16 | ||||
|         beq             4f | ||||
|  | ||||
|   | ||||
| @@ -100,9 +100,11 @@ NOVFP   vldr            s0,  [sp, #12*4]        @ scale | ||||
|         vst1.32         {q9},     [r2,:128] | ||||
|  | ||||
|         subs            r1,  r1,  #1 | ||||
|         it              eq | ||||
|         popeq           {r4-r11,pc} | ||||
|  | ||||
|         cmp             r4,  #0 | ||||
|         itt             eq | ||||
|         subeq           r8,  r8,  #512*4 | ||||
|         subeq           r9,  r9,  #512*4 | ||||
|         sub             r5,  r5,  #512*4 | ||||
|   | ||||
| @@ -21,6 +21,14 @@ | ||||
| #ifndef AVCODEC_ARM_VP56_ARITH_H | ||||
| #define AVCODEC_ARM_VP56_ARITH_H | ||||
|  | ||||
| #if CONFIG_THUMB | ||||
| #   define A(x) | ||||
| #   define T(x) x | ||||
| #else | ||||
| #   define A(x) x | ||||
| #   define T(x) | ||||
| #endif | ||||
|  | ||||
| #if HAVE_ARMV6 && HAVE_INLINE_ASM | ||||
|  | ||||
| #define vp56_rac_get_prob vp56_rac_get_prob_armv6 | ||||
| @@ -32,15 +40,21 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr) | ||||
|     unsigned bit; | ||||
|  | ||||
|     __asm__ ("adds    %3,  %3,  %0           \n" | ||||
|              "itt     cs                     \n" | ||||
|              "cmpcs   %7,  %4                \n" | ||||
|              "ldrcsh  %2,  [%4], #2          \n" | ||||
|            A("ldrcsh  %2,  [%4], #2          \n") | ||||
|            T("ldrhcs  %2,  [%4], #2          \n") | ||||
|              "rsb     %0,  %6,  #256         \n" | ||||
|              "smlabb  %0,  %5,  %6,  %0      \n" | ||||
|            T("itttt   cs                     \n") | ||||
|              "rev16cs %2,  %2                \n" | ||||
|              "orrcs   %1,  %1,  %2,  lsl %3  \n" | ||||
|            T("lslcs   %2,  %2,  %3           \n") | ||||
|            T("orrcs   %1,  %1,  %2           \n") | ||||
|            A("orrcs   %1,  %1,  %2,  lsl %3  \n") | ||||
|              "subcs   %3,  %3,  #16          \n" | ||||
|              "lsr     %0,  %0,  #8           \n" | ||||
|              "cmp     %1,  %0,  lsl #16      \n" | ||||
|              "ittte   ge                     \n" | ||||
|              "subge   %1,  %1,  %0,  lsl #16 \n" | ||||
|              "subge   %0,  %5,  %0           \n" | ||||
|              "movge   %2,  #1                \n" | ||||
| @@ -64,12 +78,17 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr) | ||||
|     unsigned tmp; | ||||
|  | ||||
|     __asm__ ("adds    %3,  %3,  %0           \n" | ||||
|              "itt     cs                     \n" | ||||
|              "cmpcs   %7,  %4                \n" | ||||
|              "ldrcsh  %2,  [%4], #2          \n" | ||||
|            A("ldrcsh  %2,  [%4], #2          \n") | ||||
|            T("ldrhcs  %2,  [%4], #2          \n") | ||||
|              "rsb     %0,  %6,  #256         \n" | ||||
|              "smlabb  %0,  %5,  %6,  %0      \n" | ||||
|            T("itttt   cs                     \n") | ||||
|              "rev16cs %2,  %2                \n" | ||||
|              "orrcs   %1,  %1,  %2,  lsl %3  \n" | ||||
|            T("lslcs   %2,  %2,  %3           \n") | ||||
|            T("orrcs   %1,  %1,  %2           \n") | ||||
|            A("orrcs   %1,  %1,  %2,  lsl %3  \n") | ||||
|              "subcs   %3,  %3,  #16          \n" | ||||
|              "lsr     %0,  %0,  #8           \n" | ||||
|              "lsl     %2,  %0,  #16          \n" | ||||
|   | ||||
| @@ -25,13 +25,18 @@ | ||||
|         lsl             \cw, \cw, \t0 | ||||
|         lsl             \t0, \h,  \t0 | ||||
|         rsb             \h,  \pr, #256 | ||||
|         it              cs | ||||
|         ldrhcs          \t1, [\buf], #2 | ||||
|         smlabb          \h,  \t0, \pr, \h | ||||
| T       itttt           cs | ||||
|         rev16cs         \t1, \t1 | ||||
|         orrcs           \cw, \cw, \t1, lsl \bs | ||||
| A       orrcs           \cw, \cw, \t1, lsl \bs | ||||
| T       lslcs           \t1, \t1, \bs | ||||
| T       orrcs           \cw, \cw, \t1 | ||||
|         subcs           \bs, \bs, #16 | ||||
|         lsr             \h,  \h,  #8 | ||||
|         cmp             \cw, \h,  lsl #16 | ||||
|         itt             ge | ||||
|         subge           \cw, \cw, \h,  lsl #16 | ||||
|         subge           \h,  \t0, \h | ||||
| .endm | ||||
| @@ -40,14 +45,20 @@ | ||||
|         adds            \bs, \bs, \t0 | ||||
|         lsl             \cw, \cw, \t0 | ||||
|         lsl             \t0, \h,  \t0 | ||||
|         it              cs | ||||
|         ldrhcs          \t1, [\buf], #2 | ||||
|         mov             \h,  #128 | ||||
|         it              cs | ||||
|         rev16cs         \t1, \t1 | ||||
|         add             \h,  \h,  \t0, lsl #7 | ||||
|         orrcs           \cw, \cw, \t1, lsl \bs | ||||
| A       orrcs           \cw, \cw, \t1, lsl \bs | ||||
| T       ittt            cs | ||||
| T       lslcs           \t1, \t1, \bs | ||||
| T       orrcs           \cw, \cw, \t1 | ||||
|         subcs           \bs, \bs, #16 | ||||
|         lsr             \h,  \h,  #8 | ||||
|         cmp             \cw, \h,  lsl #16 | ||||
|         itt             ge | ||||
|         subge           \cw, \cw, \h,  lsl #16 | ||||
|         subge           \h,  \t0, \h | ||||
| .endm | ||||
| @@ -59,6 +70,7 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         cmp             r3,  #0 | ||||
|         ldr             r11, [r5] | ||||
|         ldm             r0,  {r5-r7}                    @ high, bits, buf | ||||
|         it              ne | ||||
|         pkhtbne         r11, r11, r11, asr #16 | ||||
|         ldr             r8,  [r0, #16]                  @ code_word | ||||
| 0: | ||||
| @@ -80,19 +92,26 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         adds            r6,  r6,  r9 | ||||
|         add             r4,  r4,  #11 | ||||
|         lsl             r8,  r8,  r9 | ||||
|         it              cs | ||||
|         ldrhcs          r10, [r7], #2 | ||||
|         lsl             r9,  r5,  r9 | ||||
|         mov             r5,  #128 | ||||
|         it              cs | ||||
|         rev16cs         r10, r10 | ||||
|         add             r5,  r5,  r9,  lsl #7 | ||||
|         orrcs           r8,  r8,  r10, lsl r6 | ||||
| T       ittt            cs | ||||
| T       lslcs           r10, r10, r6 | ||||
| T       orrcs           r8,  r8,  r10 | ||||
| A       orrcs           r8,  r8,  r10, lsl r6 | ||||
|         subcs           r6,  r6,  #16 | ||||
|         lsr             r5,  r5,  #8 | ||||
|         cmp             r8,  r5,  lsl #16 | ||||
|         movrel          r10, zigzag_scan-1 | ||||
|         itt             ge | ||||
|         subge           r8,  r8,  r5,  lsl #16 | ||||
|         subge           r5,  r9,  r5 | ||||
|         ldrb            r10, [r10, r3] | ||||
|         it              ge | ||||
|         rsbge           r12, r12, #0 | ||||
|         cmp             r3,  #16 | ||||
|         strh            r12, [r1, r10] | ||||
| @@ -108,6 +127,7 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         ldr             r0,  [sp] | ||||
|         ldr             r9,  [r0, #12] | ||||
|         cmp             r7,  r9 | ||||
|         it              hi | ||||
|         movhi           r7,  r9 | ||||
|         stm             r0,  {r5-r7}                    @ high, bits, buf | ||||
|         str             r8,  [r0, #16]                  @ code_word | ||||
| @@ -131,11 +151,13 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         mov             r12, #2 | ||||
|         ldrb            r0,  [r4, #4] | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r12, #1 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         blt             4f | ||||
|         ldrb            r0,  [r4, #5] | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r12, #1 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         b               4f | ||||
| @@ -153,6 +175,7 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         mov             r12, #5 | ||||
|         mov             r0,  #159 | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r12, r12, #1 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         b               4f | ||||
| @@ -160,23 +183,28 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         mov             r12, #7 | ||||
|         mov             r0,  #165 | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r12, r12, #2 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         mov             r0,  #145 | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r12, r12, #1 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         b               4f | ||||
| 3: | ||||
|         ldrb            r0,  [r4, #8] | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         it              ge | ||||
|         addge           r4,  r4,  #1 | ||||
|         ldrb            r9,  [lr, r5] | ||||
|         ite             ge | ||||
|         movge           r12, #2 | ||||
|         movlt           r12, #0 | ||||
|         ldrb            r0,  [r4, #9] | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         mov             r9,  #8 | ||||
|         it              ge | ||||
|         addge           r12, r12, #1 | ||||
|         movrel          r4,  X(ff_vp8_dct_cat_prob) | ||||
|         lsl             r9,  r9,  r12 | ||||
| @@ -189,6 +217,7 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         lsl             r1,  r1,  #1 | ||||
|         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10 | ||||
|         ldrb            r0,  [r4], #1 | ||||
|         it              ge | ||||
|         addge           r1,  r1,  #1 | ||||
|         cmp             r0,  #0 | ||||
|         bne             1b | ||||
| @@ -200,6 +229,7 @@ function ff_decode_block_coeffs_armv6, export=1 | ||||
|         add             r4,  r2,  r4 | ||||
|         add             r4,  r4,  #22 | ||||
|         rac_get_128     r5,  r6,  r7,  r8,  r9,  r10 | ||||
|         it              ge | ||||
|         rsbge           r12, r12, #0 | ||||
|         smulbb          r12, r12, r11 | ||||
|         movrel          r9,  zigzag_scan-1 | ||||
|   | ||||
| @@ -746,14 +746,14 @@ function ff_put_vp8_pixels4_neon, export=1 | ||||
|         push            {r4-r6,lr} | ||||
| 1: | ||||
|         subs            r12, r12, #4 | ||||
|         ldr             r4,       [r2], r3 | ||||
|         ldr             r5,       [r2], r3 | ||||
|         ldr             r6,       [r2], r3 | ||||
|         ldr             lr,       [r2], r3 | ||||
|         str             r4,       [r0], r1 | ||||
|         str             r5,       [r0], r1 | ||||
|         str             r6,       [r0], r1 | ||||
|         str             lr,       [r0], r1 | ||||
|         ldr_post        r4,  r2,  r3 | ||||
|         ldr_post        r5,  r2,  r3 | ||||
|         ldr_post        r6,  r2,  r3 | ||||
|         ldr_post        lr,  r2,  r3 | ||||
|         str_post        r4,  r0,  r1 | ||||
|         str_post        r5,  r0,  r1 | ||||
|         str_post        r6,  r0,  r1 | ||||
|         str_post        lr,  r0,  r1 | ||||
|         bgt             1b | ||||
|         pop             {r4-r6,pc} | ||||
| endfunc | ||||
|   | ||||
| @@ -36,6 +36,7 @@ static av_always_inline av_const int FASTDIV(int a, int b) | ||||
|     int r; | ||||
|     __asm__ ("cmp     %2, #2               \n\t" | ||||
|              "ldr     %0, [%3, %2, lsl #2] \n\t" | ||||
|              "ite     le                   \n\t" | ||||
|              "lsrle   %0, %1, #1           \n\t" | ||||
|              "smmulgt %0, %0, %1           \n\t" | ||||
|              : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); | ||||
| @@ -101,6 +102,7 @@ static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a) | ||||
| { | ||||
|     int x, y; | ||||
|     __asm__ ("adds   %1, %R2, %Q2, lsr #31  \n\t" | ||||
|              "itet   ne                     \n\t" | ||||
|              "mvnne  %1, #1<<31             \n\t" | ||||
|              "moveq  %0, %Q2                \n\t" | ||||
|              "eorne  %0, %1,  %R2, asr #31  \n\t" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user