From 62634158b7cd39ad1e330a87153a97bf3dc6f8de Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 29 Jun 2012 13:35:08 +0100 Subject: [PATCH 1/5] ARM: generate position independent code to access data symbols This creates proper position independent code when accessing data symbols if CONFIG_PIC is set. References to external symbols should now use the movrelx macro. Some additional code changes are required since this macro may need a register to hold the GOT pointer. Signed-off-by: Mans Rullgard --- libavcodec/arm/ac3dsp_armv6.S | 4 +- libavcodec/arm/fft_fixed_neon.S | 2 +- libavcodec/arm/fft_neon.S | 4 +- libavcodec/arm/sbrdsp_neon.S | 4 +- libavcodec/arm/vp3dsp_neon.S | 3 +- libavcodec/arm/vp8_armv6.S | 4 +- libavutil/arm/asm.S | 73 ++++++++++++++++++++++++++++++++- 7 files changed, 82 insertions(+), 12 deletions(-) diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S index 7e2f40edf0..f6f297a532 100644 --- a/libavcodec/arm/ac3dsp_armv6.S +++ b/libavcodec/arm/ac3dsp_armv6.S @@ -26,8 +26,8 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 beq 4f push {r4-r11,lr} add r5, sp, #40 - movrel r4, X(ff_ac3_bin_to_band_tab) - movrel lr, X(ff_ac3_band_start_tab) + movrelx r4, X(ff_ac3_bin_to_band_tab), r11 + movrelx lr, X(ff_ac3_band_start_tab) ldm r5, {r5-r7} ldrb r4, [r4, r2] add r1, r1, r2, lsl #1 @ psd + start diff --git a/libavcodec/arm/fft_fixed_neon.S b/libavcodec/arm/fft_fixed_neon.S index 4d891ba1a4..faddc0095a 100644 --- a/libavcodec/arm/fft_fixed_neon.S +++ b/libavcodec/arm/fft_fixed_neon.S @@ -214,7 +214,7 @@ function fft\n\()_neon bl fft\n4\()_neon mov r0, r4 pop {r4, lr} - movrel r1, X(ff_cos_\n\()_fixed) + movrelx r1, X(ff_cos_\n\()_fixed) mov r2, #\n4/2 b fft_pass_neon endfunc diff --git a/libavcodec/arm/fft_neon.S b/libavcodec/arm/fft_neon.S index aa06e6daa7..c4d89189ea 100644 --- a/libavcodec/arm/fft_neon.S +++ b/libavcodec/arm/fft_neon.S @@ -143,7 +143,7 @@ function fft16_neon vswp d29, d30 @ q14{r12,i12,i14,r15} 
q15{r13,i13,i15,r14} vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6} vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a} - movrel r2, X(ff_cos_16) + movrelx r2, X(ff_cos_16) vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8} vrev64.32 d1, d1 vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a} @@ -290,7 +290,7 @@ function fft\n\()_neon bl fft\n4\()_neon mov r0, r4 pop {r4, lr} - movrel r1, X(ff_cos_\n) + movrelx r1, X(ff_cos_\n) mov r2, #\n4/2 b fft_pass_neon endfunc diff --git a/libavcodec/arm/sbrdsp_neon.S b/libavcodec/arm/sbrdsp_neon.S index 4b681bfe6a..610397f9e2 100644 --- a/libavcodec/arm/sbrdsp_neon.S +++ b/libavcodec/arm/sbrdsp_neon.S @@ -307,8 +307,8 @@ function ff_sbr_hf_apply_noise_0_neon, export=1 vmov.i32 d3, #0 .Lhf_apply_noise_0: push {r4,lr} + movrelx r4, X(ff_sbr_noise_table) ldr r12, [sp, #12] - movrel r4, X(ff_sbr_noise_table) add r3, r3, #1 bfc r3, #9, #23 sub r12, r12, #1 @@ -355,8 +355,8 @@ function ff_sbr_hf_apply_noise_1_neon, export=1 eor lr, r12, #1<<31 vmov d3, r12, lr .Lhf_apply_noise_1: + movrelx r4, X(ff_sbr_noise_table) ldr r12, [sp, #12] - movrel r4, X(ff_sbr_noise_table) add r3, r3, #1 bfc r3, #9, #23 sub r12, r12, #1 diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S index 8d22d00038..2a9b25f34e 100644 --- a/libavcodec/arm/vp3dsp_neon.S +++ b/libavcodec/arm/vp3dsp_neon.S @@ -116,9 +116,8 @@ function vp3_idct_start_neon vadd.s16 q1, q8, q12 vsub.s16 q8, q8, q12 vld1.64 {d28-d31}, [r2,:128]! 
-endfunc -function vp3_idct_core_neon +vp3_idct_core_neon: vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 vmull.s16 q3, d19, xC1S7 vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16 diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S index 1fa6d15617..1b668bcd2a 100644 --- a/libavcodec/arm/vp8_armv6.S +++ b/libavcodec/arm/vp8_armv6.S @@ -65,7 +65,7 @@ T orrcs \cw, \cw, \t1 function ff_decode_block_coeffs_armv6, export=1 push {r0,r1,r4-r11,lr} - movrel lr, X(ff_vp56_norm_shift) + movrelx lr, X(ff_vp56_norm_shift) ldrd r4, r5, [sp, #44] @ token_prob, qmul cmp r3, #0 ldr r11, [r5] @@ -206,7 +206,7 @@ A orrcs r8, r8, r10, lsl r6 mov r9, #8 it ge addge r12, r12, #1 - movrel r4, X(ff_vp8_dct_cat_prob) + movrelx r4, X(ff_vp8_dct_cat_prob), r1 lsl r9, r9, r12 ldr r4, [r4, r12, lsl #2] add r12, r9, #3 diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S index 6038a63a27..15081802c6 100644 --- a/libavutil/arm/asm.S +++ b/libavutil/arm/asm.S @@ -62,7 +62,14 @@ ELF .eabi_attribute 25, \val .endm .macro function name, export=0 + .set .Lpic_idx, 0 + .set .Lpic_gp, 0 .macro endfunc + .if .Lpic_idx + .altmacro + put_pic %(.Lpic_idx - 1) + .noaltmacro + .endif ELF .size \name, . - \name .endfunc .purgem endfunc @@ -106,8 +113,44 @@ ELF .size \name, . 
- \name #endif .endm +.macro put_pic num + put_pic_\num +.endm + +.macro do_def_pic num, val, label + .macro put_pic_\num + .if \num + .altmacro + put_pic %(\num - 1) + .noaltmacro + .endif +\label: .word \val + .purgem put_pic_\num + .endm +.endm + +.macro def_pic val, label + .altmacro + do_def_pic %.Lpic_idx, \val, \label + .noaltmacro + .set .Lpic_idx, .Lpic_idx + 1 +.endm + +.macro ldpic rd, val, indir=0 + ldr \rd, .Lpicoff\@ +.Lpic\@: + .if \indir + ldr \rd, [pc, \rd] + .else + add \rd, pc, \rd + .endif + def_pic \val - (.Lpic\@ + (8 >> CONFIG_THUMB)), .Lpicoff\@ +.endm + .macro movrel rd, val -#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__) +#if CONFIG_PIC + ldpic \rd, \val +#elif HAVE_ARMV6T2 && !defined(__APPLE__) movw \rd, #:lower16:\val movt \rd, #:upper16:\val #else @@ -115,6 +158,34 @@ ELF .size \name, . - \name #endif .endm +.macro movrelx rd, val, gp +#if CONFIG_PIC && defined(__ELF__) + .ifnb \gp + .if .Lpic_gp + .unreq gp + .endif + gp .req \gp + ldpic gp, _GLOBAL_OFFSET_TABLE_ + .elseif !.Lpic_gp + gp .req r12 + ldpic gp, _GLOBAL_OFFSET_TABLE_ + .endif + .set .Lpic_gp, 1 + ldr \rd, .Lpicoff\@ + ldr \rd, [gp, \rd] + def_pic \val(GOT), .Lpicoff\@ +#elif CONFIG_PIC && defined(__APPLE__) + ldpic \rd, .Lpic\@, indir=1 + .non_lazy_symbol_pointer +.Lpic\@: + .indirect_symbol \val + .word 0 + .text +#else + movrel \rd, \val +#endif +.endm + .macro ldr_pre rt, rn, rm:vararg A ldr \rt, [\rn, \rm]! T add \rn, \rn, \rm From 889c1ec4cc064313092bc43a8a3a05aac9799e61 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sun, 24 Jun 2012 12:29:28 +0100 Subject: [PATCH 2/5] x86: cpu: clean up check for cpuid instruction support This adds macros for accessing the EFLAGS register and uses these instead of coding the entire check in inline asm. 
Signed-off-by: Mans Rullgard --- libavutil/x86/cpu.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index f61add0ded..5d77b0c2df 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -37,6 +37,16 @@ #define xgetbv(index, eax, edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) +#define get_eflags(x) \ + __asm__ volatile ("pushfl \n" \ + "pop %0 \n" \ + : "=r"(x)) + +#define set_eflags(x) \ + __asm__ volatile ("push %0 \n" \ + "popfl \n" \ + :: "r"(x)) + /* Function to test if multimedia instructions are supported... */ int ff_get_cpu_flags_x86(void) { @@ -48,26 +58,12 @@ int ff_get_cpu_flags_x86(void) #if ARCH_X86_32 x86_reg a, c; - __asm__ volatile ( - /* See if CPUID instruction is supported ... */ - /* ... Get copies of EFLAGS into eax and ecx */ - "pushfl\n\t" - "pop %0\n\t" - "mov %0, %1\n\t" - /* ... Toggle the ID bit in one copy and store */ - /* to the EFLAGS reg */ - "xor $0x200000, %0\n\t" - "push %0\n\t" - "popfl\n\t" - - /* ... Get the (hopefully modified) EFLAGS */ - "pushfl\n\t" - "pop %0\n\t" - : "=a" (a), "=c" (c) - : - : "cc" - ); + /* Check if CPUID is supported by attempting to toggle the ID bit in + * the EFLAGS register. */ + get_eflags(a); + set_eflags(a ^ 0x200000); + get_eflags(c); if (a == c) return 0; /* CPUID not supported */ From 710bd8a33e9c183ded4a61d742d404d8d5d838ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sat, 30 Jun 2012 21:30:28 +0300 Subject: [PATCH 3/5] wma: Lower the maximum number of channels to 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ff_wma_init is used only by wmadec and wmaenc, and neither of them can handle more than 2 channels. This fixes crashes with invalid files. Based on patch by Piotr Bandurski and Michael Niedermayer. 
Signed-off-by: Martin Storsjö --- libavcodec/wma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/wma.c b/libavcodec/wma.c index 007653ffec..b61228b94b 100644 --- a/libavcodec/wma.c +++ b/libavcodec/wma.c @@ -78,7 +78,7 @@ int ff_wma_init(AVCodecContext *avctx, int flags2) int coef_vlc_table; if ( avctx->sample_rate <= 0 || avctx->sample_rate > 50000 - || avctx->channels <= 0 || avctx->channels > 8 + || avctx->channels <= 0 || avctx->channels > 2 || avctx->bit_rate <= 0) return -1; From a1245d5ca1bed154a3bf38843b63018ae3544115 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 22 Jun 2012 13:05:21 +0300 Subject: [PATCH 4/5] mathematics: Don't use division by zero in NAN/INFINITY macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some compilers, MSVC among them, don't recognize the divisions by zero as meaning infinity/nan. These macros should, according to the standard, expand to constant expressions, but this shouldn't matter for our usage. 
Signed-off-by: Martin Storsjö --- libavutil/mathematics.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libavutil/mathematics.h b/libavutil/mathematics.h index a734b75c02..043dd0fafe 100644 --- a/libavutil/mathematics.h +++ b/libavutil/mathematics.h @@ -25,6 +25,7 @@ #include <math.h> #include "attributes.h" #include "rational.h" +#include "intfloat.h" #ifndef M_LOG2_10 #define M_LOG2_10 3.32192809488736234787 /* log_2 10 */ @@ -33,10 +34,10 @@ #define M_PHI 1.61803398874989484820 /* phi / golden ratio */ #endif #ifndef NAN -#define NAN (0.0/0.0) +#define NAN av_int2float(0x7fc00000) #endif #ifndef INFINITY -#define INFINITY (1.0/0.0) +#define INFINITY av_int2float(0x7f800000) #endif /** From 3893feeccdf754057fc7c7ac711ae876733f2f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sat, 30 Jun 2012 18:49:33 +0300 Subject: [PATCH 5/5] opt/eval: Include mathematics.h for NAN/INFINITY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These files use NAN/INFINITY but didn't include mathematics.h to get the fallback definitions if the system lacks the macros. Signed-off-by: Martin Storsjö --- libavutil/eval.c | 1 + libavutil/opt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/libavutil/eval.c b/libavutil/eval.c index 4d8ebf4f6e..36b5ce5dda 100644 --- a/libavutil/eval.c +++ b/libavutil/eval.c @@ -29,6 +29,7 @@ #include "avutil.h" #include "eval.h" #include "log.h" +#include "mathematics.h" typedef struct Parser { const AVClass *class; diff --git a/libavutil/opt.c b/libavutil/opt.c index 9a3c0d47c7..b1e50f7a59 100644 --- a/libavutil/opt.c +++ b/libavutil/opt.c @@ -31,6 +31,7 @@ #include "eval.h" #include "dict.h" #include "log.h" +#include "mathematics.h" #if FF_API_FIND_OPT //FIXME order them and do a bin search