about | summary | refs | log | tree | commit | diff
path: root/lib/ffmpeg
diff options
context:
space:
mode:
author davilla <davilla@4pi.com> 2011-10-30 14:13:21 -0400
committer davilla <davilla@4pi.com> 2011-10-30 14:13:21 -0400
commit eb61ba8d4962f5348d807444aefeb2c8533ff42b (patch)
tree d6349aede21bc936d66e2f8836541fb8398add14 /lib/ffmpeg
parent fbcda23f5bbb2cef190498641b613252c8d93b21 (diff)
[ios] fixed: ffmpeg used .text symbol relocations in its NEON asm code; this is not permitted on iOS 5 due to ASLR and will panic the Darwin kernel when ffmpeg is dynamically loaded
Diffstat (limited to 'lib/ffmpeg')
-rw-r--r-- lib/ffmpeg/libavcodec/arm/fft_neon.S 73
-rw-r--r-- lib/ffmpeg/libavcodec/arm/h264idct_neon.S 28
-rw-r--r-- lib/ffmpeg/libavcodec/arm/h264pred_neon.S 5
-rw-r--r-- lib/ffmpeg/libavcodec/arm/simple_idct_neon.S 3
-rw-r--r-- lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S 13
5 files changed, 83 insertions, 39 deletions
diff --git a/lib/ffmpeg/libavcodec/arm/fft_neon.S b/lib/ffmpeg/libavcodec/arm/fft_neon.S
index 1db7abd146..6390065fc4 100644
--- a/lib/ffmpeg/libavcodec/arm/fft_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/fft_neon.S
@@ -101,8 +101,12 @@ function fft8_neon
bx lr
endfunc
+ .align 4
+pmmp: .float +1.0, -1.0, -1.0, +1.0
+mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+
function fft16_neon
- movrel r1, mppm
+ adr r1, mppm
vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
pld [r0, #32]
vld1.32 {d2-d3}, [r1,:128]
@@ -144,12 +148,16 @@ function fft16_neon
vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6}
vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a}
- movrel r2, X(ff_cos_16)
+ ldr r2, _neon_label
+ ldr r3, L$diff1
+ add r2, r3
+local_label1:
+ ldr r2, [pc, r2]
vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8}
vrev64.32 d1, d1
vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a}
vrev64.32 d3, d3
- movrel r3, pmmp
+ adr r3, pmmp
vswp d1, d26 @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
vswp d3, d30 @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
vadd.f32 q12, q0, q13 @ {r8,i8,r9,i9}
@@ -214,7 +222,7 @@ function fft_pass_neon
add r2, r2, r0 @ &z[o2]
add r3, r3, r0 @ &z[o3]
vld1.32 {d20-d21},[r2,:128] @ {z[o2],z[o2+1]}
- movrel r12, pmmp
+ adr r12, pmmp
vld1.32 {d22-d23},[r3,:128] @ {z[o3],z[o3+1]}
add r5, r5, r1 @ wim
vld1.32 {d6-d7}, [r12,:128] @ pmmp
@@ -279,6 +287,11 @@ function fft_pass_neon
pop {r4-r6,pc}
endfunc
+.set L$offs16, 0
+.macro setTabOffs n, n2
+.set L$offs\n, L$offs\n2 + 4
+.endm
+
.macro def_fft n, n2, n4
.align 6
function fft\n\()_neon
@@ -291,10 +304,15 @@ function fft\n\()_neon
bl fft\n4\()_neon
mov r0, r4
pop {r4, lr}
- movrel r1, X(ff_cos_\n)
+ ldr r1, _neon_label
+ add r1, #L$diff\n
+local_label\n:
+ ldr r1, [pc, r1]
mov r2, #\n4/2
b fft_pass_neon
endfunc
+setTabOffs \n, \n2
+.set L$diff\n, _neon_label - local_label\n + L$offs\n - 8
.endm
def_fft 32, 16, 8
@@ -310,10 +328,14 @@ endfunc
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
+.set L$diffTab, fft_tab_neon_offs - local_label_tab - 8
function ff_fft_calc_neon, export=1
ldr r2, [r0]
sub r2, r2, #2
- movrel r3, fft_tab_neon
+ ldr r3, fft_tab_neon_offs
+ add r3, #L$diffTab
+local_label_tab:
+ add r3, pc
ldr r3, [r3, r2, lsl #2]
mov r0, r1
bx r3
@@ -349,9 +371,22 @@ function ff_fft_permute_neon, export=1
pop {r4,pc}
endfunc
- .section .rodata
+
+.global _neon_label
+_neon_label:
+.word _neon_cos_tab - .
+
+L$diff1:
+.word _neon_label - local_label1 - 8
+
+fft_tab_neon_offs:
+.word _fft_tab_neon - .
+
+
+.section .rodata
+
.align 4
-fft_tab_neon:
+_fft_tab_neon:
.word fft4_neon
.word fft8_neon
.word fft16_neon
@@ -367,8 +402,20 @@ fft_tab_neon:
.word fft16384_neon
.word fft32768_neon
.word fft65536_neon
-ELF .size fft_tab_neon, . - fft_tab_neon
-
- .align 4
-pmmp: .float +1.0, -1.0, -1.0, +1.0
-mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+ELF .size _fft_tab_neon, . - _fft_tab_neon
+
+ .align 4
+_neon_cos_tab:
+ .word X(ff_cos_16)
+ .word X(ff_cos_32)
+ .word X(ff_cos_64)
+ .word X(ff_cos_128)
+ .word X(ff_cos_256)
+ .word X(ff_cos_512)
+ .word X(ff_cos_1024)
+ .word X(ff_cos_2048)
+ .word X(ff_cos_4096)
+ .word X(ff_cos_8192)
+ .word X(ff_cos_16384)
+ .word X(ff_cos_32768)
+ .word X(ff_cos_65536)
diff --git a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
index 6b6a669f35..8111975160 100644
--- a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
@@ -97,7 +97,7 @@ function ff_h264_idct_add16_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov ip, #16
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -117,6 +117,16 @@ function ff_h264_idct_add16_neon, export=1
pop {r4-r8,pc}
endfunc
+ .align
+scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
+ .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
+ .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
+ .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
+ .byte 1+1*8, 2+1*8
+ .byte 1+2*8, 2+2*8
+ .byte 1+4*8, 2+4*8
+ .byte 1+5*8, 2+5*8
+
function ff_h264_idct_add16intra_neon, export=1
push {r4-r8,lr}
mov r4, r0
@@ -124,7 +134,7 @@ function ff_h264_idct_add16intra_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov ip, #16
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -149,7 +159,7 @@ function ff_h264_idct_add8_neon, export=1
add r1, r2, #16*32
mov r2, r3
ldr r6, [sp, #32]
- movrel r7, scan8+16
+ adr r7, scan8+16
mov ip, #7
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -353,7 +363,7 @@ function ff_h264_idct8_add4_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov r12, #16
1: ldrb r8, [r7], #4
ldr r0, [r5], #16
@@ -372,13 +382,3 @@ function ff_h264_idct8_add4_neon, export=1
bne 1b
pop {r4-r8,pc}
endfunc
-
- .section .rodata
-scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
- .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
- .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
- .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
- .byte 1+1*8, 2+1*8
- .byte 1+2*8, 2+2*8
- .byte 1+4*8, 2+4*8
- .byte 1+5*8, 2+5*8
diff --git a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
index 63c96ee725..357c10a3fa 100644
--- a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
@@ -123,7 +123,7 @@ function ff_pred16x16_plane_neon, export=1
vaddl.u8 q8, d2, d3
vsubl.u8 q2, d2, d0
vsubl.u8 q3, d3, d1
- movrel r3, p16weight
+ adr r3, p16weight
vld1.8 {q0}, [r3,:128]
vmul.s16 q2, q2, q0
vmul.s16 q3, q3, q0
@@ -166,7 +166,6 @@ function ff_pred16x16_plane_neon, export=1
bx lr
endfunc
- .section .rodata
.align 4
p16weight:
.short 1,2,3,4,5,6,7,8
@@ -207,7 +206,7 @@ function ff_pred8x8_plane_neon, export=1
vrev32.8 d0, d0
vtrn.32 d2, d3
vsubl.u8 q2, d2, d0
- movrel r3, p16weight
+ adr r3, p16weight
vld1.16 {q0}, [r3,:128]
vmul.s16 d4, d4, d0
vmul.s16 d5, d5, d0
diff --git a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
index 17cde5835a..e61414eda7 100644
--- a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
@@ -239,7 +239,6 @@ function idct_col4_st8_neon
bx lr
endfunc
- .section .rodata
.align 4
idct_coeff_neon:
.short W1, W2, W3, W4, W5, W6, W7, W4c
@@ -249,7 +248,7 @@ idct_coeff_neon:
pld [\data]
pld [\data, #64]
vpush {d8-d15}
- movrel r3, idct_coeff_neon
+ adr r3, idct_coeff_neon
vld1.64 {d0,d1}, [r3,:128]
.endm
diff --git a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
index d97ed3d21d..74bf7ba4dc 100644
--- a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
@@ -20,12 +20,9 @@
#include "asm.S"
-.section .rodata
+.text
.align 4
-vp3_idct_constants:
-.short 64277, 60547, 54491, 46341, 36410, 25080, 12785
-
#define xC1S7 d0[0]
#define xC2S6 d0[1]
#define xC3S5 d0[2]
@@ -34,8 +31,6 @@ vp3_idct_constants:
#define xC6S2 d1[1]
#define xC7S1 d1[2]
-.text
-
.macro vp3_loop_filter
vsubl.u8 q3, d18, d17
vsubl.u8 q2, d16, d19
@@ -109,10 +104,14 @@ function ff_vp3_h_loop_filter_neon, export=1
bx lr
endfunc
+.align 4
+vp3_idct_constants:
+.short 64277, 60547, 54491, 46341, 36410, 25080, 12785
+.align 4
function vp3_idct_start_neon
vpush {d8-d15}
- movrel r3, vp3_idct_constants
+ adr r3, vp3_idct_constants
vld1.64 {d0-d1}, [r3,:128]
vld1.64 {d16-d19}, [r2,:128]!
vld1.64 {d20-d23}, [r2,:128]!