diff options
author | davilla <davilla@4pi.com> | 2011-10-30 14:13:21 -0400 |
---|---|---|
committer | davilla <davilla@4pi.com> | 2011-10-30 14:13:21 -0400 |
commit | eb61ba8d4962f5348d807444aefeb2c8533ff42b (patch) | |
tree | d6349aede21bc936d66e2f8836541fb8398add14 /lib/ffmpeg/libavcodec | |
parent | fbcda23f5bbb2cef190498641b613252c8d93b21 (diff) |
[ios] fixed, ffmpeg used .text symbol relocation in neon asm code, this is not permitted in iOS5 due to ASLR and will panic the darwin kernel when ffmpeg is dyloaded
Diffstat (limited to 'lib/ffmpeg/libavcodec')
-rw-r--r-- | lib/ffmpeg/libavcodec/arm/fft_neon.S | 73 | ||||
-rw-r--r-- | lib/ffmpeg/libavcodec/arm/h264idct_neon.S | 28 | ||||
-rw-r--r-- | lib/ffmpeg/libavcodec/arm/h264pred_neon.S | 5 | ||||
-rw-r--r-- | lib/ffmpeg/libavcodec/arm/simple_idct_neon.S | 3 | ||||
-rw-r--r-- | lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S | 13 |
5 files changed, 83 insertions, 39 deletions
diff --git a/lib/ffmpeg/libavcodec/arm/fft_neon.S b/lib/ffmpeg/libavcodec/arm/fft_neon.S index 1db7abd146..6390065fc4 100644 --- a/lib/ffmpeg/libavcodec/arm/fft_neon.S +++ b/lib/ffmpeg/libavcodec/arm/fft_neon.S @@ -101,8 +101,12 @@ function fft8_neon bx lr endfunc + .align 4 +pmmp: .float +1.0, -1.0, -1.0, +1.0 +mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 + function fft16_neon - movrel r1, mppm + adr r1, mppm vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3} pld [r0, #32] vld1.32 {d2-d3}, [r1,:128] @@ -144,12 +148,16 @@ function fft16_neon vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14} vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6} vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a} - movrel r2, X(ff_cos_16) + ldr r2, _neon_label + ldr r3, L$diff1 + add r2, r3 +local_label1: + ldr r2, [pc, r2] vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8} vrev64.32 d1, d1 vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a} vrev64.32 d3, d3 - movrel r3, pmmp + adr r3, pmmp vswp d1, d26 @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8} vswp d3, d30 @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a} vadd.f32 q12, q0, q13 @ {r8,i8,r9,i9} @@ -214,7 +222,7 @@ function fft_pass_neon add r2, r2, r0 @ &z[o2] add r3, r3, r0 @ &z[o3] vld1.32 {d20-d21},[r2,:128] @ {z[o2],z[o2+1]} - movrel r12, pmmp + adr r12, pmmp vld1.32 {d22-d23},[r3,:128] @ {z[o3],z[o3+1]} add r5, r5, r1 @ wim vld1.32 {d6-d7}, [r12,:128] @ pmmp @@ -279,6 +287,11 @@ function fft_pass_neon pop {r4-r6,pc} endfunc +.set L$offs16, 0 +.macro setTabOffs n, n2 +.set L$offs\n, L$offs\n2 + 4 +.endm + .macro def_fft n, n2, n4 .align 6 function fft\n\()_neon @@ -291,10 +304,15 @@ function fft\n\()_neon bl fft\n4\()_neon mov r0, r4 pop {r4, lr} - movrel r1, X(ff_cos_\n) + ldr r1, _neon_label + add r1, #L$diff\n +local_label\n: + ldr r1, [pc, r1] mov r2, #\n4/2 b fft_pass_neon endfunc +setTabOffs \n, \n2 +.set L$diff\n, _neon_label - local_label\n + L$offs\n - 8 .endm def_fft 32, 16, 8 @@ -310,10 +328,14 @@ endfunc def_fft 32768, 16384, 8192 def_fft 65536, 32768, 16384 +.set L$diffTab, fft_tab_neon_offs - local_label_tab - 8 function ff_fft_calc_neon, export=1 ldr r2, [r0] sub r2, r2, #2 - movrel r3, fft_tab_neon + ldr r3, fft_tab_neon_offs + add r3, #L$diffTab +local_label_tab: + add r3, pc ldr r3, [r3, r2, lsl #2] mov r0, r1 bx r3 @@ -349,9 +371,22 @@ function ff_fft_permute_neon, export=1 pop {r4,pc} endfunc - .section .rodata + +.global _neon_label +_neon_label: +.word _neon_cos_tab - . + +L$diff1: +.word _neon_label - local_label1 - 8 + +fft_tab_neon_offs: +.word _fft_tab_neon - . + + +.section .rodata + .align 4 -fft_tab_neon: +_fft_tab_neon: .word fft4_neon .word fft8_neon .word fft16_neon @@ -367,8 +402,20 @@ fft_tab_neon: .word fft16384_neon .word fft32768_neon .word fft65536_neon -ELF .size fft_tab_neon, . - fft_tab_neon - - .align 4 -pmmp: .float +1.0, -1.0, -1.0, +1.0 -mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 +ELF .size _fft_tab_neon, . - _fft_tab_neon + + .align 4 +_neon_cos_tab: + .word X(ff_cos_16) + .word X(ff_cos_32) + .word X(ff_cos_64) + .word X(ff_cos_128) + .word X(ff_cos_256) + .word X(ff_cos_512) + .word X(ff_cos_1024) + .word X(ff_cos_2048) + .word X(ff_cos_4096) + .word X(ff_cos_8192) + .word X(ff_cos_16384) + .word X(ff_cos_32768) + .word X(ff_cos_65536) diff --git a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S index 6b6a669f35..8111975160 100644 --- a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S +++ b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S @@ -97,7 +97,7 @@ function ff_h264_idct_add16_neon, export=1 mov r1, r2 mov r2, r3 ldr r6, [sp, #24] - movrel r7, scan8 + adr r7, scan8 mov ip, #16 1: ldrb r8, [r7], #1 ldr r0, [r5], #4 @@ -117,6 +117,16 @@ function ff_h264_idct_add16_neon, export=1 pop {r4-r8,pc} endfunc + .align +scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 + .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 + .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 + .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8 + .byte 1+1*8, 2+1*8 + .byte 1+2*8, 2+2*8 + .byte 1+4*8, 2+4*8 + .byte 1+5*8, 2+5*8 + function ff_h264_idct_add16intra_neon, export=1 push {r4-r8,lr} mov r4, r0 @@ -124,7 +134,7 @@ function ff_h264_idct_add16intra_neon, export=1 mov r1, r2 mov r2, r3 ldr r6, [sp, #24] - movrel r7, scan8 + adr r7, scan8 mov ip, #16 1: ldrb r8, [r7], #1 ldr r0, [r5], #4 @@ -149,7 +159,7 @@ function ff_h264_idct_add8_neon, export=1 add r1, r2, #16*32 mov r2, r3 ldr r6, [sp, #32] - movrel r7, scan8+16 + adr r7, scan8+16 mov ip, #7 1: ldrb r8, [r7], #1 ldr r0, [r5], #4 @@ -353,7 +363,7 @@ function ff_h264_idct8_add4_neon, export=1 mov r1, r2 mov r2, r3 ldr r6, [sp, #24] - movrel r7, scan8 + adr r7, scan8 mov r12, #16 1: ldrb r8, [r7], #4 ldr r0, [r5], #16 @@ -372,13 +382,3 @@ function ff_h264_idct8_add4_neon, export=1 bne 1b pop {r4-r8,pc} endfunc - - .section .rodata -scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 - .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 - .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 - .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8 - .byte 1+1*8, 2+1*8 - .byte 1+2*8, 2+2*8 - .byte 1+4*8, 2+4*8 - .byte 1+5*8, 2+5*8 diff --git a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S index 63c96ee725..357c10a3fa 100644 --- a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S +++ b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S @@ -123,7 +123,7 @@ function ff_pred16x16_plane_neon, export=1 vaddl.u8 q8, d2, d3 vsubl.u8 q2, d2, d0 vsubl.u8 q3, d3, d1 - movrel r3, p16weight + adr r3, p16weight vld1.8 {q0}, [r3,:128] vmul.s16 q2, q2, q0 vmul.s16 q3, q3, q0 @@ -166,7 +166,6 @@ function ff_pred16x16_plane_neon, export=1 bx lr endfunc - .section .rodata .align 4 p16weight: .short 1,2,3,4,5,6,7,8 @@ -207,7 +206,7 @@ function ff_pred8x8_plane_neon, export=1 vrev32.8 d0, d0 vtrn.32 d2, d3 vsubl.u8 q2, d2, d0 - movrel r3, p16weight + adr r3, p16weight vld1.16 {q0}, [r3,:128] vmul.s16 d4, d4, d0 vmul.s16 d5, d5, d0 diff --git a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S index 17cde5835a..e61414eda7 100644 --- a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S +++ b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S @@ -239,7 +239,6 @@ function idct_col4_st8_neon bx lr endfunc - .section .rodata .align 4 idct_coeff_neon: .short W1, W2, W3, W4, W5, W6, W7, W4c @@ -249,7 +248,7 @@ idct_coeff_neon: pld [\data] pld [\data, #64] vpush {d8-d15} - movrel r3, idct_coeff_neon + adr r3, idct_coeff_neon vld1.64 {d0,d1}, [r3,:128] .endm diff --git a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S index d97ed3d21d..74bf7ba4dc 100644 --- a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S +++ b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S @@ -20,12 +20,9 @@ #include "asm.S" -.section .rodata +.text .align 4 -vp3_idct_constants: -.short 64277, 60547, 54491, 46341, 36410, 25080, 12785 - #define xC1S7 d0[0] #define xC2S6 d0[1] #define xC3S5 d0[2] @@ -34,8 +31,6 @@ vp3_idct_constants: #define xC6S2 d1[1] #define xC7S1 d1[2] -.text - .macro vp3_loop_filter vsubl.u8 q3, d18, d17 vsubl.u8 q2, d16, d19 @@ -109,10 +104,14 @@ function ff_vp3_h_loop_filter_neon, export=1 bx lr endfunc +.align 4 +vp3_idct_constants: +.short 64277, 60547, 54491, 46341, 36410, 25080, 12785 +.align 4 function vp3_idct_start_neon vpush {d8-d15} - movrel r3, vp3_idct_constants + adr r3, vp3_idct_constants vld1.64 {d0-d1}, [r3,:128] vld1.64 {d16-d19}, [r2,:128]! vld1.64 {d20-d23}, [r2,:128]! |