aboutsummaryrefslogtreecommitdiff
path: root/lib/ffmpeg
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ffmpeg')
-rw-r--r--lib/ffmpeg/libavcodec/arm/fft_neon.S73
-rw-r--r--lib/ffmpeg/libavcodec/arm/h264idct_neon.S28
-rw-r--r--lib/ffmpeg/libavcodec/arm/h264pred_neon.S5
-rw-r--r--lib/ffmpeg/libavcodec/arm/simple_idct_neon.S3
-rw-r--r--lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S13
5 files changed, 83 insertions, 39 deletions
diff --git a/lib/ffmpeg/libavcodec/arm/fft_neon.S b/lib/ffmpeg/libavcodec/arm/fft_neon.S
index 1db7abd146..6390065fc4 100644
--- a/lib/ffmpeg/libavcodec/arm/fft_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/fft_neon.S
@@ -101,8 +101,12 @@ function fft8_neon
bx lr
endfunc
+ .align 4
+pmmp: .float +1.0, -1.0, -1.0, +1.0
+mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+
function fft16_neon
- movrel r1, mppm
+ adr r1, mppm
vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
pld [r0, #32]
vld1.32 {d2-d3}, [r1,:128]
@@ -144,12 +148,16 @@ function fft16_neon
vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6}
vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a}
- movrel r2, X(ff_cos_16)
+ ldr r2, _neon_label
+ ldr r3, L$diff1
+ add r2, r3
+local_label1:
+ ldr r2, [pc, r2]
vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8}
vrev64.32 d1, d1
vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a}
vrev64.32 d3, d3
- movrel r3, pmmp
+ adr r3, pmmp
vswp d1, d26 @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
vswp d3, d30 @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
vadd.f32 q12, q0, q13 @ {r8,i8,r9,i9}
@@ -214,7 +222,7 @@ function fft_pass_neon
add r2, r2, r0 @ &z[o2]
add r3, r3, r0 @ &z[o3]
vld1.32 {d20-d21},[r2,:128] @ {z[o2],z[o2+1]}
- movrel r12, pmmp
+ adr r12, pmmp
vld1.32 {d22-d23},[r3,:128] @ {z[o3],z[o3+1]}
add r5, r5, r1 @ wim
vld1.32 {d6-d7}, [r12,:128] @ pmmp
@@ -279,6 +287,11 @@ function fft_pass_neon
pop {r4-r6,pc}
endfunc
+.set L$offs16, 0
+.macro setTabOffs n, n2
+.set L$offs\n, L$offs\n2 + 4
+.endm
+
.macro def_fft n, n2, n4
.align 6
function fft\n\()_neon
@@ -291,10 +304,15 @@ function fft\n\()_neon
bl fft\n4\()_neon
mov r0, r4
pop {r4, lr}
- movrel r1, X(ff_cos_\n)
+ ldr r1, _neon_label
+ add r1, #L$diff\n
+local_label\n:
+ ldr r1, [pc, r1]
mov r2, #\n4/2
b fft_pass_neon
endfunc
+setTabOffs \n, \n2
+.set L$diff\n, _neon_label - local_label\n + L$offs\n - 8
.endm
def_fft 32, 16, 8
@@ -310,10 +328,14 @@ endfunc
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
+.set L$diffTab, fft_tab_neon_offs - local_label_tab - 8
function ff_fft_calc_neon, export=1
ldr r2, [r0]
sub r2, r2, #2
- movrel r3, fft_tab_neon
+ ldr r3, fft_tab_neon_offs
+ add r3, #L$diffTab
+local_label_tab:
+ add r3, pc
ldr r3, [r3, r2, lsl #2]
mov r0, r1
bx r3
@@ -349,9 +371,22 @@ function ff_fft_permute_neon, export=1
pop {r4,pc}
endfunc
- .section .rodata
+
+.global _neon_label
+_neon_label:
+.word _neon_cos_tab - .
+
+L$diff1:
+.word _neon_label - local_label1 - 8
+
+fft_tab_neon_offs:
+.word _fft_tab_neon - .
+
+
+.section .rodata
+
.align 4
-fft_tab_neon:
+_fft_tab_neon:
.word fft4_neon
.word fft8_neon
.word fft16_neon
@@ -367,8 +402,20 @@ fft_tab_neon:
.word fft16384_neon
.word fft32768_neon
.word fft65536_neon
-ELF .size fft_tab_neon, . - fft_tab_neon
-
- .align 4
-pmmp: .float +1.0, -1.0, -1.0, +1.0
-mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+ELF .size _fft_tab_neon, . - _fft_tab_neon
+
+ .align 4
+_neon_cos_tab:
+ .word X(ff_cos_16)
+ .word X(ff_cos_32)
+ .word X(ff_cos_64)
+ .word X(ff_cos_128)
+ .word X(ff_cos_256)
+ .word X(ff_cos_512)
+ .word X(ff_cos_1024)
+ .word X(ff_cos_2048)
+ .word X(ff_cos_4096)
+ .word X(ff_cos_8192)
+ .word X(ff_cos_16384)
+ .word X(ff_cos_32768)
+ .word X(ff_cos_65536)
diff --git a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
index 6b6a669f35..8111975160 100644
--- a/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/h264idct_neon.S
@@ -97,7 +97,7 @@ function ff_h264_idct_add16_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov ip, #16
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -117,6 +117,16 @@ function ff_h264_idct_add16_neon, export=1
pop {r4-r8,pc}
endfunc
+ .align
+scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
+ .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
+ .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
+ .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
+ .byte 1+1*8, 2+1*8
+ .byte 1+2*8, 2+2*8
+ .byte 1+4*8, 2+4*8
+ .byte 1+5*8, 2+5*8
+
function ff_h264_idct_add16intra_neon, export=1
push {r4-r8,lr}
mov r4, r0
@@ -124,7 +134,7 @@ function ff_h264_idct_add16intra_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov ip, #16
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -149,7 +159,7 @@ function ff_h264_idct_add8_neon, export=1
add r1, r2, #16*32
mov r2, r3
ldr r6, [sp, #32]
- movrel r7, scan8+16
+ adr r7, scan8+16
mov ip, #7
1: ldrb r8, [r7], #1
ldr r0, [r5], #4
@@ -353,7 +363,7 @@ function ff_h264_idct8_add4_neon, export=1
mov r1, r2
mov r2, r3
ldr r6, [sp, #24]
- movrel r7, scan8
+ adr r7, scan8
mov r12, #16
1: ldrb r8, [r7], #4
ldr r0, [r5], #16
@@ -372,13 +382,3 @@ function ff_h264_idct8_add4_neon, export=1
bne 1b
pop {r4-r8,pc}
endfunc
-
- .section .rodata
-scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
- .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
- .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
- .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
- .byte 1+1*8, 2+1*8
- .byte 1+2*8, 2+2*8
- .byte 1+4*8, 2+4*8
- .byte 1+5*8, 2+5*8
diff --git a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
index 63c96ee725..357c10a3fa 100644
--- a/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/h264pred_neon.S
@@ -123,7 +123,7 @@ function ff_pred16x16_plane_neon, export=1
vaddl.u8 q8, d2, d3
vsubl.u8 q2, d2, d0
vsubl.u8 q3, d3, d1
- movrel r3, p16weight
+ adr r3, p16weight
vld1.8 {q0}, [r3,:128]
vmul.s16 q2, q2, q0
vmul.s16 q3, q3, q0
@@ -166,7 +166,6 @@ function ff_pred16x16_plane_neon, export=1
bx lr
endfunc
- .section .rodata
.align 4
p16weight:
.short 1,2,3,4,5,6,7,8
@@ -207,7 +206,7 @@ function ff_pred8x8_plane_neon, export=1
vrev32.8 d0, d0
vtrn.32 d2, d3
vsubl.u8 q2, d2, d0
- movrel r3, p16weight
+ adr r3, p16weight
vld1.16 {q0}, [r3,:128]
vmul.s16 d4, d4, d0
vmul.s16 d5, d5, d0
diff --git a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
index 17cde5835a..e61414eda7 100644
--- a/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/simple_idct_neon.S
@@ -239,7 +239,6 @@ function idct_col4_st8_neon
bx lr
endfunc
- .section .rodata
.align 4
idct_coeff_neon:
.short W1, W2, W3, W4, W5, W6, W7, W4c
@@ -249,7 +248,7 @@ idct_coeff_neon:
pld [\data]
pld [\data, #64]
vpush {d8-d15}
- movrel r3, idct_coeff_neon
+ adr r3, idct_coeff_neon
vld1.64 {d0,d1}, [r3,:128]
.endm
diff --git a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
index d97ed3d21d..74bf7ba4dc 100644
--- a/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
+++ b/lib/ffmpeg/libavcodec/arm/vp3dsp_neon.S
@@ -20,12 +20,9 @@
#include "asm.S"
-.section .rodata
+.text
.align 4
-vp3_idct_constants:
-.short 64277, 60547, 54491, 46341, 36410, 25080, 12785
-
#define xC1S7 d0[0]
#define xC2S6 d0[1]
#define xC3S5 d0[2]
@@ -34,8 +31,6 @@ vp3_idct_constants:
#define xC6S2 d1[1]
#define xC7S1 d1[2]
-.text
-
.macro vp3_loop_filter
vsubl.u8 q3, d18, d17
vsubl.u8 q2, d16, d19
@@ -109,10 +104,14 @@ function ff_vp3_h_loop_filter_neon, export=1
bx lr
endfunc
+.align 4
+vp3_idct_constants:
+.short 64277, 60547, 54491, 46341, 36410, 25080, 12785
+.align 4
function vp3_idct_start_neon
vpush {d8-d15}
- movrel r3, vp3_idct_constants
+ adr r3, vp3_idct_constants
vld1.64 {d0-d1}, [r3,:128]
vld1.64 {d16-d19}, [r2,:128]!
vld1.64 {d20-d23}, [r2,:128]!