neon: add alignment hints

2026-07-09 00:06:24 -04:00 · 2020-04-03 14:26:02 +02:00 · 2020-04-03 14:26:02 +02:00 · bf3ebb67aa
commit bf3ebb67aa
parent 167460a9bc
1 changed file with 10 additions and 10 deletions
--- a/spa/plugins/audioconvert/resample-native-neon.c
+++ b/spa/plugins/audioconvert/resample-native-neon.c
@@ -89,16 +89,16 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
 	asm volatile (
 		"      cmp %[n_taps], #0\n"
 		"      bne 1f\n"
-		"      vld1.32 {q4}, [%[taps]]!\n"
+		"      vld1.32 {q4}, [%[taps] :128]!\n"
 		"      vld1.32 {q8}, [%[s]]!\n"
 		"      subs %[remainder], %[remainder], #4\n"
 		"      vmul.f32 q0, q4, q8\n"
 		"      bne 4f\n"
 		"      b 5f\n"
 		"1:"
-		"      vld1.32 {q4, q5}, [%[taps]]!\n"
+		"      vld1.32 {q4, q5}, [%[taps] :128]!\n"
 		"      vld1.32 {q8, q9}, [%[s]]!\n"
-		"      vld1.32 {q6, q7}, [%[taps]]!\n"
+		"      vld1.32 {q6, q7}, [%[taps] :128]!\n"
 		"      vld1.32 {q10, q11}, [%[s]]!\n"
 		"      subs %[n_taps], %[n_taps], #16\n"
 		"      vmul.f32 q0, q4, q8\n"
@@ -107,9 +107,9 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
 		"      vmul.f32 q3, q7, q11\n"
 		"      beq 3f\n"
 		"2:"
-		"      vld1.32 {q4, q5}, [%[taps]]!\n"
+		"      vld1.32 {q4, q5}, [%[taps] :128]!\n"
 		"      vld1.32 {q8, q9}, [%[s]]!\n"
-		"      vld1.32 {q6, q7}, [%[taps]]!\n"
+		"      vld1.32 {q6, q7}, [%[taps] :128]!\n"
 		"      vld1.32 {q10, q11}, [%[s]]!\n"
 		"      subs %[n_taps], %[n_taps], #16\n"
 		"      vmla.f32 q0, q4, q8\n"
@@ -124,7 +124,7 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
 		"      vadd.f32 q0, q4, q5\n"
 		"      beq 5f\n"
 		"4:"
-		"      vld1.32 {q6}, [%[taps]]!\n"
+		"      vld1.32 {q6}, [%[taps] :128]!\n"
 		"      vld1.32 {q10}, [%[s]]!\n"
 		"      subs %[remainder], %[remainder], #4\n"
 		"      vmla.f32 q0, q6, q10\n"
@@ -183,9 +183,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
 #else
 	asm volatile(
 		"      vdup.32 q10, %[x]\n"
-		"      vld1.32 {q4, q5}, [%[t0]]!\n"
+		"      vld1.32 {q4, q5}, [%[t0] :128]!\n"
 		"      vld1.32 {q8, q9}, [%[s]]!\n"
-		"      vld1.32 {q6, q7}, [%[t1]]!\n"
+		"      vld1.32 {q6, q7}, [%[t1] :128]!\n"
 		"      subs %[n_taps], %[n_taps], #8\n"
 		"      vmul.f32 q0, q4, q8\n"
 		"      vmul.f32 q1, q5, q9\n"
@@ -193,9 +193,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
 		"      vmul.f32 q3, q7, q9\n"
 		"      beq 3f\n"
 		"2:"
-		"      vld1.32 {q4, q5}, [%[t0]]!\n"
+		"      vld1.32 {q4, q5}, [%[t0] :128]!\n"
 		"      vld1.32 {q8, q9}, [%[s]]!\n"
-		"      vld1.32 {q6, q7}, [%[t1]]!\n"
+		"      vld1.32 {q6, q7}, [%[t1] :128]!\n"
 		"      subs %[n_taps], %[n_taps], #8\n"
 		"      vmla.f32 q0, q4, q8\n"
 		"      vmla.f32 q1, q5, q9\n"