neon: add alignment hints

This commit is contained in:
Wim Taymans 2020-04-03 14:26:02 +02:00
parent 167460a9bc
commit bf3ebb67aa

View file

@@ -89,16 +89,16 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
asm volatile (
" cmp %[n_taps], #0\n"
" bne 1f\n"
" vld1.32 {q4}, [%[taps]]!\n"
" vld1.32 {q4}, [%[taps] :128]!\n"
" vld1.32 {q8}, [%[s]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmul.f32 q0, q4, q8\n"
" bne 4f\n"
" b 5f\n"
"1:"
" vld1.32 {q4, q5}, [%[taps]]!\n"
" vld1.32 {q4, q5}, [%[taps] :128]!\n"
" vld1.32 {q8, q9}, [%[s]]!\n"
" vld1.32 {q6, q7}, [%[taps]]!\n"
" vld1.32 {q6, q7}, [%[taps] :128]!\n"
" vld1.32 {q10, q11}, [%[s]]!\n"
" subs %[n_taps], %[n_taps], #16\n"
" vmul.f32 q0, q4, q8\n"
@@ -107,9 +107,9 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
" vmul.f32 q3, q7, q11\n"
" beq 3f\n"
"2:"
" vld1.32 {q4, q5}, [%[taps]]!\n"
" vld1.32 {q4, q5}, [%[taps] :128]!\n"
" vld1.32 {q8, q9}, [%[s]]!\n"
" vld1.32 {q6, q7}, [%[taps]]!\n"
" vld1.32 {q6, q7}, [%[taps] :128]!\n"
" vld1.32 {q10, q11}, [%[s]]!\n"
" subs %[n_taps], %[n_taps], #16\n"
" vmla.f32 q0, q4, q8\n"
@@ -124,7 +124,7 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
" vadd.f32 q0, q4, q5\n"
" beq 5f\n"
"4:"
" vld1.32 {q6}, [%[taps]]!\n"
" vld1.32 {q6}, [%[taps] :128]!\n"
" vld1.32 {q10}, [%[s]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmla.f32 q0, q6, q10\n"
@@ -183,9 +183,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
#else
asm volatile(
" vdup.32 q10, %[x]\n"
" vld1.32 {q4, q5}, [%[t0]]!\n"
" vld1.32 {q4, q5}, [%[t0] :128]!\n"
" vld1.32 {q8, q9}, [%[s]]!\n"
" vld1.32 {q6, q7}, [%[t1]]!\n"
" vld1.32 {q6, q7}, [%[t1] :128]!\n"
" subs %[n_taps], %[n_taps], #8\n"
" vmul.f32 q0, q4, q8\n"
" vmul.f32 q1, q5, q9\n"
@@ -193,9 +193,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
" vmul.f32 q3, q7, q9\n"
" beq 3f\n"
"2:"
" vld1.32 {q4, q5}, [%[t0]]!\n"
" vld1.32 {q4, q5}, [%[t0] :128]!\n"
" vld1.32 {q8, q9}, [%[s]]!\n"
" vld1.32 {q6, q7}, [%[t1]]!\n"
" vld1.32 {q6, q7}, [%[t1] :128]!\n"
" subs %[n_taps], %[n_taps], #8\n"
" vmla.f32 q0, q4, q8\n"
" vmla.f32 q1, q5, q9\n"