audioconvert: handle more optimizations

Compile an optimized library for the given CPU with the right flags,
then link it with the main library.
This commit is contained in:
Wim Taymans 2019-03-27 17:58:48 +01:00
parent eaffb25cc2
commit c8d3d475bb
12 changed files with 271 additions and 141 deletions

View file

@ -22,10 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include "fmt-ops.h"
#include <tmmintrin.h>
@ -38,8 +35,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
uint32_t n, unrolled;
__m128i in[4];
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
//const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
if (SPA_IS_ALIGNED(d0, 16))
unrolled = n_samples / 4;
@ -93,7 +90,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
}
}
static void
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
void
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
const int8_t *s = src[0];
@ -101,8 +101,10 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R
for(; i + 3 < n_channels; i += 4)
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
#if defined (HAVE_SSE2)
for(; i + 1 < n_channels; i += 2)
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
#endif
}