mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-02 09:01:50 -05:00
fmt-ops: add avx2 optimized version
There is only one optimized version, but the sse2 versions are compiled with the avx2 flags so that they get optimized better.
This commit is contained in:
parent
6eca935e61
commit
3a911dfe3b
7 changed files with 821 additions and 28 deletions
|
|
@ -31,8 +31,7 @@ conv_s16_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int16_t *s = src;
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0];
|
||||
float *d0 = dst[0];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in;
|
||||
__m128 out, factor = _mm_set1_ps(1.0f / S16_SCALE);
|
||||
|
|
@ -77,8 +76,7 @@ conv_s16_to_f32d_2_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const v
|
|||
uint32_t n_samples)
|
||||
{
|
||||
const int16_t *s = src[0];
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0], *d1 = d[1];
|
||||
float *d0 = dst[0], *d1 = dst[1];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in[2], t[4];
|
||||
__m128 out[4], factor = _mm_set1_ps(1.0f / S16_SCALE);
|
||||
|
|
@ -135,8 +133,7 @@ conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const uint8_t *s = src;
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0];
|
||||
float *d0 = dst[0];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in;
|
||||
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||
|
|
@ -175,8 +172,7 @@ conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const uint8_t *s = src;
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0], *d1 = d[1];
|
||||
float *d0 = dst[0], *d1 = dst[1];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in[2];
|
||||
__m128 out[2], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||
|
|
@ -235,8 +231,7 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const uint8_t *s = src;
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0], *d1 = d[1], *d2 = d[2], *d3 = d[3];
|
||||
float *d0 = dst[0], *d1 = dst[1], *d2 = dst[2], *d3 = dst[3];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in[4];
|
||||
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||
|
|
@ -340,8 +335,7 @@ conv_s32_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int32_t *s = src;
|
||||
float **d = (float **) dst;
|
||||
float *d0 = d[0];
|
||||
float *d0 = dst[0];
|
||||
uint32_t n, unrolled;
|
||||
__m128i in;
|
||||
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||
|
|
@ -385,8 +379,7 @@ static void
|
|||
conv_f32d_to_s32_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const float **s = (const float **) src;
|
||||
const float *s0 = s[0];
|
||||
const float *s0 = src[0];
|
||||
int32_t *d = dst;
|
||||
uint32_t n, unrolled;
|
||||
__m128 in[1];
|
||||
|
|
@ -426,8 +419,7 @@ static void
|
|||
conv_f32d_to_s32_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const float **s = (const float **) src;
|
||||
const float *s0 = s[0], *s1 = s[1];
|
||||
const float *s0 = src[0], *s1 = src[1];
|
||||
int32_t *d = dst;
|
||||
uint32_t n, unrolled;
|
||||
__m128 in[2];
|
||||
|
|
@ -478,8 +470,7 @@ static void
|
|||
conv_f32d_to_s32_4s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const float **s = (const float **) src;
|
||||
const float *s0 = s[0], *s1 = s[1], *s2 = s[2], *s3 = s[3];
|
||||
const float *s0 = src[0], *s1 = src[1], *s2 = src[2], *s3 = src[3];
|
||||
int32_t *d = dst;
|
||||
uint32_t n, unrolled;
|
||||
__m128 in[4];
|
||||
|
|
@ -556,8 +547,7 @@ static void
|
|||
conv_f32d_to_s16_1s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const float **s = (const float **) src;
|
||||
const float *s0 = s[0];
|
||||
const float *s0 = src[0];
|
||||
int16_t *d = dst;
|
||||
uint32_t n, unrolled;
|
||||
__m128 in[2];
|
||||
|
|
@ -599,8 +589,7 @@ static void
|
|||
conv_f32d_to_s16_2s_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const float **s = (const float **) src;
|
||||
const float *s0 = s[0], *s1 = s[1];
|
||||
const float *s0 = src[0], *s1 = src[1];
|
||||
int16_t *d = dst;
|
||||
uint32_t n, unrolled;
|
||||
__m128 in[2];
|
||||
|
|
@ -724,7 +713,6 @@ conv_f32d_to_s16_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
|
|||
conv_f32d_to_s16_1s_sse2(conv, &d[i], &src[i], n_channels, n_samples);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
conv_f32d_to_s16_2_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_samples)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue