From 14028c67cde73a023387fd7d9b8604975bb7148b Mon Sep 17 00:00:00 2001
From: Wim Taymans
Date: Thu, 29 Sep 2022 13:03:02 +0200
Subject: [PATCH] audioconvert: some optimizations

---
Review note: the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Record boundaries were checked against the hunk
header (51 old / 88 new lines) and the diffstat (69 insertions, 32 deletions).
Tab indentation, including the tab run before the continuation backslash on the
first context line, is assumed; if the context does not match the tree, apply
with `git apply --ignore-whitespace`.

 spa/plugins/audioconvert/fmt-ops-sse2.c | 101 ++++++++++++++++--------
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c
index 2c0b0f5c1..14a793fc5 100644
--- a/spa/plugins/audioconvert/fmt-ops-sse2.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse2.c
@@ -576,51 +576,88 @@ conv_f32d_to_s32_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
 	i;						\
 })
 
-
-static inline void update_noise_sse2(struct convert *conv, uint32_t n_samples)
+static inline void update_noise_rect_sse2(struct convert *conv, uint32_t n_samples)
 {
 	uint32_t n;
 	const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
-	int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t), op;
-	__m128 scale = _mm_set1_ps(conv->scale);
-	__m128 out[1];
 	float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
-	__m128i in[1], old[1], new[1];
+	__m128 scale = _mm_set1_ps(conv->scale);
+	__m128i in[1];
+	__m128 out[1];
 
+	for (n = 0; n < n_samples; n += 4) {
+		in[0] = _MM_XORSHIFT_EPI32(r);
+		out[0] = _mm_cvtepi32_ps(in[0]);
+		out[0] = _mm_mul_ps(out[0], scale);
+		_mm_store_ps(&noise[n], out[0]);
+	}
+}
+
+static inline void update_noise_tri_sse2(struct convert *conv, uint32_t n_samples)
+{
+	uint32_t n;
+	const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
+	float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
+	__m128 scale = _mm_set1_ps(conv->scale);
+	__m128i in[1];
+	__m128 out[1];
+
+	for (n = 0; n < n_samples; n += 4) {
+		in[0] = _mm_sub_epi32( _MM_XORSHIFT_EPI32(r), _MM_XORSHIFT_EPI32(r));
+		out[0] = _mm_cvtepi32_ps(in[0]);
+		out[0] = _mm_mul_ps(out[0], scale);
+		_mm_store_ps(&noise[n], out[0]);
+	}
+}
+
+static inline void update_noise_tri_hf_sse2(struct convert *conv, uint32_t n_samples)
+{
+	uint32_t n;
+	int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t);
+	const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
+	float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
+	__m128 scale = _mm_set1_ps(conv->scale);
+	__m128i in[1], old[1], new[1];
+	__m128 out[1];
+
+	old[0] = _mm_load_si128((__m128i*)p);
+	for (n = 0; n < n_samples; n += 4) {
+		new[0] = _MM_XORSHIFT_EPI32(r);
+		in[0] = _mm_sub_epi32(old[0], new[0]);
+		old[0] = new[0];
+		out[0] = _mm_cvtepi32_ps(in[0]);
+		out[0] = _mm_mul_ps(out[0], scale);
+		_mm_store_ps(&noise[n], out[0]);
+	}
+	_mm_store_si128((__m128i*)p, old[0]);
+}
+
+static inline void update_noise_pattern_sse2(struct convert *conv, uint32_t n_samples)
+{
+	uint32_t n;
+	int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t), op;
+	float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
+
+	op = *p;
+	for (n = 0; n < n_samples; n++)
+		noise[n] = conv->scale * (1-((op++>>10)&1));
+	*p = op;
+}
+
+static inline void update_noise_sse2(struct convert *conv, uint32_t n_samples)
+{
 	switch (conv->noise_method) {
 	case DITHER_METHOD_RECTANGULAR:
-		for (n = 0; n < n_samples; n += 4) {
-			in[0] = _MM_XORSHIFT_EPI32(r);
-			out[0] = _mm_cvtepi32_ps(in[0]);
-			out[0] = _mm_mul_ps(out[0], scale);
-			_mm_store_ps(&noise[n], out[0]);
-		}
+		update_noise_rect_sse2(conv, n_samples);
 		break;
 	case DITHER_METHOD_TRIANGULAR:
-		for (n = 0; n < n_samples; n += 4) {
-			in[0] = _mm_sub_epi32( _MM_XORSHIFT_EPI32(r), _MM_XORSHIFT_EPI32(r));
-			out[0] = _mm_cvtepi32_ps(in[0]);
-			out[0] = _mm_mul_ps(out[0], scale);
-			_mm_store_ps(&noise[n], out[0]);
-		}
+		update_noise_tri_sse2(conv, n_samples);
 		break;
 	case DITHER_METHOD_TRIANGULAR_HF:
-		old[0] = _mm_load_si128((__m128i*)p);
-		for (n = 0; n < n_samples; n += 4) {
-			new[0] = _MM_XORSHIFT_EPI32(r);
-			in[0] = _mm_sub_epi32(old[0], new[0]);
-			old[0] = new[0];
-			out[0] = _mm_cvtepi32_ps(in[0]);
-			out[0] = _mm_mul_ps(out[0], scale);
-			_mm_store_ps(&noise[n], out[0]);
-		}
-		_mm_store_si128((__m128i*)p, old[0]);
+		update_noise_tri_hf_sse2(conv, n_samples);
 		break;
 	case NOISE_METHOD_PATTERN:
-		op = *p;
-		for (n = 0; n < n_samples; n++)
-			noise[n] = conv->scale * (1-((op++>>10)&1));
-		*p = op;
+		update_noise_pattern_sse2(conv, n_samples);
 		break;
 	}
 }