From 00998ffd7ecb5111cbb56074d463dcdb51a13103 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Mon, 27 Jun 2022 17:18:23 +0200 Subject: [PATCH] audioconvert: ensure alignment Allocate a little bit more data to ensure alignment and overread of the dither data. Ensure sse2 can load aligned data in all cases. --- spa/plugins/audioconvert/dither-ops-c.c | 4 ++-- spa/plugins/audioconvert/dither-ops-sse2.c | 22 +++++++++++----------- spa/plugins/audioconvert/dither-ops.c | 4 ++-- spa/plugins/audioconvert/dither-ops.h | 7 ++++--- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/spa/plugins/audioconvert/dither-ops-c.c b/spa/plugins/audioconvert/dither-ops-c.c index 688709938..03f5fd941 100644 --- a/spa/plugins/audioconvert/dither-ops-c.c +++ b/spa/plugins/audioconvert/dither-ops-c.c @@ -49,7 +49,7 @@ void dither_f32_c(struct dither *dt, void * SPA_RESTRICT dst[], uint32_t i, n, m, chunk; const float **s = (const float**)src; float **d = (float**)dst; - float *t = dt->dither; + float *dither = dt->dither; chunk = SPA_MIN(n_samples, dt->dither_size); update_dither_c(dt, chunk); @@ -62,7 +62,7 @@ void dither_f32_c(struct dither *dt, void * SPA_RESTRICT dst[], const float *si = &s[i][n]; for (m = 0; m < chunk; m++) - di[m] = si[m] + t[m]; + di[m] = si[m] + dither[m]; } } } diff --git a/spa/plugins/audioconvert/dither-ops-sse2.c b/spa/plugins/audioconvert/dither-ops-sse2.c index ec4871566..8e58a35d6 100644 --- a/spa/plugins/audioconvert/dither-ops-sse2.c +++ b/spa/plugins/audioconvert/dither-ops-sse2.c @@ -29,25 +29,25 @@ static inline void update_dither_sse2(struct dither *dt, uint32_t n_samples) { uint32_t n; - const uint32_t *r = dt->random; + const uint32_t *r = SPA_PTR_ALIGN(dt->random, 16, uint32_t); + float *dither = SPA_PTR_ALIGN(dt->dither, 16, float); __m128 scale = _mm_set1_ps(dt->scale), out[1]; __m128i in[1], t[1]; for (n = 0; n < n_samples; n += 4) { - /* 32 bit xorshift PRNG, see https://en.wikipedia.org/wiki/Xorshift */ - in[0] = _mm_loadu_si128((__m128i*)r); + in[0] = _mm_load_si128((__m128i*)r); t[0] = _mm_slli_epi32(in[0], 13); in[0] = _mm_xor_si128(in[0], t[0]); t[0] = _mm_srli_epi32(in[0], 17); in[0] = _mm_xor_si128(in[0], t[0]); t[0] = _mm_slli_epi32(in[0], 5); in[0] = _mm_xor_si128(in[0], t[0]); - _mm_storeu_si128((__m128i*)r, in[0]); + _mm_store_si128((__m128i*)r, in[0]); out[0] = _mm_cvtepi32_ps(in[0]); out[0] = _mm_mul_ps(out[0], scale); - _mm_storeu_ps(&dt->dither[n], out[0]); + _mm_store_ps(&dither[n], out[0]); } } @@ -57,7 +57,7 @@ void dither_f32_sse2(struct dither *dt, void * SPA_RESTRICT dst[], uint32_t i, n, m, chunk, unrolled; const float **s = (const float**)src; float **d = (float**)dst; - float *t = dt->dither; + float *dither = SPA_PTR_ALIGN(dt->dither, 16, float); __m128 in[4]; chunk = SPA_MIN(n_samples, dt->dither_size); @@ -81,17 +81,17 @@ void dither_f32_sse2(struct dither *dt, void * SPA_RESTRICT dst[], in[1] = _mm_load_ps(&si[m + 4]); in[2] = _mm_load_ps(&si[m + 8]); in[3] = _mm_load_ps(&si[m + 12]); - in[0] = _mm_add_ps(in[0], _mm_load_ps(&t[m ])); - in[1] = _mm_add_ps(in[1], _mm_load_ps(&t[m + 4])); - in[2] = _mm_add_ps(in[2], _mm_load_ps(&t[m + 8])); - in[3] = _mm_add_ps(in[3], _mm_load_ps(&t[m + 12])); + in[0] = _mm_add_ps(in[0], _mm_load_ps(&dither[m ])); + in[1] = _mm_add_ps(in[1], _mm_load_ps(&dither[m + 4])); + in[2] = _mm_add_ps(in[2], _mm_load_ps(&dither[m + 8])); + in[3] = _mm_add_ps(in[3], _mm_load_ps(&dither[m + 12])); _mm_store_ps(&di[m ], in[0]); _mm_store_ps(&di[m + 4], in[1]); _mm_store_ps(&di[m + 8], in[2]); _mm_store_ps(&di[m + 12], in[3]); } for (; m < chunk; m++) - di[m] = si[m] + t[m]; + di[m] = si[m] + dither[m]; } } } diff --git a/spa/plugins/audioconvert/dither-ops.c b/spa/plugins/audioconvert/dither-ops.c index 834ce2b76..5d84f8eb5 100644 --- a/spa/plugins/audioconvert/dither-ops.c +++ b/spa/plugins/audioconvert/dither-ops.c @@ -85,14 +85,14 @@ int dither_init(struct dither *d) d->scale = 1.0f / (1ULL << (31 + d->intensity)); d->dither_size = DITHER_SIZE; - d->dither = calloc(d->dither_size + 8, sizeof(float)); + d->dither = calloc(d->dither_size + DITHER_OPS_MAX_OVERREAD + + DITHER_OPS_MAX_ALIGN / sizeof(float), sizeof(float)); if (d->dither == NULL) return -errno; for (i = 0; i < SPA_N_ELEMENTS(d->random); i++) d->random[i] = random(); - d->free = impl_dither_free; d->process = info->process; return 0; diff --git a/spa/plugins/audioconvert/dither-ops.h b/spa/plugins/audioconvert/dither-ops.h index 39cdd4874..867b469f8 100644 --- a/spa/plugins/audioconvert/dither-ops.h +++ b/spa/plugins/audioconvert/dither-ops.h @@ -29,6 +29,9 @@ #include #include +#define DITHER_OPS_MAX_ALIGN 16 +#define DITHER_OPS_MAX_OVERREAD 16 + struct dither { uint32_t intensity; #define DITHER_METHOD_NONE 0 @@ -45,7 +48,7 @@ struct dither { const void * SPA_RESTRICT src[], uint32_t n_samples); void (*free) (struct dither *d); - uint32_t random[16]; + uint32_t random[16 + DITHER_OPS_MAX_ALIGN/sizeof(uint32_t)]; float *dither; uint32_t dither_size; float scale; @@ -83,8 +86,6 @@ void dither_##name##_##arch(struct dither *d, \ const void * SPA_RESTRICT src[], \ uint32_t n_samples); -#define DITHER_OPS_MAX_ALIGN 16 - DEFINE_FUNCTION(f32, c); #if defined(HAVE_SSE2) DEFINE_FUNCTION(f32, sse2);