audioconvert: improve benchmark

Also include the SIMD versions in the benchmark.
Fix some issues found by the new test.
This commit is contained in:
Wim Taymans 2019-03-28 13:26:06 +01:00
parent c8d3d475bb
commit 7f041f4098
8 changed files with 350 additions and 101 deletions

View file

@ -33,6 +33,7 @@ extern "C" {
#include <spa/utils/defs.h>
/* x86 specific */
#define SPA_CPU_FLAG_MMX (1<<0) /**< standard MMX */
#define SPA_CPU_FLAG_MMXEXT (1<<1) /**< SSE integer or AMD MMX ext */
#define SPA_CPU_FLAG_3DNOW (1<<2) /**< AMD 3DNOW */
@ -53,11 +54,14 @@ extern "C" {
#define SPA_CPU_FLAG_BMI1 (1<<17) /**< Bit Manipulation Instruction Set 1 */
#define SPA_CPU_FLAG_BMI2 (1<<18) /**< Bit Manipulation Instruction Set 2 */
#define SPA_CPU_FLAG_AVX512 (1<<19) /**< AVX-512 */
#define SPA_CPU_FLAG_SLOW_UNALIGNED (1<<20) /**< unaligned loads/stores are slow */
/* PPC specific */
#define SPA_CPU_FLAG_ALTIVEC (1<<0) /**< standard */
#define SPA_CPU_FLAG_VSX (1<<1) /**< ISA 2.06 */
#define SPA_CPU_FLAG_POWER8 (1<<2) /**< ISA 2.07 */
/* ARM specific */
#define SPA_CPU_FLAG_ARMV5TE (1 << 0)
#define SPA_CPU_FLAG_ARMV6 (1 << 1)
#define SPA_CPU_FLAG_ARMV6T2 (1 << 2)

View file

@ -121,6 +121,7 @@ struct spa_param_info {
})
/** Pointer of type \a t * located \a o bytes past the address \a b. */
#define SPA_MEMBER(b,o,t) ((t*)((uint8_t*)(b) + (int)(o)))
/** Like SPA_MEMBER, with the resulting pointer passed through SPA_PTR_ALIGN
 * for alignment \a a. */
#define SPA_MEMBER_ALIGN(b,o,a,t) SPA_PTR_ALIGN(SPA_MEMBER(b,o,t),a,t)
/** Pointer to the enclosing object of type \a t, given a pointer \a p to its
 * member \a m. Both the argument and the whole expansion are parenthesized so
 * that compound expressions passed as \a p, and operators applied to the
 * result (such as ->), bind as expected. */
#define SPA_CONTAINER_OF(p,t,m) ((t*)((uint8_t*)(p) - offsetof (t,m)))

View file

@ -31,6 +31,14 @@
#include "fmt-ops.c"
/* One benchmark measurement, filled in by run_test1() and printed by main(). */
struct stats {
uint32_t n_samples;	/* sample count the conversion was called with */
uint32_t n_channels;	/* channel count the conversion was called with */
uint64_t perf;		/* measured iterations per second */
const char *name;	/* test name, e.g. "test_f32_u8" */
const char *impl;	/* implementation label, e.g. "c" or "sse2" */
};
#define MAX_SAMPLES 4096
#define MAX_CHANNELS 11
@ -42,8 +50,13 @@ static uint8_t samp_out[MAX_SAMPLES * MAX_CHANNELS * 4];
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
static const int channel_counts[] = { 1, 2, 4, 6, 8, 11 };
static void run_test1(const char *name, bool in_packed, bool out_packed, convert_func_t func,
int n_channels, int n_samples)
/* Capacity of results[]: one entry per (sample size, channel count) pair for
 * up to 60 run_test() calls. Parenthesized so the product stays intact when
 * the macro is used inside a larger expression. */
#define MAX_RESULTS (SPA_N_ELEMENTS(sample_sizes) * SPA_N_ELEMENTS(channel_counts) * 60)
static uint32_t n_results = 0;
static struct stats results[MAX_RESULTS];
static void run_test1(const char *name, const char *impl, bool in_packed, bool out_packed,
convert_func_t func, int n_channels, int n_samples)
{
int i, j;
const void *ip[n_channels];
@ -67,18 +80,22 @@ static void run_test1(const char *name, bool in_packed, bool out_packed, convert
clock_gettime(CLOCK_MONOTONIC, &ts);
t2 = SPA_TIMESPEC_TO_NSEC(&ts);
fprintf(stderr, "%s: samples %d, channels %d: elapsed %"PRIu64" count %"
PRIu64" = %"PRIu64"/sec\n", name, n_samples, n_channels,
t2 - t1, count, count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1));
results[n_results++] = (struct stats) {
.n_samples = n_samples,
.n_channels = n_channels,
.perf = count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1),
.name = name,
.impl = impl
};
}
static void run_test(const char *name, bool in_packed, bool out_packed, convert_func_t func)
static void run_test(const char *name, const char *impl, bool in_packed, bool out_packed, convert_func_t func)
{
size_t i, j;
for (i = 0; i < SPA_N_ELEMENTS(sample_sizes); i++) {
for (j = 0; j < SPA_N_ELEMENTS(channel_counts); j++) {
run_test1(name, in_packed, out_packed, func, channel_counts[j],
run_test1(name, impl, in_packed, out_packed, func, channel_counts[j],
(sample_sizes[i] + (channel_counts[j] -1)) / channel_counts[j]);
}
}
@ -86,93 +103,124 @@ static void run_test(const char *name, bool in_packed, bool out_packed, convert_
/* Benchmark the C f32 -> u8 converters for all four combinations of
 * packed / non-packed input and output. */
static void test_f32_u8(void)
{
run_test("test_f32_u8", true, true, conv_f32_to_u8);
run_test("test_f32d_u8", false, true, conv_f32d_to_u8);
run_test("test_f32_u8d", true, false, conv_f32_to_u8d);
run_test("test_f32d_u8d", false, false, conv_f32d_to_u8d);
run_test("test_f32_u8", "c", true, true, conv_f32_to_u8);
run_test("test_f32d_u8", "c", false, true, conv_f32d_to_u8);
run_test("test_f32_u8d", "c", true, false, conv_f32_to_u8d);
run_test("test_f32d_u8d", "c", false, false, conv_f32d_to_u8d);
}
/* Benchmark the C u8 -> f32 converters: packed to packed, non-packed to
 * packed and packed to non-packed. */
static void test_u8_f32(void)
{
run_test("test_u8_f32", true, true, conv_u8_to_f32);
run_test("test_u8d_f32", false, true, conv_u8d_to_f32);
run_test("test_u8_f32d", true, false, conv_u8_to_f32d);
run_test("test_u8_f32", "c", true, true, conv_u8_to_f32);
run_test("test_u8d_f32", "c", false, true, conv_u8d_to_f32);
run_test("test_u8_f32d", "c", true, false, conv_u8_to_f32d);
}
/* Benchmark the f32 -> s16 converters: the C versions and, when HAVE_SSE2 is
 * defined, the SSE2 version of the non-packed to packed conversion. */
static void test_f32_s16(void)
{
run_test("test_f32_s16", true, true, conv_f32_to_s16);
run_test("test_f32d_s16", false, true, conv_f32d_to_s16);
run_test("test_f32_s16d", true, false, conv_f32_to_s16d);
run_test("test_f32_s16", "c", true, true, conv_f32_to_s16);
run_test("test_f32d_s16", "c", false, true, conv_f32d_to_s16);
#if defined (HAVE_SSE2)
run_test("test_f32d_s16", "sse2", false, true, conv_f32d_to_s16_sse2);
#endif
run_test("test_f32_s16d", "c", true, false, conv_f32_to_s16d);
}
/* Benchmark the s16 -> f32 converters: the C versions and, when HAVE_SSE2 is
 * defined, the SSE2 version of the packed to non-packed conversion. */
static void test_s16_f32(void)
{
run_test("test_s16_f32", true, true, conv_s16_to_f32);
run_test("test_s16d_f32", false, true, conv_s16d_to_f32);
run_test("test_s16_f32d", true, false, conv_s16_to_f32d);
run_test("test_s16_f32", "c", true, true, conv_s16_to_f32);
run_test("test_s16d_f32", "c", false, true, conv_s16d_to_f32);
run_test("test_s16_f32d", "c", true, false, conv_s16_to_f32d);
#if defined (HAVE_SSE2)
run_test("test_s16_f32d", "sse2", true, false, conv_s16_to_f32d_sse2);
#endif
}
/* Benchmark the f32 -> s32 converters: the C versions and, when HAVE_SSE2 is
 * defined, the SSE2 version of the non-packed to packed conversion. */
static void test_f32_s32(void)
{
run_test("test_f32_s32", true, true, conv_f32_to_s32);
run_test("test_f32d_s32", false, true, conv_f32d_to_s32);
run_test("test_f32_s32d", true, false, conv_f32_to_s32d);
run_test("test_f32_s32", "c", true, true, conv_f32_to_s32);
run_test("test_f32d_s32", "c", false, true, conv_f32d_to_s32);
#if defined (HAVE_SSE2)
run_test("test_f32d_s32", "sse2", false, true, conv_f32d_to_s32_sse2);
#endif
run_test("test_f32_s32d", "c", true, false, conv_f32_to_s32d);
}
/* Benchmark the C s32 -> f32 converters: packed to packed, non-packed to
 * packed and packed to non-packed. */
static void test_s32_f32(void)
{
run_test("test_s32_f32", true, true, conv_s32_to_f32);
run_test("test_s32d_f32", false, true, conv_s32d_to_f32);
run_test("test_s32_f32d", true, false, conv_s32_to_f32d);
run_test("test_s32_f32", "c", true, true, conv_s32_to_f32);
run_test("test_s32d_f32", "c", false, true, conv_s32d_to_f32);
run_test("test_s32_f32d", "c", true, false, conv_s32_to_f32d);
}
/* Benchmark the C f32 -> s24 converters: packed to packed, non-packed to
 * packed and packed to non-packed. */
static void test_f32_s24(void)
{
run_test("test_f32_s24", true, true, conv_f32_to_s24);
run_test("test_f32d_s24", false, true, conv_f32d_to_s24);
run_test("test_f32_s24d", true, false, conv_f32_to_s24d);
run_test("test_f32_s24", "c", true, true, conv_f32_to_s24);
run_test("test_f32d_s24", "c", false, true, conv_f32d_to_s24);
run_test("test_f32_s24d", "c", true, false, conv_f32_to_s24d);
}
/* Benchmark the s24 -> f32 converters: the C versions and, depending on
 * HAVE_SSE2 / HAVE_SSSE3 / HAVE_SSE41, the matching SIMD versions of the
 * packed to non-packed conversion. */
static void test_s24_f32(void)
{
run_test("test_s24_f32", true, true, conv_s24_to_f32);
run_test("test_s24d_f32", false, true, conv_s24d_to_f32);
run_test("test_s24_f32d", true, false, conv_s24_to_f32d);
run_test("test_s24_f32", "c", true, true, conv_s24_to_f32);
run_test("test_s24d_f32", "c", false, true, conv_s24d_to_f32);
run_test("test_s24_f32d", "c", true, false, conv_s24_to_f32d);
#if defined (HAVE_SSE2)
run_test("test_s24_f32d", "sse2", true, false, conv_s24_to_f32d_sse2);
#endif
#if defined (HAVE_SSSE3)
run_test("test_s24_f32d", "ssse3", true, false, conv_s24_to_f32d_ssse3);
#endif
#if defined (HAVE_SSE41)
run_test("test_s24_f32d", "sse41", true, false, conv_s24_to_f32d_sse41);
#endif
}
/* Benchmark the C f32 -> s24_32 converters: packed to packed, non-packed to
 * packed and packed to non-packed. */
static void test_f32_s24_32(void)
{
run_test("test_f32_s24_32", true, true, conv_f32_to_s24_32);
run_test("test_f32d_s24_32", false, true, conv_f32d_to_s24_32);
run_test("test_f32_s24_32d", true, false, conv_f32_to_s24_32d);
run_test("test_f32_s24_32", "c", true, true, conv_f32_to_s24_32);
run_test("test_f32d_s24_32", "c", false, true, conv_f32d_to_s24_32);
run_test("test_f32_s24_32d", "c", true, false, conv_f32_to_s24_32d);
}
/* Benchmark the C s24_32 -> f32 converters: packed to packed, non-packed to
 * packed and packed to non-packed. */
static void test_s24_32_f32(void)
{
run_test("test_s24_32_f32", true, true, conv_s24_32_to_f32);
run_test("test_s24_32d_f32", false, true, conv_s24_32d_to_f32);
run_test("test_s24_32_f32d", true, false, conv_s24_32_to_f32d);
run_test("test_s24_32_f32", "c", true, true, conv_s24_32_to_f32);
run_test("test_s24_32d_f32", "c", false, true, conv_s24_32d_to_f32);
run_test("test_s24_32_f32d", "c", true, false, conv_s24_32_to_f32d);
}
/* Benchmark the C interleave_8/16/24/32 functions, each run with non-packed
 * input and packed output. */
static void test_interleave(void)
{
run_test("test_interleave_8", false, true, interleave_8);
run_test("test_interleave_16", false, true, interleave_16);
run_test("test_interleave_24", false, true, interleave_24);
run_test("test_interleave_32", false, true, interleave_32);
run_test("test_interleave_8", "c", false, true, interleave_8);
run_test("test_interleave_16", "c", false, true, interleave_16);
run_test("test_interleave_24", "c", false, true, interleave_24);
run_test("test_interleave_32", "c", false, true, interleave_32);
}
/* Benchmark the C deinterleave_8/16/24/32 functions, each run with packed
 * input and non-packed output. */
static void test_deinterleave(void)
{
run_test("test_deinterleave_8", true, false, deinterleave_8);
run_test("test_deinterleave_16", true, false, deinterleave_16);
run_test("test_deinterleave_24", true, false, deinterleave_24);
run_test("test_deinterleave_32", true, false, deinterleave_32);
run_test("test_deinterleave_8", "c", true, false, deinterleave_8);
run_test("test_deinterleave_16", "c", true, false, deinterleave_16);
run_test("test_deinterleave_24", "c", true, false, deinterleave_24);
run_test("test_deinterleave_32", "c", true, false, deinterleave_32);
}
/*
 * qsort() comparator for struct stats.
 *
 * Orders by name, then by n_samples and n_channels ascending, then by perf
 * descending, so the fastest implementation of a configuration comes first.
 *
 * The numeric fields are compared explicitly instead of being subtracted:
 * squeezing an unsigned 32/64-bit difference into an int can flip the sign
 * or yield 0 for unequal values, which gives qsort() an inconsistent order.
 */
static int compare_func(const void *_a, const void *_b)
{
	const struct stats *a = _a, *b = _b;
	int diff;

	if ((diff = strcmp(a->name, b->name)) != 0)
		return diff;
	if (a->n_samples != b->n_samples)
		return a->n_samples < b->n_samples ? -1 : 1;
	if (a->n_channels != b->n_channels)
		return a->n_channels < b->n_channels ? -1 : 1;
	if (a->perf != b->perf)
		return a->perf > b->perf ? -1 : 1;
	return 0;
}
int main(int argc, char *argv[])
{
uint32_t i;
find_conv_info(0, 0, 0);
test_f32_u8();
@ -188,5 +236,14 @@ int main(int argc, char *argv[])
test_interleave();
test_deinterleave();
spa_assert(n_results <= MAX_RESULTS);
qsort(results, n_results, sizeof(struct stats), compare_func);
for (i = 0; i < n_results; i++) {
struct stats *s = &results[i];
fprintf(stderr, "%-12."PRIu64" \t%-32.32s %s \t samples %d, channels %d\n",
s->perf, s->name, s->impl, s->n_samples, s->n_channels);
}
return 0;
}

View file

@ -0,0 +1,183 @@
/* Spa
*
* Copyright © 2019 Wim Taymans
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include "resample.h"
#include "resample-native.h"
#include "resample-speex.h"
#define MAX_SAMPLES 4096
#define MAX_CHANNELS 11
#define MAX_COUNT 200
/* One benchmark measurement, filled in by run_test1() and printed by main(). */
struct stats {
uint32_t in_rate;	/* input rate of the resampler (r->i_rate) */
uint32_t out_rate;	/* output rate of the resampler (r->o_rate) */
uint32_t n_samples;	/* input samples offered per resample_process() call */
uint32_t n_channels;	/* channel count of the resampler (r->channels) */
uint64_t perf;		/* measured resample_process() calls per second */
const char *name;	/* resampler name, e.g. "native" or "speex" */
const char *impl;	/* implementation label, e.g. "c", "sse", "ssse3" */
};
static float samp_in[MAX_SAMPLES * MAX_CHANNELS];
static float samp_out[MAX_SAMPLES * MAX_CHANNELS];
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
static const int in_rates[] = { 44100, 44100, 48000, 96000, 22050, 96000 };
static const int out_rates[] = { 44100, 48000, 44100, 48000, 48000, 44100 };
#define MAX_RESAMPLER 4
#define MAX_SIZES SPA_N_ELEMENTS(sample_sizes)
#define MAX_RATES SPA_N_ELEMENTS(in_rates)
static uint32_t n_results = 0;
static struct stats results[MAX_RESAMPLER * MAX_SIZES * MAX_RATES];
/*
 * Time MAX_COUNT calls of resample_process() on r, offering n_samples input
 * samples per call, and append the outcome to results[].
 *
 * name and impl are stored by pointer in the result entry, so they must stay
 * valid until the results are printed. The recorded perf value is the number
 * of resample_process() calls per second.
 *
 * The measurement is skipped (with a message on stderr) when r uses more than
 * MAX_CHANNELS channels or when results[] is already full.
 */
static void run_test1(const char *name, const char *impl, struct resample *r, int n_samples)
{
	uint32_t i, j;
	const void *ip[MAX_CHANNELS];
	void *op[MAX_CHANNELS];
	struct timespec ts;
	uint64_t count, t1, t2, elapsed;
	uint32_t in_len, out_len;

	/* ip[], op[] and the static sample buffers only hold MAX_CHANNELS channels. */
	if (r->channels > MAX_CHANNELS) {
		fprintf(stderr, "%s %s: too many channels, skipping\n", name, impl);
		return;
	}
	/* Never write past the end of the static results[] table. */
	if (n_results >= SPA_N_ELEMENTS(results)) {
		fprintf(stderr, "%s %s: results table full, skipping\n", name, impl);
		return;
	}

	for (j = 0; j < r->channels; j++) {
		ip[j] = &samp_in[j * MAX_SAMPLES];
		op[j] = &samp_out[j * MAX_SAMPLES];
	}

	clock_gettime(CLOCK_MONOTONIC, &ts);
	t1 = SPA_TIMESPEC_TO_NSEC(&ts);

	count = 0;
	for (i = 0; i < MAX_COUNT; i++) {
		/* Both lengths are passed by pointer, so reset them on every call. */
		in_len = n_samples;
		out_len = MAX_SAMPLES;
		resample_process(r, ip, &in_len, op, &out_len);
		count++;
	}
	clock_gettime(CLOCK_MONOTONIC, &ts);
	t2 = SPA_TIMESPEC_TO_NSEC(&ts);

	/* A very fast run on a coarse clock can report no elapsed time at all;
	 * clamp to 1ns so the rate computation never divides by zero. */
	elapsed = t2 - t1;
	if (elapsed == 0)
		elapsed = 1;

	results[n_results++] = (struct stats) {
		.in_rate = r->i_rate,
		.out_rate = r->o_rate,
		.n_samples = n_samples,
		.n_channels = r->channels,
		.perf = count * (uint64_t)SPA_NSEC_PER_SEC / elapsed,
		.name = name,
		.impl = impl
	};
}
/* Benchmark resampler r once for every entry of sample_sizes[], tagging each
 * result with name and impl. */
static void run_test(const char *name, const char *impl, struct resample *r)
{
	const size_t n_sizes = SPA_N_ELEMENTS(sample_sizes);
	size_t idx = 0;

	while (idx < n_sizes) {
		run_test1(name, impl, r, sample_sizes[idx]);
		idx++;
	}
}
/*
 * qsort() comparator for struct stats.
 *
 * Orders by in_rate, out_rate, n_samples and n_channels ascending, then by
 * perf descending, so the fastest implementation of a configuration comes
 * first.
 *
 * The fields are compared explicitly instead of being subtracted: squeezing
 * an unsigned 32/64-bit difference into an int can flip the sign or yield 0
 * for unequal values, which gives qsort() an inconsistent order.
 */
static int compare_func(const void *_a, const void *_b)
{
	const struct stats *a = _a, *b = _b;

	if (a->in_rate != b->in_rate)
		return a->in_rate < b->in_rate ? -1 : 1;
	if (a->out_rate != b->out_rate)
		return a->out_rate < b->out_rate ? -1 : 1;
	if (a->n_samples != b->n_samples)
		return a->n_samples < b->n_samples ? -1 : 1;
	if (a->n_channels != b->n_channels)
		return a->n_channels < b->n_channels ? -1 : 1;
	if (a->perf != b->perf)
		return a->perf > b->perf ? -1 : 1;
	return 0;
}
int main(int argc, char *argv[])
{
struct resample r;
uint32_t i;
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.cpu_flags = 0;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
impl_native_init(&r);
run_test("native", "c", &r);
resample_free(&r);
}
#if defined (HAVE_SSE)
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.cpu_flags = SPA_CPU_FLAG_SSE;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
impl_native_init(&r);
run_test("native", "sse", &r);
resample_free(&r);
}
#endif
#if defined (HAVE_SSSE3)
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.cpu_flags = SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
impl_native_init(&r);
run_test("native", "ssse3", &r);
resample_free(&r);
}
#endif
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
impl_speex_init(&r);
run_test("speex", "def", &r);
resample_free(&r);
}
qsort(results, n_results, sizeof(struct stats), compare_func);
for (i = 0; i < n_results; i++) {
struct stats *s = &results[i];
fprintf(stderr, "%-12."PRIu64" \t%-16.16s %s \t%d->%d samples %d, channels %d\n",
s->perf, s->name, s->impl, s->in_rate, s->out_rate,
s->n_samples, s->n_channels);
}
return 0;
}

View file

@ -118,7 +118,7 @@ conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RE
conv_s16_to_f32d_1_sse2(data, &dst[i], &s[i], n_channels, n_samples);
}
static void
void
conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{
const uint8_t *s = src;
@ -128,7 +128,7 @@ conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
__m128i in;
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16)) {
if (SPA_IS_ALIGNED(d0, 16) && n_samples > 0) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
@ -167,7 +167,9 @@ conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
__m128i in[2];
__m128 out[2], factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16)) {
if (SPA_IS_ALIGNED(d0, 16) &&
SPA_IS_ALIGNED(d1, 16) &&
n_samples > 0) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
@ -224,7 +226,11 @@ conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
__m128i in[4];
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16)) {
if (SPA_IS_ALIGNED(d0, 16) &&
SPA_IS_ALIGNED(d1, 16) &&
SPA_IS_ALIGNED(d2, 16) &&
SPA_IS_ALIGNED(d3, 16) &&
n_samples > 0) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
@ -418,8 +424,7 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16) &&
SPA_IS_ALIGNED(s2, 16) &&
SPA_IS_ALIGNED(s3, 16) &&
SPA_IS_ALIGNED(d, 16))
SPA_IS_ALIGNED(s3, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
@ -442,10 +447,10 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
out[2] = _mm_cvtps_epi32(in[2]);
out[3] = _mm_cvtps_epi32(in[3]);
_mm_store_si128((__m128i*)(d + 0*n_channels), out[0]);
_mm_store_si128((__m128i*)(d + 1*n_channels), out[1]);
_mm_store_si128((__m128i*)(d + 2*n_channels), out[2]);
_mm_store_si128((__m128i*)(d + 3*n_channels), out[3]);
_mm_storeu_si128((__m128i*)(d + 0*n_channels), out[0]);
_mm_storeu_si128((__m128i*)(d + 1*n_channels), out[1]);
_mm_storeu_si128((__m128i*)(d + 2*n_channels), out[2]);
_mm_storeu_si128((__m128i*)(d + 3*n_channels), out[3]);
d += 4*n_channels;
}
for(; n < n_samples; n++) {

View file

@ -38,7 +38,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
if (SPA_IS_ALIGNED(d0, 16))
if (SPA_IS_ALIGNED(d0, 16) &&
SPA_IS_ALIGNED(d1, 16) &&
SPA_IS_ALIGNED(d2, 16) &&
SPA_IS_ALIGNED(d3, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
@ -90,8 +93,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
}
}
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
void
conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
void
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
@ -101,10 +104,6 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R
for(; i + 3 < n_channels; i += 4)
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
#if defined (HAVE_SSE2)
for(; i + 1 < n_channels; i += 2)
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
#endif
}

View file

@ -79,14 +79,16 @@ endforeach
benchmark_apps = [
'benchmark-fmt-ops',
'benchmark-resample',
]
foreach a : benchmark_apps
benchmark(a,
executable(a, a + '.c',
dependencies : [dl_lib, pthread_lib, mathlib ],
dependencies : [dl_lib, pthread_lib, mathlib, speexdsp_dep, ],
include_directories : [spa_inc ],
c_args : [ '-D_GNU_SOURCE' ],
c_args : [ simd_cargs, '-D_GNU_SOURCE' ],
link_with : simd_dependencies,
install : false),
env : [
'SPA_PLUGIN_DIR=@0@/spa/plugins/'.format(meson.build_root()),

View file

@ -119,11 +119,11 @@ static void impl_native_update_rate(struct resample *r, double rate)
data->func = is_full ? do_resample_full_c : do_resample_inter_c;
#if defined (HAVE_SSE)
if (r->cpu_flags & SPA_CPU_FLAG_SSE)
if (SPA_FLAG_CHECK(r->cpu_flags, SPA_CPU_FLAG_SSE))
data->func = is_full ? do_resample_full_sse : do_resample_inter_sse;
#endif
#if defined (HAVE_SSSE3)
if (r->cpu_flags & SPA_CPU_FLAG_SSSE3)
if (SPA_FLAG_CHECK(r->cpu_flags, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED))
data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3;
#endif
}
@ -276,10 +276,8 @@ static int impl_native_init(struct resample *r)
d->n_phases = n_phases;
d->in_rate = in_rate;
d->out_rate = out_rate;
d->filter = SPA_MEMBER(d, sizeof(struct native_data), float);
d->filter = SPA_PTR_ALIGN(d->filter, 64, float);
d->hist_mem = SPA_MEMBER(d->filter, filter_size, float);
d->hist_mem = SPA_PTR_ALIGN(d->hist_mem, 64, float);
d->filter = SPA_MEMBER_ALIGN(d, sizeof(struct native_data), 64, float);
d->hist_mem = SPA_MEMBER_ALIGN(d->filter, filter_size, 64, float);
d->history = SPA_MEMBER(d->hist_mem, history_size, float*);
d->filter_stride = filter_stride / sizeof(float);
d->filter_stride_os = d->filter_stride * oversample;