mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-02 09:01:50 -05:00
audioconvert: improve benchmark
Also include the SIMD versions in the benchmark. Fix some issues found by the new test.
This commit is contained in:
parent
c8d3d475bb
commit
7f041f4098
8 changed files with 350 additions and 101 deletions
|
|
@ -33,38 +33,42 @@ extern "C" {
|
||||||
|
|
||||||
#include <spa/utils/defs.h>
|
#include <spa/utils/defs.h>
|
||||||
|
|
||||||
#define SPA_CPU_FLAG_MMX (1<<0) /**< standard MMX */
|
/* x86 specific */
|
||||||
#define SPA_CPU_FLAG_MMXEXT (1<<1) /**< SSE integer or AMD MMX ext */
|
#define SPA_CPU_FLAG_MMX (1<<0) /**< standard MMX */
|
||||||
#define SPA_CPU_FLAG_3DNOW (1<<2) /**< AMD 3DNOW */
|
#define SPA_CPU_FLAG_MMXEXT (1<<1) /**< SSE integer or AMD MMX ext */
|
||||||
#define SPA_CPU_FLAG_SSE (1<<3) /**< SSE */
|
#define SPA_CPU_FLAG_3DNOW (1<<2) /**< AMD 3DNOW */
|
||||||
#define SPA_CPU_FLAG_SSE2 (1<<4) /**< SSE2 */
|
#define SPA_CPU_FLAG_SSE (1<<3) /**< SSE */
|
||||||
#define SPA_CPU_FLAG_3DNOWEXT (1<<5) /**< AMD 3DNowExt */
|
#define SPA_CPU_FLAG_SSE2 (1<<4) /**< SSE2 */
|
||||||
#define SPA_CPU_FLAG_SSE3 (1<<6) /**< Prescott SSE3 */
|
#define SPA_CPU_FLAG_3DNOWEXT (1<<5) /**< AMD 3DNowExt */
|
||||||
#define SPA_CPU_FLAG_SSSE3 (1<<7) /**< Conroe SSSE3 */
|
#define SPA_CPU_FLAG_SSE3 (1<<6) /**< Prescott SSE3 */
|
||||||
#define SPA_CPU_FLAG_SSE41 (1<<8) /**< Penryn SSE4.1 */
|
#define SPA_CPU_FLAG_SSSE3 (1<<7) /**< Conroe SSSE3 */
|
||||||
#define SPA_CPU_FLAG_SSE42 (1<<9) /**< Nehalem SSE4.2 */
|
#define SPA_CPU_FLAG_SSE41 (1<<8) /**< Penryn SSE4.1 */
|
||||||
#define SPA_CPU_FLAG_AESNI (1<<10) /**< Advanced Encryption Standard */
|
#define SPA_CPU_FLAG_SSE42 (1<<9) /**< Nehalem SSE4.2 */
|
||||||
#define SPA_CPU_FLAG_AVX (1<<11) /**< AVX */
|
#define SPA_CPU_FLAG_AESNI (1<<10) /**< Advanced Encryption Standard */
|
||||||
#define SPA_CPU_FLAG_XOP (1<<12) /**< Bulldozer XOP */
|
#define SPA_CPU_FLAG_AVX (1<<11) /**< AVX */
|
||||||
#define SPA_CPU_FLAG_FMA4 (1<<13) /**< Bulldozer FMA4 */
|
#define SPA_CPU_FLAG_XOP (1<<12) /**< Bulldozer XOP */
|
||||||
#define SPA_CPU_FLAG_CMOV (1<<14) /**< supports cmov */
|
#define SPA_CPU_FLAG_FMA4 (1<<13) /**< Bulldozer FMA4 */
|
||||||
#define SPA_CPU_FLAG_AVX2 (1<<15) /**< AVX2 */
|
#define SPA_CPU_FLAG_CMOV (1<<14) /**< supports cmov */
|
||||||
#define SPA_CPU_FLAG_FMA3 (1<<16) /**< Haswell FMA3 */
|
#define SPA_CPU_FLAG_AVX2 (1<<15) /**< AVX2 */
|
||||||
#define SPA_CPU_FLAG_BMI1 (1<<17) /**< Bit Manipulation Instruction Set 1 */
|
#define SPA_CPU_FLAG_FMA3 (1<<16) /**< Haswell FMA3 */
|
||||||
#define SPA_CPU_FLAG_BMI2 (1<<18) /**< Bit Manipulation Instruction Set 2 */
|
#define SPA_CPU_FLAG_BMI1 (1<<17) /**< Bit Manipulation Instruction Set 1 */
|
||||||
#define SPA_CPU_FLAG_AVX512 (1<<19) /**< AVX-512 */
|
#define SPA_CPU_FLAG_BMI2 (1<<18) /**< Bit Manipulation Instruction Set 2 */
|
||||||
|
#define SPA_CPU_FLAG_AVX512 (1<<19) /**< AVX-512 */
|
||||||
|
#define SPA_CPU_FLAG_SLOW_UNALIGNED (1<<20) /**< unaligned loads/stores are slow */
|
||||||
|
|
||||||
#define SPA_CPU_FLAG_ALTIVEC (1<<0) /**< standard */
|
/* PPC specific */
|
||||||
#define SPA_CPU_FLAG_VSX (1<<1) /**< ISA 2.06 */
|
#define SPA_CPU_FLAG_ALTIVEC (1<<0) /**< standard */
|
||||||
#define SPA_CPU_FLAG_POWER8 (1<<2) /**< ISA 2.07 */
|
#define SPA_CPU_FLAG_VSX (1<<1) /**< ISA 2.06 */
|
||||||
|
#define SPA_CPU_FLAG_POWER8 (1<<2) /**< ISA 2.07 */
|
||||||
|
|
||||||
#define SPA_CPU_FLAG_ARMV5TE (1 << 0)
|
/* ARM specific */
|
||||||
#define SPA_CPU_FLAG_ARMV6 (1 << 1)
|
#define SPA_CPU_FLAG_ARMV5TE (1 << 0)
|
||||||
#define SPA_CPU_FLAG_ARMV6T2 (1 << 2)
|
#define SPA_CPU_FLAG_ARMV6 (1 << 1)
|
||||||
#define SPA_CPU_FLAG_VFP (1 << 3)
|
#define SPA_CPU_FLAG_ARMV6T2 (1 << 2)
|
||||||
#define SPA_CPU_FLAG_VFPV3 (1 << 4)
|
#define SPA_CPU_FLAG_VFP (1 << 3)
|
||||||
#define SPA_CPU_FLAG_NEON (1 << 5)
|
#define SPA_CPU_FLAG_VFPV3 (1 << 4)
|
||||||
#define SPA_CPU_FLAG_ARMV8 (1 << 6)
|
#define SPA_CPU_FLAG_NEON (1 << 5)
|
||||||
|
#define SPA_CPU_FLAG_ARMV8 (1 << 6)
|
||||||
|
|
||||||
#define SPA_CPU_FORCE_AUTODETECT ((uint32_t)-1)
|
#define SPA_CPU_FORCE_AUTODETECT ((uint32_t)-1)
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -121,6 +121,7 @@ struct spa_param_info {
|
||||||
})
|
})
|
||||||
|
|
||||||
#define SPA_MEMBER(b,o,t) ((t*)((uint8_t*)(b) + (int)(o)))
|
#define SPA_MEMBER(b,o,t) ((t*)((uint8_t*)(b) + (int)(o)))
|
||||||
|
/** Like SPA_MEMBER(b,o,t), with the resulting pointer then passed through
 *  SPA_PTR_ALIGN(..., a, t). */
#define SPA_MEMBER_ALIGN(b,o,a,t) SPA_PTR_ALIGN(SPA_MEMBER(b,o,t),a,t)
|
||||||
|
|
||||||
#define SPA_CONTAINER_OF(p,t,m) (t*)((uint8_t*)p - offsetof (t,m))
|
#define SPA_CONTAINER_OF(p,t,m) (t*)((uint8_t*)p - offsetof (t,m))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,14 @@
|
||||||
|
|
||||||
#include "fmt-ops.c"
|
#include "fmt-ops.c"
|
||||||
|
|
||||||
|
/** One benchmark measurement, as stored in results[] by run_test1(). */
struct stats {
	uint32_t n_samples;	/**< n_samples value the run was made with */
	uint32_t n_channels;	/**< n_channels value the run was made with */
	uint64_t perf;		/**< iteration count scaled to a per-second rate */
	const char *name;	/**< test name, e.g. "test_f32_s16" */
	const char *impl;	/**< implementation label, e.g. "c" or "sse2" */
};
|
||||||
|
|
||||||
#define MAX_SAMPLES 4096
|
#define MAX_SAMPLES 4096
|
||||||
#define MAX_CHANNELS 11
|
#define MAX_CHANNELS 11
|
||||||
|
|
||||||
|
|
@ -42,8 +50,13 @@ static uint8_t samp_out[MAX_SAMPLES * MAX_CHANNELS * 4];
|
||||||
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
|
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
|
||||||
static const int channel_counts[] = { 1, 2, 4, 6, 8, 11 };
|
static const int channel_counts[] = { 1, 2, 4, 6, 8, 11 };
|
||||||
|
|
||||||
static void run_test1(const char *name, bool in_packed, bool out_packed, convert_func_t func,
|
/* Capacity of results[]: room for 60 run_test() calls, each of which records
 * one entry per (sample size, channel count) pair.  The expansion is
 * parenthesized so the macro stays correct inside larger expressions
 * (e.g. `x / MAX_RESULTS`). */
#define MAX_RESULTS (SPA_N_ELEMENTS(sample_sizes) * SPA_N_ELEMENTS(channel_counts) * 60)
|
||||||
int n_channels, int n_samples)
|
|
||||||
|
static uint32_t n_results = 0;
|
||||||
|
static struct stats results[MAX_RESULTS];
|
||||||
|
|
||||||
|
static void run_test1(const char *name, const char *impl, bool in_packed, bool out_packed,
|
||||||
|
convert_func_t func, int n_channels, int n_samples)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
const void *ip[n_channels];
|
const void *ip[n_channels];
|
||||||
|
|
@ -67,18 +80,22 @@ static void run_test1(const char *name, bool in_packed, bool out_packed, convert
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
t2 = SPA_TIMESPEC_TO_NSEC(&ts);
|
t2 = SPA_TIMESPEC_TO_NSEC(&ts);
|
||||||
|
|
||||||
fprintf(stderr, "%s: samples %d, channels %d: elapsed %"PRIu64" count %"
|
results[n_results++] = (struct stats) {
|
||||||
PRIu64" = %"PRIu64"/sec\n", name, n_samples, n_channels,
|
.n_samples = n_samples,
|
||||||
t2 - t1, count, count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1));
|
.n_channels = n_channels,
|
||||||
|
.perf = count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1),
|
||||||
|
.name = name,
|
||||||
|
.impl = impl
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run_test(const char *name, bool in_packed, bool out_packed, convert_func_t func)
|
static void run_test(const char *name, const char *impl, bool in_packed, bool out_packed, convert_func_t func)
|
||||||
{
|
{
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
|
|
||||||
for (i = 0; i < SPA_N_ELEMENTS(sample_sizes); i++) {
|
for (i = 0; i < SPA_N_ELEMENTS(sample_sizes); i++) {
|
||||||
for (j = 0; j < SPA_N_ELEMENTS(channel_counts); j++) {
|
for (j = 0; j < SPA_N_ELEMENTS(channel_counts); j++) {
|
||||||
run_test1(name, in_packed, out_packed, func, channel_counts[j],
|
run_test1(name, impl, in_packed, out_packed, func, channel_counts[j],
|
||||||
(sample_sizes[i] + (channel_counts[j] -1)) / channel_counts[j]);
|
(sample_sizes[i] + (channel_counts[j] -1)) / channel_counts[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -86,93 +103,124 @@ static void run_test(const char *name, bool in_packed, bool out_packed, convert_
|
||||||
|
|
||||||
static void test_f32_u8(void)
|
static void test_f32_u8(void)
|
||||||
{
|
{
|
||||||
run_test("test_f32_u8", true, true, conv_f32_to_u8);
|
run_test("test_f32_u8", "c", true, true, conv_f32_to_u8);
|
||||||
run_test("test_f32d_u8", false, true, conv_f32d_to_u8);
|
run_test("test_f32d_u8", "c", false, true, conv_f32d_to_u8);
|
||||||
run_test("test_f32_u8d", true, false, conv_f32_to_u8d);
|
run_test("test_f32_u8d", "c", true, false, conv_f32_to_u8d);
|
||||||
run_test("test_f32d_u8d", false, false, conv_f32d_to_u8d);
|
run_test("test_f32d_u8d", "c", false, false, conv_f32d_to_u8d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_u8_f32(void)
|
static void test_u8_f32(void)
|
||||||
{
|
{
|
||||||
run_test("test_u8_f32", true, true, conv_u8_to_f32);
|
run_test("test_u8_f32", "c", true, true, conv_u8_to_f32);
|
||||||
run_test("test_u8d_f32", false, true, conv_u8d_to_f32);
|
run_test("test_u8d_f32", "c", false, true, conv_u8d_to_f32);
|
||||||
run_test("test_u8_f32d", true, false, conv_u8_to_f32d);
|
run_test("test_u8_f32d", "c", true, false, conv_u8_to_f32d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_f32_s16(void)
|
static void test_f32_s16(void)
|
||||||
{
|
{
|
||||||
run_test("test_f32_s16", true, true, conv_f32_to_s16);
|
run_test("test_f32_s16", "c", true, true, conv_f32_to_s16);
|
||||||
run_test("test_f32d_s16", false, true, conv_f32d_to_s16);
|
run_test("test_f32d_s16", "c", false, true, conv_f32d_to_s16);
|
||||||
run_test("test_f32_s16d", true, false, conv_f32_to_s16d);
|
#if defined (HAVE_SSE2)
|
||||||
|
run_test("test_f32d_s16", "sse2", false, true, conv_f32d_to_s16_sse2);
|
||||||
|
#endif
|
||||||
|
run_test("test_f32_s16d", "c", true, false, conv_f32_to_s16d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_s16_f32(void)
|
static void test_s16_f32(void)
|
||||||
{
|
{
|
||||||
run_test("test_s16_f32", true, true, conv_s16_to_f32);
|
run_test("test_s16_f32", "c", true, true, conv_s16_to_f32);
|
||||||
run_test("test_s16d_f32", false, true, conv_s16d_to_f32);
|
run_test("test_s16d_f32", "c", false, true, conv_s16d_to_f32);
|
||||||
run_test("test_s16_f32d", true, false, conv_s16_to_f32d);
|
run_test("test_s16_f32d", "c", true, false, conv_s16_to_f32d);
|
||||||
|
#if defined (HAVE_SSE2)
|
||||||
|
run_test("test_s16_f32d", "sse2", true, false, conv_s16_to_f32d_sse2);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_f32_s32(void)
|
static void test_f32_s32(void)
|
||||||
{
|
{
|
||||||
run_test("test_f32_s32", true, true, conv_f32_to_s32);
|
run_test("test_f32_s32", "c", true, true, conv_f32_to_s32);
|
||||||
run_test("test_f32d_s32", false, true, conv_f32d_to_s32);
|
run_test("test_f32d_s32", "c", false, true, conv_f32d_to_s32);
|
||||||
run_test("test_f32_s32d", true, false, conv_f32_to_s32d);
|
#if defined (HAVE_SSE2)
|
||||||
|
run_test("test_f32d_s32", "sse2", false, true, conv_f32d_to_s32_sse2);
|
||||||
|
#endif
|
||||||
|
run_test("test_f32_s32d", "c", true, false, conv_f32_to_s32d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_s32_f32(void)
|
static void test_s32_f32(void)
|
||||||
{
|
{
|
||||||
run_test("test_s32_f32", true, true, conv_s32_to_f32);
|
run_test("test_s32_f32", "c", true, true, conv_s32_to_f32);
|
||||||
run_test("test_s32d_f32", false, true, conv_s32d_to_f32);
|
run_test("test_s32d_f32", "c", false, true, conv_s32d_to_f32);
|
||||||
run_test("test_s32_f32d", true, false, conv_s32_to_f32d);
|
run_test("test_s32_f32d", "c", true, false, conv_s32_to_f32d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_f32_s24(void)
|
static void test_f32_s24(void)
|
||||||
{
|
{
|
||||||
run_test("test_f32_s24", true, true, conv_f32_to_s24);
|
run_test("test_f32_s24", "c", true, true, conv_f32_to_s24);
|
||||||
run_test("test_f32d_s24", false, true, conv_f32d_to_s24);
|
run_test("test_f32d_s24", "c", false, true, conv_f32d_to_s24);
|
||||||
run_test("test_f32_s24d", true, false, conv_f32_to_s24d);
|
run_test("test_f32_s24d", "c", true, false, conv_f32_to_s24d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_s24_f32(void)
|
static void test_s24_f32(void)
|
||||||
{
|
{
|
||||||
run_test("test_s24_f32", true, true, conv_s24_to_f32);
|
run_test("test_s24_f32", "c", true, true, conv_s24_to_f32);
|
||||||
run_test("test_s24d_f32", false, true, conv_s24d_to_f32);
|
run_test("test_s24d_f32", "c", false, true, conv_s24d_to_f32);
|
||||||
run_test("test_s24_f32d", true, false, conv_s24_to_f32d);
|
run_test("test_s24_f32d", "c", true, false, conv_s24_to_f32d);
|
||||||
|
#if defined (HAVE_SSE2)
|
||||||
|
run_test("test_s24_f32d", "sse2", true, false, conv_s24_to_f32d_sse2);
|
||||||
|
#endif
|
||||||
|
#if defined (HAVE_SSSE3)
|
||||||
|
run_test("test_s24_f32d", "ssse3", true, false, conv_s24_to_f32d_ssse3);
|
||||||
|
#endif
|
||||||
|
#if defined (HAVE_SSE41)
|
||||||
|
run_test("test_s24_f32d", "sse41", true, false, conv_s24_to_f32d_sse41);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_f32_s24_32(void)
|
static void test_f32_s24_32(void)
|
||||||
{
|
{
|
||||||
run_test("test_f32_s24_32", true, true, conv_f32_to_s24_32);
|
run_test("test_f32_s24_32", "c", true, true, conv_f32_to_s24_32);
|
||||||
run_test("test_f32d_s24_32", false, true, conv_f32d_to_s24_32);
|
run_test("test_f32d_s24_32", "c", false, true, conv_f32d_to_s24_32);
|
||||||
run_test("test_f32_s24_32d", true, false, conv_f32_to_s24_32d);
|
run_test("test_f32_s24_32d", "c", true, false, conv_f32_to_s24_32d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_s24_32_f32(void)
|
static void test_s24_32_f32(void)
|
||||||
{
|
{
|
||||||
run_test("test_s24_32_f32", true, true, conv_s24_32_to_f32);
|
run_test("test_s24_32_f32", "c", true, true, conv_s24_32_to_f32);
|
||||||
run_test("test_s24_32d_f32", false, true, conv_s24_32d_to_f32);
|
run_test("test_s24_32d_f32", "c", false, true, conv_s24_32d_to_f32);
|
||||||
run_test("test_s24_32_f32d", true, false, conv_s24_32_to_f32d);
|
run_test("test_s24_32_f32d", "c", true, false, conv_s24_32_to_f32d);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_interleave(void)
|
static void test_interleave(void)
|
||||||
{
|
{
|
||||||
run_test("test_interleave_8", false, true, interleave_8);
|
run_test("test_interleave_8", "c", false, true, interleave_8);
|
||||||
run_test("test_interleave_16", false, true, interleave_16);
|
run_test("test_interleave_16", "c", false, true, interleave_16);
|
||||||
run_test("test_interleave_24", false, true, interleave_24);
|
run_test("test_interleave_24", "c", false, true, interleave_24);
|
||||||
run_test("test_interleave_32", false, true, interleave_32);
|
run_test("test_interleave_32", "c", false, true, interleave_32);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_deinterleave(void)
|
static void test_deinterleave(void)
|
||||||
{
|
{
|
||||||
run_test("test_deinterleave_8", true, false, deinterleave_8);
|
run_test("test_deinterleave_8", "c", true, false, deinterleave_8);
|
||||||
run_test("test_deinterleave_16", true, false, deinterleave_16);
|
run_test("test_deinterleave_16", "c", true, false, deinterleave_16);
|
||||||
run_test("test_deinterleave_24", true, false, deinterleave_24);
|
run_test("test_deinterleave_24", "c", true, false, deinterleave_24);
|
||||||
run_test("test_deinterleave_32", true, false, deinterleave_32);
|
run_test("test_deinterleave_32", "c", true, false, deinterleave_32);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compare_func(const void *_a, const void *_b)
|
||||||
|
{
|
||||||
|
const struct stats *a = _a, *b = _b;
|
||||||
|
int diff;
|
||||||
|
if ((diff = strcmp(a->name, b->name)) != 0) return diff;
|
||||||
|
if ((diff = a->n_samples - b->n_samples) != 0) return diff;
|
||||||
|
if ((diff = a->n_channels - b->n_channels) != 0) return diff;
|
||||||
|
if ((diff = b->perf - a->perf) != 0) return diff;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
find_conv_info(0, 0, 0);
|
find_conv_info(0, 0, 0);
|
||||||
|
|
||||||
test_f32_u8();
|
test_f32_u8();
|
||||||
|
|
@ -188,5 +236,14 @@ int main(int argc, char *argv[])
|
||||||
test_interleave();
|
test_interleave();
|
||||||
test_deinterleave();
|
test_deinterleave();
|
||||||
|
|
||||||
|
spa_assert(n_results <= MAX_RESULTS);
|
||||||
|
|
||||||
|
qsort(results, n_results, sizeof(struct stats), compare_func);
|
||||||
|
|
||||||
|
for (i = 0; i < n_results; i++) {
|
||||||
|
struct stats *s = &results[i];
|
||||||
|
fprintf(stderr, "%-12."PRIu64" \t%-32.32s %s \t samples %d, channels %d\n",
|
||||||
|
s->perf, s->name, s->impl, s->n_samples, s->n_channels);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
183
spa/plugins/audioconvert/benchmark-resample.c
Normal file
183
spa/plugins/audioconvert/benchmark-resample.c
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
/* Spa
|
||||||
|
*
|
||||||
|
* Copyright © 2019 Wim Taymans
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#include "resample.h"
|
||||||
|
#include "resample-native.h"
|
||||||
|
#include "resample-speex.h"
|
||||||
|
|
||||||
|
/* Per-channel capacity, in samples, of the static input/output buffers. */
#define MAX_SAMPLES 4096
/* Number of per-channel regions in the static buffers. */
#define MAX_CHANNELS 11

/* Number of resample_process() calls timed for each measurement. */
#define MAX_COUNT 200
|
||||||
|
|
||||||
|
/** One benchmark measurement, as stored in results[] by run_test1(). */
struct stats {
	uint32_t in_rate;	/**< input rate of the resampler under test */
	uint32_t out_rate;	/**< output rate of the resampler under test */
	uint32_t n_samples;	/**< input samples offered per resample_process() call */
	uint32_t n_channels;	/**< channel count of the resampler under test */
	uint64_t perf;		/**< resample_process() calls per second */
	const char *name;	/**< resampler name, e.g. "native" or "speex" */
	const char *impl;	/**< implementation label, e.g. "c" or "sse" */
};
|
||||||
|
|
||||||
|
static float samp_in[MAX_SAMPLES * MAX_CHANNELS];
|
||||||
|
static float samp_out[MAX_SAMPLES * MAX_CHANNELS];
|
||||||
|
|
||||||
|
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
|
||||||
|
static const int in_rates[] = { 44100, 44100, 48000, 96000, 22050, 96000 };
|
||||||
|
static const int out_rates[] = { 44100, 48000, 44100, 48000, 48000, 44100 };
|
||||||
|
|
||||||
|
|
||||||
|
#define MAX_RESAMPLER 4
|
||||||
|
#define MAX_SIZES SPA_N_ELEMENTS(sample_sizes)
|
||||||
|
#define MAX_RATES SPA_N_ELEMENTS(in_rates)
|
||||||
|
|
||||||
|
static uint32_t n_results = 0;
|
||||||
|
static struct stats results[MAX_RESAMPLER * MAX_SIZES * MAX_RATES];
|
||||||
|
|
||||||
|
static void run_test1(const char *name, const char *impl, struct resample *r, int n_samples)
|
||||||
|
{
|
||||||
|
uint32_t i, j;
|
||||||
|
const void *ip[MAX_CHANNELS];
|
||||||
|
void *op[MAX_CHANNELS];
|
||||||
|
struct timespec ts;
|
||||||
|
uint64_t count, t1, t2;
|
||||||
|
uint32_t in_len, out_len;
|
||||||
|
|
||||||
|
for (j = 0; j < r->channels; j++) {
|
||||||
|
ip[j] = &samp_in[j * MAX_SAMPLES];
|
||||||
|
op[j] = &samp_out[j * MAX_SAMPLES];
|
||||||
|
}
|
||||||
|
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
t1 = SPA_TIMESPEC_TO_NSEC(&ts);
|
||||||
|
|
||||||
|
count = 0;
|
||||||
|
for (i = 0; i < MAX_COUNT; i++) {
|
||||||
|
in_len = n_samples;
|
||||||
|
out_len = MAX_SAMPLES;
|
||||||
|
resample_process(r, ip, &in_len, op, &out_len);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
t2 = SPA_TIMESPEC_TO_NSEC(&ts);
|
||||||
|
|
||||||
|
results[n_results++] = (struct stats) {
|
||||||
|
.in_rate = r->i_rate,
|
||||||
|
.out_rate = r->o_rate,
|
||||||
|
.n_samples = n_samples,
|
||||||
|
.n_channels = r->channels,
|
||||||
|
.perf = count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1),
|
||||||
|
.name = name,
|
||||||
|
.impl = impl
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static void run_test(const char *name, const char *impl, struct resample *r)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
for (i = 0; i < SPA_N_ELEMENTS(sample_sizes); i++)
|
||||||
|
run_test1(name, impl, r, sample_sizes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compare_func(const void *_a, const void *_b)
|
||||||
|
{
|
||||||
|
const struct stats *a = _a, *b = _b;
|
||||||
|
int diff;
|
||||||
|
|
||||||
|
if ((diff = a->in_rate - b->in_rate) != 0) return diff;
|
||||||
|
if ((diff = a->out_rate - b->out_rate) != 0) return diff;
|
||||||
|
if ((diff = a->n_samples - b->n_samples) != 0) return diff;
|
||||||
|
if ((diff = a->n_channels - b->n_channels) != 0) return diff;
|
||||||
|
if ((diff = b->perf - a->perf) != 0) return diff;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
struct resample r;
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
|
||||||
|
spa_zero(r);
|
||||||
|
r.channels = 2;
|
||||||
|
r.cpu_flags = 0;
|
||||||
|
r.i_rate = in_rates[i];
|
||||||
|
r.o_rate = out_rates[i];
|
||||||
|
impl_native_init(&r);
|
||||||
|
run_test("native", "c", &r);
|
||||||
|
resample_free(&r);
|
||||||
|
}
|
||||||
|
#if defined (HAVE_SSE)
|
||||||
|
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
|
||||||
|
spa_zero(r);
|
||||||
|
r.channels = 2;
|
||||||
|
r.cpu_flags = SPA_CPU_FLAG_SSE;
|
||||||
|
r.i_rate = in_rates[i];
|
||||||
|
r.o_rate = out_rates[i];
|
||||||
|
impl_native_init(&r);
|
||||||
|
run_test("native", "sse", &r);
|
||||||
|
resample_free(&r);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined (HAVE_SSSE3)
|
||||||
|
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
|
||||||
|
spa_zero(r);
|
||||||
|
r.channels = 2;
|
||||||
|
r.cpu_flags = SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED;
|
||||||
|
r.i_rate = in_rates[i];
|
||||||
|
r.o_rate = out_rates[i];
|
||||||
|
impl_native_init(&r);
|
||||||
|
run_test("native", "ssse3", &r);
|
||||||
|
resample_free(&r);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
|
||||||
|
spa_zero(r);
|
||||||
|
r.channels = 2;
|
||||||
|
r.i_rate = in_rates[i];
|
||||||
|
r.o_rate = out_rates[i];
|
||||||
|
impl_speex_init(&r);
|
||||||
|
run_test("speex", "def", &r);
|
||||||
|
resample_free(&r);
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort(results, n_results, sizeof(struct stats), compare_func);
|
||||||
|
|
||||||
|
for (i = 0; i < n_results; i++) {
|
||||||
|
struct stats *s = &results[i];
|
||||||
|
fprintf(stderr, "%-12."PRIu64" \t%-16.16s %s \t%d->%d samples %d, channels %d\n",
|
||||||
|
s->perf, s->name, s->impl, s->in_rate, s->out_rate,
|
||||||
|
s->n_samples, s->n_channels);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -118,7 +118,7 @@ conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RE
|
||||||
conv_s16_to_f32d_1_sse2(data, &dst[i], &s[i], n_channels, n_samples);
|
conv_s16_to_f32d_1_sse2(data, &dst[i], &s[i], n_channels, n_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
|
conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
|
||||||
{
|
{
|
||||||
const uint8_t *s = src;
|
const uint8_t *s = src;
|
||||||
|
|
@ -128,7 +128,7 @@ conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
|
||||||
__m128i in;
|
__m128i in;
|
||||||
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
|
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||||
|
|
||||||
if (SPA_IS_ALIGNED(d0, 16)) {
|
if (SPA_IS_ALIGNED(d0, 16) && n_samples > 0) {
|
||||||
unrolled = n_samples / 4;
|
unrolled = n_samples / 4;
|
||||||
if ((n_samples & 3) == 0)
|
if ((n_samples & 3) == 0)
|
||||||
unrolled--;
|
unrolled--;
|
||||||
|
|
@ -167,7 +167,9 @@ conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
|
||||||
__m128i in[2];
|
__m128i in[2];
|
||||||
__m128 out[2], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
__m128 out[2], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||||
|
|
||||||
if (SPA_IS_ALIGNED(d0, 16)) {
|
if (SPA_IS_ALIGNED(d0, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d1, 16) &&
|
||||||
|
n_samples > 0) {
|
||||||
unrolled = n_samples / 4;
|
unrolled = n_samples / 4;
|
||||||
if ((n_samples & 3) == 0)
|
if ((n_samples & 3) == 0)
|
||||||
unrolled--;
|
unrolled--;
|
||||||
|
|
@ -224,7 +226,11 @@ conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
|
||||||
__m128i in[4];
|
__m128i in[4];
|
||||||
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||||
|
|
||||||
if (SPA_IS_ALIGNED(d0, 16)) {
|
if (SPA_IS_ALIGNED(d0, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d1, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d2, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d3, 16) &&
|
||||||
|
n_samples > 0) {
|
||||||
unrolled = n_samples / 4;
|
unrolled = n_samples / 4;
|
||||||
if ((n_samples & 3) == 0)
|
if ((n_samples & 3) == 0)
|
||||||
unrolled--;
|
unrolled--;
|
||||||
|
|
@ -418,8 +424,7 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
|
||||||
if (SPA_IS_ALIGNED(s0, 16) &&
|
if (SPA_IS_ALIGNED(s0, 16) &&
|
||||||
SPA_IS_ALIGNED(s1, 16) &&
|
SPA_IS_ALIGNED(s1, 16) &&
|
||||||
SPA_IS_ALIGNED(s2, 16) &&
|
SPA_IS_ALIGNED(s2, 16) &&
|
||||||
SPA_IS_ALIGNED(s3, 16) &&
|
SPA_IS_ALIGNED(s3, 16))
|
||||||
SPA_IS_ALIGNED(d, 16))
|
|
||||||
unrolled = n_samples / 4;
|
unrolled = n_samples / 4;
|
||||||
else
|
else
|
||||||
unrolled = 0;
|
unrolled = 0;
|
||||||
|
|
@ -442,10 +447,10 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
|
||||||
out[2] = _mm_cvtps_epi32(in[2]);
|
out[2] = _mm_cvtps_epi32(in[2]);
|
||||||
out[3] = _mm_cvtps_epi32(in[3]);
|
out[3] = _mm_cvtps_epi32(in[3]);
|
||||||
|
|
||||||
_mm_store_si128((__m128i*)(d + 0*n_channels), out[0]);
|
_mm_storeu_si128((__m128i*)(d + 0*n_channels), out[0]);
|
||||||
_mm_store_si128((__m128i*)(d + 1*n_channels), out[1]);
|
_mm_storeu_si128((__m128i*)(d + 1*n_channels), out[1]);
|
||||||
_mm_store_si128((__m128i*)(d + 2*n_channels), out[2]);
|
_mm_storeu_si128((__m128i*)(d + 2*n_channels), out[2]);
|
||||||
_mm_store_si128((__m128i*)(d + 3*n_channels), out[3]);
|
_mm_storeu_si128((__m128i*)(d + 3*n_channels), out[3]);
|
||||||
d += 4*n_channels;
|
d += 4*n_channels;
|
||||||
}
|
}
|
||||||
for(; n < n_samples; n++) {
|
for(; n < n_samples; n++) {
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
||||||
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
|
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
|
||||||
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
|
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
|
||||||
|
|
||||||
if (SPA_IS_ALIGNED(d0, 16))
|
if (SPA_IS_ALIGNED(d0, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d1, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d2, 16) &&
|
||||||
|
SPA_IS_ALIGNED(d3, 16))
|
||||||
unrolled = n_samples / 4;
|
unrolled = n_samples / 4;
|
||||||
else
|
else
|
||||||
unrolled = 0;
|
unrolled = 0;
|
||||||
|
|
@ -90,8 +93,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
void
|
||||||
extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
||||||
|
|
||||||
void
|
void
|
||||||
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||||
|
|
@ -101,10 +104,6 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R
|
||||||
|
|
||||||
for(; i + 3 < n_channels; i += 4)
|
for(; i + 3 < n_channels; i += 4)
|
||||||
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
|
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||||
#if defined (HAVE_SSE2)
|
|
||||||
for(; i + 1 < n_channels; i += 2)
|
|
||||||
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
|
||||||
for(; i < n_channels; i++)
|
for(; i < n_channels; i++)
|
||||||
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -79,14 +79,16 @@ endforeach
|
||||||
|
|
||||||
benchmark_apps = [
|
benchmark_apps = [
|
||||||
'benchmark-fmt-ops',
|
'benchmark-fmt-ops',
|
||||||
|
'benchmark-resample',
|
||||||
]
|
]
|
||||||
|
|
||||||
foreach a : benchmark_apps
|
foreach a : benchmark_apps
|
||||||
benchmark(a,
|
benchmark(a,
|
||||||
executable(a, a + '.c',
|
executable(a, a + '.c',
|
||||||
dependencies : [dl_lib, pthread_lib, mathlib ],
|
dependencies : [dl_lib, pthread_lib, mathlib, speexdsp_dep, ],
|
||||||
include_directories : [spa_inc ],
|
include_directories : [spa_inc ],
|
||||||
c_args : [ '-D_GNU_SOURCE' ],
|
c_args : [ simd_cargs, '-D_GNU_SOURCE' ],
|
||||||
|
link_with : simd_dependencies,
|
||||||
install : false),
|
install : false),
|
||||||
env : [
|
env : [
|
||||||
'SPA_PLUGIN_DIR=@0@/spa/plugins/'.format(meson.build_root()),
|
'SPA_PLUGIN_DIR=@0@/spa/plugins/'.format(meson.build_root()),
|
||||||
|
|
|
||||||
|
|
@ -119,11 +119,11 @@ static void impl_native_update_rate(struct resample *r, double rate)
|
||||||
|
|
||||||
data->func = is_full ? do_resample_full_c : do_resample_inter_c;
|
data->func = is_full ? do_resample_full_c : do_resample_inter_c;
|
||||||
#if defined (HAVE_SSE)
|
#if defined (HAVE_SSE)
|
||||||
if (r->cpu_flags & SPA_CPU_FLAG_SSE)
|
if (SPA_FLAG_CHECK(r->cpu_flags, SPA_CPU_FLAG_SSE))
|
||||||
data->func = is_full ? do_resample_full_sse : do_resample_inter_sse;
|
data->func = is_full ? do_resample_full_sse : do_resample_inter_sse;
|
||||||
#endif
|
#endif
|
||||||
#if defined (HAVE_SSSE3)
|
#if defined (HAVE_SSSE3)
|
||||||
if (r->cpu_flags & SPA_CPU_FLAG_SSSE3)
|
if (SPA_FLAG_CHECK(r->cpu_flags, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED))
|
||||||
data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3;
|
data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -276,10 +276,8 @@ static int impl_native_init(struct resample *r)
|
||||||
d->n_phases = n_phases;
|
d->n_phases = n_phases;
|
||||||
d->in_rate = in_rate;
|
d->in_rate = in_rate;
|
||||||
d->out_rate = out_rate;
|
d->out_rate = out_rate;
|
||||||
d->filter = SPA_MEMBER(d, sizeof(struct native_data), float);
|
d->filter = SPA_MEMBER_ALIGN(d, sizeof(struct native_data), 64, float);
|
||||||
d->filter = SPA_PTR_ALIGN(d->filter, 64, float);
|
d->hist_mem = SPA_MEMBER_ALIGN(d->filter, filter_size, 64, float);
|
||||||
d->hist_mem = SPA_MEMBER(d->filter, filter_size, float);
|
|
||||||
d->hist_mem = SPA_PTR_ALIGN(d->hist_mem, 64, float);
|
|
||||||
d->history = SPA_MEMBER(d->hist_mem, history_size, float*);
|
d->history = SPA_MEMBER(d->hist_mem, history_size, float*);
|
||||||
d->filter_stride = filter_stride / sizeof(float);
|
d->filter_stride = filter_stride / sizeof(float);
|
||||||
d->filter_stride_os = d->filter_stride * oversample;
|
d->filter_stride_os = d->filter_stride * oversample;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue