mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-11-04 13:29:59 -05:00
whitespace fixes
This commit is contained in:
parent
3cc1278dcf
commit
f09b51198f
8 changed files with 695 additions and 670 deletions
|
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#if defined (__arm__) && defined (__linux__)
|
#if defined (__arm__) && defined (__linux__)
|
||||||
|
|
||||||
#define MAX_BUFFER 4096
|
#define MAX_BUFFER 4096
|
||||||
static char *
|
static char *
|
||||||
get_cpuinfo_line (char *cpuinfo, const char *tag) {
|
get_cpuinfo_line (char *cpuinfo, const char *tag) {
|
||||||
char *line, *end, *colon;
|
char *line, *end, *colon;
|
||||||
|
|
@ -106,20 +106,20 @@ void pa_cpu_init_arm (void) {
|
||||||
}
|
}
|
||||||
/* get the CPU features */
|
/* get the CPU features */
|
||||||
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
|
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
|
||||||
char *state = NULL, *current;
|
char *state = NULL, *current;
|
||||||
|
|
||||||
while ((current = pa_split_spaces (line, &state))) {
|
while ((current = pa_split_spaces (line, &state))) {
|
||||||
if (!strcmp (current, "vfp"))
|
if (!strcmp (current, "vfp"))
|
||||||
flags |= PA_CPU_ARM_VFP;
|
flags |= PA_CPU_ARM_VFP;
|
||||||
else if (!strcmp (current, "edsp"))
|
else if (!strcmp (current, "edsp"))
|
||||||
flags |= PA_CPU_ARM_EDSP;
|
flags |= PA_CPU_ARM_EDSP;
|
||||||
else if (!strcmp (current, "neon"))
|
else if (!strcmp (current, "neon"))
|
||||||
flags |= PA_CPU_ARM_NEON;
|
flags |= PA_CPU_ARM_NEON;
|
||||||
else if (!strcmp (current, "vfpv3"))
|
else if (!strcmp (current, "vfpv3"))
|
||||||
flags |= PA_CPU_ARM_VFPV3;
|
flags |= PA_CPU_ARM_VFPV3;
|
||||||
|
|
||||||
free (current);
|
free (current);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free (cpuinfo);
|
free (cpuinfo);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,14 +34,15 @@
|
||||||
static void
|
static void
|
||||||
get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
|
get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
|
||||||
{
|
{
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" push %%"PA_REG_b" \n\t"
|
" push %%"PA_REG_b" \n\t"
|
||||||
" cpuid \n\t"
|
" cpuid \n\t"
|
||||||
" mov %%ebx, %%esi \n\t"
|
" mov %%ebx, %%esi \n\t"
|
||||||
" pop %%"PA_REG_b" \n\t"
|
" pop %%"PA_REG_b" \n\t"
|
||||||
|
|
||||||
: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
|
: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
|
||||||
: "0" (op));
|
: "0" (op)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -97,23 +98,23 @@ void pa_cpu_init_x86 (void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
|
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
|
||||||
(flags & PA_CPU_X86_MMX) ? "MMX " : "",
|
(flags & PA_CPU_X86_MMX) ? "MMX " : "",
|
||||||
(flags & PA_CPU_X86_SSE) ? "SSE " : "",
|
(flags & PA_CPU_X86_SSE) ? "SSE " : "",
|
||||||
(flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
|
(flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
|
||||||
(flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
|
(flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
|
||||||
(flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
|
(flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
|
||||||
(flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
|
(flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
|
||||||
(flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
|
(flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
|
||||||
(flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
|
(flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
|
||||||
(flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
|
(flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
|
||||||
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
|
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
|
||||||
|
|
||||||
/* activate various optimisations */
|
/* activate various optimisations */
|
||||||
if (flags & PA_CPU_X86_MMX) {
|
if (flags & PA_CPU_X86_MMX)
|
||||||
pa_volume_func_init_mmx (flags);
|
pa_volume_func_init_mmx (flags);
|
||||||
}
|
|
||||||
if (flags & PA_CPU_X86_SSE) {
|
if (flags & PA_CPU_X86_SSE)
|
||||||
pa_volume_func_init_sse (flags);
|
pa_volume_func_init_sse (flags);
|
||||||
}
|
|
||||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1065,30 +1065,53 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
|
static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
switch (r->work_format) {
|
switch (r->work_format) {
|
||||||
case PA_SAMPLE_FLOAT32NE:
|
case PA_SAMPLE_FLOAT32NE:
|
||||||
{
|
{
|
||||||
float *d, *s;
|
float *d, *s;
|
||||||
|
|
||||||
d = (float *) dst;
|
d = (float *) dst;
|
||||||
s = (float *) src;
|
s = (float *) src;
|
||||||
|
|
||||||
for (; n > 0; n--, s++, d += 2)
|
for (i = n >> 2; i; i--) {
|
||||||
d[0] = d[1] = *s;
|
d[0] = d[1] = s[0];
|
||||||
break;
|
d[2] = d[3] = s[1];
|
||||||
}
|
d[4] = d[5] = s[2];
|
||||||
|
d[6] = d[7] = s[3];
|
||||||
|
s += 4;
|
||||||
|
d += 8;
|
||||||
|
}
|
||||||
|
for (i = n & 3; i; i--) {
|
||||||
|
d[0] = d[1] = s[0];
|
||||||
|
s++;
|
||||||
|
d += 2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PA_SAMPLE_S16NE:
|
case PA_SAMPLE_S16NE:
|
||||||
{
|
{
|
||||||
int16_t *d, *s;
|
int16_t *d, *s;
|
||||||
|
|
||||||
d = (int16_t *) dst;
|
d = (int16_t *) dst;
|
||||||
s = (int16_t *) src;
|
s = (int16_t *) src;
|
||||||
|
|
||||||
for (; n > 0; n--, s++, d += 2)
|
for (i = n >> 2; i; i--) {
|
||||||
d[0] = d[1] = *s;
|
d[0] = d[1] = s[0];
|
||||||
break;
|
d[2] = d[3] = s[1];
|
||||||
}
|
d[4] = d[5] = s[2];
|
||||||
|
d[6] = d[7] = s[3];
|
||||||
|
s += 4;
|
||||||
|
d += 8;
|
||||||
|
}
|
||||||
|
for (i = n & 3; i; i--) {
|
||||||
|
d[0] = d[1] = s[0];
|
||||||
|
s++;
|
||||||
|
d += 2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
pa_assert_not_reached();
|
pa_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
|
@ -1114,7 +1137,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
||||||
for (ic = 0; ic < n_ic; ic++) {
|
for (ic = 0; ic < n_ic; ic++) {
|
||||||
float vol;
|
float vol;
|
||||||
|
|
||||||
vol = r->map_table_f[oc][ic];
|
vol = r->map_table_f[oc][ic];
|
||||||
|
|
||||||
if (vol <= 0.0)
|
if (vol <= 0.0)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -1122,18 +1145,18 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
||||||
d = (float *)dst + oc;
|
d = (float *)dst + oc;
|
||||||
s = (float *)src + ic;
|
s = (float *)src + ic;
|
||||||
|
|
||||||
if (vol >= 1.0) {
|
if (vol >= 1.0) {
|
||||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||||
*d += *s;
|
*d += *s;
|
||||||
} else {
|
} else {
|
||||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||||
*d += *s * vol;
|
*d += *s * vol;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PA_SAMPLE_S16NE:
|
case PA_SAMPLE_S16NE:
|
||||||
{
|
{
|
||||||
int16_t *d, *s;
|
int16_t *d, *s;
|
||||||
|
|
@ -1144,7 +1167,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
||||||
for (ic = 0; ic < n_ic; ic++) {
|
for (ic = 0; ic < n_ic; ic++) {
|
||||||
int32_t vol;
|
int32_t vol;
|
||||||
|
|
||||||
vol = r->map_table_i[oc][ic];
|
vol = r->map_table_i[oc][ic];
|
||||||
|
|
||||||
if (vol <= 0)
|
if (vol <= 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -1158,11 +1181,11 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
||||||
} else {
|
} else {
|
||||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||||
*d += (int16_t) (((int32_t)*s * vol) >> 16);
|
*d += (int16_t) (((int32_t)*s * vol) >> 16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
pa_assert_not_reached();
|
pa_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -752,12 +752,13 @@ void pa_volume_memchunk(
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
|
|
||||||
|
|
||||||
do_volume = pa_get_volume_func (spec->format);
|
do_volume = pa_get_volume_func (spec->format);
|
||||||
pa_assert(do_volume);
|
pa_assert(do_volume);
|
||||||
|
|
||||||
calc_volume_table[spec->format] ((void *)linear, volume);
|
calc_volume_table[spec->format] ((void *)linear, volume);
|
||||||
|
|
||||||
|
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
|
||||||
|
|
||||||
do_volume (ptr, (void *)linear, spec->channels, c->length);
|
do_volume (ptr, (void *)linear, spec->channels, c->length);
|
||||||
|
|
||||||
pa_memblock_release(c->memblock);
|
pa_memblock_release(c->memblock);
|
||||||
|
|
@ -944,12 +945,12 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
|
||||||
for (; n > 0; n--) {
|
for (; n > 0; n--) {
|
||||||
float f;
|
float f;
|
||||||
|
|
||||||
f = *s;
|
f = *s;
|
||||||
*d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
|
*d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
|
||||||
|
|
||||||
s = (const float*) ((const uint8_t*) s + sstr);
|
s = (const float*) ((const uint8_t*) s + sstr);
|
||||||
d = (float*) ((uint8_t*) d + dstr);
|
d = (float*) ((uint8_t*) d + dstr);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pa_assert(format == PA_SAMPLE_FLOAT32RE);
|
pa_assert(format == PA_SAMPLE_FLOAT32RE);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,81 +45,81 @@
|
||||||
static void
|
static void
|
||||||
pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
int32_t *ve;
|
int32_t *ve;
|
||||||
|
|
||||||
channels = MAX (4, channels);
|
channels = MAX (4, channels);
|
||||||
ve = volumes + channels;
|
ve = volumes + channels;
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" mov r6, %1 \n\t"
|
" mov r6, %1 \n\t"
|
||||||
" mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
|
" mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
|
||||||
" tst %3, #1 \n\t" /* check for odd samples */
|
" tst %3, #1 \n\t" /* check for odd samples */
|
||||||
" beq 2f \n\t"
|
" beq 2f \n\t"
|
||||||
|
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
" ldr r0, [r6], #4 \n\t" /* odd samples volumes */
|
" ldr r0, [r6], #4 \n\t" /* odd samples volumes */
|
||||||
" ldrh r2, [%0] \n\t"
|
" ldrh r2, [%0] \n\t"
|
||||||
|
|
||||||
" smulwb r0, r0, r2 \n\t"
|
" smulwb r0, r0, r2 \n\t"
|
||||||
" ssat r0, #16, r0 \n\t"
|
" ssat r0, #16, r0 \n\t"
|
||||||
|
|
||||||
" strh r0, [%0], #2 \n\t"
|
" strh r0, [%0], #2 \n\t"
|
||||||
|
|
||||||
MOD_INC()
|
MOD_INC()
|
||||||
|
|
||||||
"2: \n\t"
|
"2: \n\t"
|
||||||
" mov %3, %3, LSR #1 \n\t"
|
" mov %3, %3, LSR #1 \n\t"
|
||||||
" tst %3, #1 \n\t" /* check for odd samples */
|
" tst %3, #1 \n\t" /* check for odd samples */
|
||||||
" beq 4f \n\t"
|
" beq 4f \n\t"
|
||||||
|
|
||||||
"3: \n\t"
|
"3: \n\t"
|
||||||
" ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
|
" ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
|
||||||
" ldr r0, [%0] \n\t"
|
" ldr r0, [%0] \n\t"
|
||||||
|
|
||||||
" smulwt r2, r2, r0 \n\t"
|
" smulwt r2, r2, r0 \n\t"
|
||||||
" smulwb r3, r3, r0 \n\t"
|
" smulwb r3, r3, r0 \n\t"
|
||||||
|
|
||||||
" ssat r2, #16, r2 \n\t"
|
" ssat r2, #16, r2 \n\t"
|
||||||
" ssat r3, #16, r3 \n\t"
|
" ssat r3, #16, r3 \n\t"
|
||||||
|
|
||||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||||
" str r0, [%0], #4 \n\t"
|
" str r0, [%0], #4 \n\t"
|
||||||
|
|
||||||
MOD_INC()
|
MOD_INC()
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
" movs %3, %3, LSR #1 \n\t"
|
" movs %3, %3, LSR #1 \n\t"
|
||||||
" beq 6f \n\t"
|
" beq 6f \n\t"
|
||||||
|
|
||||||
"5: \n\t"
|
"5: \n\t"
|
||||||
" ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
|
" ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
|
||||||
" ldrd r4, [r6], #8 \n\t"
|
" ldrd r4, [r6], #8 \n\t"
|
||||||
" ldrd r0, [%0] \n\t"
|
" ldrd r0, [%0] \n\t"
|
||||||
|
|
||||||
" smulwt r2, r2, r0 \n\t"
|
" smulwt r2, r2, r0 \n\t"
|
||||||
" smulwb r3, r3, r0 \n\t"
|
" smulwb r3, r3, r0 \n\t"
|
||||||
" smulwt r4, r4, r1 \n\t"
|
" smulwt r4, r4, r1 \n\t"
|
||||||
" smulwb r5, r5, r1 \n\t"
|
" smulwb r5, r5, r1 \n\t"
|
||||||
|
|
||||||
" ssat r2, #16, r2 \n\t"
|
" ssat r2, #16, r2 \n\t"
|
||||||
" ssat r3, #16, r3 \n\t"
|
" ssat r3, #16, r3 \n\t"
|
||||||
" ssat r4, #16, r4 \n\t"
|
" ssat r4, #16, r4 \n\t"
|
||||||
" ssat r5, #16, r5 \n\t"
|
" ssat r5, #16, r5 \n\t"
|
||||||
|
|
||||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||||
" pkhbt r1, r5, r4, LSL #16 \n\t"
|
" pkhbt r1, r5, r4, LSL #16 \n\t"
|
||||||
" strd r0, [%0], #8 \n\t"
|
" strd r0, [%0], #8 \n\t"
|
||||||
|
|
||||||
MOD_INC()
|
MOD_INC()
|
||||||
|
|
||||||
" subs %3, %3, #1 \n\t"
|
" subs %3, %3, #1 \n\t"
|
||||||
" bne 5b \n\t"
|
" bne 5b \n\t"
|
||||||
"6: \n\t"
|
"6: \n\t"
|
||||||
|
|
||||||
: "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
|
: "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
|
||||||
:
|
:
|
||||||
: "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
|
: "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef RUN_TEST
|
#undef RUN_TEST
|
||||||
|
|
@ -131,51 +131,51 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
||||||
#define PADDING 16
|
#define PADDING 16
|
||||||
|
|
||||||
static void run_test (void) {
|
static void run_test (void) {
|
||||||
int16_t samples[SAMPLES];
|
int16_t samples[SAMPLES];
|
||||||
int16_t samples_ref[SAMPLES];
|
int16_t samples_ref[SAMPLES];
|
||||||
int16_t samples_orig[SAMPLES];
|
int16_t samples_orig[SAMPLES];
|
||||||
int32_t volumes[CHANNELS + PADDING];
|
int32_t volumes[CHANNELS + PADDING];
|
||||||
int i, j, padding;
|
int i, j, padding;
|
||||||
pa_do_volume_func_t func;
|
pa_do_volume_func_t func;
|
||||||
struct timeval start, stop;
|
struct timeval start, stop;
|
||||||
|
|
||||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||||
|
|
||||||
printf ("checking ARM %zd\n", sizeof (samples));
|
printf ("checking ARM %zd\n", sizeof (samples));
|
||||||
|
|
||||||
pa_random (samples, sizeof (samples));
|
pa_random (samples, sizeof (samples));
|
||||||
memcpy (samples_ref, samples, sizeof (samples));
|
memcpy (samples_ref, samples, sizeof (samples));
|
||||||
memcpy (samples_orig, samples, sizeof (samples));
|
memcpy (samples_orig, samples, sizeof (samples));
|
||||||
|
|
||||||
for (i = 0; i < CHANNELS; i++)
|
for (i = 0; i < CHANNELS; i++)
|
||||||
volumes[i] = rand() >> 1;
|
volumes[i] = rand() >> 1;
|
||||||
for (padding = 0; padding < PADDING; padding++, i++)
|
for (padding = 0; padding < PADDING; padding++, i++)
|
||||||
volumes[i] = volumes[padding];
|
volumes[i] = volumes[padding];
|
||||||
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
|
||||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
for (i = 0; i < SAMPLES; i++) {
|
|
||||||
if (samples[i] != samples_ref[i]) {
|
|
||||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
|
||||||
samples_orig[i], volumes[i % CHANNELS]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples, samples_orig, sizeof (samples));
|
|
||||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
}
|
|
||||||
pa_gettimeofday(&stop);
|
|
||||||
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
}
|
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
pa_gettimeofday(&stop);
|
for (i = 0; i < SAMPLES; i++) {
|
||||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
if (samples[i] != samples_ref[i]) {
|
||||||
|
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||||
|
samples_orig[i], volumes[i % CHANNELS]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples, samples_orig, sizeof (samples));
|
||||||
|
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||||
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -184,12 +184,12 @@ static void run_test (void) {
|
||||||
|
|
||||||
void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
|
void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
|
||||||
#if defined (__arm__)
|
#if defined (__arm__)
|
||||||
pa_log_info("Initialising ARM optimized functions.");
|
pa_log_info("Initialising ARM optimized functions.");
|
||||||
|
|
||||||
#ifdef RUN_TEST
|
#ifdef RUN_TEST
|
||||||
run_test ();
|
run_test ();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
|
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
|
||||||
#endif /* defined (__arm__) */
|
#endif /* defined (__arm__) */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,289 +35,289 @@
|
||||||
static void
|
static void
|
||||||
pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t, hi, lo;
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
hi = volumes[channel] >> 16;
|
||||||
lo = volumes[channel] & 0xFFFF;
|
lo = volumes[channel] & 0xFFFF;
|
||||||
|
|
||||||
t = (int32_t) *samples - 0x80;
|
t = (int32_t) *samples - 0x80;
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
t = ((t * lo) >> 16) + (t * hi);
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
||||||
*samples++ = (uint8_t) (t + 0x80);
|
*samples++ = (uint8_t) (t + 0x80);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t, hi, lo;
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
hi = volumes[channel] >> 16;
|
||||||
lo = volumes[channel] & 0xFFFF;
|
lo = volumes[channel] & 0xFFFF;
|
||||||
|
|
||||||
t = (int32_t) st_alaw2linear16(*samples);
|
t = (int32_t) st_alaw2linear16(*samples);
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
t = ((t * lo) >> 16) + (t * hi);
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t, hi, lo;
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
hi = volumes[channel] >> 16;
|
||||||
lo = volumes[channel] & 0xFFFF;
|
lo = volumes[channel] & 0xFFFF;
|
||||||
|
|
||||||
t = (int32_t) st_ulaw2linear16(*samples);
|
t = (int32_t) st_ulaw2linear16(*samples);
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
t = ((t * lo) >> 16) + (t * hi);
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (int16_t);
|
length /= sizeof (int16_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t, hi, lo;
|
||||||
|
|
||||||
/* Multiplying the 32bit volume factor with the 16bit
|
/* Multiplying the 32bit volume factor with the 16bit
|
||||||
* sample might result in a 48bit value. We want to
|
* sample might result in a 48bit value. We want to
|
||||||
* do without 64 bit integers and hence do the
|
* do without 64 bit integers and hence do the
|
||||||
* multiplication independently for the HI and LO part
|
* multiplication independently for the HI and LO part
|
||||||
* of the volume. */
|
* of the volume. */
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
hi = volumes[channel] >> 16;
|
||||||
lo = volumes[channel] & 0xFFFF;
|
lo = volumes[channel] & 0xFFFF;
|
||||||
|
|
||||||
t = (int32_t)(*samples);
|
t = (int32_t)(*samples);
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
t = ((t * lo) >> 16) + (t * hi);
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (int16_t) t;
|
*samples++ = (int16_t) t;
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (int16_t);
|
length /= sizeof (int16_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t, hi, lo;
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
hi = volumes[channel] >> 16;
|
||||||
lo = volumes[channel] & 0xFFFF;
|
lo = volumes[channel] & 0xFFFF;
|
||||||
|
|
||||||
t = (int32_t) PA_INT16_SWAP(*samples);
|
t = (int32_t) PA_INT16_SWAP(*samples);
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
t = ((t * lo) >> 16) + (t * hi);
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = PA_INT16_SWAP((int16_t) t);
|
*samples++ = PA_INT16_SWAP((int16_t) t);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (float);
|
length /= sizeof (float);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
*samples++ *= volumes[channel];
|
*samples++ *= volumes[channel];
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (float);
|
length /= sizeof (float);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
float t;
|
float t;
|
||||||
|
|
||||||
t = PA_FLOAT32_SWAP(*samples);
|
t = PA_FLOAT32_SWAP(*samples);
|
||||||
t *= volumes[channel];
|
t *= volumes[channel];
|
||||||
*samples++ = PA_FLOAT32_SWAP(t);
|
*samples++ = PA_FLOAT32_SWAP(t);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (int32_t);
|
length /= sizeof (int32_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t)(*samples);
|
t = (int64_t)(*samples);
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
*samples++ = (int32_t) t;
|
*samples++ = (int32_t) t;
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (int32_t);
|
length /= sizeof (int32_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t) PA_INT32_SWAP(*samples);
|
t = (int64_t) PA_INT32_SWAP(*samples);
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
*samples++ = PA_INT32_SWAP((int32_t) t);
|
*samples++ = PA_INT32_SWAP((int32_t) t);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
uint8_t *e;
|
uint8_t *e;
|
||||||
|
|
||||||
e = samples + length;
|
e = samples + length;
|
||||||
|
|
||||||
for (channel = 0; samples < e; samples += 3) {
|
for (channel = 0; samples < e; samples += 3) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
|
t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
|
PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
uint8_t *e;
|
uint8_t *e;
|
||||||
|
|
||||||
e = samples + length;
|
e = samples + length;
|
||||||
|
|
||||||
for (channel = 0; samples < e; samples += 3) {
|
for (channel = 0; samples < e; samples += 3) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
|
t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
|
PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (uint32_t);
|
length /= sizeof (uint32_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t) ((int32_t) (*samples << 8));
|
t = (int64_t) ((int32_t) (*samples << 8));
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
*samples++ = ((uint32_t) ((int32_t) t)) >> 8;
|
*samples++ = ((uint32_t) ((int32_t) t)) >> 8;
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
length /= sizeof (uint32_t);
|
length /= sizeof (uint32_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int64_t t;
|
int64_t t;
|
||||||
|
|
||||||
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
|
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
|
||||||
t = (t * volumes[channel]) >> 16;
|
t = (t * volumes[channel]) >> 16;
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||||
*samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
|
*samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
|
||||||
|
|
||||||
if (PA_UNLIKELY(++channel >= channels))
|
if (PA_UNLIKELY(++channel >= channels))
|
||||||
channel = 0;
|
channel = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static pa_do_volume_func_t do_volume_table[] =
|
static pa_do_volume_func_t do_volume_table[] =
|
||||||
{
|
{
|
||||||
[PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
|
[PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
|
||||||
[PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
|
[PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
|
||||||
[PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
|
[PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
|
||||||
[PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
|
[PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
|
||||||
[PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
|
[PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
|
||||||
[PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
|
[PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
|
||||||
[PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
|
[PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
|
||||||
[PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
|
[PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
|
||||||
[PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
|
[PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
|
||||||
[PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
|
[PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
|
||||||
[PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
|
[PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
|
||||||
[PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
|
[PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
|
||||||
[PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
|
[PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
|
||||||
};
|
};
|
||||||
|
|
||||||
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
|
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
|
||||||
|
|
|
||||||
|
|
@ -96,147 +96,147 @@
|
||||||
static void
|
static void
|
||||||
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
pa_reg_x86 channel, temp;
|
pa_reg_x86 channel, temp;
|
||||||
|
|
||||||
/* the max number of samples we process at a time, this is also the max amount
|
/* the max number of samples we process at a time, this is also the max amount
|
||||||
* we overread the volume array, which should have enough padding. */
|
* we overread the volume array, which should have enough padding. */
|
||||||
channels = MAX (4, channels);
|
channels = MAX (4, channels);
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" xor %3, %3 \n\t"
|
" xor %3, %3 \n\t"
|
||||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||||
|
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 2f \n\t"
|
" je 2f \n\t"
|
||||||
|
|
||||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||||
" movd %4, %%mm1 \n\t"
|
" movd %4, %%mm1 \n\t"
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||||
" movw %w4, (%0) \n\t"
|
" movw %w4, (%0) \n\t"
|
||||||
" add $2, %0 \n\t"
|
" add $2, %0 \n\t"
|
||||||
MOD_ADD ($1, %5)
|
MOD_ADD ($1, %5)
|
||||||
|
|
||||||
"2: \n\t"
|
"2: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 4f \n\t"
|
" je 4f \n\t"
|
||||||
|
|
||||||
"3: \n\t" /* do samples in groups of 2 */
|
"3: \n\t" /* do samples in groups of 2 */
|
||||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" add $4, %0 \n\t"
|
" add $4, %0 \n\t"
|
||||||
MOD_ADD ($2, %5)
|
MOD_ADD ($2, %5)
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||||
" cmp $0, %2 \n\t"
|
" cmp $0, %2 \n\t"
|
||||||
" je 6f \n\t"
|
" je 6f \n\t"
|
||||||
|
|
||||||
"5: \n\t" /* do samples in groups of 4 */
|
"5: \n\t" /* do samples in groups of 4 */
|
||||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
VOLUME_32x16 (%%mm3, %%mm2)
|
VOLUME_32x16 (%%mm3, %%mm2)
|
||||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||||
" add $8, %0 \n\t"
|
" add $8, %0 \n\t"
|
||||||
MOD_ADD ($4, %5)
|
MOD_ADD ($4, %5)
|
||||||
" dec %2 \n\t"
|
" dec %2 \n\t"
|
||||||
" jne 5b \n\t"
|
" jne 5b \n\t"
|
||||||
|
|
||||||
"6: \n\t"
|
"6: \n\t"
|
||||||
" emms \n\t"
|
" emms \n\t"
|
||||||
|
|
||||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||||
: "r" ((pa_reg_x86)channels)
|
: "r" ((pa_reg_x86)channels)
|
||||||
: "cc"
|
: "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
pa_reg_x86 channel, temp;
|
pa_reg_x86 channel, temp;
|
||||||
|
|
||||||
/* the max number of samples we process at a time, this is also the max amount
|
/* the max number of samples we process at a time, this is also the max amount
|
||||||
* we overread the volume array, which should have enough padding. */
|
* we overread the volume array, which should have enough padding. */
|
||||||
channels = MAX (4, channels);
|
channels = MAX (4, channels);
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" xor %3, %3 \n\t"
|
" xor %3, %3 \n\t"
|
||||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||||
|
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 2f \n\t"
|
" je 2f \n\t"
|
||||||
|
|
||||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||||
" rorw $8, %w4 \n\t"
|
" rorw $8, %w4 \n\t"
|
||||||
" movd %4, %%mm1 \n\t"
|
" movd %4, %%mm1 \n\t"
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||||
" rorw $8, %w4 \n\t"
|
" rorw $8, %w4 \n\t"
|
||||||
" movw %w4, (%0) \n\t"
|
" movw %w4, (%0) \n\t"
|
||||||
" add $2, %0 \n\t"
|
" add $2, %0 \n\t"
|
||||||
MOD_ADD ($1, %5)
|
MOD_ADD ($1, %5)
|
||||||
|
|
||||||
"2: \n\t"
|
"2: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 4f \n\t"
|
" je 4f \n\t"
|
||||||
|
|
||||||
"3: \n\t" /* do samples in groups of 2 */
|
"3: \n\t" /* do samples in groups of 2 */
|
||||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||||
SWAP_16 (%%mm1)
|
SWAP_16 (%%mm1)
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
SWAP_16 (%%mm0)
|
SWAP_16 (%%mm0)
|
||||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" add $4, %0 \n\t"
|
" add $4, %0 \n\t"
|
||||||
MOD_ADD ($2, %5)
|
MOD_ADD ($2, %5)
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||||
" cmp $0, %2 \n\t"
|
" cmp $0, %2 \n\t"
|
||||||
" je 6f \n\t"
|
" je 6f \n\t"
|
||||||
|
|
||||||
"5: \n\t" /* do samples in groups of 4 */
|
"5: \n\t" /* do samples in groups of 4 */
|
||||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||||
SWAP_16_2 (%%mm1, %%mm3)
|
SWAP_16_2 (%%mm1, %%mm3)
|
||||||
VOLUME_32x16 (%%mm1, %%mm0)
|
VOLUME_32x16 (%%mm1, %%mm0)
|
||||||
VOLUME_32x16 (%%mm3, %%mm2)
|
VOLUME_32x16 (%%mm3, %%mm2)
|
||||||
SWAP_16_2 (%%mm0, %%mm2)
|
SWAP_16_2 (%%mm0, %%mm2)
|
||||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||||
" add $8, %0 \n\t"
|
" add $8, %0 \n\t"
|
||||||
MOD_ADD ($4, %5)
|
MOD_ADD ($4, %5)
|
||||||
" dec %2 \n\t"
|
" dec %2 \n\t"
|
||||||
" jne 5b \n\t"
|
" jne 5b \n\t"
|
||||||
|
|
||||||
"6: \n\t"
|
"6: \n\t"
|
||||||
" emms \n\t"
|
" emms \n\t"
|
||||||
|
|
||||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||||
: "r" ((pa_reg_x86)channels)
|
: "r" ((pa_reg_x86)channels)
|
||||||
: "cc"
|
: "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef RUN_TEST
|
#undef RUN_TEST
|
||||||
|
|
@ -248,51 +248,51 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
||||||
#define PADDING 16
|
#define PADDING 16
|
||||||
|
|
||||||
static void run_test (void) {
|
static void run_test (void) {
|
||||||
int16_t samples[SAMPLES];
|
int16_t samples[SAMPLES];
|
||||||
int16_t samples_ref[SAMPLES];
|
int16_t samples_ref[SAMPLES];
|
||||||
int16_t samples_orig[SAMPLES];
|
int16_t samples_orig[SAMPLES];
|
||||||
int32_t volumes[CHANNELS + PADDING];
|
int32_t volumes[CHANNELS + PADDING];
|
||||||
int i, j, padding;
|
int i, j, padding;
|
||||||
pa_do_volume_func_t func;
|
pa_do_volume_func_t func;
|
||||||
struct timeval start, stop;
|
struct timeval start, stop;
|
||||||
|
|
||||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||||
|
|
||||||
printf ("checking MMX %zd\n", sizeof (samples));
|
printf ("checking MMX %zd\n", sizeof (samples));
|
||||||
|
|
||||||
pa_random (samples, sizeof (samples));
|
pa_random (samples, sizeof (samples));
|
||||||
memcpy (samples_ref, samples, sizeof (samples));
|
memcpy (samples_ref, samples, sizeof (samples));
|
||||||
memcpy (samples_orig, samples, sizeof (samples));
|
memcpy (samples_orig, samples, sizeof (samples));
|
||||||
|
|
||||||
for (i = 0; i < CHANNELS; i++)
|
for (i = 0; i < CHANNELS; i++)
|
||||||
volumes[i] = rand() >> 1;
|
volumes[i] = rand() >> 1;
|
||||||
for (padding = 0; padding < PADDING; padding++, i++)
|
for (padding = 0; padding < PADDING; padding++, i++)
|
||||||
volumes[i] = volumes[padding];
|
volumes[i] = volumes[padding];
|
||||||
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
|
||||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
for (i = 0; i < SAMPLES; i++) {
|
|
||||||
if (samples[i] != samples_ref[i]) {
|
|
||||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
|
||||||
samples_orig[i], volumes[i % CHANNELS]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples, samples_orig, sizeof (samples));
|
|
||||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
}
|
|
||||||
pa_gettimeofday(&stop);
|
|
||||||
pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
}
|
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
pa_gettimeofday(&stop);
|
for (i = 0; i < SAMPLES; i++) {
|
||||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
if (samples[i] != samples_ref[i]) {
|
||||||
|
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||||
|
samples_orig[i], volumes[i % CHANNELS]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples, samples_orig, sizeof (samples));
|
||||||
|
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||||
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -301,13 +301,13 @@ static void run_test (void) {
|
||||||
|
|
||||||
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
|
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
|
||||||
#if defined (__i386__) || defined (__amd64__)
|
#if defined (__i386__) || defined (__amd64__)
|
||||||
pa_log_info("Initialising MMX optimized functions.");
|
pa_log_info("Initialising MMX optimized functions.");
|
||||||
|
|
||||||
#ifdef RUN_TEST
|
#ifdef RUN_TEST
|
||||||
run_test ();
|
run_test ();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
|
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
|
||||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
|
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
|
||||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -77,169 +77,169 @@
|
||||||
static void
|
static void
|
||||||
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
pa_reg_x86 channel, temp;
|
pa_reg_x86 channel, temp;
|
||||||
|
|
||||||
/* the max number of samples we process at a time, this is also the max amount
|
/* the max number of samples we process at a time, this is also the max amount
|
||||||
* we overread the volume array, which should have enough padding. */
|
* we overread the volume array, which should have enough padding. */
|
||||||
channels = MAX (8, channels);
|
channels = MAX (8, channels);
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" xor %3, %3 \n\t"
|
" xor %3, %3 \n\t"
|
||||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||||
|
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 2f \n\t"
|
" je 2f \n\t"
|
||||||
|
|
||||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||||
" movd %4, %%xmm1 \n\t"
|
" movd %4, %%xmm1 \n\t"
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||||
" movw %w4, (%0) \n\t"
|
" movw %w4, (%0) \n\t"
|
||||||
" add $2, %0 \n\t"
|
" add $2, %0 \n\t"
|
||||||
MOD_ADD ($1, %5)
|
MOD_ADD ($1, %5)
|
||||||
|
|
||||||
"2: \n\t"
|
"2: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||||
" test $1, %2 \n\t"
|
" test $1, %2 \n\t"
|
||||||
" je 4f \n\t"
|
" je 4f \n\t"
|
||||||
|
|
||||||
"3: \n\t" /* do samples in groups of 2 */
|
"3: \n\t" /* do samples in groups of 2 */
|
||||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" add $4, %0 \n\t"
|
" add $4, %0 \n\t"
|
||||||
MOD_ADD ($2, %5)
|
MOD_ADD ($2, %5)
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||||
" test $1, %2 \n\t"
|
" test $1, %2 \n\t"
|
||||||
" je 6f \n\t"
|
" je 6f \n\t"
|
||||||
|
|
||||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||||
* after this then. */
|
* after this then. */
|
||||||
"5: \n\t" /* do samples in groups of 4 */
|
"5: \n\t" /* do samples in groups of 4 */
|
||||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||||
" add $8, %0 \n\t"
|
" add $8, %0 \n\t"
|
||||||
MOD_ADD ($4, %5)
|
MOD_ADD ($4, %5)
|
||||||
|
|
||||||
"6: \n\t"
|
"6: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||||
" cmp $0, %2 \n\t"
|
" cmp $0, %2 \n\t"
|
||||||
" je 8f \n\t"
|
" je 8f \n\t"
|
||||||
|
|
||||||
"7: \n\t" /* do samples in groups of 8 */
|
"7: \n\t" /* do samples in groups of 8 */
|
||||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||||
" add $16, %0 \n\t"
|
" add $16, %0 \n\t"
|
||||||
MOD_ADD ($8, %5)
|
MOD_ADD ($8, %5)
|
||||||
" dec %2 \n\t"
|
" dec %2 \n\t"
|
||||||
" jne 7b \n\t"
|
" jne 7b \n\t"
|
||||||
"8: \n\t"
|
"8: \n\t"
|
||||||
|
|
||||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||||
: "r" ((pa_reg_x86)channels)
|
: "r" ((pa_reg_x86)channels)
|
||||||
: "cc"
|
: "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||||
{
|
{
|
||||||
pa_reg_x86 channel, temp;
|
pa_reg_x86 channel, temp;
|
||||||
|
|
||||||
/* the max number of samples we process at a time, this is also the max amount
|
/* the max number of samples we process at a time, this is also the max amount
|
||||||
* we overread the volume array, which should have enough padding. */
|
* we overread the volume array, which should have enough padding. */
|
||||||
channels = MAX (8, channels);
|
channels = MAX (8, channels);
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
" xor %3, %3 \n\t"
|
" xor %3, %3 \n\t"
|
||||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||||
|
|
||||||
" test $1, %2 \n\t" /* check for odd samples */
|
" test $1, %2 \n\t" /* check for odd samples */
|
||||||
" je 2f \n\t"
|
" je 2f \n\t"
|
||||||
|
|
||||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||||
" rorw $8, %w4 \n\t"
|
" rorw $8, %w4 \n\t"
|
||||||
" movd %4, %%xmm1 \n\t"
|
" movd %4, %%xmm1 \n\t"
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||||
" rorw $8, %w4 \n\t"
|
" rorw $8, %w4 \n\t"
|
||||||
" movw %w4, (%0) \n\t"
|
" movw %w4, (%0) \n\t"
|
||||||
" add $2, %0 \n\t"
|
" add $2, %0 \n\t"
|
||||||
MOD_ADD ($1, %5)
|
MOD_ADD ($1, %5)
|
||||||
|
|
||||||
"2: \n\t"
|
"2: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||||
" test $1, %2 \n\t"
|
" test $1, %2 \n\t"
|
||||||
" je 4f \n\t"
|
" je 4f \n\t"
|
||||||
|
|
||||||
"3: \n\t" /* do samples in groups of 2 */
|
"3: \n\t" /* do samples in groups of 2 */
|
||||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||||
SWAP_16 (%%xmm1)
|
SWAP_16 (%%xmm1)
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
SWAP_16 (%%xmm0)
|
SWAP_16 (%%xmm0)
|
||||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||||
" add $4, %0 \n\t"
|
" add $4, %0 \n\t"
|
||||||
MOD_ADD ($2, %5)
|
MOD_ADD ($2, %5)
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||||
" test $1, %2 \n\t"
|
" test $1, %2 \n\t"
|
||||||
" je 6f \n\t"
|
" je 6f \n\t"
|
||||||
|
|
||||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||||
* after this then. */
|
* after this then. */
|
||||||
"5: \n\t" /* do samples in groups of 4 */
|
"5: \n\t" /* do samples in groups of 4 */
|
||||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||||
SWAP_16 (%%xmm1)
|
SWAP_16 (%%xmm1)
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
SWAP_16 (%%xmm0)
|
SWAP_16 (%%xmm0)
|
||||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||||
" add $8, %0 \n\t"
|
" add $8, %0 \n\t"
|
||||||
MOD_ADD ($4, %5)
|
MOD_ADD ($4, %5)
|
||||||
|
|
||||||
"6: \n\t"
|
"6: \n\t"
|
||||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||||
" cmp $0, %2 \n\t"
|
" cmp $0, %2 \n\t"
|
||||||
" je 8f \n\t"
|
" je 8f \n\t"
|
||||||
|
|
||||||
"7: \n\t" /* do samples in groups of 8 */
|
"7: \n\t" /* do samples in groups of 8 */
|
||||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||||
SWAP_16_2 (%%xmm1, %%xmm3)
|
SWAP_16_2 (%%xmm1, %%xmm3)
|
||||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||||
SWAP_16_2 (%%xmm0, %%xmm2)
|
SWAP_16_2 (%%xmm0, %%xmm2)
|
||||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||||
" add $16, %0 \n\t"
|
" add $16, %0 \n\t"
|
||||||
MOD_ADD ($8, %5)
|
MOD_ADD ($8, %5)
|
||||||
" dec %2 \n\t"
|
" dec %2 \n\t"
|
||||||
" jne 7b \n\t"
|
" jne 7b \n\t"
|
||||||
"8: \n\t"
|
"8: \n\t"
|
||||||
|
|
||||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||||
: "r" ((pa_reg_x86)channels)
|
: "r" ((pa_reg_x86)channels)
|
||||||
: "cc"
|
: "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef RUN_TEST
|
#undef RUN_TEST
|
||||||
|
|
@ -251,64 +251,64 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
||||||
#define PADDING 16
|
#define PADDING 16
|
||||||
|
|
||||||
static void run_test (void) {
|
static void run_test (void) {
|
||||||
int16_t samples[SAMPLES];
|
int16_t samples[SAMPLES];
|
||||||
int16_t samples_ref[SAMPLES];
|
int16_t samples_ref[SAMPLES];
|
||||||
int16_t samples_orig[SAMPLES];
|
int16_t samples_orig[SAMPLES];
|
||||||
int32_t volumes[CHANNELS + PADDING];
|
int32_t volumes[CHANNELS + PADDING];
|
||||||
int i, j, padding;
|
int i, j, padding;
|
||||||
pa_do_volume_func_t func;
|
pa_do_volume_func_t func;
|
||||||
struct timeval start, stop;
|
struct timeval start, stop;
|
||||||
|
|
||||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||||
|
|
||||||
printf ("checking SSE %zd\n", sizeof (samples));
|
printf ("checking SSE %zd\n", sizeof (samples));
|
||||||
|
|
||||||
pa_random (samples, sizeof (samples));
|
pa_random (samples, sizeof (samples));
|
||||||
memcpy (samples_ref, samples, sizeof (samples));
|
memcpy (samples_ref, samples, sizeof (samples));
|
||||||
memcpy (samples_orig, samples, sizeof (samples));
|
memcpy (samples_orig, samples, sizeof (samples));
|
||||||
|
|
||||||
for (i = 0; i < CHANNELS; i++)
|
for (i = 0; i < CHANNELS; i++)
|
||||||
volumes[i] = rand() >> 1;
|
volumes[i] = rand() >> 1;
|
||||||
for (padding = 0; padding < PADDING; padding++, i++)
|
for (padding = 0; padding < PADDING; padding++, i++)
|
||||||
volumes[i] = volumes[padding];
|
volumes[i] = volumes[padding];
|
||||||
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
|
||||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
for (i = 0; i < SAMPLES; i++) {
|
|
||||||
if (samples[i] != samples_ref[i]) {
|
|
||||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
|
||||||
samples_orig[i], volumes[i % CHANNELS]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples, samples_orig, sizeof (samples));
|
|
||||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
|
||||||
}
|
|
||||||
pa_gettimeofday(&stop);
|
|
||||||
pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
|
||||||
|
|
||||||
pa_gettimeofday(&start);
|
|
||||||
for (j = 0; j < TIMES; j++) {
|
|
||||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
|
||||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
}
|
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
pa_gettimeofday(&stop);
|
for (i = 0; i < SAMPLES; i++) {
|
||||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
if (samples[i] != samples_ref[i]) {
|
||||||
|
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||||
|
samples_orig[i], volumes[i % CHANNELS]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples, samples_orig, sizeof (samples));
|
||||||
|
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
|
|
||||||
|
pa_gettimeofday(&start);
|
||||||
|
for (j = 0; j < TIMES; j++) {
|
||||||
|
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||||
|
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||||
|
}
|
||||||
|
pa_gettimeofday(&stop);
|
||||||
|
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||||
|
|
||||||
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
|
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
|
||||||
#if defined (__i386__) || defined (__amd64__)
|
#if defined (__i386__) || defined (__amd64__)
|
||||||
pa_log_info("Initialising SSE optimized functions.");
|
pa_log_info("Initialising SSE optimized functions.");
|
||||||
|
|
||||||
#ifdef RUN_TEST
|
#ifdef RUN_TEST
|
||||||
run_test ();
|
run_test ();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
|
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
|
||||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
|
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
|
||||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue