whitespace fixes

This commit is contained in:
Wim Taymans 2009-08-20 10:56:20 +02:00
parent 3cc1278dcf
commit f09b51198f
8 changed files with 695 additions and 670 deletions

View file

@ -36,7 +36,7 @@
#if defined (__arm__) && defined (__linux__) #if defined (__arm__) && defined (__linux__)
#define MAX_BUFFER 4096 #define MAX_BUFFER 4096
static char * static char *
get_cpuinfo_line (char *cpuinfo, const char *tag) { get_cpuinfo_line (char *cpuinfo, const char *tag) {
char *line, *end, *colon; char *line, *end, *colon;
@ -106,20 +106,20 @@ void pa_cpu_init_arm (void) {
} }
/* get the CPU features */ /* get the CPU features */
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) { if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
char *state = NULL, *current; char *state = NULL, *current;
while ((current = pa_split_spaces (line, &state))) { while ((current = pa_split_spaces (line, &state))) {
if (!strcmp (current, "vfp")) if (!strcmp (current, "vfp"))
flags |= PA_CPU_ARM_VFP; flags |= PA_CPU_ARM_VFP;
else if (!strcmp (current, "edsp")) else if (!strcmp (current, "edsp"))
flags |= PA_CPU_ARM_EDSP; flags |= PA_CPU_ARM_EDSP;
else if (!strcmp (current, "neon")) else if (!strcmp (current, "neon"))
flags |= PA_CPU_ARM_NEON; flags |= PA_CPU_ARM_NEON;
else if (!strcmp (current, "vfpv3")) else if (!strcmp (current, "vfpv3"))
flags |= PA_CPU_ARM_VFPV3; flags |= PA_CPU_ARM_VFPV3;
free (current); free (current);
} }
} }
free (cpuinfo); free (cpuinfo);

View file

@ -34,14 +34,15 @@
static void static void
get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
" push %%"PA_REG_b" \n\t" " push %%"PA_REG_b" \n\t"
" cpuid \n\t" " cpuid \n\t"
" mov %%ebx, %%esi \n\t" " mov %%ebx, %%esi \n\t"
" pop %%"PA_REG_b" \n\t" " pop %%"PA_REG_b" \n\t"
: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
: "0" (op)); : "0" (op)
);
} }
#endif #endif
@ -97,23 +98,23 @@ void pa_cpu_init_x86 (void) {
} }
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s", pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
(flags & PA_CPU_X86_MMX) ? "MMX " : "", (flags & PA_CPU_X86_MMX) ? "MMX " : "",
(flags & PA_CPU_X86_SSE) ? "SSE " : "", (flags & PA_CPU_X86_SSE) ? "SSE " : "",
(flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
(flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
(flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
(flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
(flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
(flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
(flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
/* activate various optimisations */ /* activate various optimisations */
if (flags & PA_CPU_X86_MMX) { if (flags & PA_CPU_X86_MMX)
pa_volume_func_init_mmx (flags); pa_volume_func_init_mmx (flags);
}
if (flags & PA_CPU_X86_SSE) { if (flags & PA_CPU_X86_SSE)
pa_volume_func_init_sse (flags); pa_volume_func_init_sse (flags);
}
#endif /* defined (__i386__) || defined (__amd64__) */ #endif /* defined (__i386__) || defined (__amd64__) */
} }

View file

@ -1065,30 +1065,53 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
} }
static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) { static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
unsigned i;
switch (r->work_format) { switch (r->work_format) {
case PA_SAMPLE_FLOAT32NE: case PA_SAMPLE_FLOAT32NE:
{ {
float *d, *s; float *d, *s;
d = (float *) dst; d = (float *) dst;
s = (float *) src; s = (float *) src;
for (; n > 0; n--, s++, d += 2) for (i = n >> 2; i; i--) {
d[0] = d[1] = *s; d[0] = d[1] = s[0];
break; d[2] = d[3] = s[1];
} d[4] = d[5] = s[2];
d[6] = d[7] = s[3];
s += 4;
d += 8;
}
for (i = n & 3; i; i--) {
d[0] = d[1] = s[0];
s++;
d += 2;
}
break;
}
case PA_SAMPLE_S16NE: case PA_SAMPLE_S16NE:
{ {
int16_t *d, *s; int16_t *d, *s;
d = (int16_t *) dst; d = (int16_t *) dst;
s = (int16_t *) src; s = (int16_t *) src;
for (; n > 0; n--, s++, d += 2) for (i = n >> 2; i; i--) {
d[0] = d[1] = *s; d[0] = d[1] = s[0];
break; d[2] = d[3] = s[1];
} d[4] = d[5] = s[2];
d[6] = d[7] = s[3];
s += 4;
d += 8;
}
for (i = n & 3; i; i--) {
d[0] = d[1] = s[0];
s++;
d += 2;
}
break;
}
default: default:
pa_assert_not_reached(); pa_assert_not_reached();
} }
@ -1114,7 +1137,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
for (ic = 0; ic < n_ic; ic++) { for (ic = 0; ic < n_ic; ic++) {
float vol; float vol;
vol = r->map_table_f[oc][ic]; vol = r->map_table_f[oc][ic];
if (vol <= 0.0) if (vol <= 0.0)
continue; continue;
@ -1122,18 +1145,18 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
d = (float *)dst + oc; d = (float *)dst + oc;
s = (float *)src + ic; s = (float *)src + ic;
if (vol >= 1.0) { if (vol >= 1.0) {
for (i = n; i > 0; i--, s += n_ic, d += n_oc) for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s; *d += *s;
} else { } else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc) for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s * vol; *d += *s * vol;
} }
} }
} }
break; break;
} }
case PA_SAMPLE_S16NE: case PA_SAMPLE_S16NE:
{ {
int16_t *d, *s; int16_t *d, *s;
@ -1144,7 +1167,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
for (ic = 0; ic < n_ic; ic++) { for (ic = 0; ic < n_ic; ic++) {
int32_t vol; int32_t vol;
vol = r->map_table_i[oc][ic]; vol = r->map_table_i[oc][ic];
if (vol <= 0) if (vol <= 0)
continue; continue;
@ -1158,11 +1181,11 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
} else { } else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc) for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += (int16_t) (((int32_t)*s * vol) >> 16); *d += (int16_t) (((int32_t)*s * vol) >> 16);
} }
} }
} }
break; break;
} }
default: default:
pa_assert_not_reached(); pa_assert_not_reached();
} }

View file

@ -752,12 +752,13 @@ void pa_volume_memchunk(
return; return;
} }
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
do_volume = pa_get_volume_func (spec->format); do_volume = pa_get_volume_func (spec->format);
pa_assert(do_volume); pa_assert(do_volume);
calc_volume_table[spec->format] ((void *)linear, volume); calc_volume_table[spec->format] ((void *)linear, volume);
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
do_volume (ptr, (void *)linear, spec->channels, c->length); do_volume (ptr, (void *)linear, spec->channels, c->length);
pa_memblock_release(c->memblock); pa_memblock_release(c->memblock);
@ -944,12 +945,12 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
for (; n > 0; n--) { for (; n > 0; n--) {
float f; float f;
f = *s; f = *s;
*d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f); *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
s = (const float*) ((const uint8_t*) s + sstr); s = (const float*) ((const uint8_t*) s + sstr);
d = (float*) ((uint8_t*) d + dstr); d = (float*) ((uint8_t*) d + dstr);
} }
} else { } else {
pa_assert(format == PA_SAMPLE_FLOAT32RE); pa_assert(format == PA_SAMPLE_FLOAT32RE);

View file

@ -45,81 +45,81 @@
static void static void
pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
int32_t *ve; int32_t *ve;
channels = MAX (4, channels); channels = MAX (4, channels);
ve = volumes + channels; ve = volumes + channels;
__asm__ __volatile__ ( __asm__ __volatile__ (
" mov r6, %1 \n\t" " mov r6, %1 \n\t"
" mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */ " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
" tst %3, #1 \n\t" /* check for odd samples */ " tst %3, #1 \n\t" /* check for odd samples */
" beq 2f \n\t" " beq 2f \n\t"
"1: \n\t" "1: \n\t"
" ldr r0, [r6], #4 \n\t" /* odd samples volumes */ " ldr r0, [r6], #4 \n\t" /* odd samples volumes */
" ldrh r2, [%0] \n\t" " ldrh r2, [%0] \n\t"
" smulwb r0, r0, r2 \n\t" " smulwb r0, r0, r2 \n\t"
" ssat r0, #16, r0 \n\t" " ssat r0, #16, r0 \n\t"
" strh r0, [%0], #2 \n\t" " strh r0, [%0], #2 \n\t"
MOD_INC() MOD_INC()
"2: \n\t" "2: \n\t"
" mov %3, %3, LSR #1 \n\t" " mov %3, %3, LSR #1 \n\t"
" tst %3, #1 \n\t" /* check for odd samples */ " tst %3, #1 \n\t" /* check for odd samples */
" beq 4f \n\t" " beq 4f \n\t"
"3: \n\t" "3: \n\t"
" ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */ " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
" ldr r0, [%0] \n\t" " ldr r0, [%0] \n\t"
" smulwt r2, r2, r0 \n\t" " smulwt r2, r2, r0 \n\t"
" smulwb r3, r3, r0 \n\t" " smulwb r3, r3, r0 \n\t"
" ssat r2, #16, r2 \n\t" " ssat r2, #16, r2 \n\t"
" ssat r3, #16, r3 \n\t" " ssat r3, #16, r3 \n\t"
" pkhbt r0, r3, r2, LSL #16 \n\t" " pkhbt r0, r3, r2, LSL #16 \n\t"
" str r0, [%0], #4 \n\t" " str r0, [%0], #4 \n\t"
MOD_INC() MOD_INC()
"4: \n\t" "4: \n\t"
" movs %3, %3, LSR #1 \n\t" " movs %3, %3, LSR #1 \n\t"
" beq 6f \n\t" " beq 6f \n\t"
"5: \n\t" "5: \n\t"
" ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */ " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
" ldrd r4, [r6], #8 \n\t" " ldrd r4, [r6], #8 \n\t"
" ldrd r0, [%0] \n\t" " ldrd r0, [%0] \n\t"
" smulwt r2, r2, r0 \n\t" " smulwt r2, r2, r0 \n\t"
" smulwb r3, r3, r0 \n\t" " smulwb r3, r3, r0 \n\t"
" smulwt r4, r4, r1 \n\t" " smulwt r4, r4, r1 \n\t"
" smulwb r5, r5, r1 \n\t" " smulwb r5, r5, r1 \n\t"
" ssat r2, #16, r2 \n\t" " ssat r2, #16, r2 \n\t"
" ssat r3, #16, r3 \n\t" " ssat r3, #16, r3 \n\t"
" ssat r4, #16, r4 \n\t" " ssat r4, #16, r4 \n\t"
" ssat r5, #16, r5 \n\t" " ssat r5, #16, r5 \n\t"
" pkhbt r0, r3, r2, LSL #16 \n\t" " pkhbt r0, r3, r2, LSL #16 \n\t"
" pkhbt r1, r5, r4, LSL #16 \n\t" " pkhbt r1, r5, r4, LSL #16 \n\t"
" strd r0, [%0], #8 \n\t" " strd r0, [%0], #8 \n\t"
MOD_INC() MOD_INC()
" subs %3, %3, #1 \n\t" " subs %3, %3, #1 \n\t"
" bne 5b \n\t" " bne 5b \n\t"
"6: \n\t" "6: \n\t"
: "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
: :
: "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc" : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
); );
} }
#undef RUN_TEST #undef RUN_TEST
@ -131,51 +131,51 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16 #define PADDING 16
static void run_test (void) { static void run_test (void) {
int16_t samples[SAMPLES]; int16_t samples[SAMPLES];
int16_t samples_ref[SAMPLES]; int16_t samples_ref[SAMPLES];
int16_t samples_orig[SAMPLES]; int16_t samples_orig[SAMPLES];
int32_t volumes[CHANNELS + PADDING]; int32_t volumes[CHANNELS + PADDING];
int i, j, padding; int i, j, padding;
pa_do_volume_func_t func; pa_do_volume_func_t func;
struct timeval start, stop; struct timeval start, stop;
func = pa_get_volume_func (PA_SAMPLE_S16NE); func = pa_get_volume_func (PA_SAMPLE_S16NE);
printf ("checking ARM %zd\n", sizeof (samples)); printf ("checking ARM %zd\n", sizeof (samples));
pa_random (samples, sizeof (samples)); pa_random (samples, sizeof (samples));
memcpy (samples_ref, samples, sizeof (samples)); memcpy (samples_ref, samples, sizeof (samples));
memcpy (samples_orig, samples, sizeof (samples)); memcpy (samples_orig, samples, sizeof (samples));
for (i = 0; i < CHANNELS; i++) for (i = 0; i < CHANNELS; i++)
volumes[i] = rand() >> 1; volumes[i] = rand() >> 1;
for (padding = 0; padding < PADDING; padding++, i++) for (padding = 0; padding < PADDING; padding++, i++)
volumes[i] = volumes[padding]; volumes[i] = volumes[padding];
func (samples_ref, volumes, CHANNELS, sizeof (samples));
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
for (i = 0; i < SAMPLES; i++) {
if (samples[i] != samples_ref[i]) {
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
samples_orig[i], volumes[i % CHANNELS]);
}
}
pa_gettimeofday(&start);
for (j = 0; j < TIMES; j++) {
memcpy (samples, samples_orig, sizeof (samples));
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
}
pa_gettimeofday(&stop);
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
pa_gettimeofday(&start);
for (j = 0; j < TIMES; j++) {
memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples));
} pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
pa_gettimeofday(&stop); for (i = 0; i < SAMPLES; i++) {
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); if (samples[i] != samples_ref[i]) {
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
samples_orig[i], volumes[i % CHANNELS]);
}
}
pa_gettimeofday(&start);
for (j = 0; j < TIMES; j++) {
memcpy (samples, samples_orig, sizeof (samples));
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
}
pa_gettimeofday(&stop);
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
pa_gettimeofday(&start);
for (j = 0; j < TIMES; j++) {
memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
}
pa_gettimeofday(&stop);
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
} }
#endif #endif
@ -184,12 +184,12 @@ static void run_test (void) {
void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) { void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
#if defined (__arm__) #if defined (__arm__)
pa_log_info("Initialising ARM optimized functions."); pa_log_info("Initialising ARM optimized functions.");
#ifdef RUN_TEST #ifdef RUN_TEST
run_test (); run_test ();
#endif #endif
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm); pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
#endif /* defined (__arm__) */ #endif /* defined (__arm__) */
} }

View file

@ -35,289 +35,289 @@
static void static void
pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int32_t t, hi, lo; int32_t t, hi, lo;
hi = volumes[channel] >> 16; hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF; lo = volumes[channel] & 0xFFFF;
t = (int32_t) *samples - 0x80; t = (int32_t) *samples - 0x80;
t = ((t * lo) >> 16) + (t * hi); t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
*samples++ = (uint8_t) (t + 0x80); *samples++ = (uint8_t) (t + 0x80);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int32_t t, hi, lo; int32_t t, hi, lo;
hi = volumes[channel] >> 16; hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF; lo = volumes[channel] & 0xFFFF;
t = (int32_t) st_alaw2linear16(*samples); t = (int32_t) st_alaw2linear16(*samples);
t = ((t * lo) >> 16) + (t * hi); t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int32_t t, hi, lo; int32_t t, hi, lo;
hi = volumes[channel] >> 16; hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF; lo = volumes[channel] & 0xFFFF;
t = (int32_t) st_ulaw2linear16(*samples); t = (int32_t) st_ulaw2linear16(*samples);
t = ((t * lo) >> 16) + (t * hi); t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (int16_t); length /= sizeof (int16_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int32_t t, hi, lo; int32_t t, hi, lo;
/* Multiplying the 32bit volume factor with the 16bit /* Multiplying the 32bit volume factor with the 16bit
* sample might result in a 48bit value. We want to * sample might result in a 48bit value. We want to
* do without 64 bit integers and hence do the * do without 64 bit integers and hence do the
* multiplication independently for the HI and LO part * multiplication independently for the HI and LO part
* of the volume. */ * of the volume. */
hi = volumes[channel] >> 16; hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF; lo = volumes[channel] & 0xFFFF;
t = (int32_t)(*samples); t = (int32_t)(*samples);
t = ((t * lo) >> 16) + (t * hi); t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (int16_t) t; *samples++ = (int16_t) t;
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (int16_t); length /= sizeof (int16_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int32_t t, hi, lo; int32_t t, hi, lo;
hi = volumes[channel] >> 16; hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF; lo = volumes[channel] & 0xFFFF;
t = (int32_t) PA_INT16_SWAP(*samples); t = (int32_t) PA_INT16_SWAP(*samples);
t = ((t * lo) >> 16) + (t * hi); t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = PA_INT16_SWAP((int16_t) t); *samples++ = PA_INT16_SWAP((int16_t) t);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (float); length /= sizeof (float);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
*samples++ *= volumes[channel]; *samples++ *= volumes[channel];
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length) pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (float); length /= sizeof (float);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
float t; float t;
t = PA_FLOAT32_SWAP(*samples); t = PA_FLOAT32_SWAP(*samples);
t *= volumes[channel]; t *= volumes[channel];
*samples++ = PA_FLOAT32_SWAP(t); *samples++ = PA_FLOAT32_SWAP(t);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (int32_t); length /= sizeof (int32_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int64_t t; int64_t t;
t = (int64_t)(*samples); t = (int64_t)(*samples);
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*samples++ = (int32_t) t; *samples++ = (int32_t) t;
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (int32_t); length /= sizeof (int32_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int64_t t; int64_t t;
t = (int64_t) PA_INT32_SWAP(*samples); t = (int64_t) PA_INT32_SWAP(*samples);
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*samples++ = PA_INT32_SWAP((int32_t) t); *samples++ = PA_INT32_SWAP((int32_t) t);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
uint8_t *e; uint8_t *e;
e = samples + length; e = samples + length;
for (channel = 0; samples < e; samples += 3) { for (channel = 0; samples < e; samples += 3) {
int64_t t; int64_t t;
t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
uint8_t *e; uint8_t *e;
e = samples + length; e = samples + length;
for (channel = 0; samples < e; samples += 3) { for (channel = 0; samples < e; samples += 3) {
int64_t t; int64_t t;
t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (uint32_t); length /= sizeof (uint32_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int64_t t; int64_t t;
t = (int64_t) ((int32_t) (*samples << 8)); t = (int64_t) ((int32_t) (*samples << 8));
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*samples++ = ((uint32_t) ((int32_t) t)) >> 8; *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static void static void
pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
unsigned channel; unsigned channel;
length /= sizeof (uint32_t); length /= sizeof (uint32_t);
for (channel = 0; length; length--) { for (channel = 0; length; length--) {
int64_t t; int64_t t;
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
t = (t * volumes[channel]) >> 16; t = (t * volumes[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
if (PA_UNLIKELY(++channel >= channels)) if (PA_UNLIKELY(++channel >= channels))
channel = 0; channel = 0;
} }
} }
static pa_do_volume_func_t do_volume_table[] = static pa_do_volume_func_t do_volume_table[] =
{ {
[PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
[PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
[PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
[PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
[PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
[PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
[PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
[PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
[PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
[PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
[PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
[PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
[PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
}; };
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) { pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {

View file

@ -96,147 +96,147 @@
static void static void
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
pa_reg_x86 channel, temp; pa_reg_x86 channel, temp;
/* the max number of samples we process at a time; this is also the max amount /* the max number of samples we process at a time; this is also the max amount
* by which we overread the volume array, which should have enough padding. */ * by which we overread the volume array, which should have enough padding. */
channels = MAX (4, channels); channels = MAX (4, channels);
__asm__ __volatile__ ( __asm__ __volatile__ (
" xor %3, %3 \n\t" " xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t" " je 2f \n\t"
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
" movw (%0), %w4 \n\t" /* .. | p0 | */ " movw (%0), %w4 \n\t" /* .. | p0 | */
" movd %4, %%mm1 \n\t" " movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
" movw %w4, (%0) \n\t" " movw %w4, (%0) \n\t"
" add $2, %0 \n\t" " add $2, %0 \n\t"
MOD_ADD ($1, %5) MOD_ADD ($1, %5)
"2: \n\t" "2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 4f \n\t" " je 4f \n\t"
"3: \n\t" /* do samples in groups of 2 */ "3: \n\t" /* do samples in groups of 2 */
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t" " add $4, %0 \n\t"
MOD_ADD ($2, %5) MOD_ADD ($2, %5)
"4: \n\t" "4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
" cmp $0, %2 \n\t" " cmp $0, %2 \n\t"
" je 6f \n\t" " je 6f \n\t"
"5: \n\t" /* do samples in groups of 4 */ "5: \n\t" /* do samples in groups of 4 */
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2) VOLUME_32x16 (%%mm3, %%mm2)
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t" " add $8, %0 \n\t"
MOD_ADD ($4, %5) MOD_ADD ($4, %5)
" dec %2 \n\t" " dec %2 \n\t"
" jne 5b \n\t" " jne 5b \n\t"
"6: \n\t" "6: \n\t"
" emms \n\t" " emms \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
: "r" ((pa_reg_x86)channels) : "r" ((pa_reg_x86)channels)
: "cc" : "cc"
); );
} }
static void static void
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
pa_reg_x86 channel, temp; pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount /* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */ * we overread the volume array, which should have enough padding. */
channels = MAX (4, channels); channels = MAX (4, channels);
__asm__ __volatile__ ( __asm__ __volatile__ (
" xor %3, %3 \n\t" " xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t" " je 2f \n\t"
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
" movw (%0), %w4 \n\t" /* .. | p0 | */ " movw (%0), %w4 \n\t" /* .. | p0 | */
" rorw $8, %w4 \n\t" " rorw $8, %w4 \n\t"
" movd %4, %%mm1 \n\t" " movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
" rorw $8, %w4 \n\t" " rorw $8, %w4 \n\t"
" movw %w4, (%0) \n\t" " movw %w4, (%0) \n\t"
" add $2, %0 \n\t" " add $2, %0 \n\t"
MOD_ADD ($1, %5) MOD_ADD ($1, %5)
"2: \n\t" "2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 4f \n\t" " je 4f \n\t"
"3: \n\t" /* do samples in groups of 2 */ "3: \n\t" /* do samples in groups of 2 */
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
SWAP_16 (%%mm1) SWAP_16 (%%mm1)
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
SWAP_16 (%%mm0) SWAP_16 (%%mm0)
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t" " add $4, %0 \n\t"
MOD_ADD ($2, %5) MOD_ADD ($2, %5)
"4: \n\t" "4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
" cmp $0, %2 \n\t" " cmp $0, %2 \n\t"
" je 6f \n\t" " je 6f \n\t"
"5: \n\t" /* do samples in groups of 4 */ "5: \n\t" /* do samples in groups of 4 */
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
SWAP_16_2 (%%mm1, %%mm3) SWAP_16_2 (%%mm1, %%mm3)
VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2) VOLUME_32x16 (%%mm3, %%mm2)
SWAP_16_2 (%%mm0, %%mm2) SWAP_16_2 (%%mm0, %%mm2)
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t" " add $8, %0 \n\t"
MOD_ADD ($4, %5) MOD_ADD ($4, %5)
" dec %2 \n\t" " dec %2 \n\t"
" jne 5b \n\t" " jne 5b \n\t"
"6: \n\t" "6: \n\t"
" emms \n\t" " emms \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
: "r" ((pa_reg_x86)channels) : "r" ((pa_reg_x86)channels)
: "cc" : "cc"
); );
} }
#undef RUN_TEST #undef RUN_TEST
@ -248,51 +248,51 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16

/* Self-test and micro-benchmark for the MMX native-endian routine.
 *
 * Fills a buffer with random samples, runs both the routine currently
 * registered for PA_SAMPLE_S16NE and pa_volume_s16ne_mmx over identical
 * copies, prints every sample on which they disagree, and then logs how long
 * TIMES runs of each take. */
static void run_test (void) {
    int16_t samples[SAMPLES];
    int16_t samples_ref[SAMPLES];
    int16_t samples_orig[SAMPLES];
    int32_t volumes[CHANNELS + PADDING];
    int i, j, padding;
    pa_do_volume_func_t func;
    struct timeval start, stop;

    func = pa_get_volume_func (PA_SAMPLE_S16NE);

    /* sizeof yields a size_t, which needs %zu (not the signed %zd) */
    printf ("checking MMX %zu\n", sizeof (samples));

    pa_random (samples, sizeof (samples));
    memcpy (samples_ref, samples, sizeof (samples));
    memcpy (samples_orig, samples, sizeof (samples));

    for (i = 0; i < CHANNELS; i++)
        volumes[i] = rand() >> 1;
    /* repeat the volumes into the padding area that the SIMD code overreads */
    for (padding = 0; padding < PADDING; padding++, i++)
        volumes[i] = volumes[padding];

    func (samples_ref, volumes, CHANNELS, sizeof (samples));
    pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));

    for (i = 0; i < SAMPLES; i++) {
        if (samples[i] != samples_ref[i]) {
            /* %x takes an unsigned int; go through uint16_t so that negative
             * samples print as 4 hex digits instead of being sign-extended */
            printf ("%d: %04x != %04x (%04x * %04x)\n", i,
                    (unsigned) (uint16_t) samples[i],
                    (unsigned) (uint16_t) samples_ref[i],
                    (unsigned) (uint16_t) samples_orig[i],
                    (unsigned) volumes[i % CHANNELS]);
        }
    }

    /* time the MMX implementation */
    pa_gettimeofday(&start);
    for (j = 0; j < TIMES; j++) {
        memcpy (samples, samples_orig, sizeof (samples));
        pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
    }
    pa_gettimeofday(&stop);
    pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));

    /* time the reference implementation */
    pa_gettimeofday(&start);
    for (j = 0; j < TIMES; j++) {
        memcpy (samples_ref, samples_orig, sizeof (samples));
        func (samples_ref, volumes, CHANNELS, sizeof (samples));
    }
    pa_gettimeofday(&stop);
    pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif #endif
@ -301,13 +301,13 @@ static void run_test (void) {
/* Install the MMX volume routines for the native-endian and reverse-endian
 * 16-bit sample formats. When RUN_TEST is defined the self-test runs first.
 * On anything other than i386/amd64 the body is compiled out and the call
 * does nothing. `flags` is not consulted here. */
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
#if defined (__i386__) || defined (__amd64__)
    pa_log_info("Initialising MMX optimized functions.");

#ifdef RUN_TEST
    run_test ();
#endif

    pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
    pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
#endif /* defined (__i386__) || defined (__amd64__) */
}

View file

@ -77,169 +77,169 @@
static void static void
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
pa_reg_x86 channel, temp; pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount /* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */ * we overread the volume array, which should have enough padding. */
channels = MAX (8, channels); channels = MAX (8, channels);
__asm__ __volatile__ ( __asm__ __volatile__ (
" xor %3, %3 \n\t" " xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t" " je 2f \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
" movw (%0), %w4 \n\t" /* .. | p0 | */ " movw (%0), %w4 \n\t" /* .. | p0 | */
" movd %4, %%xmm1 \n\t" " movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
" movw %w4, (%0) \n\t" " movw %w4, (%0) \n\t"
" add $2, %0 \n\t" " add $2, %0 \n\t"
MOD_ADD ($1, %5) MOD_ADD ($1, %5)
"2: \n\t" "2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
" test $1, %2 \n\t" " test $1, %2 \n\t"
" je 4f \n\t" " je 4f \n\t"
"3: \n\t" /* do samples in groups of 2 */ "3: \n\t" /* do samples in groups of 2 */
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t" " add $4, %0 \n\t"
MOD_ADD ($2, %5) MOD_ADD ($2, %5)
"4: \n\t" "4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
" test $1, %2 \n\t" " test $1, %2 \n\t"
" je 6f \n\t" " je 6f \n\t"
/* FIXME, we can do aligned access of the volume values if we can guarantee /* FIXME, we can do aligned access of the volume values if we can guarantee
* that the array is 16 bytes aligned, we probably have to do the odd values * that the array is 16 bytes aligned, we probably have to do the odd values
* after this then. */ * after this then. */
"5: \n\t" /* do samples in groups of 4 */ "5: \n\t" /* do samples in groups of 4 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" add $8, %0 \n\t" " add $8, %0 \n\t"
MOD_ADD ($4, %5) MOD_ADD ($4, %5)
"6: \n\t" "6: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
" cmp $0, %2 \n\t" " cmp $0, %2 \n\t"
" je 8f \n\t" " je 8f \n\t"
"7: \n\t" /* do samples in groups of 8 */ "7: \n\t" /* do samples in groups of 8 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2) VOLUME_32x16 (%%xmm3, %%xmm2)
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
" add $16, %0 \n\t" " add $16, %0 \n\t"
MOD_ADD ($8, %5) MOD_ADD ($8, %5)
" dec %2 \n\t" " dec %2 \n\t"
" jne 7b \n\t" " jne 7b \n\t"
"8: \n\t" "8: \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
: "r" ((pa_reg_x86)channels) : "r" ((pa_reg_x86)channels)
: "cc" : "cc"
); );
} }
static void static void
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{ {
pa_reg_x86 channel, temp; pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount /* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */ * we overread the volume array, which should have enough padding. */
channels = MAX (8, channels); channels = MAX (8, channels);
__asm__ __volatile__ ( __asm__ __volatile__ (
" xor %3, %3 \n\t" " xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
" test $1, %2 \n\t" /* check for odd samples */ " test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t" " je 2f \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
" movw (%0), %w4 \n\t" /* .. | p0 | */ " movw (%0), %w4 \n\t" /* .. | p0 | */
" rorw $8, %w4 \n\t" " rorw $8, %w4 \n\t"
" movd %4, %%xmm1 \n\t" " movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
" rorw $8, %w4 \n\t" " rorw $8, %w4 \n\t"
" movw %w4, (%0) \n\t" " movw %w4, (%0) \n\t"
" add $2, %0 \n\t" " add $2, %0 \n\t"
MOD_ADD ($1, %5) MOD_ADD ($1, %5)
"2: \n\t" "2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
" test $1, %2 \n\t" " test $1, %2 \n\t"
" je 4f \n\t" " je 4f \n\t"
"3: \n\t" /* do samples in groups of 2 */ "3: \n\t" /* do samples in groups of 2 */
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
SWAP_16 (%%xmm1) SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0) SWAP_16 (%%xmm0)
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t" " add $4, %0 \n\t"
MOD_ADD ($2, %5) MOD_ADD ($2, %5)
"4: \n\t" "4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
" test $1, %2 \n\t" " test $1, %2 \n\t"
" je 6f \n\t" " je 6f \n\t"
/* FIXME, we can do aligned access of the volume values if we can guarantee /* FIXME, we can do aligned access of the volume values if we can guarantee
* that the array is 16 bytes aligned, we probably have to do the odd values * that the array is 16 bytes aligned, we probably have to do the odd values
* after this then. */ * after this then. */
"5: \n\t" /* do samples in groups of 4 */ "5: \n\t" /* do samples in groups of 4 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
SWAP_16 (%%xmm1) SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0) SWAP_16 (%%xmm0)
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" add $8, %0 \n\t" " add $8, %0 \n\t"
MOD_ADD ($4, %5) MOD_ADD ($4, %5)
"6: \n\t" "6: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
" cmp $0, %2 \n\t" " cmp $0, %2 \n\t"
" je 8f \n\t" " je 8f \n\t"
"7: \n\t" /* do samples in groups of 8 */ "7: \n\t" /* do samples in groups of 8 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
SWAP_16_2 (%%xmm1, %%xmm3) SWAP_16_2 (%%xmm1, %%xmm3)
VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2) VOLUME_32x16 (%%xmm3, %%xmm2)
SWAP_16_2 (%%xmm0, %%xmm2) SWAP_16_2 (%%xmm0, %%xmm2)
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
" add $16, %0 \n\t" " add $16, %0 \n\t"
MOD_ADD ($8, %5) MOD_ADD ($8, %5)
" dec %2 \n\t" " dec %2 \n\t"
" jne 7b \n\t" " jne 7b \n\t"
"8: \n\t" "8: \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
: "r" ((pa_reg_x86)channels) : "r" ((pa_reg_x86)channels)
: "cc" : "cc"
); );
} }
#undef RUN_TEST #undef RUN_TEST
@ -251,64 +251,64 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16

/* Self-test and micro-benchmark for the SSE native-endian routine.
 *
 * Fills a buffer with random samples, runs both the routine currently
 * registered for PA_SAMPLE_S16NE and pa_volume_s16ne_sse over identical
 * copies, prints every sample on which they disagree, and then logs how long
 * TIMES runs of each take. */
static void run_test (void) {
    int16_t samples[SAMPLES];
    int16_t samples_ref[SAMPLES];
    int16_t samples_orig[SAMPLES];
    int32_t volumes[CHANNELS + PADDING];
    int i, j, padding;
    pa_do_volume_func_t func;
    struct timeval start, stop;

    func = pa_get_volume_func (PA_SAMPLE_S16NE);

    /* sizeof yields a size_t, which needs %zu (not the signed %zd) */
    printf ("checking SSE %zu\n", sizeof (samples));

    pa_random (samples, sizeof (samples));
    memcpy (samples_ref, samples, sizeof (samples));
    memcpy (samples_orig, samples, sizeof (samples));

    for (i = 0; i < CHANNELS; i++)
        volumes[i] = rand() >> 1;
    /* repeat the volumes into the padding area that the SIMD code overreads */
    for (padding = 0; padding < PADDING; padding++, i++)
        volumes[i] = volumes[padding];

    func (samples_ref, volumes, CHANNELS, sizeof (samples));
    pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));

    for (i = 0; i < SAMPLES; i++) {
        if (samples[i] != samples_ref[i]) {
            /* %x takes an unsigned int; go through uint16_t so that negative
             * samples print as 4 hex digits instead of being sign-extended */
            printf ("%d: %04x != %04x (%04x * %04x)\n", i,
                    (unsigned) (uint16_t) samples[i],
                    (unsigned) (uint16_t) samples_ref[i],
                    (unsigned) (uint16_t) samples_orig[i],
                    (unsigned) volumes[i % CHANNELS]);
        }
    }

    /* time the SSE implementation */
    pa_gettimeofday(&start);
    for (j = 0; j < TIMES; j++) {
        memcpy (samples, samples_orig, sizeof (samples));
        pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
    }
    pa_gettimeofday(&stop);
    pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));

    /* time the reference implementation */
    pa_gettimeofday(&start);
    for (j = 0; j < TIMES; j++) {
        memcpy (samples_ref, samples_orig, sizeof (samples));
        func (samples_ref, volumes, CHANNELS, sizeof (samples));
    }
    pa_gettimeofday(&stop);
    pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif #endif
#endif /* defined (__i386__) || defined (__amd64__) */ #endif /* defined (__i386__) || defined (__amd64__) */
/* Install the SSE volume routines for the native-endian and reverse-endian
 * 16-bit sample formats. When RUN_TEST is defined the self-test runs first.
 * On anything other than i386/amd64 the body is compiled out and the call
 * does nothing. `flags` is not consulted here. */
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
#if defined (__i386__) || defined (__amd64__)
    pa_log_info("Initialising SSE optimized functions.");

#ifdef RUN_TEST
    run_test ();
#endif

    pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
    pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
#endif /* defined (__i386__) || defined (__amd64__) */
}