mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-11-04 13:29:59 -05:00
whitespace fixes
This commit is contained in:
parent
3cc1278dcf
commit
f09b51198f
8 changed files with 695 additions and 670 deletions
|
|
@ -36,7 +36,7 @@
|
|||
|
||||
#if defined (__arm__) && defined (__linux__)
|
||||
|
||||
#define MAX_BUFFER 4096
|
||||
#define MAX_BUFFER 4096
|
||||
static char *
|
||||
get_cpuinfo_line (char *cpuinfo, const char *tag) {
|
||||
char *line, *end, *colon;
|
||||
|
|
@ -106,20 +106,20 @@ void pa_cpu_init_arm (void) {
|
|||
}
|
||||
/* get the CPU features */
|
||||
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
|
||||
char *state = NULL, *current;
|
||||
char *state = NULL, *current;
|
||||
|
||||
while ((current = pa_split_spaces (line, &state))) {
|
||||
if (!strcmp (current, "vfp"))
|
||||
flags |= PA_CPU_ARM_VFP;
|
||||
else if (!strcmp (current, "edsp"))
|
||||
flags |= PA_CPU_ARM_EDSP;
|
||||
else if (!strcmp (current, "neon"))
|
||||
flags |= PA_CPU_ARM_NEON;
|
||||
else if (!strcmp (current, "vfpv3"))
|
||||
flags |= PA_CPU_ARM_VFPV3;
|
||||
while ((current = pa_split_spaces (line, &state))) {
|
||||
if (!strcmp (current, "vfp"))
|
||||
flags |= PA_CPU_ARM_VFP;
|
||||
else if (!strcmp (current, "edsp"))
|
||||
flags |= PA_CPU_ARM_EDSP;
|
||||
else if (!strcmp (current, "neon"))
|
||||
flags |= PA_CPU_ARM_NEON;
|
||||
else if (!strcmp (current, "vfpv3"))
|
||||
flags |= PA_CPU_ARM_VFPV3;
|
||||
|
||||
free (current);
|
||||
}
|
||||
free (current);
|
||||
}
|
||||
}
|
||||
free (cpuinfo);
|
||||
|
||||
|
|
|
|||
|
|
@ -34,14 +34,15 @@
|
|||
static void
|
||||
get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
" push %%"PA_REG_b" \n\t"
|
||||
" cpuid \n\t"
|
||||
" mov %%ebx, %%esi \n\t"
|
||||
" pop %%"PA_REG_b" \n\t"
|
||||
__asm__ __volatile__ (
|
||||
" push %%"PA_REG_b" \n\t"
|
||||
" cpuid \n\t"
|
||||
" mov %%ebx, %%esi \n\t"
|
||||
" pop %%"PA_REG_b" \n\t"
|
||||
|
||||
: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
|
||||
: "0" (op));
|
||||
: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
|
||||
: "0" (op)
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -97,23 +98,23 @@ void pa_cpu_init_x86 (void) {
|
|||
}
|
||||
|
||||
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
|
||||
(flags & PA_CPU_X86_MMX) ? "MMX " : "",
|
||||
(flags & PA_CPU_X86_SSE) ? "SSE " : "",
|
||||
(flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
|
||||
(flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
|
||||
(flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
|
||||
(flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
|
||||
(flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
|
||||
(flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
|
||||
(flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
|
||||
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
|
||||
(flags & PA_CPU_X86_MMX) ? "MMX " : "",
|
||||
(flags & PA_CPU_X86_SSE) ? "SSE " : "",
|
||||
(flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
|
||||
(flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
|
||||
(flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
|
||||
(flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
|
||||
(flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
|
||||
(flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
|
||||
(flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
|
||||
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
|
||||
|
||||
/* activate various optimisations */
|
||||
if (flags & PA_CPU_X86_MMX) {
|
||||
if (flags & PA_CPU_X86_MMX)
|
||||
pa_volume_func_init_mmx (flags);
|
||||
}
|
||||
if (flags & PA_CPU_X86_SSE) {
|
||||
pa_volume_func_init_sse (flags);
|
||||
}
|
||||
|
||||
if (flags & PA_CPU_X86_SSE)
|
||||
pa_volume_func_init_sse (flags);
|
||||
|
||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1065,30 +1065,53 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
|
|||
}
|
||||
|
||||
static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
|
||||
unsigned i;
|
||||
|
||||
switch (r->work_format) {
|
||||
case PA_SAMPLE_FLOAT32NE:
|
||||
{
|
||||
float *d, *s;
|
||||
|
||||
d = (float *) dst;
|
||||
s = (float *) src;
|
||||
d = (float *) dst;
|
||||
s = (float *) src;
|
||||
|
||||
for (; n > 0; n--, s++, d += 2)
|
||||
d[0] = d[1] = *s;
|
||||
break;
|
||||
}
|
||||
for (i = n >> 2; i; i--) {
|
||||
d[0] = d[1] = s[0];
|
||||
d[2] = d[3] = s[1];
|
||||
d[4] = d[5] = s[2];
|
||||
d[6] = d[7] = s[3];
|
||||
s += 4;
|
||||
d += 8;
|
||||
}
|
||||
for (i = n & 3; i; i--) {
|
||||
d[0] = d[1] = s[0];
|
||||
s++;
|
||||
d += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PA_SAMPLE_S16NE:
|
||||
{
|
||||
int16_t *d, *s;
|
||||
|
||||
d = (int16_t *) dst;
|
||||
s = (int16_t *) src;
|
||||
d = (int16_t *) dst;
|
||||
s = (int16_t *) src;
|
||||
|
||||
for (; n > 0; n--, s++, d += 2)
|
||||
d[0] = d[1] = *s;
|
||||
break;
|
||||
}
|
||||
for (i = n >> 2; i; i--) {
|
||||
d[0] = d[1] = s[0];
|
||||
d[2] = d[3] = s[1];
|
||||
d[4] = d[5] = s[2];
|
||||
d[6] = d[7] = s[3];
|
||||
s += 4;
|
||||
d += 8;
|
||||
}
|
||||
for (i = n & 3; i; i--) {
|
||||
d[0] = d[1] = s[0];
|
||||
s++;
|
||||
d += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
pa_assert_not_reached();
|
||||
}
|
||||
|
|
@ -1114,7 +1137,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
|||
for (ic = 0; ic < n_ic; ic++) {
|
||||
float vol;
|
||||
|
||||
vol = r->map_table_f[oc][ic];
|
||||
vol = r->map_table_f[oc][ic];
|
||||
|
||||
if (vol <= 0.0)
|
||||
continue;
|
||||
|
|
@ -1122,18 +1145,18 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
|||
d = (float *)dst + oc;
|
||||
s = (float *)src + ic;
|
||||
|
||||
if (vol >= 1.0) {
|
||||
if (vol >= 1.0) {
|
||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||
*d += *s;
|
||||
} else {
|
||||
} else {
|
||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||
*d += *s * vol;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
case PA_SAMPLE_S16NE:
|
||||
{
|
||||
int16_t *d, *s;
|
||||
|
|
@ -1144,7 +1167,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
|||
for (ic = 0; ic < n_ic; ic++) {
|
||||
int32_t vol;
|
||||
|
||||
vol = r->map_table_i[oc][ic];
|
||||
vol = r->map_table_i[oc][ic];
|
||||
|
||||
if (vol <= 0)
|
||||
continue;
|
||||
|
|
@ -1158,11 +1181,11 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
|
|||
} else {
|
||||
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
|
||||
*d += (int16_t) (((int32_t)*s * vol) >> 16);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
default:
|
||||
pa_assert_not_reached();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -752,12 +752,13 @@ void pa_volume_memchunk(
|
|||
return;
|
||||
}
|
||||
|
||||
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
|
||||
|
||||
do_volume = pa_get_volume_func (spec->format);
|
||||
pa_assert(do_volume);
|
||||
|
||||
calc_volume_table[spec->format] ((void *)linear, volume);
|
||||
|
||||
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
|
||||
|
||||
do_volume (ptr, (void *)linear, spec->channels, c->length);
|
||||
|
||||
pa_memblock_release(c->memblock);
|
||||
|
|
@ -944,12 +945,12 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
|
|||
for (; n > 0; n--) {
|
||||
float f;
|
||||
|
||||
f = *s;
|
||||
f = *s;
|
||||
*d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
|
||||
|
||||
s = (const float*) ((const uint8_t*) s + sstr);
|
||||
d = (float*) ((uint8_t*) d + dstr);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pa_assert(format == PA_SAMPLE_FLOAT32RE);
|
||||
|
||||
|
|
|
|||
|
|
@ -45,81 +45,81 @@
|
|||
static void
|
||||
pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
int32_t *ve;
|
||||
int32_t *ve;
|
||||
|
||||
channels = MAX (4, channels);
|
||||
ve = volumes + channels;
|
||||
channels = MAX (4, channels);
|
||||
ve = volumes + channels;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
" mov r6, %1 \n\t"
|
||||
" mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
|
||||
" tst %3, #1 \n\t" /* check for odd samples */
|
||||
" beq 2f \n\t"
|
||||
__asm__ __volatile__ (
|
||||
" mov r6, %1 \n\t"
|
||||
" mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
|
||||
" tst %3, #1 \n\t" /* check for odd samples */
|
||||
" beq 2f \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
" ldr r0, [r6], #4 \n\t" /* odd samples volumes */
|
||||
" ldrh r2, [%0] \n\t"
|
||||
"1: \n\t"
|
||||
" ldr r0, [r6], #4 \n\t" /* odd samples volumes */
|
||||
" ldrh r2, [%0] \n\t"
|
||||
|
||||
" smulwb r0, r0, r2 \n\t"
|
||||
" ssat r0, #16, r0 \n\t"
|
||||
" smulwb r0, r0, r2 \n\t"
|
||||
" ssat r0, #16, r0 \n\t"
|
||||
|
||||
" strh r0, [%0], #2 \n\t"
|
||||
" strh r0, [%0], #2 \n\t"
|
||||
|
||||
MOD_INC()
|
||||
MOD_INC()
|
||||
|
||||
"2: \n\t"
|
||||
" mov %3, %3, LSR #1 \n\t"
|
||||
" tst %3, #1 \n\t" /* check for odd samples */
|
||||
" beq 4f \n\t"
|
||||
"2: \n\t"
|
||||
" mov %3, %3, LSR #1 \n\t"
|
||||
" tst %3, #1 \n\t" /* check for odd samples */
|
||||
" beq 4f \n\t"
|
||||
|
||||
"3: \n\t"
|
||||
" ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
|
||||
" ldr r0, [%0] \n\t"
|
||||
"3: \n\t"
|
||||
" ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
|
||||
" ldr r0, [%0] \n\t"
|
||||
|
||||
" smulwt r2, r2, r0 \n\t"
|
||||
" smulwb r3, r3, r0 \n\t"
|
||||
" smulwt r2, r2, r0 \n\t"
|
||||
" smulwb r3, r3, r0 \n\t"
|
||||
|
||||
" ssat r2, #16, r2 \n\t"
|
||||
" ssat r3, #16, r3 \n\t"
|
||||
" ssat r2, #16, r2 \n\t"
|
||||
" ssat r3, #16, r3 \n\t"
|
||||
|
||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||
" str r0, [%0], #4 \n\t"
|
||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||
" str r0, [%0], #4 \n\t"
|
||||
|
||||
MOD_INC()
|
||||
MOD_INC()
|
||||
|
||||
"4: \n\t"
|
||||
" movs %3, %3, LSR #1 \n\t"
|
||||
" beq 6f \n\t"
|
||||
"4: \n\t"
|
||||
" movs %3, %3, LSR #1 \n\t"
|
||||
" beq 6f \n\t"
|
||||
|
||||
"5: \n\t"
|
||||
" ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
|
||||
" ldrd r4, [r6], #8 \n\t"
|
||||
" ldrd r0, [%0] \n\t"
|
||||
"5: \n\t"
|
||||
" ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
|
||||
" ldrd r4, [r6], #8 \n\t"
|
||||
" ldrd r0, [%0] \n\t"
|
||||
|
||||
" smulwt r2, r2, r0 \n\t"
|
||||
" smulwb r3, r3, r0 \n\t"
|
||||
" smulwt r4, r4, r1 \n\t"
|
||||
" smulwb r5, r5, r1 \n\t"
|
||||
" smulwt r2, r2, r0 \n\t"
|
||||
" smulwb r3, r3, r0 \n\t"
|
||||
" smulwt r4, r4, r1 \n\t"
|
||||
" smulwb r5, r5, r1 \n\t"
|
||||
|
||||
" ssat r2, #16, r2 \n\t"
|
||||
" ssat r3, #16, r3 \n\t"
|
||||
" ssat r4, #16, r4 \n\t"
|
||||
" ssat r5, #16, r5 \n\t"
|
||||
" ssat r2, #16, r2 \n\t"
|
||||
" ssat r3, #16, r3 \n\t"
|
||||
" ssat r4, #16, r4 \n\t"
|
||||
" ssat r5, #16, r5 \n\t"
|
||||
|
||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||
" pkhbt r1, r5, r4, LSL #16 \n\t"
|
||||
" strd r0, [%0], #8 \n\t"
|
||||
" pkhbt r0, r3, r2, LSL #16 \n\t"
|
||||
" pkhbt r1, r5, r4, LSL #16 \n\t"
|
||||
" strd r0, [%0], #8 \n\t"
|
||||
|
||||
MOD_INC()
|
||||
MOD_INC()
|
||||
|
||||
" subs %3, %3, #1 \n\t"
|
||||
" bne 5b \n\t"
|
||||
"6: \n\t"
|
||||
" subs %3, %3, #1 \n\t"
|
||||
" bne 5b \n\t"
|
||||
"6: \n\t"
|
||||
|
||||
: "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
|
||||
:
|
||||
: "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
|
||||
);
|
||||
: "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
|
||||
:
|
||||
: "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#undef RUN_TEST
|
||||
|
|
@ -131,51 +131,51 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
|||
#define PADDING 16
|
||||
|
||||
static void run_test (void) {
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
|
||||
printf ("checking ARM %zd\n", sizeof (samples));
|
||||
printf ("checking ARM %zd\n", sizeof (samples));
|
||||
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -184,12 +184,12 @@ static void run_test (void) {
|
|||
|
||||
void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
|
||||
#if defined (__arm__)
|
||||
pa_log_info("Initialising ARM optimized functions.");
|
||||
pa_log_info("Initialising ARM optimized functions.");
|
||||
|
||||
#ifdef RUN_TEST
|
||||
run_test ();
|
||||
run_test ();
|
||||
#endif
|
||||
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
|
||||
#endif /* defined (__arm__) */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,289 +35,289 @@
|
|||
static void
|
||||
pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
|
||||
t = (int32_t) *samples - 0x80;
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
||||
*samples++ = (uint8_t) (t + 0x80);
|
||||
t = (int32_t) *samples - 0x80;
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
||||
*samples++ = (uint8_t) (t + 0x80);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
|
||||
t = (int32_t) st_alaw2linear16(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
||||
t = (int32_t) st_alaw2linear16(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
|
||||
t = (int32_t) st_ulaw2linear16(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
||||
t = (int32_t) st_ulaw2linear16(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (int16_t);
|
||||
length /= sizeof (int16_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
|
||||
/* Multiplying the 32bit volume factor with the 16bit
|
||||
* sample might result in a 48bit value. We want to
|
||||
* do without 64 bit integers and hence do the
|
||||
* multiplication independently for the HI and LO part
|
||||
* of the volume. */
|
||||
/* Multiplying the 32bit volume factor with the 16bit
|
||||
* sample might result in a 48bit value. We want to
|
||||
* do without 64 bit integers and hence do the
|
||||
* multiplication independently for the HI and LO part
|
||||
* of the volume. */
|
||||
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
|
||||
t = (int32_t)(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (int16_t) t;
|
||||
t = (int32_t)(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = (int16_t) t;
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (int16_t);
|
||||
length /= sizeof (int16_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
for (channel = 0; length; length--) {
|
||||
int32_t t, hi, lo;
|
||||
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
hi = volumes[channel] >> 16;
|
||||
lo = volumes[channel] & 0xFFFF;
|
||||
|
||||
t = (int32_t) PA_INT16_SWAP(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = PA_INT16_SWAP((int16_t) t);
|
||||
t = (int32_t) PA_INT16_SWAP(*samples);
|
||||
t = ((t * lo) >> 16) + (t * hi);
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||
*samples++ = PA_INT16_SWAP((int16_t) t);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (float);
|
||||
length /= sizeof (float);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
*samples++ *= volumes[channel];
|
||||
for (channel = 0; length; length--) {
|
||||
*samples++ *= volumes[channel];
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (float);
|
||||
length /= sizeof (float);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
float t;
|
||||
for (channel = 0; length; length--) {
|
||||
float t;
|
||||
|
||||
t = PA_FLOAT32_SWAP(*samples);
|
||||
t *= volumes[channel];
|
||||
*samples++ = PA_FLOAT32_SWAP(t);
|
||||
t = PA_FLOAT32_SWAP(*samples);
|
||||
t *= volumes[channel];
|
||||
*samples++ = PA_FLOAT32_SWAP(t);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (int32_t);
|
||||
length /= sizeof (int32_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t)(*samples);
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = (int32_t) t;
|
||||
t = (int64_t)(*samples);
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = (int32_t) t;
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (int32_t);
|
||||
length /= sizeof (int32_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t) PA_INT32_SWAP(*samples);
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = PA_INT32_SWAP((int32_t) t);
|
||||
t = (int64_t) PA_INT32_SWAP(*samples);
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = PA_INT32_SWAP((int32_t) t);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
uint8_t *e;
|
||||
unsigned channel;
|
||||
uint8_t *e;
|
||||
|
||||
e = samples + length;
|
||||
e = samples + length;
|
||||
|
||||
for (channel = 0; samples < e; samples += 3) {
|
||||
int64_t t;
|
||||
for (channel = 0; samples < e; samples += 3) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||
t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
uint8_t *e;
|
||||
unsigned channel;
|
||||
uint8_t *e;
|
||||
|
||||
e = samples + length;
|
||||
e = samples + length;
|
||||
|
||||
for (channel = 0; samples < e; samples += 3) {
|
||||
int64_t t;
|
||||
for (channel = 0; samples < e; samples += 3) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||
t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (uint32_t);
|
||||
length /= sizeof (uint32_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t) ((int32_t) (*samples << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = ((uint32_t) ((int32_t) t)) >> 8;
|
||||
t = (int64_t) ((int32_t) (*samples << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = ((uint32_t) ((int32_t) t)) >> 8;
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
unsigned channel;
|
||||
unsigned channel;
|
||||
|
||||
length /= sizeof (uint32_t);
|
||||
length /= sizeof (uint32_t);
|
||||
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
for (channel = 0; length; length--) {
|
||||
int64_t t;
|
||||
|
||||
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
|
||||
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
|
||||
t = (t * volumes[channel]) >> 16;
|
||||
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
|
||||
*samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
|
||||
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
if (PA_UNLIKELY(++channel >= channels))
|
||||
channel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static pa_do_volume_func_t do_volume_table[] =
|
||||
{
|
||||
[PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
|
||||
[PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
|
||||
[PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
|
||||
[PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
|
||||
[PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
|
||||
[PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
|
||||
[PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
|
||||
[PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
|
||||
[PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
|
||||
[PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
|
||||
[PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
|
||||
[PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
|
||||
[PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
|
||||
[PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
|
||||
[PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
|
||||
[PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
|
||||
[PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
|
||||
[PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
|
||||
[PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
|
||||
[PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
|
||||
[PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
|
||||
[PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
|
||||
[PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
|
||||
[PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
|
||||
[PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
|
||||
[PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
|
||||
};
|
||||
|
||||
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
|
||||
|
|
|
|||
|
|
@ -96,147 +96,147 @@
|
|||
static void
|
||||
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
pa_reg_x86 channel, temp;
|
||||
pa_reg_x86 channel, temp;
|
||||
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (4, channels);
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (4, channels);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
|
||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" movd %4, %%mm1 \n\t"
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" movd %4, %%mm1 \n\t"
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 4f \n\t"
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 4f \n\t"
|
||||
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
VOLUME_32x16 (%%mm3, %%mm2)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 5b \n\t"
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
VOLUME_32x16 (%%mm3, %%mm2)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 5b \n\t"
|
||||
|
||||
"6: \n\t"
|
||||
" emms \n\t"
|
||||
"6: \n\t"
|
||||
" emms \n\t"
|
||||
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
pa_reg_x86 channel, temp;
|
||||
pa_reg_x86 channel, temp;
|
||||
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (4, channels);
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (4, channels);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
" pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
|
||||
" pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
|
||||
" pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
|
||||
" psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
|
||||
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
|
||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movd %4, %%mm1 \n\t"
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movd %4, %%mm1 \n\t"
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 4f \n\t"
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 4f \n\t"
|
||||
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
SWAP_16 (%%mm1)
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
SWAP_16 (%%mm0)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
SWAP_16 (%%mm1)
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
SWAP_16 (%%mm0)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||
SWAP_16_2 (%%mm1, %%mm3)
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
VOLUME_32x16 (%%mm3, %%mm2)
|
||||
SWAP_16_2 (%%mm0, %%mm2)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 5b \n\t"
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
|
||||
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
|
||||
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
|
||||
SWAP_16_2 (%%mm1, %%mm3)
|
||||
VOLUME_32x16 (%%mm1, %%mm0)
|
||||
VOLUME_32x16 (%%mm3, %%mm2)
|
||||
SWAP_16_2 (%%mm0, %%mm2)
|
||||
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 5b \n\t"
|
||||
|
||||
"6: \n\t"
|
||||
" emms \n\t"
|
||||
"6: \n\t"
|
||||
" emms \n\t"
|
||||
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#undef RUN_TEST
|
||||
|
|
@ -248,51 +248,51 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
|||
#define PADDING 16
|
||||
|
||||
static void run_test (void) {
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
|
||||
printf ("checking MMX %zd\n", sizeof (samples));
|
||||
printf ("checking MMX %zd\n", sizeof (samples));
|
||||
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -301,13 +301,13 @@ static void run_test (void) {
|
|||
|
||||
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
|
||||
#if defined (__i386__) || defined (__amd64__)
|
||||
pa_log_info("Initialising MMX optimized functions.");
|
||||
pa_log_info("Initialising MMX optimized functions.");
|
||||
|
||||
#ifdef RUN_TEST
|
||||
run_test ();
|
||||
run_test ();
|
||||
#endif
|
||||
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
|
||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
|
||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
|
||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,169 +77,169 @@
|
|||
static void
|
||||
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
pa_reg_x86 channel, temp;
|
||||
pa_reg_x86 channel, temp;
|
||||
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (8, channels);
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (8, channels);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
|
||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" movd %4, %%xmm1 \n\t"
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" movd %4, %%xmm1 \n\t"
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 4f \n\t"
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 4f \n\t"
|
||||
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
|
||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||
* after this then. */
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||
* after this then. */
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
|
||||
"6: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 8f \n\t"
|
||||
"6: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 8f \n\t"
|
||||
|
||||
"7: \n\t" /* do samples in groups of 8 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||
" add $16, %0 \n\t"
|
||||
MOD_ADD ($8, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 7b \n\t"
|
||||
"8: \n\t"
|
||||
"7: \n\t" /* do samples in groups of 8 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||
" add $16, %0 \n\t"
|
||||
MOD_ADD ($8, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 7b \n\t"
|
||||
"8: \n\t"
|
||||
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
|
||||
{
|
||||
pa_reg_x86 channel, temp;
|
||||
pa_reg_x86 channel, temp;
|
||||
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (8, channels);
|
||||
/* the max number of samples we process at a time, this is also the max amount
|
||||
* we overread the volume array, which should have enough padding. */
|
||||
channels = MAX (8, channels);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
__asm__ __volatile__ (
|
||||
" xor %3, %3 \n\t"
|
||||
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
|
||||
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
" test $1, %2 \n\t" /* check for odd samples */
|
||||
" je 2f \n\t"
|
||||
|
||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movd %4, %%xmm1 \n\t"
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
|
||||
" movw (%0), %w4 \n\t" /* .. | p0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movd %4, %%xmm1 \n\t"
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
|
||||
" rorw $8, %w4 \n\t"
|
||||
" movw %w4, (%0) \n\t"
|
||||
" add $2, %0 \n\t"
|
||||
MOD_ADD ($1, %5)
|
||||
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 4f \n\t"
|
||||
"2: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 4f \n\t"
|
||||
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||
SWAP_16 (%%xmm1)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
SWAP_16 (%%xmm0)
|
||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
"3: \n\t" /* do samples in groups of 2 */
|
||||
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
|
||||
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
|
||||
SWAP_16 (%%xmm1)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
SWAP_16 (%%xmm0)
|
||||
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
|
||||
" add $4, %0 \n\t"
|
||||
MOD_ADD ($2, %5)
|
||||
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
"4: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
|
||||
" test $1, %2 \n\t"
|
||||
" je 6f \n\t"
|
||||
|
||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||
* after this then. */
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
SWAP_16 (%%xmm1)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
SWAP_16 (%%xmm0)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
/* FIXME, we can do aligned access of the volume values if we can guarantee
|
||||
* that the array is 16 bytes aligned, we probably have to do the odd values
|
||||
* after this then. */
|
||||
"5: \n\t" /* do samples in groups of 4 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
SWAP_16 (%%xmm1)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
SWAP_16 (%%xmm0)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" add $8, %0 \n\t"
|
||||
MOD_ADD ($4, %5)
|
||||
|
||||
"6: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 8f \n\t"
|
||||
"6: \n\t"
|
||||
" sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
|
||||
" cmp $0, %2 \n\t"
|
||||
" je 8f \n\t"
|
||||
|
||||
"7: \n\t" /* do samples in groups of 8 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||
SWAP_16_2 (%%xmm1, %%xmm3)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||
SWAP_16_2 (%%xmm0, %%xmm2)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||
" add $16, %0 \n\t"
|
||||
MOD_ADD ($8, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 7b \n\t"
|
||||
"8: \n\t"
|
||||
"7: \n\t" /* do samples in groups of 8 */
|
||||
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
|
||||
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
|
||||
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
|
||||
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
|
||||
SWAP_16_2 (%%xmm1, %%xmm3)
|
||||
VOLUME_32x16 (%%xmm1, %%xmm0)
|
||||
VOLUME_32x16 (%%xmm3, %%xmm2)
|
||||
SWAP_16_2 (%%xmm0, %%xmm2)
|
||||
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
|
||||
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
|
||||
" add $16, %0 \n\t"
|
||||
MOD_ADD ($8, %5)
|
||||
" dec %2 \n\t"
|
||||
" jne 7b \n\t"
|
||||
"8: \n\t"
|
||||
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
|
||||
: "r" ((pa_reg_x86)channels)
|
||||
: "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#undef RUN_TEST
|
||||
|
|
@ -251,64 +251,64 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
|
|||
#define PADDING 16
|
||||
|
||||
static void run_test (void) {
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
int16_t samples[SAMPLES];
|
||||
int16_t samples_ref[SAMPLES];
|
||||
int16_t samples_orig[SAMPLES];
|
||||
int32_t volumes[CHANNELS + PADDING];
|
||||
int i, j, padding;
|
||||
pa_do_volume_func_t func;
|
||||
struct timeval start, stop;
|
||||
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
func = pa_get_volume_func (PA_SAMPLE_S16NE);
|
||||
|
||||
printf ("checking SSE %zd\n", sizeof (samples));
|
||||
printf ("checking SSE %zd\n", sizeof (samples));
|
||||
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
pa_random (samples, sizeof (samples));
|
||||
memcpy (samples_ref, samples, sizeof (samples));
|
||||
memcpy (samples_orig, samples, sizeof (samples));
|
||||
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
for (i = 0; i < CHANNELS; i++)
|
||||
volumes[i] = rand() >> 1;
|
||||
for (padding = 0; padding < PADDING; padding++, i++)
|
||||
volumes[i] = volumes[padding];
|
||||
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||
for (i = 0; i < SAMPLES; i++) {
|
||||
if (samples[i] != samples_ref[i]) {
|
||||
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
|
||||
samples_orig[i], volumes[i % CHANNELS]);
|
||||
}
|
||||
}
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples, samples_orig, sizeof (samples));
|
||||
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
|
||||
pa_gettimeofday(&start);
|
||||
for (j = 0; j < TIMES; j++) {
|
||||
memcpy (samples_ref, samples_orig, sizeof (samples));
|
||||
func (samples_ref, volumes, CHANNELS, sizeof (samples));
|
||||
}
|
||||
pa_gettimeofday(&stop);
|
||||
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
|
||||
}
|
||||
#endif
|
||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||
|
||||
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
|
||||
#if defined (__i386__) || defined (__amd64__)
|
||||
pa_log_info("Initialising SSE optimized functions.");
|
||||
pa_log_info("Initialising SSE optimized functions.");
|
||||
|
||||
#ifdef RUN_TEST
|
||||
run_test ();
|
||||
run_test ();
|
||||
#endif
|
||||
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
|
||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
|
||||
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
|
||||
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
|
||||
#endif /* defined (__i386__) || defined (__amd64__) */
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue