dmix: add S24_3LE support

Add support to the dmix plugin for the S24_3LE sample format, which is
used by 24-bit USB devices.

The optimized assembler version uses only 23 bits for sample data so
that the lowest bit can be used for synchronization because there is no
24-bit cmpxchg instruction.
This commit is contained in:
Clemens Ladisch 2005-12-19 07:39:03 +00:00
parent 46f81f994b
commit 45f63a8735
8 changed files with 404 additions and 7 deletions

View file

@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
case SND_PCM_FORMAT_S32_BE:
case SND_PCM_FORMAT_S16_LE:
case SND_PCM_FORMAT_S16_BE:
case SND_PCM_FORMAT_S24_3LE:
break;
default:
SNDERR("invalid format");

View file

@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
volatile signed int *sum, size_t dst_step,
size_t src_step, size_t sum_step);
/*
 * Mixing callback for samples addressed as raw bytes (used for the
 * packed 24-bit S24_3LE format).
 *
 * size     - number of samples to process
 * dst      - destination sample bytes
 * src      - source sample bytes
 * sum      - 32-bit sum values accompanying the destination samples
 * dst_step, src_step, sum_step
 *          - distance between consecutive elements of the respective
 *            buffer, in bytes
 */
typedef void (mix_areas3_t)(unsigned int size,
volatile unsigned char *dst, unsigned char *src,
volatile signed int *sum, size_t dst_step,
size_t src_step, size_t sum_step);
struct slave_params {
snd_pcm_format_t format;
int rate;
@ -120,6 +125,7 @@ struct snd_pcm_direct {
signed int *sum_buffer; /* shared sum buffer */
mix_areas1_t *mix_areas1;
mix_areas2_t *mix_areas2;
mix_areas3_t *mix_areas3;
} dmix;
struct {
} dsnoop;

View file

@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
}
} else {
} else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
signed int *src;
volatile signed int *dst;
if (dmix->interleaved) {
@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
}
} else { /* SND_PCM_FORMAT_S24_3LE */
unsigned char *src;
volatile unsigned char *dst;
if (dmix->interleaved) {
/*
* process all areas in one loop
* it optimizes the memory accesses for this case
*/
dmix->u.dmix.mix_areas3(size * channels,
((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
((char *)src_areas[0].addr) + 3 * src_ofs * channels,
dmix->u.dmix.sum_buffer + (dst_ofs * channels),
3, 3, sizeof(signed int));
return;
}
for (chn = 0; chn < channels; chn++) {
dchn = dmix->bindings ? dmix->bindings[chn] : chn;
if (dchn >= dmix->shmptr->s.channels)
continue;
src_step = src_areas[chn].step / 8;
dst_step = dst_areas[dchn].step / 8;
src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
}
}
}

View file

@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
/* non-concurrent version, supporting both endians */
static unsigned long long dmix_supported_format =
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
(1ULL << SND_PCM_FORMAT_S24_3LE);
#include <byteswap.h>
@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
}
}
/*
 * Generic (non-atomic) mixer for packed 24-bit samples; always little endian.
 *
 * size      - number of samples to mix; 0 is a no-op
 * dst       - destination buffer, three bytes per sample
 * src       - source samples, three bytes per sample
 * sum       - 32-bit running sum belonging to each destination sample
 * dst_step, src_step, sum_step
 *           - distance in bytes between consecutive elements
 *
 * A destination sample whose three bytes are all zero is treated as not
 * mixed yet: the source sample is stored unchanged and becomes the new
 * sum.  Otherwise the source is added to the running sum, the sum is
 * stored unclamped, and the value written to dst is clamped to the
 * signed 24-bit range.
 */
static void mix_areas3(unsigned int size,
		       volatile unsigned char *dst, unsigned char *src,
		       volatile signed int *sum, size_t dst_step,
		       size_t src_step, size_t sum_step)
{
	register signed int sample;
	unsigned int bits;

	/*
	 * The loop below tests the counter only after a sample has been
	 * processed, so a zero count would wrap around and run far past
	 * the end of every buffer.
	 */
	if (size == 0)
		return;

	for (;;) {
		/*
		 * The top byte carries the sign.  Widen it by multiplication:
		 * left-shifting a negative value is undefined behaviour.
		 */
		sample = src[0] | (src[1] << 8) |
			 (((signed char *)src)[2] * 0x10000);
		if (!(dst[0] | dst[1] | dst[2])) {
			/* first contribution to this slot */
			*sum = sample;
		} else {
			sample += *sum;
			*sum = sample;
			if (sample > 0x7fffff)
				sample = 0x7fffff;
			else if (sample < -0x800000)
				sample = -0x800000;
		}
		/*
		 * Split into bytes through an unsigned copy so that the result
		 * does not depend on how the compiler shifts negative values.
		 */
		bits = (unsigned int)sample;
		dst[0] = (unsigned char)bits;
		dst[1] = (unsigned char)(bits >> 8);
		dst[2] = (unsigned char)(bits >> 16);
		if (!--size)
			return;
		dst += dst_step;
		src += src_step;
		/* sum_step is in bytes; keep the volatile qualifier */
		sum = (volatile signed int *)((volatile char *)sum + sum_step);
	}
}
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
dmix->u.dmix.mix_areas1 = mix_areas1_swap;
dmix->u.dmix.mix_areas2 = mix_areas2_swap;
}
dmix->u.dmix.mix_areas3 = mix_areas3;
}
#endif

View file

@ -5,33 +5,43 @@
/*
 * pcm_dmix_i386.h is a template: it defines functions whose names are
 * taken from the MIX_AREAS* macros and inserts LOCK_PREFIX in front of
 * its read-modify-write instructions.  It is included twice below.
 *
 * First pass: plain names, empty LOCK_PREFIX.
 */
#define MIX_AREAS1 mix_areas1
#define MIX_AREAS1_MMX mix_areas1_mmx
#define MIX_AREAS2 mix_areas2
#define MIX_AREAS3 mix_areas3
#define MIX_AREAS3_CMOV mix_areas3_cmov
#define LOCK_PREFIX ""
#include "pcm_dmix_i386.h"
#undef MIX_AREAS1
#undef MIX_AREAS1_MMX
#undef MIX_AREAS2
#undef MIX_AREAS3
#undef MIX_AREAS3_CMOV
#undef LOCK_PREFIX
/*
 * Second pass: "_smp" names, LOCK_PREFIX expands to the x86 "lock"
 * instruction prefix.
 */
#define MIX_AREAS1 mix_areas1_smp
#define MIX_AREAS1_MMX mix_areas1_smp_mmx
#define MIX_AREAS2 mix_areas2_smp
#define MIX_AREAS3 mix_areas3_smp
#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
#define LOCK_PREFIX "lock ; "
#include "pcm_dmix_i386.h"
#undef MIX_AREAS1
#undef MIX_AREAS1_MMX
#undef MIX_AREAS2
#undef MIX_AREAS3
#undef MIX_AREAS3_CMOV
#undef LOCK_PREFIX
static unsigned long long dmix_supported_format =
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
(1ULL << SND_PCM_FORMAT_S16_LE) |
(1ULL << SND_PCM_FORMAT_S32_LE) |
(1ULL << SND_PCM_FORMAT_S24_3LE);
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
FILE *in;
char line[255];
int smp = 0, mmx = 0;
int smp = 0, mmx = 0, cmov = 0;
/* try to determine, if we have a MMX capable CPU */
/* try to determine the capabilities of the CPU */
in = fopen("/proc/cpuinfo", "r");
if (in) {
while (!feof(in)) {
@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
else if (!strncmp(line, "flags", 5)) {
if (strstr(line, " mmx"))
mmx = 1;
if (strstr(line, " cmov"))
cmov = 1;
}
}
fclose(in);
}
// printf("MMX: %i, SMP: %i\n", mmx, smp);
if (mmx) {
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
} else {
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
}
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
if (cmov) {
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
} else {
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
}
}

View file

@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
: "esi", "edi", "edx", "ecx", "eax"
);
}
/*
 * 24-bit version for plain i386
 *
 * Mixes `size` packed 24-bit little-endian samples from src into dst,
 * keeping the running 32-bit totals in sum.  All steps are in bytes.
 *
 * Bit 0 of the first destination byte serves as the "already mixed"
 * flag: it is tested and set with a bit-test-and-set instruction, and
 * every value stored to dst has that bit set.  The stored value is
 * limited to the range -0x7fffff..0x7fffff.
 */
static void MIX_AREAS3(unsigned int size,
		       volatile unsigned char *dst, unsigned char *src,
		       volatile signed int *sum, size_t dst_step,
		       size_t src_step, size_t sum_step)
{
	unsigned int old_ebx;

	/*
	 *  ESI - src
	 *  EDI - dst
	 *  EBX - sum
	 *  ECX - old sample
	 *  EAX - sample / temporary
	 *  EDX - temporary
	 */
	__asm__ __volatile__ (
		"\n"
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
		/*
		 *  initialization, load ESI, EDI, EBX registers
		 */
		"\tmovl %1, %%edi\n"
		"\tmovl %2, %%esi\n"
		"\tmovl %3, %%ebx\n"
		"\tcmpl $0, %0\n"
		"\tjnz 1f\n"
		"\tjmp 6f\n"

		"\t.p2align 4,,15\n"

		"1:"

		/*
		 *   sample = *src;
		 *   sum_sample = *sum;
		 *   if (test_and_set_bit(0, dst) == 0)
		 *     sample -= sum_sample;
		 *   *sum += sample;
		 *
		 * The bit test uses a 16-bit operand: a 32-bit one would
		 * access four bytes of a three-byte sample, i.e. one byte
		 * past the end of the buffer for the last sample.
		 * leal is used for the addition because it leaves the carry
		 * flag produced by the bit test untouched.
		 */
		"\tmovsbl 2(%%esi), %%eax\n"
		"\tmovzwl (%%esi), %%ecx\n"
		"\tmovl (%%ebx), %%edx\n"
		"\tsall $16, %%eax\n"
		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
		"\tjc 2f\n"
		"\tsubl %%edx, %%ecx\n"
		"2:"
		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"

		/*
		 *   do {
		 *     sample = old_sample = *sum;
		 *     saturate(sample);
		 *     *dst = sample | 1;
		 *   } while (old_sample != *sum);
		 */

		"3:"
		"\tmovl (%%ebx), %%ecx\n"
		/*
		 *  if (sample > 0x7fffff)
		 */
		"\tmovl $0x7fffff, %%eax\n"
		"\tcmpl %%eax, %%ecx\n"
		"\tjg 4f\n"
		/*
		 *  if (sample < -0x7fffff)
		 */
		"\tmovl $-0x7fffff, %%eax\n"
		"\tcmpl %%eax, %%ecx\n"
		"\tjl 4f\n"
		/* in range; both limits above are odd, so only this path needs the OR */
		"\tmovl %%ecx, %%eax\n"
		"\torl $1, %%eax\n"
		"4:"
		"\tmovw %%ax, (%%edi)\n"
		"\tshrl $16, %%eax\n"
		"\tmovb %%al, 2(%%edi)\n"
		"\tcmpl %%ecx, (%%ebx)\n"
		"\tjnz 3b\n"

		/*
		 * while (size-- > 0)
		 */
		"\tdecl %0\n"
		"\tjz 6f\n"
		"\tadd %4, %%edi\n"
		"\tadd %5, %%esi\n"
		"\tadd %6, %%ebx\n"
		"\tjmp 1b\n"

		"6:"
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */

		: /* no output regs */
		: "m" (size), "m" (dst), "m" (src),
		  "m" (sum), "m" (dst_step), "m" (src_step),
		  "m" (sum_step), "m" (old_ebx)
		/* the code stores to *dst and *sum and changes the flags */
		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
	);
}
/*
 * 24-bit version for Pentium Pro/II
 *
 * Same contract as the plain i386 24-bit mixer, but the saturation is
 * done with conditional moves instead of branches: mixes `size` packed
 * 24-bit little-endian samples from src into dst, keeping the running
 * 32-bit totals in sum.  All steps are in bytes.
 *
 * Bit 0 of the first destination byte serves as the "already mixed"
 * flag; every value stored to dst has that bit set and is limited to
 * the range -0x7fffff..0x7fffff.
 */
static void MIX_AREAS3_CMOV(unsigned int size,
			    volatile unsigned char *dst, unsigned char *src,
			    volatile signed int *sum, size_t dst_step,
			    size_t src_step, size_t sum_step)
{
	unsigned int old_ebx;

	/*
	 *  ESI - src
	 *  EDI - dst
	 *  EBX - sum
	 *  ECX - old sample
	 *  EAX - sample / temporary
	 *  EDX - temporary
	 */
	__asm__ __volatile__ (
		"\n"
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
		/*
		 *  initialization, load ESI, EDI, EBX registers
		 */
		"\tmovl %1, %%edi\n"
		"\tmovl %2, %%esi\n"
		"\tmovl %3, %%ebx\n"
		"\tcmpl $0, %0\n"
		"\tjz 6f\n"

		"\t.p2align 4,,15\n"

		"1:"

		/*
		 *   sample = *src;
		 *   sum_sample = *sum;
		 *   if (test_and_set_bit(0, dst) == 0)
		 *     sample -= sum_sample;
		 *   *sum += sample;
		 *
		 * The bit test uses a 16-bit operand: a 32-bit one would
		 * access four bytes of a three-byte sample, i.e. one byte
		 * past the end of the buffer for the last sample.
		 * leal is used for the addition because it leaves the carry
		 * flag produced by the bit test untouched.
		 */
		"\tmovsbl 2(%%esi), %%eax\n"
		"\tmovzwl (%%esi), %%ecx\n"
		"\tmovl (%%ebx), %%edx\n"
		"\tsall $16, %%eax\n"
		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
		"\tjc 2f\n"
		"\tsubl %%edx, %%ecx\n"
		"2:"
		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"

		/*
		 *   do {
		 *     sample = old_sample = *sum;
		 *     saturate(sample);
		 *     *dst = sample | 1;
		 *   } while (old_sample != *sum);
		 */

		"3:"
		"\tmovl (%%ebx), %%ecx\n"
		/* eax = min(sum, 0x7fffff), then raised to -0x7fffff if below */
		"\tmovl $0x7fffff, %%eax\n"
		"\tmovl $-0x7fffff, %%edx\n"
		"\tcmpl %%eax, %%ecx\n"
		"\tcmovng %%ecx, %%eax\n"
		"\tcmpl %%edx, %%ecx\n"
		"\tcmovl %%edx, %%eax\n"
		"\torl $1, %%eax\n"
		"\tmovw %%ax, (%%edi)\n"
		"\tshrl $16, %%eax\n"
		"\tmovb %%al, 2(%%edi)\n"
		"\tcmpl %%ecx, (%%ebx)\n"
		"\tjnz 3b\n"

		/*
		 * while (size-- > 0)
		 */
		"\tadd %4, %%edi\n"
		"\tadd %5, %%esi\n"
		"\tadd %6, %%ebx\n"
		"\tdecl %0\n"
		"\tjnz 1b\n"

		"6:"
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */

		: /* no output regs */
		: "m" (size), "m" (dst), "m" (src),
		  "m" (sum), "m" (dst_step), "m" (src_step),
		  "m" (sum_step), "m" (old_ebx)
		/* the code stores to *dst and *sum and changes the flags */
		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
	);
}

View file

@ -4,22 +4,28 @@
/*
 * pcm_dmix_x86_64.h is a template: it defines functions whose names are
 * taken from the MIX_AREAS* macros and inserts LOCK_PREFIX in front of
 * its read-modify-write instructions.  It is included twice below.
 *
 * First pass: plain names, empty LOCK_PREFIX.
 */
#define MIX_AREAS1 mix_areas1
#define MIX_AREAS2 mix_areas2
#define MIX_AREAS3 mix_areas3
#define LOCK_PREFIX ""
#include "pcm_dmix_x86_64.h"
#undef MIX_AREAS1
#undef MIX_AREAS2
#undef MIX_AREAS3
#undef LOCK_PREFIX
/*
 * Second pass: "_smp" names, LOCK_PREFIX expands to the x86 "lock"
 * instruction prefix.
 */
#define MIX_AREAS1 mix_areas1_smp
#define MIX_AREAS2 mix_areas2_smp
#define MIX_AREAS3 mix_areas3_smp
#define LOCK_PREFIX "lock ; "
#include "pcm_dmix_x86_64.h"
#undef MIX_AREAS1
#undef MIX_AREAS2
#undef MIX_AREAS3
#undef LOCK_PREFIX
static unsigned long long dmix_supported_format =
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
(1ULL << SND_PCM_FORMAT_S16_LE) |
(1ULL << SND_PCM_FORMAT_S32_LE) |
(1ULL << SND_PCM_FORMAT_S24_3LE);
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
// printf("SMP: %i\n", smp);
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
}

View file

@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
);
}
/*
 * 24-bit version
 *
 * Mixes `size` packed 24-bit little-endian samples from src into dst,
 * keeping the running 32-bit totals in sum.  All steps are in bytes.
 *
 * Bit 0 of the first destination byte serves as the "already mixed"
 * flag: it is tested and set with a bit-test-and-set instruction, and
 * every value stored to dst has that bit set.  The stored value is
 * limited to the range -0x7fffff..0x7fffff.
 */
static void MIX_AREAS3(unsigned int size,
		       volatile unsigned char *dst, unsigned char *src,
		       volatile signed int *sum, size_t dst_step,
		       size_t src_step, size_t sum_step)
{
	unsigned long long old_rbx;

	/*
	 *  RSI - src
	 *  RDI - dst
	 *  RBX - sum
	 *  ECX - old sample
	 *  EAX - sample / temporary
	 *  EDX - temporary
	 */
	__asm__ __volatile__ (
		"\n"
		"\tmovq %%rbx, %7\n"
		/*
		 *  initialization, load RSI, RDI, RBX registers
		 */
		"\tmovq %1, %%rdi\n"
		"\tmovq %2, %%rsi\n"
		"\tmovq %3, %%rbx\n"

		/*
		 * while (size-- > 0) {
		 */
		"\tcmpl $0, %0\n"
		"\tjz 6f\n"

		"\t.p2align 4,,15\n"

		"1:"

		/*
		 *   sample = *src;
		 *   sum_sample = *sum;
		 *   if (test_and_set_bit(0, dst) == 0)
		 *     sample -= sum_sample;
		 *   *sum += sample;
		 *
		 * Only the top byte carries the sign, so the low 16 bits
		 * must be zero-extended; sign-extending them would subtract
		 * 0x10000 from every sample that has bit 15 set.
		 * The bit test uses a 16-bit operand: a 32-bit one would
		 * access four bytes of a three-byte sample, i.e. one byte
		 * past the end of the buffer for the last sample.
		 * leal is used for the addition because it leaves the carry
		 * flag produced by the bit test untouched.
		 */
		"\tmovsbl 2(%%rsi), %%eax\n"
		"\tmovzwl (%%rsi), %%ecx\n"
		"\tmovl (%%rbx), %%edx\n"
		"\tsall $16, %%eax\n"
		"\t" LOCK_PREFIX "btsw $0, (%%rdi)\n"
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
		"\tjc 2f\n"
		"\tsubl %%edx, %%ecx\n"
		"2:"
		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"

		/*
		 *   do {
		 *     sample = old_sample = *sum;
		 *     saturate(sample);
		 *     *dst = sample | 1;
		 *   } while (old_sample != *sum);
		 */

		"3:"
		"\tmovl (%%rbx), %%ecx\n"
		/* eax = min(sum, 0x7fffff), then raised to -0x7fffff if below */
		"\tmovl $0x7fffff, %%eax\n"
		"\tmovl $-0x7fffff, %%edx\n"
		"\tcmpl %%eax, %%ecx\n"
		"\tcmovng %%ecx, %%eax\n"
		"\tcmpl %%edx, %%ecx\n"
		"\tcmovl %%edx, %%eax\n"
		"\torl $1, %%eax\n"
		"\tmovw %%ax, (%%rdi)\n"
		"\tshrl $16, %%eax\n"
		"\tmovb %%al, 2(%%rdi)\n"
		"\tcmpl %%ecx, (%%rbx)\n"
		"\tjnz 3b\n"

		/*
		 * while (size-- > 0)
		 */
		"\tadd %4, %%rdi\n"
		"\tadd %5, %%rsi\n"
		"\tadd %6, %%rbx\n"
		"\tdecl %0\n"
		"\tjnz 1b\n"

		"6:"
		"\tmovq %7, %%rbx\n"

		: /* no output regs */
		: "m" (size), "m" (dst), "m" (src),
		  "m" (sum), "m" (dst_step), "m" (src_step),
		  "m" (sum_step), "m" (old_rbx)
		/* the code stores to *dst and *sum and changes the flags */
		: "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
	);
}