mirror of
				https://github.com/alsa-project/alsa-lib.git
				synced 2025-11-03 09:01:52 -05:00 
			
		
		
		
	dmix: add S24_3LE support
Add support to the dmix plugin for the S24_3LE sample format, which is used by 24-bit USB devices. The optimized assembler versions use only 23 bits for sample data so that the lowest bit can be used for synchronization, because there is no 24-bit cmpxchg instruction.
This commit is contained in:
		
							parent
							
								
									46f81f994b
								
							
						
					
					
						commit
						45f63a8735
					
				
					 8 changed files with 404 additions and 7 deletions
				
			
		| 
						 | 
				
			
			@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
 | 
			
		|||
			case SND_PCM_FORMAT_S32_BE:
 | 
			
		||||
			case SND_PCM_FORMAT_S16_LE:
 | 
			
		||||
			case SND_PCM_FORMAT_S16_BE:
 | 
			
		||||
			case SND_PCM_FORMAT_S24_3LE:
 | 
			
		||||
				break;
 | 
			
		||||
			default:
 | 
			
		||||
				SNDERR("invalid format");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
 | 
			
		|||
			volatile signed int *sum, size_t dst_step,
 | 
			
		||||
			size_t src_step, size_t sum_step);
 | 
			
		||||
 | 
			
		||||
/*
 * Signature of a mixing routine operating on byte-addressed samples
 * (used for the 3-byte S24_3LE format).
 *
 *  size      number of samples to process
 *  dst       destination sample bytes
 *  src       source sample bytes
 *  sum       32-bit accumulator buffer
 *  dst_step  stride applied to dst between samples
 *  src_step  stride applied to src between samples
 *  sum_step  stride applied to sum between samples
 */
typedef void (mix_areas3_t)(unsigned int size,
			    volatile unsigned char *dst,
			    unsigned char *src,
			    volatile signed int *sum,
			    size_t dst_step,
			    size_t src_step,
			    size_t sum_step);
 | 
			
		||||
 | 
			
		||||
struct slave_params {
 | 
			
		||||
	snd_pcm_format_t format;
 | 
			
		||||
	int rate;
 | 
			
		||||
| 
						 | 
				
			
			@ -120,6 +125,7 @@ struct snd_pcm_direct {
 | 
			
		|||
			signed int *sum_buffer;		/* shared sum buffer */
 | 
			
		||||
			mix_areas1_t *mix_areas1;
 | 
			
		||||
			mix_areas2_t *mix_areas2;
 | 
			
		||||
			mix_areas3_t *mix_areas3;
 | 
			
		||||
		} dmix;
 | 
			
		||||
		struct {
 | 
			
		||||
		} dsnoop;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 | 
			
		|||
			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
			
		||||
			dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
	} else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
 | 
			
		||||
		   dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
 | 
			
		||||
		signed int *src;
 | 
			
		||||
		volatile signed int *dst;
 | 
			
		||||
		if (dmix->interleaved) {
 | 
			
		||||
| 
						 | 
				
			
			@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 | 
			
		|||
			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
			
		||||
			dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
			
		||||
		}
 | 
			
		||||
	} else { /* SND_PCM_FORMAT_S24_3LE */
 | 
			
		||||
		unsigned char *src;
 | 
			
		||||
		volatile unsigned char *dst;
 | 
			
		||||
		if (dmix->interleaved) {
 | 
			
		||||
			/*
 | 
			
		||||
			 * process all areas in one loop
 | 
			
		||||
			 * it optimizes the memory accesses for this case
 | 
			
		||||
			 */
 | 
			
		||||
			dmix->u.dmix.mix_areas3(size * channels,
 | 
			
		||||
					((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
 | 
			
		||||
					((char *)src_areas[0].addr) + 3 * src_ofs * channels,
 | 
			
		||||
					dmix->u.dmix.sum_buffer + (dst_ofs * channels),
 | 
			
		||||
					3, 3, sizeof(signed int));
 | 
			
		||||
			return;
 | 
			
		||||
		}
 | 
			
		||||
		for (chn = 0; chn < channels; chn++) {
 | 
			
		||||
			dchn = dmix->bindings ? dmix->bindings[chn] : chn;
 | 
			
		||||
			if (dchn >= dmix->shmptr->s.channels)
 | 
			
		||||
				continue;
 | 
			
		||||
			src_step = src_areas[chn].step / 8;
 | 
			
		||||
			dst_step = dst_areas[dchn].step / 8;
 | 
			
		||||
			src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
 | 
			
		||||
			dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
 | 
			
		||||
			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
			
		||||
			dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		|||
/* non-concurrent version, supporting both endians */
 | 
			
		||||
static unsigned long long dmix_supported_format =
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
			
		||||
 | 
			
		||||
#include <byteswap.h>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Mix packed 24-bit samples from src into dst (non-concurrent C version).
 *
 * Samples are always little endian: byte 0 is the least significant byte
 * and byte 2 carries the sign.
 *
 *  size      number of samples to mix; 0 is a no-op
 *  dst       destination samples, 3 bytes each
 *  src       source samples, 3 bytes each
 *  sum       32-bit running totals, one per destination sample
 *  dst_step  byte stride between destination samples
 *  src_step  byte stride between source samples
 *  sum_step  byte stride between sum entries
 *
 * A destination sample whose three bytes are all zero is treated as not
 * yet written: the source sample is stored and becomes the new sum.
 * Otherwise the source is added to the sum and the result, clamped to the
 * signed 24-bit range, is written to dst.
 */
static void mix_areas3(unsigned int size,
		       volatile unsigned char *dst, unsigned char *src,
		       volatile signed int *sum, size_t dst_step,
		       size_t src_step, size_t sum_step)
{
	signed int sample;
	unsigned int bits;

	/* the loop below pre-decrements size, so 0 would wrap around */
	if (size == 0)
		return;

	for (;;) {
		/*
		 * The sign byte is scaled by multiplication: left-shifting
		 * a negative value is undefined behaviour.
		 */
		sample = src[0] | (src[1] << 8) |
			 (((signed char *)src)[2] * 0x10000);
		if (!(dst[0] | dst[1] | dst[2])) {
			*sum = sample;
		} else {
			sample += *sum;
			*sum = sample;
			if (sample > 0x7fffff)
				sample = 0x7fffff;
			else if (sample < -0x800000)
				sample = -0x800000;
		}
		/* split via unsigned so the byte extraction is well defined */
		bits = (unsigned int)sample;
		dst[0] = (unsigned char)bits;
		dst[1] = (unsigned char)(bits >> 8);
		dst[2] = (unsigned char)(bits >> 16);
		if (!--size)
			return;
		dst += dst_step;
		src += src_step;
		sum = (volatile signed int *)((volatile char *)sum + sum_step);
	}
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		|||
		dmix->u.dmix.mix_areas1 = mix_areas1_swap;
 | 
			
		||||
		dmix->u.dmix.mix_areas2 = mix_areas2_swap;
 | 
			
		||||
	}
 | 
			
		||||
	dmix->u.dmix.mix_areas3 = mix_areas3;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,33 +5,43 @@
 | 
			
		|||
#define MIX_AREAS1 mix_areas1
 | 
			
		||||
#define MIX_AREAS1_MMX mix_areas1_mmx
 | 
			
		||||
#define MIX_AREAS2 mix_areas2
 | 
			
		||||
#define MIX_AREAS3 mix_areas3
 | 
			
		||||
#define MIX_AREAS3_CMOV mix_areas3_cmov
 | 
			
		||||
#define LOCK_PREFIX ""
 | 
			
		||||
#include "pcm_dmix_i386.h"
 | 
			
		||||
#undef MIX_AREAS1
 | 
			
		||||
#undef MIX_AREAS1_MMX
 | 
			
		||||
#undef MIX_AREAS2
 | 
			
		||||
#undef MIX_AREAS3
 | 
			
		||||
#undef MIX_AREAS3_CMOV
 | 
			
		||||
#undef LOCK_PREFIX
 | 
			
		||||
 | 
			
		||||
#define MIX_AREAS1 mix_areas1_smp
 | 
			
		||||
#define MIX_AREAS1_MMX mix_areas1_smp_mmx
 | 
			
		||||
#define MIX_AREAS2 mix_areas2_smp
 | 
			
		||||
#define MIX_AREAS3 mix_areas3_smp
 | 
			
		||||
#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
 | 
			
		||||
#define LOCK_PREFIX "lock ; "
 | 
			
		||||
#include "pcm_dmix_i386.h"
 | 
			
		||||
#undef MIX_AREAS1
 | 
			
		||||
#undef MIX_AREAS1_MMX
 | 
			
		||||
#undef MIX_AREAS2
 | 
			
		||||
#undef MIX_AREAS3
 | 
			
		||||
#undef MIX_AREAS3_CMOV
 | 
			
		||||
#undef LOCK_PREFIX
 | 
			
		||||
 
 | 
			
		||||
static unsigned long long dmix_supported_format =
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_LE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
			
		||||
 | 
			
		||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		||||
{
 | 
			
		||||
	FILE *in;
 | 
			
		||||
	char line[255];
 | 
			
		||||
	int smp = 0, mmx = 0;
 | 
			
		||||
	int smp = 0, mmx = 0, cmov = 0;
 | 
			
		||||
	
 | 
			
		||||
	/* try to determine, if we have a MMX capable CPU */
 | 
			
		||||
	/* try to determine the capabilities of the CPU */
 | 
			
		||||
	in = fopen("/proc/cpuinfo", "r");
 | 
			
		||||
	if (in) {
 | 
			
		||||
		while (!feof(in)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		|||
			else if (!strncmp(line, "flags", 5)) {
 | 
			
		||||
				if (strstr(line, " mmx"))
 | 
			
		||||
					mmx = 1;
 | 
			
		||||
				if (strstr(line, " cmov"))
 | 
			
		||||
					cmov = 1;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		fclose(in);
 | 
			
		||||
	}
 | 
			
		||||
	// printf("MMX: %i, SMP: %i\n", mmx, smp);
 | 
			
		||||
	if (mmx) {
 | 
			
		||||
		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
 | 
			
		||||
	} else {
 | 
			
		||||
		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
			
		||||
	}
 | 
			
		||||
	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
			
		||||
	if (cmov) {
 | 
			
		||||
		dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
 | 
			
		||||
	} else {
 | 
			
		||||
		dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
 | 
			
		|||
		: "esi", "edi", "edx", "ecx", "eax"
 | 
			
		||||
	);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 24-bit version for plain i386
 | 
			
		||||
 */
 | 
			
		||||
static void MIX_AREAS3(unsigned int size,
 | 
			
		||||
		       volatile unsigned char *dst, unsigned char *src,
 | 
			
		||||
		       volatile signed int *sum, size_t dst_step,
 | 
			
		||||
		       size_t src_step, size_t sum_step)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int old_ebx;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 *  ESI - src
 | 
			
		||||
	 *  EDI - dst
 | 
			
		||||
	 *  EBX - sum
 | 
			
		||||
	 *  ECX - old sample
 | 
			
		||||
	 *  EAX - sample / temporary
 | 
			
		||||
	 *  EDX - temporary
 | 
			
		||||
	 */
 | 
			
		||||
	__asm__ __volatile__ (
 | 
			
		||||
		"\n"
 | 
			
		||||
 | 
			
		||||
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
 | 
			
		||||
		/*
 | 
			
		||||
		 *  initialization, load ESI, EDI, EBX registers
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovl %1, %%edi\n"
 | 
			
		||||
		"\tmovl %2, %%esi\n"
 | 
			
		||||
		"\tmovl %3, %%ebx\n"
 | 
			
		||||
		"\tcmpl $0, %0\n"
 | 
			
		||||
		"\tjnz 1f\n"
 | 
			
		||||
		"\tjmp 6f\n"
 | 
			
		||||
 | 
			
		||||
		"\t.p2align 4,,15\n"
 | 
			
		||||
 | 
			
		||||
		"1:"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   sample = *src;
 | 
			
		||||
		 *   sum_sample = *sum;
 | 
			
		||||
		 *   if (test_and_set_bit(0, dst) == 0)
 | 
			
		||||
		 *     sample -= sum_sample;
 | 
			
		||||
		 *   *sum += sample;
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovsbl 2(%%esi), %%eax\n"
 | 
			
		||||
		"\tmovzwl (%%esi), %%ecx\n"
 | 
			
		||||
		"\tmovl (%%ebx), %%edx\n"
 | 
			
		||||
		"\tsall $16, %%eax\n"
 | 
			
		||||
		"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
 | 
			
		||||
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
			
		||||
		"\tjc 2f\n"
 | 
			
		||||
		"\tsubl %%edx, %%ecx\n"
 | 
			
		||||
		"2:"
 | 
			
		||||
		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   do {
 | 
			
		||||
		 *     sample = old_sample = *sum;
 | 
			
		||||
		 *     saturate(sample);
 | 
			
		||||
		 *     *dst = sample | 1;
 | 
			
		||||
		 *   } while (old_sample != *sum);
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		"3:"
 | 
			
		||||
		"\tmovl (%%ebx), %%ecx\n"
 | 
			
		||||
		/*
 | 
			
		||||
		 *  if (sample > 0x7fffff)
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovl $0x7fffff, %%eax\n"
 | 
			
		||||
		"\tcmpl %%eax, %%ecx\n"
 | 
			
		||||
		"\tjg 4f\n"
 | 
			
		||||
		/*
 | 
			
		||||
		 *  if (sample < -0x7fffff)
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovl $-0x7fffff, %%eax\n"
 | 
			
		||||
		"\tcmpl %%eax, %%ecx\n"
 | 
			
		||||
		"\tjl 4f\n"
 | 
			
		||||
		"\tmovl %%ecx, %%eax\n"
 | 
			
		||||
		"\torl $1, %%eax\n"
 | 
			
		||||
		"4:"
 | 
			
		||||
		"\tmovw %%ax, (%%edi)\n"
 | 
			
		||||
		"\tshrl $16, %%eax\n"
 | 
			
		||||
		"\tmovb %%al, 2(%%edi)\n"
 | 
			
		||||
		"\tcmpl %%ecx, (%%ebx)\n"
 | 
			
		||||
		"\tjnz 3b\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * while (size-- > 0)
 | 
			
		||||
		 */
 | 
			
		||||
		"\tdecl %0\n"
 | 
			
		||||
		"\tjz 6f\n"
 | 
			
		||||
		"\tadd %4, %%edi\n"
 | 
			
		||||
		"\tadd %5, %%esi\n"
 | 
			
		||||
		"\tadd %6, %%ebx\n"
 | 
			
		||||
		"\tjmp 1b\n"
 | 
			
		||||
		
 | 
			
		||||
		"6:"
 | 
			
		||||
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 | 
			
		||||
 | 
			
		||||
		: /* no output regs */
 | 
			
		||||
		: "m" (size), "m" (dst), "m" (src),
 | 
			
		||||
		  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
			
		||||
		  "m" (sum_step), "m" (old_ebx)
 | 
			
		||||
		: "esi", "edi", "edx", "ecx", "eax"
 | 
			
		||||
	);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 24-bit version for Pentium Pro/II
 | 
			
		||||
 */
 | 
			
		||||
static void MIX_AREAS3_CMOV(unsigned int size,
 | 
			
		||||
			    volatile unsigned char *dst, unsigned char *src,
 | 
			
		||||
			    volatile signed int *sum, size_t dst_step,
 | 
			
		||||
			    size_t src_step, size_t sum_step)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int old_ebx;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 *  ESI - src
 | 
			
		||||
	 *  EDI - dst
 | 
			
		||||
	 *  EBX - sum
 | 
			
		||||
	 *  ECX - old sample
 | 
			
		||||
	 *  EAX - sample / temporary
 | 
			
		||||
	 *  EDX - temporary
 | 
			
		||||
	 */
 | 
			
		||||
	__asm__ __volatile__ (
 | 
			
		||||
		"\n"
 | 
			
		||||
 | 
			
		||||
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
 | 
			
		||||
		/*
 | 
			
		||||
		 *  initialization, load ESI, EDI, EBX registers
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovl %1, %%edi\n"
 | 
			
		||||
		"\tmovl %2, %%esi\n"
 | 
			
		||||
		"\tmovl %3, %%ebx\n"
 | 
			
		||||
		"\tcmpl $0, %0\n"
 | 
			
		||||
		"\tjz 6f\n"
 | 
			
		||||
 | 
			
		||||
		"\t.p2align 4,,15\n"
 | 
			
		||||
 | 
			
		||||
		"1:"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   sample = *src;
 | 
			
		||||
		 *   sum_sample = *sum;
 | 
			
		||||
		 *   if (test_and_set_bit(0, dst) == 0)
 | 
			
		||||
		 *     sample -= sum_sample;
 | 
			
		||||
		 *   *sum += sample;
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovsbl 2(%%esi), %%eax\n"
 | 
			
		||||
		"\tmovzwl (%%esi), %%ecx\n"
 | 
			
		||||
		"\tmovl (%%ebx), %%edx\n"
 | 
			
		||||
		"\tsall $16, %%eax\n"
 | 
			
		||||
		"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
 | 
			
		||||
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
			
		||||
		"\tjc 2f\n"
 | 
			
		||||
		"\tsubl %%edx, %%ecx\n"
 | 
			
		||||
		"2:"
 | 
			
		||||
		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   do {
 | 
			
		||||
		 *     sample = old_sample = *sum;
 | 
			
		||||
		 *     saturate(sample);
 | 
			
		||||
		 *     *dst = sample | 1;
 | 
			
		||||
		 *   } while (old_sample != *sum);
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		"3:"
 | 
			
		||||
		"\tmovl (%%ebx), %%ecx\n"
 | 
			
		||||
 | 
			
		||||
		"\tmovl $0x7fffff, %%eax\n"
 | 
			
		||||
		"\tmovl $-0x7fffff, %%edx\n"
 | 
			
		||||
		"\tcmpl %%eax, %%ecx\n"
 | 
			
		||||
		"\tcmovng %%ecx, %%eax\n"
 | 
			
		||||
		"\tcmpl %%edx, %%ecx\n"
 | 
			
		||||
		"\tcmovl %%edx, %%eax\n"
 | 
			
		||||
 | 
			
		||||
		"\torl $1, %%eax\n"
 | 
			
		||||
		"\tmovw %%ax, (%%edi)\n"
 | 
			
		||||
		"\tshrl $16, %%eax\n"
 | 
			
		||||
		"\tmovb %%al, 2(%%edi)\n"
 | 
			
		||||
 | 
			
		||||
		"\tcmpl %%ecx, (%%ebx)\n"
 | 
			
		||||
		"\tjnz 3b\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * while (size-- > 0)
 | 
			
		||||
		 */
 | 
			
		||||
		"\tadd %4, %%edi\n"
 | 
			
		||||
		"\tadd %5, %%esi\n"
 | 
			
		||||
		"\tadd %6, %%ebx\n"
 | 
			
		||||
		"\tdecl %0\n"
 | 
			
		||||
		"\tjnz 1b\n"
 | 
			
		||||
		
 | 
			
		||||
		"6:"
 | 
			
		||||
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 | 
			
		||||
 | 
			
		||||
		: /* no output regs */
 | 
			
		||||
		: "m" (size), "m" (dst), "m" (src),
 | 
			
		||||
		  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
			
		||||
		  "m" (sum_step), "m" (old_ebx)
 | 
			
		||||
		: "esi", "edi", "edx", "ecx", "eax"
 | 
			
		||||
	);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,22 +4,28 @@
 | 
			
		|||
 | 
			
		||||
#define MIX_AREAS1 mix_areas1
 | 
			
		||||
#define MIX_AREAS2 mix_areas2
 | 
			
		||||
#define MIX_AREAS3 mix_areas3
 | 
			
		||||
#define LOCK_PREFIX ""
 | 
			
		||||
#include "pcm_dmix_x86_64.h"
 | 
			
		||||
#undef MIX_AREAS1
 | 
			
		||||
#undef MIX_AREAS2
 | 
			
		||||
#undef MIX_AREAS3
 | 
			
		||||
#undef LOCK_PREFIX
 | 
			
		||||
 | 
			
		||||
#define MIX_AREAS1 mix_areas1_smp
 | 
			
		||||
#define MIX_AREAS2 mix_areas2_smp
 | 
			
		||||
#define MIX_AREAS3 mix_areas3_smp
 | 
			
		||||
#define LOCK_PREFIX "lock ; "
 | 
			
		||||
#include "pcm_dmix_x86_64.h"
 | 
			
		||||
#undef MIX_AREAS1
 | 
			
		||||
#undef MIX_AREAS2
 | 
			
		||||
#undef MIX_AREAS3
 | 
			
		||||
#undef LOCK_PREFIX
 | 
			
		||||
 
 | 
			
		||||
static unsigned long long dmix_supported_format =
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S16_LE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
			
		||||
	(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
			
		||||
 | 
			
		||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
			
		|||
	// printf("SMP: %i\n", smp);
 | 
			
		||||
	dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
			
		||||
	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
			
		||||
	dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
 | 
			
		|||
	);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  24-bit version
 | 
			
		||||
 */
 | 
			
		||||
static void MIX_AREAS3(unsigned int size,
 | 
			
		||||
		       volatile unsigned char *dst, unsigned char *src,
 | 
			
		||||
		       volatile signed int *sum, size_t dst_step,
 | 
			
		||||
		       size_t src_step, size_t sum_step)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long long old_rbx;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 *  RSI - src
 | 
			
		||||
	 *  RDI - dst
 | 
			
		||||
	 *  RBX - sum
 | 
			
		||||
	 *  ECX - old sample
 | 
			
		||||
	 *  EAX - sample / temporary
 | 
			
		||||
	 *  EDX - temporary
 | 
			
		||||
	 */
 | 
			
		||||
	__asm__ __volatile__ (
 | 
			
		||||
		"\n"
 | 
			
		||||
 | 
			
		||||
		"\tmovq %%rbx, %7\n"
 | 
			
		||||
		/*
 | 
			
		||||
		 *  initialization, load ESI, EDI, EBX registers
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovq %1, %%rdi\n"
 | 
			
		||||
		"\tmovq %2, %%rsi\n"
 | 
			
		||||
		"\tmovq %3, %%rbx\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * while (size-- > 0) {
 | 
			
		||||
		 */
 | 
			
		||||
		"\tcmpl $0, %0\n"
 | 
			
		||||
		"jz 6f\n"
 | 
			
		||||
 | 
			
		||||
		"\t.p2align 4,,15\n"
 | 
			
		||||
 | 
			
		||||
		"1:"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   sample = *src;
 | 
			
		||||
		 *   sum_sample = *sum;
 | 
			
		||||
		 *   if (test_and_set_bit(0, dst) == 0)
 | 
			
		||||
		 *     sample -= sum_sample;
 | 
			
		||||
		 *   *sum += sample;
 | 
			
		||||
		 */
 | 
			
		||||
		"\tmovsbl 2(%%rsi), %%eax\n"
 | 
			
		||||
		"\tmovswl (%%rsi), %%ecx\n"
 | 
			
		||||
		"\tmovl (%%rbx), %%edx\n"
 | 
			
		||||
		"\tsall $16, %%eax\n"
 | 
			
		||||
		"\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
 | 
			
		||||
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
			
		||||
		"\tjc 2f\n"
 | 
			
		||||
		"\tsubl %%edx, %%ecx\n"
 | 
			
		||||
		"2:"
 | 
			
		||||
		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 *   do {
 | 
			
		||||
		 *     sample = old_sample = *sum;
 | 
			
		||||
		 *     saturate(sample);
 | 
			
		||||
		 *     *dst = sample | 1;
 | 
			
		||||
		 *   } while (old_sample != *sum);
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		"3:"
 | 
			
		||||
		"\tmovl (%%rbx), %%ecx\n"
 | 
			
		||||
 | 
			
		||||
		"\tmovl $0x7fffff, %%eax\n"
 | 
			
		||||
		"\tmovl $-0x7fffff, %%edx\n"
 | 
			
		||||
		"\tcmpl %%eax, %%ecx\n"
 | 
			
		||||
		"\tcmovng %%ecx, %%eax\n"
 | 
			
		||||
		"\tcmpl %%edx, %%ecx\n"
 | 
			
		||||
		"\tcmovl %%edx, %%eax\n"
 | 
			
		||||
 | 
			
		||||
		"\torl $1, %%eax\n"
 | 
			
		||||
		"\tmovw %%ax, (%%rdi)\n"
 | 
			
		||||
		"\tshrl $16, %%eax\n"
 | 
			
		||||
		"\tmovb %%al, 2(%%rdi)\n"
 | 
			
		||||
	
 | 
			
		||||
		"\tcmpl %%ecx, (%%rbx)\n"
 | 
			
		||||
		"\tjnz 3b\n"
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * while (size-- > 0)
 | 
			
		||||
		 */
 | 
			
		||||
		"\tadd %4, %%rdi\n"
 | 
			
		||||
		"\tadd %5, %%rsi\n"
 | 
			
		||||
		"\tadd %6, %%rbx\n"
 | 
			
		||||
		"\tdecl %0\n"
 | 
			
		||||
		"\tjnz 1b\n"
 | 
			
		||||
		
 | 
			
		||||
		"6:"
 | 
			
		||||
		"\tmovq %7, %%rbx\n"
 | 
			
		||||
 | 
			
		||||
		: /* no output regs */
 | 
			
		||||
		: "m" (size), "m" (dst), "m" (src),
 | 
			
		||||
		  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
			
		||||
		  "m" (sum_step), "m" (old_rbx)
 | 
			
		||||
		: "rsi", "rdi", "edx", "ecx", "eax"
 | 
			
		||||
	);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue