mirror of
				https://github.com/alsa-project/alsa-lib.git
				synced 2025-11-03 09:01:52 -05:00 
			
		
		
		
	dmix: add S24_3LE support
Add support to the dmix plugin for the S24_3LE sample format, which is used by 24-bit USB devices. The optimized assembler versions use only 23 bits for sample data so that the lowest bit can be used for synchronization, because there is no 24-bit cmpxchg instruction.
This commit is contained in:
		
							parent
							
								
									46f81f994b
								
							
						
					
					
						commit
						45f63a8735
					
				
					 8 changed files with 404 additions and 7 deletions
				
			
		| 
						 | 
					@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
 | 
				
			||||||
			case SND_PCM_FORMAT_S32_BE:
 | 
								case SND_PCM_FORMAT_S32_BE:
 | 
				
			||||||
			case SND_PCM_FORMAT_S16_LE:
 | 
								case SND_PCM_FORMAT_S16_LE:
 | 
				
			||||||
			case SND_PCM_FORMAT_S16_BE:
 | 
								case SND_PCM_FORMAT_S16_BE:
 | 
				
			||||||
 | 
								case SND_PCM_FORMAT_S24_3LE:
 | 
				
			||||||
				break;
 | 
									break;
 | 
				
			||||||
			default:
 | 
								default:
 | 
				
			||||||
				SNDERR("invalid format");
 | 
									SNDERR("invalid format");
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
 | 
				
			||||||
			volatile signed int *sum, size_t dst_step,
 | 
								volatile signed int *sum, size_t dst_step,
 | 
				
			||||||
			size_t src_step, size_t sum_step);
 | 
								size_t src_step, size_t sum_step);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef void (mix_areas3_t)(unsigned int size,
 | 
				
			||||||
 | 
								volatile unsigned char *dst, unsigned char *src,
 | 
				
			||||||
 | 
								volatile signed int *sum, size_t dst_step,
 | 
				
			||||||
 | 
								size_t src_step, size_t sum_step);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct slave_params {
 | 
					struct slave_params {
 | 
				
			||||||
	snd_pcm_format_t format;
 | 
						snd_pcm_format_t format;
 | 
				
			||||||
	int rate;
 | 
						int rate;
 | 
				
			||||||
| 
						 | 
					@ -120,6 +125,7 @@ struct snd_pcm_direct {
 | 
				
			||||||
			signed int *sum_buffer;		/* shared sum buffer */
 | 
								signed int *sum_buffer;		/* shared sum buffer */
 | 
				
			||||||
			mix_areas1_t *mix_areas1;
 | 
								mix_areas1_t *mix_areas1;
 | 
				
			||||||
			mix_areas2_t *mix_areas2;
 | 
								mix_areas2_t *mix_areas2;
 | 
				
			||||||
 | 
								mix_areas3_t *mix_areas3;
 | 
				
			||||||
		} dmix;
 | 
							} dmix;
 | 
				
			||||||
		struct {
 | 
							struct {
 | 
				
			||||||
		} dsnoop;
 | 
							} dsnoop;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 | 
				
			||||||
			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
								sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
				
			||||||
			dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
								dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	} else {
 | 
						} else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
 | 
				
			||||||
 | 
							   dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
 | 
				
			||||||
		signed int *src;
 | 
							signed int *src;
 | 
				
			||||||
		volatile signed int *dst;
 | 
							volatile signed int *dst;
 | 
				
			||||||
		if (dmix->interleaved) {
 | 
							if (dmix->interleaved) {
 | 
				
			||||||
| 
						 | 
					@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 | 
				
			||||||
			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
								sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
				
			||||||
			dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
								dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
						} else { /* SND_PCM_FORMAT_S24_3LE */
 | 
				
			||||||
 | 
							unsigned char *src;
 | 
				
			||||||
 | 
							volatile unsigned char *dst;
 | 
				
			||||||
 | 
							if (dmix->interleaved) {
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * process all areas in one loop
 | 
				
			||||||
 | 
								 * it optimizes the memory accesses for this case
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								dmix->u.dmix.mix_areas3(size * channels,
 | 
				
			||||||
 | 
										((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
 | 
				
			||||||
 | 
										((char *)src_areas[0].addr) + 3 * src_ofs * channels,
 | 
				
			||||||
 | 
										dmix->u.dmix.sum_buffer + (dst_ofs * channels),
 | 
				
			||||||
 | 
										3, 3, sizeof(signed int));
 | 
				
			||||||
 | 
								return;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							for (chn = 0; chn < channels; chn++) {
 | 
				
			||||||
 | 
								dchn = dmix->bindings ? dmix->bindings[chn] : chn;
 | 
				
			||||||
 | 
								if (dchn >= dmix->shmptr->s.channels)
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
 | 
								src_step = src_areas[chn].step / 8;
 | 
				
			||||||
 | 
								dst_step = dst_areas[dchn].step / 8;
 | 
				
			||||||
 | 
								src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
 | 
				
			||||||
 | 
								dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
 | 
				
			||||||
 | 
								sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 | 
				
			||||||
 | 
								dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
/* non-concurrent version, supporting both endians */
 | 
					/* non-concurrent version, supporting both endians */
 | 
				
			||||||
static unsigned long long dmix_supported_format =
 | 
					static unsigned long long dmix_supported_format =
 | 
				
			||||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
						(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
				
			||||||
	(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
 | 
						(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
 | 
				
			||||||
 | 
						(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <byteswap.h>
 | 
					#include <byteswap.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* always little endian */
 | 
				
			||||||
 | 
					/*
					 * Generic (non-atomic) mixer for packed 24-bit little-endian samples.
					 *
					 * size      - number of samples to process; 0 is a no-op
					 * dst       - output samples, 3 bytes each, least significant byte first
					 * src       - input samples, same layout as dst
					 * sum       - one signed int accumulator per sample
					 * dst_step, src_step, sum_step
					 *           - distance in bytes between consecutive samples
					 *
					 * A dst sample whose three bytes are all zero is treated as untouched:
					 * the accumulator is overwritten with the source sample.  Otherwise the
					 * source sample is added to the accumulator and the result is clamped
					 * to the signed 24-bit range before it is stored in dst.
					 */
					static void mix_areas3(unsigned int size,
							       volatile unsigned char *dst, unsigned char *src,
							       volatile signed int *sum, size_t dst_step,
							       size_t src_step, size_t sum_step)
					{
						signed int sample;
						unsigned int bits;

						/* without this guard --size below would wrap and overrun the buffers */
						if (size == 0)
							return;

						for (;;) {
							/*
							 * Build the sign-extended 24-bit value.  The top byte is
							 * scaled by multiplication because left-shifting a negative
							 * value is undefined behaviour.
							 */
							sample = src[0] | (src[1] << 8) |
								 (((signed char *)src)[2] * 0x10000);
							if (!(dst[0] | dst[1] | dst[2])) {
								*sum = sample;
							} else {
								sample += *sum;
								*sum = sample;
								if (sample > 0x7fffff)
									sample = 0x7fffff;
								else if (sample < -0x800000)
									sample = -0x800000;
							}
							/* split via an unsigned copy so the shifts are fully defined */
							bits = (unsigned int)sample;
							dst[0] = (unsigned char)bits;
							dst[1] = (unsigned char)(bits >> 8);
							dst[2] = (unsigned char)(bits >> 16);
							if (!--size)
								return;
							dst += dst_step;
							src += src_step;
							/* sum_step is in bytes; keep the volatile qualifier while stepping */
							sum = (volatile signed int *)((volatile char *)sum + sum_step);
						}
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
					static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
		dmix->u.dmix.mix_areas1 = mix_areas1_swap;
 | 
							dmix->u.dmix.mix_areas1 = mix_areas1_swap;
 | 
				
			||||||
		dmix->u.dmix.mix_areas2 = mix_areas2_swap;
 | 
							dmix->u.dmix.mix_areas2 = mix_areas2_swap;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						dmix->u.dmix.mix_areas3 = mix_areas3;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,33 +5,43 @@
 | 
				
			||||||
#define MIX_AREAS1 mix_areas1
 | 
					#define MIX_AREAS1 mix_areas1
 | 
				
			||||||
#define MIX_AREAS1_MMX mix_areas1_mmx
 | 
					#define MIX_AREAS1_MMX mix_areas1_mmx
 | 
				
			||||||
#define MIX_AREAS2 mix_areas2
 | 
					#define MIX_AREAS2 mix_areas2
 | 
				
			||||||
 | 
					#define MIX_AREAS3 mix_areas3
 | 
				
			||||||
 | 
					#define MIX_AREAS3_CMOV mix_areas3_cmov
 | 
				
			||||||
#define LOCK_PREFIX ""
 | 
					#define LOCK_PREFIX ""
 | 
				
			||||||
#include "pcm_dmix_i386.h"
 | 
					#include "pcm_dmix_i386.h"
 | 
				
			||||||
#undef MIX_AREAS1
 | 
					#undef MIX_AREAS1
 | 
				
			||||||
#undef MIX_AREAS1_MMX
 | 
					#undef MIX_AREAS1_MMX
 | 
				
			||||||
#undef MIX_AREAS2
 | 
					#undef MIX_AREAS2
 | 
				
			||||||
 | 
					#undef MIX_AREAS3
 | 
				
			||||||
 | 
					#undef MIX_AREAS3_CMOV
 | 
				
			||||||
#undef LOCK_PREFIX
 | 
					#undef LOCK_PREFIX
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MIX_AREAS1 mix_areas1_smp
 | 
					#define MIX_AREAS1 mix_areas1_smp
 | 
				
			||||||
#define MIX_AREAS1_MMX mix_areas1_smp_mmx
 | 
					#define MIX_AREAS1_MMX mix_areas1_smp_mmx
 | 
				
			||||||
#define MIX_AREAS2 mix_areas2_smp
 | 
					#define MIX_AREAS2 mix_areas2_smp
 | 
				
			||||||
 | 
					#define MIX_AREAS3 mix_areas3_smp
 | 
				
			||||||
 | 
					#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
 | 
				
			||||||
#define LOCK_PREFIX "lock ; "
 | 
					#define LOCK_PREFIX "lock ; "
 | 
				
			||||||
#include "pcm_dmix_i386.h"
 | 
					#include "pcm_dmix_i386.h"
 | 
				
			||||||
#undef MIX_AREAS1
 | 
					#undef MIX_AREAS1
 | 
				
			||||||
#undef MIX_AREAS1_MMX
 | 
					#undef MIX_AREAS1_MMX
 | 
				
			||||||
#undef MIX_AREAS2
 | 
					#undef MIX_AREAS2
 | 
				
			||||||
 | 
					#undef MIX_AREAS3
 | 
				
			||||||
 | 
					#undef MIX_AREAS3_CMOV
 | 
				
			||||||
#undef LOCK_PREFIX
 | 
					#undef LOCK_PREFIX
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
static unsigned long long dmix_supported_format =
 | 
					static unsigned long long dmix_supported_format =
 | 
				
			||||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
 | 
						(1ULL << SND_PCM_FORMAT_S16_LE) |
 | 
				
			||||||
 | 
						(1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
				
			||||||
 | 
						(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
					static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	FILE *in;
 | 
						FILE *in;
 | 
				
			||||||
	char line[255];
 | 
						char line[255];
 | 
				
			||||||
	int smp = 0, mmx = 0;
 | 
						int smp = 0, mmx = 0, cmov = 0;
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	/* try to determine, if we have a MMX capable CPU */
 | 
						/* try to determine the capabilities of the CPU */
 | 
				
			||||||
	in = fopen("/proc/cpuinfo", "r");
 | 
						in = fopen("/proc/cpuinfo", "r");
 | 
				
			||||||
	if (in) {
 | 
						if (in) {
 | 
				
			||||||
		while (!feof(in)) {
 | 
							while (!feof(in)) {
 | 
				
			||||||
| 
						 | 
					@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
			else if (!strncmp(line, "flags", 5)) {
 | 
								else if (!strncmp(line, "flags", 5)) {
 | 
				
			||||||
				if (strstr(line, " mmx"))
 | 
									if (strstr(line, " mmx"))
 | 
				
			||||||
					mmx = 1;
 | 
										mmx = 1;
 | 
				
			||||||
 | 
									if (strstr(line, " cmov"))
 | 
				
			||||||
 | 
										cmov = 1;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		fclose(in);
 | 
							fclose(in);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	// printf("MMX: %i, SMP: %i\n", mmx, smp);
 | 
					 | 
				
			||||||
	if (mmx) {
 | 
						if (mmx) {
 | 
				
			||||||
		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
 | 
							dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
							dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
						dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
				
			||||||
 | 
						if (cmov) {
 | 
				
			||||||
 | 
							dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
 | 
				
			||||||
		: "esi", "edi", "edx", "ecx", "eax"
 | 
							: "esi", "edi", "edx", "ecx", "eax"
 | 
				
			||||||
	);
 | 
						);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * 24-bit version for plain i386
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void MIX_AREAS3(unsigned int size,
 | 
				
			||||||
 | 
							       volatile unsigned char *dst, unsigned char *src,
 | 
				
			||||||
 | 
							       volatile signed int *sum, size_t dst_step,
 | 
				
			||||||
 | 
							       size_t src_step, size_t sum_step)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned int old_ebx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 *  ESI - src
 | 
				
			||||||
 | 
						 *  EDI - dst
 | 
				
			||||||
 | 
						 *  EBX - sum
 | 
				
			||||||
 | 
						 *  ECX - old sample
 | 
				
			||||||
 | 
						 *  EAX - sample / temporary
 | 
				
			||||||
 | 
						 *  EDX - temporary
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__asm__ __volatile__ (
 | 
				
			||||||
 | 
							"\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *  initialization, load ESI, EDI, EBX registers
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovl %1, %%edi\n"
 | 
				
			||||||
 | 
							"\tmovl %2, %%esi\n"
 | 
				
			||||||
 | 
							"\tmovl %3, %%ebx\n"
 | 
				
			||||||
 | 
							"\tcmpl $0, %0\n"
 | 
				
			||||||
 | 
							"\tjnz 1f\n"
 | 
				
			||||||
 | 
							"\tjmp 6f\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\t.p2align 4,,15\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"1:"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   sample = *src;
 | 
				
			||||||
 | 
							 *   sum_sample = *sum;
 | 
				
			||||||
 | 
							 *   if (test_and_set_bit(0, dst) == 0)
 | 
				
			||||||
 | 
							 *     sample -= sum_sample;
 | 
				
			||||||
 | 
							 *   *sum += sample;
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovsbl 2(%%esi), %%eax\n"
 | 
				
			||||||
 | 
							"\tmovzwl (%%esi), %%ecx\n"
 | 
				
			||||||
 | 
							"\tmovl (%%ebx), %%edx\n"
 | 
				
			||||||
 | 
							"\tsall $16, %%eax\n"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
 | 
				
			||||||
 | 
							"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
				
			||||||
 | 
							"\tjc 2f\n"
 | 
				
			||||||
 | 
							"\tsubl %%edx, %%ecx\n"
 | 
				
			||||||
 | 
							"2:"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   do {
 | 
				
			||||||
 | 
							 *     sample = old_sample = *sum;
 | 
				
			||||||
 | 
							 *     saturate(sample);
 | 
				
			||||||
 | 
							 *     *dst = sample | 1;
 | 
				
			||||||
 | 
							 *   } while (old_sample != *sum);
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"3:"
 | 
				
			||||||
 | 
							"\tmovl (%%ebx), %%ecx\n"
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *  if (sample > 0x7fffff)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovl $0x7fffff, %%eax\n"
 | 
				
			||||||
 | 
							"\tcmpl %%eax, %%ecx\n"
 | 
				
			||||||
 | 
							"\tjg 4f\n"
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *  if (sample < -0x7fffff)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovl $-0x7fffff, %%eax\n"
 | 
				
			||||||
 | 
							"\tcmpl %%eax, %%ecx\n"
 | 
				
			||||||
 | 
							"\tjl 4f\n"
 | 
				
			||||||
 | 
							"\tmovl %%ecx, %%eax\n"
 | 
				
			||||||
 | 
							"\torl $1, %%eax\n"
 | 
				
			||||||
 | 
							"4:"
 | 
				
			||||||
 | 
							"\tmovw %%ax, (%%edi)\n"
 | 
				
			||||||
 | 
							"\tshrl $16, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovb %%al, 2(%%edi)\n"
 | 
				
			||||||
 | 
							"\tcmpl %%ecx, (%%ebx)\n"
 | 
				
			||||||
 | 
							"\tjnz 3b\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * while (size-- > 0)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tdecl %0\n"
 | 
				
			||||||
 | 
							"\tjz 6f\n"
 | 
				
			||||||
 | 
							"\tadd %4, %%edi\n"
 | 
				
			||||||
 | 
							"\tadd %5, %%esi\n"
 | 
				
			||||||
 | 
							"\tadd %6, %%ebx\n"
 | 
				
			||||||
 | 
							"\tjmp 1b\n"
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
							"6:"
 | 
				
			||||||
 | 
							"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							: /* no output regs */
 | 
				
			||||||
 | 
							: "m" (size), "m" (dst), "m" (src),
 | 
				
			||||||
 | 
							  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
				
			||||||
 | 
							  "m" (sum_step), "m" (old_ebx)
 | 
				
			||||||
 | 
							: "esi", "edi", "edx", "ecx", "eax"
 | 
				
			||||||
 | 
						);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * 24-bit version for Pentium Pro/II
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void MIX_AREAS3_CMOV(unsigned int size,
 | 
				
			||||||
 | 
								    volatile unsigned char *dst, unsigned char *src,
 | 
				
			||||||
 | 
								    volatile signed int *sum, size_t dst_step,
 | 
				
			||||||
 | 
								    size_t src_step, size_t sum_step)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned int old_ebx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 *  ESI - src
 | 
				
			||||||
 | 
						 *  EDI - dst
 | 
				
			||||||
 | 
						 *  EBX - sum
 | 
				
			||||||
 | 
						 *  ECX - old sample
 | 
				
			||||||
 | 
						 *  EAX - sample / temporary
 | 
				
			||||||
 | 
						 *  EDX - temporary
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__asm__ __volatile__ (
 | 
				
			||||||
 | 
							"\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *  initialization, load ESI, EDI, EBX registers
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovl %1, %%edi\n"
 | 
				
			||||||
 | 
							"\tmovl %2, %%esi\n"
 | 
				
			||||||
 | 
							"\tmovl %3, %%ebx\n"
 | 
				
			||||||
 | 
							"\tcmpl $0, %0\n"
 | 
				
			||||||
 | 
							"\tjz 6f\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\t.p2align 4,,15\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"1:"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   sample = *src;
 | 
				
			||||||
 | 
							 *   sum_sample = *sum;
 | 
				
			||||||
 | 
							 *   if (test_and_set_bit(0, dst) == 0)
 | 
				
			||||||
 | 
							 *     sample -= sum_sample;
 | 
				
			||||||
 | 
							 *   *sum += sample;
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovsbl 2(%%esi), %%eax\n"
 | 
				
			||||||
 | 
							"\tmovzwl (%%esi), %%ecx\n"
 | 
				
			||||||
 | 
							"\tmovl (%%ebx), %%edx\n"
 | 
				
			||||||
 | 
							"\tsall $16, %%eax\n"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
 | 
				
			||||||
 | 
							"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
				
			||||||
 | 
							"\tjc 2f\n"
 | 
				
			||||||
 | 
							"\tsubl %%edx, %%ecx\n"
 | 
				
			||||||
 | 
							"2:"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   do {
 | 
				
			||||||
 | 
							 *     sample = old_sample = *sum;
 | 
				
			||||||
 | 
							 *     saturate(sample);
 | 
				
			||||||
 | 
							 *     *dst = sample | 1;
 | 
				
			||||||
 | 
							 *   } while (old_sample != *sum);
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"3:"
 | 
				
			||||||
 | 
							"\tmovl (%%ebx), %%ecx\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tmovl $0x7fffff, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovl $-0x7fffff, %%edx\n"
 | 
				
			||||||
 | 
							"\tcmpl %%eax, %%ecx\n"
 | 
				
			||||||
 | 
							"\tcmovng %%ecx, %%eax\n"
 | 
				
			||||||
 | 
							"\tcmpl %%edx, %%ecx\n"
 | 
				
			||||||
 | 
							"\tcmovl %%edx, %%eax\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\torl $1, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovw %%ax, (%%edi)\n"
 | 
				
			||||||
 | 
							"\tshrl $16, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovb %%al, 2(%%edi)\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tcmpl %%ecx, (%%ebx)\n"
 | 
				
			||||||
 | 
							"\tjnz 3b\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * while (size-- > 0)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tadd %4, %%edi\n"
 | 
				
			||||||
 | 
							"\tadd %5, %%esi\n"
 | 
				
			||||||
 | 
							"\tadd %6, %%ebx\n"
 | 
				
			||||||
 | 
							"\tdecl %0\n"
 | 
				
			||||||
 | 
							"\tjnz 1b\n"
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
							"6:"
 | 
				
			||||||
 | 
							"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							: /* no output regs */
 | 
				
			||||||
 | 
							: "m" (size), "m" (dst), "m" (src),
 | 
				
			||||||
 | 
							  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
				
			||||||
 | 
							  "m" (sum_step), "m" (old_ebx)
 | 
				
			||||||
 | 
							: "esi", "edi", "edx", "ecx", "eax"
 | 
				
			||||||
 | 
						);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,22 +4,28 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MIX_AREAS1 mix_areas1
 | 
					#define MIX_AREAS1 mix_areas1
 | 
				
			||||||
#define MIX_AREAS2 mix_areas2
 | 
					#define MIX_AREAS2 mix_areas2
 | 
				
			||||||
 | 
					#define MIX_AREAS3 mix_areas3
 | 
				
			||||||
#define LOCK_PREFIX ""
 | 
					#define LOCK_PREFIX ""
 | 
				
			||||||
#include "pcm_dmix_x86_64.h"
 | 
					#include "pcm_dmix_x86_64.h"
 | 
				
			||||||
#undef MIX_AREAS1
 | 
					#undef MIX_AREAS1
 | 
				
			||||||
#undef MIX_AREAS2
 | 
					#undef MIX_AREAS2
 | 
				
			||||||
 | 
					#undef MIX_AREAS3
 | 
				
			||||||
#undef LOCK_PREFIX
 | 
					#undef LOCK_PREFIX
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MIX_AREAS1 mix_areas1_smp
 | 
					#define MIX_AREAS1 mix_areas1_smp
 | 
				
			||||||
#define MIX_AREAS2 mix_areas2_smp
 | 
					#define MIX_AREAS2 mix_areas2_smp
 | 
				
			||||||
 | 
					#define MIX_AREAS3 mix_areas3_smp
 | 
				
			||||||
#define LOCK_PREFIX "lock ; "
 | 
					#define LOCK_PREFIX "lock ; "
 | 
				
			||||||
#include "pcm_dmix_x86_64.h"
 | 
					#include "pcm_dmix_x86_64.h"
 | 
				
			||||||
#undef MIX_AREAS1
 | 
					#undef MIX_AREAS1
 | 
				
			||||||
#undef MIX_AREAS2
 | 
					#undef MIX_AREAS2
 | 
				
			||||||
 | 
					#undef MIX_AREAS3
 | 
				
			||||||
#undef LOCK_PREFIX
 | 
					#undef LOCK_PREFIX
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
static unsigned long long dmix_supported_format =
 | 
					static unsigned long long dmix_supported_format =
 | 
				
			||||||
	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
 | 
						(1ULL << SND_PCM_FORMAT_S16_LE) |
 | 
				
			||||||
 | 
						(1ULL << SND_PCM_FORMAT_S32_LE) |
 | 
				
			||||||
 | 
						(1ULL << SND_PCM_FORMAT_S24_3LE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
					static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 | 
				
			||||||
	// printf("SMP: %i\n", smp);
 | 
						// printf("SMP: %i\n", smp);
 | 
				
			||||||
	dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
						dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 | 
				
			||||||
	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
						dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
 | 
				
			||||||
 | 
						dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
 | 
				
			||||||
	);
 | 
						);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 *  24-bit version
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void MIX_AREAS3(unsigned int size,
 | 
				
			||||||
 | 
							       volatile unsigned char *dst, unsigned char *src,
 | 
				
			||||||
 | 
							       volatile signed int *sum, size_t dst_step,
 | 
				
			||||||
 | 
							       size_t src_step, size_t sum_step)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long long old_rbx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 *  RSI - src
 | 
				
			||||||
 | 
						 *  RDI - dst
 | 
				
			||||||
 | 
						 *  RBX - sum
 | 
				
			||||||
 | 
						 *  ECX - old sample
 | 
				
			||||||
 | 
						 *  EAX - sample / temporary
 | 
				
			||||||
 | 
						 *  EDX - temporary
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__asm__ __volatile__ (
 | 
				
			||||||
 | 
							"\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tmovq %%rbx, %7\n"
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *  initialization, load ESI, EDI, EBX registers
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovq %1, %%rdi\n"
 | 
				
			||||||
 | 
							"\tmovq %2, %%rsi\n"
 | 
				
			||||||
 | 
							"\tmovq %3, %%rbx\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * while (size-- > 0) {
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tcmpl $0, %0\n"
 | 
				
			||||||
 | 
							"jz 6f\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\t.p2align 4,,15\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"1:"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   sample = *src;
 | 
				
			||||||
 | 
							 *   sum_sample = *sum;
 | 
				
			||||||
 | 
							 *   if (test_and_set_bit(0, dst) == 0)
 | 
				
			||||||
 | 
							 *     sample -= sum_sample;
 | 
				
			||||||
 | 
							 *   *sum += sample;
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tmovsbl 2(%%rsi), %%eax\n"
 | 
				
			||||||
 | 
							"\tmovswl (%%rsi), %%ecx\n"
 | 
				
			||||||
 | 
							"\tmovl (%%rbx), %%edx\n"
 | 
				
			||||||
 | 
							"\tsall $16, %%eax\n"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
 | 
				
			||||||
 | 
							"\tleal (%%ecx,%%eax,1), %%ecx\n"
 | 
				
			||||||
 | 
							"\tjc 2f\n"
 | 
				
			||||||
 | 
							"\tsubl %%edx, %%ecx\n"
 | 
				
			||||||
 | 
							"2:"
 | 
				
			||||||
 | 
							"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 *   do {
 | 
				
			||||||
 | 
							 *     sample = old_sample = *sum;
 | 
				
			||||||
 | 
							 *     saturate(sample);
 | 
				
			||||||
 | 
							 *     *dst = sample | 1;
 | 
				
			||||||
 | 
							 *   } while (old_sample != *sum);
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"3:"
 | 
				
			||||||
 | 
							"\tmovl (%%rbx), %%ecx\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\tmovl $0x7fffff, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovl $-0x7fffff, %%edx\n"
 | 
				
			||||||
 | 
							"\tcmpl %%eax, %%ecx\n"
 | 
				
			||||||
 | 
							"\tcmovng %%ecx, %%eax\n"
 | 
				
			||||||
 | 
							"\tcmpl %%edx, %%ecx\n"
 | 
				
			||||||
 | 
							"\tcmovl %%edx, %%eax\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							"\torl $1, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovw %%ax, (%%rdi)\n"
 | 
				
			||||||
 | 
							"\tshrl $16, %%eax\n"
 | 
				
			||||||
 | 
							"\tmovb %%al, 2(%%rdi)\n"
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
							"\tcmpl %%ecx, (%%rbx)\n"
 | 
				
			||||||
 | 
							"\tjnz 3b\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * while (size-- > 0)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							"\tadd %4, %%rdi\n"
 | 
				
			||||||
 | 
							"\tadd %5, %%rsi\n"
 | 
				
			||||||
 | 
							"\tadd %6, %%rbx\n"
 | 
				
			||||||
 | 
							"\tdecl %0\n"
 | 
				
			||||||
 | 
							"\tjnz 1b\n"
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
							"6:"
 | 
				
			||||||
 | 
							"\tmovq %7, %%rbx\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							: /* no output regs */
 | 
				
			||||||
 | 
							: "m" (size), "m" (dst), "m" (src),
 | 
				
			||||||
 | 
							  "m" (sum), "m" (dst_step), "m" (src_step),
 | 
				
			||||||
 | 
							  "m" (sum_step), "m" (old_rbx)
 | 
				
			||||||
 | 
							: "rsi", "rdi", "edx", "ecx", "eax"
 | 
				
			||||||
 | 
						);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue