mirror of
https://github.com/alsa-project/alsa-lib.git
synced 2025-10-31 22:25:35 -04:00
dmix: add S24_3LE support
Add support to the dmix plugin for the S24_3LE sample format, which is used by 24-bit USB devices. The optimized assembler version uses only 23 bits for sample data so that the lowest bit can be used for synchronization, because there is no 24-bit cmpxchg instruction.
This commit is contained in:
parent
46f81f994b
commit
45f63a8735
8 changed files with 404 additions and 7 deletions
|
|
@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
|
|||
case SND_PCM_FORMAT_S32_BE:
|
||||
case SND_PCM_FORMAT_S16_LE:
|
||||
case SND_PCM_FORMAT_S16_BE:
|
||||
case SND_PCM_FORMAT_S24_3LE:
|
||||
break;
|
||||
default:
|
||||
SNDERR("invalid format");
|
||||
|
|
|
|||
|
|
@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
|
|||
volatile signed int *sum, size_t dst_step,
|
||||
size_t src_step, size_t sum_step);
|
||||
|
||||
/*
 * Mixing callback type for samples addressed byte-wise (used for the
 * 3-byte S24_3LE format): size is the number of samples to mix, dst and
 * src are byte pointers to the first destination/source sample, and sum
 * points at the matching signed int accumulator.  The callers visible in
 * this change pass dst_step, src_step and sum_step as byte distances
 * between consecutive samples.
 */
typedef void (mix_areas3_t)(unsigned int size,
|
||||
volatile unsigned char *dst, unsigned char *src,
|
||||
volatile signed int *sum, size_t dst_step,
|
||||
size_t src_step, size_t sum_step);
|
||||
|
||||
struct slave_params {
|
||||
snd_pcm_format_t format;
|
||||
int rate;
|
||||
|
|
@ -120,6 +125,7 @@ struct snd_pcm_direct {
|
|||
signed int *sum_buffer; /* shared sum buffer */
|
||||
mix_areas1_t *mix_areas1;
|
||||
mix_areas2_t *mix_areas2;
|
||||
mix_areas3_t *mix_areas3;
|
||||
} dmix;
|
||||
struct {
|
||||
} dsnoop;
|
||||
|
|
|
|||
|
|
@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
|
|||
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
|
||||
dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
|
||||
}
|
||||
} else {
|
||||
} else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
|
||||
dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
|
||||
signed int *src;
|
||||
volatile signed int *dst;
|
||||
if (dmix->interleaved) {
|
||||
|
|
@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
|
|||
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
|
||||
dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
|
||||
}
|
||||
} else { /* SND_PCM_FORMAT_S24_3LE */
|
||||
unsigned char *src;
|
||||
volatile unsigned char *dst;
|
||||
if (dmix->interleaved) {
|
||||
/*
|
||||
* process all areas in one loop
|
||||
* it optimizes the memory accesses for this case
|
||||
*/
|
||||
dmix->u.dmix.mix_areas3(size * channels,
|
||||
((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
|
||||
((char *)src_areas[0].addr) + 3 * src_ofs * channels,
|
||||
dmix->u.dmix.sum_buffer + (dst_ofs * channels),
|
||||
3, 3, sizeof(signed int));
|
||||
return;
|
||||
}
|
||||
for (chn = 0; chn < channels; chn++) {
|
||||
dchn = dmix->bindings ? dmix->bindings[chn] : chn;
|
||||
if (dchn >= dmix->shmptr->s.channels)
|
||||
continue;
|
||||
src_step = src_areas[chn].step / 8;
|
||||
dst_step = dst_areas[dchn].step / 8;
|
||||
src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
|
||||
dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
|
||||
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
|
||||
dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
|||
/* non-concurrent version, supporting both endians */
|
||||
static unsigned long long dmix_supported_format =
|
||||
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
|
||||
(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
|
||||
(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
|
||||
(1ULL << SND_PCM_FORMAT_S24_3LE);
|
||||
|
||||
#include <byteswap.h>
|
||||
|
||||
|
|
@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Generic (non-atomic) mixer for packed 24-bit little-endian samples.
 *
 * The sample bytes are always read and written in little-endian order,
 * independent of the host byte order.
 *
 * size      number of samples to mix; 0 is a no-op
 * dst       first destination sample (3 bytes per sample)
 * src       first source sample (3 bytes per sample)
 * sum       accumulator belonging to the first destination sample
 * dst_step  byte distance between consecutive destination samples
 * src_step  byte distance between consecutive source samples
 * sum_step  byte distance between consecutive accumulators
 *
 * A destination sample whose three bytes are all zero is treated as not
 * yet written: the source sample is stored as-is and the accumulator is
 * (re)initialized with it.  Otherwise the source is added to the
 * accumulator and the result, saturated to the signed 24-bit range, is
 * stored to the destination.
 */
static void mix_areas3(unsigned int size,
		       volatile unsigned char *dst, unsigned char *src,
		       volatile signed int *sum, size_t dst_step,
		       size_t src_step, size_t sum_step)
{
	/* without this guard "--size" below would wrap around */
	if (size == 0)
		return;

	for (;;) {
		/* assemble the 24-bit value unsigned, then sign-extend */
		unsigned int raw = (unsigned int)src[0] |
				   ((unsigned int)src[1] << 8) |
				   ((unsigned int)src[2] << 16);
		signed int sample = (signed int)raw;
		unsigned int out;

		if (raw & 0x800000u)
			sample -= 0x1000000;

		if (!(dst[0] | dst[1] | dst[2])) {
			/* first writer of this period position */
			*sum = sample;
		} else {
			sample += *sum;
			*sum = sample;
			if (sample > 0x7fffff)
				sample = 0x7fffff;
			else if (sample < -0x800000)
				sample = -0x800000;
		}

		/* split via unsigned so no negative value is shifted */
		out = (unsigned int)sample;
		dst[0] = (unsigned char)(out & 0xff);
		dst[1] = (unsigned char)((out >> 8) & 0xff);
		dst[2] = (unsigned char)((out >> 16) & 0xff);

		if (!--size)
			return;
		dst += dst_step;
		src += src_step;
		sum = (volatile signed int *)((volatile char *)sum + sum_step);
	}
}
|
||||
|
||||
|
||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
||||
{
|
||||
|
|
@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
|||
dmix->u.dmix.mix_areas1 = mix_areas1_swap;
|
||||
dmix->u.dmix.mix_areas2 = mix_areas2_swap;
|
||||
}
|
||||
dmix->u.dmix.mix_areas3 = mix_areas3;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -5,33 +5,43 @@
|
|||
#define MIX_AREAS1 mix_areas1
|
||||
#define MIX_AREAS1_MMX mix_areas1_mmx
|
||||
#define MIX_AREAS2 mix_areas2
|
||||
#define MIX_AREAS3 mix_areas3
|
||||
#define MIX_AREAS3_CMOV mix_areas3_cmov
|
||||
#define LOCK_PREFIX ""
|
||||
#include "pcm_dmix_i386.h"
|
||||
#undef MIX_AREAS1
|
||||
#undef MIX_AREAS1_MMX
|
||||
#undef MIX_AREAS2
|
||||
#undef MIX_AREAS3
|
||||
#undef MIX_AREAS3_CMOV
|
||||
#undef LOCK_PREFIX
|
||||
|
||||
#define MIX_AREAS1 mix_areas1_smp
|
||||
#define MIX_AREAS1_MMX mix_areas1_smp_mmx
|
||||
#define MIX_AREAS2 mix_areas2_smp
|
||||
#define MIX_AREAS3 mix_areas3_smp
|
||||
#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
|
||||
#define LOCK_PREFIX "lock ; "
|
||||
#include "pcm_dmix_i386.h"
|
||||
#undef MIX_AREAS1
|
||||
#undef MIX_AREAS1_MMX
|
||||
#undef MIX_AREAS2
|
||||
#undef MIX_AREAS3
|
||||
#undef MIX_AREAS3_CMOV
|
||||
#undef LOCK_PREFIX
|
||||
|
||||
static unsigned long long dmix_supported_format =
|
||||
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
|
||||
(1ULL << SND_PCM_FORMAT_S16_LE) |
|
||||
(1ULL << SND_PCM_FORMAT_S32_LE) |
|
||||
(1ULL << SND_PCM_FORMAT_S24_3LE);
|
||||
|
||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
||||
{
|
||||
FILE *in;
|
||||
char line[255];
|
||||
int smp = 0, mmx = 0;
|
||||
int smp = 0, mmx = 0, cmov = 0;
|
||||
|
||||
/* try to determine, if we have a MMX capable CPU */
|
||||
/* try to determine the capabilities of the CPU */
|
||||
in = fopen("/proc/cpuinfo", "r");
|
||||
if (in) {
|
||||
while (!feof(in)) {
|
||||
|
|
@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
|||
else if (!strncmp(line, "flags", 5)) {
|
||||
if (strstr(line, " mmx"))
|
||||
mmx = 1;
|
||||
if (strstr(line, " cmov"))
|
||||
cmov = 1;
|
||||
}
|
||||
}
|
||||
fclose(in);
|
||||
}
|
||||
// printf("MMX: %i, SMP: %i\n", mmx, smp);
|
||||
if (mmx) {
|
||||
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
|
||||
} else {
|
||||
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
|
||||
}
|
||||
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
|
||||
if (cmov) {
|
||||
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
|
||||
} else {
|
||||
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
|
|||
: "esi", "edi", "edx", "ecx", "eax"
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* 24-bit version for plain i386
|
||||
*/
|
||||
static void MIX_AREAS3(unsigned int size,
|
||||
volatile unsigned char *dst, unsigned char *src,
|
||||
volatile signed int *sum, size_t dst_step,
|
||||
size_t src_step, size_t sum_step)
|
||||
{
|
||||
unsigned int old_ebx;
|
||||
|
||||
/*
|
||||
* ESI - src
|
||||
* EDI - dst
|
||||
* EBX - sum
|
||||
* ECX - old sample
|
||||
* EAX - sample / temporary
|
||||
* EDX - temporary
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
|
||||
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
||||
/*
|
||||
* initialization, load ESI, EDI, EBX registers
|
||||
*/
|
||||
"\tmovl %1, %%edi\n"
|
||||
"\tmovl %2, %%esi\n"
|
||||
"\tmovl %3, %%ebx\n"
|
||||
"\tcmpl $0, %0\n"
|
||||
"\tjnz 1f\n"
|
||||
"\tjmp 6f\n"
|
||||
|
||||
"\t.p2align 4,,15\n"
|
||||
|
||||
"1:"
|
||||
|
||||
/*
|
||||
* sample = *src;
|
||||
* sum_sample = *sum;
|
||||
* if (test_and_set_bit(0, dst) == 0)
|
||||
* sample -= sum_sample;
|
||||
* *sum += sample;
|
||||
*/
|
||||
"\tmovsbl 2(%%esi), %%eax\n"
|
||||
"\tmovzwl (%%esi), %%ecx\n"
|
||||
"\tmovl (%%ebx), %%edx\n"
|
||||
"\tsall $16, %%eax\n"
|
||||
"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
|
||||
"\tleal (%%ecx,%%eax,1), %%ecx\n"
|
||||
"\tjc 2f\n"
|
||||
"\tsubl %%edx, %%ecx\n"
|
||||
"2:"
|
||||
"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
|
||||
|
||||
/*
|
||||
* do {
|
||||
* sample = old_sample = *sum;
|
||||
* saturate(sample);
|
||||
* *dst = sample | 1;
|
||||
* } while (old_sample != *sum);
|
||||
*/
|
||||
|
||||
"3:"
|
||||
"\tmovl (%%ebx), %%ecx\n"
|
||||
/*
|
||||
* if (sample > 0x7fffff)
|
||||
*/
|
||||
"\tmovl $0x7fffff, %%eax\n"
|
||||
"\tcmpl %%eax, %%ecx\n"
|
||||
"\tjg 4f\n"
|
||||
/*
|
||||
* if (sample < -0x7fffff)
|
||||
*/
|
||||
"\tmovl $-0x7fffff, %%eax\n"
|
||||
"\tcmpl %%eax, %%ecx\n"
|
||||
"\tjl 4f\n"
|
||||
"\tmovl %%ecx, %%eax\n"
|
||||
"\torl $1, %%eax\n"
|
||||
"4:"
|
||||
"\tmovw %%ax, (%%edi)\n"
|
||||
"\tshrl $16, %%eax\n"
|
||||
"\tmovb %%al, 2(%%edi)\n"
|
||||
"\tcmpl %%ecx, (%%ebx)\n"
|
||||
"\tjnz 3b\n"
|
||||
|
||||
/*
|
||||
* while (size-- > 0)
|
||||
*/
|
||||
"\tdecl %0\n"
|
||||
"\tjz 6f\n"
|
||||
"\tadd %4, %%edi\n"
|
||||
"\tadd %5, %%esi\n"
|
||||
"\tadd %6, %%ebx\n"
|
||||
"\tjmp 1b\n"
|
||||
|
||||
"6:"
|
||||
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
||||
|
||||
: /* no output regs */
|
||||
: "m" (size), "m" (dst), "m" (src),
|
||||
"m" (sum), "m" (dst_step), "m" (src_step),
|
||||
"m" (sum_step), "m" (old_ebx)
|
||||
: "esi", "edi", "edx", "ecx", "eax"
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* 24-bit version for Pentium Pro/II
|
||||
*/
|
||||
static void MIX_AREAS3_CMOV(unsigned int size,
|
||||
volatile unsigned char *dst, unsigned char *src,
|
||||
volatile signed int *sum, size_t dst_step,
|
||||
size_t src_step, size_t sum_step)
|
||||
{
|
||||
unsigned int old_ebx;
|
||||
|
||||
/*
|
||||
* ESI - src
|
||||
* EDI - dst
|
||||
* EBX - sum
|
||||
* ECX - old sample
|
||||
* EAX - sample / temporary
|
||||
* EDX - temporary
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
|
||||
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
||||
/*
|
||||
* initialization, load ESI, EDI, EBX registers
|
||||
*/
|
||||
"\tmovl %1, %%edi\n"
|
||||
"\tmovl %2, %%esi\n"
|
||||
"\tmovl %3, %%ebx\n"
|
||||
"\tcmpl $0, %0\n"
|
||||
"\tjz 6f\n"
|
||||
|
||||
"\t.p2align 4,,15\n"
|
||||
|
||||
"1:"
|
||||
|
||||
/*
|
||||
* sample = *src;
|
||||
* sum_sample = *sum;
|
||||
* if (test_and_set_bit(0, dst) == 0)
|
||||
* sample -= sum_sample;
|
||||
* *sum += sample;
|
||||
*/
|
||||
"\tmovsbl 2(%%esi), %%eax\n"
|
||||
"\tmovzwl (%%esi), %%ecx\n"
|
||||
"\tmovl (%%ebx), %%edx\n"
|
||||
"\tsall $16, %%eax\n"
|
||||
"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
|
||||
"\tleal (%%ecx,%%eax,1), %%ecx\n"
|
||||
"\tjc 2f\n"
|
||||
"\tsubl %%edx, %%ecx\n"
|
||||
"2:"
|
||||
"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
|
||||
|
||||
/*
|
||||
* do {
|
||||
* sample = old_sample = *sum;
|
||||
* saturate(sample);
|
||||
* *dst = sample | 1;
|
||||
* } while (old_sample != *sum);
|
||||
*/
|
||||
|
||||
"3:"
|
||||
"\tmovl (%%ebx), %%ecx\n"
|
||||
|
||||
"\tmovl $0x7fffff, %%eax\n"
|
||||
"\tmovl $-0x7fffff, %%edx\n"
|
||||
"\tcmpl %%eax, %%ecx\n"
|
||||
"\tcmovng %%ecx, %%eax\n"
|
||||
"\tcmpl %%edx, %%ecx\n"
|
||||
"\tcmovl %%edx, %%eax\n"
|
||||
|
||||
"\torl $1, %%eax\n"
|
||||
"\tmovw %%ax, (%%edi)\n"
|
||||
"\tshrl $16, %%eax\n"
|
||||
"\tmovb %%al, 2(%%edi)\n"
|
||||
|
||||
"\tcmpl %%ecx, (%%ebx)\n"
|
||||
"\tjnz 3b\n"
|
||||
|
||||
/*
|
||||
* while (size-- > 0)
|
||||
*/
|
||||
"\tadd %4, %%edi\n"
|
||||
"\tadd %5, %%esi\n"
|
||||
"\tadd %6, %%ebx\n"
|
||||
"\tdecl %0\n"
|
||||
"\tjnz 1b\n"
|
||||
|
||||
"6:"
|
||||
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
||||
|
||||
: /* no output regs */
|
||||
: "m" (size), "m" (dst), "m" (src),
|
||||
"m" (sum), "m" (dst_step), "m" (src_step),
|
||||
"m" (sum_step), "m" (old_ebx)
|
||||
: "esi", "edi", "edx", "ecx", "eax"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,22 +4,28 @@
|
|||
|
||||
#define MIX_AREAS1 mix_areas1
|
||||
#define MIX_AREAS2 mix_areas2
|
||||
#define MIX_AREAS3 mix_areas3
|
||||
#define LOCK_PREFIX ""
|
||||
#include "pcm_dmix_x86_64.h"
|
||||
#undef MIX_AREAS1
|
||||
#undef MIX_AREAS2
|
||||
#undef MIX_AREAS3
|
||||
#undef LOCK_PREFIX
|
||||
|
||||
#define MIX_AREAS1 mix_areas1_smp
|
||||
#define MIX_AREAS2 mix_areas2_smp
|
||||
#define MIX_AREAS3 mix_areas3_smp
|
||||
#define LOCK_PREFIX "lock ; "
|
||||
#include "pcm_dmix_x86_64.h"
|
||||
#undef MIX_AREAS1
|
||||
#undef MIX_AREAS2
|
||||
#undef MIX_AREAS3
|
||||
#undef LOCK_PREFIX
|
||||
|
||||
static unsigned long long dmix_supported_format =
|
||||
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
|
||||
(1ULL << SND_PCM_FORMAT_S16_LE) |
|
||||
(1ULL << SND_PCM_FORMAT_S32_LE) |
|
||||
(1ULL << SND_PCM_FORMAT_S24_3LE);
|
||||
|
||||
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
||||
{
|
||||
|
|
@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
|
|||
// printf("SMP: %i\n", smp);
|
||||
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
|
||||
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
|
||||
dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
|
|||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* 24-bit version
|
||||
*/
|
||||
static void MIX_AREAS3(unsigned int size,
|
||||
volatile unsigned char *dst, unsigned char *src,
|
||||
volatile signed int *sum, size_t dst_step,
|
||||
size_t src_step, size_t sum_step)
|
||||
{
|
||||
unsigned long long old_rbx;
|
||||
|
||||
/*
|
||||
* RSI - src
|
||||
* RDI - dst
|
||||
* RBX - sum
|
||||
* ECX - old sample
|
||||
* EAX - sample / temporary
|
||||
* EDX - temporary
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
|
||||
"\tmovq %%rbx, %7\n"
|
||||
/*
|
||||
* initialization, load ESI, EDI, EBX registers
|
||||
*/
|
||||
"\tmovq %1, %%rdi\n"
|
||||
"\tmovq %2, %%rsi\n"
|
||||
"\tmovq %3, %%rbx\n"
|
||||
|
||||
/*
|
||||
* while (size-- > 0) {
|
||||
*/
|
||||
"\tcmpl $0, %0\n"
|
||||
"jz 6f\n"
|
||||
|
||||
"\t.p2align 4,,15\n"
|
||||
|
||||
"1:"
|
||||
|
||||
/*
|
||||
* sample = *src;
|
||||
* sum_sample = *sum;
|
||||
* if (test_and_set_bit(0, dst) == 0)
|
||||
* sample -= sum_sample;
|
||||
* *sum += sample;
|
||||
*/
|
||||
"\tmovsbl 2(%%rsi), %%eax\n"
|
||||
"\tmovswl (%%rsi), %%ecx\n"
|
||||
"\tmovl (%%rbx), %%edx\n"
|
||||
"\tsall $16, %%eax\n"
|
||||
"\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
|
||||
"\tleal (%%ecx,%%eax,1), %%ecx\n"
|
||||
"\tjc 2f\n"
|
||||
"\tsubl %%edx, %%ecx\n"
|
||||
"2:"
|
||||
"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
|
||||
|
||||
/*
|
||||
* do {
|
||||
* sample = old_sample = *sum;
|
||||
* saturate(sample);
|
||||
* *dst = sample | 1;
|
||||
* } while (old_sample != *sum);
|
||||
*/
|
||||
|
||||
"3:"
|
||||
"\tmovl (%%rbx), %%ecx\n"
|
||||
|
||||
"\tmovl $0x7fffff, %%eax\n"
|
||||
"\tmovl $-0x7fffff, %%edx\n"
|
||||
"\tcmpl %%eax, %%ecx\n"
|
||||
"\tcmovng %%ecx, %%eax\n"
|
||||
"\tcmpl %%edx, %%ecx\n"
|
||||
"\tcmovl %%edx, %%eax\n"
|
||||
|
||||
"\torl $1, %%eax\n"
|
||||
"\tmovw %%ax, (%%rdi)\n"
|
||||
"\tshrl $16, %%eax\n"
|
||||
"\tmovb %%al, 2(%%rdi)\n"
|
||||
|
||||
"\tcmpl %%ecx, (%%rbx)\n"
|
||||
"\tjnz 3b\n"
|
||||
|
||||
/*
|
||||
* while (size-- > 0)
|
||||
*/
|
||||
"\tadd %4, %%rdi\n"
|
||||
"\tadd %5, %%rsi\n"
|
||||
"\tadd %6, %%rbx\n"
|
||||
"\tdecl %0\n"
|
||||
"\tjnz 1b\n"
|
||||
|
||||
"6:"
|
||||
"\tmovq %7, %%rbx\n"
|
||||
|
||||
: /* no output regs */
|
||||
: "m" (size), "m" (dst), "m" (src),
|
||||
"m" (sum), "m" (dst_step), "m" (src_step),
|
||||
"m" (sum_step), "m" (old_rbx)
|
||||
: "rsi", "rdi", "edx", "ecx", "eax"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue