mirror of
https://github.com/alsa-project/alsa-lib.git
synced 2025-11-03 09:01:52 -05:00
Performance optimization
The performance of rate conversion is optimized. - with S16 format, no format conversion is done - linear polation of expansion uses 16bit resolution
This commit is contained in:
parent
c7feb066b1
commit
a920ca39db
1 changed files with 155 additions and 94 deletions
|
|
@ -54,16 +54,8 @@ enum rate_type {
|
|||
RATE_TYPE_POLYPHASE, /* polyphase resampling */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
int init;
|
||||
int16_t old_sample, new_sample;
|
||||
int64_t sum;
|
||||
} linear;
|
||||
} u;
|
||||
} snd_pcm_rate_state_t;
|
||||
|
||||
typedef struct _snd_pcm_rate snd_pcm_rate_t;
|
||||
|
||||
typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
|
||||
snd_pcm_uframes_t dst_offset,
|
||||
snd_pcm_uframes_t dst_frames,
|
||||
|
|
@ -71,11 +63,9 @@ typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
|
|||
snd_pcm_uframes_t src_offset,
|
||||
snd_pcm_uframes_t src_frames,
|
||||
unsigned int channels,
|
||||
unsigned int getidx, unsigned int putidx,
|
||||
unsigned int arg,
|
||||
snd_pcm_rate_state_t *states);
|
||||
snd_pcm_rate_t *rate);
|
||||
|
||||
typedef struct {
|
||||
struct _snd_pcm_rate {
|
||||
snd_pcm_t *slave;
|
||||
int close_slave;
|
||||
snd_atomic_write_t watom;
|
||||
|
|
@ -86,72 +76,74 @@ typedef struct {
|
|||
unsigned int get_idx;
|
||||
unsigned int put_idx;
|
||||
unsigned int pitch;
|
||||
unsigned int pitch_shift; /* for expand interpolation */
|
||||
rate_f func;
|
||||
snd_pcm_format_t sformat;
|
||||
unsigned int srate;
|
||||
snd_pcm_rate_state_t *states;
|
||||
snd_pcm_channel_area_t *pareas; /* areas for splitted period (rate pcm) */
|
||||
snd_pcm_channel_area_t *sareas; /* areas for splitted period (slave pcm) */
|
||||
} snd_pcm_rate_t;
|
||||
};
|
||||
|
||||
static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
|
||||
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
|
||||
const snd_pcm_channel_area_t *src_areas,
|
||||
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
|
||||
unsigned int channels,
|
||||
unsigned int getidx, unsigned int putidx,
|
||||
unsigned int get_threshold,
|
||||
snd_pcm_rate_state_t *states)
|
||||
snd_pcm_rate_t *rate)
|
||||
{
|
||||
#define GET16_LABELS
|
||||
#define PUT16_LABELS
|
||||
#include "plugin_ops.h"
|
||||
#undef GET16_LABELS
|
||||
#undef PUT16_LABELS
|
||||
void *get = get16_labels[getidx];
|
||||
void *put = put16_labels[putidx];
|
||||
void *get = get16_labels[rate->get_idx];
|
||||
void *put = put16_labels[rate->put_idx];
|
||||
unsigned int get_threshold = rate->pitch;
|
||||
unsigned int channel;
|
||||
snd_pcm_uframes_t src_frames1 = 0;
|
||||
snd_pcm_uframes_t dst_frames1 = 0;
|
||||
snd_pcm_uframes_t src_frames1;
|
||||
snd_pcm_uframes_t dst_frames1;
|
||||
int16_t sample = 0;
|
||||
|
||||
for (channel = 0; channel < channels; ++channel) {
|
||||
//int xpos = 0;
|
||||
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
|
||||
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
|
||||
const char *src;
|
||||
char *dst;
|
||||
int src_step, dst_step;
|
||||
int16_t old_sample = states->u.linear.old_sample;
|
||||
int16_t new_sample = states->u.linear.new_sample;
|
||||
int16_t old_sample = 0;
|
||||
int16_t new_sample = 0;
|
||||
int old_weight, new_weight;
|
||||
unsigned int pos = 0;
|
||||
int init;
|
||||
src = snd_pcm_channel_area_addr(src_area, src_offset);
|
||||
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
|
||||
src_step = snd_pcm_channel_area_step(src_area);
|
||||
dst_step = snd_pcm_channel_area_step(dst_area);
|
||||
src_frames1 = 0;
|
||||
dst_frames1 = 0;
|
||||
if (!states->u.linear.init) {
|
||||
goto __force_get;
|
||||
} else {
|
||||
states->u.linear.init = 2;
|
||||
}
|
||||
init = 1;
|
||||
while (dst_frames1 < dst_frames) {
|
||||
if (states->u.linear.init == 2) {
|
||||
old_sample = new_sample;
|
||||
__force_get:
|
||||
goto *get;
|
||||
if (pos >= get_threshold) {
|
||||
src += src_step;
|
||||
src_frames1++;
|
||||
if (src_frames1 < src_frames) {
|
||||
old_sample = new_sample;
|
||||
goto *get;
|
||||
#define GET16_END after_get
|
||||
#include "plugin_ops.h"
|
||||
#undef GET16_END
|
||||
after_get:
|
||||
new_sample = sample;
|
||||
if (!states->u.linear.init)
|
||||
old_sample = sample;
|
||||
states->u.linear.init = 1;
|
||||
after_get:
|
||||
new_sample = sample;
|
||||
if (init) {
|
||||
init = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pos -= get_threshold;
|
||||
}
|
||||
sample = (((int64_t)old_sample * (int64_t)(get_threshold - pos)) + ((int64_t)new_sample * pos)) / get_threshold;
|
||||
//printf("sample[%i] = %i (old_sample = %i, new_sample = %i)\n", xpos++, sample, old_sample, new_sample);
|
||||
new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
|
||||
old_weight = 0x10000 - new_weight;
|
||||
sample = (old_sample * old_weight + new_sample * new_weight) >> 16;
|
||||
goto *put;
|
||||
#define PUT16_END after_put
|
||||
#include "plugin_ops.h"
|
||||
|
|
@ -160,24 +152,58 @@ static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
|
|||
dst += dst_step;
|
||||
dst_frames1++;
|
||||
pos += LINEAR_DIV;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* optimized version for S16 format */
|
||||
static void snd_pcm_rate_expand_s16(const snd_pcm_channel_area_t *dst_areas,
|
||||
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
|
||||
const snd_pcm_channel_area_t *src_areas,
|
||||
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
|
||||
unsigned int channels,
|
||||
snd_pcm_rate_t *rate)
|
||||
{
|
||||
unsigned int channel;
|
||||
snd_pcm_uframes_t src_frames1;
|
||||
snd_pcm_uframes_t dst_frames1;
|
||||
unsigned int get_threshold = rate->pitch;
|
||||
|
||||
for (channel = 0; channel < channels; ++channel) {
|
||||
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
|
||||
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
|
||||
const int16_t *src;
|
||||
int16_t *dst;
|
||||
int src_step, dst_step;
|
||||
int16_t old_sample;
|
||||
int16_t new_sample;
|
||||
int old_weight, new_weight;
|
||||
unsigned int pos;
|
||||
src = snd_pcm_channel_area_addr(src_area, src_offset);
|
||||
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
|
||||
src_step = snd_pcm_channel_area_step(src_area) >> 1;
|
||||
dst_step = snd_pcm_channel_area_step(dst_area) >> 1;
|
||||
src_frames1 = 0;
|
||||
dst_frames1 = 0;
|
||||
old_sample = new_sample = *src;
|
||||
pos = get_threshold;
|
||||
while (dst_frames1 < dst_frames) {
|
||||
if (pos >= get_threshold) {
|
||||
pos -= get_threshold;
|
||||
src += src_step;
|
||||
src_frames1++;
|
||||
states->u.linear.init = 2; /* get a new sample */
|
||||
//printf("new_src_pos = %i\n", (src - (char *)snd_pcm_channel_area_addr(src_area, src_offset)) / src_step);
|
||||
if (CHECK_SANITY(src_frames1 > src_frames)) {
|
||||
SNDERR("src_frames overflow");
|
||||
break;
|
||||
if (src_frames1 < src_frames) {
|
||||
old_sample = new_sample;
|
||||
new_sample = *src;
|
||||
}
|
||||
}
|
||||
new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
|
||||
old_weight = 0x10000 - new_weight;
|
||||
*dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
|
||||
dst += dst_step;
|
||||
dst_frames1++;
|
||||
pos += LINEAR_DIV;
|
||||
}
|
||||
if (dst_frames != dst_frames1) {
|
||||
SNDERR("dst_frames %lu, dst_frames1 %lu, src_frames %lu, src_frames1 %lu\n", dst_frames, dst_frames1, src_frames, src_frames1);
|
||||
}
|
||||
states->u.linear.old_sample = old_sample;
|
||||
states->u.linear.new_sample = new_sample;
|
||||
states++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -186,37 +212,32 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
|
|||
const snd_pcm_channel_area_t *src_areas,
|
||||
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
|
||||
unsigned int channels,
|
||||
unsigned int getidx, unsigned int putidx,
|
||||
unsigned int get_increment,
|
||||
snd_pcm_rate_state_t *states)
|
||||
snd_pcm_rate_t *rate)
|
||||
{
|
||||
#define GET16_LABELS
|
||||
#define PUT16_LABELS
|
||||
#include "plugin_ops.h"
|
||||
#undef GET16_LABELS
|
||||
#undef PUT16_LABELS
|
||||
void *get = get16_labels[getidx];
|
||||
void *put = put16_labels[putidx];
|
||||
void *get = get16_labels[rate->get_idx];
|
||||
void *put = put16_labels[rate->put_idx];
|
||||
unsigned int get_increment = rate->pitch;
|
||||
unsigned int channel;
|
||||
snd_pcm_uframes_t src_frames1 = 0;
|
||||
snd_pcm_uframes_t dst_frames1 = 0;
|
||||
snd_pcm_uframes_t src_frames1;
|
||||
snd_pcm_uframes_t dst_frames1;
|
||||
int16_t sample = 0;
|
||||
unsigned int pos = 0;
|
||||
unsigned int pos;
|
||||
|
||||
for (channel = 0; channel < channels; ++channel) {
|
||||
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
|
||||
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
|
||||
int64_t sum;
|
||||
const char *src;
|
||||
char *dst;
|
||||
int src_step, dst_step;
|
||||
int16_t old_sample = 0;
|
||||
int16_t new_sample = 0;
|
||||
int old_weight, new_weight;
|
||||
sum = states->u.linear.sum;
|
||||
//int16_t old_sample = states->u.linear.old_sample;
|
||||
pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
|
||||
states->u.linear.init = 0;
|
||||
src = snd_pcm_channel_area_addr(src_area, src_offset);
|
||||
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
|
||||
src_step = snd_pcm_channel_area_step(src_area);
|
||||
|
|
@ -253,13 +274,59 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
|
|||
}
|
||||
old_sample = new_sample;
|
||||
}
|
||||
|
||||
states->u.linear.sum = sum;
|
||||
states->u.linear.old_sample = sample;
|
||||
states++;
|
||||
}
|
||||
if (dst_frames != dst_frames1) {
|
||||
SNDERR("dst %lu, dst1 %lu, src %lu, src1 %lu pos = %u\n", dst_frames, dst_frames1, src_frames, src_frames1, pos );
|
||||
}
|
||||
|
||||
/* optimized version for S16 format */
|
||||
static void snd_pcm_rate_shrink_s16(const snd_pcm_channel_area_t *dst_areas,
|
||||
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
|
||||
const snd_pcm_channel_area_t *src_areas,
|
||||
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
|
||||
unsigned int channels,
|
||||
snd_pcm_rate_t *rate)
|
||||
{
|
||||
unsigned int get_increment = rate->pitch;
|
||||
unsigned int channel;
|
||||
snd_pcm_uframes_t src_frames1;
|
||||
snd_pcm_uframes_t dst_frames1;
|
||||
unsigned int pos = 0;
|
||||
|
||||
for (channel = 0; channel < channels; ++channel) {
|
||||
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
|
||||
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
|
||||
const int16_t *src;
|
||||
int16_t *dst;
|
||||
int src_step, dst_step;
|
||||
int16_t old_sample = 0;
|
||||
int16_t new_sample = 0;
|
||||
int old_weight, new_weight;
|
||||
pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
|
||||
src = snd_pcm_channel_area_addr(src_area, src_offset);
|
||||
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
|
||||
src_step = snd_pcm_channel_area_step(src_area) >> 1;
|
||||
dst_step = snd_pcm_channel_area_step(dst_area) >> 1 ;
|
||||
src_frames1 = 0;
|
||||
dst_frames1 = 0;
|
||||
while (src_frames1 < src_frames) {
|
||||
|
||||
new_sample = *src;
|
||||
src += src_step;
|
||||
src_frames1++;
|
||||
pos += get_increment;
|
||||
if (pos >= LINEAR_DIV) {
|
||||
pos -= LINEAR_DIV;
|
||||
old_weight = (pos << (32 - LINEAR_DIV_SHIFT)) / (get_increment >> (LINEAR_DIV_SHIFT - 16));
|
||||
new_weight = 0x10000 - old_weight;
|
||||
*dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
|
||||
dst += dst_step;
|
||||
dst_frames1++;
|
||||
if (CHECK_SANITY(dst_frames1 > dst_frames)) {
|
||||
SNDERR("dst_frames overflow");
|
||||
break;
|
||||
}
|
||||
}
|
||||
old_sample = new_sample;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -500,20 +567,23 @@ static int snd_pcm_rate_hw_params(snd_pcm_t *pcm, snd_pcm_hw_params_t * params)
|
|||
rate->get_idx = snd_pcm_linear_get_index(src_format, SND_PCM_FORMAT_S16);
|
||||
rate->put_idx = snd_pcm_linear_put_index(SND_PCM_FORMAT_S16, dst_format);
|
||||
if (src_rate < dst_rate) {
|
||||
rate->func = snd_pcm_rate_expand;
|
||||
if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
|
||||
rate->func = snd_pcm_rate_expand_s16;
|
||||
else
|
||||
rate->func = snd_pcm_rate_expand;
|
||||
/* pitch is get_threshold */
|
||||
} else {
|
||||
rate->func = snd_pcm_rate_shrink;
|
||||
if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
|
||||
rate->func = snd_pcm_rate_shrink_s16;
|
||||
else
|
||||
rate->func = snd_pcm_rate_shrink;
|
||||
/* pitch is get_increment */
|
||||
}
|
||||
rate->pitch = (((u_int64_t)dst_rate * LINEAR_DIV) + (src_rate / 2)) / src_rate;
|
||||
if (CHECK_SANITY(rate->states || rate->pareas)) {
|
||||
if (CHECK_SANITY(rate->pareas)) {
|
||||
SNDMSG("rate plugin already in use");
|
||||
return -EBUSY;
|
||||
}
|
||||
rate->states = malloc(channels * sizeof(*rate->states));
|
||||
if (rate->states == NULL)
|
||||
return -ENOMEM;
|
||||
if ((buffer_size / period_size) * period_size == buffer_size &&
|
||||
(slave->buffer_size / slave->period_size) * slave->period_size == slave->buffer_size)
|
||||
return 0;
|
||||
|
|
@ -550,10 +620,6 @@ static int snd_pcm_rate_hw_free(snd_pcm_t *pcm)
|
|||
rate->pareas = NULL;
|
||||
rate->sareas = NULL;
|
||||
}
|
||||
if (rate->states) {
|
||||
free(rate->states);
|
||||
rate->states = NULL;
|
||||
}
|
||||
return snd_pcm_hw_free(rate->slave);
|
||||
}
|
||||
|
||||
|
|
@ -657,6 +723,12 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
|
|||
return -EIO;
|
||||
}
|
||||
}
|
||||
if (rate->pitch >= LINEAR_DIV) {
|
||||
/* shift for expand linear interpolation */
|
||||
rate->pitch_shift = 0;
|
||||
while ((rate->pitch >> rate->pitch_shift) >= (1 << 16))
|
||||
rate->pitch_shift++;
|
||||
}
|
||||
recalc(pcm, &sparams->avail_min);
|
||||
rate->orig_avail_min = sparams->avail_min;
|
||||
recalc(pcm, &sparams->xfer_align);
|
||||
|
|
@ -682,15 +754,8 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
|
|||
static int snd_pcm_rate_init(snd_pcm_t *pcm)
|
||||
{
|
||||
snd_pcm_rate_t *rate = pcm->private_data;
|
||||
unsigned int k;
|
||||
switch (rate->type) {
|
||||
case RATE_TYPE_LINEAR:
|
||||
for (k = 0; k < pcm->channels; ++k) {
|
||||
rate->states[k].u.linear.sum = 0;
|
||||
rate->states[k].u.linear.old_sample = 0;
|
||||
rate->states[k].u.linear.new_sample = 0;
|
||||
rate->states[k].u.linear.init = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
@ -708,9 +773,7 @@ snd_pcm_rate_write_areas1(snd_pcm_t *pcm,
|
|||
snd_pcm_rate_t *rate = pcm->private_data;
|
||||
rate->func(slave_areas, slave_offset, rate->slave->period_size,
|
||||
areas, offset, pcm->period_size,
|
||||
pcm->channels,
|
||||
rate->get_idx, rate->put_idx,
|
||||
rate->pitch, rate->states);
|
||||
pcm->channels, rate);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -724,9 +787,7 @@ snd_pcm_rate_read_areas1(snd_pcm_t *pcm,
|
|||
snd_pcm_rate_t *rate = pcm->private_data;
|
||||
rate->func(areas, offset, pcm->period_size,
|
||||
slave_areas, slave_offset, rate->slave->period_size,
|
||||
pcm->channels,
|
||||
rate->get_idx, rate->put_idx,
|
||||
rate->pitch, rate->states);
|
||||
pcm->channels, rate);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue