Performance optimization

The performance of rate conversion is optimized.
- with S16 format, no format conversion is done
- linear interpolation for expansion uses 16-bit resolution
This commit is contained in:
Takashi Iwai 2005-01-04 13:57:51 +00:00
parent c7feb066b1
commit a920ca39db

View file

@ -54,15 +54,7 @@ enum rate_type {
RATE_TYPE_POLYPHASE, /* polyphase resampling */ RATE_TYPE_POLYPHASE, /* polyphase resampling */
}; };
typedef struct { typedef struct _snd_pcm_rate snd_pcm_rate_t;
union {
struct {
int init;
int16_t old_sample, new_sample;
int64_t sum;
} linear;
} u;
} snd_pcm_rate_state_t;
typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas, typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_offset,
@ -71,11 +63,9 @@ typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_offset,
snd_pcm_uframes_t src_frames, snd_pcm_uframes_t src_frames,
unsigned int channels, unsigned int channels,
unsigned int getidx, unsigned int putidx, snd_pcm_rate_t *rate);
unsigned int arg,
snd_pcm_rate_state_t *states);
typedef struct { struct _snd_pcm_rate {
snd_pcm_t *slave; snd_pcm_t *slave;
int close_slave; int close_slave;
snd_atomic_write_t watom; snd_atomic_write_t watom;
@ -86,72 +76,74 @@ typedef struct {
unsigned int get_idx; unsigned int get_idx;
unsigned int put_idx; unsigned int put_idx;
unsigned int pitch; unsigned int pitch;
unsigned int pitch_shift; /* for expand interpolation */
rate_f func; rate_f func;
snd_pcm_format_t sformat; snd_pcm_format_t sformat;
unsigned int srate; unsigned int srate;
snd_pcm_rate_state_t *states;
snd_pcm_channel_area_t *pareas; /* areas for splitted period (rate pcm) */ snd_pcm_channel_area_t *pareas; /* areas for splitted period (rate pcm) */
snd_pcm_channel_area_t *sareas; /* areas for splitted period (slave pcm) */ snd_pcm_channel_area_t *sareas; /* areas for splitted period (slave pcm) */
} snd_pcm_rate_t; };
static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas, static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames, snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
const snd_pcm_channel_area_t *src_areas, const snd_pcm_channel_area_t *src_areas,
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
unsigned int channels, unsigned int channels,
unsigned int getidx, unsigned int putidx, snd_pcm_rate_t *rate)
unsigned int get_threshold,
snd_pcm_rate_state_t *states)
{ {
#define GET16_LABELS #define GET16_LABELS
#define PUT16_LABELS #define PUT16_LABELS
#include "plugin_ops.h" #include "plugin_ops.h"
#undef GET16_LABELS #undef GET16_LABELS
#undef PUT16_LABELS #undef PUT16_LABELS
void *get = get16_labels[getidx]; void *get = get16_labels[rate->get_idx];
void *put = put16_labels[putidx]; void *put = put16_labels[rate->put_idx];
unsigned int get_threshold = rate->pitch;
unsigned int channel; unsigned int channel;
snd_pcm_uframes_t src_frames1 = 0; snd_pcm_uframes_t src_frames1;
snd_pcm_uframes_t dst_frames1 = 0; snd_pcm_uframes_t dst_frames1;
int16_t sample = 0; int16_t sample = 0;
for (channel = 0; channel < channels; ++channel) { for (channel = 0; channel < channels; ++channel) {
//int xpos = 0;
const snd_pcm_channel_area_t *src_area = &src_areas[channel]; const snd_pcm_channel_area_t *src_area = &src_areas[channel];
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
const char *src; const char *src;
char *dst; char *dst;
int src_step, dst_step; int src_step, dst_step;
int16_t old_sample = states->u.linear.old_sample; int16_t old_sample = 0;
int16_t new_sample = states->u.linear.new_sample; int16_t new_sample = 0;
int old_weight, new_weight;
unsigned int pos = 0; unsigned int pos = 0;
int init;
src = snd_pcm_channel_area_addr(src_area, src_offset); src = snd_pcm_channel_area_addr(src_area, src_offset);
dst = snd_pcm_channel_area_addr(dst_area, dst_offset); dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
src_step = snd_pcm_channel_area_step(src_area); src_step = snd_pcm_channel_area_step(src_area);
dst_step = snd_pcm_channel_area_step(dst_area); dst_step = snd_pcm_channel_area_step(dst_area);
src_frames1 = 0; src_frames1 = 0;
dst_frames1 = 0; dst_frames1 = 0;
if (!states->u.linear.init) { init = 1;
goto __force_get;
} else {
states->u.linear.init = 2;
}
while (dst_frames1 < dst_frames) { while (dst_frames1 < dst_frames) {
if (states->u.linear.init == 2) { if (pos >= get_threshold) {
old_sample = new_sample; src += src_step;
__force_get: src_frames1++;
goto *get; if (src_frames1 < src_frames) {
old_sample = new_sample;
goto *get;
#define GET16_END after_get #define GET16_END after_get
#include "plugin_ops.h" #include "plugin_ops.h"
#undef GET16_END #undef GET16_END
after_get: after_get:
new_sample = sample; new_sample = sample;
if (!states->u.linear.init) if (init) {
old_sample = sample; init = 0;
states->u.linear.init = 1; continue;
}
}
pos -= get_threshold;
} }
sample = (((int64_t)old_sample * (int64_t)(get_threshold - pos)) + ((int64_t)new_sample * pos)) / get_threshold; new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
//printf("sample[%i] = %i (old_sample = %i, new_sample = %i)\n", xpos++, sample, old_sample, new_sample); old_weight = 0x10000 - new_weight;
sample = (old_sample * old_weight + new_sample * new_weight) >> 16;
goto *put; goto *put;
#define PUT16_END after_put #define PUT16_END after_put
#include "plugin_ops.h" #include "plugin_ops.h"
@ -160,24 +152,58 @@ static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
dst += dst_step; dst += dst_step;
dst_frames1++; dst_frames1++;
pos += LINEAR_DIV; pos += LINEAR_DIV;
}
}
}
/* optimized version for S16 format */
static void snd_pcm_rate_expand_s16(const snd_pcm_channel_area_t *dst_areas,
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
const snd_pcm_channel_area_t *src_areas,
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
unsigned int channels,
snd_pcm_rate_t *rate)
{
unsigned int channel;
snd_pcm_uframes_t src_frames1;
snd_pcm_uframes_t dst_frames1;
unsigned int get_threshold = rate->pitch;
for (channel = 0; channel < channels; ++channel) {
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
const int16_t *src;
int16_t *dst;
int src_step, dst_step;
int16_t old_sample;
int16_t new_sample;
int old_weight, new_weight;
unsigned int pos;
src = snd_pcm_channel_area_addr(src_area, src_offset);
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
src_step = snd_pcm_channel_area_step(src_area) >> 1;
dst_step = snd_pcm_channel_area_step(dst_area) >> 1;
src_frames1 = 0;
dst_frames1 = 0;
old_sample = new_sample = *src;
pos = get_threshold;
while (dst_frames1 < dst_frames) {
if (pos >= get_threshold) { if (pos >= get_threshold) {
pos -= get_threshold; pos -= get_threshold;
src += src_step; src += src_step;
src_frames1++; src_frames1++;
states->u.linear.init = 2; /* get a new sample */ if (src_frames1 < src_frames) {
//printf("new_src_pos = %i\n", (src - (char *)snd_pcm_channel_area_addr(src_area, src_offset)) / src_step); old_sample = new_sample;
if (CHECK_SANITY(src_frames1 > src_frames)) { new_sample = *src;
SNDERR("src_frames overflow");
break;
} }
} }
new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
old_weight = 0x10000 - new_weight;
*dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
dst += dst_step;
dst_frames1++;
pos += LINEAR_DIV;
} }
if (dst_frames != dst_frames1) {
SNDERR("dst_frames %lu, dst_frames1 %lu, src_frames %lu, src_frames1 %lu\n", dst_frames, dst_frames1, src_frames, src_frames1);
}
states->u.linear.old_sample = old_sample;
states->u.linear.new_sample = new_sample;
states++;
} }
} }
@ -186,37 +212,32 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
const snd_pcm_channel_area_t *src_areas, const snd_pcm_channel_area_t *src_areas,
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
unsigned int channels, unsigned int channels,
unsigned int getidx, unsigned int putidx, snd_pcm_rate_t *rate)
unsigned int get_increment,
snd_pcm_rate_state_t *states)
{ {
#define GET16_LABELS #define GET16_LABELS
#define PUT16_LABELS #define PUT16_LABELS
#include "plugin_ops.h" #include "plugin_ops.h"
#undef GET16_LABELS #undef GET16_LABELS
#undef PUT16_LABELS #undef PUT16_LABELS
void *get = get16_labels[getidx]; void *get = get16_labels[rate->get_idx];
void *put = put16_labels[putidx]; void *put = put16_labels[rate->put_idx];
unsigned int get_increment = rate->pitch;
unsigned int channel; unsigned int channel;
snd_pcm_uframes_t src_frames1 = 0; snd_pcm_uframes_t src_frames1;
snd_pcm_uframes_t dst_frames1 = 0; snd_pcm_uframes_t dst_frames1;
int16_t sample = 0; int16_t sample = 0;
unsigned int pos = 0; unsigned int pos;
for (channel = 0; channel < channels; ++channel) { for (channel = 0; channel < channels; ++channel) {
const snd_pcm_channel_area_t *src_area = &src_areas[channel]; const snd_pcm_channel_area_t *src_area = &src_areas[channel];
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
int64_t sum;
const char *src; const char *src;
char *dst; char *dst;
int src_step, dst_step; int src_step, dst_step;
int16_t old_sample = 0; int16_t old_sample = 0;
int16_t new_sample = 0; int16_t new_sample = 0;
int old_weight, new_weight; int old_weight, new_weight;
sum = states->u.linear.sum;
//int16_t old_sample = states->u.linear.old_sample;
pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */ pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
states->u.linear.init = 0;
src = snd_pcm_channel_area_addr(src_area, src_offset); src = snd_pcm_channel_area_addr(src_area, src_offset);
dst = snd_pcm_channel_area_addr(dst_area, dst_offset); dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
src_step = snd_pcm_channel_area_step(src_area); src_step = snd_pcm_channel_area_step(src_area);
@ -253,13 +274,59 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
} }
old_sample = new_sample; old_sample = new_sample;
} }
states->u.linear.sum = sum;
states->u.linear.old_sample = sample;
states++;
} }
if (dst_frames != dst_frames1) { }
SNDERR("dst %lu, dst1 %lu, src %lu, src1 %lu pos = %u\n", dst_frames, dst_frames1, src_frames, src_frames1, pos );
/* optimized version for S16 format */
static void snd_pcm_rate_shrink_s16(const snd_pcm_channel_area_t *dst_areas,
snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
const snd_pcm_channel_area_t *src_areas,
snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
unsigned int channels,
snd_pcm_rate_t *rate)
{
unsigned int get_increment = rate->pitch;
unsigned int channel;
snd_pcm_uframes_t src_frames1;
snd_pcm_uframes_t dst_frames1;
unsigned int pos = 0;
for (channel = 0; channel < channels; ++channel) {
const snd_pcm_channel_area_t *src_area = &src_areas[channel];
const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
const int16_t *src;
int16_t *dst;
int src_step, dst_step;
int16_t old_sample = 0;
int16_t new_sample = 0;
int old_weight, new_weight;
pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
src = snd_pcm_channel_area_addr(src_area, src_offset);
dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
src_step = snd_pcm_channel_area_step(src_area) >> 1;
dst_step = snd_pcm_channel_area_step(dst_area) >> 1 ;
src_frames1 = 0;
dst_frames1 = 0;
while (src_frames1 < src_frames) {
new_sample = *src;
src += src_step;
src_frames1++;
pos += get_increment;
if (pos >= LINEAR_DIV) {
pos -= LINEAR_DIV;
old_weight = (pos << (32 - LINEAR_DIV_SHIFT)) / (get_increment >> (LINEAR_DIV_SHIFT - 16));
new_weight = 0x10000 - old_weight;
*dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
dst += dst_step;
dst_frames1++;
if (CHECK_SANITY(dst_frames1 > dst_frames)) {
SNDERR("dst_frames overflow");
break;
}
}
old_sample = new_sample;
}
} }
} }
@ -500,20 +567,23 @@ static int snd_pcm_rate_hw_params(snd_pcm_t *pcm, snd_pcm_hw_params_t * params)
rate->get_idx = snd_pcm_linear_get_index(src_format, SND_PCM_FORMAT_S16); rate->get_idx = snd_pcm_linear_get_index(src_format, SND_PCM_FORMAT_S16);
rate->put_idx = snd_pcm_linear_put_index(SND_PCM_FORMAT_S16, dst_format); rate->put_idx = snd_pcm_linear_put_index(SND_PCM_FORMAT_S16, dst_format);
if (src_rate < dst_rate) { if (src_rate < dst_rate) {
rate->func = snd_pcm_rate_expand; if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
rate->func = snd_pcm_rate_expand_s16;
else
rate->func = snd_pcm_rate_expand;
/* pitch is get_threshold */ /* pitch is get_threshold */
} else { } else {
rate->func = snd_pcm_rate_shrink; if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
rate->func = snd_pcm_rate_shrink_s16;
else
rate->func = snd_pcm_rate_shrink;
/* pitch is get_increment */ /* pitch is get_increment */
} }
rate->pitch = (((u_int64_t)dst_rate * LINEAR_DIV) + (src_rate / 2)) / src_rate; rate->pitch = (((u_int64_t)dst_rate * LINEAR_DIV) + (src_rate / 2)) / src_rate;
if (CHECK_SANITY(rate->states || rate->pareas)) { if (CHECK_SANITY(rate->pareas)) {
SNDMSG("rate plugin already in use"); SNDMSG("rate plugin already in use");
return -EBUSY; return -EBUSY;
} }
rate->states = malloc(channels * sizeof(*rate->states));
if (rate->states == NULL)
return -ENOMEM;
if ((buffer_size / period_size) * period_size == buffer_size && if ((buffer_size / period_size) * period_size == buffer_size &&
(slave->buffer_size / slave->period_size) * slave->period_size == slave->buffer_size) (slave->buffer_size / slave->period_size) * slave->period_size == slave->buffer_size)
return 0; return 0;
@ -550,10 +620,6 @@ static int snd_pcm_rate_hw_free(snd_pcm_t *pcm)
rate->pareas = NULL; rate->pareas = NULL;
rate->sareas = NULL; rate->sareas = NULL;
} }
if (rate->states) {
free(rate->states);
rate->states = NULL;
}
return snd_pcm_hw_free(rate->slave); return snd_pcm_hw_free(rate->slave);
} }
@ -657,6 +723,12 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
return -EIO; return -EIO;
} }
} }
if (rate->pitch >= LINEAR_DIV) {
/* shift for expand linear interpolation */
rate->pitch_shift = 0;
while ((rate->pitch >> rate->pitch_shift) >= (1 << 16))
rate->pitch_shift++;
}
recalc(pcm, &sparams->avail_min); recalc(pcm, &sparams->avail_min);
rate->orig_avail_min = sparams->avail_min; rate->orig_avail_min = sparams->avail_min;
recalc(pcm, &sparams->xfer_align); recalc(pcm, &sparams->xfer_align);
@ -682,15 +754,8 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
static int snd_pcm_rate_init(snd_pcm_t *pcm) static int snd_pcm_rate_init(snd_pcm_t *pcm)
{ {
snd_pcm_rate_t *rate = pcm->private_data; snd_pcm_rate_t *rate = pcm->private_data;
unsigned int k;
switch (rate->type) { switch (rate->type) {
case RATE_TYPE_LINEAR: case RATE_TYPE_LINEAR:
for (k = 0; k < pcm->channels; ++k) {
rate->states[k].u.linear.sum = 0;
rate->states[k].u.linear.old_sample = 0;
rate->states[k].u.linear.new_sample = 0;
rate->states[k].u.linear.init = 0;
}
break; break;
default: default:
assert(0); assert(0);
@ -708,9 +773,7 @@ snd_pcm_rate_write_areas1(snd_pcm_t *pcm,
snd_pcm_rate_t *rate = pcm->private_data; snd_pcm_rate_t *rate = pcm->private_data;
rate->func(slave_areas, slave_offset, rate->slave->period_size, rate->func(slave_areas, slave_offset, rate->slave->period_size,
areas, offset, pcm->period_size, areas, offset, pcm->period_size,
pcm->channels, pcm->channels, rate);
rate->get_idx, rate->put_idx,
rate->pitch, rate->states);
return 0; return 0;
} }
@ -724,9 +787,7 @@ snd_pcm_rate_read_areas1(snd_pcm_t *pcm,
snd_pcm_rate_t *rate = pcm->private_data; snd_pcm_rate_t *rate = pcm->private_data;
rate->func(areas, offset, pcm->period_size, rate->func(areas, offset, pcm->period_size,
slave_areas, slave_offset, rate->slave->period_size, slave_areas, slave_offset, rate->slave->period_size,
pcm->channels, pcm->channels, rate);
rate->get_idx, rate->put_idx,
rate->pitch, rate->states);
return 0; return 0;
} }