mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-11-05 13:29:57 -05:00
core: Refactor code to multiply s16 by volume
Move the code into the function pa_mult_s16_volume() in sample-util.h, and use 64 bit integers on 64 bit platforms (it's faster). Benchmark results: On i5, 2.5GHz (64-bit): Running suite(s): Mult-s16; 32 bit mult: 1272300 usec (avg: 12723, min = 12533, max = 18749, stddev = 620.48); 64 bit mult: 852241 usec (avg: 8522.41, min = 8420, max = 9148, stddev = 109.388); 100%: Checks: 1, Failures: 0, Errors: 0. On Pentium D, 3.4GHz (32-bit): Running suite(s): Mult-s16; 32 bit mult: 2228504 usec (avg: 22285, min = 18775, max = 29648, stddev = 3865.59); 64 bit mult: 5546861 usec (avg: 55468.6, min = 55028, max = 64924, stddev = 978.981); 100%: Checks: 1, Failures: 0, Errors: 0. On TI DM3730, Cortex-A8, 800MHz (32-bit): Running suite(s): Mult-s16; 32 bit mult: 23708900 usec (avg: 237089, min = 191864, max = 557312, stddev = 77503.6); 64 bit mult: 22190039 usec (avg: 221900, min = 177978, max = 480469, stddev = 68520.5); 100%: Checks: 1, Failures: 0, Errors: 0. There is a test program called mult-s16-test which checks that both functions compute the same results and compares their runtime. Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
This commit is contained in:
parent
b123cfa7c9
commit
8fa81a93c9
3 changed files with 37 additions and 92 deletions
|
|
@ -131,23 +131,10 @@ static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
|
|
||||||
for (i = 0; i < nstreams; i++) {
|
for (i = 0; i < nstreams; i++) {
|
||||||
pa_mix_info *m = streams + i;
|
pa_mix_info *m = streams + i;
|
||||||
int32_t v, lo, hi, cv = m->linear[channel].i;
|
int32_t cv = m->linear[channel].i;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0))
|
||||||
|
sum += pa_mult_s16_volume(*((int16_t*) m->ptr), m->linear[channel].i);
|
||||||
/* Multiplying the 32bit volume factor with the
|
|
||||||
* 16bit sample might result in an 48bit value. We
|
|
||||||
* want to do without 64 bit integers and hence do
|
|
||||||
* the multiplication independently for the HI and
|
|
||||||
* LO part of the volume. */
|
|
||||||
|
|
||||||
hi = cv >> 16;
|
|
||||||
lo = cv & 0xFFFF;
|
|
||||||
|
|
||||||
v = *((int16_t*) m->ptr);
|
|
||||||
v = ((v * lo) >> 16) + (v * hi);
|
|
||||||
sum += v;
|
|
||||||
}
|
|
||||||
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
|
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -170,17 +157,10 @@ static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
|
|
||||||
for (i = 0; i < nstreams; i++) {
|
for (i = 0; i < nstreams; i++) {
|
||||||
pa_mix_info *m = streams + i;
|
pa_mix_info *m = streams + i;
|
||||||
int32_t v, lo, hi, cv = m->linear[channel].i;
|
int32_t cv = m->linear[channel].i;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0))
|
||||||
|
sum += pa_mult_s16_volume(PA_INT16_SWAP(*((int16_t*) m->ptr)), cv);
|
||||||
hi = cv >> 16;
|
|
||||||
lo = cv & 0xFFFF;
|
|
||||||
|
|
||||||
v = PA_INT16_SWAP(*((int16_t*) m->ptr));
|
|
||||||
v = ((v * lo) >> 16) + (v * hi);
|
|
||||||
sum += v;
|
|
||||||
}
|
|
||||||
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
|
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -207,7 +187,6 @@ static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = *((int32_t*) m->ptr);
|
v = *((int32_t*) m->ptr);
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -238,7 +217,6 @@ static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = PA_INT32_SWAP(*((int32_t*) m->ptr));
|
v = PA_INT32_SWAP(*((int32_t*) m->ptr));
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -269,7 +247,6 @@ static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = (int32_t) (PA_READ24NE(m->ptr) << 8);
|
v = (int32_t) (PA_READ24NE(m->ptr) << 8);
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -300,7 +277,6 @@ static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = (int32_t) (PA_READ24RE(m->ptr) << 8);
|
v = (int32_t) (PA_READ24RE(m->ptr) << 8);
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -331,7 +307,6 @@ static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = (int32_t) (*((uint32_t*)m->ptr) << 8);
|
v = (int32_t) (*((uint32_t*)m->ptr) << 8);
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -362,7 +337,6 @@ static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned
|
||||||
int64_t v;
|
int64_t v;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
|
v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -392,7 +366,6 @@ static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned chann
|
||||||
int32_t v, cv = m->linear[channel].i;
|
int32_t v, cv = m->linear[channel].i;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
|
v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
|
||||||
v = (v * cv) >> 16;
|
v = (v * cv) >> 16;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -419,17 +392,10 @@ static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
|
||||||
|
|
||||||
for (i = 0; i < nstreams; i++) {
|
for (i = 0; i < nstreams; i++) {
|
||||||
pa_mix_info *m = streams + i;
|
pa_mix_info *m = streams + i;
|
||||||
int32_t v, hi, lo, cv = m->linear[channel].i;
|
int32_t cv = m->linear[channel].i;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0))
|
||||||
|
sum += pa_mult_s16_volume(st_ulaw2linear16(*((uint8_t*) m->ptr)), cv);
|
||||||
hi = cv >> 16;
|
|
||||||
lo = cv & 0xFFFF;
|
|
||||||
|
|
||||||
v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
|
|
||||||
v = ((v * lo) >> 16) + (v * hi);
|
|
||||||
sum += v;
|
|
||||||
}
|
|
||||||
m->ptr = (uint8_t*) m->ptr + 1;
|
m->ptr = (uint8_t*) m->ptr + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -452,17 +418,10 @@ static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
|
||||||
|
|
||||||
for (i = 0; i < nstreams; i++) {
|
for (i = 0; i < nstreams; i++) {
|
||||||
pa_mix_info *m = streams + i;
|
pa_mix_info *m = streams + i;
|
||||||
int32_t v, hi, lo, cv = m->linear[channel].i;
|
int32_t cv = m->linear[channel].i;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0))
|
||||||
|
sum += pa_mult_s16_volume(st_alaw2linear16(*((uint8_t*) m->ptr)), cv);
|
||||||
hi = cv >> 16;
|
|
||||||
lo = cv & 0xFFFF;
|
|
||||||
|
|
||||||
v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
|
|
||||||
v = ((v * lo) >> 16) + (v * hi);
|
|
||||||
sum += v;
|
|
||||||
}
|
|
||||||
m->ptr = (uint8_t*) m->ptr + 1;
|
m->ptr = (uint8_t*) m->ptr + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -488,7 +447,6 @@ static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigne
|
||||||
float v, cv = m->linear[channel].f;
|
float v, cv = m->linear[channel].f;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = *((float*) m->ptr);
|
v = *((float*) m->ptr);
|
||||||
v *= cv;
|
v *= cv;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
@ -517,7 +475,6 @@ static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigne
|
||||||
float v, cv = m->linear[channel].f;
|
float v, cv = m->linear[channel].f;
|
||||||
|
|
||||||
if (PA_LIKELY(cv > 0)) {
|
if (PA_LIKELY(cv > 0)) {
|
||||||
|
|
||||||
v = PA_FLOAT32_SWAP(*(float*) m->ptr);
|
v = PA_FLOAT32_SWAP(*(float*) m->ptr);
|
||||||
v *= cv;
|
v *= cv;
|
||||||
sum += v;
|
sum += v;
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,9 @@
|
||||||
USA.
|
USA.
|
||||||
***/
|
***/
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#include <pulse/gccmacro.h>
|
#include <pulse/gccmacro.h>
|
||||||
#include <pulse/sample.h>
|
#include <pulse/sample.h>
|
||||||
#include <pulse/volume.h>
|
#include <pulse/volume.h>
|
||||||
|
|
@ -53,6 +56,23 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss,
|
||||||
|
|
||||||
void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const void *src, size_t sstr, unsigned n);
|
void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const void *src, size_t sstr, unsigned n);
|
||||||
|
|
||||||
|
/* Scale a signed 16 bit sample by a fixed-point volume factor.
 *
 * Computes (v * cv) >> 16, i.e. cv is interpreted with 16 fractional
 * bits, so cv == 0x10000 returns v unchanged and cv == 0x8000 halves it.
 *
 * v:  the sample value.
 * cv: the volume factor.
 *
 * Returns the scaled sample as a 32 bit integer. The result is NOT
 * clamped to the int16_t range; callers that store it back into a
 * 16 bit sample have to clamp it themselves.
 *
 * Both code paths below shift a possibly negative value to the right,
 * which is implementation-defined in C; they yield the same result on
 * compilers that implement this as an arithmetic shift. */
static inline int32_t pa_mult_s16_volume(int16_t v, int32_t cv) {
#if (defined(__WORDSIZE) && __WORDSIZE == 64) || ((ULONG_MAX) > (UINT_MAX))
    /* Multiply with 64 bit integers on 64 bit platforms */
    return (int32_t) ((v * (int64_t) cv) >> 16);
#else
    /* Multiplying the 32 bit volume factor with the
     * 16 bit sample might result in a 48 bit value. We
     * want to do without 64 bit integers and hence do
     * the multiplication independently for the HI and
     * LO part of the volume.
     *
     * With cv == hi * 0x10000 + lo and 0 <= lo <= 0xFFFF,
     * (v * cv) >> 16 equals (v * hi) + ((v * lo) >> 16), and
     * neither partial product exceeds 32 bits. */
    int32_t hi = cv >> 16;
    int32_t lo = cv & 0xFFFF;

    return ((v * lo) >> 16) + (v * hi);
#endif
}
|
||||||
|
|
||||||
pa_usec_t pa_bytes_to_usec_round_up(uint64_t length, const pa_sample_spec *spec);
|
pa_usec_t pa_bytes_to_usec_round_up(uint64_t length, const pa_sample_spec *spec);
|
||||||
size_t pa_usec_to_bytes_round_up(pa_usec_t t, const pa_sample_spec *spec);
|
size_t pa_usec_to_bytes_round_up(pa_usec_t t, const pa_sample_spec *spec);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
#include <config.h>
|
#include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include <pulsecore/macro.h>
|
#include <pulsecore/macro.h>
|
||||||
#include <pulsecore/g711.h>
|
#include <pulsecore/g711.h>
|
||||||
#include <pulsecore/endianmacros.h>
|
#include <pulsecore/endianmacros.h>
|
||||||
|
|
@ -35,13 +34,8 @@ static void pa_volume_u8_c(uint8_t *samples, const int32_t *volumes, unsigned ch
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t = pa_mult_s16_volume(*samples - 0x80, volumes[channel]);
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
|
||||||
lo = volumes[channel] & 0xFFFF;
|
|
||||||
|
|
||||||
t = (int32_t) *samples - 0x80;
|
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
|
||||||
*samples++ = (uint8_t) (t + 0x80);
|
*samples++ = (uint8_t) (t + 0x80);
|
||||||
|
|
||||||
|
|
@ -54,13 +48,8 @@ static void pa_volume_alaw_c(uint8_t *samples, const int32_t *volumes, unsigned
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t = pa_mult_s16_volume(st_alaw2linear16(*samples), volumes[channel]);
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
|
||||||
lo = volumes[channel] & 0xFFFF;
|
|
||||||
|
|
||||||
t = (int32_t) st_alaw2linear16(*samples);
|
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
|
||||||
|
|
||||||
|
|
@ -73,13 +62,8 @@ static void pa_volume_ulaw_c(uint8_t *samples, const int32_t *volumes, unsigned
|
||||||
unsigned channel;
|
unsigned channel;
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t = pa_mult_s16_volume(st_ulaw2linear16(*samples), volumes[channel]);
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
|
||||||
lo = volumes[channel] & 0xFFFF;
|
|
||||||
|
|
||||||
t = (int32_t) st_ulaw2linear16(*samples);
|
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
|
||||||
|
|
||||||
|
|
@ -94,19 +78,8 @@ static void pa_volume_s16ne_c(int16_t *samples, const int32_t *volumes, unsigned
|
||||||
length /= sizeof(int16_t);
|
length /= sizeof(int16_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t = pa_mult_s16_volume(*samples, volumes[channel]);
|
||||||
|
|
||||||
/* Multiplying the 32bit volume factor with the 16bit
|
|
||||||
* sample might result in an 48bit value. We want to
|
|
||||||
* do without 64 bit integers and hence do the
|
|
||||||
* multiplication independently for the HI and LO part
|
|
||||||
* of the volume. */
|
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
|
||||||
lo = volumes[channel] & 0xFFFF;
|
|
||||||
|
|
||||||
t = (int32_t)(*samples);
|
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = (int16_t) t;
|
*samples++ = (int16_t) t;
|
||||||
|
|
||||||
|
|
@ -121,13 +94,8 @@ static void pa_volume_s16re_c(int16_t *samples, const int32_t *volumes, unsigned
|
||||||
length /= sizeof(int16_t);
|
length /= sizeof(int16_t);
|
||||||
|
|
||||||
for (channel = 0; length; length--) {
|
for (channel = 0; length; length--) {
|
||||||
int32_t t, hi, lo;
|
int32_t t = pa_mult_s16_volume(PA_INT16_SWAP(*samples), volumes[channel]);
|
||||||
|
|
||||||
hi = volumes[channel] >> 16;
|
|
||||||
lo = volumes[channel] & 0xFFFF;
|
|
||||||
|
|
||||||
t = (int32_t) PA_INT16_SWAP(*samples);
|
|
||||||
t = ((t * lo) >> 16) + (t * hi);
|
|
||||||
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
|
||||||
*samples++ = PA_INT16_SWAP((int16_t) t);
|
*samples++ = PA_INT16_SWAP((int16_t) t);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue