core: Refactor code to multiply s16 by volume

Move the code into the function pa_mult_s16_volume() in sample-util.h.
Use 64-bit integers on 64-bit platforms (it's faster there; see the timings below).

on i5, 2.5GHz (64-bit)

Running suite(s): Mult-s16
32 bit mult: 1272300 usec (avg: 12723, min = 12533, max = 18749, stddev = 620.48).
64 bit mult: 852241 usec (avg: 8522.41, min = 8420, max = 9148, stddev = 109.388).
100%: Checks: 1, Failures: 0, Errors: 0

on Pentium D, 3.4GHz (32-bit)

Running suite(s): Mult-s16
32 bit mult: 2228504 usec (avg: 22285, min = 18775, max = 29648, stddev = 3865.59).
64 bit mult: 5546861 usec (avg: 55468.6, min = 55028, max = 64924, stddev = 978.981).
100%: Checks: 1, Failures: 0, Errors: 0

on TI DM3730, Cortex-A8, 800MHz (32-bit)

Running suite(s): Mult-s16
32 bit mult: 23708900 usec (avg: 237089, min = 191864, max = 557312, stddev = 77503.6).
64 bit mult: 22190039 usec (avg: 221900, min = 177978, max = 480469, stddev = 68520.5).
100%: Checks: 1, Failures: 0, Errors: 0

There is a test program called mult-s16-test which checks that the functions compute the
same results and compares their runtime.

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
This commit is contained in:
Peter Meerwald 2013-02-13 17:27:05 +01:00 committed by Tanu Kaskinen
parent b123cfa7c9
commit 8fa81a93c9
3 changed files with 37 additions and 92 deletions

View file

@ -24,7 +24,6 @@
#include <config.h>
#endif
#include <pulsecore/macro.h>
#include <pulsecore/g711.h>
#include <pulsecore/endianmacros.h>
@ -35,13 +34,8 @@ static void pa_volume_u8_c(uint8_t *samples, const int32_t *volumes, unsigned ch
unsigned channel;
for (channel = 0; length; length--) {
int32_t t, hi, lo;
int32_t t = pa_mult_s16_volume(*samples - 0x80, volumes[channel]);
hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF;
t = (int32_t) *samples - 0x80;
t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
*samples++ = (uint8_t) (t + 0x80);
@ -54,13 +48,8 @@ static void pa_volume_alaw_c(uint8_t *samples, const int32_t *volumes, unsigned
unsigned channel;
for (channel = 0; length; length--) {
int32_t t, hi, lo;
int32_t t = pa_mult_s16_volume(st_alaw2linear16(*samples), volumes[channel]);
hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF;
t = (int32_t) st_alaw2linear16(*samples);
t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
@ -73,13 +62,8 @@ static void pa_volume_ulaw_c(uint8_t *samples, const int32_t *volumes, unsigned
unsigned channel;
for (channel = 0; length; length--) {
int32_t t, hi, lo;
int32_t t = pa_mult_s16_volume(st_ulaw2linear16(*samples), volumes[channel]);
hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF;
t = (int32_t) st_ulaw2linear16(*samples);
t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
@ -94,19 +78,8 @@ static void pa_volume_s16ne_c(int16_t *samples, const int32_t *volumes, unsigned
length /= sizeof(int16_t);
for (channel = 0; length; length--) {
int32_t t, hi, lo;
int32_t t = pa_mult_s16_volume(*samples, volumes[channel]);
/* Multiplying the 32bit volume factor with the 16bit
* sample might result in an 48bit value. We want to
* do without 64 bit integers and hence do the
* multiplication independently for the HI and LO part
* of the volume. */
hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF;
t = (int32_t)(*samples);
t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = (int16_t) t;
@ -121,13 +94,8 @@ static void pa_volume_s16re_c(int16_t *samples, const int32_t *volumes, unsigned
length /= sizeof(int16_t);
for (channel = 0; length; length--) {
int32_t t, hi, lo;
int32_t t = pa_mult_s16_volume(PA_INT16_SWAP(*samples), volumes[channel]);
hi = volumes[channel] >> 16;
lo = volumes[channel] & 0xFFFF;
t = (int32_t) PA_INT16_SWAP(*samples);
t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*samples++ = PA_INT16_SWAP((int16_t) t);