pulseaudio/src/pulsecore/remap.c

467 lines
14 KiB
C
Raw Normal View History

/***
This file is part of PulseAudio.
Copyright 2004-2006 Lennart Poettering
Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com>
PulseAudio is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License,
or (at your option) any later version.
PulseAudio is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
***/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
remap: Add special remapping case which just re-arranges channels Input channels may just be copied to output channels, no mixing; this avoids the generic (slow) matrix remapping code in cases where channels are dropped or reordered. This makes use of the remap struct state introduced earlier. on Intel Core i7-870 @ 2.93 GHz (GCC 4.6, 64-bit): Checking special remap (s16, stereo rearrange) func: 126117 usec (avg: 1261.17, min = 1150, max = 2111, stddev = 117.332). orig: 190509 usec (avg: 1905.09, min = 1807, max = 2402, stddev = 100.984). Checking special remap (float, stereo rearrange) func: 194329 usec (avg: 1943.29, min = 1876, max = 2127, stddev = 64.3486). orig: 205263 usec (avg: 2052.63, min = 2005, max = 2452, stddev = 70.177). Checking special remap (s16, 4-channel rearrange) func: 278754 usec (avg: 2787.54, min = 2719, max = 3093, stddev = 78.22). orig: 383885 usec (avg: 3838.85, min = 3634, max = 4121, stddev = 128.522). Checking special remap (float, 4-channel rearrange) func: 312429 usec (avg: 3124.29, min = 3017, max = 3498, stddev = 120.127). orig: 388198 usec (avg: 3881.98, min = 3768, max = 4655, stddev = 138.441). on ARM Cortex-A8 (TI OMAP3 DM3730 @ 1GHz) (Linaro GCC 4.6): Checking special remap (s16, stereo rearrange) func: 1204647 usec (avg: 12046.5, min = 10406, max = 25451, stddev = 2491.9). orig: 1660311 usec (avg: 16603.1, min = 14740, max = 20416, stddev = 1708.07). Checking special remap (float, stereo rearrange) func: 1391392 usec (avg: 13913.9, min = 12207, max = 28260, stddev = 2238.12). orig: 9246707 usec (avg: 92467.1, min = 87525, max = 125611, stddev = 5494.64). Checking special remap (s16, 4-channel rearrange) func: 2540225 usec (avg: 25402.2, min = 16937, max = 68268, stddev = 10786.7). orig: 3319852 usec (avg: 33198.5, min = 29571, max = 36957, stddev = 1250.39). Checking special remap (float, 4-channel rearrange) func: 3024414 usec (avg: 30244.1, min = 26153, max = 58105, stddev = 4506.01). 
orig: 12643624 usec (avg: 126436, min = 120575, max = 159088, stddev = 5519.28). benchmark code will be posted as follow-up patches Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
2014-04-16 15:07:25 +02:00
#include <pulse/xmalloc.h>
#include <pulse/sample.h>
#include <pulse/volume.h>
#include <pulsecore/log.h>
#include <pulsecore/macro.h>
#include "cpu.h"
#include "remap.h"
/* Expand mono s16 samples to interleaved stereo by writing every input
 * sample to two consecutive output slots. n is the number of input samples;
 * m is not used by this special case. */
static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four input samples per pass */
    for (; quads > 0; quads--, src += 4, dst += 8) {
        dst[1] = src[0];
        dst[0] = dst[1];
        dst[3] = src[1];
        dst[2] = dst[3];
        dst[5] = src[2];
        dst[4] = dst[5];
        dst[7] = src[3];
        dst[6] = dst[7];
    }

    /* remaining 0..3 samples */
    for (; rest > 0; rest--, src++, dst += 2) {
        dst[1] = src[0];
        dst[0] = dst[1];
    }
}
/* Expand mono float samples to interleaved stereo by writing every input
 * sample to two consecutive output slots. n is the number of input samples;
 * m is not used by this special case. */
static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four input samples per pass */
    for (; quads > 0; quads--, src += 4, dst += 8) {
        dst[1] = src[0];
        dst[0] = dst[1];
        dst[3] = src[1];
        dst[2] = dst[3];
        dst[5] = src[2];
        dst[4] = dst[5];
        dst[7] = src[3];
        dst[6] = dst[7];
    }

    /* remaining 0..3 samples */
    for (; rest > 0; rest--, src++, dst += 2) {
        dst[1] = src[0];
        dst[0] = dst[1];
    }
}
remap: Add stereo to mono and 4-channel special case remapping The generic matrix remapping is rather inefficient; special-case code improves performance by 3x easily. v4: split into s16 and float code, 4-channel remapping v3: fix remap_mono_to_stereo_c(), use assignment v2: use consistent array addressing on Intel Core i7-870 @ 2.93 GHz (GCC 4.6, 64-bit): Checking special remap (float, mono->stereo) func: 70392 usec (avg: 703.92, min = 583, max = 1879, stddev = 295.192). orig: 193042 usec (avg: 1930.42, min = 1457, max = 2269, stddev = 89.9045). Checking special remap (float, mono->4-channel) func: 118408 usec (avg: 1184.08, min = 1151, max = 1454, stddev = 57.1244). orig: 380074 usec (avg: 3800.74, min = 3740, max = 4180, stddev = 96.3389). Checking special remap (s16, mono->stereo) func: 60574 usec (avg: 605.74, min = 582, max = 659, stddev = 20.7681). orig: 188262 usec (avg: 1882.62, min = 1804, max = 2167, stddev = 79.17). Checking special remap (s16, mono->4-channel) func: 120331 usec (avg: 1203.31, min = 1151, max = 1429, stddev = 55.2863). orig: 376028 usec (avg: 3760.28, min = 3609, max = 4096, stddev = 122.043). Checking special remap (float, stereo->mono) func: 61408 usec (avg: 614.08, min = 580, max = 867, stddev = 50.933). orig: 186484 usec (avg: 1864.84, min = 1808, max = 2121, stddev = 65.3967). Checking special remap (float, 4-channel->mono) func: 118101 usec (avg: 1181.01, min = 1157, max = 1383, stddev = 36.4474). orig: 365191 usec (avg: 3651.91, min = 3540, max = 4083, stddev = 117.509). Checking special remap (s16, stereo->mono) func: 82908 usec (avg: 829.08, min = 795, max = 953, stddev = 33.3409). orig: 182565 usec (avg: 1825.65, min = 1774, max = 2117, stddev = 65.5401). Checking special remap (s16, 4-channel->mono) func: 132025 usec (avg: 1320.25, min = 1284, max = 1509, stddev = 47.0133). orig: 363347 usec (avg: 3633.47, min = 3560, max = 4012, stddev = 111.259). 
on ARM Cortex-A8 (TI OMAP3 DM3730 @ 1GHz) (Linaro GCC 4.6): Checking special remap (float, mono->stereo) func: 1213562 usec (avg: 12135.6, min = 4669, max = 16266, stddev = 2067.64). orig: 9251927 usec (avg: 92519.3, min = 87372, max = 134216, stddev = 5965.79). Checking special remap (float, mono->4-channel) func: 2479550 usec (avg: 24795.5, min = 7507, max = 29358, stddev = 2690.16). orig: 13186133 usec (avg: 131861, min = 119843, max = 263855, stddev = 27309). Checking special remap (s16, mono->stereo) func: 471894 usec (avg: 4718.94, min = 4058, max = 9583, stddev = 1302.7). orig: 1673826 usec (avg: 16738.3, min = 14679, max = 31342, stddev = 2271.67). Checking special remap (s16, mono->4-channel) func: 869508 usec (avg: 8695.08, min = 7019, max = 19165, stddev = 1866.94). orig: 3317020 usec (avg: 33170.2, min = 29327, max = 47577, stddev = 2029.11). Checking special remap (float, stereo->mono) func: 4405182 usec (avg: 44051.8, min = 41443, max = 77912, stddev = 4160.54). orig: 13245064 usec (avg: 132451, min = 125244, max = 182282, stddev = 8543.93). Checking special remap (float, 4-channel->mono) func: 8607974 usec (avg: 86079.7, min = 81909, max = 116608, stddev = 4311.52). orig: 26326036 usec (avg: 263260, min = 255097, max = 312928, stddev = 10111.5). Checking special remap (s16, stereo->mono) func: 1209135 usec (avg: 12091.4, min = 10742, max = 16632, stddev = 1633.88). orig: 3081515 usec (avg: 30815.2, min = 27008, max = 50537, stddev = 3124.35). Checking special remap (s16, 4-channel->mono) func: 1653868 usec (avg: 16538.7, min = 14648, max = 20721, stddev = 1834.52). orig: 6017854 usec (avg: 60178.5, min = 56061, max = 89569, stddev = 4052.86). benchmark code will be posted as follow-up patches Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
2014-04-16 19:02:02 +02:00
/* Downmix interleaved stereo s16 to mono: each output sample is the sum of
 * one input pair divided by two (operands are promoted to int before the
 * addition). n is the number of output samples; m is not used. */
static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four frames per pass */
    while (quads > 0) {
        dst[0] = (src[0] + src[1]) / 2;
        dst[1] = (src[2] + src[3]) / 2;
        dst[2] = (src[4] + src[5]) / 2;
        dst[3] = (src[6] + src[7]) / 2;
        src += 8;
        dst += 4;
        quads--;
    }

    /* remaining 0..3 frames */
    while (rest > 0) {
        *dst = (src[0] + src[1]) / 2;
        dst++;
        src += 2;
        rest--;
    }
}
/* Downmix interleaved stereo float to mono: each output sample is the sum of
 * one input pair scaled by 0.5. n is the number of output samples; m is not
 * used. */
static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four frames per pass */
    while (quads > 0) {
        dst[0] = (src[0] + src[1]) * 0.5f;
        dst[1] = (src[2] + src[3]) * 0.5f;
        dst[2] = (src[4] + src[5]) * 0.5f;
        dst[3] = (src[6] + src[7]) * 0.5f;
        src += 8;
        dst += 4;
        quads--;
    }

    /* remaining 0..3 frames */
    while (rest > 0) {
        *dst = (src[0] + src[1]) * 0.5f;
        dst++;
        src += 2;
        rest--;
    }
}
/* Expand mono s16 samples to four interleaved channels by replicating every
 * input sample into four consecutive output slots. n is the number of input
 * samples; m is not used. */
static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four input samples -> sixteen output samples */
    for (; quads > 0; quads--, src += 4, dst += 16) {
        dst[3] = src[0];
        dst[2] = dst[3];
        dst[1] = dst[2];
        dst[0] = dst[1];

        dst[7] = src[1];
        dst[6] = dst[7];
        dst[5] = dst[6];
        dst[4] = dst[5];

        dst[11] = src[2];
        dst[10] = dst[11];
        dst[9] = dst[10];
        dst[8] = dst[9];

        dst[15] = src[3];
        dst[14] = dst[15];
        dst[13] = dst[14];
        dst[12] = dst[13];
    }

    /* remaining 0..3 samples */
    for (; rest > 0; rest--, src++, dst += 4) {
        dst[3] = src[0];
        dst[2] = dst[3];
        dst[1] = dst[2];
        dst[0] = dst[1];
    }
}
/* Expand mono float samples to four interleaved channels by replicating
 * every input sample into four consecutive output slots. n is the number of
 * input samples; m is not used. */
static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four input samples -> sixteen output samples */
    for (; quads > 0; quads--, src += 4, dst += 16) {
        dst[3] = src[0];
        dst[2] = dst[3];
        dst[1] = dst[2];
        dst[0] = dst[1];

        dst[7] = src[1];
        dst[6] = dst[7];
        dst[5] = dst[6];
        dst[4] = dst[5];

        dst[11] = src[2];
        dst[10] = dst[11];
        dst[9] = dst[10];
        dst[8] = dst[9];

        dst[15] = src[3];
        dst[14] = dst[15];
        dst[13] = dst[14];
        dst[12] = dst[13];
    }

    /* remaining 0..3 samples */
    for (; rest > 0; rest--, src++, dst += 4) {
        dst[3] = src[0];
        dst[2] = dst[3];
        dst[1] = dst[2];
        dst[0] = dst[1];
    }
}
/* Downmix four interleaved s16 channels to mono: each output sample is the
 * sum of one 4-sample input frame divided by four (operands are promoted to
 * int before the addition). n is the number of frames; m is not used. */
static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four frames per pass */
    while (quads > 0) {
        dst[0] = (src[0] + src[1] + src[2] + src[3]) / 4;
        dst[1] = (src[4] + src[5] + src[6] + src[7]) / 4;
        dst[2] = (src[8] + src[9] + src[10] + src[11]) / 4;
        dst[3] = (src[12] + src[13] + src[14] + src[15]) / 4;
        src += 16;
        dst += 4;
        quads--;
    }

    /* remaining 0..3 frames */
    while (rest > 0) {
        *dst = (src[0] + src[1] + src[2] + src[3]) / 4;
        dst++;
        src += 4;
        rest--;
    }
}
/* Downmix four interleaved float channels to mono: each output sample is the
 * sum of one 4-sample input frame scaled by 0.25. n is the number of frames;
 * m is not used. */
static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    unsigned quads = n / 4;
    unsigned rest = n % 4;

    /* unrolled main loop: four frames per pass */
    while (quads > 0) {
        dst[0] = (src[0] + src[1] + src[2] + src[3]) * 0.25f;
        dst[1] = (src[4] + src[5] + src[6] + src[7]) * 0.25f;
        dst[2] = (src[8] + src[9] + src[10] + src[11]) * 0.25f;
        dst[3] = (src[12] + src[13] + src[14] + src[15]) * 0.25f;
        src += 16;
        dst += 4;
        quads--;
    }

    /* remaining 0..3 frames */
    while (rest > 0) {
        *dst = (src[0] + src[1] + src[2] + src[3]) * 0.25f;
        dst++;
        src += 4;
        rest--;
    }
}
/* Generic s16 remapper driven by the integer mixing matrix.
 *
 * The output buffer (n frames of o_ss.channels samples) is cleared first and
 * then, for every (output channel, input channel) pair, the input channel is
 * accumulated into the output channel according to m->map_table_i[oc][ic]:
 *   - gain <= 0:        the pair contributes nothing,
 *   - gain >= 0x10000:  the input sample is added unscaled,
 *   - otherwise:        the sample is multiplied by the gain and shifted
 *                       right by 16 before being added.
 * No clipping is applied while accumulating. */
static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    unsigned oc, ic, i;
    unsigned n_ic, n_oc;

    n_ic = m->i_ss.channels;
    n_oc = m->o_ss.channels;

    /* start from silence, contributions are summed on top */
    memset(dst, 0, n * sizeof(int16_t) * n_oc);

    for (oc = 0; oc < n_oc; oc++) {

        for (ic = 0; ic < n_ic; ic++) {
            /* both buffers are interleaved: step by the channel count */
            int16_t *d = dst + oc;
            const int16_t *s = src + ic;
            int32_t vol = m->map_table_i[oc][ic];

            if (vol <= 0)
                continue;

            if (vol >= 0x10000) {
                /* unity (or larger) gain: plain add, no multiply */
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d += *s;
            } else {
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d += (int16_t) (((int32_t)*s * vol) >> 16);
            }
        }
    }
}
/* Generic float remapper driven by the float mixing matrix.
 *
 * Clears the output buffer, then accumulates every input channel into every
 * output channel using m->map_table_f[out][in]: gains <= 0 are skipped,
 * gains >= 1 add the sample unscaled, anything else adds sample * gain. */
static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    unsigned in_channels, out_channels;
    unsigned out_ch, in_ch, frames;

    in_channels = m->i_ss.channels;
    out_channels = m->o_ss.channels;

    /* contributions are summed on top of silence */
    memset(dst, 0, n * sizeof(float) * out_channels);

    for (out_ch = 0; out_ch < out_channels; out_ch++) {

        for (in_ch = 0; in_ch < in_channels; in_ch++) {
            float *out = dst + out_ch;
            const float *in = src + in_ch;
            float gain = m->map_table_f[out_ch][in_ch];

            if (gain <= 0.0f)
                continue;

            if (gain >= 1.0f) {
                /* unity (or larger) gain: plain add, no multiply */
                for (frames = n; frames > 0; frames--) {
                    *out += *in;
                    in += in_channels;
                    out += out_channels;
                }
            } else {
                for (frames = n; frames > 0; frames--) {
                    *out += *in * gain;
                    in += in_channels;
                    out += out_channels;
                }
            }
        }
    }
}
/* Try to express the remap matrix as a pure channel re-arrangement.
 *
 * For every output channel, arrange[] receives the index of the single input
 * channel that feeds it with gain 0x10000, or -1 if no input feeds it.
 * Returns false as soon as an output channel would need a different gain or
 * more than one input (arrange[] is then only partially filled in), and also
 * when no output channel is fed at all. */
bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]) {
    unsigned in_ch, out_ch;
    unsigned in_count, out_count;
    unsigned assigned = 0;

    pa_assert(m);

    in_count = m->i_ss.channels;
    out_count = m->o_ss.channels;

    for (out_ch = 0; out_ch < out_count; out_ch++) {
        arrange[out_ch] = -1;

        for (in_ch = 0; in_ch < in_count; in_ch++) {
            const int32_t gain = m->map_table_i[out_ch][in_ch];

            if (gain != 0) {
                /* anything but a single unity-gain source means mixing */
                if (!(gain == 0x10000 && arrange[out_ch] < 0))
                    return false;

                arrange[out_ch] = in_ch;
                assigned++;
            }
        }
    }

    return assigned != 0;
}
/* Mono output, s16: copy the input channel selected by the first entry of
 * the arrange table stored in m->state, one sample per input frame. */
static void remap_arrange_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int16_t *in = src + map[0];

    while (n > 0) {
        *dst = *in;
        dst++;
        in += stride;
        n--;
    }
}
remap: Add special remapping case which just re-arranges channels Input channels may just be copied to output channels, no mixing; this avoids the generic (slow) matrix remapping code in cases where channels are dropped or reordered. This makes use of the remap struct state introduced earlier. on Intel Core i7-870 @ 2.93 GHz (GCC 4.6, 64-bit): Checking special remap (s16, stereo rearrange) func: 126117 usec (avg: 1261.17, min = 1150, max = 2111, stddev = 117.332). orig: 190509 usec (avg: 1905.09, min = 1807, max = 2402, stddev = 100.984). Checking special remap (float, stereo rearrange) func: 194329 usec (avg: 1943.29, min = 1876, max = 2127, stddev = 64.3486). orig: 205263 usec (avg: 2052.63, min = 2005, max = 2452, stddev = 70.177). Checking special remap (s16, 4-channel rearrange) func: 278754 usec (avg: 2787.54, min = 2719, max = 3093, stddev = 78.22). orig: 383885 usec (avg: 3838.85, min = 3634, max = 4121, stddev = 128.522). Checking special remap (float, 4-channel rearrange) func: 312429 usec (avg: 3124.29, min = 3017, max = 3498, stddev = 120.127). orig: 388198 usec (avg: 3881.98, min = 3768, max = 4655, stddev = 138.441). on ARM Cortex-A8 (TI OMAP3 DM3730 @ 1GHz) (Linaro GCC 4.6): Checking special remap (s16, stereo rearrange) func: 1204647 usec (avg: 12046.5, min = 10406, max = 25451, stddev = 2491.9). orig: 1660311 usec (avg: 16603.1, min = 14740, max = 20416, stddev = 1708.07). Checking special remap (float, stereo rearrange) func: 1391392 usec (avg: 13913.9, min = 12207, max = 28260, stddev = 2238.12). orig: 9246707 usec (avg: 92467.1, min = 87525, max = 125611, stddev = 5494.64). Checking special remap (s16, 4-channel rearrange) func: 2540225 usec (avg: 25402.2, min = 16937, max = 68268, stddev = 10786.7). orig: 3319852 usec (avg: 33198.5, min = 29571, max = 36957, stddev = 1250.39). Checking special remap (float, 4-channel rearrange) func: 3024414 usec (avg: 30244.1, min = 26153, max = 58105, stddev = 4506.01). 
orig: 12643624 usec (avg: 126436, min = 120575, max = 159088, stddev = 5519.28). benchmark code will be posted as follow-up patches Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
2014-04-16 15:07:25 +02:00
/* Stereo output, s16: each output channel copies the input channel named by
 * the arrange table in m->state; a negative entry yields silence (0). */
static void remap_arrange_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int8_t first = map[0];
    const int8_t second = map[1];

    while (n > 0) {
        dst[0] = (first < 0) ? 0 : src[first];
        dst[1] = (second < 0) ? 0 : src[second];
        dst += 2;
        src += stride;
        n--;
    }
}
/* Four-channel output, s16: each output channel copies the input channel
 * named by the arrange table in m->state; a negative entry yields
 * silence (0). */
static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int8_t first = map[0];
    const int8_t second = map[1];
    const int8_t third = map[2];
    const int8_t fourth = map[3];

    while (n > 0) {
        dst[0] = (first < 0) ? 0 : src[first];
        dst[1] = (second < 0) ? 0 : src[second];
        dst[2] = (third < 0) ? 0 : src[third];
        dst[3] = (fourth < 0) ? 0 : src[fourth];
        dst += 4;
        src += stride;
        n--;
    }
}
/* Mono output, float: copy the input channel selected by the first entry of
 * the arrange table stored in m->state, one sample per input frame. */
static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const float *in = src + map[0];

    while (n > 0) {
        *dst = *in;
        dst++;
        in += stride;
        n--;
    }
}
remap: Add special remapping case which just re-arranges channels Input channels may just be copied to output channels, no mixing; this avoids the generic (slow) matrix remapping code in cases where channels are dropped or reordered. This makes use of the remap struct state introduced earlier. on Intel Core i7-870 @ 2.93 GHz (GCC 4.6, 64-bit): Checking special remap (s16, stereo rearrange) func: 126117 usec (avg: 1261.17, min = 1150, max = 2111, stddev = 117.332). orig: 190509 usec (avg: 1905.09, min = 1807, max = 2402, stddev = 100.984). Checking special remap (float, stereo rearrange) func: 194329 usec (avg: 1943.29, min = 1876, max = 2127, stddev = 64.3486). orig: 205263 usec (avg: 2052.63, min = 2005, max = 2452, stddev = 70.177). Checking special remap (s16, 4-channel rearrange) func: 278754 usec (avg: 2787.54, min = 2719, max = 3093, stddev = 78.22). orig: 383885 usec (avg: 3838.85, min = 3634, max = 4121, stddev = 128.522). Checking special remap (float, 4-channel rearrange) func: 312429 usec (avg: 3124.29, min = 3017, max = 3498, stddev = 120.127). orig: 388198 usec (avg: 3881.98, min = 3768, max = 4655, stddev = 138.441). on ARM Cortex-A8 (TI OMAP3 DM3730 @ 1GHz) (Linaro GCC 4.6): Checking special remap (s16, stereo rearrange) func: 1204647 usec (avg: 12046.5, min = 10406, max = 25451, stddev = 2491.9). orig: 1660311 usec (avg: 16603.1, min = 14740, max = 20416, stddev = 1708.07). Checking special remap (float, stereo rearrange) func: 1391392 usec (avg: 13913.9, min = 12207, max = 28260, stddev = 2238.12). orig: 9246707 usec (avg: 92467.1, min = 87525, max = 125611, stddev = 5494.64). Checking special remap (s16, 4-channel rearrange) func: 2540225 usec (avg: 25402.2, min = 16937, max = 68268, stddev = 10786.7). orig: 3319852 usec (avg: 33198.5, min = 29571, max = 36957, stddev = 1250.39). Checking special remap (float, 4-channel rearrange) func: 3024414 usec (avg: 30244.1, min = 26153, max = 58105, stddev = 4506.01). 
orig: 12643624 usec (avg: 126436, min = 120575, max = 159088, stddev = 5519.28). benchmark code will be posted as follow-up patches Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
2014-04-16 15:07:25 +02:00
/* Stereo output, float: each output channel copies the input channel named
 * by the arrange table in m->state; a negative entry yields silence (0.0). */
static void remap_arrange_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int first = map[0];
    const int second = map[1];

    while (n > 0) {
        dst[0] = (first < 0) ? 0.0f : src[first];
        dst[1] = (second < 0) ? 0.0f : src[second];
        dst += 2;
        src += stride;
        n--;
    }
}
/* Four-channel output, float: each output channel copies the input channel
 * named by the arrange table in m->state; a negative entry yields
 * silence (0.0). */
static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int first = map[0];
    const int second = map[1];
    const int third = map[2];
    const int fourth = map[3];

    while (n > 0) {
        dst[0] = (first < 0) ? 0.0f : src[first];
        dst[1] = (second < 0) ? 0.0f : src[second];
        dst[2] = (third < 0) ? 0.0f : src[third];
        dst[3] = (fourth < 0) ? 0.0f : src[fourth];
        dst += 4;
        src += stride;
        n--;
    }
}
/* Install the remap routine matching m->format: func_s16 for
 * PA_SAMPLE_S16NE, func_float for PA_SAMPLE_FLOAT32NE. Any other format is
 * treated as unreachable. */
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
    pa_do_remap_func_t func_float) {

    pa_assert(m);

    if (m->format == PA_SAMPLE_S16NE) {
        m->do_remap = func_s16;
        return;
    }

    if (m->format == PA_SAMPLE_FLOAT32NE) {
        m->do_remap = func_float;
        return;
    }

    pa_assert_not_reached();
}
/* When true, init_remap_c() skips all special-case remappers and always
 * installs the generic matrix code; set by pa_remap_func_init(). */
static bool force_generic_code = false;
/* Set the function that will execute the remapping based on the matrices.
 *
 * Selection order:
 *   1. generic matrix code if force_generic_code is set,
 *   2. fixed special cases recognised from the integer matrix
 *      (mono<->stereo, mono<->4-channel),
 *   3. pure channel re-arrangement for 1, 2 or 4 output channels; the
 *      arrange table is duplicated into m->state for the remap function,
 *   4. generic matrix code otherwise. */
static void init_remap_c(pa_remap_t *m) {
    unsigned n_oc, n_ic;
    int8_t arrange[PA_CHANNELS_MAX];

    n_oc = m->o_ss.channels;
    n_ic = m->i_ss.channels;

    /* find some common channel remappings, fall back to full matrix operation. */
    if (force_generic_code) {
        pa_log_info("Forced to use generic matrix remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
            (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
        return;
    }

    if (n_ic == 1 && n_oc == 2 &&
            m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {

        pa_log_info("Using mono to stereo remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
            (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
    } else if (n_ic == 2 && n_oc == 1 &&
            m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {

        pa_log_info("Using stereo to mono remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
            (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c);
    } else if (n_ic == 1 && n_oc == 4 &&
            m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
            m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) {

        pa_log_info("Using mono to 4-channel remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
            (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
    } else if (n_ic == 4 && n_oc == 1 &&
            m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
            m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) {

        pa_log_info("Using 4-channel to mono remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
            (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c);
    } else if ((n_oc == 1 || n_oc == 2 || n_oc == 4) &&
            pa_setup_remap_arrange(m, arrange)) {

        /* The cheap channel-count test comes first so the matrix is scanned
         * at most once, and only when an arrange routine exists for n_oc. */
        if (n_oc == 1) {
            pa_log_info("Using mono arrange remapping");
            pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c,
                (pa_do_remap_func_t) remap_arrange_mono_float32ne_c);
        } else if (n_oc == 2) {
            pa_log_info("Using stereo arrange remapping");
            pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c,
                (pa_do_remap_func_t) remap_arrange_stereo_float32ne_c);
        } else {
            pa_log_info("Using 4-channel arrange remapping");
            pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c,
                (pa_do_remap_func_t) remap_arrange_ch4_float32ne_c);
        }

        /* setup state: the arrange routines read the table from m->state */
        m->state = pa_xnewdup(int8_t, arrange, PA_CHANNELS_MAX);
    } else {
        pa_log_info("Using generic matrix remapping");
        pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
            (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
    }
}
/* Currently installed remap init function. Defaults to the C implementation
 * and can be replaced through pa_set_init_remap_func(). */
static pa_init_remap_func_t init_remap_func = init_remap_c;
/* Choose the remap routine for m: run the installed init function and, if
 * it leaves m->do_remap unset, fall back to the C implementation. */
void pa_init_remap_func(pa_remap_t *m) {
    pa_assert(init_remap_func);

    /* cleared first so we can tell whether the installed init picked anything */
    m->do_remap = NULL;
    init_remap_func(m);

    if (!m->do_remap)
        init_remap_c(m);
}
/* Return the remap init function that pa_init_remap_func() currently calls. */
pa_init_remap_func_t pa_get_init_remap_func(void) {
return init_remap_func;
}
/* Replace the remap init function used by pa_init_remap_func(). func is
 * stored unchecked here; pa_init_remap_func() asserts it is non-NULL. */
void pa_set_init_remap_func(pa_init_remap_func_t func) {
init_remap_func = func;
}
/* Copy the force_generic_code setting from cpu_info into this file's flag,
 * which init_remap_c() consults when choosing a remap routine. */
void pa_remap_func_init(const pa_cpu_info *cpu_info) {
force_generic_code = cpu_info->force_generic_code;
}