channelmix: optimize some more

This commit is contained in:
Wim Taymans 2018-11-21 16:45:31 +01:00
parent c8aa1f38df
commit 6ee3a6003e
3 changed files with 126 additions and 58 deletions

View file

@ -202,6 +202,68 @@ channelmix_f32_5p1_2_sse(void *data, int n_dst, void *dst[n_dst],
}
}
/**
 * FL+FR+FC+LFE+SL+SR -> FL+FR+FC+LFE (SSE implementation).
 *
 * Each side channel is folded into the front channel on the same side,
 * while the center and LFE channels are passed through:
 *
 *   dFL  = (sFL + sSL) * (v * 0.5)
 *   dFR  = (sFR + sSR) * (v * 0.5)
 *   dFC  = sFC  * v
 *   dLFE = sLFE * v
 *
 * \param data    not used by this implementation
 * \param n_dst   number of entries in \a dst
 * \param dst     destination float buffers; entries 0..3 are written as
 *                FL, FR, FC and LFE
 * \param n_src   number of entries in \a src (not used by this implementation)
 * \param src     source float buffers; entries 0..5 are read as
 *                FL, FR, FC, LFE, SL and SR
 * \param matrix  not used by this implementation
 * \param v       volume; when v <= VOLUME_MIN all \a n_dst buffers are zeroed
 * \param n_bytes size in bytes of each buffer
 *
 * All vector accesses use the unaligned load/store intrinsics.
 */
static void
channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
	   int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
{
	int i, n, n_samples = n_bytes / sizeof(float), unrolled, remain;
	float **d = (float **) dst;
	/* keep the const qualifier of the source buffers */
	const float **s = (const float **) src;
	const __m128 mix = _mm_set1_ps(v * 0.5f);
	const __m128 vol = _mm_set1_ps(v);
	__m128 avg;
	float *dFL = d[0], *dFR = d[1], *dFC = d[2], *dLFE = d[3];
	const float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3];
	const float *sSL = s[4], *sSR = s[5];

	/* muted: just write silence to every destination buffer */
	if (v <= VOLUME_MIN) {
		for (i = 0; i < n_dst; i++)
			memset(d[i], 0, n_bytes);
		return;
	}

	/* 4 samples per vector iteration, the rest one sample at a time */
	unrolled = n_samples / 4;
	remain = n_samples & 3;

	if (v == VOLUME_NORM) {
		/* unit volume: FC and LFE are plain copies */
		for (n = 0; unrolled--; n += 4) {
			avg = _mm_add_ps(_mm_loadu_ps(&sFL[n]), _mm_loadu_ps(&sSL[n]));
			_mm_storeu_ps(&dFL[n], _mm_mul_ps(avg, mix));
			avg = _mm_add_ps(_mm_loadu_ps(&sFR[n]), _mm_loadu_ps(&sSR[n]));
			_mm_storeu_ps(&dFR[n], _mm_mul_ps(avg, mix));
			_mm_storeu_ps(&dFC[n], _mm_loadu_ps(&sFC[n]));
			_mm_storeu_ps(&dLFE[n], _mm_loadu_ps(&sLFE[n]));
		}
		for (; remain--; n++) {
			avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
			_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix));
			/* scalar add, consistent with the FL path above */
			avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
			_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix));
			_mm_store_ss(&dFC[n], _mm_load_ss(&sFC[n]));
			_mm_store_ss(&dLFE[n], _mm_load_ss(&sLFE[n]));
		}
	} else {
		/* arbitrary volume: FC and LFE are scaled by v */
		for (n = 0; unrolled--; n += 4) {
			avg = _mm_add_ps(_mm_loadu_ps(&sFL[n]), _mm_loadu_ps(&sSL[n]));
			_mm_storeu_ps(&dFL[n], _mm_mul_ps(avg, mix));
			avg = _mm_add_ps(_mm_loadu_ps(&sFR[n]), _mm_loadu_ps(&sSR[n]));
			_mm_storeu_ps(&dFR[n], _mm_mul_ps(avg, mix));
			_mm_storeu_ps(&dFC[n], _mm_mul_ps(_mm_loadu_ps(&sFC[n]), vol));
			_mm_storeu_ps(&dLFE[n], _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), vol));
		}
		for (; remain--; n++) {
			avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
			_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix));
			/* scalar add, consistent with the FL path above */
			avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
			_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix));
			_mm_store_ss(&dFC[n], _mm_mul_ss(_mm_load_ss(&sFC[n]), vol));
			_mm_store_ss(&dLFE[n], _mm_mul_ss(_mm_load_ss(&sLFE[n]), vol));
		}
	}
}
/* FL+FR+FC+LFE+SL+SR -> FL+FR+RL+RR*/
static void
channelmix_f32_5p1_4_sse(void *data, int n_dst, void *dst[n_dst],

View file

@ -452,6 +452,10 @@ static const struct channelmix_info {
{ 6, MASK_5_1, 4, MASK_QUAD, channelmix_f32_5p1_4_sse, FEATURE_SSE },
#endif
{ 6, MASK_5_1, 4, MASK_QUAD, channelmix_f32_5p1_4, 0 },
#if defined (__SSE2__)
{ 6, MASK_5_1, 4, MASK_3_1, channelmix_f32_5p1_3p1_sse, FEATURE_SSE },
#endif
{ 6, MASK_5_1, 4, MASK_3_1, channelmix_f32_5p1_3p1, 0 },
{ 8, MASK_7_1, 2, MASK_STEREO, channelmix_f32_7p1_2, 0 },

View file

@ -47,7 +47,7 @@
struct impl;
#define DEFAULT_MUTE false
#define DEFAULT_VOLUME 1.0
#define DEFAULT_VOLUME 1.0f
struct props {
float volume;
@ -139,7 +139,9 @@ struct impl {
#define TRR 18
#define NUM_CHAN 19
#define SQRT3_2 1.22474487139158904909 /* sqrt(3/2) */
#define SQRT3_2 1.224744871f /* sqrt(3/2) */
#define SQRT1_2 0.707106781f
#define SQRT2 1.414213562f
#define MATRIX_NORMAL 0
#define MATRIX_DOLBY 1
@ -182,17 +184,17 @@ static int make_matrix(struct impl *this,
uint32_t src_chan, uint64_t src_mask,
uint32_t dst_chan, uint64_t dst_mask)
{
float matrix[NUM_CHAN][NUM_CHAN] = {{ 0 }};
float matrix[NUM_CHAN][NUM_CHAN] = {{ 0.0f }};
uint64_t unassigned;
int i, j, matrix_encoding = MATRIX_NORMAL, c;
float clev = M_SQRT1_2;
float slev = M_SQRT1_2;
float clev = SQRT1_2;
float slev = SQRT1_2;
float llev = 0.5f;
float max = 0.0f;
for (i = 0; i < NUM_CHAN; i++) {
if (src_mask & dst_mask & (1ULL << (i + 2)))
matrix[i][i]= 1.0;
matrix[i][i]= 1.0f;
}
unassigned = src_mask & ~dst_mask;
@ -205,8 +207,8 @@ static int make_matrix(struct impl *this,
matrix[FL][FC] += clev;
matrix[FR][FC] += clev;
} else {
matrix[FL][FC] += M_SQRT1_2;
matrix[FR][FC] += M_SQRT1_2;
matrix[FL][FC] += SQRT1_2;
matrix[FR][FC] += SQRT1_2;
}
} else
return -ENOTSUP;
@ -214,75 +216,75 @@ static int make_matrix(struct impl *this,
if (unassigned & STEREO){
if (dst_mask & _MASK(MONO)) {
matrix[M][FL] += 0.5;
matrix[M][FR] += 0.5;
matrix[M][FL] += 0.5f;
matrix[M][FR] += 0.5f;
}
else if (dst_mask & _MASK(FC)) {
matrix[FC][FL] += M_SQRT1_2;
matrix[FC][FR] += M_SQRT1_2;
matrix[FC][FL] += SQRT1_2;
matrix[FC][FR] += SQRT1_2;
if (src_mask & _MASK(FC))
matrix[FC][FC] = clev * M_SQRT2;
matrix[FC][FC] = clev * SQRT2;
} else
return -ENOTSUP;
}
if (unassigned & _MASK(RC)) {
if (dst_mask & _MASK(RL)){
matrix[RL][RC] += M_SQRT1_2;
matrix[RR][RC] += M_SQRT1_2;
matrix[RL][RC] += SQRT1_2;
matrix[RR][RC] += SQRT1_2;
} else if (dst_mask & _MASK(SL)) {
matrix[SL][RC] += M_SQRT1_2;
matrix[SR][RC] += M_SQRT1_2;
matrix[SL][RC] += SQRT1_2;
matrix[SR][RC] += SQRT1_2;
} else if(dst_mask & _MASK(FL)) {
if (matrix_encoding == MATRIX_DOLBY ||
matrix_encoding == MATRIX_DPLII) {
if (unassigned & (_MASK(RL)|_MASK(RR))) {
matrix[FL][RC] -= slev * M_SQRT1_2;
matrix[FR][RC] += slev * M_SQRT1_2;
matrix[FL][RC] -= slev * SQRT1_2;
matrix[FR][RC] += slev * SQRT1_2;
} else {
matrix[FL][RC] -= slev;
matrix[FR][RC] += slev;
}
} else {
matrix[FL][RC] += slev * M_SQRT1_2;
matrix[FR][RC] += slev * M_SQRT1_2;
matrix[FL][RC] += slev * SQRT1_2;
matrix[FR][RC] += slev * SQRT1_2;
}
} else if (dst_mask & _MASK(FC)) {
matrix[FC][RC] += slev * M_SQRT1_2;
matrix[FC][RC] += slev * SQRT1_2;
} else
return -ENOTSUP;
}
if (unassigned & _MASK(RL)) {
if (dst_mask & _MASK(RC)) {
matrix[RC][RL] += M_SQRT1_2;
matrix[RC][RR] += M_SQRT1_2;
matrix[RC][RL] += SQRT1_2;
matrix[RC][RR] += SQRT1_2;
} else if (dst_mask & _MASK(SL)) {
if (src_mask & _MASK(SL)) {
matrix[SL][RL] += M_SQRT1_2;
matrix[SR][RR] += M_SQRT1_2;
matrix[SL][RL] += SQRT1_2;
matrix[SR][RR] += SQRT1_2;
} else {
matrix[SL][RL] += 1.0;
matrix[SR][RR] += 1.0;
matrix[SL][RL] += 1.0f;
matrix[SR][RR] += 1.0f;
}
} else if (dst_mask & _MASK(FL)) {
if (matrix_encoding == MATRIX_DOLBY) {
matrix[FL][RL] -= slev * M_SQRT1_2;
matrix[FL][RR] -= slev * M_SQRT1_2;
matrix[FR][RL] += slev * M_SQRT1_2;
matrix[FR][RR] += slev * M_SQRT1_2;
matrix[FL][RL] -= slev * SQRT1_2;
matrix[FL][RR] -= slev * SQRT1_2;
matrix[FR][RL] += slev * SQRT1_2;
matrix[FR][RR] += slev * SQRT1_2;
} else if (matrix_encoding == MATRIX_DPLII) {
matrix[FL][RL] -= slev * SQRT3_2;
matrix[FL][RR] -= slev * M_SQRT1_2;
matrix[FR][RL] += slev * M_SQRT1_2;
matrix[FL][RR] -= slev * SQRT1_2;
matrix[FR][RL] += slev * SQRT1_2;
matrix[FR][RR] += slev * SQRT3_2;
} else {
matrix[FL][RL] += slev;
matrix[FR][RR] += slev;
}
} else if (dst_mask & _MASK(FC)) {
matrix[FC][RL]+= slev * M_SQRT1_2;
matrix[FC][RR]+= slev * M_SQRT1_2;
matrix[FC][RL]+= slev * SQRT1_2;
matrix[FC][RR]+= slev * SQRT1_2;
} else
return -ENOTSUP;
}
@ -290,44 +292,44 @@ static int make_matrix(struct impl *this,
if (unassigned & _MASK(SL)) {
if (dst_mask & _MASK(RL)) {
if (src_mask & _MASK(RL)) {
matrix[RL][SL] += M_SQRT1_2;
matrix[RR][SR] += M_SQRT1_2;
matrix[RL][SL] += SQRT1_2;
matrix[RR][SR] += SQRT1_2;
} else {
matrix[RL][SL] += 1.0;
matrix[RR][SR] += 1.0;
matrix[RL][SL] += 1.0f;
matrix[RR][SR] += 1.0f;
}
} else if (dst_mask & _MASK(RC)) {
matrix[RC][SL]+= M_SQRT1_2;
matrix[RC][SR]+= M_SQRT1_2;
matrix[RC][SL]+= SQRT1_2;
matrix[RC][SR]+= SQRT1_2;
} else if (dst_mask & _MASK(FL)) {
if (matrix_encoding == MATRIX_DOLBY) {
matrix[FL][SL] -= slev * M_SQRT1_2;
matrix[FL][SR] -= slev * M_SQRT1_2;
matrix[FR][SL] += slev * M_SQRT1_2;
matrix[FR][SR] += slev * M_SQRT1_2;
matrix[FL][SL] -= slev * SQRT1_2;
matrix[FL][SR] -= slev * SQRT1_2;
matrix[FR][SL] += slev * SQRT1_2;
matrix[FR][SR] += slev * SQRT1_2;
} else if (matrix_encoding == MATRIX_DPLII) {
matrix[FL][SL] -= slev * SQRT3_2;
matrix[FL][SR] -= slev * M_SQRT1_2;
matrix[FR][SL] += slev * M_SQRT1_2;
matrix[FL][SR] -= slev * SQRT1_2;
matrix[FR][SL] += slev * SQRT1_2;
matrix[FR][SR] += slev * SQRT3_2;
} else {
matrix[FL][SL] += slev;
matrix[FR][SR] += slev;
}
} else if (dst_mask & _MASK(FC)) {
matrix[FC][SL] += slev * M_SQRT1_2;
matrix[FC][SR] += slev * M_SQRT1_2;
matrix[FC][SL] += slev * SQRT1_2;
matrix[FC][SR] += slev * SQRT1_2;
} else
return -ENOTSUP;
}
if (unassigned & _MASK(FLC)) {
if (dst_mask & _MASK(FL)) {
matrix[FC][FLC]+= 1.0;
matrix[FC][FRC]+= 1.0;
matrix[FC][FLC]+= 1.0f;
matrix[FC][FRC]+= 1.0f;
} else if(dst_mask & _MASK(FC)) {
matrix[FC][FLC]+= M_SQRT1_2;
matrix[FC][FRC]+= M_SQRT1_2;
matrix[FC][FLC]+= SQRT1_2;
matrix[FC][FRC]+= SQRT1_2;
} else
return -ENOTSUP;
}
@ -335,15 +337,15 @@ static int make_matrix(struct impl *this,
if (dst_mask & _MASK(FC)) {
matrix[FC][LFE] += llev;
} else if (dst_mask & _MASK(FL)) {
matrix[FL][LFE] += llev * M_SQRT1_2;
matrix[FR][LFE] += llev * M_SQRT1_2;
matrix[FL][LFE] += llev * SQRT1_2;
matrix[FR][LFE] += llev * SQRT1_2;
} else
return -ENOTSUP;
}
c = 0;
for (i = 0; i < NUM_CHAN; i++) {
float sum = 0.0;
float sum = 0.0f;
if ((dst_mask & (1UL << (i + 2))) == 0)
continue;
for (j = 0; j < NUM_CHAN; j++) {
@ -1149,7 +1151,7 @@ static int impl_node_process(struct spa_node *node)
this->convert(this, n_dst_datas, dst_datas,
n_src_datas, src_datas,
this->matrix, this->props.mute ? 0.0 : this->props.volume,
this->matrix, this->props.mute ? 0.0f : this->props.volume,
n_bytes);
}