mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-11-02 09:01:46 -05:00
sbc: MMX optimization for scale factors calculation
Improves SBC encoding performance when joint stereo is not used.

Benchmarked on Pentium-M:

== Before: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.439s
user    0m1.336s
sys     0m0.104s

samples  %        image name  symbol name
8642     33.7473  sbcenc      sbc_pack_frame
5873     22.9342  sbcenc      sbc_analyze_4b_8s_mmx
4435     17.3188  sbcenc      sbc_calc_scalefactors
4285     16.7331  sbcenc      sbc_calculate_bits
1942      7.5836  sbcenc      sbc_enc_process_input_8s_be
322       1.2574  sbcenc      sbc_encode

== After: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.319s
user    0m1.220s
sys     0m0.084s

samples  %        image name  symbol name
8706     37.9959  sbcenc      sbc_pack_frame
5740     25.0513  sbcenc      sbc_analyze_4b_8s_mmx
4307     18.7972  sbcenc      sbc_calculate_bits
1937      8.4537  sbcenc      sbc_enc_process_input_8s_be
1801      7.8602  sbcenc      sbc_calc_scalefactors_mmx
307       1.3399  sbcenc      sbc_encode
This commit is contained in:
parent
c2b2fc1640
commit
1f617ea9ec
1 changed files with 54 additions and 0 deletions
|
|
@ -275,6 +275,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
|
||||||
asm volatile ("emms\n");
|
asm volatile ("emms\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void sbc_calc_scalefactors_mmx(
|
||||||
|
int32_t sb_sample_f[16][2][8],
|
||||||
|
uint32_t scale_factor[2][8],
|
||||||
|
int blocks, int channels, int subbands)
|
||||||
|
{
|
||||||
|
static const SBC_ALIGNED int32_t consts[2] = {
|
||||||
|
1 << SCALE_OUT_BITS,
|
||||||
|
1 << SCALE_OUT_BITS,
|
||||||
|
};
|
||||||
|
int ch, sb;
|
||||||
|
intptr_t blk;
|
||||||
|
for (ch = 0; ch < channels; ch++) {
|
||||||
|
for (sb = 0; sb < subbands; sb += 2) {
|
||||||
|
blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
|
||||||
|
(char *) &sb_sample_f[0][0][0]));
|
||||||
|
asm volatile (
|
||||||
|
"movq (%4), %%mm0\n"
|
||||||
|
"1:\n"
|
||||||
|
"movq (%1, %0), %%mm1\n"
|
||||||
|
"pxor %%mm2, %%mm2\n"
|
||||||
|
"pcmpgtd %%mm2, %%mm1\n"
|
||||||
|
"paddd (%1, %0), %%mm1\n"
|
||||||
|
"pcmpgtd %%mm1, %%mm2\n"
|
||||||
|
"pxor %%mm2, %%mm1\n"
|
||||||
|
|
||||||
|
"por %%mm1, %%mm0\n"
|
||||||
|
|
||||||
|
"sub %2, %0\n"
|
||||||
|
"jns 1b\n"
|
||||||
|
|
||||||
|
"movd %%mm0, %k0\n"
|
||||||
|
"psrlq $32, %%mm0\n"
|
||||||
|
"bsrl %k0, %k0\n"
|
||||||
|
"subl %5, %k0\n"
|
||||||
|
"movl %k0, (%3)\n"
|
||||||
|
|
||||||
|
"movd %%mm0, %k0\n"
|
||||||
|
"bsrl %k0, %k0\n"
|
||||||
|
"subl %5, %k0\n"
|
||||||
|
"movl %k0, 4(%3)\n"
|
||||||
|
: "+r" (blk)
|
||||||
|
: "r" (&sb_sample_f[0][ch][sb]),
|
||||||
|
"i" ((char *) &sb_sample_f[1][0][0] -
|
||||||
|
(char *) &sb_sample_f[0][0][0]),
|
||||||
|
"r" (&scale_factor[ch][sb]),
|
||||||
|
"r" (&consts),
|
||||||
|
"i" (SCALE_OUT_BITS)
|
||||||
|
: "memory");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
asm volatile ("emms\n");
|
||||||
|
}
|
||||||
|
|
||||||
static int check_mmx_support(void)
|
static int check_mmx_support(void)
|
||||||
{
|
{
|
||||||
#ifdef __amd64__
|
#ifdef __amd64__
|
||||||
|
|
@ -313,6 +366,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
|
||||||
if (check_mmx_support()) {
|
if (check_mmx_support()) {
|
||||||
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
|
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
|
||||||
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
|
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
|
||||||
|
state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
|
||||||
state->implementation_info = "MMX";
|
state->implementation_info = "MMX";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue