sbc: MMX optimization for scale factors calculation

Improves SBC encoding performance when joint stereo is not used.
Benchmarked on Pentium-M:

== Before: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.439s
user    0m1.336s
sys     0m0.104s

samples  %        image name               symbol name
8642     33.7473  sbcenc                   sbc_pack_frame
5873     22.9342  sbcenc                   sbc_analyze_4b_8s_mmx
4435     17.3188  sbcenc                   sbc_calc_scalefactors
4285     16.7331  sbcenc                   sbc_calculate_bits
1942      7.5836  sbcenc                   sbc_enc_process_input_8s_be
322       1.2574  sbcenc                   sbc_encode

== After: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.319s
user    0m1.220s
sys     0m0.084s

samples  %        image name               symbol name
8706     37.9959  sbcenc                   sbc_pack_frame
5740     25.0513  sbcenc                   sbc_analyze_4b_8s_mmx
4307     18.7972  sbcenc                   sbc_calculate_bits
1937      8.4537  sbcenc                   sbc_enc_process_input_8s_be
1801      7.8602  sbcenc                   sbc_calc_scalefactors_mmx
307       1.3399  sbcenc                   sbc_encode
This commit is contained in:
Siarhei Siamashka 2011-03-14 15:17:31 -03:00 committed by Luiz Augusto von Dentz
parent c2b2fc1640
commit 1f617ea9ec

View file

@ -275,6 +275,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
asm volatile ("emms\n"); asm volatile ("emms\n");
} }
/*
 * MMX implementation of the scale factor calculation.
 *
 * For every channel and every pair of adjacent subbands, walks all blocks,
 * ORs together a magnitude-derived value of each sample (see the loop
 * comments below) and stores
 *     (index of the highest set bit of the OR) - SCALE_OUT_BITS
 * into scale_factor[ch][sb] and scale_factor[ch][sb + 1].
 *
 * sb_sample_f:  input samples, indexed [block][channel][subband]
 * scale_factor: output, indexed [channel][subband]
 * blocks:       number of blocks to scan; the asm loop runs its body before
 *               testing the counter, so this must be at least 1
 * channels:     number of channels to process
 * subbands:     number of subbands; they are handled two at a time and both
 *               scale_factor[ch][sb] and [sb + 1] are written, so an even
 *               count is expected
 */
static void sbc_calc_scalefactors_mmx(
int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int channels, int subbands)
{
/*
 * Initial value of the OR accumulator: bit SCALE_OUT_BITS set in both 32-bit
 * lanes. This guarantees the bsrl input below is never zero and that the
 * stored result (bit index - SCALE_OUT_BITS) is never negative.
 * NOTE(review): SBC_ALIGNED is defined elsewhere; presumably an alignment
 * attribute - confirm.
 */
static const SBC_ALIGNED int32_t consts[2] = {
1 << SCALE_OUT_BITS,
1 << SCALE_OUT_BITS,
};
int ch, sb;
/* Byte offset of the current block from block 0; also the loop counter. */
intptr_t blk;
for (ch = 0; ch < channels; ch++) {
for (sb = 0; sb < subbands; sb += 2) {
/* Start at the last block: (blocks - 1) * size in bytes of one block. */
blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
(char *) &sb_sample_f[0][0][0]));
/*
 * Operands:
 *   %0 = blk (read/write; reused as a 32-bit scratch register via %k0
 *        once the loop has finished)
 *   %1 = &sb_sample_f[0][ch][sb]  (base address of the subband pair)
 *   %2 = byte stride between consecutive blocks (immediate)
 *   %3 = &scale_factor[ch][sb]    (output for the subband pair)
 *   %4 = &consts
 *   %5 = SCALE_OUT_BITS (immediate)
 */
asm volatile (
/* mm0 = accumulator, seeded from consts */
"movq (%4), %%mm0\n"
"1:\n"
/* mm1 = the two samples of this block (subbands sb and sb + 1) */
"movq (%1, %0), %%mm1\n"
/* mm2 = 0 */
"pxor %%mm2, %%mm2\n"
/* mm1 = -1 in each lane where the sample is > 0, else 0 */
"pcmpgtd %%mm2, %%mm1\n"
/* mm1 = sample - 1 for positive samples, sample otherwise */
"paddd (%1, %0), %%mm1\n"
/* mm2 = -1 in each lane where mm1 is negative, else 0 */
"pcmpgtd %%mm1, %%mm2\n"
/* invert negative lanes: mm1 = |sample| - 1 (0 for a zero sample) */
"pxor %%mm2, %%mm1\n"
/* accumulate the bits of every block */
"por %%mm1, %%mm0\n"
/* step back one block; keep looping while the offset is >= 0 */
"sub %2, %0\n"
"jns 1b\n"
/* low lane (subband sb): highest set bit index - SCALE_OUT_BITS */
"movd %%mm0, %k0\n"
/* move the high lane (subband sb + 1) down for the second pass */
"psrlq $32, %%mm0\n"
"bsrl %k0, %k0\n"
"subl %5, %k0\n"
"movl %k0, (%3)\n"
/* high lane (subband sb + 1): same computation, stored 4 bytes further */
"movd %%mm0, %k0\n"
"bsrl %k0, %k0\n"
"subl %5, %k0\n"
"movl %k0, 4(%3)\n"
: "+r" (blk)
: "r" (&sb_sample_f[0][ch][sb]),
"i" ((char *) &sb_sample_f[1][0][0] -
(char *) &sb_sample_f[0][0][0]),
"r" (&scale_factor[ch][sb]),
"r" (&consts),
"i" (SCALE_OUT_BITS)
: "memory");
}
}
/* Leave MMX state so that subsequent x87 floating point code works. */
asm volatile ("emms\n");
}
static int check_mmx_support(void) static int check_mmx_support(void)
{ {
#ifdef __amd64__ #ifdef __amd64__
@ -313,6 +366,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
if (check_mmx_support()) { if (check_mmx_support()) {
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
state->implementation_info = "MMX"; state->implementation_info = "MMX";
} }
} }