volume: improved comments

This commit is contained in:
Wim Taymans 2009-08-14 15:41:32 +02:00
parent a1235446a7
commit f24c24c14b
2 changed files with 30 additions and 27 deletions

View file

@ -196,7 +196,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@ -212,8 +212,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2)
" movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
" dec %2 \n\t"
@ -270,7 +270,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
SWAP_16 (%%mm1)
VOLUME_32x16 (%%mm1, %%mm0)
SWAP_16 (%%mm0)
" movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@ -288,8 +288,8 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2)
SWAP_16_2 (%%mm0, %%mm2)
" movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
" movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
" dec %2 \n\t"

View file

@ -231,12 +231,12 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */
" movw (%0), %4 \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
" movw (%0), %4 \n\t" /* .. | p0 | */
" rorw $8, %4 \n\t"
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t"
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
" rorw $8, %4 \n\t"
" movw %4, (%0) \n\t"
" add $2, %0 \n\t"
@ -244,31 +244,34 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
" test $1, %2 \n\t" /* check for odd samples */
" test $1, %2 \n\t"
" je 4f \n\t"
"3: \n\t" /* do samples in pairs of 2 */
" movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
" movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
"3: \n\t" /* do samples in groups of 2 */
" movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
" movd %%xmm0, (%0) \n\t"
" movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
"4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
" test $1, %2 \n\t" /* check for odd samples */
" test $1, %2 \n\t"
" je 6f \n\t"
"5: \n\t" /* do samples in pairs of 4 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
" movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
/* FIXME, we can do aligned access of the volume values if we can guarantee
* that the array is 16 bytes aligned, we probably have to do the odd values
* after this then. */
"5: \n\t" /* do samples in groups of 4 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
" movq %%xmm0, (%0) \n\t"
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
@ -277,17 +280,17 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" cmp $0, %2 \n\t"
" je 8f \n\t"
"7: \n\t" /* do samples in pairs of 8 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
" movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
" movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */
"7: \n\t" /* do samples in groups of 8 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
" movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
SWAP_16_2 (%%xmm1, %%xmm3)
VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2)
SWAP_16_2 (%%xmm0, %%xmm2)
" movq %%xmm0, (%0) \n\t"
" movq %%xmm2, 8(%0) \n\t"
" movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
" add $16, %0 \n\t"
MOD_ADD ($8, %5)
" dec %2 \n\t"
@ -458,7 +461,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
#define RUN_TEST
#undef RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2