mirror of
				https://gitlab.freedesktop.org/pipewire/pipewire.git
				synced 2025-11-03 09:01:54 -05:00 
			
		
		
		
	audioconvert: simplify 24 bits handling
Make new uint24_t and int24_t types and use them to handle 24-bit samples. This makes it easier because we can iterate over and copy the structs like other types.
This commit is contained in:
		
							parent
							
								
									e395f62425
								
							
						
					
					
						commit
						817d5bd7a4
					
				
					 6 changed files with 204 additions and 247 deletions
				
			
		| 
						 | 
				
			
			@ -147,7 +147,7 @@ void
 | 
			
		|||
conv_s24_to_f32d_1s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in;
 | 
			
		||||
| 
						 | 
				
			
			@ -164,21 +164,21 @@ conv_s24_to_f32d_1s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 * n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 * n_channels]));
 | 
			
		||||
		in = _mm_slli_epi32(in, 8);
 | 
			
		||||
		in = _mm_srai_epi32(in, 8);
 | 
			
		||||
		out = _mm_cvtepi32_ps(in);
 | 
			
		||||
		out = _mm_mul_ps(out, factor);
 | 
			
		||||
		_mm_store_ps(&d0[n], out);
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out = _mm_mul_ss(out, factor);
 | 
			
		||||
		_mm_store_ss(&d0[n], out);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -186,7 +186,7 @@ static void
 | 
			
		|||
conv_s24_to_f32d_2s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0], *d1 = dst[1];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in[2];
 | 
			
		||||
| 
						 | 
				
			
			@ -205,14 +205,14 @@ conv_s24_to_f32d_2s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in[0] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[0 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]));
 | 
			
		||||
		in[1] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 3*n_channels]));
 | 
			
		||||
 | 
			
		||||
		in[0] = _mm_slli_epi32(in[0], 8);
 | 
			
		||||
		in[1] = _mm_slli_epi32(in[1], 8);
 | 
			
		||||
| 
						 | 
				
			
			@ -229,23 +229,23 @@ conv_s24_to_f32d_2s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ps(&d0[n], out[0]);
 | 
			
		||||
		_mm_store_ps(&d1[n], out[1]);
 | 
			
		||||
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, read_s24(s+3));
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+1)));
 | 
			
		||||
		out[0] = _mm_mul_ss(out[0], factor);
 | 
			
		||||
		out[1] = _mm_mul_ss(out[1], factor);
 | 
			
		||||
		_mm_store_ss(&d0[n], out[0]);
 | 
			
		||||
		_mm_store_ss(&d1[n], out[1]);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
static void
 | 
			
		||||
conv_s24_to_f32d_4s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0], *d1 = dst[1], *d2 = dst[2], *d3 = dst[3];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in[4];
 | 
			
		||||
| 
						 | 
				
			
			@ -266,24 +266,24 @@ conv_s24_to_f32d_4s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in[0] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[0 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]));
 | 
			
		||||
		in[1] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 3*n_channels]));
 | 
			
		||||
		in[2] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[6 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[2 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 3*n_channels]));
 | 
			
		||||
		in[3] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[9 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]));
 | 
			
		||||
 | 
			
		||||
		in[0] = _mm_slli_epi32(in[0], 8);
 | 
			
		||||
		in[1] = _mm_slli_epi32(in[1], 8);
 | 
			
		||||
| 
						 | 
				
			
			@ -310,13 +310,13 @@ conv_s24_to_f32d_4s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ps(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ps(&d3[n], out[3]);
 | 
			
		||||
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, read_s24(s+3));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, read_s24(s+6));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, read_s24(s+9));
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+1)));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+2)));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+3)));
 | 
			
		||||
		out[0] = _mm_mul_ss(out[0], factor);
 | 
			
		||||
		out[1] = _mm_mul_ss(out[1], factor);
 | 
			
		||||
		out[2] = _mm_mul_ss(out[2], factor);
 | 
			
		||||
| 
						 | 
				
			
			@ -325,7 +325,7 @@ conv_s24_to_f32d_4s_avx2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ss(&d1[n], out[1]);
 | 
			
		||||
		_mm_store_ss(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ss(&d3[n], out[3]);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -55,7 +55,7 @@ MAKE_COPY(24);
 | 
			
		|||
MAKE_COPY(32);
 | 
			
		||||
MAKE_COPY(64);
 | 
			
		||||
 | 
			
		||||
#define MAKE_D_TO_D_F(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
#define MAKE_D_TO_D(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
void conv_ ##sname## d_to_ ##dname## d_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -64,15 +64,12 @@ void conv_ ##sname## d_to_ ##dname## d_c(struct convert *conv,			\
 | 
			
		|||
	for (i = 0; i < n_channels; i++) {					\
 | 
			
		||||
		const stype *s = src[i];					\
 | 
			
		||||
		dtype *d = dst[i];						\
 | 
			
		||||
		for (j = 0; j < n_samples; j++) {				\
 | 
			
		||||
			func;							\
 | 
			
		||||
		}								\
 | 
			
		||||
		for (j = 0; j < n_samples; j++)					\
 | 
			
		||||
			d[j] = func (s[j]);			 		\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_D_TO_D(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
	MAKE_D_TO_D_F(sname,stype,dname,dtype, d[j] = func (s[j])) 		\
 | 
			
		||||
 | 
			
		||||
#define MAKE_I_TO_I_F(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
#define MAKE_I_TO_I(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
void conv_ ##sname## _to_ ##dname## _c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -81,14 +78,11 @@ void conv_ ##sname## _to_ ##dname## _c(struct convert *conv,			\
 | 
			
		|||
	const stype *s = src[0];						\
 | 
			
		||||
	dtype *d = dst[0];							\
 | 
			
		||||
	n_samples *= conv->n_channels;						\
 | 
			
		||||
	for (j = 0; j < n_samples; j++) {					\
 | 
			
		||||
		func;								\
 | 
			
		||||
	}									\
 | 
			
		||||
	for (j = 0; j < n_samples; j++)						\
 | 
			
		||||
		d[j] = func (s[j]);				 		\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_I_TO_I(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
	MAKE_I_TO_I_F(sname,stype,dname,dtype, d[j] = func (s[j])) 		\
 | 
			
		||||
 | 
			
		||||
#define MAKE_I_TO_D_F(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
#define MAKE_I_TO_D(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
void conv_ ##sname## _to_ ##dname## d_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -97,15 +91,12 @@ void conv_ ##sname## _to_ ##dname## d_c(struct convert *conv,			\
 | 
			
		|||
	dtype **d = (dtype**)dst;						\
 | 
			
		||||
	uint32_t i, j, n_channels = conv->n_channels;				\
 | 
			
		||||
	for (j = 0; j < n_samples; j++) {					\
 | 
			
		||||
		for (i = 0; i < n_channels; i++) {				\
 | 
			
		||||
			func;							\
 | 
			
		||||
		}								\
 | 
			
		||||
		for (i = 0; i < n_channels; i++)				\
 | 
			
		||||
			d[i][j] = func (*s++);			 		\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_I_TO_D(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
	MAKE_I_TO_D_F(sname,stype,dname,dtype, d[i][j] = func (*s++)) 		\
 | 
			
		||||
 | 
			
		||||
#define MAKE_D_TO_I_F(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
#define MAKE_D_TO_I(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
void conv_ ##sname## d_to_ ##dname## _c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -114,13 +105,10 @@ void conv_ ##sname## d_to_ ##dname## _c(struct convert *conv,			\
 | 
			
		|||
	dtype *d = dst[0];							\
 | 
			
		||||
	uint32_t i, j, n_channels = conv->n_channels;				\
 | 
			
		||||
	for (j = 0; j < n_samples; j++) {					\
 | 
			
		||||
		for (i = 0; i < n_channels; i++) {				\
 | 
			
		||||
			func;							\
 | 
			
		||||
		}								\
 | 
			
		||||
		for (i = 0; i < n_channels; i++)				\
 | 
			
		||||
			*d++ = func (s[i][j]);			 		\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_D_TO_I(sname,stype,dname,dtype,func) 				\
 | 
			
		||||
	MAKE_D_TO_I_F(sname,stype,dname,dtype, *d++ = func (s[i][j])) 		\
 | 
			
		||||
 | 
			
		||||
/* to f32 */
 | 
			
		||||
MAKE_D_TO_D(u8, uint8_t, f32, float, U8_TO_F32);
 | 
			
		||||
| 
						 | 
				
			
			@ -154,14 +142,14 @@ MAKE_I_TO_D(s32, int32_t, f32, float, S32_TO_F32);
 | 
			
		|||
MAKE_D_TO_I(s32, int32_t, f32, float, S32_TO_F32);
 | 
			
		||||
MAKE_I_TO_D(s32s, uint32_t, f32, float, S32S_TO_F32);
 | 
			
		||||
 | 
			
		||||
MAKE_I_TO_I_F(u24, uint8_t, f32, float, d[j] = U24_TO_F32(read_u24(s)); s += 3);
 | 
			
		||||
MAKE_I_TO_D_F(u24, uint8_t, f32, float, d[i][j] = U24_TO_F32(read_u24(s)); s += 3);
 | 
			
		||||
MAKE_I_TO_I(u24, uint24_t, f32, float, U24_TO_F32);
 | 
			
		||||
MAKE_I_TO_D(u24, uint24_t, f32, float, U24_TO_F32);
 | 
			
		||||
 | 
			
		||||
MAKE_D_TO_D_F(s24, int8_t, f32, float, d[j] = S24_TO_F32(read_s24(s)); s += 3);
 | 
			
		||||
MAKE_I_TO_I_F(s24, int8_t, f32, float, d[j] = S24_TO_F32(read_s24(s)); s += 3);
 | 
			
		||||
MAKE_I_TO_D_F(s24, int8_t, f32, float, d[i][j] = S24_TO_F32(read_s24(s)); s += 3);
 | 
			
		||||
MAKE_D_TO_I_F(s24, int8_t, f32, float, *d++ = S24_TO_F32(read_s24(&s[i][j*3])));
 | 
			
		||||
MAKE_I_TO_D_F(s24s, int8_t, f32, float, d[i][j] = S24_TO_F32(read_s24s(s)); s += 3);
 | 
			
		||||
MAKE_D_TO_D(s24, int24_t, f32, float, S24_TO_F32);
 | 
			
		||||
MAKE_I_TO_I(s24, int24_t, f32, float, S24_TO_F32);
 | 
			
		||||
MAKE_I_TO_D(s24, int24_t, f32, float, S24_TO_F32);
 | 
			
		||||
MAKE_D_TO_I(s24, int24_t, f32, float, S24_TO_F32);
 | 
			
		||||
MAKE_I_TO_D(s24s, int24_t, f32, float, S24S_TO_F32);
 | 
			
		||||
 | 
			
		||||
MAKE_I_TO_I(u24_32, uint32_t, f32, float, U24_32_TO_F32);
 | 
			
		||||
MAKE_I_TO_D(u24_32, uint32_t, f32, float, U24_32_TO_F32);
 | 
			
		||||
| 
						 | 
				
			
			@ -211,14 +199,14 @@ MAKE_I_TO_D(f32, float, s32, int32_t, F32_TO_S32);
 | 
			
		|||
MAKE_D_TO_I(f32, float, s32, int32_t, F32_TO_S32);
 | 
			
		||||
MAKE_D_TO_I(f32, float, s32s, uint32_t, F32_TO_S32S);
 | 
			
		||||
 | 
			
		||||
MAKE_I_TO_I_F(f32, float, u24, uint8_t, write_u24(d, F32_TO_U24(s[j])); d += 3);
 | 
			
		||||
MAKE_D_TO_I_F(f32, float, u24, uint8_t, write_u24(d, F32_TO_U24(s[i][j])); d += 3);
 | 
			
		||||
MAKE_I_TO_I(f32, float, u24, uint24_t, F32_TO_U24);
 | 
			
		||||
MAKE_D_TO_I(f32, float, u24, uint24_t, F32_TO_U24);
 | 
			
		||||
 | 
			
		||||
MAKE_D_TO_D_F(f32, float, s24, uint8_t, write_s24(d, F32_TO_S24(s[j])); d += 3);
 | 
			
		||||
MAKE_I_TO_I_F(f32, float, s24, uint8_t, write_s24(d, F32_TO_S24(s[j])); d += 3);
 | 
			
		||||
MAKE_I_TO_D_F(f32, float, s24, uint8_t, write_s24(&d[i][j*3], F32_TO_S24(*s++)));
 | 
			
		||||
MAKE_D_TO_I_F(f32, float, s24, uint8_t, write_s24(d, F32_TO_S24(s[i][j])); d += 3);
 | 
			
		||||
MAKE_D_TO_I_F(f32, float, s24s, uint8_t, write_s24s(d, F32_TO_S24(s[i][j])); d += 3);
 | 
			
		||||
MAKE_D_TO_D(f32, float, s24, int24_t, F32_TO_S24);
 | 
			
		||||
MAKE_I_TO_I(f32, float, s24, int24_t, F32_TO_S24);
 | 
			
		||||
MAKE_I_TO_D(f32, float, s24, int24_t, F32_TO_S24);
 | 
			
		||||
MAKE_D_TO_I(f32, float, s24, int24_t, F32_TO_S24);
 | 
			
		||||
MAKE_D_TO_I(f32, float, s24s, int24_t, F32_TO_S24S);
 | 
			
		||||
 | 
			
		||||
MAKE_I_TO_I(f32, float, u24_32, uint32_t, F32_TO_U24_32);
 | 
			
		||||
MAKE_D_TO_I(f32, float, u24_32, uint32_t, F32_TO_U24_32);
 | 
			
		||||
| 
						 | 
				
			
			@ -253,7 +241,7 @@ static inline void update_dither_c(struct convert *conv, uint32_t n_samples)
 | 
			
		|||
		dither[n] = lcnoise(state) * scale;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define MAKE_D_dither_F(dname,dtype,func) 					\
 | 
			
		||||
#define MAKE_D_dither(dname,dtype,func) 					\
 | 
			
		||||
void conv_f32d_to_ ##dname## d_dither_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -266,16 +254,13 @@ void conv_f32d_to_ ##dname## d_dither_c(struct convert *conv,			\
 | 
			
		|||
		dtype *d = dst[i];						\
 | 
			
		||||
		for (j = 0; j < n_samples;) {					\
 | 
			
		||||
			chunk = SPA_MIN(n_samples - j, dither_size);		\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++) {			\
 | 
			
		||||
				func;						\
 | 
			
		||||
			}							\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++)			\
 | 
			
		||||
				d[j] = func (s[j], dither[k]);			\
 | 
			
		||||
		}								\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_D_dither(dname,dtype,func) 					\
 | 
			
		||||
	MAKE_D_dither_F(dname,dtype, d[j] = func (s[j], dither[k]))		\
 | 
			
		||||
 | 
			
		||||
#define MAKE_I_dither_F(dname,dtype,func) 					\
 | 
			
		||||
#define MAKE_I_dither(dname,dtype,func) 					\
 | 
			
		||||
void conv_f32d_to_ ##dname## _dither_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -288,14 +273,11 @@ void conv_f32d_to_ ##dname## _dither_c(struct convert *conv,			\
 | 
			
		|||
	for (j = 0; j < n_samples;) {						\
 | 
			
		||||
		chunk = SPA_MIN(n_samples - j, dither_size);			\
 | 
			
		||||
		for (k = 0; k < chunk; k++, j++) {				\
 | 
			
		||||
			for (i = 0; i < n_channels; i++) {			\
 | 
			
		||||
				func;						\
 | 
			
		||||
			}							\
 | 
			
		||||
			for (i = 0; i < n_channels; i++)			\
 | 
			
		||||
				*d++ = func (s[i][j], dither[k]);		\
 | 
			
		||||
		}								\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_I_dither(dname,dtype,func) 					\
 | 
			
		||||
	MAKE_I_dither_F(dname,dtype, *d++ = func (s[i][j], dither[k]))		\
 | 
			
		||||
 | 
			
		||||
MAKE_D_dither(u8, uint8_t, F32_TO_U8_D);
 | 
			
		||||
MAKE_I_dither(u8, uint8_t, F32_TO_U8_D);
 | 
			
		||||
| 
						 | 
				
			
			@ -307,9 +289,9 @@ MAKE_I_dither(s16s, uint16_t, F32_TO_S16S_D);
 | 
			
		|||
MAKE_D_dither(s32, int32_t, F32_TO_S32_D);
 | 
			
		||||
MAKE_I_dither(s32, int32_t, F32_TO_S32_D);
 | 
			
		||||
MAKE_I_dither(s32s, uint32_t, F32_TO_S32S_D);
 | 
			
		||||
MAKE_D_dither_F(s24, uint8_t, write_s24(d, F32_TO_S24_D(s[j], dither[k])); d += 3);
 | 
			
		||||
MAKE_I_dither_F(s24, uint8_t, write_s24(d, F32_TO_S24_D(s[i][j], dither[k])); d += 3);
 | 
			
		||||
MAKE_I_dither_F(s24s, uint8_t, write_s24s(d, F32_TO_S24_D(s[i][j], dither[k])); d += 3);
 | 
			
		||||
MAKE_D_dither(s24, int24_t, F32_TO_S24_D);
 | 
			
		||||
MAKE_I_dither(s24, int24_t, F32_TO_S24_D);
 | 
			
		||||
MAKE_I_dither(s24s, int24_t, F32_TO_S24_D);
 | 
			
		||||
MAKE_D_dither(s24_32, int32_t, F32_TO_S24_32_D);
 | 
			
		||||
MAKE_I_dither(s24_32, int32_t, F32_TO_S24_32_D);
 | 
			
		||||
MAKE_I_dither(s24_32s, int32_t, F32_TO_S24_32S_D);
 | 
			
		||||
| 
						 | 
				
			
			@ -335,7 +317,7 @@ MAKE_I_dither(s24_32s, int32_t, F32_TO_S24_32S_D);
 | 
			
		|||
#define F32_TO_S16_SH(s,sh,d)	SHAPER5(int16_t, s, S16_SCALE, 0, sh, S16_MIN, S16_MAX, d)
 | 
			
		||||
#define F32_TO_S16S_SH(s,sh,d)	bswap_16(F32_TO_S16_SH(s,sh,d))
 | 
			
		||||
 | 
			
		||||
#define MAKE_D_shaped_F(dname,dtype,func) 					\
 | 
			
		||||
#define MAKE_D_shaped(dname,dtype,func) 					\
 | 
			
		||||
void conv_f32d_to_ ##dname## d_shaped_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -350,17 +332,14 @@ void conv_f32d_to_ ##dname## d_shaped_c(struct convert *conv,			\
 | 
			
		|||
		uint32_t idx = sh->idx;						\
 | 
			
		||||
		for (j = 0; j < n_samples;) {					\
 | 
			
		||||
			chunk = SPA_MIN(n_samples - j, dither_size);		\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++) {			\
 | 
			
		||||
				func;						\
 | 
			
		||||
			}							\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++)			\
 | 
			
		||||
				d[j] = func (s[j], sh, dither[k]);		\
 | 
			
		||||
		}								\
 | 
			
		||||
		sh->idx = idx;							\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_D_shaped(dname,dtype,func) 					\
 | 
			
		||||
	MAKE_D_shaped_F(dname,dtype, d[j] = func (s[j], sh, dither[k]))		\
 | 
			
		||||
 | 
			
		||||
#define MAKE_I_shaped_F(dname,dtype,func) 					\
 | 
			
		||||
#define MAKE_I_shaped(dname,dtype,func) 					\
 | 
			
		||||
void conv_f32d_to_ ##dname## _shaped_c(struct convert *conv,			\
 | 
			
		||||
		void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],	\
 | 
			
		||||
                uint32_t n_samples)						\
 | 
			
		||||
| 
						 | 
				
			
			@ -376,15 +355,12 @@ void conv_f32d_to_ ##dname## _shaped_c(struct convert *conv,			\
 | 
			
		|||
		uint32_t idx = sh->idx;						\
 | 
			
		||||
		for (j = 0; j < n_samples;) {					\
 | 
			
		||||
			chunk = SPA_MIN(n_samples - j, dither_size);		\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++) {			\
 | 
			
		||||
				func;						\
 | 
			
		||||
			}							\
 | 
			
		||||
			for (k = 0; k < chunk; k++, j++)			\
 | 
			
		||||
				d[j*n_channels] = func (s[j], sh, dither[k]);	\
 | 
			
		||||
		}								\
 | 
			
		||||
		sh->idx = idx;							\
 | 
			
		||||
	}									\
 | 
			
		||||
}
 | 
			
		||||
#define MAKE_I_shaped(dname,dtype,func) 						\
 | 
			
		||||
	MAKE_I_shaped_F(dname,dtype, d[j*n_channels] = func (s[j], sh, dither[k]))	\
 | 
			
		||||
 | 
			
		||||
MAKE_D_shaped(u8, uint8_t, F32_TO_U8_SH);
 | 
			
		||||
MAKE_I_shaped(u8, uint8_t, F32_TO_U8_SH);
 | 
			
		||||
| 
						 | 
				
			
			@ -395,23 +371,21 @@ MAKE_I_shaped(s16, int16_t, F32_TO_S16_SH);
 | 
			
		|||
MAKE_I_shaped(s16s, uint16_t, F32_TO_S16S_SH);
 | 
			
		||||
 | 
			
		||||
#define MAKE_DEINTERLEAVE(size,type,func)					\
 | 
			
		||||
	MAKE_I_TO_D_F(size,type,size,type,func)
 | 
			
		||||
#define DEINTERLEAVE_COPY	(d[i][j] = *s++)
 | 
			
		||||
	MAKE_I_TO_D(size,type,size,type,func)
 | 
			
		||||
 | 
			
		||||
MAKE_DEINTERLEAVE(8, uint8_t, DEINTERLEAVE_COPY);
 | 
			
		||||
MAKE_DEINTERLEAVE(16, uint16_t, DEINTERLEAVE_COPY);
 | 
			
		||||
MAKE_DEINTERLEAVE(24, uint8_t, write_s24(&d[i][j*3], read_s24(s)); s+=3);
 | 
			
		||||
MAKE_DEINTERLEAVE(32, uint32_t, DEINTERLEAVE_COPY);
 | 
			
		||||
MAKE_DEINTERLEAVE(32s, uint32_t, d[i][j] = bswap_32(*s++));
 | 
			
		||||
MAKE_DEINTERLEAVE(64, uint64_t, DEINTERLEAVE_COPY);
 | 
			
		||||
MAKE_DEINTERLEAVE(8, uint8_t, (uint8_t));
 | 
			
		||||
MAKE_DEINTERLEAVE(16, uint16_t, (uint16_t));
 | 
			
		||||
MAKE_DEINTERLEAVE(24, uint24_t, (uint24_t));
 | 
			
		||||
MAKE_DEINTERLEAVE(32, uint32_t, (uint32_t));
 | 
			
		||||
MAKE_DEINTERLEAVE(32s, uint32_t, bswap_32);
 | 
			
		||||
MAKE_DEINTERLEAVE(64, uint64_t, (uint64_t));
 | 
			
		||||
 | 
			
		||||
#define MAKE_INTERLEAVE(size,type,func)						\
 | 
			
		||||
	MAKE_D_TO_I_F(size,type,size,type,func)
 | 
			
		||||
#define INTERLEAVE_COPY		(*d++ = s[i][j])
 | 
			
		||||
	MAKE_D_TO_I(size,type,size,type,func)
 | 
			
		||||
 | 
			
		||||
MAKE_INTERLEAVE(8, uint8_t, INTERLEAVE_COPY);
 | 
			
		||||
MAKE_INTERLEAVE(16, uint16_t, INTERLEAVE_COPY);
 | 
			
		||||
MAKE_INTERLEAVE(24, uint8_t, write_s24(d, read_s24(&s[i][j*3])); d+=3);
 | 
			
		||||
MAKE_INTERLEAVE(32, uint32_t, INTERLEAVE_COPY);
 | 
			
		||||
MAKE_INTERLEAVE(32s, uint32_t, *d++ = bswap_32(s[i][j]));
 | 
			
		||||
MAKE_INTERLEAVE(64, uint64_t, INTERLEAVE_COPY);
 | 
			
		||||
MAKE_INTERLEAVE(8, uint8_t, (uint8_t));
 | 
			
		||||
MAKE_INTERLEAVE(16, uint16_t, (uint16_t));
 | 
			
		||||
MAKE_INTERLEAVE(24, uint24_t, (uint24_t));
 | 
			
		||||
MAKE_INTERLEAVE(32, uint32_t, (uint32_t));
 | 
			
		||||
MAKE_INTERLEAVE(32s, uint32_t, bswap_32);
 | 
			
		||||
MAKE_INTERLEAVE(64, uint64_t, (uint64_t));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -132,7 +132,7 @@ void
 | 
			
		|||
conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in;
 | 
			
		||||
| 
						 | 
				
			
			@ -149,21 +149,21 @@ conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 * n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 * n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 * n_channels]));
 | 
			
		||||
		in = _mm_slli_epi32(in, 8);
 | 
			
		||||
		in = _mm_srai_epi32(in, 8);
 | 
			
		||||
		out = _mm_cvtepi32_ps(in);
 | 
			
		||||
		out = _mm_mul_ps(out, factor);
 | 
			
		||||
		_mm_store_ps(&d0[n], out);
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out = _mm_mul_ss(out, factor);
 | 
			
		||||
		_mm_store_ss(&d0[n], out);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -171,7 +171,7 @@ static void
 | 
			
		|||
conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0], *d1 = dst[1];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in[2];
 | 
			
		||||
| 
						 | 
				
			
			@ -190,14 +190,14 @@ conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in[0] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[0 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]));
 | 
			
		||||
		in[1] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 3*n_channels]));
 | 
			
		||||
 | 
			
		||||
		in[0] = _mm_slli_epi32(in[0], 8);
 | 
			
		||||
		in[1] = _mm_slli_epi32(in[1], 8);
 | 
			
		||||
| 
						 | 
				
			
			@ -214,23 +214,23 @@ conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ps(&d0[n], out[0]);
 | 
			
		||||
		_mm_store_ps(&d1[n], out[1]);
 | 
			
		||||
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, read_s24(s+3));
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+1)));
 | 
			
		||||
		out[0] = _mm_mul_ss(out[0], factor);
 | 
			
		||||
		out[1] = _mm_mul_ss(out[1], factor);
 | 
			
		||||
		_mm_store_ss(&d0[n], out[0]);
 | 
			
		||||
		_mm_store_ss(&d1[n], out[1]);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
static void
 | 
			
		||||
conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0], *d1 = dst[1], *d2 = dst[2], *d3 = dst[3];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in[4];
 | 
			
		||||
| 
						 | 
				
			
			@ -251,24 +251,24 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in[0] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[0 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[0 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[0 + 3*n_channels]));
 | 
			
		||||
		in[1] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[1 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[1 + 3*n_channels]));
 | 
			
		||||
		in[2] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[6 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[6 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[2 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[2 + 3*n_channels]));
 | 
			
		||||
		in[3] = _mm_setr_epi32(
 | 
			
		||||
			*((uint32_t*)&s[9 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 3*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 6*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[9 + 9*n_channels]));
 | 
			
		||||
			*((uint32_t*)&s[3 + 0*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 1*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 2*n_channels]),
 | 
			
		||||
			*((uint32_t*)&s[3 + 3*n_channels]));
 | 
			
		||||
 | 
			
		||||
		in[0] = _mm_slli_epi32(in[0], 8);
 | 
			
		||||
		in[1] = _mm_slli_epi32(in[1], 8);
 | 
			
		||||
| 
						 | 
				
			
			@ -295,13 +295,13 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ps(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ps(&d3[n], out[3]);
 | 
			
		||||
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, read_s24(s+3));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, read_s24(s+6));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, read_s24(s+9));
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+1)));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+2)));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+3)));
 | 
			
		||||
		out[0] = _mm_mul_ss(out[0], factor);
 | 
			
		||||
		out[1] = _mm_mul_ss(out[1], factor);
 | 
			
		||||
		out[2] = _mm_mul_ss(out[2], factor);
 | 
			
		||||
| 
						 | 
				
			
			@ -310,7 +310,7 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
 | 
			
		|||
		_mm_store_ss(&d1[n], out[1]);
 | 
			
		||||
		_mm_store_ss(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ss(&d3[n], out[3]);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,7 +30,7 @@ static void
 | 
			
		|||
conv_s24_to_f32d_1s_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in = _mm_setzero_si128();
 | 
			
		||||
| 
						 | 
				
			
			@ -43,21 +43,21 @@ conv_s24_to_f32d_1s_sse41(void *data, void * SPA_RESTRICT dst[], const void * SP
 | 
			
		|||
 | 
			
		||||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[0 * n_channels]), 0);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[3 * n_channels]), 1);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[6 * n_channels]), 2);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[9 * n_channels]), 3);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[1 * n_channels]), 1);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[2 * n_channels]), 2);
 | 
			
		||||
		in = _mm_insert_epi32(in, *((uint32_t*)&s[3 * n_channels]), 3);
 | 
			
		||||
		in = _mm_slli_epi32(in, 8);
 | 
			
		||||
		in = _mm_srai_epi32(in, 8);
 | 
			
		||||
		out = _mm_cvtepi32_ps(in);
 | 
			
		||||
		out = _mm_mul_ps(out, factor);
 | 
			
		||||
		_mm_store_ps(&d0[n], out);
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out = _mm_mul_ss(out, factor);
 | 
			
		||||
		_mm_store_ss(&d0[n], out);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,7 +30,7 @@ static void
 | 
			
		|||
conv_s24_to_f32d_4s_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
 | 
			
		||||
		uint32_t n_channels, uint32_t n_samples)
 | 
			
		||||
{
 | 
			
		||||
	const uint8_t *s = src;
 | 
			
		||||
	const int24_t *s = src;
 | 
			
		||||
	float *d0 = dst[0], *d1 = dst[1], *d2 = dst[2], *d3 = dst[3];
 | 
			
		||||
	uint32_t n, unrolled;
 | 
			
		||||
	__m128i in[4];
 | 
			
		||||
| 
						 | 
				
			
			@ -48,9 +48,9 @@ conv_s24_to_f32d_4s_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SP
 | 
			
		|||
 | 
			
		||||
	for(n = 0; n < unrolled; n += 4) {
 | 
			
		||||
                in[0] = _mm_loadu_si128((__m128i*)(s + 0*n_channels));
 | 
			
		||||
                in[1] = _mm_loadu_si128((__m128i*)(s + 3*n_channels));
 | 
			
		||||
                in[2] = _mm_loadu_si128((__m128i*)(s + 6*n_channels));
 | 
			
		||||
                in[3] = _mm_loadu_si128((__m128i*)(s + 9*n_channels));
 | 
			
		||||
                in[1] = _mm_loadu_si128((__m128i*)(s + 1*n_channels));
 | 
			
		||||
                in[2] = _mm_loadu_si128((__m128i*)(s + 2*n_channels));
 | 
			
		||||
                in[3] = _mm_loadu_si128((__m128i*)(s + 3*n_channels));
 | 
			
		||||
		in[0] = _mm_shuffle_epi8(in[0], mask);
 | 
			
		||||
		in[1] = _mm_shuffle_epi8(in[1], mask);
 | 
			
		||||
		in[2] = _mm_shuffle_epi8(in[2], mask);
 | 
			
		||||
| 
						 | 
				
			
			@ -74,13 +74,13 @@ conv_s24_to_f32d_4s_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SP
 | 
			
		|||
		_mm_store_ps(&d1[n], out[1]);
 | 
			
		||||
		_mm_store_ps(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ps(&d3[n], out[3]);
 | 
			
		||||
		s += 12 * n_channels;
 | 
			
		||||
		s += 4 * n_channels;
 | 
			
		||||
	}
 | 
			
		||||
	for(; n < n_samples; n++) {
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, read_s24(s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, read_s24(s+3));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, read_s24(s+6));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, read_s24(s+9));
 | 
			
		||||
		out[0] = _mm_cvtsi32_ss(factor, s24_to_s32(*s));
 | 
			
		||||
		out[1] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+1)));
 | 
			
		||||
		out[2] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+2)));
 | 
			
		||||
		out[3] = _mm_cvtsi32_ss(factor, s24_to_s32(*(s+3)));
 | 
			
		||||
		out[0] = _mm_mul_ss(out[0], factor);
 | 
			
		||||
		out[1] = _mm_mul_ss(out[1], factor);
 | 
			
		||||
		out[2] = _mm_mul_ss(out[2], factor);
 | 
			
		||||
| 
						 | 
				
			
			@ -89,7 +89,7 @@ conv_s24_to_f32d_4s_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SP
 | 
			
		|||
		_mm_store_ss(&d1[n], out[1]);
 | 
			
		||||
		_mm_store_ss(&d2[n], out[2]);
 | 
			
		||||
		_mm_store_ss(&d3[n], out[3]);
 | 
			
		||||
		s += 3 * n_channels;
 | 
			
		||||
		s += n_channels;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -79,17 +79,19 @@
 | 
			
		|||
#define U24_MAX			16777215u
 | 
			
		||||
#define U24_SCALE		8388607.5f
 | 
			
		||||
#define U24_OFFS		8388608.f
 | 
			
		||||
#define U24_TO_F32(v)		((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0)
 | 
			
		||||
#define F32_TO_U24(v)		(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS, U24_MIN, U24_MAX)
 | 
			
		||||
#define F32_TO_U24_D(v,d)	(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS + (d), U24_MIN, U24_MAX)
 | 
			
		||||
#define U24_TO_F32(v)		((u24_to_u32(v) * (1.0f / U24_OFFS)) - 1.0)
 | 
			
		||||
#define F32_TO_U24(v)		u32_to_u24(SPA_CLAMP((v) * U24_SCALE + U24_OFFS, U24_MIN, U24_MAX))
 | 
			
		||||
#define F32_TO_U24_D(v,d)	u32_to_u24(SPA_CLAMP((v) * U24_SCALE + U24_OFFS + (d), U24_MIN, U24_MAX))
 | 
			
		||||
 | 
			
		||||
#define S24_MIN			-8388607
 | 
			
		||||
#define S24_MAX			8388607
 | 
			
		||||
#define S24_MAX_F		8388607.0f
 | 
			
		||||
#define S24_SCALE		8388607.0f
 | 
			
		||||
#define S24_TO_F32(v)		(((int32_t)(v)) * (1.0f / S24_SCALE))
 | 
			
		||||
#define F32_TO_S24(v)		(int32_t)SPA_CLAMP((v) * S24_SCALE, S24_MIN, S24_MAX)
 | 
			
		||||
#define F32_TO_S24_D(v,d)	(int32_t)SPA_CLAMP((v) * S24_SCALE + (d), S24_MIN, S24_MAX)
 | 
			
		||||
#define S24_TO_F32(v)		(s24_to_s32(v) * (1.0f / S24_SCALE))
 | 
			
		||||
#define S24S_TO_F32(v)		(s24_to_s32(bswap_s24(v)) * (1.0f / S24_SCALE))
 | 
			
		||||
#define F32_TO_S24(v)		s32_to_s24(SPA_CLAMP((v) * S24_SCALE, S24_MIN, S24_MAX))
 | 
			
		||||
#define F32_TO_S24S(v)		bswap_s24(F32_TO_S24(v))
 | 
			
		||||
#define F32_TO_S24_D(v,d)	s32_to_s24(SPA_CLAMP((v) * S24_SCALE + (d), S24_MIN, S24_MAX))
 | 
			
		||||
 | 
			
		||||
#define U32_MIN			0u
 | 
			
		||||
#define U32_MAX			4294967040u
 | 
			
		||||
| 
						 | 
				
			
			@ -112,88 +114,69 @@
 | 
			
		|||
 | 
			
		||||
#define U24_32_TO_F32(v)	U32_TO_F32((v)<<8)
 | 
			
		||||
#define U24_32S_TO_F32(v)	U32_TO_F32(((int32_t)bswap_32(v))<<8)
 | 
			
		||||
#define F32_TO_U24_32(v)	F32_TO_U24(v)
 | 
			
		||||
#define F32_TO_U24_32S(v)	bswap_32(F32_TO_U24(v))
 | 
			
		||||
#define F32_TO_U24_32_D(v,d)	F32_TO_U24_D(v,d)
 | 
			
		||||
#define F32_TO_U24_32S_D(v,d)	bswap_32(F32_TO_U24_D(v,d))
 | 
			
		||||
#define F32_TO_U24_32(v)	(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS, U24_MIN, U24_MAX)
 | 
			
		||||
#define F32_TO_U24_32S(v)	bswap_32(F32_TO_U24_32(v))
 | 
			
		||||
#define F32_TO_U24_32_D(v,d)	(uint32_t)SPA_CLAMP((v) * U24_SCALE + U24_OFFS + (d), U24_MIN, U24_MAX)
 | 
			
		||||
#define F32_TO_U24_32S_D(v,d)	bswap_32(F32_TO_U24_32_D(v,d))
 | 
			
		||||
 | 
			
		||||
#define S24_32_TO_F32(v)	S32_TO_F32((v)<<8)
 | 
			
		||||
#define S24_32S_TO_F32(v)	S32_TO_F32(((int32_t)bswap_32(v))<<8)
 | 
			
		||||
#define F32_TO_S24_32(v)	F32_TO_S24(v)
 | 
			
		||||
#define F32_TO_S24_32S(v)	bswap_32(F32_TO_S24(v))
 | 
			
		||||
#define F32_TO_S24_32_D(v,d)	F32_TO_S24_D(v,d)
 | 
			
		||||
#define F32_TO_S24_32S_D(v,d)	bswap_32(F32_TO_S24_D(v,d))
 | 
			
		||||
#define F32_TO_S24_32(v)	(int32_t)SPA_CLAMP((v) * S24_SCALE, S24_MIN, S24_MAX)
 | 
			
		||||
#define F32_TO_S24_32S(v)	bswap_32(F32_TO_S24_32(v))
 | 
			
		||||
#define F32_TO_S24_32_D(v,d)	(int32_t)SPA_CLAMP((v) * S24_SCALE + (d), S24_MIN, S24_MAX)
 | 
			
		||||
#define F32_TO_S24_32S_D(v,d)	bswap_32(F32_TO_S24_32_D(v,d))
 | 
			
		||||
 | 
			
		||||
/*
 * Packed 3-byte containers for 24-bit samples.
 *
 * The members are declared in memory order for the host byte order, so that
 * v1 is always the most significant byte, v2 the middle byte and v3 the
 * least significant byte of the native-endian value. Because the declaration
 * order differs between the two byte orders, code that builds these structs
 * must name the members explicitly and never rely on positional
 * initializers.
 */
typedef struct {
#if __BYTE_ORDER == __LITTLE_ENDIAN
	uint8_t v3;
	uint8_t v2;
	uint8_t v1;
#else
	uint8_t v1;
	uint8_t v2;
	uint8_t v3;
#endif
} __attribute__ ((packed)) uint24_t;

/* Signed variant: v1 is signed so that it carries the sign of the sample. */
typedef struct {
#if __BYTE_ORDER == __LITTLE_ENDIAN
	uint8_t v3;
	uint8_t v2;
	int8_t v1;
#else
	int8_t v1;
	uint8_t v2;
	uint8_t v3;
#endif
} __attribute__ ((packed)) int24_t;

/* Widen an unsigned 24-bit sample to 32 bits (result is in 0..0xffffff). */
static inline uint32_t u24_to_u32(uint24_t src)
{
	return ((uint32_t)src.v1 << 16) | ((uint32_t)src.v2 << 8) | (uint32_t)src.v3;
}

/* Store the low 24 bits of src as an unsigned 24-bit sample. */
static inline uint24_t u32_to_u24(uint32_t src)
{
	/* designated initializers keep this correct for both member orders */
	return (uint24_t) {
		.v1 = (uint8_t)(src >> 16),
		.v2 = (uint8_t)(src >> 8),
		.v3 = (uint8_t)src,
	};
}

/* Sign-extend a signed 24-bit sample to 32 bits. */
static inline int32_t s24_to_s32(int24_t src)
{
	/* multiply instead of shifting: left-shifting a negative value is undefined */
	return (int32_t)src.v1 * 65536 +
		(int32_t)(((uint32_t)src.v2 << 8) | (uint32_t)src.v3);
}

/* Store the low 24 bits of src as a signed 24-bit sample. */
static inline int24_t s32_to_s24(int32_t src)
{
	/* designated initializers keep this correct for both member orders */
	return (int24_t) {
		.v1 = (int8_t)(src >> 16),
		.v2 = (uint8_t)(src >> 8),
		.v3 = (uint8_t)src,
	};
}

/* Exchange the most and least significant bytes of an unsigned 24-bit sample. */
static inline uint24_t bswap_u24(uint24_t src)
{
	return (uint24_t) { .v1 = src.v3, .v2 = src.v2, .v3 = src.v1 };
}

/* Exchange the most and least significant bytes of a signed 24-bit sample. */
static inline int24_t bswap_s24(int24_t src)
{
	return (int24_t) { .v1 = (int8_t)src.v3, .v2 = src.v2, .v3 = (uint8_t)src.v1 };
}
 | 
			
		||||
 | 
			
		||||
#define NS_MAX	8
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue