mirror of
				https://gitlab.freedesktop.org/pipewire/pipewire.git
				synced 2025-10-29 05:40:27 -04:00 
			
		
		
		
	filter-chain: add avx mix function
This commit is contained in:
		
							parent
							
								
									8e8b661340
								
							
						
					
					
						commit
						f810c7c15f
					
				
					 3 changed files with 62 additions and 1 deletion
				
			
		|  | @ -8,10 +8,70 @@ | |||
| 
 | ||||
| #include <spa/utils/defs.h> | ||||
| 
 | ||||
| #include "config.h" | ||||
| #include "dsp-ops.h" | ||||
| 
 | ||||
| #include <immintrin.h> | ||||
| 
 | ||||
| void dsp_mix_gain_avx(struct dsp_ops *ops, | ||||
| 		void * SPA_RESTRICT dst, | ||||
| 		const void * SPA_RESTRICT src[], | ||||
| 		float gain[], uint32_t n_src, uint32_t n_samples) | ||||
| { | ||||
| 	if (n_src == 0) { | ||||
| 		memset(dst, 0, n_samples * sizeof(float)); | ||||
| 	} else if (n_src == 1 && gain[0] == 1.0f) { | ||||
| 		if (dst != src[0]) | ||||
| 			spa_memcpy(dst, src[0], n_samples * sizeof(float)); | ||||
| 	} else { | ||||
| 		uint32_t n, i, unrolled; | ||||
| 		__m256 in[4], g; | ||||
| 		const float **s = (const float **)src; | ||||
| 		float *d = dst; | ||||
| 
 | ||||
| 		if (SPA_LIKELY(SPA_IS_ALIGNED(dst, 32))) { | ||||
| 			unrolled = n_samples & ~31; | ||||
| 			for (i = 0; i < n_src; i++) { | ||||
| 				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 32))) { | ||||
| 					unrolled = 0; | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
| 		} else | ||||
| 			unrolled = 0; | ||||
| 
 | ||||
| 		for (n = 0; n < unrolled; n += 32) { | ||||
| 			g = _mm256_set1_ps(gain[0]); | ||||
| 			in[0] = _mm256_mul_ps(g, _mm256_load_ps(&s[0][n+ 0])); | ||||
| 			in[1] = _mm256_mul_ps(g, _mm256_load_ps(&s[0][n+ 8])); | ||||
| 			in[2] = _mm256_mul_ps(g, _mm256_load_ps(&s[0][n+16])); | ||||
| 			in[3] = _mm256_mul_ps(g, _mm256_load_ps(&s[0][n+24])); | ||||
| 
 | ||||
| 			for (i = 1; i < n_src; i++) { | ||||
| 				g = _mm256_set1_ps(gain[i]); | ||||
| 				in[0] = _mm256_add_ps(in[0], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+ 0]))); | ||||
| 				in[1] = _mm256_add_ps(in[1], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+ 8]))); | ||||
| 				in[2] = _mm256_add_ps(in[2], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+16]))); | ||||
| 				in[3] = _mm256_add_ps(in[3], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+24]))); | ||||
| 			} | ||||
| 			_mm256_store_ps(&d[n+ 0], in[0]); | ||||
| 			_mm256_store_ps(&d[n+ 8], in[1]); | ||||
| 			_mm256_store_ps(&d[n+16], in[2]); | ||||
| 			_mm256_store_ps(&d[n+24], in[3]); | ||||
| 		} | ||||
| 		for (; n < n_samples; n++) { | ||||
| 			__m128 in[1], g; | ||||
| 			g = _mm_set_ss(gain[0]); | ||||
| 			in[0] = _mm_mul_ss(g, _mm_load_ss(&s[0][n])); | ||||
| 			for (i = 1; i < n_src; i++) { | ||||
| 				g = _mm_set_ss(gain[i]); | ||||
| 				in[0] = _mm_add_ss(in[0], _mm_mul_ss(g, _mm_load_ss(&s[i][n]))); | ||||
| 			} | ||||
| 			_mm_store_ss(&d[n], in[0]); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void dsp_sum_avx(struct dsp_ops *ops, float *r, const float *a, const float *b, uint32_t n_samples) | ||||
| { | ||||
| 	uint32_t n, unrolled; | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ static struct dsp_info dsp_table[] = | |||
| 	{ SPA_CPU_FLAG_AVX, | ||||
| 		.funcs.clear = dsp_clear_c, | ||||
| 		.funcs.copy = dsp_copy_c, | ||||
| 		.funcs.mix_gain = dsp_mix_gain_sse, | ||||
| 		.funcs.mix_gain = dsp_mix_gain_avx, | ||||
| 		.funcs.biquad_run = dsp_biquad_run_sse, | ||||
| 		.funcs.sum = dsp_sum_avx, | ||||
| 		.funcs.linear = dsp_linear_c, | ||||
|  |  | |||
|  | @ -150,6 +150,7 @@ MAKE_BIQUADN_RUN_FUNC(sse); | |||
| MAKE_DELAY_FUNC(sse); | ||||
| #endif | ||||
| #if defined (HAVE_AVX) | ||||
| MAKE_MIX_GAIN_FUNC(avx); | ||||
| MAKE_SUM_FUNC(avx); | ||||
| #endif | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Wim Taymans
						Wim Taymans