| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | #include <stdlib.h>
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:15:59 +00:00
										 |  |  | #include <stdio.h>
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | #include <string.h>
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | #include <unistd.h>
 | 
					
						
							|  |  |  | #include <sched.h>
 | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | #include <time.h>
 | 
					
						
							| 
									
										
										
										
											2004-01-27 19:49:48 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | #define rdtscll(val) \
 | 
					
						
							|  |  |  |      __asm__ __volatile__("rdtsc" : "=A" (val)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define likely(x)       __builtin_expect((x),1)
 | 
					
						
							|  |  |  | #define unlikely(x)     __builtin_expect((x),0)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef short int s16; | 
					
						
							|  |  |  | typedef int s32; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | #if 0
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | #define CONFIG_SMP
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef CONFIG_SMP
 | 
					
						
							|  |  |  | #define LOCK_PREFIX "lock ; "
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define LOCK_PREFIX ""
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct __xchg_dummy { unsigned long a[100]; }; | 
					
						
							|  |  |  | #define __xg(x) ((struct __xchg_dummy *)(x))
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | 
					
						
							|  |  |  | 				      unsigned long new, int size) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	unsigned long prev; | 
					
						
							|  |  |  | 	switch (size) { | 
					
						
							|  |  |  | 	case 1: | 
					
						
							|  |  |  | 		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" | 
					
						
							|  |  |  | 				     : "=a"(prev) | 
					
						
							|  |  |  | 				     : "q"(new), "m"(*__xg(ptr)), "0"(old) | 
					
						
							|  |  |  | 				     : "memory"); | 
					
						
							|  |  |  | 		return prev; | 
					
						
							|  |  |  | 	case 2: | 
					
						
							|  |  |  | 		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" | 
					
						
							|  |  |  | 				     : "=a"(prev) | 
					
						
							|  |  |  | 				     : "q"(new), "m"(*__xg(ptr)), "0"(old) | 
					
						
							|  |  |  | 				     : "memory"); | 
					
						
							|  |  |  | 		return prev; | 
					
						
							|  |  |  | 	case 4: | 
					
						
							|  |  |  | 		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" | 
					
						
							|  |  |  | 				     : "=a"(prev) | 
					
						
							|  |  |  | 				     : "q"(new), "m"(*__xg(ptr)), "0"(old) | 
					
						
							|  |  |  | 				     : "memory"); | 
					
						
							|  |  |  | 		return prev; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return old; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define cmpxchg(ptr,o,n)\
 | 
					
						
							|  |  |  | 	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ | 
					
						
							| 
									
										
										
										
											2003-03-01 11:16:42 +00:00
										 |  |  | 				       (unsigned long)(n),sizeof(*(ptr)))) | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | static inline void atomic_add(volatile int *dst, int v) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	__asm__ __volatile__( | 
					
						
							|  |  |  | 		LOCK_PREFIX "addl %1,%0" | 
					
						
							|  |  |  | 		:"=m" (*dst) | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 		:"ir" (v), "m" (*dst)); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | static double detect_cpu_clock() | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | 	struct timespec tm_begin, tm_end; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 	unsigned long long tsc_begin, tsc_end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Warm cache */ | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | 	clock_gettime(CLOCK_MONOTONIC, &tm_begin); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	rdtscll(tsc_begin); | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | 	clock_gettime(CLOCK_MONOTONIC, &tm_begin); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	usleep(1000000); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	rdtscll(tsc_end); | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | 	clock_gettime(CLOCK_MONOTONIC, &tm_end); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | 	return (tsc_end - tsc_begin) / (tm_end.tv_sec - tm_begin.tv_sec + (tm_end.tv_nsec - tm_begin.tv_nsec) / 1e9); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | void mix_areas_srv(unsigned int size, | 
					
						
							|  |  |  | 		   const s16 *src, | 
					
						
							|  |  |  | 		   volatile s32 *sum, | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 		   unsigned int src_step, unsigned int sum_step) | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 	src_step /= sizeof(*src); | 
					
						
							|  |  |  | 	sum_step /= sizeof(*sum); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  |         while (size-- > 0) { | 
					
						
							|  |  |  |                 atomic_add(sum, *src); | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  |                 src += src_step; | 
					
						
							|  |  |  |                 sum += sum_step; | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  |         } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void saturate(unsigned int size, | 
					
						
							|  |  |  |               s16 *dst, const s32 *sum, | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  |               unsigned int dst_step, unsigned int sum_step) | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 	dst_step /= sizeof(*dst); | 
					
						
							|  |  |  | 	sum_step /= sizeof(*sum); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  |         while (size-- > 0) { | 
					
						
							|  |  |  |                 s32 sample = *sum; | 
					
						
							|  |  |  |                 if (unlikely(sample < -0x8000)) | 
					
						
							|  |  |  |                         *dst = -0x8000; | 
					
						
							|  |  |  |                 else if (unlikely(sample > 0x7fff)) | 
					
						
							|  |  |  |                         *dst = 0x7fff; | 
					
						
							|  |  |  |                 else | 
					
						
							|  |  |  |                         *dst = sample; | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  |                 dst += dst_step; | 
					
						
							|  |  |  |                 sum += sum_step; | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  |         } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | void mix_areas0(unsigned int size, | 
					
						
							|  |  |  | 		volatile s16 *dst, s16 *src, | 
					
						
							|  |  |  | 		volatile s32 *sum, | 
					
						
							|  |  |  | 		unsigned int dst_step, | 
					
						
							|  |  |  | 		unsigned int src_step, | 
					
						
							|  |  |  | 		unsigned int sum_step) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 	dst_step /= sizeof(*dst); | 
					
						
							|  |  |  | 	src_step /= sizeof(*src); | 
					
						
							|  |  |  | 	sum_step /= sizeof(*sum); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	while (size-- > 0) { | 
					
						
							|  |  |  | 		s32 sample = *dst + *src; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 		if (unlikely(sample < -0x8000)) | 
					
						
							|  |  |  | 			*dst = -0x8000; | 
					
						
							|  |  |  | 		else if (unlikely(sample > 0x7fff)) | 
					
						
							|  |  |  | 			*dst = 0x7fff; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 		else | 
					
						
							|  |  |  | 			*dst = sample; | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 		dst += dst_step; | 
					
						
							|  |  |  | 		src += src_step; | 
					
						
							|  |  |  | 		sum += sum_step; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | #define MIX_AREAS_16 mix_areas1
 | 
					
						
							|  |  |  | #define MIX_AREAS_16_MMX mix_areas1_mmx
 | 
					
						
							|  |  |  | #define MIX_AREAS_32 mix_areas1_32
 | 
					
						
							|  |  |  | #define MIX_AREAS_24 mix_areas1_24
 | 
					
						
							|  |  |  | #define MIX_AREAS_24_CMOV mix_areas1_24_cmov
 | 
					
						
							|  |  |  | #define XADD "addl"
 | 
					
						
							|  |  |  | #define XSUB "subl"
 | 
					
						
							| 
									
										
										
										
											2003-03-01 11:16:42 +00:00
										 |  |  | #include "../src/pcm/pcm_dmix_i386.h"
 | 
					
						
							| 
									
										
										
										
											2018-10-23 10:17:12 +02:00
										 |  |  | static void *ptr_mix_areas1_32 __attribute__((unused)) = &mix_areas1_32; | 
					
						
							|  |  |  | static void *ptr_mix_areas1_24 __attribute__((unused)) = &mix_areas1_24; | 
					
						
							|  |  |  | static void *ptr_mix_areas1_24_cmov __attribute__((unused)) = &mix_areas1_24_cmov; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | void mix_areas2(unsigned int size, | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 		volatile s16 *dst, const s16 *src, | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 		volatile s32 *sum, | 
					
						
							|  |  |  | 		unsigned int dst_step, | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 		unsigned int src_step) | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 	dst_step /= sizeof(*dst); | 
					
						
							|  |  |  | 	src_step /= sizeof(*src); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	while (size-- > 0) { | 
					
						
							|  |  |  | 		s32 sample = *src; | 
					
						
							| 
									
										
										
										
											2003-03-01 14:24:42 +00:00
										 |  |  | 		s32 old_sample = *sum; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 		if (cmpxchg(dst, 0, 1) == 0) | 
					
						
							| 
									
										
										
										
											2003-03-01 14:24:42 +00:00
										 |  |  | 			sample -= old_sample; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 		atomic_add(sum, sample); | 
					
						
							|  |  |  | 		do { | 
					
						
							|  |  |  | 			sample = *sum; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 			if (unlikely(sample < -0x8000)) | 
					
						
							|  |  |  | 				*dst = -0x8000; | 
					
						
							|  |  |  | 			else if (unlikely(sample > 0x7fff)) | 
					
						
							|  |  |  | 				*dst = 0x7fff; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 			else | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 				*dst = sample; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 		} while (unlikely(sample != *sum)); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 		sum++; | 
					
						
							| 
									
										
										
										
											2005-11-21 10:48:52 +00:00
										 |  |  | 		dst += dst_step; | 
					
						
							|  |  |  | 		src += src_step; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | void setscheduler(void) | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 	struct sched_param sched_param; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:15:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 	if (sched_getparam(0, &sched_param) < 0) { | 
					
						
							|  |  |  | 		printf("Scheduler getparam failed...\n"); | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sched_param.sched_priority = sched_get_priority_max(SCHED_RR); | 
					
						
							|  |  |  | 	if (!sched_setscheduler(0, SCHED_RR, &sched_param)) { | 
					
						
							|  |  |  | 		printf("Scheduler set to Round Robin with priority %i...\n", sched_param.sched_priority); | 
					
						
							|  |  |  | 		fflush(stdout); | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	printf("!!!Scheduler set to Round Robin with priority %i FAILED!!!\n", sched_param.sched_priority); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | int cache_size = 1024*1024; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | void init(s16 *dst, s32 *sum, int size) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int count; | 
					
						
							|  |  |  | 	char *a; | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	for (count = size - 1; count >= 0; count--) | 
					
						
							|  |  |  | 		*sum++ = 0; | 
					
						
							|  |  |  | 	for (count = size - 1; count >= 0; count--) | 
					
						
							|  |  |  | 		*dst++ = 0; | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 	a = malloc(cache_size); | 
					
						
							|  |  |  | 	for (count = cache_size - 1; count >= 0; count--) { | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 		a[count] = count & 0xff; | 
					
						
							|  |  |  | 		a[count] ^= 0x55; | 
					
						
							|  |  |  | 		a[count] ^= 0xaa; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	free(a); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int main(int argc, char **argv) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int size = 2048, n = 4, max = 32267; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:45:06 +00:00
										 |  |  | 	int LOOP = 100; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 	int i, t; | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 	unsigned long long begin, end, diff, diffS, diff0, diff1, diff1_mmx, diff2; | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  |         double cpu_clock = detect_cpu_clock(); | 
					
						
							| 
									
										
										
										
											2004-01-27 19:49:48 +00:00
										 |  |  | 	s16 *dst = malloc(sizeof(*dst) * size); | 
					
						
							|  |  |  | 	s32 *sum = calloc(size, sizeof(*sum)); | 
					
						
							|  |  |  | 	s16 **srcs = malloc(sizeof(*srcs) * n); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	setscheduler(); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:41:58 +00:00
										 |  |  | #ifndef CONFIG_SMP
 | 
					
						
							|  |  |  |         printf("CPU clock: %fMhz (UP)\n\n", cpu_clock / 10e5); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |         printf("CPU clock: %fMhz (SMP)\n\n", cpu_clock / 10e5); | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 	if (argc > 3) { | 
					
						
							| 
									
										
										
										
											2003-02-19 21:15:59 +00:00
										 |  |  | 		size = atoi(argv[1]); | 
					
						
							|  |  |  | 		n = atoi(argv[2]); | 
					
						
							|  |  |  | 		max = atoi(argv[3]); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 	if (argc > 4) | 
					
						
							|  |  |  | 		cache_size = atoi(argv[4]) * 1024; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	for (i = 0; i < n; i++) { | 
					
						
							|  |  |  | 		int k; | 
					
						
							|  |  |  | 		s16 *s; | 
					
						
							|  |  |  | 		srcs[i] = s = malloc(sizeof(s16) * size); | 
					
						
							|  |  |  | 		for (k = 0; k < size; ++k, ++s) { | 
					
						
							|  |  |  | 			*s = (rand() % (max * 2)) - max; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 	for (t = 0, diffS = -1; t < LOOP; t++) { | 
					
						
							|  |  |  | 		init(dst, sum, size); | 
					
						
							|  |  |  | 		rdtscll(begin); | 
					
						
							|  |  |  | 		for (i = 0; i < n; i++) { | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 			mix_areas_srv(size, srcs[i], sum, 2, 4); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2003-02-21 10:09:36 +00:00
										 |  |  | 		saturate(size, dst, sum, 2, 4); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 		rdtscll(end); | 
					
						
							|  |  |  | 		diff = end - begin; | 
					
						
							|  |  |  | 		if (diff < diffS) | 
					
						
							|  |  |  | 			diffS = diff; | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 		printf("mix_areas_srv : %llu               \r", diff); fflush(stdout); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 	for (t = 0, diff0 = -1; t < LOOP; t++) { | 
					
						
							|  |  |  | 		init(dst, sum, size); | 
					
						
							|  |  |  | 		rdtscll(begin); | 
					
						
							|  |  |  | 		for (i = 0; i < n; i++) { | 
					
						
							|  |  |  | 			mix_areas0(size, dst, srcs[i], sum, 2, 2, 4); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rdtscll(end); | 
					
						
							|  |  |  | 		diff = end - begin; | 
					
						
							|  |  |  | 		if (diff < diff0) | 
					
						
							|  |  |  | 			diff0 = diff; | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 		printf("mix_areas0    : %llu               \r", diff); fflush(stdout); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for (t = 0, diff1 = -1; t < LOOP; t++) { | 
					
						
							|  |  |  | 		init(dst, sum, size); | 
					
						
							|  |  |  | 		rdtscll(begin); | 
					
						
							|  |  |  | 		for (i = 0; i < n; i++) { | 
					
						
							|  |  |  | 			mix_areas1(size, dst, srcs[i], sum, 2, 2, 4); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rdtscll(end); | 
					
						
							|  |  |  | 		diff = end - begin; | 
					
						
							|  |  |  | 		if (diff < diff1) | 
					
						
							|  |  |  | 			diff1 = diff; | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 		printf("mix_areas1    : %llu              \r", diff); fflush(stdout); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for (t = 0, diff1_mmx = -1; t < LOOP; t++) { | 
					
						
							|  |  |  | 		init(dst, sum, size); | 
					
						
							|  |  |  | 		rdtscll(begin); | 
					
						
							|  |  |  | 		for (i = 0; i < n; i++) { | 
					
						
							|  |  |  | 			mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rdtscll(end); | 
					
						
							|  |  |  | 		diff = end - begin; | 
					
						
							|  |  |  | 		if (diff < diff1_mmx) | 
					
						
							|  |  |  | 			diff1_mmx = diff; | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 		printf("mix_areas1_mmx: %llu              \r", diff); fflush(stdout); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for (t = 0, diff2 = -1; t < LOOP; t++) { | 
					
						
							|  |  |  | 		init(dst, sum, size); | 
					
						
							|  |  |  | 		rdtscll(begin); | 
					
						
							|  |  |  | 		for (i = 0; i < n; i++) { | 
					
						
							|  |  |  | 			mix_areas2(size, dst, srcs[i], sum, 2, 2); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rdtscll(end); | 
					
						
							|  |  |  | 		diff = end - begin; | 
					
						
							|  |  |  | 		if (diff < diff2) | 
					
						
							|  |  |  | 			diff2 = diff; | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 		printf("mix_areas2    : %llu              \r", diff); fflush(stdout); | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	printf("                                                                           \r"); | 
					
						
							|  |  |  | 	printf("Summary (the best times):\n"); | 
					
						
							| 
									
										
										
										
											2019-02-17 04:54:17 -08:00
										 |  |  | 	printf("mix_areas_srv  : %8llu %f%%\n", diffS, 100*2*44100.0*diffS/(size*n*cpu_clock)); | 
					
						
							|  |  |  | 	printf("mix_areas0     : %8llu %f%%\n", diff0, 100*2*44100.0*diff0/(size*n*cpu_clock)); | 
					
						
							|  |  |  | 	printf("mix_areas1     : %8llu %f%%\n", diff1, 100*2*44100.0*diff1/(size*n*cpu_clock)); | 
					
						
							|  |  |  | 	printf("mix_areas1_mmx : %8llu %f%%\n", diff1_mmx, 100*2*44100.0*diff1_mmx/(size*n*cpu_clock)); | 
					
						
							|  |  |  | 	printf("mix_areas2     : %8llu %f%%\n", diff2, 100*2*44100.0*diff2/(size*n*cpu_clock)); | 
					
						
							| 
									
										
										
										
											2003-02-20 19:47:37 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	printf("\n"); | 
					
						
							|  |  |  | 	printf("areas1/srv ratio     : %f\n", (double)diff1 / diffS); | 
					
						
							|  |  |  | 	printf("areas1_mmx/srv ratio : %f\n", (double)diff1_mmx / diffS); | 
					
						
							| 
									
										
										
										
											2003-02-20 16:38:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-02-19 21:15:59 +00:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2003-02-19 21:00:45 +00:00
										 |  |  | } |