/*
 * Multi-threaded memory access latency micro-benchmark.
 *
 * Usage: ./a.out thread_num
 *
 * Starts thread_num threads.  Thread i tries to pin itself to CPU i and then
 * walks its own private region of SIZE unsigned ints twice: once writing and
 * once reading.  For every element it times ACCESS_REPEAT back-to-back
 * accesses with the x86 time-stamp counter.  After all threads finish, the
 * average, maximum and minimum tick counts over all threads are printed for
 * the write pass and for the read pass.
 */
#define _GNU_SOURCE	/* must precede every include: enables CPU_SET() & co. */
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if !defined(__i386__) && !defined(__x86_64__)
#error "this benchmark reads the x86 time-stamp counter (rdtsc)"
#endif

/* Number of unsigned int elements in each thread's work area. */
#define SIZE (128*1024*1024)

/* Number of consecutive accesses to one element that form one timed sample. */
enum { ACCESS_REPEAT = 16 };

/*
 * Read the 64-bit time-stamp counter.
 *
 * rdtsc returns the low half in EAX and the high half in EDX.  The halves are
 * fetched through separate "=a"/"=d" outputs because the "=A" constraint only
 * denotes the EDX:EAX pair on 32-bit x86; on x86-64 it would silently drop
 * the high 32 bits.
 */
static inline unsigned long long rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}

/*
 * Per-thread parameters and results.
 *
 * id and ptr are filled in by main() before the thread starts; the remaining
 * fields start at zero and are updated only by the owning thread.  A *_min of
 * 0 means "no sample recorded yet" (a recorded sample is always >= 1).
 */
typedef struct {
	int id;				/* thread index, also the CPU to pin to */
	volatile unsigned int *ptr;	/* start of this thread's work area */
	unsigned long long read_time;	/* sum of all read samples (ticks) */
	unsigned long long read_max;
	unsigned long long read_min;
	unsigned long long read_count;	/* number of read samples */
	unsigned long long write_time;	/* sum of all write samples (ticks) */
	unsigned long long write_max;
	unsigned long long write_min;
	unsigned long long write_count;	/* number of write samples */
} thread_arg_t;

/* Fold one write sample of v ticks into arg's write statistics. */
static inline void write_update(unsigned long long v, thread_arg_t *arg)
{
	arg->write_count++;
	arg->write_time += v;
	if (v > arg->write_max)
		arg->write_max = v;
	if (v < arg->write_min || arg->write_min == 0)
		arg->write_min = v;
}

/* Fold one read sample of v ticks into arg's read statistics. */
static inline void read_update(unsigned long long v, thread_arg_t *arg)
{
	arg->read_count++;
	arg->read_time += v;
	if (v > arg->read_max)
		arg->read_max = v;
	if (v < arg->read_min || arg->read_min == 0)
		arg->read_min = v;
}

/*
 * Benchmark body executed by each thread.
 *
 * a points to this thread's thread_arg_t.  The thread pins itself to CPU
 * arg->id (a failure is reported but is not fatal), then runs the write pass
 * followed by the read pass over arg->ptr[0 .. SIZE-1], accumulating the
 * results in *arg.
 */
void thread_func(void *a)
{
	/*
	 * The locals are volatile so the compiler can neither remove the
	 * repeated accesses nor move them across the rdtsc() calls.
	 */
	volatile int times;
	volatile unsigned int i;
	volatile unsigned long long t0, t1;
	volatile unsigned int d;
	volatile unsigned int *addr;
	volatile unsigned int *ptr;
	cpu_set_t mask;
	thread_arg_t *arg = (thread_arg_t *)a;

	CPU_ZERO(&mask);
	CPU_SET(arg->id, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
		printf("faild to set CPU: %d\n", arg->id);
	}

	ptr = arg->ptr;

	/* write test */
	for (i = 0; i < SIZE; i++) {
		addr = ptr + i;
		t0 = rdtsc();
		for (times = 0; times < ACCESS_REPEAT; times++) {
			*addr = i;
		}
		t1 = rdtsc();
		/* Drop samples where the counter did not advance. */
		if (t1 > t0)
			write_update(t1 - t0, arg);
	}

	/* read test */
	for (i = 0; i < SIZE; i++) {
		addr = ptr + i;
		t0 = rdtsc();
		for (times = 0; times < ACCESS_REPEAT; times++) {
			d = *addr;
		}
		t1 = rdtsc();
		if (t1 > t0)
			read_update(t1 - t0, arg);
	}

	(void)d;	/* d only exists as a sink for the timed reads */
}

/*
 * Start routine with the exact signature pthread_create() requires.
 * Calling thread_func() through a cast function pointer of a different
 * type would be undefined behaviour.
 */
static void *thread_entry(void *a)
{
	thread_func(a);
	return NULL;
}

/*
 * Parse the thread count, run the benchmark threads and print the combined
 * statistics.
 *
 * Returns 0 on success (and when only the usage text is printed), 1 on an
 * invalid argument, an allocation failure or a thread creation/join failure.
 */
int main(int argc, char **argv)
{
	pthread_t *thread = NULL;
	thread_arg_t *targ = NULL;
	volatile unsigned int *ptr = NULL;
	char *end;
	long parsed;
	int i, num, started;
	int rc = 1;
	unsigned long long write_max, write_min, write_count, write_sum;
	unsigned long long read_max, read_min, read_count, read_sum;

	if (argc < 2) {
		printf("usage: ./a.out thread_num\n");
		return 0;
	}

	/*
	 * Require a positive count whose total work area size still fits in
	 * size_t; zero or negative counts would otherwise end in malloc(0)
	 * and a division by zero below.
	 */
	errno = 0;
	parsed = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
	    parsed < 1 || parsed > INT_MAX ||
	    (size_t)parsed > SIZE_MAX / (sizeof(*ptr) * (size_t)SIZE)) {
		fprintf(stderr, "invalid thread_num: %s\n", argv[1]);
		return 1;
	}
	num = (int)parsed;

	thread = malloc(sizeof(*thread) * (size_t)num);
	if (thread == NULL) {
		fprintf(stderr, "cannot allocate memory for thread handler\n");
		goto out;
	}

	/* calloc() hands out zeroed statistics for every thread. */
	targ = calloc((size_t)num, sizeof(*targ));
	if (targ == NULL) {
		fprintf(stderr, "cannot allocate memory for thread arguments\n");
		goto out;
	}

	ptr = malloc(sizeof(*ptr) * (size_t)SIZE * (size_t)num);
	if (ptr == NULL) {
		fprintf(stderr, "cannot allocate memory for work area\n");
		goto out;
	}

	for (i = 0; i < num; i++) {
		targ[i].id = i;
		/* size_t arithmetic: SIZE * i overflows int once i >= 16. */
		targ[i].ptr = ptr + (size_t)SIZE * (size_t)i;
	}

	for (i = 0; i < num; i++) {
		int err = pthread_create(&thread[i], NULL, thread_entry, &targ[i]);

		if (err != 0) {
			fprintf(stderr, "cannot create thread %d: %s\n",
				i, strerror(err));
			break;
		}
	}
	started = i;
	rc = (started == num) ? 0 : 1;

	write_count = read_count = 0;
	write_sum = read_sum = 0;
	write_min = read_min = 0;
	write_max = read_max = 0;

	/* Only threads that were actually started are joined and counted. */
	for (i = 0; i < started; i++) {
		int err = pthread_join(thread[i], NULL);

		if (err != 0) {
			fprintf(stderr, "cannot join thread %d: %s\n",
				i, strerror(err));
			rc = 1;
			continue;
		}

		/* Threads without samples must not drag the minimum to 0. */
		if (targ[i].write_count > 0) {
			write_sum += targ[i].write_time;
			write_count += targ[i].write_count;
			if (targ[i].write_max > write_max)
				write_max = targ[i].write_max;
			if (targ[i].write_min < write_min || write_min == 0)
				write_min = targ[i].write_min;
		}

		if (targ[i].read_count > 0) {
			read_sum += targ[i].read_time;
			read_count += targ[i].read_count;
			if (targ[i].read_max > read_max)
				read_max = targ[i].read_max;
			if (targ[i].read_min < read_min || read_min == 0)
				read_min = targ[i].read_min;
		}
	}

	if (started > 0) {
		printf("%15s,%15s,%15s,%15s,%15s,%15s\n",
		       "write", "", "", "read", "", "");
		printf("%15s,%15s,%15s,%15s,%15s,%15s\n",
		       "avg", "max", "min", "avg", "max", "min");
		printf("%15llu,%15llu,%15llu,%15llu,%15llu,%15llu\n",
		       write_count ? write_sum / write_count : 0ULL,
		       write_max, write_min,
		       read_count ? read_sum / read_count : 0ULL,
		       read_max, read_min);
	}

out:
	/* The cast only strips the volatile qualifier for free(). */
	free((void *)ptr);
	free(targ);
	free(thread);
	return rc;
}