// Build a-la
// gcc -m32 -static -O2 -Wall memset_bench.c memset.s
//
// memset() micro-benchmark: for every block size from the requested size
// down to 1 byte, prints the best throughput observed over many timed rounds.

#define _GNU_SOURCE
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>

/* Byte value written by every benchmarked memset() call. */
#define FILL 0

enum {
	WORK_BYTES = 256 * 1024, /* approx. bytes filled per timed round */
	ROUNDS     = 1000,       /* timed rounds; the fastest one is reported */
	SLACK      = 4096,       /* extra bytes allocated so the buffer can be aligned */
	ALIGN      = 0x100       /* alignment of the benchmarked buffer */
};

/* libc has an incredibly messy way of exposing clock_gettime(),
 * typically requiring -lrt. We just skip all this mess and
 * issue the system call directly.
 */
#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC 1
#endif

/* Read the monotonic clock into *ts; terminates the program on failure,
 * because every later measurement would be meaningless.
 */
static void get_mono(struct timespec *ts)
{
	if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, ts) != 0) {
		perror("clock_gettime");
		exit(EXIT_FAILURE);
	}
}

/* Return only the nanosecond part (0..999999999) of the monotonic clock.
 * The seconds are dropped on purpose; difft() compensates for the wrap.
 */
unsigned gett(void)
{
	struct timespec ts;

	get_mono(&ts);
	return (unsigned)ts.tv_nsec;
}

/* Nanoseconds elapsed from t1 to t2, both obtained from gett().
 * Correct only for intervals shorter than one second: if t2 is smaller
 * than t1, the clock crossed a second boundary and 1e9 is added back.
 */
unsigned difft(unsigned t2, unsigned t1)
{
	if (t2 < t1)
		return t2 + 1000000000u - t1;
	return t2 - t1;
}

/* Benchmark m() filling sz bytes at buf and print the best throughput.
 *
 * sz:  block size in bytes passed to m()
 * buf: destination buffer, at least sz bytes long
 * m:   memset-compatible function under test
 */
void measure(unsigned sz, void *buf, void *(*m)(void *ptr, int c, size_t cnt))
{
	/* For small sizes, call m() many times before measuring time diff */
	unsigned repeat = WORK_BYTES / (sz | 1);
	unsigned best = UINT_MAX;
	unsigned round;

	if (repeat == 0)
		repeat = 1;

	m(buf, FILL, sz); /* warm up caches */
	m(buf, FILL, sz); /* warm up caches */

	for (round = 0; round < ROUNDS; round++) {
		unsigned rep = repeat;
		unsigned start = gett();
		unsigned elapsed;

		do {
			m(buf, FILL, sz);
		} while (--rep);

		elapsed = difft(gett(), start);
		if (elapsed < best)
			best = elapsed;
	}

	/* A coarse clock can report 0 ns; avoid dividing by zero below. */
	if (best == 0)
		best = 1;

	printf("%u byte block: %.2f bytes/ns\n", sz, (double)sz * repeat / best);
}

/* Usage: memset_bench [size]
 * Benchmarks memset() for every block size from `size` (default 1024)
 * down to 1 byte; a size of 0 is measured once.
 */
int main(int argc, char **argv)
{
	int sz = 1024;
	char *raw;
	char *buf;

	if (argc > 1) {
		char *end;
		long v;

		errno = 0;
		v = strtol(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0' || errno == ERANGE
		 || v < 0 || v > INT_MAX - SLACK
		) {
			fprintf(stderr, "usage: %s [size-in-bytes]\n", argv[0]);
			return EXIT_FAILURE;
		}
		sz = (int)v;
	}

	raw = malloc((size_t)sz + SLACK);
	if (!raw) {
		perror("malloc");
		return EXIT_FAILURE;
	}

	/* Advance past the start of the allocation, then round down so that
	 * buf is ALIGN-aligned and still inside the allocated slack.
	 */
	buf = (char *)(((uintptr_t)raw + ALIGN) & ~(uintptr_t)(ALIGN - 1));

	/* Line-buffer stdout so each result appears as soon as it is printed. */
	setvbuf(stdout, NULL, _IOLBF, 0);

	printf("size:%u (%uk) buf:%p\n", (unsigned)sz, (unsigned)sz / 1024, (void *)buf);

	do {
		measure((unsigned)sz, buf, memset);
	} while (--sz > 0);

	free(raw);
	return 0;
}