1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// Build along the lines of:
// gcc -m32 -static -O2 -Wall memset_bench.c memset.s
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <limits.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
/* Byte value handed to the function under test as its fill argument. */
#define FILL 0
/* libc has an incredibly messy way of providing clock_gettime,
 * typically requiring -lrt. We just skip all this mess
 * and issue the syscall ourselves. */
/* Fallback in case the included headers did not define CLOCK_MONOTONIC. */
#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC 1
#endif
/*
 * Read the monotonic clock into *ts using a raw clock_gettime syscall
 * instead of the libc wrapper.
 *
 * If the syscall fails, *ts is set to zero so that callers never read an
 * uninitialized timespec.
 */
static void get_mono(struct timespec *ts)
{
	if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, ts) != 0) {
		ts->tv_sec = 0;
		ts->tv_nsec = 0;
	}
}
/*
 * Return the nanosecond field of the current monotonic time.
 *
 * Only tv_nsec is returned; the seconds part is discarded, so the value
 * wraps around once per second. Differences between two readings must
 * therefore account for that wrap (modulo 1000000000).
 */
unsigned gett(void)
{
	struct timespec ts;

	get_mono(&ts);
	return (unsigned)ts.tv_nsec;
}
/*
 * Difference t2 - t1 between two nanosecond readings.
 *
 * When the raw difference is negative if viewed as a signed int, the
 * later reading is taken to have wrapped past a second boundary and
 * 1000000000 is added back.
 */
unsigned difft(unsigned t2, unsigned t1)
{
	unsigned delta = t2 - t1;

	return ((int)delta < 0) ? delta + 1000000000 : delta;
}
/*
 * Time m() filling sz bytes at buf and print the best throughput observed.
 *
 * sz  - number of bytes passed to m() on every call
 * buf - destination buffer handed to m()
 * m   - memset-style function under test
 *
 * Each timed pass calls m() `batch` times back to back; the shortest pass
 * is used for the bytes/ns figure that gets printed.
 */
void measure(unsigned sz, void *buf, void* (*m)(void *ptr, int c, size_t cnt))
{
	unsigned best = -1U;
	unsigned batch;
	unsigned pass;

	/* Small sizes are called many times per timed pass; never fewer than once */
	batch = (256*1024) / (sz|1);
	if (batch == 0)
		batch = 1;

	/* Two untimed calls to warm up caches */
	m(buf, FILL, sz);
	m(buf, FILL, sz);

	for (pass = 999; pass != 0; pass--) {
		unsigned left;
		unsigned elapsed;
		unsigned start = gett();

		for (left = batch; left != 0; left--)
			m(buf, FILL, sz);
		elapsed = difft(gett(), start);
		/* Keep the fastest pass seen so far */
		if (elapsed < best)
			best = elapsed;
	}
	printf("%u byte block: %.2f bytes/ns\n", sz, (double)(sz) * batch / best);
}
/*
 * Entry point: benchmark memset for every block size from the requested
 * size down to 1 (a requested size of 0 is measured once).
 *
 * argv[1], when present, is the starting size in bytes as a decimal
 * number; the default is 1024. Malformed, negative or too-large values
 * are rejected with an error message and exit status 1.
 *
 * Returns 0 on success, 1 on bad arguments or allocation failure.
 */
int main(int argc, char **argv)
{
	long req = 1024;
	int sz;
	char *raw;
	char *buf;

	if (argc > 1) {
		char *end;

		req = strtol(argv[1], &end, 10);
		/* The upper bound keeps sz + 4096 representable as an int */
		if (end == argv[1] || *end != '\0' || req < 0 || req > INT_MAX - 4096) {
			fprintf(stderr, "invalid size: %s\n", argv[1]);
			return 1;
		}
	}
	sz = (int)req;

	raw = malloc((size_t)sz + 4096);
	if (raw == NULL) {
		perror("malloc");
		return 1;
	}
	/* Move forward to the next 256-byte boundary (by 1..256 bytes);
	 * the 4096 bytes of slack in the allocation cover the shift. */
	buf = raw + (0x100 - ((unsigned long)raw & 0xff));

	setlinebuf(stdout);
	printf("size:%d (%dk) buf:%p\n", sz, sz/1024, (void *)buf);
	do {
		measure((unsigned)sz, buf, memset);
	} while (--sz > 0);

	free(raw);
	return 0;
}
|