mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Denys Vlasenko <vda.linux@googlemail.com>
To: Rich Felker <dalias@libc.org>
Cc: musl <musl@lists.openwall.com>
Subject: Re: [PATCH] x86_64/memset: use "small block" code for blocks up to 30 bytes long
Date: Tue, 17 Feb 2015 19:53:28 +0100	[thread overview]
Message-ID: <CAK1hOcMtnZLy7Kw36YQWdi3tzmACepAWjQ=_82GuE_NUHd0hpw@mail.gmail.com> (raw)
In-Reply-To: <20150217174045.GH23507@brightrain.aerifal.cx>

[-- Attachment #1: Type: text/plain, Size: 632 bytes --]

On Tue, Feb 17, 2015 at 6:40 PM, Rich Felker <dalias@libc.org> wrote:
>> With your program I see similar results:
>>
>> ....
>> size 50: min=10, avg=10           min=10, avg=10
>> size 52: min=10, avg=10           min=10, avg=10
>
> The ... was the part where mine seemed better. :)

Do you seriously think I would go as low as lying by omission?
Here are the full, unabridged files of three runs of both algorithms.

$ gcc -O2 -Wall memset-cycles-vda.c vda1.s
$ ./a.out >vda1.OUT1 && ./a.out >vda1.OUT2 && ./a.out >vda1.OUT3
$ gcc -O2 -Wall memset-cycles-vda.c my2.s
$ ./a.out >my2.OUT1 && ./a.out >my2.OUT2 && ./a.out >my2.OUT3

[-- Attachment #2: memset-cycles-vda.c --]
[-- Type: text/x-csrc, Size: 1222 bytes --]

#define _XOPEN_SOURCE 700
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return a 32-bit timestamp suitable for measuring short intervals.
 *
 * On i386/x86_64 this is the value the "rdtsc" instruction leaves in EAX;
 * EDX is written by the instruction too, but its contents are discarded.
 * On every other target it is the process CPU time in nanoseconds,
 * truncated to 32 bits.
 *
 * In both cases the value wraps around, so callers must compute intervals
 * as an unsigned difference of two readings (end - start).
 */
static inline unsigned rdtsc(void)
{
#if defined __i386__ || defined __x86_64__
	unsigned x;
	/* Only EAX is used as output, so EDX has to be listed as clobbered. */
	__asm__ __volatile__ ( "rdtsc" : "=a"(x) : : "rdx" );
	/* Disabled alternative that issues cpuid in front of rdtsc: */
//	__asm__ __volatile__ ( "cpuid ; rdtsc" : "=a"(x)
//		: : "rbx", "rcx", "rdx" );
	return x;
#else
	/* Zero-initialised so a failing clock_gettime() yields 0, not garbage. */
	struct timespec ts = {0};
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
	/*
	 * Include the seconds field: returning tv_nsec alone makes the
	 * difference of two readings meaningless whenever a one-second
	 * boundary lies between them. Truncating the full nanosecond count
	 * to 32 bits keeps unsigned differences exact for intervals shorter
	 * than 2^32 ns.
	 */
	return (unsigned)((unsigned long long)ts.tv_sec * 1000000000ULL
		+ (unsigned long long)ts.tv_nsec);
#endif
}

/*
 * Destination buffer for the timed memset() calls in main(), which only
 * ever fills fewer than 32768 bytes of it. The purpose of the additional
 * 100 bytes is not shown in this file (presumably slack - TODO confirm).
 */
char buf[32768+100];

/*
 * Number of timed samples taken for each size (1024*4096 = 4194304).
 * A large count is needed to get a stable "min" measurement.
 */
#define REP (1024*4096)

/*
 * Benchmark memset() on buf for a range of sizes and print, for each size,
 * the minimum and the mean rdtsc() delta per memset() call.
 *
 * Sizes tested: 2..62 in steps of 2, 64..480 in steps of 32, then doubling
 * from 512 while the size stays below 32768.
 *
 * Always returns 0.
 */
int main(void)
{
	unsigned i, t, tmin;
	unsigned long long tmean;
	unsigned n;

	for (n=2; n<32768; n+=(n<64 ? 2 : n<512 ? 32 : n)) {
		/*
		 * Small sizes are called several times per timed sample so that
		 * one sample covers roughly 1024 bytes of work; (n|1) is never
		 * zero, and the count is clamped to at least one call.
		 */
		int repeat = 1024 / (n|1);
		if (repeat == 0)
			repeat = 1;

		/* Untimed call before the measurements start. */
		memset(buf, 0, n);
		tmin = -1;	/* converts to UINT_MAX: any sample is smaller */
		tmean = 0;
		for (i=0; i < REP; i++) {
			int j = repeat;
			/*
			 * The empty asm statements with a "memory" clobber are
			 * compiler barriers: they keep the memset() calls and
			 * the timestamp reads from being reordered or merged.
			 */
			__asm__ __volatile__ ("" : : : "memory");
			t = rdtsc();
			do {
				memset(buf, 0, n);
				__asm__ __volatile__ ("" : : : "memory");
			} while (--j != 0);
			/* Unsigned subtraction stays correct across counter wrap. */
			t = rdtsc() - t;
			__asm__ __volatile__ ("" : : : "memory");
			if (t < tmin) tmin = t;
			tmean += t;
		}
		/* Divide by repeat to report the cost of a single memset(). */
		printf("size %u: min=%.2f, avg=%.2f\n",
			n,
			(double)tmin / repeat,
			/* divisor computed in double so it cannot overflow int */
			(double)tmean / ((double)repeat * REP)
		);
	}
	return 0;
}

[-- Attachment #3: my2.OUT1 --]
[-- Type: application/octet-stream, Size: 1532 bytes --]

size 2: min=7.96, avg=8.04
size 4: min=8.01, avg=8.07
size 6: min=8.01, avg=8.09
size 8: min=8.12, avg=8.26
size 10: min=8.16, avg=8.26
size 12: min=8.19, avg=8.31
size 14: min=8.25, avg=8.35
size 16: min=9.10, avg=9.20
size 18: min=9.17, avg=9.26
size 20: min=9.12, avg=9.32
size 22: min=9.20, avg=9.36
size 24: min=9.15, avg=9.42
size 26: min=9.32, avg=9.47
size 28: min=9.34, avg=9.92
size 30: min=9.36, avg=9.58
size 32: min=10.94, avg=11.09
size 34: min=12.41, avg=12.60
size 36: min=12.56, avg=12.70
size 38: min=12.58, avg=12.74
size 40: min=11.75, avg=11.91
size 42: min=11.87, avg=11.91
size 44: min=11.73, avg=12.85
size 46: min=11.71, avg=13.06
size 48: min=11.70, avg=12.15
size 50: min=11.70, avg=12.54
size 52: min=11.84, avg=12.77
size 54: min=11.67, avg=11.91
size 56: min=11.65, avg=11.90
size 58: min=11.65, avg=11.90
size 60: min=11.62, avg=11.90
size 62: min=11.62, avg=11.90
size 64: min=19.40, avg=19.77
size 96: min=18.00, avg=18.32
size 128: min=32.57, avg=34.11
size 160: min=36.00, avg=37.56
size 192: min=39.00, avg=41.05
size 224: min=42.75, avg=44.75
size 256: min=45.00, avg=48.84
size 288: min=49.00, avg=52.01
size 320: min=52.00, avg=55.30
size 352: min=57.00, avg=59.91
size 384: min=60.00, avg=63.21
size 416: min=63.00, avg=67.28
size 448: min=66.00, avg=69.39
size 480: min=69.00, avg=72.74
size 512: min=75.00, avg=81.59
size 1024: min=126.00, avg=132.66
size 2048: min=228.00, avg=234.63
size 4096: min=432.00, avg=438.55
size 8192: min=837.00, avg=846.30
size 16384: min=1650.00, avg=1662.39

[-- Attachment #4: my2.OUT2 --]
[-- Type: application/octet-stream, Size: 1532 bytes --]

size 2: min=7.95, avg=8.03
size 4: min=7.99, avg=8.05
size 6: min=8.01, avg=8.09
size 8: min=8.12, avg=8.19
size 10: min=8.16, avg=8.24
size 12: min=8.19, avg=8.30
size 14: min=8.25, avg=8.35
size 16: min=9.10, avg=9.20
size 18: min=9.11, avg=9.25
size 20: min=9.19, avg=9.31
size 22: min=9.20, avg=9.36
size 24: min=9.22, avg=9.42
size 26: min=9.24, avg=9.46
size 28: min=9.34, avg=9.92
size 30: min=9.36, avg=9.55
size 32: min=10.94, avg=11.08
size 34: min=12.41, avg=12.60
size 36: min=12.56, avg=12.70
size 38: min=12.58, avg=12.72
size 40: min=11.75, avg=11.93
size 42: min=11.87, avg=11.90
size 44: min=11.73, avg=12.77
size 46: min=11.71, avg=12.68
size 48: min=11.70, avg=12.41
size 50: min=11.70, avg=12.36
size 52: min=11.84, avg=12.58
size 54: min=11.67, avg=11.90
size 56: min=11.65, avg=11.89
size 58: min=11.65, avg=11.89
size 60: min=11.62, avg=11.89
size 62: min=11.62, avg=11.89
size 64: min=19.40, avg=19.77
size 96: min=18.00, avg=18.31
size 128: min=32.57, avg=34.10
size 160: min=36.00, avg=37.62
size 192: min=39.00, avg=41.06
size 224: min=42.75, avg=44.87
size 256: min=45.00, avg=48.80
size 288: min=49.00, avg=51.98
size 320: min=52.00, avg=55.16
size 352: min=57.00, avg=59.86
size 384: min=60.00, avg=63.03
size 416: min=63.00, avg=66.21
size 448: min=66.00, avg=69.40
size 480: min=70.50, avg=76.88
size 512: min=78.00, avg=86.55
size 1024: min=129.00, avg=149.69
size 2048: min=228.00, avg=273.30
size 4096: min=432.00, avg=510.75
size 8192: min=837.00, avg=985.31
size 16384: min=1650.00, avg=2060.94

[-- Attachment #5: my2.OUT3 --]
[-- Type: application/octet-stream, Size: 1532 bytes --]

size 2: min=7.97, avg=8.03
size 4: min=8.00, avg=8.13
size 6: min=8.01, avg=8.11
size 8: min=8.12, avg=8.20
size 10: min=8.16, avg=8.25
size 12: min=8.19, avg=8.30
size 14: min=8.25, avg=8.35
size 16: min=9.10, avg=9.20
size 18: min=9.17, avg=9.26
size 20: min=9.19, avg=9.31
size 22: min=9.20, avg=9.36
size 24: min=9.22, avg=9.42
size 26: min=9.24, avg=9.47
size 28: min=9.34, avg=9.93
size 30: min=9.36, avg=9.56
size 32: min=10.94, avg=11.09
size 34: min=12.41, avg=12.61
size 36: min=12.56, avg=12.71
size 38: min=12.58, avg=12.74
size 40: min=11.75, avg=11.93
size 42: min=11.87, avg=11.99
size 44: min=11.73, avg=12.87
size 46: min=11.71, avg=12.48
size 48: min=11.70, avg=12.56
size 50: min=11.70, avg=12.12
size 52: min=11.84, avg=12.70
size 54: min=11.67, avg=11.90
size 56: min=11.65, avg=11.90
size 58: min=11.65, avg=11.90
size 60: min=11.62, avg=11.90
size 62: min=11.62, avg=11.90
size 64: min=19.40, avg=19.77
size 96: min=18.00, avg=18.35
size 128: min=32.14, avg=34.11
size 160: min=36.00, avg=37.54
size 192: min=39.00, avg=41.07
size 224: min=42.00, avg=44.80
size 256: min=45.00, avg=48.82
size 288: min=49.00, avg=51.99
size 320: min=52.00, avg=55.20
size 352: min=57.00, avg=59.85
size 384: min=60.00, avg=63.08
size 416: min=63.00, avg=66.26
size 448: min=66.00, avg=69.41
size 480: min=69.00, avg=72.70
size 512: min=75.00, avg=81.62
size 1024: min=126.00, avg=132.54
size 2048: min=228.00, avg=234.49
size 4096: min=432.00, avg=438.37
size 8192: min=837.00, avg=846.75
size 16384: min=1650.00, avg=1662.54

[-- Attachment #6: vda1.OUT1 --]
[-- Type: application/octet-stream, Size: 1532 bytes --]

size 2: min=7.97, avg=8.05
size 4: min=8.01, avg=8.10
size 6: min=8.01, avg=8.10
size 8: min=8.12, avg=8.21
size 10: min=8.16, avg=8.26
size 12: min=8.19, avg=8.32
size 14: min=8.25, avg=8.36
size 16: min=9.10, avg=9.34
size 18: min=9.17, avg=9.27
size 20: min=9.19, avg=9.31
size 22: min=9.20, avg=9.36
size 24: min=9.22, avg=9.42
size 26: min=9.32, avg=9.47
size 28: min=9.34, avg=9.92
size 30: min=9.36, avg=9.55
size 32: min=10.94, avg=11.22
size 34: min=12.52, avg=12.74
size 36: min=12.67, avg=12.81
size 38: min=12.58, avg=12.85
size 40: min=11.75, avg=12.04
size 42: min=11.87, avg=11.99
size 44: min=11.73, avg=12.71
size 46: min=11.71, avg=12.62
size 48: min=11.85, avg=13.25
size 50: min=11.85, avg=13.09
size 52: min=11.84, avg=12.99
size 54: min=11.67, avg=12.08
size 56: min=11.65, avg=12.02
size 58: min=11.65, avg=12.03
size 60: min=11.62, avg=12.02
size 62: min=11.62, avg=12.05
size 64: min=19.40, avg=19.77
size 96: min=18.00, avg=18.32
size 128: min=31.71, avg=32.93
size 160: min=35.00, avg=36.29
size 192: min=37.80, avg=39.71
size 224: min=41.25, avg=43.34
size 256: min=45.00, avg=47.18
size 288: min=48.00, avg=50.37
size 320: min=51.00, avg=53.54
size 352: min=55.50, avg=57.92
size 384: min=58.50, avg=61.10
size 416: min=61.50, avg=64.26
size 448: min=64.50, avg=67.44
size 480: min=67.50, avg=70.73
size 512: min=75.00, avg=79.25
size 1024: min=126.00, avg=129.92
size 2048: min=228.00, avg=232.58
size 4096: min=432.00, avg=435.83
size 8192: min=837.00, avg=844.30
size 16384: min=1650.00, avg=1661.93

[-- Attachment #7: vda1.OUT2 --]
[-- Type: application/octet-stream, Size: 1534 bytes --]

size 2: min=7.97, avg=8.10
size 4: min=8.01, avg=8.05
size 6: min=8.03, avg=8.09
size 8: min=8.12, avg=9.39
size 10: min=8.16, avg=8.24
size 12: min=8.19, avg=8.66
size 14: min=8.25, avg=8.35
size 16: min=9.10, avg=9.20
size 18: min=9.17, avg=9.27
size 20: min=9.19, avg=9.31
size 22: min=9.20, avg=9.36
size 24: min=9.22, avg=9.42
size 26: min=9.24, avg=10.98
size 28: min=9.34, avg=10.25
size 30: min=9.36, avg=9.56
size 32: min=10.94, avg=11.22
size 34: min=12.52, avg=12.72
size 36: min=12.56, avg=14.66
size 38: min=12.69, avg=13.77
size 40: min=11.75, avg=11.99
size 42: min=11.87, avg=12.09
size 44: min=11.73, avg=12.48
size 46: min=11.71, avg=12.59
size 48: min=11.85, avg=12.89
size 50: min=11.85, avg=12.91
size 52: min=11.84, avg=13.56
size 54: min=11.67, avg=12.08
size 56: min=11.65, avg=12.21
size 58: min=11.65, avg=12.01
size 60: min=11.62, avg=12.03
size 62: min=11.62, avg=12.69
size 64: min=19.40, avg=19.77
size 96: min=18.00, avg=18.31
size 128: min=31.71, avg=32.91
size 160: min=35.00, avg=36.27
size 192: min=38.40, avg=40.45
size 224: min=41.25, avg=43.28
size 256: min=45.00, avg=47.20
size 288: min=48.00, avg=50.41
size 320: min=51.00, avg=69.57
size 352: min=55.50, avg=58.77
size 384: min=58.50, avg=61.09
size 416: min=61.50, avg=64.28
size 448: min=64.50, avg=72.61
size 480: min=69.00, avg=74.83
size 512: min=75.00, avg=83.85
size 1024: min=126.00, avg=146.97
size 2048: min=228.00, avg=270.72
size 4096: min=432.00, avg=508.53
size 8192: min=837.00, avg=982.23
size 16384: min=1650.00, avg=2077.87

[-- Attachment #8: vda1.OUT3 --]
[-- Type: application/octet-stream, Size: 1533 bytes --]

size 2: min=7.97, avg=8.19
size 4: min=8.01, avg=8.11
size 6: min=8.03, avg=8.81
size 8: min=8.12, avg=8.21
size 10: min=8.16, avg=8.24
size 12: min=8.19, avg=8.30
size 14: min=8.25, avg=8.35
size 16: min=9.10, avg=9.19
size 18: min=9.06, avg=9.25
size 20: min=9.19, avg=9.31
size 22: min=9.20, avg=9.35
size 24: min=9.22, avg=9.42
size 26: min=9.32, avg=9.48
size 28: min=9.34, avg=9.92
size 30: min=9.36, avg=9.56
size 32: min=10.94, avg=11.21
size 34: min=12.52, avg=12.72
size 36: min=12.67, avg=12.81
size 38: min=12.69, avg=12.84
size 40: min=11.75, avg=11.99
size 42: min=11.87, avg=11.98
size 44: min=11.73, avg=12.69
size 46: min=11.71, avg=12.40
size 48: min=11.85, avg=13.19
size 50: min=11.85, avg=13.01
size 52: min=11.84, avg=13.14
size 54: min=11.67, avg=12.09
size 56: min=11.65, avg=12.05
size 58: min=11.65, avg=12.02
size 60: min=11.62, avg=12.02
size 62: min=11.62, avg=12.02
size 64: min=19.40, avg=19.76
size 96: min=18.00, avg=18.31
size 128: min=31.71, avg=32.92
size 160: min=34.50, avg=36.28
size 192: min=38.40, avg=39.96
size 224: min=41.25, avg=46.85
size 256: min=45.00, avg=47.19
size 288: min=48.00, avg=50.35
size 320: min=51.00, avg=53.54
size 352: min=55.50, avg=57.88
size 384: min=58.50, avg=61.11
size 416: min=61.50, avg=64.26
size 448: min=64.50, avg=67.44
size 480: min=69.00, avg=74.91
size 512: min=75.00, avg=83.72
size 1024: min=126.00, avg=148.88
size 2048: min=228.00, avg=270.60
size 4096: min=432.00, avg=510.83
size 8192: min=837.00, avg=1000.11
size 16384: min=1650.00, avg=2079.31

  reply	other threads:[~2015-02-17 18:53 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-13 16:39 Denys Vlasenko
2015-02-14 19:35 ` Rich Felker
2015-02-15  4:06   ` Rich Felker
2015-02-15 14:07     ` Denys Vlasenko
2015-02-15 15:03       ` Rich Felker
2015-02-15 21:44         ` Denys Vlasenko
2015-02-15 22:55           ` Rich Felker
2015-02-16 10:09             ` Denys Vlasenko
2015-02-16 15:12               ` Rich Felker
2015-02-16 17:36           ` Rich Felker
2015-02-17 13:08             ` Denys Vlasenko
2015-02-17 16:12               ` Rich Felker
2015-02-17 16:51                 ` Denys Vlasenko
2015-02-17 17:30                   ` Denys Vlasenko
2015-02-17 17:40                   ` Rich Felker
2015-02-17 18:53                     ` Denys Vlasenko [this message]
2015-02-17 21:12                       ` Rich Felker
2015-02-18  9:05                         ` Denys Vlasenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAK1hOcMtnZLy7Kw36YQWdi3tzmACepAWjQ=_82GuE_NUHd0hpw@mail.gmail.com' \
    --to=vda.linux@googlemail.com \
    --cc=dalias@libc.org \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).