mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Rich Felker <dalias@aerifal.cx>
To: musl@lists.openwall.com
Subject: Re: ldso : dladdr support
Date: Thu, 23 Aug 2012 18:21:13 -0400	[thread overview]
Message-ID: <20120823222113.GT27715@brightrain.aerifal.cx> (raw)
In-Reply-To: <20120823213937.GS27715@brightrain.aerifal.cx>

[-- Attachment #1: Type: text/plain, Size: 1679 bytes --]

On Thu, Aug 23, 2012 at 05:39:37PM -0400, Rich Felker wrote:
> On Mon, Aug 20, 2012 at 04:32:00PM +0200, musl wrote:
> > I missed a bug in my previous patch :
> > in find_sym func precomptab was always set to sysv_precomp.
> 
> It's still broken; h is being used in the comparisons even if h was
> not initialized, rather than using gh. I'm working on integrating the
> code right now. I'll either commit my version or reply with a patch
> here soon for review.

Here's my proposed patch for gnu hash support. I've left dladdr to be
committed separately. I handled the precomputed hashes by duplicating
the code in the two branches; this is _ugly_ but it's moderately
faster, and I really don't like the performance impact of these checks
to begin with, so I'd rather not make them even worse.

Some other changes I've made since Boris's last version:

- Prefer GNU hash if it's available. It's a lot faster even in single
  runs, and should make even more difference when data-locality issues
  come into play (resolving whole files rather than just a single
  dlsym call).

- Omit bloom filter checks. It's not clear if they're beneficial on
  average in large programs, but for single lookups where the symbol
  is present, they increase lookup time by about 8%.

- Replace the over-complicated decode_vec2 with search_vec, since we
  only need a single extended entry anyway. In any case, the big-O
  performance of high-entry lookups will always be the same as this
  linear search unless we use heavy data structures, so we might as
  well just do it this super-simple way.

Comments welcome. I'll hold off on committing for a while in case I
made any dumb mistakes.

Rich

[-- Attachment #2: gnuhash.diff --]
[-- Type: text/plain, Size: 5091 bytes --]

diff --git a/configure b/configure
index 1e8b974..1105180 100755
--- a/configure
+++ b/configure
@@ -268,7 +268,7 @@ fi
 
 # Some patched GCC builds have these defaults messed up...
 tryflag CFLAGS_AUTO -fno-stack-protector
-tryldflag LDFLAGS_AUTO -Wl,--hash-style=sysv
+tryldflag LDFLAGS_AUTO -Wl,--hash-style=both
 
 # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions
 LDFLAGS_DUMMY=
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 9692c6b..d7d6800 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -53,6 +53,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -95,7 +96,15 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static int search_vec(size_t *v, size_t *r, size_t key)
+{
+	for (; v[0]!=key; v+=2)
+		if (!v[0]) return 0;
+	*r = v[1];
+	return 1;
+}
+
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -106,7 +115,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash(const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -119,20 +137,61 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	Sym *sym;
+	char *strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
+	uint32_t h2;
+	uint32_t *hashval;
+	uint32_t n = buckets[h1 % nbuckets];
+
+	if (!n) return 0;
+
+	strings = dso->strings;
+	sym = dso->syms + n;
+	hashval = buckets + nbuckets + (n - hashtab[1]);
+
+	for (h1 |= 1; ; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+		if (h2 & 1) break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
+	uint32_t h = 0, gh = 0;
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	if (dso->ghashtab) {
+		gh = gnu_hash(s);
+		if (gh == 0xf9040207 && !strcmp(s, "dlopen")) rtld_used = 1;
+		if (gh == 0xf4dc4ae && !strcmp(s, "dlsym")) rtld_used = 1;
+		if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	} else {
+		h = sysv_hash(s);
+		if (h == 0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
+		if (h == 0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
+		if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	}
 	for (; dso; dso=dso->next) {
 		Sym *sym;
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+		if (dso->ghashtab) {
+			if (!gh) gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		} else {
+			if (!h) h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		}
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -325,8 +384,11 @@ static void decode_dyn(struct dso *p)
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	if (dyn[0]&(1<<DT_HASH))
+		p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	if (search_vec(p->dynv, dyn, DT_GNU_HASH))
+		p->ghashtab = (void *)(p->base + *dyn);
 }
 
 static struct dso *load_library(const char *name)
@@ -788,7 +850,7 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
+	uint32_t h = 0, gh = 0;
 	Sym *sym;
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -802,12 +864,23 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	if (p->ghashtab) {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	} else {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	}
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->ghashtab) {
+			if (!gh) gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, p->deps[i]);
+		} else {
+			if (!h) h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		}
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}

  reply	other threads:[~2012-08-23 22:21 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-07  9:04 musl
2012-08-07 11:46 ` Szabolcs Nagy
2012-08-07 14:15   ` musl
2012-08-07 14:53     ` Szabolcs Nagy
2012-08-07 23:09     ` Rich Felker
2012-08-08  9:55       ` musl
2012-08-08 11:52         ` Szabolcs Nagy
2012-08-08 12:54           ` Rich Felker
2012-08-08 13:57           ` musl
2012-08-11 23:05             ` Rich Felker
2012-08-15 22:41               ` boris brezillon
2012-08-17  5:39                 ` Rich Felker
2012-08-19 16:42                   ` musl
2012-08-20  2:06                     ` Rich Felker
2012-08-20 12:55                       ` musl
2012-08-20 14:32                         ` musl
2012-08-23 21:39                           ` Rich Felker
2012-08-23 22:21                             ` Rich Felker [this message]
2012-08-24  7:29                               ` musl
2012-08-24 18:38                                 ` Rich Felker
2012-08-25  7:42                                   ` boris brezillon
2012-08-25 12:35                                     ` Rich Felker
2012-08-25 22:13                                   ` musl
2012-08-25 22:37                                     ` musl
2012-08-26  0:00                                   ` musl
2012-08-24  8:12                               ` Szabolcs Nagy
2012-08-24  8:56                                 ` musl
2012-08-24  9:38                                   ` Szabolcs Nagy
2012-08-25 21:34                               ` musl
2012-08-25 21:42                                 ` Rich Felker
2012-08-16 18:03               ` musl
2012-08-17 16:35               ` musl
2012-08-08 12:49         ` Rich Felker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120823222113.GT27715@brightrain.aerifal.cx \
    --to=dalias@aerifal.cx \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).