mailing list of musl libc
 help / color / mirror / code / Atom feed
* ldso : dladdr support
@ 2012-08-07  9:04 musl
  2012-08-07 11:46 ` Szabolcs Nagy
  0 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-07  9:04 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 120 bytes --]

Hi,

This patch adds support for dladdr function.
It is based on my previous patch (gnu hash support).

Regards,

Boris

[-- Attachment #2: ldso-dladdr-support.patch --]
[-- Type: text/x-patch, Size: 4844 bytes --]

From 1ac52fe3a7e934223ec887b6d95bb74ecc0477b1 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <b.brezillon@overkiz.com>
Date: Tue, 7 Aug 2012 10:57:26 +0200
Subject: [PATCH] ldso : dladdr support

---
 include/dlfcn.h    |   15 ++++++++
 src/ldso/dynlink.c |  105 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8c45822 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,21 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+    const char *dli_fname;  /* Pathname of shared object that
+                               contains address */
+    void       *dli_fbase;  /* Address at which shared object
+                               is loaded */
+    const char *dli_sname;  /* Name of nearest symbol with address
+                               lower than addr */
+    void       *dli_saddr;  /* Exact address of symbol named
+                               in dli_sname */
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 0c8d75a..acf8e56 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
 typedef Elf32_Sym Sym;
 #define R_TYPE(x) ((x)&255)
 #define R_SYM(x) ((x)>>8)
+#define ELF_ST_TYPE ELF32_ST_TYPE
 #else
 typedef Elf64_Ehdr Ehdr;
 typedef Elf64_Phdr Phdr;
 typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #endif
 
 struct debug {
@@ -69,7 +72,8 @@ struct dso {
 
 struct hash_algo {
 	uint32_t (*hash) (const char *);
-	Sym *(*lookup) (const char *s, uint32_t h, struct dso *dso);
+	Sym *(*lookup) (const char *, uint32_t, struct dso *);
+	void (*iterate) (struct dso *, int (*) (struct dso *, Sym *, void *), void *);
 };
 
 #define SYSV_HASH_ALG_IDX   0
@@ -178,14 +182,50 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 	return 0;
 }
 
+static void sysv_iterate (struct dso *dso, int (*func) (struct dso *, Sym *, void *), void *data)
+{
+	size_t i;
+	Sym *syms = dso->syms;
+	uint32_t *hashtab = dso->hashtab;
+	uint32_t nbucket = hashtab[0];
+	uint32_t nchain = hashtab[1];
+	for (i = 0; i < hashtab[1]; i++) {
+		if (!func (dso, syms + i, data))
+			return;
+	}
+}
+
+static void gnu_iterate (struct dso *dso, int (*func) (struct dso *, Sym *, void *), void *data)
+{
+	uint32_t i;
+	Sym *syms = dso->syms;
+	uint32_t *hashtab = dso->hashtab;
+	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));
+	uint32_t nbuckets = hashtab[0];
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t symndx = hashtab[1];
+
+	for (i = 0; i < nbuckets; ++i) {
+		uint32_t n = buckets[i];
+		Sym *sym = syms + n;
+		uint32_t *hashval = hashvals + n - symndx;
+		do {
+			if (!func (dso, sym++, data))
+				return;
+		}while (!(*hashval++ & 1));
+  }
+}
+
 static struct hash_algo hashalgs[] = {
 	{
 		.hash = sysv_hash,
 		.lookup = sysv_lookup,
+		.iterate = sysv_iterate,
 	},
 	{
 		.hash = gnu_hash,
 		.lookup = gnu_lookup,
+		.iterate = gnu_iterate,
 	},
 };
 
@@ -918,6 +958,69 @@ failed:
 	return 0;
 }
 
+struct sym_search {
+	void *addr;
+	Dl_info *info;
+};
+
+static int find_closest_sym (struct dso *dso, Sym *sym, void *data)
+{
+	struct sym_search *search = data;
+	void *symaddr = dso->base + sym->st_value;
+	char *strings = dso->strings;
+	Dl_info *info = search->info;
+	void *addr = search->addr;
+	void *prevaddr = info->dli_saddr;
+
+	if (sym->st_value == 0 && sym->st_shndx == SHN_UNDEF)
+		return 1;
+
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS)
+		return 1;
+
+	if (addr < symaddr)
+		return 1;
+
+	if (prevaddr && (addr - symaddr) > (addr - prevaddr))
+		return 1;
+
+	info->dli_saddr = symaddr;
+	info->dli_sname = strings + sym->st_name;
+
+	if (addr == symaddr)
+		return 0;
+
+	return 1;
+
+}
+
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct sym_search search;
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	search.info = info;
+	search.addr = addr;
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			hashalgs[p->hashalg].iterate (p, find_closest_sym, &search);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;
-- 
1.7.9.5


[-- Attachment #3: gnu-hash-support.patch --]
[-- Type: text/x-patch, Size: 6101 bytes --]

From 28edcc77f5096d17be29da1be1c3972c2356d269 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <b.brezillon@overkiz.com>
Date: Mon, 6 Aug 2012 20:17:01 +0200
Subject: [PATCH] Add gnu hash support.

---
 src/ldso/dynlink.c |  125 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 115 insertions(+), 10 deletions(-)

diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 31ef684..0c8d75a 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -52,6 +52,7 @@ struct dso {
 
 	int refcnt;
 	Sym *syms;
+	uint32_t hashalg;
 	uint32_t *hashtab;
 	char *strings;
 	unsigned char *map;
@@ -66,6 +67,15 @@ struct dso {
 	char buf[];
 };
 
+struct hash_algo {
+	uint32_t (*hash) (const char *);
+	Sym *(*lookup) (const char *s, uint32_t h, struct dso *dso);
+};
+
+#define SYSV_HASH_ALG_IDX   0
+#define GNU_HASH_ALG_IDX    1
+#define HASH_ALG_CNT        2
+
 #include "reloc.h"
 
 void __init_ssp(size_t *);
@@ -94,7 +104,7 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -105,7 +115,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (unsigned char c = *s; c != '\0'; c = *++s)
+		h = h * 33 + c;
+	return h & 0xffffffff;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -118,20 +137,86 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->hashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1 / c) & (hashtab[2] - 1);
+	size_t bitmask = (1 << (h1 % c)) | (1 << (h2 % c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= ~1; 1; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
+static struct hash_algo hashalgs[] = {
+	{
+		.hash = sysv_hash,
+		.lookup = sysv_lookup,
+	},
+	{
+		.hash = gnu_hash,
+		.lookup = gnu_lookup,
+	},
+};
+
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	uint32_t computed[HASH_ALG_CNT / 32 + 1];
+	uint32_t hashes[HASH_ALG_CNT];
+	memset (computed, 0, sizeof (computed));
+	if (!strcmp(s, "dlopen")) rtld_used = 1;
+	if (!strcmp(s, "dlsym")) rtld_used = 1;
+	if (!strcmp(s, "__stack_chk_fail")) ssp_used = 1;
 	for (; dso; dso=dso->next) {
 		Sym *sym;
+		uint32_t h;
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+
+		if (!(computed[dso->hashalg / 32] & (1 << (dso->hashalg % 32)))) {
+			h = hashalgs[dso->hashalg].hash(s);
+			hashes[dso->hashalg] = h;
+			computed[dso->hashalg / 32] |= (1 << (dso->hashalg % 32));
+		}
+		else {
+			h = hashes[dso->hashalg];
+		}
+
+		sym = hashalgs[dso->hashalg].lookup(s, h, dso);
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -320,11 +405,17 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
+	size_t *v = p->dynv;
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
 	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	p->hashalg = SYSV_HASH_ALG_IDX;
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	for (; v[0]; v+=2) if (v[0] == DT_GNU_HASH) {
+		p->hashtab = (void *)(p->base + v[1]);
+		p->hashalg = GNU_HASH_ALG_IDX;
+	}
 }
 
 static struct dso *load_library(const char *name)
@@ -786,6 +877,9 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 	size_t i;
 	uint32_t h;
 	Sym *sym;
+	uint32_t computed[HASH_ALG_CNT / 32 + 1];
+	uint32_t hashes[HASH_ALG_CNT];
+
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
 		if (!p) p=head;
@@ -798,12 +892,23 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	memset (computed, 0, sizeof (computed));
+	h = hashalgs[p->hashalg].hash(s);
+	computed[p->hashalg / 32] |= (1 << (p->hashalg % 32));
+	hashes[p->hashalg] = h;
+	sym = hashalgs[p->hashalg].lookup(s, h, p);
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p);
+		if (!(computed[p->deps[i]->hashalg / 32] & (1 << (p->deps[i]->hashalg % 32)))) {
+			h = hashalgs[p->deps[i]->hashalg].hash(s);
+			hashes[p->deps[i]->hashalg] = h;
+			computed[p->deps[i]->hashalg / 32] |= (1 << (p->deps[i]->hashalg % 32));
+		}
+		else {
+			h = hashes[p->deps[i]->hashalg];
+		}
+		sym = hashalgs[p->deps[i]->hashalg].lookup(s, h, p->deps[i]);
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
-- 
1.7.9.5


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-07  9:04 ldso : dladdr support musl
@ 2012-08-07 11:46 ` Szabolcs Nagy
  2012-08-07 14:15   ` musl
  0 siblings, 1 reply; 33+ messages in thread
From: Szabolcs Nagy @ 2012-08-07 11:46 UTC (permalink / raw)
  To: musl

* musl <b.brezillon.musl@gmail.com> [2012-08-07 11:04:19 +0200]:
> This patch adds support for dladdr function.
> It is based on my previous patch (gnu hash support).
> 

i havent checked the content of your patch yet
just had a quick glance

i think before you make substantial changes
it's better to have some discussion about
the design

i'll just have some stylistic comments for now
(i think there is no official guideline and
we are not terribly anal about it but i'll
still point out a few things for future reference)

>  struct hash_algo {
>  	uint32_t (*hash) (const char *);
> -	Sym *(*lookup) (const char *s, uint32_t h, struct dso *dso);
> +	Sym *(*lookup) (const char *, uint32_t, struct dso *);
> +	void (*iterate) (struct dso *, int (*) (struct dso *, Sym *, void *), void *);
>  };
>  

use
  foo(arg)
instead of
  foo (arg)

in case of keywords it's not clear what's better
(musl usually uses spaces after if,while,for,..)
but for function calls no space is clearer

(in general musl code rarely uses unnecessary spaces and parentheses)

actually i'm not sure if the function pointer approach is
the right one here, but that's a design question


> +	uint32_t *hashtab = dso->hashtab;
> +	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));

i don't know the algorithm but the sizeof magic looks suspicious to me

> +static uint32_t gnu_hash (const char *s0)
> +{
> +	const unsigned char *s = (void *)s0;
> +	uint_fast32_t h = 5381;
> +	for (unsigned char c = *s; c != '\0'; c = *++s)
> +		h = h * 33 + c;
> +	return h & 0xffffffff;
> +}
> +

i think c != '\0' is not very idiomatic
and i'd avoid using c99 style declaration in for

use

for (; *s; s++)
	h = 33*h + *s;


> +	uint32_t shift2 = hashtab[3];
> +	uint32_t h2 = h1 >> shift2;

i'm not sure if input validation makes sense in ldso
but shifts can be tricky (hashtab[3] must be in 0..31 here)

> +	for (h1 &= ~1; 1; sym++) {

the 1; in the middle is unnecessary

h1 &= ~1; is problematic if signed int representation is
not two's complement
(~1 is an implementation defined negative value which is
then converted to uint32_t according to well defined rules)

so i'd use

  for (h1 &= -2;; sym++) {

which is probably less clear but more correct
(maybe an explicit (uint32_t)-2 cast helps with that)

> -	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
> -	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
> -	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
> +	uint32_t computed[HASH_ALG_CNT / 32 + 1];
> +	uint32_t hashes[HASH_ALG_CNT];
> +	memset (computed, 0, sizeof (computed));
> +	if (!strcmp(s, "dlopen")) rtld_used = 1;
> +	if (!strcmp(s, "dlsym")) rtld_used = 1;
> +	if (!strcmp(s, "__stack_chk_fail")) ssp_used = 1;
...
> +		if (!(computed[dso->hashalg / 32] & (1 << (dso->hashalg % 32)))) {
> +			h = hashalgs[dso->hashalg].hash(s);
> +			hashes[dso->hashalg] = h;
> +			computed[dso->hashalg / 32] |= (1 << (dso->hashalg % 32));
> +		}
> +		else {
> +			h = hashes[dso->hashalg];
> +		}

i think we can have more efficient code here if
only gnu and sysv hash algos are supported

(i don't think it's realistic to assume new hash algorithms
even gnu hash is a fairly useless addition to elf)

the if else style is

  if (cond) {
  ...
  } else {
  ...
  }




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-07 11:46 ` Szabolcs Nagy
@ 2012-08-07 14:15   ` musl
  2012-08-07 14:53     ` Szabolcs Nagy
  2012-08-07 23:09     ` Rich Felker
  0 siblings, 2 replies; 33+ messages in thread
From: musl @ 2012-08-07 14:15 UTC (permalink / raw)
  To: musl; +Cc: Szabolcs Nagy

Thanks for your review.

On 07/08/2012 13:46, Szabolcs Nagy wrote:
> * musl <b.brezillon.musl@gmail.com> [2012-08-07 11:04:19 +0200]:
>> This patch adds support for dladdr function.
>> It is based on my previous patch (gnu hash support).
>>
> i havent checked the content of your patch yet
> just had a quick glance
>
> i think before you make substantial changes
> it's better to have some discussion about
> the design
Could you tell me more about the design issues
(I guess this has something to do with function pointers and multi hash algorithms design) ?
>
> i'll just have some stylistic comments for now
> (i think there is no official guideline and
> we are not terribly anal about it but i'll
> still point out a few things for future reference)
>
>>  struct hash_algo {
>>  	uint32_t (*hash) (const char *);
>> -	Sym *(*lookup) (const char *s, uint32_t h, struct dso *dso);
>> +	Sym *(*lookup) (const char *, uint32_t, struct dso *);
>> +	void (*iterate) (struct dso *, int (*) (struct dso *, Sym *, void *), void *);
>>  };
>>  
> use
>   foo(arg)
> instead of
>   foo (arg)
>
> in case of keywords it's not clear what's better
> (musl usually uses spaces after if,while,for,..)
> but for function calls no space is clearer
>
> (in general musl code rarely uses unnecessary spaces and parentheses)
I'm correcting my code to match the musl coding style.
>
> actually i'm not sure if the function pointer approach is
> the right one here, but that's a design question
>
Could you tell me why (performance)?
>> +	uint32_t *hashtab = dso->hashtab;
>> +	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));
> i don't know the algorithm but the sizeof magic looks suspicious to me
I took the algorithm described here :

https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections

The maskwords are native word size (64 bits for elf64 and 32 bits for elf32).
I could define these macros:
#define MASKWORD_SIZE sizeof (Elf32_Word)
#define MASKWORD_SIZE sizeof (Elf64_Word)
>
>> +static uint32_t gnu_hash (const char *s0)
>> +{
>> +	const unsigned char *s = (void *)s0;
>> +	uint_fast32_t h = 5381;
>> +	for (unsigned char c = *s; c != '\0'; c = *++s)
>> +		h = h * 33 + c;
>> +	return h & 0xffffffff;
>> +}
>> +
> i think c != '\0' is not very idiomatic
> and i'd avoid using c99 style declaration in for
>
> use
>
> for (; *s; s++)
> 	h = 33*h + *s;
>
Corrected.
>> +	uint32_t shift2 = hashtab[3];
>> +	uint32_t h2 = h1 >> shift2;
> i'm not sure if input validation makes sense in ldso
> but shifts can be tricky (hashtab[3] must be in 0..31 here)
By validation do you mean mask the shift value with 0x1F ?
>
>> +	for (h1 &= ~1; 1; sym++) {
> the 1; in the middle is unnecessary
>
> h1 &= ~1; is problematic if signed int representation is
> not two's complement
> (~1 is an implementation defined negative value which is
> then converted to uint32_t according to well defined rules)
>
> so i'd use
>
>   for (h1 &= -2;; sym++) {
>
> which is probably less clear but more correct
> (maybe an explicit (uint32_t)-2 cast helps with that)
I'll take a closer look at the gnu_lookup algo to see if I can improve it.
>> -	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
>> -	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
>> -	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
>> +	uint32_t computed[HASH_ALG_CNT / 32 + 1];
>> +	uint32_t hashes[HASH_ALG_CNT];
>> +	memset (computed, 0, sizeof (computed));
>> +	if (!strcmp(s, "dlopen")) rtld_used = 1;
>> +	if (!strcmp(s, "dlsym")) rtld_used = 1;
>> +	if (!strcmp(s, "__stack_chk_fail")) ssp_used = 1;
> ...
>> +		if (!(computed[dso->hashalg / 32] & (1 << (dso->hashalg % 32)))) {
>> +			h = hashalgs[dso->hashalg].hash(s);
>> +			hashes[dso->hashalg] = h;
>> +			computed[dso->hashalg / 32] |= (1 << (dso->hashalg % 32));
>> +		}
>> +		else {
>> +			h = hashes[dso->hashalg];
>> +		}
> i think we can have more efficient code here if
> only gnu and sysv hash algos are supported
> (i don't think it's realistic to assume new hash algorithms
> even gnu hash is a fairly useless addition to elf)
This has to do with the function pointer approach.
Do you prefer if/else alternative?
> the if else style is
>
>   if (cond) {
>   ...
>   } else {
>   ...
>   }
>
>



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-07 14:15   ` musl
@ 2012-08-07 14:53     ` Szabolcs Nagy
  2012-08-07 23:09     ` Rich Felker
  1 sibling, 0 replies; 33+ messages in thread
From: Szabolcs Nagy @ 2012-08-07 14:53 UTC (permalink / raw)
  To: musl

* musl <b.brezillon.musl@gmail.com> [2012-08-07 16:15:34 +0200]:
> On 07/08/2012 13:46, Szabolcs Nagy wrote:
> > * musl <b.brezillon.musl@gmail.com> [2012-08-07 11:04:19 +0200]:
> >> This patch adds support for dladdr function.
> >> It is based on my previous patch (gnu hash support).
> >>
> > i havent checked the content of your patch yet
> > just had a quick glance
> >
> > i think before you make substantial changes
> > it's better to have some discussion about
> > the design
> Could you tell me more about the design issues
> (I guess this has something to do with function pointers and multi hash algorithms design) ?


i didn't have anything specific in mind about the design
it was a general remark in case you plan to submit
larger contributions

about the function pointers:

using plain if/else might work, but if you use func pointers
then i'd make those part of the dso struct so you dont
need an additional lookup step, so

  dso->hash(s)

instead of

  hashalgo[dso->hashalg].hash(s)


in general keep the number of indirections and layers small
if possible


> >> +	uint32_t shift2 = hashtab[3];
> >> +	uint32_t h2 = h1 >> shift2;
> > i'm not sure if input validation makes sense in ldso
> > but shifts can be tricky (hashtab[3] must be in 0..31 here)
> By validation do you mean mask the shift value with 0x1F ?

i meant that if hashtab[3] comes from outside source
and ldso requires strict validation (i don't know if it does)
then you need to check the range of it somewhere
and return error for bad values



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-07 14:15   ` musl
  2012-08-07 14:53     ` Szabolcs Nagy
@ 2012-08-07 23:09     ` Rich Felker
  2012-08-08  9:55       ` musl
  1 sibling, 1 reply; 33+ messages in thread
From: Rich Felker @ 2012-08-07 23:09 UTC (permalink / raw)
  To: musl

On Tue, Aug 07, 2012 at 04:15:34PM +0200, musl wrote:
> > actually i'm not sure if the function pointer approach is
> > the right one here, but that's a design question
> >
> Could you tell me why (performance)?

Some reasons I would avoid gratuitous tables of function pointer
tables for things like this:

1. In shared libraries, they have to be relocated at load time, which
   adds to startup time cost. A single table doesn't make much
   difference, but the _policy_ of avoiding them in general can make a
   difference when there are many places they might have been used.

2. Likewise, since they're non-constant, they have to be in the data
   segment, which increases the size of the library's data segment.
   Same "single table vs policy" point as in #1 applies.

3. Since they're non-constant, they're nice targets for an attacker
   looking to perform code execution exploits. I don't like
   artificially restricting our use of the language just because
   certain constructs could help an attacker exploit an
   ALREADY-vulnerable program, but I also think it's prudent to avoid
   gratuitous uses like this that serve no purpose and potentially
   contribute to exploitability.



> >> +	uint32_t *hashtab = dso->hashtab;
> >> +	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));
> > i don't know the algorithm but the sizeof magic looks suspicious to me
> I took the algorithm described here :
> 
> https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
> 
> The maskwords are native word size (64 bits for elf64 and 32 bits for elf32).
> I could define these macros:
> #define MASKWORD_SIZE sizeof (Elf32_Word)
> #define MASKWORD_SIZE sizeof (Elf64_Word)

In that case, we use size_t, not the ElfXX_Foo mess which only makes
sense when writing generic tools for dealing with ELF files from
arbitrary archs, not when handling native ELF structures.

> >> +static uint32_t gnu_hash (const char *s0)
> >> +{
> >> +	const unsigned char *s = (void *)s0;
> >> +	uint_fast32_t h = 5381;
> >> +	for (unsigned char c = *s; c != '\0'; c = *++s)
> >> +		h = h * 33 + c;
> >> +	return h & 0xffffffff;
> >> +}
> >> +
> > i think c != '\0' is not very idiomatic
> > and i'd avoid using c99 style declaration in for
> >
> > use
> >
> > for (; *s; s++)
> > 	h = 33*h + *s;
> >
> Corrected.

Indeed, that's a lot cleaner.

> >> +	uint32_t shift2 = hashtab[3];
> >> +	uint32_t h2 = h1 >> shift2;
> > i'm not sure if input validation makes sense in ldso
> > but shifts can be tricky (hashtab[3] must be in 0..31 here)
> By validation do you mean mask the shift value with 0x1F ?

There are millions of ways an invalid/corrupt ELF file can cause the
dynamic linker/calling program to invoke UB. I don't see much value in
checking for them. Such invalid files will only exist if created
intentionally or if your storage medium is corrupted. There is no
"safe" interface corresponding to dlopen by which you can try opening
a DSO and sanity-checking it before accessing any code or data from it
(e.g. the ctors run before dlopen returns!) so the only way to handle
evil ELF files is to validate the image on disk _before_ calling
dlopen, and opening it with an absolute pathname to a location under
your control.

> >> +	for (h1 &= ~1; 1; sym++) {
> > the 1; in the middle is unnecessary
> >
> > h1 &= ~1; is problematic if signed int representation is
> > not two's complement
> > (~1 is an implementation defined negative value which is

It's not implementation-defined. It's defined as operating on the bit
representation, and once the signed representation is chosen among the
3 allowed, the implementation is forced to perform ~ accordingly.

> > then converted to uint32_t according to well defined rules)

musl will never deal with non-twos-complement implementations (AFAIK,
no non-twos-complement conformant C implementation has ever been
created) so it's not a big deal, but I agree it would be cleaner to
avoid the issue. Personally I try to avoid ever using bitwise
operators on signed types.

> > i think we can have more efficient code here if
> > only gnu and sysv hash algos are supported
> > (i don't think it's realistic to assume new hash algorithms
> > even gnu hash is a fairly useless addition to elf)
> This has to do with the function pointer approach.
> Do you prefer if/else alternative?

Yes, I prefer if/else.

Also, we had a conversation on the list and/or IRC a while back (I
don't remember which) where I described some optimizations that should
be present if gnu hash is to be supported. Basically, the dynamic
linker should keep track of whether there's one of the two hashes
that's supported by ALL loaded libs, and in that case, only ever use
that hash, to avoid computing two different hashes.

Another issue that needs to be fixed before gnu hash support can be
committed is that you removed the hash comparisons for "dlopen",
"dlsym", and "__stack_chk_fail". Incuring a strcmp against these 3
strings for every single symbol lookup seems extremely costly; that's
why I did the hash comparisons in the first place. Keeping track of
them is important so we can avoid the overhead of supporting these
functions when there's no way the application can ever attempt to use
them, but we also want to minimize the startup overhead of making this
determination.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-07 23:09     ` Rich Felker
@ 2012-08-08  9:55       ` musl
  2012-08-08 11:52         ` Szabolcs Nagy
  2012-08-08 12:49         ` Rich Felker
  0 siblings, 2 replies; 33+ messages in thread
From: musl @ 2012-08-08  9:55 UTC (permalink / raw)
  To: musl; +Cc: Rich Felker

[-- Attachment #1: Type: text/plain, Size: 5954 bytes --]

Here is the new patch for dladdr + gnu hash support.

Please tell me if you find coding style errors.

On 08/08/2012 01:09, Rich Felker wrote:
> On Tue, Aug 07, 2012 at 04:15:34PM +0200, musl wrote:
>>> actually i'm not sure if the function pointer approach is
>>> the right one here, but that's a design question
>>>
>> Could you tell me why (performance)?
> Some reasons I would avoid gratuitous tables of function pointer
> tables for things like this:
>
> 1. In shared libraries, they have to be relocated at load time, which
>    adds to startup time cost. A single table doesn't make much
>    difference, but the _policy_ of avoiding them in general can make a
>    difference when there are many places they might have been used.
>
> 2. Likewise, since they're non-constant, they have to be in the data
>    segment, which increases the size of the library's data segment.
>    Same "single table vs policy" point as in #1 applies.
>
> 3. Since they're non-constant, they're nice targets for an attacker
>    looking to perform code execution exploits. I don't like
>    artificially restricting our use of the language just because
>    certain constructs could help an attacker exploit an
>    ALREADY-vulnerable program, but I also think it's prudent to avoid
>    gratuitous uses like this that serve no purpose and potentially
>    contribute to exploitability.
>
I reworked the implementation to remove all function pointers.
>
>>>> +	uint32_t *hashtab = dso->hashtab;
>>>> +	uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t) / sizeof(uint32_t)));
>>> i don't know the algorithm but the sizeof magic looks suspicious to me
>> I took the algorithm described here :
>>
>> https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
>>
>> The maskwords are native word size (64 bits for elf64 and 32 bits for elf32).
>> I could define these macros:
>> #define MASKWORD_SIZE sizeof (Elf32_Word)
>> #define MASKWORD_SIZE sizeof (Elf64_Word)
> In that case, we use size_t, not the ElfXX_Foo mess which only makes
> sense when writing generic tools for dealing with ELF files from
> arbitrary archs, not when handling native ELF structures.
>
>>>> +static uint32_t gnu_hash (const char *s0)
>>>> +{
>>>> +	const unsigned char *s = (void *)s0;
>>>> +	uint_fast32_t h = 5381;
>>>> +	for (unsigned char c = *s; c != '\0'; c = *++s)
>>>> +		h = h * 33 + c;
>>>> +	return h & 0xffffffff;
>>>> +}
>>>> +
>>> i think c != '\0' is not very idiomatic
>>> and i'd avoid using c99 style declaration in for
>>>
>>> use
>>>
>>> for (; *s; s++)
>>> 	h = 33*h + *s;
>>>
>> Corrected.
> Indeed, that's a lot cleaner.
>
>>>> +	uint32_t shift2 = hashtab[3];
>>>> +	uint32_t h2 = h1 >> shift2;
>>> i'm not sure if input validation makes sense in ldso
>>> but shifts can be tricky (hashtab[3] must be in 0..31 here)
>> By validation do you mean mask the shift value with 0x1F ?
> There are millions of ways an invalid/corrupt ELF file can cause the
> dynamic linker/calling program to invoke UB. I don't see much value in
> checking for them. Such invalid files will only exist if created
> intentionally or if your storage medium is corrupted. There is no
> "safe" interface corresponding to dlopen by which you can try opening
> a DSO and sanity-checking it before accessing any code or data from it
> (e.g. the ctors run before dlopen returns!) so the only way to handle
> evil ELF files is to validate the image on disk _before_ calling
> dlopen, and opening it with an absolute pathname to a location under
> your control.
>
>>>> +	for (h1 &= ~1; 1; sym++) {
>>> the 1; in the middle is unnecessary
>>>
>>> h1 &= ~1; is problematic if signed int representation is
>>> not two's complement
>>> (~1 is an implementation defined negative value which is
> It's not implementation-defined. It's defined as operating on the bit
> representation, and once the signed representation is chosen among the
> 3 allowed, the implementation is forced to perform ~ accordingly.
>
>>> then converted to uint32_t according to well defined rules)
> musl will never deal with non-twos-complement implementations (AFAIK,
> no non-twos-complement conformant C implementation has ever been
> created) so it's not a big deal, but I agree it would be cleaner to
> avoid the issue. Personally I try to avoid ever using bitwise
> operators on signed types.
>
>>> i think we can have more efficient code here if
>>> only gnu and sysv hash algos are supported
>>> (i don't think it's realistic to assume new hash algorithms
>>> even gnu hash is a fairly useless addition to elf)
>> This has to do with the function pointer approach.
>> Do you prefer if/else alternative?
> Yes, I prefer if/else.
>
> Also, we had a conversation on the list and/or IRC a while back (I
> don't remember which) where I described some optimizations that should
> be present if gnu hash is to be supported. Basically, the dynamic
> linker should keep track of whether there's one of the two hashes
> that's supported by ALL loaded libs, and in that case, only ever use
> that hash, to avoid computing two different hashes.
The hash for given algo is only computed once (if needed).
That's the reason for the computed table.
If all libs uses the same hash algo, the other will never be computed.
> Another issue that needs to be fixed before gnu hash support can be
> committed is that you removed the hash comparisons for "dlopen",
> "dlsym", and "__stack_chk_fail". Incuring a strcmp against these 3
> strings for every single symbol lookup seems extremely costly; that's
> why I did the hash comparisons in the first place. Keeping track of
> them is important so we can avoid the overhead of supporting these
> functions when there's no way the application can ever attempt to use
> them, but we also want to minimize the startup overhead of making this
> determination.
I added the precomp table wich includes both gnu and sysv hashes for the dlopen,
dlsym and __stack_chk_fail symbols.
>
> Rich


[-- Attachment #2: ldso-add-dladdr-and-gnu-hash-support.patch --]
[-- Type: text/x-patch, Size: 9709 bytes --]

From 4a809b89c38ca3a5b69e5758a04effd6d404039c Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <b.brezillon@overkiz.com>
Date: Wed, 8 Aug 2012 11:45:31 +0200
Subject: [PATCH] ldso : add dladdr and gnu hash support

---
 include/dlfcn.h    |   15 ++++
 src/ldso/dynlink.c |  231 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 235 insertions(+), 11 deletions(-)

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8c45822 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,21 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+    const char *dli_fname;  /* Pathname of shared object that
+                               contains address */
+    void       *dli_fbase;  /* Address at which shared object
+                               is loaded */
+    const char *dli_sname;  /* Name of nearest symbol with address
+                               lower than addr */
+    void       *dli_saddr;  /* Exact address of symbol named
+                               in dli_sname */
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f55c6f1..f40c1e5 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
 typedef Elf32_Sym Sym;
 #define R_TYPE(x) ((x)&255)
 #define R_SYM(x) ((x)>>8)
+#define ELF_ST_TYPE ELF32_ST_TYPE
 #else
 typedef Elf64_Ehdr Ehdr;
 typedef Elf64_Phdr Phdr;
 typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #endif
 
 struct debug {
@@ -52,6 +55,7 @@ struct dso {
 
 	int refcnt;
 	Sym *syms;
+	uint32_t hashalg;
 	uint32_t *hashtab;
 	char *strings;
 	unsigned char *map;
@@ -66,6 +70,10 @@ struct dso {
 	char buf[];
 };
 
+#define SYSV_HASH_ALG_IDX   0
+#define GNU_HASH_ALG_IDX    1
+#define HASH_ALG_CNT		2
+
 #include "reloc.h"
 
 void __init_ssp(size_t *);
@@ -94,7 +102,7 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -105,7 +113,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h & 0xffffffff;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -118,20 +135,94 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->hashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[HASH_ALG_CNT][3] = {
+	  {0x6b366be, 0x6b3afd, 0x595a4cc},
+	  {0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t h[HASH_ALG_CNT];
+	uint8_t computed[HASH_ALG_CNT] = {0, 0};
+	uint8_t alg = dso->hashalg;
+	if (alg == SYSV_HASH_ALG_IDX)
+		h[alg] = sysv_hash(s);
+	else
+		h[alg] = gnu_hash(s);
+
+	computed[alg] = 1;
+
+	if (h[alg] == precomp[alg][0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h[alg] == precomp[alg][1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h[alg] == precomp[alg][2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+
 	for (; dso; dso=dso->next) {
 		Sym *sym;
+
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+
+		alg = dso->hashalg;
+		if (!computed[alg]) {
+			if (alg == SYSV_HASH_ALG_IDX) {
+				h[alg] = sysv_hash(s);
+				sym = sysv_lookup(s, h[alg], dso);
+			}
+			else {
+				h[alg] = gnu_hash(s);
+				sym = gnu_lookup(s, h[alg], dso);
+			}
+			computed[alg] = 1;
+		} else {
+			if (alg == SYSV_HASH_ALG_IDX)
+				sym = sysv_lookup(s, h[alg], dso);
+			else
+				sym = gnu_lookup(s, h[alg], dso);
+		}
+
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -320,11 +411,17 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
+	size_t *v = p->dynv;
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
 	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	p->hashalg = SYSV_HASH_ALG_IDX;
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	for (; v[0]; v+=2) if (v[0] == DT_GNU_HASH) {
+		p->hashtab = (void *)(p->base + v[1]);
+		p->hashalg = GNU_HASH_ALG_IDX;
+	}
 }
 
 static struct dso *load_library(const char *name)
@@ -784,8 +881,11 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
 	Sym *sym;
+	uint8_t computed[HASH_ALG_CNT] = {0, 0};
+	uint8_t alg = p->hashalg;
+	uint32_t h[HASH_ALG_CNT];
+
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
 		if (!p) p=head;
@@ -798,12 +898,36 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+
+	if (alg == SYSV_HASH_ALG_IDX) {
+		h[alg] = sysv_hash(s);
+		sym = sysv_lookup(s, h[alg], p);
+	} else {
+		h[alg] = gnu_hash(s);
+		sym = gnu_lookup(s, h[alg], p);
+	}
+	computed[alg] = 1;
+
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		alg = p->deps[i]->hashalg;
+		if (!computed[alg]) {
+			if (alg == SYSV_HASH_ALG_IDX) {
+				h[alg] = sysv_hash(s);
+				sym = sysv_lookup(s, h[alg], p->deps[i]);
+			} else {
+				h[alg] = gnu_hash(s);
+				sym = gnu_lookup(s, h[alg], p->deps[i]);
+			}
+			computed[alg] = 1;
+		} else {
+			if (alg == SYSV_HASH_ALG_IDX)
+				sym = sysv_lookup(s, h[alg], p->deps[i]);
+			else
+				sym = gnu_lookup(s, h[alg], p->deps[i]);
+		}
+
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -813,6 +937,91 @@ failed:
 	return 0;
 }
 
+struct sym_search {
+	void *addr;
+	Dl_info *info;
+};
+
+static int find_closest_sym (struct dso *dso, Sym *sym, struct sym_search *search)
+{
+	void *symaddr = dso->base + sym->st_value;
+	char *strings = dso->strings;
+	Dl_info *info = search->info;
+	void *addr = search->addr;
+	void *prevaddr = info->dli_saddr;
+
+	if (sym->st_value == 0 && sym->st_shndx == SHN_UNDEF)
+		return 1;
+
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS)
+		return 1;
+
+	if (addr < symaddr)
+		return 1;
+
+	if (prevaddr && (addr - symaddr) > (addr - prevaddr))
+		return 1;
+
+	info->dli_saddr = symaddr;
+	info->dli_sname = strings + sym->st_name;
+
+	if (addr == symaddr)
+		return 0;
+
+	return 1;
+
+}
+
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct sym_search search;
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	search.info = info;
+	search.addr = addr;
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t *hashtab = p->hashtab;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashalg == SYSV_HASH_ALG_IDX) {
+				for (i = 0; i < hashtab[1]; i++) {
+					if (!find_closest_sym (p, syms + i, &search))
+						return 1;
+				}
+			} else {
+				uint32_t *buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				uint32_t nbuckets = hashtab[0];
+				uint32_t *hashvals = buckets + nbuckets;
+				uint32_t symndx = hashtab[1];
+				for (i = 0; i < nbuckets; ++i) {
+					uint32_t n = buckets[i];
+					Sym *sym = syms + n;
+					uint32_t *hashval = hashvals + n - symndx;
+
+					do {
+						if (!find_closest_sym (p, sym, &search))
+							return 1;
+					}while (!(*hashval++ & 1));
+				}
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;
-- 
1.7.9.5


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-08  9:55       ` musl
@ 2012-08-08 11:52         ` Szabolcs Nagy
  2012-08-08 12:54           ` Rich Felker
  2012-08-08 13:57           ` musl
  2012-08-08 12:49         ` Rich Felker
  1 sibling, 2 replies; 33+ messages in thread
From: Szabolcs Nagy @ 2012-08-08 11:52 UTC (permalink / raw)
  To: musl

* musl <b.brezillon.musl@gmail.com> [2012-08-08 11:55:45 +0200]:
> Here is the new patch for dladdr + gnu hash support.
> 
> On 08/08/2012 01:09, Rich Felker wrote:
> > don't remember which) where I described some optimizations that should
> > be present if gnu hash is to be supported. Basically, the dynamic
> > linker should keep track of whether there's one of the two hashes
> > that's supported by ALL loaded libs, and in that case, only ever use
> > that hash, to avoid computing two different hashes.
> The hash for given algo is only computed once (if needed).
> That's the reason for the computed table.
> If all libs uses the same hash algo, the other will never be computed.

a lib can support both hash tables
(so it's non-trivial to use the one that all libs support,
eg instead of a single alg index a bit mask should be used)

> +#define SYSV_HASH_ALG_IDX   0
> +#define GNU_HASH_ALG_IDX    1
> +#define HASH_ALG_CNT		2

if we go with this approach then use shorter names
(thes are only used locally)

eg. SYSV_HASH, GNU_HASH, NHASH

> +	for (h1 &= (uint32_t)-2;; sym++) {
> +		h2 = *hashval++;
> +		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))

these is still a ~1

looking at it now probably writing out & 0xfffffffe
is the cleanest

> +	static uint32_t precomp[HASH_ALG_CNT][3] = {
> +	  {0x6b366be, 0x6b3afd, 0x595a4cc},
> +	  {0xf9040207, 0xf4dc4ae, 0x1f4039c9},
> +	};
> +	uint32_t h[HASH_ALG_CNT];
> +	uint8_t computed[HASH_ALG_CNT] = {0, 0};
> +	uint8_t alg = dso->hashalg;
> +	if (alg == SYSV_HASH_ALG_IDX)
> +		h[alg] = sysv_hash(s);
> +	else
> +		h[alg] = gnu_hash(s);
> +
> +	computed[alg] = 1;
> +
> +	if (h[alg] == precomp[alg][0] && !strcmp(s, "dlopen")) rtld_used = 1;
> +	if (h[alg] == precomp[alg][1] && !strcmp(s, "dlsym")) rtld_used = 1;
> +	if (h[alg] == precomp[alg][2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
> +
>  	for (; dso; dso=dso->next) {
>  		Sym *sym;
> +
>  		if (!dso->global) continue;
> -		sym = lookup(s, h, dso);
> +
> +		alg = dso->hashalg;
> +		if (!computed[alg]) {
> +			if (alg == SYSV_HASH_ALG_IDX) {
> +				h[alg] = sysv_hash(s);
> +				sym = sysv_lookup(s, h[alg], dso);
> +			}
> +			else {
> +				h[alg] = gnu_hash(s);
> +				sym = gnu_lookup(s, h[alg], dso);
> +			}
> +			computed[alg] = 1;
> +		} else {
> +			if (alg == SYSV_HASH_ALG_IDX)
> +				sym = sysv_lookup(s, h[alg], dso);
> +			else
> +				sym = gnu_lookup(s, h[alg], dso);
> +		}

instead of arrays i'd write

alg = dso->hashalg;
if (alg == SYSV_HASH) {
	if (sysv_ok) {
		...
		sysv_ok = 1;
	}
	sym = sysv_lookup(s, sysv_h, dso);
} else {
}

since there are many ifs anyway

the table approach is nicer when all data and functions
are in a table:

if (!ok[alg]) {
	h[alg] = hash[alg](s);
	ok[alg] = 1;
}
sym = lookup[alg](s, h[alg], dso);

> +	p->hashalg = SYSV_HASH_ALG_IDX;
>  	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
> +	for (; v[0]; v+=2) if (v[0] == DT_GNU_HASH) {
> +		p->hashtab = (void *)(p->base + v[1]);
> +		p->hashalg = GNU_HASH_ALG_IDX;
> +	}

so it seems gnu hash is used whenever it's present
i'm not sure if that's the right default..

another possibility is to have a plain find_sym function
which is simple and only supports sysv hash
and whenever it encounters a lib that has no sysv hash in
it a find_sym_gnu is called that does the hard work
(so using gnu only libs is penalized, i don't know how
common that case is though)


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-08  9:55       ` musl
  2012-08-08 11:52         ` Szabolcs Nagy
@ 2012-08-08 12:49         ` Rich Felker
  1 sibling, 0 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-08 12:49 UTC (permalink / raw)
  To: musl

On Wed, Aug 08, 2012 at 11:55:45AM +0200, musl wrote:
> > Also, we had a conversation on the list and/or IRC a while back (I
> > don't remember which) where I described some optimizations that should
> > be present if gnu hash is to be supported. Basically, the dynamic
> > linker should keep track of whether there's one of the two hashes
> > that's supported by ALL loaded libs, and in that case, only ever use
> > that hash, to avoid computing two different hashes.
> The hash for given algo is only computed once (if needed).
> That's the reason for the computed table.
> If all libs uses the same hash algo, the other will never be computed.

All libs can have both algos (and MUST, if they use gnu hash at all,
in order to be conformant; the only reason this code is needed is to
support non-conformant lib files that lack standard ELF (sysv) hash).
The case I'm talking about is where 10 libs are loaded, and 9 of them
have both hashes but the last only has sysv. In that case, sysv should
get used and gnu should never get computed, or at least that was my
thought/intent.

> I added the precomp table wich includes both gnu and sysv hashes for the dlopen,
> dlsym and __stack_chk_fail symbols.

Indeed, that seems to be correct now.

I'll give the patch a better review soon.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-08 11:52         ` Szabolcs Nagy
@ 2012-08-08 12:54           ` Rich Felker
  2012-08-08 13:57           ` musl
  1 sibling, 0 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-08 12:54 UTC (permalink / raw)
  To: musl

On Wed, Aug 08, 2012 at 01:52:02PM +0200, Szabolcs Nagy wrote:
> > +	for (h1 &= (uint32_t)-2;; sym++) {
> > +		h2 = *hashval++;
> > +		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
> 
> these is still a ~1
> 
> looking at it now probably writing out & 0xfffffffe
> is the cleanest

I prefer -2. It's the standard idiom used elsewhere in musl (e.g. in
malloc, for alignment purposes, and for page masks) to say ("keep all
bits starting with the value of 2 and up and discard the rest").

> another possibility is to have a plain find_sym function
> which is simple and only supports sysv hash
> and whenever it encounters a lib that has no sysv hash in
> it a find_sym_gnu is called that does the hard work
> (so using gnu only libs is penalized, i don't know how
> common that case is though)

It could easily be known in advance if there's any lib that has
gnu-only, so I don't see this approach being very helpful. And it
would increase code duplication, I think...

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-08 11:52         ` Szabolcs Nagy
  2012-08-08 12:54           ` Rich Felker
@ 2012-08-08 13:57           ` musl
  2012-08-11 23:05             ` Rich Felker
  1 sibling, 1 reply; 33+ messages in thread
From: musl @ 2012-08-08 13:57 UTC (permalink / raw)
  To: musl; +Cc: Szabolcs Nagy

[-- Attachment #1: Type: text/plain, Size: 3712 bytes --]

Same as before except I use bit mask to support multiple hash algorithms.

On 08/08/2012 13:52, Szabolcs Nagy wrote:
> * musl <b.brezillon.musl@gmail.com> [2012-08-08 11:55:45 +0200]:
>> Here is the new patch for dladdr + gnu hash support.
>>
>> On 08/08/2012 01:09, Rich Felker wrote:
>>> don't remember which) where I described some optimizations that should
>>> be present if gnu hash is to be supported. Basically, the dynamic
>>> linker should keep track of whether there's one of the two hashes
>>> that's supported by ALL loaded libs, and in that case, only ever use
>>> that hash, to avoid computing two different hashes.
>> The hash for given algo is only computed once (if needed).
>> That's the reason for the computed table.
>> If all libs uses the same hash algo, the other will never be computed.
> a lib can support both hash tables
> (so it's non-trivial to use the one that all libs support,
> eg instead of a single alg index a bit mask should be used)
>
See patch changes.
>> +#define SYSV_HASH_ALG_IDX   0
>> +#define GNU_HASH_ALG_IDX    1
>> +#define HASH_ALG_CNT		2
> if we go with this approach then use shorter names
> (thes are only used locally)
>
> eg. SYSV_HASH, GNU_HASH, NHASH
Corrected.
>> +	for (h1 &= (uint32_t)-2;; sym++) {
>> +		h2 = *hashval++;
>> +		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
> these is still a ~1
>
> looking at it now probably writing out & 0xfffffffe
> is the cleanest
>
>> +	static uint32_t precomp[HASH_ALG_CNT][3] = {
>> +	  {0x6b366be, 0x6b3afd, 0x595a4cc},
>> +	  {0xf9040207, 0xf4dc4ae, 0x1f4039c9},
>> +	};
>> +	uint32_t h[HASH_ALG_CNT];
>> +	uint8_t computed[HASH_ALG_CNT] = {0, 0};
>> +	uint8_t alg = dso->hashalg;
>> +	if (alg == SYSV_HASH_ALG_IDX)
>> +		h[alg] = sysv_hash(s);
>> +	else
>> +		h[alg] = gnu_hash(s);
>> +
>> +	computed[alg] = 1;
>> +
>> +	if (h[alg] == precomp[alg][0] && !strcmp(s, "dlopen")) rtld_used = 1;
>> +	if (h[alg] == precomp[alg][1] && !strcmp(s, "dlsym")) rtld_used = 1;
>> +	if (h[alg] == precomp[alg][2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
>> +
>>  	for (; dso; dso=dso->next) {
>>  		Sym *sym;
>> +
>>  		if (!dso->global) continue;
>> -		sym = lookup(s, h, dso);
>> +
>> +		alg = dso->hashalg;
>> +		if (!computed[alg]) {
>> +			if (alg == SYSV_HASH_ALG_IDX) {
>> +				h[alg] = sysv_hash(s);
>> +				sym = sysv_lookup(s, h[alg], dso);
>> +			}
>> +			else {
>> +				h[alg] = gnu_hash(s);
>> +				sym = gnu_lookup(s, h[alg], dso);
>> +			}
>> +			computed[alg] = 1;
>> +		} else {
>> +			if (alg == SYSV_HASH_ALG_IDX)
>> +				sym = sysv_lookup(s, h[alg], dso);
>> +			else
>> +				sym = gnu_lookup(s, h[alg], dso);
>> +		}
> instead of arrays i'd write
>
> alg = dso->hashalg;
> if (alg == SYSV_HASH) {
> 	if (sysv_ok) {
> 		...
> 		sysv_ok = 1;
> 	}
> 	sym = sysv_lookup(s, sysv_h, dso);
> } else {
> }
Haven't changed it yet.
> since there are many ifs anyway
>
> the table approach is nicer when all data and functions
> are in a table:
>
> if (!ok[alg]) {
> 	h[alg] = hash[alg](s);
> 	ok[alg] = 1;
> }
> sym = lookup[alg](s, h[alg], dso);
>
>> +	p->hashalg = SYSV_HASH_ALG_IDX;
>>  	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
>> +	for (; v[0]; v+=2) if (v[0] == DT_GNU_HASH) {
>> +		p->hashtab = (void *)(p->base + v[1]);
>> +		p->hashalg = GNU_HASH_ALG_IDX;
>> +	}
> so it seems gnu hash is used whenever it's present
> i'm not sure if that's the right default..
>
> another possibility is to have a plain find_sym function
> which is simple and only supports sysv hash
> and whenever it encounters a lib that has no sysv hash in
> it a find_sym_gnu is called that does the hard work
> (so using gnu only libs is penalized, i don't know how
> common that case is though)


[-- Attachment #2: ldso-add-dladdr-and-gnu-hash-support.patch --]
[-- Type: text/x-patch, Size: 10514 bytes --]

From 359843b57223db412847f69702e19511dfeb435d Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <b.brezillon@overkiz.com>
Date: Wed, 8 Aug 2012 15:49:29 +0200
Subject: [PATCH] ldso : add dladdr and gnu hash support

---
 include/dlfcn.h    |   15 +++
 src/ldso/dynlink.c |  272 ++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 269 insertions(+), 18 deletions(-)

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8c45822 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,21 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+    const char *dli_fname;  /* Pathname of shared object that
+                               contains address */
+    void       *dli_fbase;  /* Address at which shared object
+                               is loaded */
+    const char *dli_sname;  /* Name of nearest symbol with address
+                               lower than addr */
+    void       *dli_saddr;  /* Exact address of symbol named
+                               in dli_sname */
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f55c6f1..4a236b2 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,14 +29,20 @@ typedef Elf32_Phdr Phdr;
 typedef Elf32_Sym Sym;
 #define R_TYPE(x) ((x)&255)
 #define R_SYM(x) ((x)>>8)
+#define ELF_ST_TYPE ELF32_ST_TYPE
 #else
 typedef Elf64_Ehdr Ehdr;
 typedef Elf64_Phdr Phdr;
 typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #endif
 
+#define SYSV_HASH	0
+#define GNU_HASH	1
+#define NHASH		2
+
 struct debug {
 	int ver;
 	void *head;
@@ -52,7 +59,8 @@ struct dso {
 
 	int refcnt;
 	Sym *syms;
-	uint32_t *hashtab;
+	uint32_t hashalgs;
+	uint32_t *hashtabs[NHASH];
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -94,7 +102,7 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -105,11 +113,20 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h & 0xffffffff;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
-	uint32_t *hashtab = dso->hashtab;
+	uint32_t *hashtab = dso->hashtabs[SYSV_HASH];
 	char *strings = dso->strings;
 	for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
 		if (!strcmp(s, strings+syms[i].st_name))
@@ -118,20 +135,101 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->hashtabs[GNU_HASH];
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[NHASH][3] = {
+		{0x6b366be, 0x6b3afd, 0x595a4cc},
+		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t h[NHASH];
+	uint8_t ok = 0;
+	uint8_t algs = dso->hashalgs;
+	uint8_t alg;
+	if (algs & (1 << SYSV_HASH)) {
+		h[SYSV_HASH] = sysv_hash(s);
+		alg = SYSV_HASH;
+	} else {
+		h[GNU_HASH] = gnu_hash(s);
+		alg = GNU_HASH;
+	}
+
+	ok |= (1 << alg);
+
+	if (h[alg] == precomp[alg][0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h[alg] == precomp[alg][1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h[alg] == precomp[alg][2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+
 	for (; dso; dso=dso->next) {
 		Sym *sym;
+
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+
+		algs = dso->hashalgs;
+		if (!(algs & ok)) {
+			if (algs & (1 << SYSV_HASH)) {
+				alg = SYSV_HASH;
+				h[alg] = sysv_hash(s);
+				sym = sysv_lookup(s, h[alg], dso);
+			}
+			else {
+				alg = GNU_HASH;
+				h[alg] = gnu_hash(s);
+				sym = gnu_lookup(s, h[alg], dso);
+			}
+
+			ok |= (1 << alg);
+		} else {
+			if ((algs & ok) & (1 << SYSV_HASH))
+				sym = sysv_lookup(s, h[SYSV_HASH], dso);
+			else
+				sym = gnu_lookup(s, h[GNU_HASH], dso);
+		}
+
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -320,11 +418,28 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
-	size_t dyn[DYN_CNT] = {0};
-	decode_vec(p->dynv, dyn, DYN_CNT);
-	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
-	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	size_t *v;
+	p->hashalgs = 0;
+	for (v = p->dynv; v[0]; v+=2) {
+		switch (v[0]) {
+		case DT_SYMTAB:
+			p->syms = (void *)(p->base + v[1]);
+			break;
+		case DT_HASH:
+			p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
+			p->hashalgs |= (1 << SYSV_HASH);
+			break;
+		case DT_STRTAB:
+			p->strings = (void *)(p->base + v[1]);
+			break;
+		case DT_GNU_HASH:
+			p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
+			p->hashalgs |= (1 << GNU_HASH);
+			break;
+		default:
+			break;
+		}
+	}
 }
 
 static struct dso *load_library(const char *name)
@@ -784,8 +899,11 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
 	Sym *sym;
+	uint32_t ok = 0;
+	uint8_t algs = p->hashalgs;
+	uint32_t h[NHASH];
+
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
 		if (!p) p=head;
@@ -798,12 +916,39 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+
+	if (algs & (1 << SYSV_HASH)) {
+		h[SYSV_HASH] = sysv_hash(s);
+		sym = sysv_lookup(s, h[SYSV_HASH], p);
+		ok |= 1 << SYSV_HASH;
+	} else {
+		h[GNU_HASH] = gnu_hash(s);
+		sym = gnu_lookup(s, h[GNU_HASH], p);
+		ok |= 1 << GNU_HASH;
+	}
+
+
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		algs = p->deps[i]->hashalgs;
+		if (!(algs & ok)) {
+			if (algs & SYSV_HASH) {
+				h[SYSV_HASH] = sysv_hash(s);
+				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
+				ok |= SYSV_HASH;
+			} else {
+				h[GNU_HASH] = gnu_hash(s);
+				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
+				ok |= GNU_HASH;
+			}
+		} else {
+			if (algs & SYSV_HASH)
+				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
+			else
+				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
+		}
+
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -813,6 +958,97 @@ failed:
 	return 0;
 }
 
+struct sym_search {
+	void *addr;
+	Dl_info *info;
+};
+
+static int find_closest_sym (struct dso *dso, Sym *sym, struct sym_search *search)
+{
+	void *symaddr = dso->base + sym->st_value;
+	char *strings = dso->strings;
+	Dl_info *info = search->info;
+	void *addr = search->addr;
+	void *prevaddr = info->dli_saddr;
+
+	if (sym->st_value == 0 && sym->st_shndx == SHN_UNDEF)
+		return 1;
+
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS)
+		return 1;
+
+	if (addr < symaddr)
+		return 1;
+
+	if (prevaddr && (addr - symaddr) > (addr - prevaddr))
+		return 1;
+
+	info->dli_saddr = symaddr;
+	info->dli_sname = strings + sym->st_name;
+
+	if (addr == symaddr)
+		return 0;
+
+	return 1;
+
+}
+
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct sym_search search;
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	search.info = info;
+	search.addr = addr;
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t *hashtab;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashalgs & (1 << SYSV_HASH)) {
+				hashtab = p->hashtabs[SYSV_HASH];
+				for (i = 0; i < hashtab[1]; i++) {
+					if (!find_closest_sym (p, syms + i, &search))
+						return 1;
+				}
+			} else if(p->hashalgs & (1 << GNU_HASH)) {
+				uint32_t *buckets;
+				uint32_t nbuckets;
+				uint32_t *hashvals;
+				uint32_t symndx;
+				hashtab = p->hashtabs[GNU_HASH];
+				buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				nbuckets = hashtab[0];
+				hashvals = buckets + nbuckets;
+				symndx = hashtab[1];
+				for (i = 0; i < nbuckets; ++i) {
+					uint32_t n = buckets[i];
+					Sym *sym = syms + n;
+					uint32_t *hashval = hashvals + n - symndx;
+
+					do {
+						if (!find_closest_sym (p, sym, &search))
+							return 1;
+					}while (!(*hashval++ & 1));
+				}
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;
-- 
1.7.9.5


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-08 13:57           ` musl
@ 2012-08-11 23:05             ` Rich Felker
  2012-08-15 22:41               ` boris brezillon
                                 ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-11 23:05 UTC (permalink / raw)
  To: musl

On Wed, Aug 08, 2012 at 03:57:15PM +0200, musl wrote:
> Same as before except I use bit mask to support multiple hash algorithms.

Sorry for taking a while to get back to you. I haven't had as much
time to work on musl the past couple weeks and some other topics (like
mips dynamic linking) had priority, but I hope to have more again for
a while now. Here's a quick review of some things that will hopefully
turn into a discussion for improving/simplifying the code.

> +#ifdef _GNU_SOURCE
> +typedef struct {
> +    const char *dli_fname;  /* Pathname of shared object that
> +                               contains address */
> +    void       *dli_fbase;  /* Address at which shared object
> +                               is loaded */
> +    const char *dli_sname;  /* Name of nearest symbol with address
> +                               lower than addr */
> +    void       *dli_saddr;  /* Exact address of symbol named
> +                               in dli_sname */
> +} Dl_info;

musl policy is not to have commentary, especially copied from
third-party sources, in the public headers. This is partly to
strengthen the claim that all public headers are public domain
(contain no copyrightable content) and partly just to avoid size creep
and wasted parsing time by the compiler.

>  static void decode_dyn(struct dso *p)
>  {
> -	size_t dyn[DYN_CNT] = {0};
> -	decode_vec(p->dynv, dyn, DYN_CNT);
> -	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
> -	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
> -	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
> +	size_t *v;
> +	p->hashalgs = 0;
> +	for (v = p->dynv; v[0]; v+=2) {
> +		switch (v[0]) {
> +		case DT_SYMTAB:
> +			p->syms = (void *)(p->base + v[1]);
> +			break;
> +		case DT_HASH:
> +			p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
> +			p->hashalgs |= (1 << SYSV_HASH);
> +			break;
> +		case DT_STRTAB:
> +			p->strings = (void *)(p->base + v[1]);
> +			break;
> +		case DT_GNU_HASH:
> +			p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
> +			p->hashalgs |= (1 << GNU_HASH);
> +			break;
> +		default:
> +			break;
> +		}
> +	}
>  }

This is rather ugly but I don't see a better way right off, since
DT_GNU_HASH has that huge value... Maybe it would be nice to improve
decode_vec to have a variant that takes a (static const) table of DT_x
values and struct offsets to store them at when found..? This is just
some rambling and I'm not sure it's better, but it might be smart if
we're going to want to continue adding support for more
non-original-sysv DT_* entries with huge values, so we don't have to
write new switches for each one.

BTW, while I _want_ it to be safe, it's possible that early switches
(early meaning prior to the comment in __dynlink that says libc is now
fully functional) will actually fail to work/crash on some archs... So
this needs consideration too.

>  static struct dso *load_library(const char *name)
> @@ -784,8 +899,11 @@ end:
>  static void *do_dlsym(struct dso *p, const char *s, void *ra)
>  {
> [...]
>  	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
>  		return p->base + sym->st_value;
>  	if (p->deps) for (i=0; p->deps[i]; i++) {
> -		sym = lookup(s, h, p->deps[i]);
> +		algs = p->deps[i]->hashalgs;
> +		if (!(algs & ok)) {
> +			if (algs & SYSV_HASH) {
> +				h[SYSV_HASH] = sysv_hash(s);
> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
> +				ok |= SYSV_HASH;
> +			} else {
> +				h[GNU_HASH] = gnu_hash(s);
> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
> +				ok |= GNU_HASH;
> +			}
> +		} else {
> +			if (algs & SYSV_HASH)
> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
> +			else
> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
> +		}

This looks like a lot of code duplication and extra unnecessary
variables. The way I would do it is something like:

if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
	if (!h) h = hash(s);
	sym = sysv_lookup(s, h, p->deps[i]);
}

i.e. if there's a sysv hash table and we've already computed h (sysv
hash) or if there's no gnu hash table, compute h if it wasn't already
computed, and then attempt a lookup with it.

I'm not sure I got the logic all right (this is really a 1-minute
glance over the code right now amidst doing lots of other stuff too)
but the ideas are:

- no need for extra vars for bitmask. Whether the hash var for the
  corresponding hash type is nonzero is sufficient to tell whether
  it's been computed.
- no need for extra vars/fields to store which hash types a dso has.
  Just use the hashtab/ghashtab fields in the dso struct, and let them
  be null if the corresponding hash table does not exist. (And don't
  make them an array unless there's a real benefit in using an array;
  I don't think there is any benefit unless you're aiming for
  extensibility to support N hash types.)

> +static int do_dladdr (void *addr, Dl_info *info)
> [...]
> +			if (p->hashalgs & (1 << SYSV_HASH)) {
> +				hashtab = p->hashtabs[SYSV_HASH];
> +				for (i = 0; i < hashtab[1]; i++) {

I'm not seeing why this function needs hash tables at all. It's not
looking up symbols, just iterating over the entire symbol table, no?
Please explain if I'm mistaken.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-11 23:05             ` Rich Felker
@ 2012-08-15 22:41               ` boris brezillon
  2012-08-17  5:39                 ` Rich Felker
  2012-08-16 18:03               ` musl
  2012-08-17 16:35               ` musl
  2 siblings, 1 reply; 33+ messages in thread
From: boris brezillon @ 2012-08-15 22:41 UTC (permalink / raw)
  To: musl

Hi,

I'm sorry that I did not reply to you before (I've been away for few days).

2012/8/12 Rich Felker <dalias@aerifal.cx>:
> On Wed, Aug 08, 2012 at 03:57:15PM +0200, musl wrote:
>> Same as before except I use bit mask to support multiple hash algorithms.
>
> Sorry for taking a while to get back to you. I haven't had as much
> time to work on musl the past couple weeks and some other topics (like
> mips dynamic linking) had priority, but I hope to have more again for
> a while now. Here's a quick review of some things that will hopefully
> turn into a discussion for improving/simplifying the code.
No problem.
Thanks for the review.
Do you want to discuss it on irc or keep going on the mailing list?
>
>> +#ifdef _GNU_SOURCE
>> +typedef struct {
>> +    const char *dli_fname;  /* Pathname of shared object that
>> +                               contains address */
>> +    void       *dli_fbase;  /* Address at which shared object
>> +                               is loaded */
>> +    const char *dli_sname;  /* Name of nearest symbol with address
>> +                               lower than addr */
>> +    void       *dli_saddr;  /* Exact address of symbol named
>> +                               in dli_sname */
>> +} Dl_info;
>
> musl policy is not to have commentary, especially copied from
> third-party sources, in the public headers. This is partly to
> strengthen the claim that all public headers are public domain
> (contain no copyrightable content) and partly just to avoid size creep
> and wasted parsing time by the compiler.
>
I'll remove those comments.
>>  static void decode_dyn(struct dso *p)
>>  {
>> -     size_t dyn[DYN_CNT] = {0};
>> -     decode_vec(p->dynv, dyn, DYN_CNT);
>> -     p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
>> -     p->hashtab = (void *)(p->base + dyn[DT_HASH]);
>> -     p->strings = (void *)(p->base + dyn[DT_STRTAB]);
>> +     size_t *v;
>> +     p->hashalgs = 0;
>> +     for (v = p->dynv; v[0]; v+=2) {
>> +             switch (v[0]) {
>> +             case DT_SYMTAB:
>> +                     p->syms = (void *)(p->base + v[1]);
>> +                     break;
>> +             case DT_HASH:
>> +                     p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
>> +                     p->hashalgs |= (1 << SYSV_HASH);
>> +                     break;
>> +             case DT_STRTAB:
>> +                     p->strings = (void *)(p->base + v[1]);
>> +                     break;
>> +             case DT_GNU_HASH:
>> +                     p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
>> +                     p->hashalgs |= (1 << GNU_HASH);
>> +                     break;
>> +             default:
>> +                     break;
>> +             }
>> +     }
>>  }
>
> This is rather ugly but I don't see a better way right off, since
> DT_GNU_HASH has that huge value... Maybe it would be nice to improve
> decode_vec to have a variant that takes a (static const) table of DT_x
> values and struct offsets to store them at when found..? This is just
> some rambling and I'm not sure it's better, but it might be smart if
> we're going to want to continue adding support for more
> non-original-sysv DT_* entries with huge values, so we don't have to
> write new switches for each one.
>
I'll take a look at this.
> BTW, while I _want_ it to be safe, it's possible that early switches
> (early meaning prior to the comment in __dynlink that says libc is now
> fully functional) will actually fail to work/crash on some archs... So
> this needs consideration too.
>
I didn't knew that. Could explain me why?
>>  static struct dso *load_library(const char *name)
>> @@ -784,8 +899,11 @@ end:
>>  static void *do_dlsym(struct dso *p, const char *s, void *ra)
>>  {
>> [...]
>>       if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
>>               return p->base + sym->st_value;
>>       if (p->deps) for (i=0; p->deps[i]; i++) {
>> -             sym = lookup(s, h, p->deps[i]);
>> +             algs = p->deps[i]->hashalgs;
>> +             if (!(algs & ok)) {
>> +                     if (algs & SYSV_HASH) {
>> +                             h[SYSV_HASH] = sysv_hash(s);
>> +                             sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +                             ok |= SYSV_HASH;
>> +                     } else {
>> +                             h[GNU_HASH] = gnu_hash(s);
>> +                             sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +                             ok |= GNU_HASH;
>> +                     }
>> +             } else {
>> +                     if (algs & SYSV_HASH)
>> +                             sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +                     else
>> +                             sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +             }
>
> This looks like a lot of code duplication and extra unnecessary
> variables. The way I would do it is something like:
>
> if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
>         if (!h) h = hash(s);
>         sym = sysv_lookup(s, h, p->deps[i]);
> }
>
> i.e. if there's a sysv hash table and we've already computed h (sysv
> hash) or if there's no gnu hash table, compute h if it wasn't already
> computed, and then attempt a lookup with it.
>
> I'm not sure I got the logic all right (this is really a 1-minute
> glance over the code right now amidst doing lots of other stuff too)
> but the ideas are:
>
> - no need for extra vars for bitmask. Whether the hash var for the
>   corresponding hash type is nonzero is sufficient to tell whether
>   it's been computed.
You're right. I didn't realized an hash result couldn't be nul.
> - no need for extra vars/fields to store which hash types a dso has.
>   Just use the hashtab/ghashtab fields in the dso struct, and let them
>   be null if the corresponding hash table does not exist. (And don't
>   make them an array unless there's a real benefit in using an array;
>   I don't think there is any benefit unless you're aiming for
>   extensibility to support N hash types.)
I'll remove the hashalgs field and use the nil test instead.
>
>> +static int do_dladdr (void *addr, Dl_info *info)
>> [...]
>> +                     if (p->hashalgs & (1 << SYSV_HASH)) {
>> +                             hashtab = p->hashtabs[SYSV_HASH];
>> +                             for (i = 0; i < hashtab[1]; i++) {
>
> I'm not seeing why this function needs hash tables at all. It's not
> looking up symbols, just iterating over the entire symbol table, no?
> Please explain if I'm mistaken.
I don't see any other way to know the sym table size except reading
the .dynsym section header.
That's why I'm iterating over the hash table.
For sysv hash the nchain (second entry of hash table) gives the sym table size.
For gnu hash It's a little bit more complicated (see
https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections).

Should we parse the .dynsym section header and store the sym table
size in dso struct?
Do you see any other way to get the dynsym table size or at least
iterate over the dynsym table (specific pattern for last element ?).

>
> Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-11 23:05             ` Rich Felker
  2012-08-15 22:41               ` boris brezillon
@ 2012-08-16 18:03               ` musl
  2012-08-17 16:35               ` musl
  2 siblings, 0 replies; 33+ messages in thread
From: musl @ 2012-08-16 18:03 UTC (permalink / raw)
  To: musl; +Cc: Rich Felker

[-- Attachment #1: Type: text/plain, Size: 5563 bytes --]

Hi,

Here is a new patch (and a diff from the previous one).

Could you tell me if the new decode_vec function is what you expected?

Regards,

Boris

On 12/08/2012 01:05, Rich Felker wrote:
> On Wed, Aug 08, 2012 at 03:57:15PM +0200, musl wrote:
>> Same as before except I use bit mask to support multiple hash algorithms.
> Sorry for taking a while to get back to you. I haven't had as much
> time to work on musl the past couple weeks and some other topics (like
> mips dynamic linking) had priority, but I hope to have more again for
> a while now. Here's a quick review of some things that will hopefully
> turn into a discussion for improving/simplifying the code.
>
>> +#ifdef _GNU_SOURCE
>> +typedef struct {
>> +    const char *dli_fname;  /* Pathname of shared object that
>> +                               contains address */
>> +    void       *dli_fbase;  /* Address at which shared object
>> +                               is loaded */
>> +    const char *dli_sname;  /* Name of nearest symbol with address
>> +                               lower than addr */
>> +    void       *dli_saddr;  /* Exact address of symbol named
>> +                               in dli_sname */
>> +} Dl_info;
> musl policy is not to have commentary, especially copied from
> third-party sources, in the public headers. This is partly to
> strengthen the claim that all public headers are public domain
> (contain no copyrightable content) and partly just to avoid size creep
> and wasted parsing time by the compiler.
>
>>  static void decode_dyn(struct dso *p)
>>  {
>> -	size_t dyn[DYN_CNT] = {0};
>> -	decode_vec(p->dynv, dyn, DYN_CNT);
>> -	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
>> -	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
>> -	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
>> +	size_t *v;
>> +	p->hashalgs = 0;
>> +	for (v = p->dynv; v[0]; v+=2) {
>> +		switch (v[0]) {
>> +		case DT_SYMTAB:
>> +			p->syms = (void *)(p->base + v[1]);
>> +			break;
>> +		case DT_HASH:
>> +			p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
>> +			p->hashalgs |= (1 << SYSV_HASH);
>> +			break;
>> +		case DT_STRTAB:
>> +			p->strings = (void *)(p->base + v[1]);
>> +			break;
>> +		case DT_GNU_HASH:
>> +			p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
>> +			p->hashalgs |= (1 << GNU_HASH);
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +	}
>>  }
> This is rather ugly but I don't see a better way right off, since
> DT_GNU_HASH has that huge value... Maybe it would be nice to improve
> decode_vec to have a variant that takes a (static const) table of DT_x
> values and struct offsets to store them at when found..? This is just
> some rambling and I'm not sure it's better, but it might be smart if
> we're going to want to continue adding support for more
> non-original-sysv DT_* entries with huge values, so we don't have to
> write new switches for each one.
>
> BTW, while I _want_ it to be safe, it's possible that early switches
> (early meaning prior to the comment in __dynlink that says libc is now
> fully functional) will actually fail to work/crash on some archs... So
> this needs consideration too.
>
>>  static struct dso *load_library(const char *name)
>> @@ -784,8 +899,11 @@ end:
>>  static void *do_dlsym(struct dso *p, const char *s, void *ra)
>>  {
>> [...]
>>  	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
>>  		return p->base + sym->st_value;
>>  	if (p->deps) for (i=0; p->deps[i]; i++) {
>> -		sym = lookup(s, h, p->deps[i]);
>> +		algs = p->deps[i]->hashalgs;
>> +		if (!(algs & ok)) {
>> +			if (algs & SYSV_HASH) {
>> +				h[SYSV_HASH] = sysv_hash(s);
>> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +				ok |= SYSV_HASH;
>> +			} else {
>> +				h[GNU_HASH] = gnu_hash(s);
>> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +				ok |= GNU_HASH;
>> +			}
>> +		} else {
>> +			if (algs & SYSV_HASH)
>> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +			else
>> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +		}
> This looks like a lot of code duplication and extra unnecessary
> variables. The way I would do it is something like:
>
> if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
> 	if (!h) h = hash(s);
> 	sym = sysv_lookup(s, h, p->deps[i]);
> }
>
> i.e. if there's a sysv hash table and we've already computed h (sysv
> hash) or if there's no gnu hash table, compute h if it wasn't already
> computed, and then attempt a lookup with it.
>
> I'm not sure I got the logic all right (this is really a 1-minute
> glance over the code right now amidst doing lots of other stuff too)
> but the ideas are:
>
> - no need for extra vars for bitmask. Whether the hash var for the
>   corresponding hash type is nonzero is sufficient to tell whether
>   it's been computed.
> - no need for extra vars/fields to store which hash types a dso has.
>   Just use the hashtab/ghashtab fields in the dso struct, and let them
>   be null if the corresponding hash table does not exist. (And don't
>   make them an array unless there's a real benefit in using an array;
>   I don't think there is any benefit unless you're aiming for
>   extensibility to support N hash types.)
>
>> +static int do_dladdr (void *addr, Dl_info *info)
>> [...]
>> +			if (p->hashalgs & (1 << SYSV_HASH)) {
>> +				hashtab = p->hashtabs[SYSV_HASH];
>> +				for (i = 0; i < hashtab[1]; i++) {
> I'm not seeing why this function needs hash tables at all. It's not
> looking up symbols, just iterating over the entire symbol table, no?
> Please explain if I'm mistaken.
>
> Rich


[-- Attachment #2: dladdr-gnu-hash-v2.diff --]
[-- Type: text/x-patch, Size: 9819 bytes --]

diff --git a/include/dlfcn.h b/include/dlfcn.h
index 8c45822..8524e0b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -20,14 +20,10 @@ void  *dlsym(void *, const char *);
 
 #ifdef _GNU_SOURCE
 typedef struct {
-    const char *dli_fname;  /* Pathname of shared object that
-                               contains address */
-    void       *dli_fbase;  /* Address at which shared object
-                               is loaded */
-    const char *dli_sname;  /* Name of nearest symbol with address
-                               lower than addr */
-    void       *dli_saddr;  /* Exact address of symbol named
-                               in dli_sname */
+	const char *dli_fname;
+	void *dli_fbase;
+	const char *dli_sname;
+	void *dli_saddr;
 } Dl_info;
 
 int dladdr (void *addr, Dl_info *info);
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 4a236b2..d63da1c 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -59,8 +59,8 @@ struct dso {
 
 	int refcnt;
 	Sym *syms;
-	uint32_t hashalgs;
-	uint32_t *hashtabs[NHASH];
+	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -93,12 +93,23 @@ struct debug *_dl_debug_addr = &debug;
 #define AUX_CNT 24
 #define DYN_CNT 34
 
-static void decode_vec(size_t *v, size_t *a, size_t cnt)
+struct tag_range {
+	size_t start;
+	size_t size;
+};
+
+static void decode_vec(size_t *v, const struct tag_range *defs, size_t **storages, size_t defsize)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
-		a[v[0]] = v[1];
+	size_t i;
+	for (i = 0; i < defsize; ++i)
+		memset(storages[i], 0, defs[i].size*sizeof(size_t));
+	for (; v[0]; v+=2) {
+		for (i = 0; i < defsize; ++i) {
+			if (v[0]>defs[i].start && v[0]<=defs[i].start+defs[i].size) {
+				storages[i][0] |= 1ULL<<v[0];
+				storages[i][v[0] - defs[i].start] = v[1];
+			}
+		}
 	}
 }
 
@@ -126,7 +137,7 @@ static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
-	uint32_t *hashtab = dso->hashtabs[SYSV_HASH];
+	uint32_t *hashtab = dso->hashtab;
 	char *strings = dso->strings;
 	for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
 		if (!strcmp(s, strings+syms[i].st_name))
@@ -140,7 +151,7 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 	size_t i;
 	Sym *sym;
 	char *strings = dso->strings;
-	uint32_t *hashtab = dso->hashtabs[GNU_HASH];
+	uint32_t *hashtab = dso->ghashtab;
 	uint32_t nbuckets = hashtab[0];
 	size_t *maskwords = (size_t *)(hashtab + 4);
 	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
@@ -182,52 +193,37 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
 	void *def = 0;
-	static uint32_t precomp[NHASH][3] = {
+	static uint32_t precomp[2][3] = {
 		{0x6b366be, 0x6b3afd, 0x595a4cc},
 		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
 	};
-	uint32_t h[NHASH];
-	uint8_t ok = 0;
-	uint8_t algs = dso->hashalgs;
-	uint8_t alg;
-	if (algs & (1 << SYSV_HASH)) {
-		h[SYSV_HASH] = sysv_hash(s);
-		alg = SYSV_HASH;
+	uint32_t *precomptab;
+	uint32_t h = 0, gh = 0;
+	if (dso->hashtab) {
+		h = sysv_hash(s);
+		precomptab = precomp[0];
 	} else {
-		h[GNU_HASH] = gnu_hash(s);
-		alg = GNU_HASH;
+		gh = gnu_hash(s);
+		precomptab = precomp[0];
 	}
 
-	ok |= (1 << alg);
-
-	if (h[alg] == precomp[alg][0] && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h[alg] == precomp[alg][1] && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h[alg] == precomp[alg][2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	if (h == precomptab[0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h == precomptab[1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h == precomptab[2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
 
 	for (; dso; dso=dso->next) {
 		Sym *sym;
 
 		if (!dso->global) continue;
 
-		algs = dso->hashalgs;
-		if (!(algs & ok)) {
-			if (algs & (1 << SYSV_HASH)) {
-				alg = SYSV_HASH;
-				h[alg] = sysv_hash(s);
-				sym = sysv_lookup(s, h[alg], dso);
-			}
-			else {
-				alg = GNU_HASH;
-				h[alg] = gnu_hash(s);
-				sym = gnu_lookup(s, h[alg], dso);
-			}
-
-			ok |= (1 << alg);
+		if (dso->hashtab && (h || !dso->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
 		} else {
-			if ((algs & ok) & (1 << SYSV_HASH))
-				sym = sysv_lookup(s, h[SYSV_HASH], dso);
-			else
-				sym = gnu_lookup(s, h[GNU_HASH], dso);
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
 		}
 
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
@@ -418,28 +414,18 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
-	size_t *v;
-	p->hashalgs = 0;
-	for (v = p->dynv; v[0]; v+=2) {
-		switch (v[0]) {
-		case DT_SYMTAB:
-			p->syms = (void *)(p->base + v[1]);
-			break;
-		case DT_HASH:
-			p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
-			p->hashalgs |= (1 << SYSV_HASH);
-			break;
-		case DT_STRTAB:
-			p->strings = (void *)(p->base + v[1]);
-			break;
-		case DT_GNU_HASH:
-			p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
-			p->hashalgs |= (1 << GNU_HASH);
-			break;
-		default:
-			break;
-		}
-	}
+	static const struct tag_range defs[2] = {{0, DYN_CNT}, {DT_ADDRRNGHI-DT_ADDRNUM, DT_ADDRNUM}};
+	size_t dynbase[DYN_CNT] = {0};
+	size_t dynaddr[DT_ADDRNUM+1] = {0};
+	size_t *storage[2] = {dynbase, dynaddr};
+	decode_vec (p->dynv, defs, storage, 2);
+	p->syms = (void *)(p->base + dynbase[DT_SYMTAB]);
+	p->strings = (void *)(p->base + dynbase[DT_STRTAB]);
+	if (dynbase[DT_HASH])
+		p->hashtab = (void *)(p->base + dynbase[DT_HASH]);
+	if (dynaddr[DT_ADDRNUM-DT_ADDRTAGIDX(DT_GNU_HASH)])
+		p->ghashtab = (void *)(p->base + dynaddr[DT_ADDRNUM-DT_ADDRTAGIDX(DT_GNU_HASH)]);
+
 }
 
 static struct dso *load_library(const char *name)
@@ -601,9 +587,11 @@ static void make_global(struct dso *p)
 static void reloc_all(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
+	static const struct tag_range defs = {0, DYN_CNT};
+	size_t *storage = dyn;
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
-		decode_vec(p->dynv, dyn, DYN_CNT);
+		decode_vec(p->dynv, &defs, &storage, 1);
 #ifdef NEED_ARCH_RELOCS
 		do_arch_relocs(p, head);
 #endif
@@ -636,9 +624,11 @@ static size_t find_dyn(Phdr *ph, size_t cnt, size_t stride)
 static void do_init_fini(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
+	static const struct tag_range defs = {0, DYN_CNT};
+	size_t *storage = dyn;
 	for (; p; p=p->prev) {
 		if (p->constructed) return;
-		decode_vec(p->dynv, dyn, DYN_CNT);
+		decode_vec(p->dynv, &defs, &storage, 1);
 		if (dyn[0] & (1<<DT_FINI))
 			atexit((void (*)(void))(p->base + dyn[DT_FINI]));
 		if (dyn[0] & (1<<DT_INIT))
@@ -654,6 +644,8 @@ void _dl_debug_state(void)
 void *__dynlink(int argc, char **argv)
 {
 	size_t *auxv, aux[AUX_CNT] = {0};
+	static const struct tag_range defs = {0, AUX_CNT};
+	size_t *storage = aux;
 	size_t i;
 	Phdr *phdr;
 	Ehdr *ehdr;
@@ -671,7 +663,7 @@ void *__dynlink(int argc, char **argv)
 			env_preload = argv[i]+11;
 	auxv = (void *)(argv+i+1);
 
-	decode_vec(auxv, aux, AUX_CNT);
+	decode_vec(auxv, &defs, &storage, 1);
 
 	/* Only trust user/env if kernel says we're not suid/sgid */
 	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
@@ -900,9 +892,7 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
 	Sym *sym;
-	uint32_t ok = 0;
-	uint8_t algs = p->hashalgs;
-	uint32_t h[NHASH];
+	uint32_t h = 0, gh = 0;
 
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -917,36 +907,26 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		return res;
 	}
 
-	if (algs & (1 << SYSV_HASH)) {
-		h[SYSV_HASH] = sysv_hash(s);
-		sym = sysv_lookup(s, h[SYSV_HASH], p);
-		ok |= 1 << SYSV_HASH;
+	if (p->hashtab) {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
 	} else {
-		h[GNU_HASH] = gnu_hash(s);
-		sym = gnu_lookup(s, h[GNU_HASH], p);
-		ok |= 1 << GNU_HASH;
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
 	}
 
 
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		algs = p->deps[i]->hashalgs;
-		if (!(algs & ok)) {
-			if (algs & SYSV_HASH) {
-				h[SYSV_HASH] = sysv_hash(s);
-				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
-				ok |= SYSV_HASH;
-			} else {
-				h[GNU_HASH] = gnu_hash(s);
-				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
-				ok |= GNU_HASH;
-			}
+		if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
 		} else {
-			if (algs & SYSV_HASH)
-				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
-			else
-				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
 		}
 
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
@@ -1007,18 +987,18 @@ static int do_dladdr (void *addr, Dl_info *info)
 			size_t i;
 			info->dli_fname = p->name;
 			info->dli_fbase = p->base;
-			if (p->hashalgs & (1 << SYSV_HASH)) {
-				hashtab = p->hashtabs[SYSV_HASH];
+			if (p->hashtab) {
+				hashtab = p->hashtab;
 				for (i = 0; i < hashtab[1]; i++) {
 					if (!find_closest_sym (p, syms + i, &search))
 						return 1;
 				}
-			} else if(p->hashalgs & (1 << GNU_HASH)) {
+			} else {
 				uint32_t *buckets;
 				uint32_t nbuckets;
 				uint32_t *hashvals;
 				uint32_t symndx;
-				hashtab = p->hashtabs[GNU_HASH];
+				hashtab = p->ghashtab;
 				buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
 				nbuckets = hashtab[0];
 				hashvals = buckets + nbuckets;

[-- Attachment #3: dladdr-gnu-hash-v2.patch --]
[-- Type: text/x-patch, Size: 10873 bytes --]

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8524e0b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,17 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+	const char *dli_fname;
+	void *dli_fbase;
+	const char *dli_sname;
+	void *dli_saddr;
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f55c6f1..d63da1c 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,14 +29,20 @@ typedef Elf32_Phdr Phdr;
 typedef Elf32_Sym Sym;
 #define R_TYPE(x) ((x)&255)
 #define R_SYM(x) ((x)>>8)
+#define ELF_ST_TYPE ELF32_ST_TYPE
 #else
 typedef Elf64_Ehdr Ehdr;
 typedef Elf64_Phdr Phdr;
 typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #endif
 
+#define SYSV_HASH	0
+#define GNU_HASH	1
+#define NHASH		2
+
 struct debug {
 	int ver;
 	void *head;
@@ -53,6 +60,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -85,16 +93,27 @@ struct debug *_dl_debug_addr = &debug;
 #define AUX_CNT 24
 #define DYN_CNT 34
 
-static void decode_vec(size_t *v, size_t *a, size_t cnt)
+struct tag_range {
+	size_t start;
+	size_t size;
+};
+
+static void decode_vec(size_t *v, const struct tag_range *defs, size_t **storages, size_t defsize)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
-		a[v[0]] = v[1];
+	size_t i;
+	for (i = 0; i < defsize; ++i)
+		memset(storages[i], 0, defs[i].size*sizeof(size_t));
+	for (; v[0]; v+=2) {
+		for (i = 0; i < defsize; ++i) {
+			if (v[0]>defs[i].start && v[0]<=defs[i].start+defs[i].size) {
+				storages[i][0] |= 1ULL<<v[0];
+				storages[i][v[0] - defs[i].start] = v[1];
+			}
+		}
 	}
 }
 
-static uint32_t hash(const char *s0)
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -105,7 +124,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h & 0xffffffff;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -118,20 +146,86 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[2][3] = {
+		{0x6b366be, 0x6b3afd, 0x595a4cc},
+		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t *precomptab;
+	uint32_t h = 0, gh = 0;
+	if (dso->hashtab) {
+		h = sysv_hash(s);
+		precomptab = precomp[0];
+	} else {
+		gh = gnu_hash(s);
+		precomptab = precomp[0];
+	}
+
+	if (h == precomptab[0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h == precomptab[1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h == precomptab[2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+
 	for (; dso; dso=dso->next) {
 		Sym *sym;
+
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+
+		if (dso->hashtab && (h || !dso->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		}
+
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -320,11 +414,18 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
-	size_t dyn[DYN_CNT] = {0};
-	decode_vec(p->dynv, dyn, DYN_CNT);
-	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
-	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	static const struct tag_range defs[2] = {{0, DYN_CNT}, {DT_ADDRRNGHI-DT_ADDRNUM, DT_ADDRNUM}};
+	size_t dynbase[DYN_CNT] = {0};
+	size_t dynaddr[DT_ADDRNUM+1] = {0};
+	size_t *storage[2] = {dynbase, dynaddr};
+	decode_vec (p->dynv, defs, storage, 2);
+	p->syms = (void *)(p->base + dynbase[DT_SYMTAB]);
+	p->strings = (void *)(p->base + dynbase[DT_STRTAB]);
+	if (dynbase[DT_HASH])
+		p->hashtab = (void *)(p->base + dynbase[DT_HASH]);
+	if (dynaddr[DT_ADDRNUM-DT_ADDRTAGIDX(DT_GNU_HASH)])
+		p->ghashtab = (void *)(p->base + dynaddr[DT_ADDRNUM-DT_ADDRTAGIDX(DT_GNU_HASH)]);
+
 }
 
 static struct dso *load_library(const char *name)
@@ -486,9 +587,11 @@ static void make_global(struct dso *p)
 static void reloc_all(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
+	static const struct tag_range defs = {0, DYN_CNT};
+	size_t *storage = dyn;
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
-		decode_vec(p->dynv, dyn, DYN_CNT);
+		decode_vec(p->dynv, &defs, &storage, 1);
 #ifdef NEED_ARCH_RELOCS
 		do_arch_relocs(p, head);
 #endif
@@ -521,9 +624,11 @@ static size_t find_dyn(Phdr *ph, size_t cnt, size_t stride)
 static void do_init_fini(struct dso *p)
 {
 	size_t dyn[DYN_CNT] = {0};
+	static const struct tag_range defs = {0, DYN_CNT};
+	size_t *storage = dyn;
 	for (; p; p=p->prev) {
 		if (p->constructed) return;
-		decode_vec(p->dynv, dyn, DYN_CNT);
+		decode_vec(p->dynv, &defs, &storage, 1);
 		if (dyn[0] & (1<<DT_FINI))
 			atexit((void (*)(void))(p->base + dyn[DT_FINI]));
 		if (dyn[0] & (1<<DT_INIT))
@@ -539,6 +644,8 @@ void _dl_debug_state(void)
 void *__dynlink(int argc, char **argv)
 {
 	size_t *auxv, aux[AUX_CNT] = {0};
+	static const struct tag_range defs = {0, AUX_CNT};
+	size_t *storage = aux;
 	size_t i;
 	Phdr *phdr;
 	Ehdr *ehdr;
@@ -556,7 +663,7 @@ void *__dynlink(int argc, char **argv)
 			env_preload = argv[i]+11;
 	auxv = (void *)(argv+i+1);
 
-	decode_vec(auxv, aux, AUX_CNT);
+	decode_vec(auxv, &defs, &storage, 1);
 
 	/* Only trust user/env if kernel says we're not suid/sgid */
 	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
@@ -784,8 +891,9 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
 	Sym *sym;
+	uint32_t h = 0, gh = 0;
+
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
 		if (!p) p=head;
@@ -798,12 +906,29 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+
+	if (p->hashtab) {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	} else {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	}
+
+
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
+		}
+
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -813,6 +938,97 @@ failed:
 	return 0;
 }
 
+struct sym_search {
+	void *addr;
+	Dl_info *info;
+};
+
+static int find_closest_sym (struct dso *dso, Sym *sym, struct sym_search *search)
+{
+	void *symaddr = dso->base + sym->st_value;
+	char *strings = dso->strings;
+	Dl_info *info = search->info;
+	void *addr = search->addr;
+	void *prevaddr = info->dli_saddr;
+
+	if (sym->st_value == 0 && sym->st_shndx == SHN_UNDEF)
+		return 1;
+
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS)
+		return 1;
+
+	if (addr < symaddr)
+		return 1;
+
+	if (prevaddr && (addr - symaddr) > (addr - prevaddr))
+		return 1;
+
+	info->dli_saddr = symaddr;
+	info->dli_sname = strings + sym->st_name;
+
+	if (addr == symaddr)
+		return 0;
+
+	return 1;
+
+}
+
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct sym_search search;
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	search.info = info;
+	search.addr = addr;
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t *hashtab;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashtab) {
+				hashtab = p->hashtab;
+				for (i = 0; i < hashtab[1]; i++) {
+					if (!find_closest_sym (p, syms + i, &search))
+						return 1;
+				}
+			} else {
+				uint32_t *buckets;
+				uint32_t nbuckets;
+				uint32_t *hashvals;
+				uint32_t symndx;
+				hashtab = p->ghashtab;
+				buckets = hashtab + 4 + (hashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				nbuckets = hashtab[0];
+				hashvals = buckets + nbuckets;
+				symndx = hashtab[1];
+				for (i = 0; i < nbuckets; ++i) {
+					uint32_t n = buckets[i];
+					Sym *sym = syms + n;
+					uint32_t *hashval = hashvals + n - symndx;
+
+					do {
+						if (!find_closest_sym (p, sym, &search))
+							return 1;
+					}while (!(*hashval++ & 1));
+				}
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-15 22:41               ` boris brezillon
@ 2012-08-17  5:39                 ` Rich Felker
  2012-08-19 16:42                   ` musl
  0 siblings, 1 reply; 33+ messages in thread
From: Rich Felker @ 2012-08-17  5:39 UTC (permalink / raw)
  To: musl

On Thu, Aug 16, 2012 at 12:41:48AM +0200, boris brezillon wrote:
> > Sorry for taking a while to get back to you. I haven't had as much
> > time to work on musl the past couple weeks and some other topics (like
> > mips dynamic linking) had priority, but I hope to have more again for
> > a while now. Here's a quick review of some things that will hopefully
> > turn into a discussion for improving/simplifying the code.
> No problem.
> Thanks for the review.
> Do you want to discuss it on irc or keep going on the mailing list?

List is best I think, but I don't mind having some specific
discussions that need more real-time feedback on IRC.

> > BTW, while I _want_ it to be safe, it's possible that early switches
> > (early meaning prior to the comment in __dynlink that says libc is now
> > fully functional) will actually fail to work/crash on some archs... So
> > this needs consideration too.
> >
> I didn't knew that. Could explain me why?

The compiler may compile a switch to something like:

jmp *local_jmptable@GOTOFF(%ebx,%ecx,4)

where the local_jmptable needs to contain absolute jump addresses.
Prior to relocation, it will obly have the load-address-relative
addresses.

> > I'm not seeing why this function needs hash tables at all. It's not
> > looking up symbols, just iterating over the entire symbol table, no?
> > Please explain if I'm mistaken.
> I don't see any other way to know the sym table size except reading
> the .dynsym section header.

Indeed, I missed the fact that there's no DT_* entry reporting the
symbol table size. I checked how readelf reports the number of
entries, and it's using the section headers. I'm under the impression
that using them would not be valid for the dynamic linker; reportedly
ELF files are supposed to be valid even with the section headers
stripped.

> That's why I'm iterating over the hash table.
> For sysv hash the nchain (second entry of hash table) gives the sym table size.
> For gnu hash It's a little bit more complicated (see
> https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections).

Wow, leave it to the GNU folks to take something simple and make it
difficult...

> Should we parse the .dynsym section header and store the sym table
> size in dso struct?
> Do you see any other way to get the dynsym table size or at least
> iterate over the dynsym table (specific pattern for last element ?).

I've been looking but I don't see any way yet.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-11 23:05             ` Rich Felker
  2012-08-15 22:41               ` boris brezillon
  2012-08-16 18:03               ` musl
@ 2012-08-17 16:35               ` musl
  2 siblings, 0 replies; 33+ messages in thread
From: musl @ 2012-08-17 16:35 UTC (permalink / raw)
  To: musl

On 12/08/2012 01:05, Rich Felker wrote:
> On Wed, Aug 08, 2012 at 03:57:15PM +0200, musl wrote:
>> Same as before except I use bit mask to support multiple hash algorithms.
> Sorry for taking a while to get back to you. I haven't had as much
> time to work on musl the past couple weeks and some other topics (like
> mips dynamic linking) had priority, but I hope to have more again for
> a while now. Here's a quick review of some things that will hopefully
> turn into a discussion for improving/simplifying the code.
>
>> +#ifdef _GNU_SOURCE
>> +typedef struct {
>> +    const char *dli_fname;  /* Pathname of shared object that
>> +                               contains address */
>> +    void       *dli_fbase;  /* Address at which shared object
>> +                               is loaded */
>> +    const char *dli_sname;  /* Name of nearest symbol with address
>> +                               lower than addr */
>> +    void       *dli_saddr;  /* Exact address of symbol named
>> +                               in dli_sname */
>> +} Dl_info;
> musl policy is not to have commentary, especially copied from
> third-party sources, in the public headers. This is partly to
> strengthen the claim that all public headers are public domain
> (contain no copyrightable content) and partly just to avoid size creep
> and wasted parsing time by the compiler.
>
>>  static void decode_dyn(struct dso *p)
>>  {
>> -	size_t dyn[DYN_CNT] = {0};
>> -	decode_vec(p->dynv, dyn, DYN_CNT);
>> -	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
>> -	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
>> -	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
>> +	size_t *v;
>> +	p->hashalgs = 0;
>> +	for (v = p->dynv; v[0]; v+=2) {
>> +		switch (v[0]) {
>> +		case DT_SYMTAB:
>> +			p->syms = (void *)(p->base + v[1]);
>> +			break;
>> +		case DT_HASH:
>> +			p->hashtabs[SYSV_HASH] = (void *)(p->base + v[1]);
>> +			p->hashalgs |= (1 << SYSV_HASH);
>> +			break;
>> +		case DT_STRTAB:
>> +			p->strings = (void *)(p->base + v[1]);
>> +			break;
>> +		case DT_GNU_HASH:
>> +			p->hashtabs[GNU_HASH] = (void *)(p->base + v[1]);
>> +			p->hashalgs |= (1 << GNU_HASH);
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +	}
>>  }
> This is rather ugly but I don't see a better way right off, since
> DT_GNU_HASH has that huge value... Maybe it would be nice to improve
> decode_vec to have a variant that takes a (static const) table of DT_x
> values and struct offsets to store them at when found..? This is just
> some rambling and I'm not sure it's better, but it might be smart if
> we're going to want to continue adding support for more
> non-original-sysv DT_* entries with huge values, so we don't have to
> write new switches for each one.
I've done another implem of the decode_vec function :

static void decode_vec(size_t *v, const size_t *def, size_t *storage, size_t *found, size_t size)
{
    size_t i;
    memset(storage, 0, size*sizeof(size_t));
    memset(found, 0, (size+sizeof(size_t)*8-1)/8);
    for (; v[0]; v+=2) {
        if (v[0] < def[0] || v[0] > def[size-1])
            continue;
        for (i = 0; i < size; ++i) {
            if (v[0] < def[i])
                break;
            else if (v[0] == def[i]) {
                storage[i] = v[1];
                found[i/(sizeof(size_t)*8)] |= (1 << (i%(sizeof(size_t)*8)));
                break;
            }
        }
    }
}

The def table is a table of ordered tag values (DT_x, AT_x, ...) values. When a tag is found in v the value is stored in
the storage table and a flag is set in the found table.

This is the modified decode_dyn function:

static void decode_dyn(struct dso *p)
{
    static const size_t def[] = {DT_HASH, DT_STRTAB, DT_SYMTAB, DT_GNU_HASH};
    size_t dyn[4];
    size_t found;
    decode_vec (p->dynv, def, dyn, &found, 4);
    p->syms = (void *)(p->base + dyn[2]);
    p->strings = (void *)(p->base + dyn[1]);
    if (found&(1<<0))
        p->hashtab = (void *)(p->base + dyn[0]);
    if (found&(1<<3))
        p->ghashtab = (void *)(p->base + dyn[3]);

}

I'm not sure it's the best way to support big DT_x values as it implies an iteration over the def table for each entry
in the vector table.
In the previous example, if the dyn vector contains multiple tags with values between DT_SYMTAB and DT_GNU_HASH, the
decode_vec func iterates over the first 3 entries of the def table for each of these tags.

> BTW, while I _want_ it to be safe, it's possible that early switches
> (early meaning prior to the comment in __dynlink that says libc is now
> fully functional) will actually fail to work/crash on some archs... So
> this needs consideration too.
>
>>  static struct dso *load_library(const char *name)
>> @@ -784,8 +899,11 @@ end:
>>  static void *do_dlsym(struct dso *p, const char *s, void *ra)
>>  {
>> [...]
>>  	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
>>  		return p->base + sym->st_value;
>>  	if (p->deps) for (i=0; p->deps[i]; i++) {
>> -		sym = lookup(s, h, p->deps[i]);
>> +		algs = p->deps[i]->hashalgs;
>> +		if (!(algs & ok)) {
>> +			if (algs & SYSV_HASH) {
>> +				h[SYSV_HASH] = sysv_hash(s);
>> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +				ok |= SYSV_HASH;
>> +			} else {
>> +				h[GNU_HASH] = gnu_hash(s);
>> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +				ok |= GNU_HASH;
>> +			}
>> +		} else {
>> +			if (algs & SYSV_HASH)
>> +				sym = sysv_lookup(s, h[SYSV_HASH], p->deps[i]);
>> +			else
>> +				sym = gnu_lookup(s, h[GNU_HASH], p->deps[i]);
>> +		}
> This looks like a lot of code duplication and extra unnecessary
> variables. The way I would do it is something like:
>
> if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
> 	if (!h) h = hash(s);
> 	sym = sysv_lookup(s, h, p->deps[i]);
> }
>
> i.e. if there's a sysv hash table and we've already computed h (sysv
> hash) or if there's no gnu hash table, compute h if it wasn't already
> computed, and then attempt a lookup with it.
>
> I'm not sure I got the logic all right (this is really a 1-minute
> glance over the code right now amidst doing lots of other stuff too)
> but the ideas are:
>
> - no need for extra vars for bitmask. Whether the hash var for the
>   corresponding hash type is nonzero is sufficient to tell whether
>   it's been computed.
> - no need for extra vars/fields to store which hash types a dso has.
>   Just use the hashtab/ghashtab fields in the dso struct, and let them
>   be null if the corresponding hash table does not exist. (And don't
>   make them an array unless there's a real benefit in using an array;
>   I don't think there is any benefit unless you're aiming for
>   extensibility to support N hash types.)
>
>> +static int do_dladdr (void *addr, Dl_info *info)
>> [...]
>> +			if (p->hashalgs & (1 << SYSV_HASH)) {
>> +				hashtab = p->hashtabs[SYSV_HASH];
>> +				for (i = 0; i < hashtab[1]; i++) {
> I'm not seeing why this function needs hash tables at all. It's not
> looking up symbols, just iterating over the entire symbol table, no?
> Please explain if I'm mistaken.
>
> Rich



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-17  5:39                 ` Rich Felker
@ 2012-08-19 16:42                   ` musl
  2012-08-20  2:06                     ` Rich Felker
  0 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-19 16:42 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2933 bytes --]

Hi,

This patch fixes a bug in dladdr: sym var was not incremented across gnu hash chain iteration).
I also reworked the dladdr implem to share more code between sysv and gnu hash.
I still haven't found a better way to get the symbol table size. Do you?

This patch uses the new decode_vec function, but as I told you in my previous mail, I'm not sure this the way to go.
Could you tell me what you think?

On 17/08/2012 07:39, Rich Felker wrote:
> On Thu, Aug 16, 2012 at 12:41:48AM +0200, boris brezillon wrote:
>>> Sorry for taking a while to get back to you. I haven't had as much
>>> time to work on musl the past couple weeks and some other topics (like
>>> mips dynamic linking) had priority, but I hope to have more again for
>>> a while now. Here's a quick review of some things that will hopefully
>>> turn into a discussion for improving/simplifying the code.
>> No problem.
>> Thanks for the review.
>> Do you want to discuss it on irc or keep going on the mailing list?
> List is best I think, but I don't mind having some specific
> discussions that need more real-time feedback on IRC.
>
>>> BTW, while I _want_ it to be safe, it's possible that early switches
>>> (early meaning prior to the comment in __dynlink that says libc is now
>>> fully functional) will actually fail to work/crash on some archs... So
>>> this needs consideration too.
>>>
>> I didn't knew that. Could explain me why?
> The compiler may compile a switch to something like:
>
> jmp *local_jmptable@GOTOFF(%ebx,%ecx,4)
>
> where the local_jmptable needs to contain absolute jump addresses.
> Prior to relocation, it will obly have the load-address-relative
> addresses.
>
>>> I'm not seeing why this function needs hash tables at all. It's not
>>> looking up symbols, just iterating over the entire symbol table, no?
>>> Please explain if I'm mistaken.
>> I don't see any other way to know the sym table size except reading
>> the .dynsym section header.
> Indeed, I missed the fact that there's no DT_* entry reporting the
> symbol table size. I checked how readelf reports the number of
> entries, and it's using the section headers. I'm under the impression
> that using them would not be valid for the dynamic linker; reportedly
> ELF files are supposed to be valid even with the section headers
> stripped.
>
>> That's why I'm iterating over the hash table.
>> For sysv hash the nchain (second entry of hash table) gives the sym table size.
>> For gnu hash It's a little bit more complicated (see
>> https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections).
> Wow, leave it to the GNU folks to take something simple and make it
> difficult...
>
>> Should we parse the .dynsym section header and store the sym table
>> size in dso struct?
>> Do you see any other way to get the dynsym table size or at least
>> iterate over the dynsym table (specific pattern for last element ?).
> I've been looking but I don't see any way yet.
>
> Rich


[-- Attachment #2: dladdr-gnu-hash-v3.patch --]
[-- Type: text/x-patch, Size: 14726 bytes --]

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8524e0b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,17 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+	const char *dli_fname;
+	void *dli_fbase;
+	const char *dli_sname;
+	void *dli_saddr;
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index f55c6f1..bf1ec6b 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
 typedef Elf32_Sym Sym;
 #define R_TYPE(x) ((x)&255)
 #define R_SYM(x) ((x)>>8)
+#define ELF_ST_TYPE ELF32_ST_TYPE
 #else
 typedef Elf64_Ehdr Ehdr;
 typedef Elf64_Phdr Phdr;
 typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #endif
 
 struct debug {
@@ -53,6 +56,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -82,19 +86,27 @@ static struct debug debug;
 
 struct debug *_dl_debug_addr = &debug;
 
-#define AUX_CNT 24
-#define DYN_CNT 34
-
-static void decode_vec(size_t *v, size_t *a, size_t cnt)
+static void decode_vec(size_t *v, const size_t *def, size_t *storage, size_t *found, size_t size)
 {
-	memset(a, 0, cnt*sizeof(size_t));
-	for (; v[0]; v+=2) if (v[0]<cnt) {
-		a[0] |= 1ULL<<v[0];
-		a[v[0]] = v[1];
+	size_t i;
+	memset(storage, 0, size*sizeof(size_t));
+	memset(found, 0, (size+sizeof(size_t)*8-1)/8);
+	for (; v[0]; v+=2) {
+		if (v[0] < def[0] || v[0] > def[size-1])
+			continue;
+		for (i = 0; i < size; ++i) {
+			if (v[0] < def[i])
+				break;
+			else if (v[0] == def[i]) {
+				storage[i] = v[1];
+				found[i/(sizeof(size_t)*8)] |= (1 << (i%(sizeof(size_t)*8)));
+				break;
+			}
+		}
 	}
 }
 
-static uint32_t hash(const char *s0)
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -105,7 +117,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h & 0xffffffff;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -118,20 +139,86 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[2][3] = {
+		{0x6b366be, 0x6b3afd, 0x595a4cc},
+		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t *precomptab;
+	uint32_t h = 0, gh = 0;
+	if (dso->hashtab) {
+		h = sysv_hash(s);
+		precomptab = precomp[0];
+	} else {
+		gh = gnu_hash(s);
+		precomptab = precomp[0];
+	}
+
+	if (h == precomptab[0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h == precomptab[1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h == precomptab[2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+
 	for (; dso; dso=dso->next) {
 		Sym *sym;
+
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+
+		if (dso->hashtab && (h || !dso->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		}
+
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -320,11 +407,16 @@ static int path_open(const char *name, const char *search, char *buf, size_t buf
 
 static void decode_dyn(struct dso *p)
 {
-	size_t dyn[DYN_CNT] = {0};
-	decode_vec(p->dynv, dyn, DYN_CNT);
-	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
-	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	static const size_t def[] = {DT_HASH, DT_STRTAB, DT_SYMTAB, DT_GNU_HASH};
+	size_t dyn[4];
+	size_t found;
+	decode_vec (p->dynv, def, dyn, &found, 4);
+	p->syms = (void *)(p->base + dyn[2]);
+	p->strings = (void *)(p->base + dyn[1]);
+	if (found&(1<<0))
+		p->hashtab = (void *)(p->base + dyn[0]);
+	if (found&(1<<3))
+		p->ghashtab = (void *)(p->base + dyn[3]);
 }
 
 static struct dso *load_library(const char *name)
@@ -485,17 +577,22 @@ static void make_global(struct dso *p)
 
 static void reloc_all(struct dso *p)
 {
-	size_t dyn[DYN_CNT] = {0};
+	size_t dyn[7];
+	size_t found;
+	static const size_t def[] = {
+		DT_PLTRELSZ, DT_RELA, DT_RELASZ, DT_REL, 
+		DT_RELSZ, DT_PLTREL, DT_JMPREL
+	};
 	for (; p; p=p->next) {
 		if (p->relocated) continue;
-		decode_vec(p->dynv, dyn, DYN_CNT);
+		decode_vec(p->dynv, def, dyn, &found, 7);
 #ifdef NEED_ARCH_RELOCS
 		do_arch_relocs(p, head);
 #endif
-		do_relocs(p, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
-			2+(dyn[DT_PLTREL]==DT_RELA));
-		do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
-		do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
+		do_relocs(p, (void *)(p->base+dyn[6]), dyn[0],
+			2+(dyn[5]==DT_RELA));
+		do_relocs(p, (void *)(p->base+dyn[3]), dyn[4], 2);
+		do_relocs(p, (void *)(p->base+dyn[1]), dyn[2], 3);
 		p->relocated = 1;
 	}
 }
@@ -520,14 +617,16 @@ static size_t find_dyn(Phdr *ph, size_t cnt, size_t stride)
 
 static void do_init_fini(struct dso *p)
 {
-	size_t dyn[DYN_CNT] = {0};
+	size_t dyn[2];
+	static const size_t def[] = {DT_INIT, DT_FINI};
+	size_t found;
 	for (; p; p=p->prev) {
 		if (p->constructed) return;
-		decode_vec(p->dynv, dyn, DYN_CNT);
-		if (dyn[0] & (1<<DT_FINI))
-			atexit((void (*)(void))(p->base + dyn[DT_FINI]));
-		if (dyn[0] & (1<<DT_INIT))
-			((void (*)(void))(p->base + dyn[DT_INIT]))();
+		decode_vec(p->dynv, def, dyn, &found, 2);
+		if (found & (1<<1))
+			atexit((void (*)(void))(p->base + dyn[1]));
+		if (found & (1<<0))
+			((void (*)(void))(p->base + dyn[0]))();
 		p->constructed = 1;
 	}
 }
@@ -538,7 +637,14 @@ void _dl_debug_state(void)
 
 void *__dynlink(int argc, char **argv)
 {
-	size_t *auxv, aux[AUX_CNT] = {0};
+	size_t *auxv;
+	static const size_t def[] = {
+		AT_PHDR, AT_PHENT, AT_PHNUM, AT_BASE, AT_ENTRY,
+		AT_UID,	AT_EUID, AT_GID, AT_EGID, AT_SECURE,
+		AT_SYSINFO_EHDR
+	};
+	size_t aux[11];
+	size_t found;
 	size_t i;
 	Phdr *phdr;
 	Ehdr *ehdr;
@@ -556,11 +662,11 @@ void *__dynlink(int argc, char **argv)
 			env_preload = argv[i]+11;
 	auxv = (void *)(argv+i+1);
 
-	decode_vec(auxv, aux, AUX_CNT);
+	decode_vec(auxv, def, aux, &found, 11);
 
 	/* Only trust user/env if kernel says we're not suid/sgid */
-	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
-	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
+	if ((found&0x1e0)!=0x1e0 || aux[5]!=aux[6]
+	  || aux[7]!=aux[8] || aux[9]) {
 		env_path = 0;
 		env_preload = 0;
 	}
@@ -569,36 +675,35 @@ void *__dynlink(int argc, char **argv)
 	 * will not be set. In that case, we assume the base address is
 	 * the start of the page containing the PHDRs; I don't know any
 	 * better approach... */
-	if (!aux[AT_BASE]) {
-		aux[AT_BASE] = aux[AT_PHDR] & -PAGE_SIZE;
-		aux[AT_PHDR] = aux[AT_PHENT] = aux[AT_PHNUM] = 0;
+	if (!aux[3]) {
+		aux[3] = aux[0] & -PAGE_SIZE;
+		aux[0] = aux[1] = aux[2] = 0;
 	}
 
 	/* The dynamic linker load address is passed by the kernel
 	 * in the AUX vector, so this is easy. */
-	lib->base = (void *)aux[AT_BASE];
+	lib->base = (void *)aux[3];
 	lib->name = lib->shortname = "libc.so";
 	lib->global = 1;
 	ehdr = (void *)lib->base;
 	lib->dynv = (void *)(lib->base + find_dyn(
-		(void *)(aux[AT_BASE]+ehdr->e_phoff),
+		(void *)(aux[3]+ehdr->e_phoff),
 		ehdr->e_phnum, ehdr->e_phentsize));
 	decode_dyn(lib);
-
-	if (aux[AT_PHDR]) {
+	if (aux[0]) {
 		size_t interp_off = 0;
 		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
-		phdr = (void *)aux[AT_PHDR];
-		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
+		phdr = (void *)aux[0];
+		for (i=aux[2]; i; i--, phdr=(void *)((char *)phdr + aux[1])) {
 			if (phdr->p_type == PT_PHDR)
-				app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+				app->base = (void *)(aux[0] - phdr->p_vaddr);
 			else if (phdr->p_type == PT_INTERP)
 				interp_off = (size_t)phdr->p_vaddr;
 		}
 		if (interp_off) lib->name = (char *)app->base + interp_off;
 		app->name = argv[0];
 		app->dynv = (void *)(app->base + find_dyn(
-			(void *)aux[AT_PHDR], aux[AT_PHNUM], aux[AT_PHENT]));
+			(void *)aux[0], aux[2], aux[1]));
 	} else {
 		int fd;
 		char *ldname = argv[0];
@@ -628,16 +733,15 @@ void *__dynlink(int argc, char **argv)
 		lib->name = ldname;
 		app->name = argv[0];
 		app->dynv = (void *)(app->base + dyno);
-		aux[AT_ENTRY] = ehdr->e_entry;
+		aux[4] = ehdr->e_entry;
 	}
 	app->global = 1;
 	app->constructed = 1;
 	decode_dyn(app);
 
 	/* Attach to vdso, if provided by the kernel */
-	for (i=0; auxv[i]; i+=2) {
-		size_t vdso_base = auxv[i+1];
-		if (auxv[i] != AT_SYSINFO_EHDR) continue;
+	if (found&(1<<10)) {
+		size_t vdso_base = aux[10];
 		ehdr = (void *)vdso_base;
 		phdr = (void *)(vdso_base + ehdr->e_phoff);
 		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
@@ -651,7 +755,6 @@ void *__dynlink(int argc, char **argv)
 		decode_dyn(vdso);
 		vdso->prev = lib;
 		lib->next = vdso;
-		break;
 	}
 
 	/* Initial dso chain consists only of the app. We temporarily
@@ -667,7 +770,7 @@ void *__dynlink(int argc, char **argv)
 	/* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
 
 	/* Donate unused parts of app and library mapping to malloc */
-	reclaim_gaps(app->base, (void *)aux[AT_PHDR], aux[AT_PHENT], aux[AT_PHNUM]);
+	reclaim_gaps(app->base, (void *)aux[0], aux[1], aux[2]);
 	ehdr = (void *)lib->base;
 	reclaim_gaps(lib->base, (void *)(lib->base+ehdr->e_phoff),
 		ehdr->e_phentsize, ehdr->e_phnum);
@@ -713,7 +816,7 @@ void *__dynlink(int argc, char **argv)
 	}
 
 	errno = 0;
-	return (void *)aux[AT_ENTRY];
+	return (void *)aux[4];
 }
 
 void *dlopen(const char *file, int mode)
@@ -784,7 +887,7 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
+	uint32_t h = 0, gh = 0;
 	Sym *sym;
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -798,12 +901,28 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	
+	if (p->hashtab) {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	} else {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	}
+
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
+		}
+
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -813,6 +932,96 @@ failed:
 	return 0;
 }
 
+struct sym_search {
+	void *addr;
+	Dl_info *info;
+};
+
+static int find_closest_sym (struct dso *dso, Sym *sym, struct sym_search *search)
+{
+	void *symaddr = dso->base + sym->st_value;
+	char *strings = dso->strings;
+	Dl_info *info = search->info;
+	void *addr = search->addr;
+	void *prevaddr = info->dli_saddr;
+
+	if (sym->st_value == 0 && sym->st_shndx == SHN_UNDEF)
+		return 1;
+
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS)
+		return 1;
+
+	if (addr < symaddr)
+		return 1;
+
+	if (prevaddr && (addr - symaddr) > (addr - prevaddr))
+		return 1;
+
+	info->dli_saddr = symaddr;
+	info->dli_sname = strings + sym->st_name;
+
+	if (addr == symaddr)
+		return 0;
+
+	return 1;
+
+}
+
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct sym_search search;
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	search.info = info;
+	search.addr = addr;
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t nsym = 0;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashtab)
+				nsym = p->hashtab[1];
+			else {
+				uint32_t *buckets;
+				buckets = p->ghashtab + 4 + (p->ghashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				syms += p->ghashtab[1];
+				for (i = 0; i < p->ghashtab[0]; ++i) {
+					if (buckets[i] > nsym)
+						nsym = buckets[i];
+				}
+
+				if (nsym) {
+					nsym -= p->ghashtab[1];
+					uint32_t *hashval = buckets + p->ghashtab[0] + nsym;
+					do {
+						nsym++;
+					}while (!(*hashval++ & 1));
+				}
+
+			}
+
+			for (i = 0; i < nsym; i++) {
+				if (!find_closest_sym (p, syms + i, &search))
+					return 1;
+			}
+
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-19 16:42                   ` musl
@ 2012-08-20  2:06                     ` Rich Felker
  2012-08-20 12:55                       ` musl
  0 siblings, 1 reply; 33+ messages in thread
From: Rich Felker @ 2012-08-20  2:06 UTC (permalink / raw)
  To: musl

On Sun, Aug 19, 2012 at 06:42:30PM +0200, musl wrote:
> Hi,
> 
> This patch fixes a bug in dladdr: sym var was not incremented across gnu hash chain iteration).
> I also reworked the dladdr implem to share more code between sysv and gnu hash.
> I still haven't found a better way to get the symbol table size. Do you?
> 
> This patch uses the new decode_vec function, but as I told you in my
> previous mail, I'm not sure this the way to go.
> Could you tell me what you think?

Yeah, I'm not really happy with it either. Trying to think of
something better...

> diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
> index f55c6f1..bf1ec6b 100644
> --- a/src/ldso/dynlink.c
> +++ b/src/ldso/dynlink.c
> @@ -1,3 +1,4 @@
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
>  typedef Elf32_Sym Sym;
>  #define R_TYPE(x) ((x)&255)
>  #define R_SYM(x) ((x)>>8)
> +#define ELF_ST_TYPE ELF32_ST_TYPE
>  #else
>  typedef Elf64_Ehdr Ehdr;
>  typedef Elf64_Phdr Phdr;
>  typedef Elf64_Sym Sym;
>  #define R_TYPE(x) ((x)&0xffffffff)
>  #define R_SYM(x) ((x)>>32)
> +#define ELF_ST_TYPE ELF64_ST_TYPE
>  #endif

These definitions are actually the same. I would just
#define ST_TYPE(x) ((x)&15)

> -static uint32_t hash(const char *s0)
> +static uint32_t sysv_hash(const char *s0)
>  {
>  	const unsigned char *s = (void *)s0;
>  	uint_fast32_t h = 0;
> @@ -105,7 +117,16 @@ static uint32_t hash(const char *s0)
>  	return h & 0xfffffff;
>  }
>  
> -static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
> +static uint32_t gnu_hash (const char *s0)
> +{
> +	const unsigned char *s = (void *)s0;
> +	uint_fast32_t h = 5381;
> +	for (; *s; s++)
> +		h = h*33 + *s;
> +	return h & 0xffffffff;
> +}

The final &0xffffffff is a no-op. Note that the one in sysv_hash is
not a no-op; sysv_hash's result is 28 bits, not 32.

Re-reading this code also raised another issue: I'm not entirely
convinced that 0 is not a possible hash value, which may invalidate
what I said before about using h==0 to indicate "not yet computed". Of
course, it may not matter; if one in 4 billion symbol names get their
hashes repeatedly recomputed rather than being reused, it's not going
to make any difference to overall performance...

>  	/* Only trust user/env if kernel says we're not suid/sgid */
> -	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
> -	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
> +	if ((found&0x1e0)!=0x1e0 || aux[5]!=aux[6]
> +	  || aux[7]!=aux[8] || aux[9]) {
>  		env_path = 0;
>  		env_preload = 0;

Looking at this, I agree that the new decode_vec idea is not a good
direction. It's obfuscating the code badly.

For now, how about leaving the old decode_vec alone and just adding a
new one with a different name for getting to "high" entries. I wonder
if it would be possible, rather than using a list of wanted entries,
to use a base/count rather than always working zero-based like
decode_vec does. This would allow the resulting indices to still
actually mean something so we don't wind up with magic numbers all
over the code..

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-20  2:06                     ` Rich Felker
@ 2012-08-20 12:55                       ` musl
  2012-08-20 14:32                         ` musl
  0 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-20 12:55 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 3846 bytes --]

Hi,

This patch adds:
* decode_vec2 implemented for high DT_x values.
* find_closest_sym code integrated in do_dladdr function.
* other fixes you asked in your previous mail.

Regards,

Boris.


On 20/08/2012 04:06, Rich Felker wrote:
> On Sun, Aug 19, 2012 at 06:42:30PM +0200, musl wrote:
>> Hi,
>>
>> This patch fixes a bug in dladdr: sym var was not incremented across gnu hash chain iteration).
>> I also reworked the dladdr implem to share more code between sysv and gnu hash.
>> I still haven't found a better way to get the symbol table size. Do you?
>>
>> This patch uses the new decode_vec function, but as I told you in my
>> previous mail, I'm not sure this the way to go.
>> Could you tell me what you think?
> Yeah, I'm not really happy with it either. Trying to think of
> something better...
>
>> diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
>> index f55c6f1..bf1ec6b 100644
>> --- a/src/ldso/dynlink.c
>> +++ b/src/ldso/dynlink.c
>> @@ -1,3 +1,4 @@
>> +#define _GNU_SOURCE
>>  #include <stdio.h>
>>  #include <stdlib.h>
>>  #include <string.h>
>> @@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
>>  typedef Elf32_Sym Sym;
>>  #define R_TYPE(x) ((x)&255)
>>  #define R_SYM(x) ((x)>>8)
>> +#define ELF_ST_TYPE ELF32_ST_TYPE
>>  #else
>>  typedef Elf64_Ehdr Ehdr;
>>  typedef Elf64_Phdr Phdr;
>>  typedef Elf64_Sym Sym;
>>  #define R_TYPE(x) ((x)&0xffffffff)
>>  #define R_SYM(x) ((x)>>32)
>> +#define ELF_ST_TYPE ELF64_ST_TYPE
>>  #endif
> These definitions are actually the same. I would just
> #define ST_TYPE(x) ((x)&15)
>
>> -static uint32_t hash(const char *s0)
>> +static uint32_t sysv_hash(const char *s0)
>>  {
>>  	const unsigned char *s = (void *)s0;
>>  	uint_fast32_t h = 0;
>> @@ -105,7 +117,16 @@ static uint32_t hash(const char *s0)
>>  	return h & 0xfffffff;
>>  }
>>  
>> -static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
>> +static uint32_t gnu_hash (const char *s0)
>> +{
>> +	const unsigned char *s = (void *)s0;
>> +	uint_fast32_t h = 5381;
>> +	for (; *s; s++)
>> +		h = h*33 + *s;
>> +	return h & 0xffffffff;
>> +}
> The final &0xffffffff is a no-op. Note that the one in sysv_hash is
> not a no-op; sysv_hash's result is 28 bits, not 32.
>
> Re-reading this code also raised another issue: I'm not entirely
> convinced that 0 is not a possible hash value, which may invalidate
> what I said before about using h==0 to indicate "not yet computed". Of
> course, it may not matter; if one in 4 billion symbol names get their
> hashes repeatedly recomputed rather than being reused, it's not going
> to make any difference to overall performance...
>
>>  	/* Only trust user/env if kernel says we're not suid/sgid */
>> -	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
>> -	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
>> +	if ((found&0x1e0)!=0x1e0 || aux[5]!=aux[6]
>> +	  || aux[7]!=aux[8] || aux[9]) {
>>  		env_path = 0;
>>  		env_preload = 0;
> Looking at this, I agree that the new decode_vec idea is not a good
> direction. It's obfuscating the code badly.
>
> For now, how about leaving the old decode_vec alone and just adding a
> new one with a different name for getting to "high" entries. I wonder
> if it would be possible, rather than using a list of wanted entries,
> to use a base/count rather than always working zero-based like
> decode_vec does. This would allow the resulting indices to still
> actually mean something so we don't wind up with magic numbers all
> over the code..
The decode_vec uses the first entry in 'a' to store the tags found in the given vector.
If cnt is bigger than sizeof(size_t) * 8, there is a shift overflow.
It's fine in the current use as you only test tag values < 32 (or don't use decode_vec to test it : AT_SYSINFO_EHDR).
Should I take care of those cases in decode_vec2 (add a separate 'found' table argument)?
>
> Rich


[-- Attachment #2: dladdr-gnu-hash-v4.patch --]
[-- Type: text/x-patch, Size: 7892 bytes --]

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8524e0b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,17 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+	const char *dli_fname;
+	void *dli_fbase;
+	const char *dli_sname;
+	void *dli_saddr;
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 9692c6b..90272c5 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -35,6 +36,7 @@ typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
 #endif
+#define ST_TYPE(x) ((x)&15)
 
 struct debug {
 	int ver;
@@ -53,6 +55,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -85,6 +88,7 @@ struct debug *_dl_debug_addr = &debug;
 
 #define AUX_CNT 24
 #define DYN_CNT 34
+#define DYN_ADDR_CNT 12
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
 {
@@ -95,7 +99,16 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static void decode_vec2(size_t *v, size_t *a, size_t cnt, size_t offset)
+{
+	memset(a, 0, cnt*sizeof(size_t));
+	for (; v[0]; v+=2) if (v[0]<offset && v[0]>(offset-cnt)) {
+		a[0] |= 1ULL<<(offset-v[0]);
+		a[offset-v[0]] = v[1];
+	}
+}
+
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -106,7 +119,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -119,20 +141,81 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
+	uint32_t h = 0, gh = 0;
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[2][3] = {
+		{0x6b366be, 0x6b3afd, 0x595a4cc},
+		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t *precomptab;
+	if (dso->hashtab) {
+		h = sysv_hash(s);
+		precomptab = precomp[0];
+	} else {
+		gh = gnu_hash(s);
+		precomptab = precomp[0];
+	}
+	if (h == precomptab[0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h == precomptab[1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h == precomptab[2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
 	for (; dso; dso=dso->next) {
 		Sym *sym;
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+		if (dso->hashtab && (h || !dso->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		}
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -325,8 +408,12 @@ static void decode_dyn(struct dso *p)
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	if (dyn[0]&(1<<DT_HASH))
+		p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	decode_vec2(p->dynv, dyn, DYN_ADDR_CNT, DT_ADDRRNGHI+1);
+	if (dyn[0]&(1<<(DT_ADDRTAGIDX(DT_GNU_HASH)+1)))
+		p->ghashtab = (void *)(p->base + dyn[DT_ADDRTAGIDX(DT_GNU_HASH)+1]);
 }
 
 static struct dso *load_library(const char *name)
@@ -788,7 +875,7 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
+	uint32_t h = 0, gh = 0;
 	Sym *sym;
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -802,12 +889,25 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	if (p->hashtab) {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	} else {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	}
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
+		}
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -817,6 +917,69 @@ failed:
 	return 0;
 }
 
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t nsym = 0;
+			char *strings = p->strings;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashtab)
+				nsym = p->hashtab[1];
+			else {
+				uint32_t *buckets;
+				buckets = p->ghashtab + 4 + (p->ghashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				syms += p->ghashtab[1];
+				for (i = 0; i < p->ghashtab[0]; ++i) {
+					if (buckets[i] > nsym)
+						nsym = buckets[i];
+				}
+
+				if (nsym) {
+					nsym -= p->ghashtab[1];
+					uint32_t *hashval = buckets + p->ghashtab[0] + nsym;
+					do {
+						nsym++;
+					}while (!(*hashval++ & 1));
+				}
+
+			}
+
+			for (i = 0; i < nsym; i++) {
+				if ((syms[i].st_value != 0 || syms[i].st_shndx != SHN_UNDEF) &&
+					 ST_TYPE(syms[i].st_info) != STT_TLS) {
+					void *symaddr = p->base + syms[i].st_value;
+
+					if (addr >= symaddr && (!info->dli_saddr || (addr-symaddr)<(addr-info->dli_saddr))) {
+						info->dli_saddr = symaddr;
+						info->dli_sname = strings + syms[i].st_name;
+
+						if (addr == symaddr)
+							break;
+					}
+				}
+			}
+
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-20 12:55                       ` musl
@ 2012-08-20 14:32                         ` musl
  2012-08-23 21:39                           ` Rich Felker
  0 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-20 14:32 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 4089 bytes --]

I missed a bug in my previous patch :
in find_sym func precomptab was always set to sysv_precomp.

On 20/08/2012 14:55, musl wrote:
> Hi,
>
> This patch adds:
> * decode_vec2 implemented for high DT_x values.
> * find_closest_sym code integrated in do_dladdr function.
> * other fixes you asked in your previous mail.
>
> Regards,
>
> Boris.
>
>
> On 20/08/2012 04:06, Rich Felker wrote:
>> On Sun, Aug 19, 2012 at 06:42:30PM +0200, musl wrote:
>>> Hi,
>>>
>>> This patch fixes a bug in dladdr: sym var was not incremented across gnu hash chain iteration).
>>> I also reworked the dladdr implem to share more code between sysv and gnu hash.
>>> I still haven't found a better way to get the symbol table size. Do you?
>>>
>>> This patch uses the new decode_vec function, but as I told you in my
>>> previous mail, I'm not sure this the way to go.
>>> Could you tell me what you think?
>> Yeah, I'm not really happy with it either. Trying to think of
>> something better...
>>
>>> diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
>>> index f55c6f1..bf1ec6b 100644
>>> --- a/src/ldso/dynlink.c
>>> +++ b/src/ldso/dynlink.c
>>> @@ -1,3 +1,4 @@
>>> +#define _GNU_SOURCE
>>>  #include <stdio.h>
>>>  #include <stdlib.h>
>>>  #include <string.h>
>>> @@ -28,12 +29,14 @@ typedef Elf32_Phdr Phdr;
>>>  typedef Elf32_Sym Sym;
>>>  #define R_TYPE(x) ((x)&255)
>>>  #define R_SYM(x) ((x)>>8)
>>> +#define ELF_ST_TYPE ELF32_ST_TYPE
>>>  #else
>>>  typedef Elf64_Ehdr Ehdr;
>>>  typedef Elf64_Phdr Phdr;
>>>  typedef Elf64_Sym Sym;
>>>  #define R_TYPE(x) ((x)&0xffffffff)
>>>  #define R_SYM(x) ((x)>>32)
>>> +#define ELF_ST_TYPE ELF64_ST_TYPE
>>>  #endif
>> These definitions are actually the same. I would just
>> #define ST_TYPE(x) ((x)&15)
>>
>>> -static uint32_t hash(const char *s0)
>>> +static uint32_t sysv_hash(const char *s0)
>>>  {
>>>  	const unsigned char *s = (void *)s0;
>>>  	uint_fast32_t h = 0;
>>> @@ -105,7 +117,16 @@ static uint32_t hash(const char *s0)
>>>  	return h & 0xfffffff;
>>>  }
>>>  
>>> -static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
>>> +static uint32_t gnu_hash (const char *s0)
>>> +{
>>> +	const unsigned char *s = (void *)s0;
>>> +	uint_fast32_t h = 5381;
>>> +	for (; *s; s++)
>>> +		h = h*33 + *s;
>>> +	return h & 0xffffffff;
>>> +}
>> The final &0xffffffff is a no-op. Note that the one in sysv_hash is
>> not a no-op; sysv_hash's result is 28 bits, not 32.
>>
>> Re-reading this code also raised another issue: I'm not entirely
>> convinced that 0 is not a possible hash value, which may invalidate
>> what I said before about using h==0 to indicate "not yet computed". Of
>> course, it may not matter; if one in 4 billion symbol names get their
>> hashes repeatedly recomputed rather than being reused, it's not going
>> to make any difference to overall performance...
>>
>>>  	/* Only trust user/env if kernel says we're not suid/sgid */
>>> -	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
>>> -	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
>>> +	if ((found&0x1e0)!=0x1e0 || aux[5]!=aux[6]
>>> +	  || aux[7]!=aux[8] || aux[9]) {
>>>  		env_path = 0;
>>>  		env_preload = 0;
>> Looking at this, I agree that the new decode_vec idea is not a good
>> direction. It's obfuscating the code badly.
>>
>> For now, how about leaving the old decode_vec alone and just adding a
>> new one with a different name for getting to "high" entries. I wonder
>> if it would be possible, rather than using a list of wanted entries,
>> to use a base/count rather than always working zero-based like
>> decode_vec does. This would allow the resulting indices to still
>> actually mean something so we don't wind up with magic numbers all
>> over the code..
> The decode_vec uses the first entry in 'a' to store the tags found in the given vector.
> If cnt is bigger than sizeof(size_t) * 8, there is a shift overflow.
> It's fine in the current use as you only test tag values < 32 (or don't use decode_vec to test it : AT_SYSINFO_EHDR).
> Should I take care of those cases in decode_vec2 (add a separate 'found' table argument)?
>> Rich


[-- Attachment #2: dladdr-gnu-hash-v4-bis.patch --]
[-- Type: text/x-patch, Size: 7892 bytes --]

diff --git a/include/dlfcn.h b/include/dlfcn.h
index dea74c7..8524e0b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -18,6 +18,17 @@ char  *dlerror(void);
 void  *dlopen(const char *, int);
 void  *dlsym(void *, const char *);
 
+#ifdef _GNU_SOURCE
+typedef struct {
+	const char *dli_fname;
+	void *dli_fbase;
+	const char *dli_sname;
+	void *dli_saddr;
+} Dl_info;
+
+int dladdr (void *addr, Dl_info *info);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 9692c6b..a7725ca 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -35,6 +36,7 @@ typedef Elf64_Sym Sym;
 #define R_TYPE(x) ((x)&0xffffffff)
 #define R_SYM(x) ((x)>>32)
 #endif
+#define ST_TYPE(x) ((x)&15)
 
 struct debug {
 	int ver;
@@ -53,6 +55,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -85,6 +88,7 @@ struct debug *_dl_debug_addr = &debug;
 
 #define AUX_CNT 24
 #define DYN_CNT 34
+#define DYN_ADDR_CNT 12
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
 {
@@ -95,7 +99,16 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static void decode_vec2(size_t *v, size_t *a, size_t cnt, size_t offset)
+{
+	memset(a, 0, cnt*sizeof(size_t));
+	for (; v[0]; v+=2) if (v[0]<offset && v[0]>(offset-cnt)) {
+		a[0] |= 1ULL<<(offset-v[0]);
+		a[offset-v[0]] = v[1];
+	}
+}
+
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -106,7 +119,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash (const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -119,20 +141,81 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	size_t i;
+	Sym *sym;
+	char *strings = dso->strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t *buckets = hashtab + 4 + (hashtab[2]*(sizeof(size_t)/sizeof(uint32_t)));
+	uint32_t symndx = hashtab[1];
+	Sym *syms = dso->syms;
+	uint32_t shift2 = hashtab[3];
+	uint32_t h2 = h1 >> shift2;
+	uint32_t *hashvals = buckets + nbuckets;
+	uint32_t *hashval;
+	size_t c = sizeof(size_t) * 8;
+	size_t n = (h1/c) & (hashtab[2]-1);
+	size_t bitmask = (1 << (h1%c)) | (1 << (h2%c));
+
+	if ((maskwords[n] & bitmask) != bitmask)
+		return 0;
+
+	n = buckets[h1 % nbuckets];
+	if (!n)
+		return 0;
+
+	sym = syms + n;
+	hashval = hashvals + n - symndx;
+
+	for (h1 &= (uint32_t)-2;; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2 & ~1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+
+		if (h2 & 1)
+			break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
+	uint32_t h = 0, gh = 0;
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	static uint32_t precomp[2][3] = {
+		{0x6b366be, 0x6b3afd, 0x595a4cc},
+		{0xf9040207, 0xf4dc4ae, 0x1f4039c9},
+	};
+	uint32_t *precomptab;
+	if (dso->hashtab) {
+		h = sysv_hash(s);
+		precomptab = precomp[0];
+	} else {
+		gh = gnu_hash(s);
+		precomptab = precomp[1];
+	}
+	if (h == precomptab[0] && !strcmp(s, "dlopen")) rtld_used = 1;
+	if (h == precomptab[1] && !strcmp(s, "dlsym")) rtld_used = 1;
+	if (h == precomptab[2] && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
 	for (; dso; dso=dso->next) {
 		Sym *sym;
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+		if (dso->hashtab && (h || !dso->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		}
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -325,8 +408,12 @@ static void decode_dyn(struct dso *p)
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	if (dyn[0]&(1<<DT_HASH))
+		p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	decode_vec2(p->dynv, dyn, DYN_ADDR_CNT, DT_ADDRRNGHI+1);
+	if (dyn[0]&(1<<(DT_ADDRTAGIDX(DT_GNU_HASH)+1)))
+		p->ghashtab = (void *)(p->base + dyn[DT_ADDRTAGIDX(DT_GNU_HASH)+1]);
 }
 
 static struct dso *load_library(const char *name)
@@ -788,7 +875,7 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
+	uint32_t h = 0, gh = 0;
 	Sym *sym;
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -802,12 +889,25 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	if (p->hashtab) {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	} else {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	}
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->hashtab && (h || !p->deps[i]->ghashtab)) {
+			if (!h)
+				h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		} else {
+			if (!gh)
+				gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
+		}
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}
@@ -817,6 +917,69 @@ failed:
 	return 0;
 }
 
+static int do_dladdr (void *addr, Dl_info *info)
+{
+	struct dso *p;
+	memset (info, 0, sizeof (*info));
+	for (p=head; p; p=p->next) {
+		if ((unsigned char *)addr >= p->map && (unsigned char *)addr < p->map + p->map_len) {
+			Sym *syms = p->syms;
+			uint32_t nsym = 0;
+			char *strings = p->strings;
+			size_t i;
+			info->dli_fname = p->name;
+			info->dli_fbase = p->base;
+			if (p->hashtab)
+				nsym = p->hashtab[1];
+			else {
+				uint32_t *buckets;
+				buckets = p->ghashtab + 4 + (p->ghashtab[2] * (sizeof(size_t)/sizeof(uint32_t)));
+				syms += p->ghashtab[1];
+				for (i = 0; i < p->ghashtab[0]; ++i) {
+					if (buckets[i] > nsym)
+						nsym = buckets[i];
+				}
+
+				if (nsym) {
+					nsym -= p->ghashtab[1];
+					uint32_t *hashval = buckets + p->ghashtab[0] + nsym;
+					do {
+						nsym++;
+					}while (!(*hashval++ & 1));
+				}
+
+			}
+
+			for (i = 0; i < nsym; i++) {
+				if ((syms[i].st_value != 0 || syms[i].st_shndx != SHN_UNDEF) &&
+					 ST_TYPE(syms[i].st_info) != STT_TLS) {
+					void *symaddr = p->base + syms[i].st_value;
+
+					if (addr >= symaddr && (!info->dli_saddr || (addr-symaddr)<(addr-info->dli_saddr))) {
+						info->dli_saddr = symaddr;
+						info->dli_sname = strings + syms[i].st_name;
+
+						if (addr == symaddr)
+							break;
+					}
+				}
+			}
+
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int dladdr (void *addr, Dl_info *info)
+{
+	int res;
+	pthread_rwlock_rdlock(&lock);
+	res = do_dladdr (addr, info);
+	pthread_rwlock_unlock(&lock);
+	return res;
+}
+
 void *__dlsym(void *p, const char *s, void *ra)
 {
 	void *res;

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-20 14:32                         ` musl
@ 2012-08-23 21:39                           ` Rich Felker
  2012-08-23 22:21                             ` Rich Felker
  0 siblings, 1 reply; 33+ messages in thread
From: Rich Felker @ 2012-08-23 21:39 UTC (permalink / raw)
  To: musl

On Mon, Aug 20, 2012 at 04:32:00PM +0200, musl wrote:
> I missed a bug in my previous patch :
> in find_sym func precomptab was always set to sysv_precomp.

It's still broken; h is being used in the comparisons even if h was
not initialized, rather than using gh. I'm working on integrating the
code right now. I'll either commit my version or reply with a patch
here soon for review.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-23 21:39                           ` Rich Felker
@ 2012-08-23 22:21                             ` Rich Felker
  2012-08-24  7:29                               ` musl
                                                 ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-23 22:21 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1679 bytes --]

On Thu, Aug 23, 2012 at 05:39:37PM -0400, Rich Felker wrote:
> On Mon, Aug 20, 2012 at 04:32:00PM +0200, musl wrote:
> > I missed a bug in my previous patch :
> > in find_sym func precomptab was always set to sysv_precomp.
> 
> It's still broken; h is being used in the comparisons even if h was
> not initialized, rather than using gh. I'm working on integrating the
> code right now. I'll either commit my version or reply with a patch
> here soon for review.

Here's my proposed patch for gnu hash support. I've left dladdr to be
committed separately. I handled the precomputed hashes by duplicating
the code in the two branches; this is _ugly_ but it's moderately
faster, and I really don't like the performance impact of these checks
to begin with, so I'd rather not make them even worse.

Some other changes I've made since Boris's last version:

- Prefer GNU hash if it's available. It's a lot faster even in single
  runs, and should make even more difference when data-locality issues
  come into play (resolving whole files rather than just a single
  dlsym call).

- Omit bloom filter checks. It's not clear if they're beneficial on
  average in large programs, but for single lookups where the symbol
  is present, they increase lookup time by about 8%.

- Replace the over-complicated decode_vec2 with search_vec, since we
  only need a single extended entry anyway. In any case, the big-O
  performance of high-entry lookups will always be the same as this
  linear search unless we use heavy data structures, so we might as
  well just do it this super-simple way.

Comments welcome. I'll hold off on committing for a while in case I
made any dumb mistakes.

Rich

[-- Attachment #2: gnuhash.diff --]
[-- Type: text/plain, Size: 5091 bytes --]

diff --git a/configure b/configure
index 1e8b974..1105180 100755
--- a/configure
+++ b/configure
@@ -268,7 +268,7 @@ fi
 
 # Some patched GCC builds have these defaults messed up...
 tryflag CFLAGS_AUTO -fno-stack-protector
-tryldflag LDFLAGS_AUTO -Wl,--hash-style=sysv
+tryldflag LDFLAGS_AUTO -Wl,--hash-style=both
 
 # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions
 LDFLAGS_DUMMY=
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 9692c6b..d7d6800 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -53,6 +53,7 @@ struct dso {
 	int refcnt;
 	Sym *syms;
 	uint32_t *hashtab;
+	uint32_t *ghashtab;
 	char *strings;
 	unsigned char *map;
 	size_t map_len;
@@ -95,7 +96,15 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
 	}
 }
 
-static uint32_t hash(const char *s0)
+static int search_vec(size_t *v, size_t *r, size_t key)
+{
+	for (; v[0]!=key; v+=2)
+		if (!v[0]) return 0;
+	*r = v[1];
+	return 1;
+}
+
+static uint32_t sysv_hash(const char *s0)
 {
 	const unsigned char *s = (void *)s0;
 	uint_fast32_t h = 0;
@@ -106,7 +115,16 @@ static uint32_t hash(const char *s0)
 	return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
+static uint32_t gnu_hash(const char *s0)
+{
+	const unsigned char *s = (void *)s0;
+	uint_fast32_t h = 5381;
+	for (; *s; s++)
+		h = h*33 + *s;
+	return h;
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
 	size_t i;
 	Sym *syms = dso->syms;
@@ -119,20 +137,61 @@ static Sym *lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
+{
+	Sym *sym;
+	char *strings;
+	uint32_t *hashtab = dso->ghashtab;
+	uint32_t nbuckets = hashtab[0];
+	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
+	uint32_t h2;
+	uint32_t *hashval;
+	uint32_t n = buckets[h1 % nbuckets];
+
+	if (!n) return 0;
+
+	strings = dso->strings;
+	sym = dso->syms + n;
+	hashval = buckets + nbuckets + (n - hashtab[1]);
+
+	for (h1 |= 1; ; sym++) {
+		h2 = *hashval++;
+		if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name))
+			return sym;
+		if (h2 & 1) break;
+	}
+
+	return 0;
+}
+
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
 static void *find_sym(struct dso *dso, const char *s, int need_def)
 {
-	uint32_t h = hash(s);
+	uint32_t h = 0, gh = 0;
 	void *def = 0;
-	if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-	if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
-	if (h==0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	if (dso->ghashtab) {
+		gh = gnu_hash(s);
+		if (gh == 0xf9040207 && !strcmp(s, "dlopen")) rtld_used = 1;
+		if (gh == 0xf4dc4ae && !strcmp(s, "dlsym")) rtld_used = 1;
+		if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	} else {
+		h = sysv_hash(s);
+		if (h == 0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
+		if (h == 0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
+		if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
+	}
 	for (; dso; dso=dso->next) {
 		Sym *sym;
 		if (!dso->global) continue;
-		sym = lookup(s, h, dso);
+		if (dso->ghashtab) {
+			if (!gh) gh = gnu_hash(s);
+			sym = gnu_lookup(s, gh, dso);
+		} else {
+			if (!h) h = sysv_hash(s);
+			sym = sysv_lookup(s, h, dso);
+		}
 		if (sym && (!need_def || sym->st_shndx) && sym->st_value
 		 && (1<<(sym->st_info&0xf) & OK_TYPES)
 		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
@@ -325,8 +384,11 @@ static void decode_dyn(struct dso *p)
 	size_t dyn[DYN_CNT] = {0};
 	decode_vec(p->dynv, dyn, DYN_CNT);
 	p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
-	p->hashtab = (void *)(p->base + dyn[DT_HASH]);
 	p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+	if (dyn[0]&(1<<DT_HASH))
+		p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+	if (search_vec(p->dynv, dyn, DT_GNU_HASH))
+		p->ghashtab = (void *)(p->base + *dyn);
 }
 
 static struct dso *load_library(const char *name)
@@ -788,7 +850,7 @@ end:
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
 	size_t i;
-	uint32_t h;
+	uint32_t h = 0, gh = 0;
 	Sym *sym;
 	if (p == RTLD_NEXT) {
 		for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
@@ -802,12 +864,23 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
 		if (!res) goto failed;
 		return res;
 	}
-	h = hash(s);
-	sym = lookup(s, h, p);
+	if (p->ghashtab) {
+		gh = gnu_hash(s);
+		sym = gnu_lookup(s, gh, p);
+	} else {
+		h = sysv_hash(s);
+		sym = sysv_lookup(s, h, p);
+	}
 	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 		return p->base + sym->st_value;
 	if (p->deps) for (i=0; p->deps[i]; i++) {
-		sym = lookup(s, h, p->deps[i]);
+		if (p->deps[i]->ghashtab) {
+			if (!gh) gh = gnu_hash(s);
+			sym = gnu_lookup(s, h, p->deps[i]);
+		} else {
+			if (!h) h = sysv_hash(s);
+			sym = sysv_lookup(s, h, p->deps[i]);
+		}
 		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
 			return p->deps[i]->base + sym->st_value;
 	}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-23 22:21                             ` Rich Felker
@ 2012-08-24  7:29                               ` musl
  2012-08-24 18:38                                 ` Rich Felker
  2012-08-24  8:12                               ` Szabolcs Nagy
  2012-08-25 21:34                               ` musl
  2 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-24  7:29 UTC (permalink / raw)
  To: musl

On 24/08/2012 00:21, Rich Felker wrote:
> On Thu, Aug 23, 2012 at 05:39:37PM -0400, Rich Felker wrote:
>> On Mon, Aug 20, 2012 at 04:32:00PM +0200, musl wrote:
>>> I missed a bug in my previous patch :
>>> in find_sym func precomptab was always set to sysv_precomp.
>> It's still broken; h is being used in the comparisons even if h was
>> not initialized, rather than using gh. I'm working on integrating the
>> code right now. I'll either commit my version or reply with a patch
>> here soon for review.
> Here's my proposed patch for gnu hash support. I've left dladdr to be
> committed separately. I handled the precomputed hashes by duplicating
> the code in the two branches; this is _ugly_ but it's moderately
> faster, and I really don't like the performance impact of these checks
> to begin with, so I'd rather not make them even worse.
>
> Some other changes I've made since Boris's last version:
>
> - Prefer GNU hash if it's available. It's a lot faster even in single
>   runs, and should make even more difference when data-locality issues
>   come into play (resolving whole files rather than just a single
>   dlsym call).
>
> - Omit bloom filter checks. It's not clear if they're beneficial on
>   average in large programs, but for single lookups where the symbol
>   is present, they increase lookup time by about 8%.
>
> - Replace the over-complicated decode_vec2 with search_vec, since we
>   only need a single extended entry anyway. In any case, the big-O
>   performance of high-entry lookups will always be the same as this
>   linear search unless we use heavy data structures, so we might as
>   well just do it this super-simple way.
>
> Comments welcome. I'll hold off on committing for a while in case I
> made any dumb mistakes.
I tested it and it works well.
My tests are based on small libs (with a small set of shared symbols).
I mixed libs with gnu hash and sysv hash.
Tried to resolve symbols via dlsym.

Have you tested it on big libraries ?
Do you want me to do some specific tests ?
> Rich



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-23 22:21                             ` Rich Felker
  2012-08-24  7:29                               ` musl
@ 2012-08-24  8:12                               ` Szabolcs Nagy
  2012-08-24  8:56                                 ` musl
  2012-08-25 21:34                               ` musl
  2 siblings, 1 reply; 33+ messages in thread
From: Szabolcs Nagy @ 2012-08-24  8:12 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@aerifal.cx> [2012-08-23 18:21:13 -0400]:
> +static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
> +{
> +	Sym *sym;
> +	char *strings;
> +	uint32_t *hashtab = dso->ghashtab;
> +	uint32_t nbuckets = hashtab[0];
> +	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
> +	uint32_t h2;
> +	uint32_t *hashval;
> +	uint32_t n = buckets[h1 % nbuckets];
> +
> +	if (!n) return 0;
> +
> +	strings = dso->strings;
> +	sym = dso->syms + n;
> +	hashval = buckets + nbuckets + (n - hashtab[1]);
> +
> +	for (h1 |= 1; ; sym++) {
> +		h2 = *hashval++;
> +		if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name))
> +			return sym;
> +		if (h2 & 1) break;
> +	}

heh, is this really the gnuhash lookup logic?
they drop a valuable low bit with h1 |= 1

high bits are less important for short
strings because of the *33 logic
(last 3-4 chars have no effect on the msb)

not that it matters much of course..
but seems silly design to me especially
when nbuckets is a power-of-2, then
highbits are not used for much and
could be used as flags for whatever



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24  8:12                               ` Szabolcs Nagy
@ 2012-08-24  8:56                                 ` musl
  2012-08-24  9:38                                   ` Szabolcs Nagy
  0 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-24  8:56 UTC (permalink / raw)
  To: musl

On 24/08/2012 10:12, Szabolcs Nagy wrote:
> * Rich Felker <dalias@aerifal.cx> [2012-08-23 18:21:13 -0400]:
>> +static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
>> +{
>> +	Sym *sym;
>> +	char *strings;
>> +	uint32_t *hashtab = dso->ghashtab;
>> +	uint32_t nbuckets = hashtab[0];
>> +	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
>> +	uint32_t h2;
>> +	uint32_t *hashval;
>> +	uint32_t n = buckets[h1 % nbuckets];
>> +
>> +	if (!n) return 0;
>> +
>> +	strings = dso->strings;
>> +	sym = dso->syms + n;
>> +	hashval = buckets + nbuckets + (n - hashtab[1]);
>> +
>> +	for (h1 |= 1; ; sym++) {
>> +		h2 = *hashval++;
>> +		if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name))
>> +			return sym;
>> +		if (h2 & 1) break;
>> +	}
> heh, is this really the gnuhash lookup logic?
> they drop a valuable low bit with h1 |= 1
When h1 |= 1 is done the bucket index and first hashval have already been stored in n and hashval.
Then h1 is only used in the (h1 == (h2 | 1)) comparison.

> high bits are less important for short
> strings because of the *33 logic
> (last 3-4 chars have no effect on the msb)
>
> not that it matters much of course..
> but seems silly design to me especially
> when nbuckets is a power-of-2, then
> highbits are not used for much and
> could be used as flags for whatever
>



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24  8:56                                 ` musl
@ 2012-08-24  9:38                                   ` Szabolcs Nagy
  0 siblings, 0 replies; 33+ messages in thread
From: Szabolcs Nagy @ 2012-08-24  9:38 UTC (permalink / raw)
  To: musl

* musl <b.brezillon.musl@gmail.com> [2012-08-24 10:56:39 +0200]:
> When h1 |= 1 is done the bucket index and first hashval have already been stored in n and hashval.
> Then h1 is only used in the (h1 == (h2 | 1)) comparison.
> 

meanwhile i realized that if a%n == b%n and a/2 == b/2 then a == b

so there is no loss of information, sorry for the noise


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24  7:29                               ` musl
@ 2012-08-24 18:38                                 ` Rich Felker
  2012-08-25  7:42                                   ` boris brezillon
                                                     ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-24 18:38 UTC (permalink / raw)
  To: musl

On Fri, Aug 24, 2012 at 09:29:29AM +0200, musl wrote:
> I tested it and it works well.

Is there anything I changed that you think might be better done a
different way?

> My tests are based on small libs (with a small set of shared symbols).
> I mixed libs with gnu hash and sysv hash.
> Tried to resolve symbols via dlsym.
> 
> Have you tested it on big libraries ?

No, just very minimal testing.

> Do you want me to do some specific tests ?

Actually, the main thing I'm interested in is whether the bloom filter
is ever beneficial. I took it out trying to streamline the code and
shaved about 8% off the lookup time for symbols in the main program,
but I didn't investigate how the change affects symbols not found in
the first file searched. Would you be interested in running some tests
to determine if it might be useful to try adding it back?

Since it seems to be working/non-broken right now, I'll probably go
ahead and commit soon unless you find a major problem I've overlooked.
Then we can work on improving it once it's in the repo.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24 18:38                                 ` Rich Felker
@ 2012-08-25  7:42                                   ` boris brezillon
  2012-08-25 12:35                                     ` Rich Felker
  2012-08-25 22:13                                   ` musl
  2012-08-26  0:00                                   ` musl
  2 siblings, 1 reply; 33+ messages in thread
From: boris brezillon @ 2012-08-25  7:42 UTC (permalink / raw)
  To: musl

2012/8/24 Rich Felker <dalias@aerifal.cx>:
> On Fri, Aug 24, 2012 at 09:29:29AM +0200, musl wrote:
>> I tested it and it works well.
>
> Is there anything I changed that you think might be better done a
> different way?

No, but I'm not an expert in size/speed code optimization.

>
>> My tests are based on small libs (with a small set of shared symbols).
>> I mixed libs with gnu hash and sysv hash.
>> Tried to resolve symbols via dlsym.
>>
>> Have you tested it on big libraries ?
>
> No, just very minimal testing.
>
>> Do you want me to do some specific tests ?
>
> Actually, the main thing I'm interested in is whether the bloom filter
> is ever beneficial. I took it out trying to streamline the code and
> shaved about 8% off the lookup time for symbols in the main program,
> but I didn't investigate how the change affects symbols not found in
> the first file searched. Would you be interested in running some tests
> to determine if it might be useful to try adding it back?
I'll do some tests with multiple levels of big libraries : prog ->
libtest -> libc -> libb -> liba ...
How do you get your perf results (specific tools, time measurement
inside libc code, time measurement in main program, ...)?
>
> Since it seems to be working/non-broken right now, I'll probably go
> ahead and commit soon unless you find a major problem I've overlooked.
> Then we can work on improving it once it's in the repo.
I agree.
>
> Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-25  7:42                                   ` boris brezillon
@ 2012-08-25 12:35                                     ` Rich Felker
  0 siblings, 0 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-25 12:35 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1063 bytes --]

On Sat, Aug 25, 2012 at 09:42:38AM +0200, boris brezillon wrote:
> >> Do you want me to do some specific tests ?
> >
> > Actually, the main thing I'm interested in is whether the bloom filter
> > is ever beneficial. I took it out trying to streamline the code and
> > shaved about 8% off the lookup time for symbols in the main program,
> > but I didn't investigate how the change affects symbols not found in
> > the first file searched. Would you be interested in running some tests
> > to determine if it might be useful to try adding it back?
> I'll do some tests with multiple levels of big libraries : prog ->
> libtest -> libc -> libb -> liba ...
> How do you get your perf results (specific tools, time measurement
> inside libc code, time measurement in main program, ...)?

I forgot to attach my test code. It's x86-specific (uses rdtsc because
that's the most accurate way) but hopefully that's not a problem.

Note: if searching for symbols in the main program like it's doing as
I have it configured now, you'll need to compile with -rdynamic.

Rich

[-- Attachment #2: gnuhash.c --]
[-- Type: text/plain, Size: 356 bytes --]

#include <dlfcn.h>
#include <stdio.h>

static inline unsigned rdtsc()
{
	unsigned x, dummy;
	__asm__ __volatile__ ( "rdtsc" : "=a"(x), "=d"(dummy) );
	return x;
}

int main()
{
	unsigned i, t0, t, tmin=-1;
	void *p;
	for (i=0; i<16; i++) {
		t0 = rdtsc();
		p = dlsym(0, "main");
		t = rdtsc()-t0;
		if (t<tmin) tmin = t;
	}
	printf("%u %p\n", tmin, p);
}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-23 22:21                             ` Rich Felker
  2012-08-24  7:29                               ` musl
  2012-08-24  8:12                               ` Szabolcs Nagy
@ 2012-08-25 21:34                               ` musl
  2012-08-25 21:42                                 ` Rich Felker
  2 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-25 21:34 UTC (permalink / raw)
  To: musl

On 24/08/2012 00:21, Rich Felker wrote:
> On Thu, Aug 23, 2012 at 05:39:37PM -0400, Rich Felker wrote:
>> On Mon, Aug 20, 2012 at 04:32:00PM +0200, musl wrote:
>>> I missed a bug in my previous patch :
>>> in find_sym func precomptab was always set to sysv_precomp.
>> It's still broken; h is being used in the comparisons even if h was
>> not initialized, rather than using gh. I'm working on integrating the
>> code right now. I'll either commit my version or reply with a patch
>> here soon for review.
> Here's my proposed patch for gnu hash support. I've left dladdr to be
> committed separately. I handled the precomputed hashes by duplicating
> the code in the two branches; this is _ugly_ but it's moderately
> faster, and I really don't like the performance impact of these checks
> to begin with, so I'd rather not make them even worse.
>
> Some other changes I've made since Boris's last version:
>
> - Prefer GNU hash if it's available. It's a lot faster even in single
>   runs, and should make even more difference when data-locality issues
>   come into play (resolving whole files rather than just a single
>   dlsym call).
>
> - Omit bloom filter checks. It's not clear if they're beneficial on
>   average in large programs, but for single lookups where the symbol
>   is present, they increase lookup time by about 8%.
>
> - Replace the over-complicated decode_vec2 with search_vec, since we
>   only need a single extended entry anyway. In any case, the big-O
>   performance of high-entry lookups will always be the same as this
>   linear search unless we use heavy data structures, so we might as
>   well just do it this super-simple way.
>
> Comments welcome. I'll hold off on committing for a while in case I
> made any dumb mistakes.
I found a bug in gnu_lookup of dependencies :

+        if (p->deps[i]->ghashtab) {
+            if (!gh) gh = gnu_hash(s);
+            sym = gnu_lookup(s, h, p->deps[i]);
+        } else {
+            if (!h) h = sysv_hash(s);
+            sym = sysv_lookup(s, h, p->deps[i]);
+        }

you pass 'h' instead of 'gh' to gnu_lookup func
>
> Rich



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-25 21:34                               ` musl
@ 2012-08-25 21:42                                 ` Rich Felker
  0 siblings, 0 replies; 33+ messages in thread
From: Rich Felker @ 2012-08-25 21:42 UTC (permalink / raw)
  To: musl

On Sat, Aug 25, 2012 at 11:34:13PM +0200, musl wrote:
> I found a bug in gnu_lookup of dependencies :
> 
> +        if (p->deps[i]->ghashtab) {
> +            if (!gh) gh = gnu_hash(s);
> +            sym = gnu_lookup(s, h, p->deps[i]);
> +        } else {
> +            if (!h) h = sysv_hash(s);
> +            sym = sysv_lookup(s, h, p->deps[i]);
> +        }
> 
> you pass 'h' instead of 'gh' to gnu_lookup func

Thanks! Fixed.

Rich


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24 18:38                                 ` Rich Felker
  2012-08-25  7:42                                   ` boris brezillon
@ 2012-08-25 22:13                                   ` musl
  2012-08-25 22:37                                     ` musl
  2012-08-26  0:00                                   ` musl
  2 siblings, 1 reply; 33+ messages in thread
From: musl @ 2012-08-25 22:13 UTC (permalink / raw)
  To: musl

On 24/08/2012 20:38, Rich Felker wrote:
> On Fri, Aug 24, 2012 at 09:29:29AM +0200, musl wrote:
>> I tested it and it works well.
> Is there anything I changed that you think might be better done a
> different way?
>
>> My tests are based on small libs (with a small set of shared symbols).
>> I mixed libs with gnu hash and sysv hash.
>> Tried to resolve symbols via dlsym.
>>
>> Have you tested it on big libraries ?
> No, just very minimal testing.
>
>> Do you want me to do some specific tests ?
> Actually, the main thing I'm interested in is whether the bloom filter
> is ever beneficial. I took it out trying to streamline the code and
> shaved about 8% off the lookup time for symbols in the main program,
> but I didn't investigate how the change affects symbols not found in
> the first file searched. Would you be interested in running some tests
> to determine if it might be useful to try adding it back?
>
> Since it seems to be working/non-broken right now, I'll probably go
> ahead and commit soon unless you find a major problem I've overlooked.
> Then we can work on improving it once it's in the repo.
I executed your test program (gnuhash) with and without bloom filter test, and I get pretty much the same results in
both cases if the symbol is defined.
What compiler option did you use to compile gnuhash.c ?

I also tried to search for a missing symbol and the version with bloom filter is 3% faster.

I'll do more tests with bigger libs and different linker optimizations (some linker optims change the number of buckets
in the hash table => less entries per hash chains => faster search in case there's no valid entry for a given name).

 

 
>
> Rich



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-25 22:13                                   ` musl
@ 2012-08-25 22:37                                     ` musl
  0 siblings, 0 replies; 33+ messages in thread
From: musl @ 2012-08-25 22:37 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1806 bytes --]

On 26/08/2012 00:13, musl wrote:
> On 24/08/2012 20:38, Rich Felker wrote:
>> On Fri, Aug 24, 2012 at 09:29:29AM +0200, musl wrote:
>>> I tested it and it works well.
>> Is there anything I changed that you think might be better done a
>> different way?
>>
>>> My tests are based on small libs (with a small set of shared symbols).
>>> I mixed libs with gnu hash and sysv hash.
>>> Tried to resolve symbols via dlsym.
>>>
>>> Have you tested it on big libraries ?
>> No, just very minimal testing.
>>
>>> Do you want me to do some specific tests ?
>> Actually, the main thing I'm interested in is whether the bloom filter
>> is ever beneficial. I took it out trying to streamline the code and
>> shaved about 8% off the lookup time for symbols in the main program,
>> but I didn't investigate how the change affects symbols not found in
>> the first file searched. Would you be interested in running some tests
>> to determine if it might be useful to try adding it back?
>>
>> Since it seems to be working/non-broken right now, I'll probably go
>> ahead and commit soon unless you find a major problem I've overlooked.
>> Then we can work on improving it once it's in the repo.
> I executed your test program (gnuhash) with and without bloom filter test, and I get pretty much the same results in
> both cases if the symbol is defined.
> What compiler option did you use to compile gnuhash.c ?
>
> I also tried to search for a missing symbol and the version with bloom filter is 3% faster.
>
> I'll do more tests with bigger libs and different linker optimizations (some linker optims change the number of buckets
> in the hash table => less entries per hash chains => faster search in case there's no valid entry for a given name).
>
>  
Here is the patch I used to add bloom filter test.
>
>  
>> Rich


[-- Attachment #2: gnuhash-bloom-filter.diff --]
[-- Type: text/x-patch, Size: 958 bytes --]

diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index c733dc5..a4b150c 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -138,6 +138,7 @@ static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 	return 0;
 }
 
+#define BWSZ (sizeof(size_t)*8)
 static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 {
 	Sym *sym;
@@ -145,10 +146,15 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 	uint32_t *hashtab = dso->ghashtab;
 	uint32_t nbuckets = hashtab[0];
 	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
-	uint32_t h2;
+	size_t *maskwords = (size_t *)(hashtab + 4);
+	uint32_t h2 = h1 >> hashtab[3];
 	uint32_t *hashval;
-	uint32_t n = buckets[h1 % nbuckets];
+	uint32_t n = (h1/BWSZ) & (hashtab[2]-1);
+	size_t bm = (1 << (h1%BWSZ)) | (1 << (h2%BWSZ));
 
+	if ((maskwords[n] & bm) != bm) return 0;
+
+	n = buckets[h1 % nbuckets];
 	if (!n) return 0;
 
 	strings = dso->strings;

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: ldso : dladdr support
  2012-08-24 18:38                                 ` Rich Felker
  2012-08-25  7:42                                   ` boris brezillon
  2012-08-25 22:13                                   ` musl
@ 2012-08-26  0:00                                   ` musl
  2 siblings, 0 replies; 33+ messages in thread
From: musl @ 2012-08-26  0:00 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2057 bytes --]

On 24/08/2012 20:38, Rich Felker wrote:
> On Fri, Aug 24, 2012 at 09:29:29AM +0200, musl wrote:
>> I tested it and it works well.
> Is there anything I changed that you think might be better done a
> different way?
>
>> My tests are based on small libs (with a small set of shared symbols).
>> I mixed libs with gnu hash and sysv hash.
>> Tried to resolve symbols via dlsym.
>>
>> Have you tested it on big libraries ?
> No, just very minimal testing.
>
>> Do you want me to do some specific tests ?
> Actually, the main thing I'm interested in is whether the bloom filter
> is ever beneficial. I took it out trying to streamline the code and
> shaved about 8% off the lookup time for symbols in the main program,
> but I didn't investigate how the change affects symbols not found in
> the first file searched. Would you be interested in running some tests
> to determine if it might be useful to try adding it back?
I tried the perf application ( https://perf.wiki.kernel.org/index.php/Tutorial
<https://perf.wiki.kernel.org/index.php/Tutorial#Counting_with_perf_stat>)
with this command : 'perf stat -e instructions:u ./gnuhash main' (gives number of instructions executed in user space by
gnuhash program).

Here are the results :

With bloom filter :

577 0x80483c4

 Performance counter stats for './gnuhash main':

            31 895 instructions:u            #    0,00  insns per cycle       

       0,000792737 seconds time elapsed


Without bloom filter :

588 0x80483c4

 Performance counter stats for './gnuhash main':

            31 195 instructions:u            #    0,00  insns per cycle       

       0,000802368 seconds time elapsed

As you can see there are less instructions executed in the version without bloom filter but it's slower.
I guess this difference come from the symbol resolution during the relocation process.
> Since it seems to be working/non-broken right now, I'll probably go
> ahead and commit soon unless you find a major problem I've overlooked.
> Then we can work on improving it once it's in the repo.
>
> Rich


[-- Attachment #2: Type: text/html, Size: 3627 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2012-08-26  0:00 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-07  9:04 ldso : dladdr support musl
2012-08-07 11:46 ` Szabolcs Nagy
2012-08-07 14:15   ` musl
2012-08-07 14:53     ` Szabolcs Nagy
2012-08-07 23:09     ` Rich Felker
2012-08-08  9:55       ` musl
2012-08-08 11:52         ` Szabolcs Nagy
2012-08-08 12:54           ` Rich Felker
2012-08-08 13:57           ` musl
2012-08-11 23:05             ` Rich Felker
2012-08-15 22:41               ` boris brezillon
2012-08-17  5:39                 ` Rich Felker
2012-08-19 16:42                   ` musl
2012-08-20  2:06                     ` Rich Felker
2012-08-20 12:55                       ` musl
2012-08-20 14:32                         ` musl
2012-08-23 21:39                           ` Rich Felker
2012-08-23 22:21                             ` Rich Felker
2012-08-24  7:29                               ` musl
2012-08-24 18:38                                 ` Rich Felker
2012-08-25  7:42                                   ` boris brezillon
2012-08-25 12:35                                     ` Rich Felker
2012-08-25 22:13                                   ` musl
2012-08-25 22:37                                     ` musl
2012-08-26  0:00                                   ` musl
2012-08-24  8:12                               ` Szabolcs Nagy
2012-08-24  8:56                                 ` musl
2012-08-24  9:38                                   ` Szabolcs Nagy
2012-08-25 21:34                               ` musl
2012-08-25 21:42                                 ` Rich Felker
2012-08-16 18:03               ` musl
2012-08-17 16:35               ` musl
2012-08-08 12:49         ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).