From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/7996 Path: news.gmane.org!not-for-mail From: Alexander Monakov Newsgroups: gmane.linux.lib.musl.general Subject: [PATCH 2/5] dynlink.c: compute modulus via magic multiplication Date: Wed, 24 Jun 2015 02:24:52 +0300 Message-ID: <1435101895-18240-3-git-send-email-amonakov@ispras.ru> References: <1435101895-18240-1-git-send-email-amonakov@ispras.ru> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org X-Trace: ger.gmane.org 1435107354 14211 80.91.229.3 (24 Jun 2015 00:55:54 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Wed, 24 Jun 2015 00:55:54 +0000 (UTC) Cc: Alexander Monakov To: musl@lists.openwall.com Original-X-From: musl-return-8009-gllmg-musl=m.gmane.org@lists.openwall.com Wed Jun 24 02:55:45 2015 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1Z7Yye-00049b-7X for gllmg-musl@m.gmane.org; Wed, 24 Jun 2015 02:55:44 +0200 Original-Received: (qmail 23833 invoked by uid 550); 24 Jun 2015 00:55:42 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Original-Received: (qmail 3901 invoked from network); 23 Jun 2015 23:25:37 -0000 X-Mailer: git-send-email 2.1.4 In-Reply-To: <1435101895-18240-1-git-send-email-amonakov@ispras.ru> Xref: news.gmane.org gmane.linux.lib.musl.general:7996 Archived-At: Based on http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html Do a little hand-holding for the compiler and fold magic post-shift into 32-bit high word -> low word shift on 64-bit platforms. --- src/ldso/dynlink.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c index fa91b39..99dadd4 100644 --- a/src/ldso/dynlink.c +++ b/src/ldso/dynlink.c @@ -41,6 +41,11 @@ struct td_index { struct td_index *next; }; +struct udiv { + uint32_t mul; + int s1, s2, inc; +}; + struct dso { unsigned char *base; char *name; @@ -55,6 +60,7 @@ struct dso { uint32_t *hashtab; uint32_t *ghashtab; uint32_t ghashmask; + struct udiv gudiv; int16_t *versym; char *strings; unsigned char *map; @@ -141,6 +147,60 @@ static int search_vec(size_t *v, size_t *r, size_t key) return 1; } +static void precompute_udiv(uint32_t div, struct udiv *p) +{ + if (!(div&(div-1))) + return; + int bits = 0; +again: + p->s1 = bits; + uint32_t tmp = 1u<<31, quo = tmp/div, rem = tmp%div; + + int log=0; + tmp=div; do log++; while (tmp>>=1); + + int exp, rdshf; + uint32_t pow, rdmul=0; + for (exp=0, pow=1u<= div - rem; + quo *= 2; rem *= 2; + if (ovf) quo++, rem -= div; + + if (exp >= log-bits || div-rem <= pow) + break; + + if (!rdmul && rem <= pow) + rdmul = quo, rdshf = exp; + } + if (exp < log) { + p->mul = quo+1; + p->s2 = exp; + p->inc = 0; + } else if (div & 1) { + p->mul = rdmul; + p->s2 = rdshf; + p->inc = 1; + } else { + do bits++; while (!((div >>= 1) & 1)); + goto again; + } +} + +static uint32_t umod(uint32_t x, uint32_t div, struct udiv *p) +{ + if (!(div&(div-1))) + return x&(div-1); + uint32_t v = x; + if (p->s1) v >>= p->s1; + else if (v != ~0u) v += p->inc; + int s32=32, s2=p->s2; + if (sizeof(long) == 8) + s32+=s2, s2=0; + v = (1ull * v * p->mul) >> s32; + v >>= s2; + return x-v*div; +} + static uint32_t sysv_hash(const char *s0) { const unsigned char *s = (void *)s0; @@ -184,7 +244,7 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso) uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4); uint32_t h2; uint32_t *hashval; - uint32_t i = buckets[h1 % nbuckets]; + uint32_t i = buckets[umod(h1, nbuckets, &dso->gudiv)]; if (!i) return 0; @@ -705,6 +765,7 @@ static void decode_dyn(struct dso *p) if (search_vec(p->dynv, dyn, DT_GNU_HASH)) { p->ghashtab = (void *)(p->base + *dyn); p->ghashmask = p->ghashtab[2]-1; + precompute_udiv(p->ghashtab[0], &p->gudiv); } if (search_vec(p->dynv, dyn, DT_VERSYM)) p->versym = (void *)(p->base + *dyn);