From: Joakim Sindholt <opensource@zhasha.com>
To: musl@lists.openwall.com
Subject: IDNA support in name lookups
Date: Wed, 29 Mar 2017 13:26:29 +0200 [thread overview]
Message-ID: <20170329112629.GA3506324@wirbelwind> (raw)
[-- Attachment #1: Type: text/plain, Size: 167 bytes --]
Here's a first draft patch for internationalized domain name support.
I implemented it based on the pseudocode in RFC3492[1].
[1] https://tools.ietf.org/html/rfc3492
[-- Attachment #2: 0001-add-IDNA-support-to-name-lookups.patch --]
[-- Type: text/x-diff, Size: 6835 bytes --]
From 7542dfe05b33b200360f982caf1631615cde30fb Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Wed, 29 Mar 2017 11:51:02 +0200
Subject: [PATCH] add IDNA support to name lookups
---
src/network/lookup_name.c | 202 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 194 insertions(+), 8 deletions(-)
diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
index fb7303a..3590cb1 100644
--- a/src/network/lookup_name.c
+++ b/src/network/lookup_name.c
@@ -10,9 +10,21 @@
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
+#include <wchar.h>
#include "lookup.h"
#include "stdio_impl.h"
#include "syscall.h"
+#include "locale_impl.h"
+
+enum { /* Punycode parameters; the cover message cites RFC 3492 as the source of the algorithm */
+ base = 36, /* number of digit values; matches the 36-character digit table in punyenc */
+ tmin = 1, /* lower clamp for the per-digit threshold t in punyenc */
+ tmax = 26, /* upper clamp for the per-digit threshold t in punyenc */
+ skew = 38, /* added to delta in the final term of adapt() */
+ damp = 700, /* divisor applied to delta on the first adapt() call */
+ initial_bias = 72, /* starting value of bias in punyenc */
+ initial_n = 128, /* starting value of n in punyenc: 0x80, the first code point above ASCII */
+};
static int is_valid_hostname(const char *host)
{
@@ -22,6 +34,162 @@ static int is_valid_hostname(const char *host)
return !*s;
}
+static unsigned int adapt(unsigned int delta, unsigned int numpoints, int firsttime)
+{ /* Compute the next bias from the delta just emitted; punyenc passes h + 1 as numpoints, so it is nonzero there. */
+ unsigned int k = 0;
+ delta /= firsttime ? damp : 2; /* divide by damp on the first call, by 2 on later calls */
+ delta += delta / numpoints;
+ while (delta > ((base - tmin) * tmax) / 2) { /* shrink delta, adding base to k for every division */
+ delta /= base - tmin;
+ k += base;
+ }
+ return k + ((base - tmin + 1) * delta) / (delta + skew);
+}
+
+static ssize_t punyenc(char *dst, const char *src, size_t len, size_t max) /* Punycode-encode src[0..len) into dst */
+{ /* Writes at most max bytes, no NUL terminator; returns the byte count written, or -1 on lack of space or delta overflow. */
+ static const char *const tbl = "abcdefghijklmnopqrstuvwxyz0123456789"; /* digit value -> output character */
+ const unsigned char *usrc = (void *)src; /* unsigned view so bytes >= 0x80 compare correctly */
+ unsigned int codepoints = 0;
+ unsigned int dlen = 0; /* bytes written to dst so far */
+ unsigned int si, mi;
+ unsigned int n = initial_n;
+ unsigned int delta = 0;
+ unsigned int bias = initial_bias;
+ unsigned int h, b; /* h: code points handled so far; b: number of ASCII code points */
+ for (si = 0; si < len; ++si) { /* pass 1: copy ASCII bytes verbatim and count multibyte lead bytes */
+ if (usrc[si] < 0x80) {
+ if (dlen == max)
+ return -1;
+ dst[dlen++] = src[si];
+ } else if ((usrc[si] & 0xC0) == 0xC0) { /* top two bits set: first byte of a multibyte sequence */
+ ++codepoints;
+ }
+ }
+ codepoints += dlen; /* total = non-ASCII lead bytes + ASCII bytes */
+ h = b = dlen;
+ if (dlen) { /* the '-' delimiter is emitted only when ASCII characters were copied */
+ if (dlen == max)
+ return -1;
+ dst[dlen++] = '-';
+ }
+ while (h < codepoints) { /* one round per distinct remaining code point value, in increasing order */
+ unsigned int m = (unsigned int)-1;
+ unsigned int c;
+ wchar_t wc;
+ for (mi = 0; mi < len; ) { /* find m, the smallest code point >= n */
+ mi += mbtowc(&wc, src + mi, len - mi); /* NOTE(review): return value is added unchecked; the visible caller idnaenc validates the label with mbtowc first */
+ c = (unsigned int)wc;
+ if (c >= n && c < m)
+ m = c;
+ }
+ if (((unsigned int)-1 - delta) / (h + 1) < m - n) /* would (m - n) * (h + 1) overflow delta? */
+ return -1;
+ delta += (m - n) * (h + 1);
+ n = m;
+
+ for (mi = 0; mi < len; ) { /* pass over the input again, emitting every occurrence of n */
+ mi += mbtowc(&wc, src + mi, len - mi);
+ c = (unsigned int)wc;
+ if (c < n /* || c < 0x80 not necessary*/)
+ if (++delta == 0) /* wrapped to zero: overflow */
+ return -1;
+ if (c == n) {
+ unsigned int q = delta; /* delta is written out as a variable-length sequence of digits */
+ unsigned int k;
+ for (k = base; ; k += base) {
+ unsigned int t; /* threshold for this digit position: k - bias clamped to [tmin, tmax] */
+ if (k <= bias + tmin) {
+ t = tmin;
+ } else if (k >= bias + tmax) {
+ t = tmax;
+ } else {
+ t = k - bias;
+ }
+ if (q < t) /* remaining value fits in the final digit */
+ break;
+ if (dlen == max)
+ return -1;
+ dst[dlen++] = tbl[t + ((q - t) % (base - t))];
+ q = (q - t) / (base - t);
+ }
+ if (dlen == max)
+ return -1;
+ dst[dlen++] = tbl[q]; /* final digit */
+ bias = adapt(delta, h + 1, h == b); /* h == b only for the first non-ASCII code point */
+ delta = 0;
+ ++h;
+ }
+ }
+ ++delta;
+ ++n;
+ }
+ return dlen;
+}
+
+static ssize_t idnaenc(char dst[static 256], const char *src) /* Convert the dotted host name src to its ASCII form in dst */
+{ /* ASCII-only labels are lowercased, others become "xn--" + punyenc output; returns the output length (dst is NUL-terminated) or -1 if rejected. */
+ size_t left = strlen(src); /* bytes of src not yet consumed */
+ size_t olen = 0; /* bytes written to dst so far */
+
+ while (left) { /* one iteration per label */
+ const char *dot;
+ size_t len, i;
+ int basic = 1; /* stays 1 while the label contains only ASCII */
+
+ dot = memchr(src, '.', left);
+ if (!dot) { dot = src + left; } /* last label: dot points at the terminating NUL */
+ len = dot - src;
+ if (len == 0) { return -1; } /* empty label (leading dot or "..") */
+ left -= len + !!*dot; /* consume the label and, if present, its dot */
+
+ for (i = 0; i < len; ) { /* validate every character of the label */
+ unsigned int c;
+ wchar_t wc;
+ int n = mbtowc(&wc, src + i, len - i);
+ c = (n <= 0) ? 0 : (unsigned int)wc; /* decode failure maps to 0, which the ASCII test below rejects */
+ if (c < 0x80) {
+ if (!isalnum(c) && !(i > 0 && c == '-')) /* ASCII: alphanumerics, or '-' anywhere but first */
+ return -1;
+ } else {
+ if (c >= 0x7F && c <= 0x9F) /* in this branch c >= 0x80, so this rejects 0x80..0x9F */
+ return -1;
+ basic = 0;
+ }
+ i += n;
+ }
+ if (basic) {
+ if (len > 63 || olen >= 254 || len > 254 - olen) /* olen is 255 when a '.' was stored at offset 254; test it first so 254 - olen cannot wrap and let the copy run past dst */
+ return -1;
+ for (i = 0; i < len; ++i)
+ dst[olen + i] = tolower(src[i]);
+ olen += len;
+ } else {
+ ssize_t r;
+ size_t max; /* space punyenc may use after the 4-byte prefix */
+ if (olen >= 254 - 4)
+ return -1;
+ max = 254 - 4 - olen;
+ if (max > 63 - 4) /* a label, prefix included, is limited to 63 bytes */
+ max = 63 - 4;
+ memcpy(dst + olen, "xn--", 4);
+ r = punyenc(dst + olen + 4, src, len, max);
+ if (r <= 0)
+ return -1;
+ olen += r + 4;
+ }
+ if (olen == 255 || !*dot && olen == 254) /* 254 bytes are accepted only when a dot follows */
+ return -1;
+ if (*dot)
+ dst[olen++] = *dot;
+ src = dot + !!*dot; /* advance to the next label */
+ }
+ if (olen == 0) /* empty input */
+ return -1;
+ dst[olen] = 0;
+ return olen;
+}
+
static int name_from_null(struct address buf[static 2], const char *name, int family, int flags)
{
int cnt = 0;
@@ -61,12 +229,25 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
return EAI_SYSTEM;
}
while (fgets(line, sizeof line, f) && cnt < MAXADDRS) {
- char *p, *z;
+ char idna[256];
+ ssize_t r;
+ char *p, *z, c;
if ((p=strchr(line, '#'))) *p++='\n', *p=0;
- for(p=line+1; (p=strstr(p, name)) &&
- (!isspace(p[-1]) || !isspace(p[l])); p++);
- if (!p) continue;
+ /* skip ip address and canonicalize names */
+ for (p=line; *p && !isspace(*p); p++);
+ while (*p) {
+ for (; *p && isspace(*p); p++);
+ for (z=p; *z && !isspace(*z); z++);
+ c = *z;
+ *z = 0;
+ r = idnaenc(idna, p);
+ *z = c;
+ if (r == l && memcmp(idna, name, l) == 0)
+ break;
+ p = z;
+ }
+ if (!*p) continue;
/* Isolate IP address to parse */
for (p=line; *p && !isspace(*p); p++);
@@ -86,7 +267,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
for (; *p && isspace(*p); p++);
for (z=p; *z && !isspace(*z); z++);
*z = 0;
- if (is_valid_hostname(p)) memcpy(canon, p, z-p+1);
+ if ((r = idnaenc(idna, p)) > 0) memcpy(canon, idna, r);
}
__fclose_ca(f);
return cnt ? cnt : badfam;
@@ -285,15 +466,19 @@ static int addrcmp(const void *_a, const void *_b)
int __lookup_name(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, int flags)
{
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ char _name[256];
int cnt = 0, i, j;
+ *ploc = UTF8_LOCALE;
*canon = 0;
if (name) {
/* reject empty name and check len so it fits into temp bufs */
- size_t l = strnlen(name, 255);
- if (l-1 >= 254)
+ ssize_t l;
+ if ((l = idnaenc(_name, name)) <= 0)
return EAI_NONAME;
- memcpy(canon, name, l+1);
+ memcpy(canon, _name, l+1);
+ name = _name;
}
/* Procedurally, a request for v6 addresses with the v4-mapped
@@ -311,6 +496,7 @@ int __lookup_name(struct address buf[static MAXADDRS], char canon[static 256], c
cnt = name_from_hosts(buf, canon, name, family);
if (!cnt) cnt = name_from_dns_search(buf, canon, name, family);
}
+ *ploc = loc;
if (cnt<=0) return cnt ? cnt : EAI_NONAME;
/* Filter/transform results for v4-mapped lookup, if requested. */
--
2.10.2
next reply other threads:[~2017-03-29 11:26 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-29 11:26 Joakim Sindholt [this message]
2017-04-02 7:30 ` [PATCH v2] " Joakim Sindholt
2017-04-23 1:01 ` Rich Felker
2017-04-23 8:14 ` Joakim Sindholt
2017-04-23 15:07 ` Rich Felker
2017-04-23 16:38 ` Joakim Sindholt
2017-04-23 16:56 ` Rich Felker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170329112629.GA3506324@wirbelwind \
--to=opensource@zhasha.com \
--cc=musl@lists.openwall.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).