From: Joakim Sindholt <opensource@zhasha.com>
To: musl@lists.openwall.com
Subject: [PATCH v2] IDNA support in name lookups
Date: Sun, 2 Apr 2017 09:30:26 +0200 [thread overview]
Message-ID: <20170402073026.GA4177284@wirbelwind> (raw)
In-Reply-To: <20170329112629.GA3506324@wirbelwind>
[-- Attachment #1: Type: text/plain, Size: 342 bytes --]
Changes since v1:
* Reject UTF-16 surrogate range runes
* Remove locale override
This is from some discussion on IRC and while I agree that it's more
"correct" in POSIX terms, I'm not particularly happy about having to
explicitly enable UTF-8 support with setlocale.
There might still be bugs and character ranges that need to be rejected.
[-- Attachment #2: 0001-add-IDNA-support-to-name-lookups.patch --]
[-- Type: text/x-diff, Size: 6507 bytes --]
From 54d5caf36cdce4e5008aecfcc2b02580fb52d0cb Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Wed, 29 Mar 2017 11:51:02 +0200
Subject: [PATCH] add IDNA support to name lookups
---
src/network/lookup_name.c | 202 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 193 insertions(+), 9 deletions(-)
diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
index fb7303a..fd4275c 100644
--- a/src/network/lookup_name.c
+++ b/src/network/lookup_name.c
@@ -10,9 +10,21 @@
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
+#include <wchar.h>
#include "lookup.h"
#include "stdio_impl.h"
#include "syscall.h"
+#include "locale_impl.h"
+
+enum { /* Punycode (RFC 3492) parameters used by adapt() and punyenc() */
+ base = 36, /* number of digit values; matches the 36-character digit table in punyenc() */
+ tmin = 1, /* lower clamp for the per-digit threshold t */
+ tmax = 26, /* upper clamp for the per-digit threshold t */
+ skew = 38, /* added to delta in the divisor of adapt()'s result */
+ damp = 700, /* divisor applied to delta on the first adaptation */
+ initial_bias = 72, /* starting value of bias in punyenc() */
+ initial_n = 128, /* starting value of n: the first code point above ASCII */
+};
static int is_valid_hostname(const char *host)
{
@@ -22,6 +34,163 @@ static int is_valid_hostname(const char *host)
return !*s;
}
+/*
+ * Bias adaptation step of the Punycode encoder (the adapt routine of
+ * RFC 3492).
+ *
+ * delta      distance accumulated since the last emitted code point
+ * numpoints  number of code points handled so far; must be non-zero
+ * firsttime  non-zero for the first adaptation, which damps delta harder
+ *
+ * Returns the new bias.
+ */
+static unsigned int adapt(unsigned int delta, unsigned int numpoints, int firsttime)
+{
+	const unsigned int span = base - tmin;
+	unsigned int k;
+
+	if (firsttime)
+		delta /= damp;
+	else
+		delta /= 2;
+	delta += delta / numpoints;
+
+	/* Scale delta down into range, counting one base step per division. */
+	for (k = 0; delta > (span * tmax) / 2; k += base)
+		delta /= span;
+
+	return k + ((span + 1) * delta) / (delta + skew);
+}
+
+/*
+ * Encode one label as Punycode (RFC 3492), without the "xn--" prefix.
+ *
+ * dst  receives the encoded bytes; it is not NUL-terminated here
+ * src  the label, len bytes long, decoded with mbtowc(); the code-point
+ *      count below additionally assumes UTF-8 lead bytes (top two bits set)
+ * max  capacity of dst in bytes
+ *
+ * Returns the number of bytes stored in dst, or -1 if the output would
+ * exceed max, the delta counter would overflow, or src fails to decode.
+ */
+static ssize_t punyenc(char *dst, const char *src, size_t len, size_t max)
+{
+	static const char *const tbl = "abcdefghijklmnopqrstuvwxyz0123456789";
+	const unsigned char *usrc = (void *)src;
+	unsigned int codepoints = 0;
+	unsigned int dlen = 0;
+	unsigned int si, mi;
+	unsigned int n = initial_n;
+	unsigned int delta = 0;
+	unsigned int bias = initial_bias;
+	unsigned int h, b;
+
+	/* Copy the basic (ASCII) code points first and count the others by
+	 * their lead bytes. ASCII is folded to lower case so the result
+	 * agrees with how idnaenc() emits all-ASCII labels. */
+	for (si = 0; si < len; ++si) {
+		if (usrc[si] < 0x80) {
+			if (dlen == max)
+				return -1;
+			dst[dlen++] = tolower(usrc[si]);
+		} else if ((usrc[si] & 0xC0) == 0xC0) {
+			++codepoints;
+		}
+	}
+	codepoints += dlen;
+	/* h: code points handled so far, b: number of basic code points */
+	h = b = dlen;
+	/* The delimiter is only emitted when basic code points exist. */
+	if (dlen) {
+		if (dlen == max)
+			return -1;
+		dst[dlen++] = '-';
+	}
+
+	while (h < codepoints) {
+		unsigned int m = (unsigned int)-1;
+		unsigned int c;
+		wchar_t wc;
+		int r;
+
+		/* m = smallest code point >= n that is present in src */
+		for (mi = 0; mi < len; mi += r) {
+			r = mbtowc(&wc, src + mi, len - mi);
+			/* a failed decode must not move mi backwards or stall */
+			if (r <= 0)
+				return -1;
+			c = (unsigned int)wc;
+			if (c >= n && c < m)
+				m = c;
+		}
+		/* refuse to let delta wrap around */
+		if (((unsigned int)-1 - delta) / (h + 1) < m - n)
+			return -1;
+		delta += (m - n) * (h + 1);
+		n = m;
+
+		for (mi = 0; mi < len; mi += r) {
+			r = mbtowc(&wc, src + mi, len - mi);
+			if (r <= 0)
+				return -1;
+			c = (unsigned int)wc;
+			if (c < n && ++delta == 0)
+				return -1;
+			if (c == n) {
+				/* emit delta as a variable-length integer */
+				unsigned int q = delta;
+				unsigned int k;
+				for (k = base; ; k += base) {
+					unsigned int t;
+					if (k <= bias + tmin) {
+						t = tmin;
+					} else if (k >= bias + tmax) {
+						t = tmax;
+					} else {
+						t = k - bias;
+					}
+					if (q < t)
+						break;
+					if (dlen == max)
+						return -1;
+					dst[dlen++] = tbl[t + ((q - t) % (base - t))];
+					q = (q - t) / (base - t);
+				}
+				if (dlen == max)
+					return -1;
+				dst[dlen++] = tbl[q];
+				bias = adapt(delta, h + 1, h == b);
+				delta = 0;
+				++h;
+			}
+		}
+		++delta;
+		++n;
+	}
+	return dlen;
+}
+
+/*
+ * Convert a host name to its ASCII form for lookup: all-ASCII labels are
+ * lower-cased, labels containing other characters are Punycode-encoded
+ * and prefixed with "xn--".
+ *
+ * dst  output buffer of at least 256 bytes; NUL-terminated on success
+ * src  NUL-terminated name; its labels are decoded with mbtowc()
+ *
+ * Returns the length of the string stored in dst, or -1 if the name is
+ * empty, contains an empty label, contains a rejected character (ASCII
+ * other than alphanumerics and a non-leading '-', code points 0x80-0x9F,
+ * UTF-16 surrogates, undecodable bytes), or does not fit.
+ */
+static ssize_t idnaenc(char dst[static 256], const char *src)
+{
+	size_t left = strlen(src);
+	size_t olen = 0;
+
+	while (left) {
+		const char *dot;
+		size_t len, i;
+		int basic = 1;
+
+		/* Isolate the next label; dot ends up on its '.' or on the final NUL. */
+		dot = memchr(src, '.', left);
+		if (!dot)
+			dot = src + left;
+		len = dot - src;
+		if (len == 0)
+			return -1;
+		left -= len + !!*dot;
+
+		/* Validate every character and note whether any is non-ASCII. */
+		for (i = 0; i < len; ) {
+			unsigned int c;
+			wchar_t wc;
+			int n = mbtowc(&wc, src + i, len - i);
+			/* a failed decode becomes 0, which the ASCII branch rejects */
+			c = (n <= 0) ? 0 : (unsigned int)wc;
+			if (c < 0x80) {
+				if (!isalnum(c) && !(i > 0 && c == '-'))
+					return -1;
+			} else {
+				if ((c >= 0x80 && c <= 0x9F) ||
+				    (c >= 0xD800 && c <= 0xDFFF))
+					return -1;
+				basic = 0;
+			}
+			i += n;
+		}
+
+		if (basic) {
+			if (len > 63 || len > 254 - olen)
+				return -1;
+			for (i = 0; i < len; ++i)
+				dst[olen + i] = tolower((unsigned char)src[i]);
+			olen += len;
+		} else {
+			ssize_t r;
+			size_t max;
+			if (olen >= 254 - 4)
+				return -1;
+			/* room left for the label after its 4-byte "xn--" prefix */
+			max = 254 - 4 - olen;
+			if (max > 63 - 4)
+				max = 63 - 4;
+			memcpy(dst + olen, "xn--", 4);
+			r = punyenc(dst + olen + 4, src, len, max);
+			if (r <= 0)
+				return -1;
+			olen += r + 4;
+		}
+
+		/* Cap the name at 253 bytes before any separator. This keeps
+		 * olen <= 254 after the '.' is appended, so the unsigned
+		 * "254 - olen" bounds above cannot wrap around and let a
+		 * following label be written past the end of dst. */
+		if (olen > 253)
+			return -1;
+		if (*dot)
+			dst[olen++] = *dot;
+		src = dot + !!*dot;
+	}
+	if (olen == 0)
+		return -1;
+	dst[olen] = 0;
+	return olen;
+}
+
static int name_from_null(struct address buf[static 2], const char *name, int family, int flags)
{
int cnt = 0;
@@ -61,12 +230,25 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
return EAI_SYSTEM;
}
while (fgets(line, sizeof line, f) && cnt < MAXADDRS) {
- char *p, *z;
+ char idna[256];
+ ssize_t r;
+ char *p, *z, c;
if ((p=strchr(line, '#'))) *p++='\n', *p=0;
- for(p=line+1; (p=strstr(p, name)) &&
- (!isspace(p[-1]) || !isspace(p[l])); p++);
- if (!p) continue;
+ /* skip ip address and canonicalize names */
+ for (p=line; *p && !isspace(*p); p++);
+ while (*p) {
+ for (; *p && isspace(*p); p++);
+ for (z=p; *z && !isspace(*z); z++);
+ c = *z;
+ *z = 0;
+ r = idnaenc(idna, p);
+ *z = c;
+ if (r == l && memcmp(idna, name, l) == 0)
+ break;
+ p = z;
+ }
+ if (!*p) continue;
/* Isolate IP address to parse */
for (p=line; *p && !isspace(*p); p++);
@@ -86,7 +268,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
for (; *p && isspace(*p); p++);
for (z=p; *z && !isspace(*z); z++);
*z = 0;
- if (is_valid_hostname(p)) memcpy(canon, p, z-p+1);
+ if ((r = idnaenc(idna, p)) > 0) memcpy(canon, idna, r);
}
__fclose_ca(f);
return cnt ? cnt : badfam;
@@ -285,15 +467,17 @@ static int addrcmp(const void *_a, const void *_b)
int __lookup_name(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, int flags)
{
+ char _name[256];
int cnt = 0, i, j;
*canon = 0;
if (name) {
- /* reject empty name and check len so it fits into temp bufs */
- size_t l = strnlen(name, 255);
- if (l-1 >= 254)
+ /* convert unicode name to RFC3492 punycode */
+ ssize_t l;
+ if ((l = idnaenc(_name, name)) <= 0)
return EAI_NONAME;
- memcpy(canon, name, l+1);
+ memcpy(canon, _name, l+1);
+ name = _name;
}
/* Procedurally, a request for v6 addresses with the v4-mapped
--
2.10.2
next prev parent reply other threads:[~2017-04-02 7:30 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-29 11:26 Joakim Sindholt
2017-04-02 7:30 ` Joakim Sindholt [this message]
2017-04-23 1:01 ` [PATCH v2] " Rich Felker
2017-04-23 8:14 ` Joakim Sindholt
2017-04-23 15:07 ` Rich Felker
2017-04-23 16:38 ` Joakim Sindholt
2017-04-23 16:56 ` Rich Felker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170402073026.GA4177284@wirbelwind \
--to=opensource@zhasha.com \
--cc=musl@lists.openwall.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).