From mboxrd@z Thu Jan 1 00:00:00 1970 From: erik quanstrom Date: Mon, 30 Mar 2009 14:55:03 -0400 To: 9fans@9fans.net Message-ID: <0a4210eab763d755c83895780bf505a5@coraid.com> In-Reply-To: <138575260903300830q546ddabv46227554c3630688@mail.gmail.com> References: <138575260903300830q546ddabv46227554c3630688@mail.gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 8bit Subject: Re: [9fans] UTF and the preprocessor Topicbox-Message-UUID: cda1f6c6-ead4-11e9-9d60-3106f5b1d025 others more qualified may disagree, but in this case i think an enum would be preferred. however, there are some cases where an enum will not do. for example, #define prγ(γ, fmt, ...) if(γ >1.) print(fmt, __VA_ARGS__) i think that this patch will do the trick. i continued the assumption that the ctype macros are valid for any c, even if !isascii(c). this diff is hard to read, the source is also at http://www.quanstro.net/plan9/macbody /n/dump/2009/0330/sys/src/cmd/cc/lex.c:1029,1035 - lex.c:1029,1035 } else c = GETC(); for(;;) { - if(!isspace(c)) + if(c >= Runeself || !isspace(c)) return c; if(c == '\n') { lineno++; ; diffy -c macbody /n/dump/2009/0330/sys/src/cmd/cc/macbody:18,39 - macbody:18,44 return n; } - Sym* - getsym(void) + static void + nextsym(int c) { - int c; + int c1; char *cp; - c = getnsc(); - if(!isalpha(c) && c != '_') { - unget(c); - return S; - } for(cp = symb;;) { - if(cp <= symb+NSYMB-4) - *cp++ = c; + if(c >= Runeself) { + for(c1=0;;) { + if(cp <= symb+NSYMB-4) + cp[c1++] = c; + if(fullrune(cp, c1)) + break; + c = getc(); + } + cp += c1; + }else + if(cp <= symb+NSYMB-4) + *cp++ = c; c = getc(); - if(isalnum(c) || c == '_') + if(c >= Runeself || isalnum(c) || c == '_') continue; unget(c); break; /n/dump/2009/0330/sys/src/cmd/cc/macbody:41,46 - macbody:46,64 *cp = 0; if(cp > symb+NSYMB-4) yyerror("symbol too large: %s", symb); + } + + Sym* + getsym(void) + { + int c; + + c = getnsc(); + if(c < Runeself && !isalpha(c) && c != '_') { 
+ unget(c); + return S; + } + nextsym(c); return lookup(); } /n/dump/2009/0330/sys/src/cmd/cc/macbody:193,199 - macbody:211,217 macdef(void) { Sym *s, *a; - char *args[NARG], *np, *base; + char *args[NARG], *base; int n, i, c, len, dots; int ischr; /n/dump/2009/0330/sys/src/cmd/cc/macbody:235,249 - macbody:253,261 len = 1; ischr = 0; for(;;) { - if(isalpha(c) || c == '_') { - np = symb; - *np++ = c; + if(c >= Runeself || isalpha(c) || c == '_') { + nextsym(c); c = getc(); - while(isalnum(c) || c == '_') { - *np++ = c; - c = getc(); - } - *np = 0; for(i=0; i= Runeself || !isspace(c)) bol = 0; if(c == '\n') bol = 1;