From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <002c01c23c07$62ef4200$6501a8c0@KIKE> From: "matt" To: <9fans@cse.psu.edu> MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 8bit Subject: [9fans] URL Encode/Decode Verb Date: Sun, 4 Aug 2002 23:36:32 +0100 Topicbox-Message-UUID: d8163784-eaca-11e9-9e20-41e7f4b1d025 Hi, I've spent plenty of today sidetracked by this so I hope someone uses it :) It's a verb for printing char* data as a URL encoded string. It also does URL decoding. I must admit I've been having a bit of trouble with UTF (I think), as what Windows types into drawterm seems different from the decoded strings. If someone can offer some help there I'd appreciate it. That aside, well behaved input produces well behaved output. Because there is some UTF in the examples I don't trust the copy and paste to work, but it's only in the examples. I've put the files on the web too. http://www.proweb.co.uk/~matt/plan9/url_encode.c or http://www.proweb.co.uk/~matt/plan9/url_encode.c.gz I'm quite impressed I'm picking up this C stuff fairly easily. I've got some user level file servers in development as well. Whoever thought of user defined verbs was inspired. Well done to them. Comments and suggestions welcome, as always. Is the static int url_trans_flag array the best way to do this, btw?
M #include #include #include int url_encode(Fmt *fmt) { int cnt; uchar c, e; char *str_start; char *str_index; static int url_trans_flag[] = { [32] 0, [33] 0, [34] 0, [35] 0, [36] 0, [37] 0, [38] 0, [39] 0, [40] 0, [41] 0, [42] 0, [43] 0, [44] 0, [45] '-', [46] '.', [47] 0, [48] '0', [49] '1', [50] '2', [51] '3', [52] '4', [53] '5', [54] '6', [55] '7', [56] '8', [57] '9', [58] 0, [59] 0, [60] 0, [61] 0, [62] 0, [63] 0, [64] 0, [65] 'A', [66] 'B', [67] 'C', [68] 'D', [69] 'E', [70] 'F', [71] 'G', [72] 'H', [73] 'I', [74] 'J', [75] 'K', [76] 'L', [77] 'M', [78] 'N', [79] 'O', [80] 'P', [81] 'Q', [82] 'R', [83] 'S', [84] 'T', [85] 'U', [86] 'V', [87] 'W', [88] 'X', [89] 'Y', [90] 'Z', [91] 0, [92] 0, [93] 0, [94] 0, [95] '_', [96] 0, [97] 'a', [98] 'b', [99] 'c', [100] 'd', [101] 'e', [102] 'f', [103] 'g', [104] 'h', [105] 'i', [106] 'j', [107] 'k', [108] 'l', [109] 'm', [110] 'n', [111] 'o', [112] 'p', [113] 'q', [114] 'r', [115] 's', [116] 't', [117] 'u', [118] 'v', [119] 'w', [120] 'x', [121] 'y', [122] 'z', [123] 0, [124] 0, [125] 0, [126] 0, [127] 0 }; str_start = str_index= smprint("%s", va_arg(fmt->args, char *)); if (fmt->flags & FmtSign) url_trans_flag[32] = '+'; else url_trans_flag[32] = 0; cnt = 0; while(c = *(str_index++)) { if (c > 127 || c < 32) { cnt += fmtprint(fmt, "%%%02x", c); continue; } e = url_trans_flag[c]; if(e) cnt += fmtprint(fmt, "%c", e); else cnt += fmtprint(fmt, "%%%02x", c); } free(str_start); return cnt; } int url_decode(Fmt *fmt) { char *str_arg; char *str_start; char * token; int token_index; int str_length; int ante_token_length; int c, cnt, t; Rune r; str_start = str_arg = smprint("%s", va_arg(fmt->args, char *)); for (token = str_arg ; *token; token++) if (*(token) == '+') *token = ' '; cnt = 0; while(token = strchr(str_arg, '%')) { str_length = strlen(str_arg); token_index = str_length - strlen(token); ante_token_length = str_length - token_index - 1; if (token_index) { str_arg[token_index] = 0; cnt += fmtprint(fmt, "%s", 
str_arg); } if (ante_token_length < 1) { str_arg = token +1; continue; } if(token[1] == '%') { cnt += fmtprint(fmt, "%%"); str_arg = &token[2]; continue; } if ((ante_token_length > 1) && isxdigit(token[1]) && isxdigit(token[2]) ) { t = token[3]; token[3] = 0; r = strtol(&token[1], nil, 16); cnt += fmtprint(fmt, "%C",r); token[3] = t; str_arg = &token[3]; continue; } // if we get here the string is malformed, I'll silently drop it str_arg++; } if(*str_arg) cnt += fmtprint(fmt, "%s", str_arg); free(str_start); return cnt; } int url_encoding(Fmt *fmt) { if(fmt->flags & FmtLeft) return url_decode(fmt); else return url_encode(fmt); } /* here's an example 8c url_encode.c && 8l url_encode.8 && mv 8.out url_encode && ./url_encode output is : fancy : caf€ bl€nc +enc : caf%e9+bl%e1nc %20 enc : caf%e9%20bl%e1nc plain : café blánc oh_no : %c3%b3h%20n%c3%b3 óh nó */ void main (void) { char *fancy_string = "cafe blanc"; char *prev_estring = "caf%E9%20bl%E1nc"; char *plus_enc_string; char *p20_enc_string; char *plain_string; fancy_string[3] = 0xE9; fancy_string[7] = 0xE1; fmtinstall('R', url_encoding); plus_enc_string = smprint("%+R", fancy_string); p20_enc_string = smprint("%R", fancy_string); plain_string = smprint("%-R", prev_estring); print("fancy : %s\n+enc : %s\n%%20 enc : %s\nplain : %s\n", fancy_string, plus_enc_string, p20_enc_string, plain_string); print("oh_no : %R\n%-R\n", "óh nó", "%c3%b3h%20n%c3%b3"); exits(0); }