9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
* [9fans] URL Encode/Decode Verb
@ 2002-08-04 22:36 matt
  2002-08-19 15:36 ` Boyd Roberts
  0 siblings, 1 reply; 3+ messages in thread
From: matt @ 2002-08-04 22:36 UTC (permalink / raw)
  To: 9fans

Hi,

I've spent plenty of today sidetracked by this so I hope someone uses it :)

It's a verb for printing char* data as a URL encoded string.

It also does URL decoding

I must admit I've been having a bit of trouble with UTF (I think) as what
Windows types into drawterm seems different from the decoded strings. If
someone can offer some help there I'd appreciate it. That aside, well
behaved input produces well behaved output.

Because there is some UTF in the examples I don't trust the copy and paste
to work but it's only in the examples.
I've put the files on the web too.
http://www.proweb.co.uk/~matt/plan9/url_encode.c
or
http://www.proweb.co.uk/~matt/plan9/url_encode.c.gz

I'm quite impressed that I'm picking up this C stuff fairly easily.
I've got some user level file servers in development as well.

Whoever thought of user defined verbs was inspired. Well done to them.

Comments and suggestions welcome, as always.

By the way, is the static int url_trans_flag array the best way to do this?

M

#include <u.h>
#include <libc.h>
#include <ctype.h>

/*
 * url_encode: print the char* argument of the verb as a URL encoded string.
 *
 * Letters, digits, '-', '.' and '_' are copied through unchanged; every
 * other byte (including anything below 32 or above 126) is written as
 * %xx with two lower-case hex digits.  When the '+' flag (FmtSign) is
 * given, a space is written as '+' instead of %20.
 *
 * Returns the sum of the fmtprint results, or -1 if the working copy of
 * the argument could not be allocated.
 */
int url_encode(Fmt *fmt) {
 int cnt;
 int plus;
 uchar c;
 char *str_start;
 char *str_index;

 /* read the flag once, before any output is produced */
 plus = (fmt->flags & FmtSign) != 0;

 str_start = str_index = smprint("%s", va_arg(fmt->args, char *));
 if(!str_start)
  return -1;

 cnt = 0;
 /* c is a uchar so bytes with the top bit set compare as 128..255 */
 while((c = *str_index++) != 0) {
  if(c == ' ' && plus)
   cnt += fmtprint(fmt, "%c", '+');
  else if(c < 128 && (isalnum(c) || c == '-' || c == '.' || c == '_'))
   cnt += fmtprint(fmt, "%c", c);
  else
   cnt += fmtprint(fmt, "%%%02x", c);
 }
 free(str_start);
 return cnt;
}

/*
 * url_decode: print the char* argument of the verb with URL encoding removed.
 *
 * Every '+' becomes a space, "%%" becomes a single '%', and "%xx" (two hex
 * digits) is converted to the value xx and printed with %C.  A '%' that is
 * not followed by two hex digits is malformed and is silently dropped; the
 * text after it is kept.
 *
 * NOTE(review): the value of each %xx escape is printed as a Rune, one
 * escape at a time, so a multi-byte sequence such as %c3%b3 is not
 * reassembled into one character -- confirm this is the intended behaviour.
 *
 * Returns the sum of the fmtprint results, or -1 if the working copy of
 * the argument could not be allocated.
 */
int
url_decode(Fmt *fmt) {
 char *str_arg;
 char *str_start;
 char *token;
 int cnt, t;
 Rune r;

 /* work on a private copy: the loop below writes NULs into it */
 str_start = str_arg = smprint("%s", va_arg(fmt->args, char *));
 if(str_start == nil)
  return -1;

 /* '+' is translated first, so an escaped %2b still decodes to a real '+' */
 for(token = str_arg; *token; token++)
  if(*token == '+')
   *token = ' ';

 cnt = 0;
 while((token = strchr(str_arg, '%')) != nil) {
  /* emit the literal text in front of the '%' */
  if(token != str_arg) {
   *token = 0;
   cnt += fmtprint(fmt, "%s", str_arg);
  }

  /* a '%' at the very end of the string has nothing to escape */
  if(token[1] == 0) {
   str_arg = token + 1;
   continue;
  }

  if(token[1] == '%') {
   cnt += fmtprint(fmt, "%%");
   str_arg = token + 2;
   continue;
  }

  /*
   * token[1] is known to be non-NUL here, and isxdigit(0) is false, so
   * token[2] is only accepted when it is a real character.  The casts
   * keep bytes with the top bit set from being passed as negative values.
   */
  if(isxdigit((uchar)token[1]) && isxdigit((uchar)token[2])) {
   /* terminate after the two digits so strtol reads exactly those */
   t = token[3];
   token[3] = 0;
   r = strtol(&token[1], nil, 16);
   token[3] = t;
   cnt += fmtprint(fmt, "%C", r);
   str_arg = token + 3;
   continue;
  }

  /* malformed escape: drop the '%' and carry on with what follows it */
  str_arg = token + 1;
 }
 if(*str_arg)
  cnt += fmtprint(fmt, "%s", str_arg);
 free(str_start);
 return cnt;

}


/*
 * url_encoding: entry point for the URL verb.
 * The '-' flag (FmtLeft) selects decoding; without it the argument is encoded.
 */
int
url_encoding(Fmt *fmt) {
 int decode;

 decode = (fmt->flags & FmtLeft) != 0;
 if(!decode)
  return url_encode(fmt);
 return url_decode(fmt);
}






/*

here's an example

8c url_encode.c && 8l url_encode.8 && mv 8.out url_encode && ./url_encode

output is :

fancy : caf€ bl€nc
+enc : caf%e9+bl%e1nc
%20 enc : caf%e9%20bl%e1nc
plain : café blánc
oh_no : %c3%b3h%20n%c3%b3
óh nó

*/


/*
 * Demonstration: installs url_encoding as the 'R' verb and prints a string
 * containing two non-ASCII bytes encoded with and without the '+' flag,
 * then decodes a previously encoded string with the '-' flag.
 */
void
main (void) {
 /* an array, not a pointer to a literal: two bytes are patched below */
 char fancy_string[] = "cafe blanc";
 char *prev_estring = "caf%E9%20bl%E1nc";
 char *plus_enc_string;
 char *p20_enc_string;
 char *plain_string;

 /* replace the 'e' and the second 'a' with the bytes 0xE9 and 0xE1 */
 fancy_string[3] = 0xE9;
 fancy_string[7] = 0xE1;

 fmtinstall('R', url_encoding);

 plus_enc_string = smprint("%+R", fancy_string);
 p20_enc_string = smprint("%R", fancy_string);
 plain_string = smprint("%-R", prev_estring);

 print("fancy : %s\n+enc : %s\n%%20 enc : %s\nplain : %s\n", fancy_string,
plus_enc_string, p20_enc_string, plain_string);


 print("oh_no : %R\n%-R\n", "óh nó", "%c3%b3h%20n%c3%b3");


 exits(0);
}



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [9fans] URL Encode/Decode Verb
  2002-08-04 22:36 [9fans] URL Encode/Decode Verb matt
@ 2002-08-19 15:36 ` Boyd Roberts
  2002-08-19 16:35   ` matt
  0 siblings, 1 reply; 3+ messages in thread
From: Boyd Roberts @ 2002-08-19 15:36 UTC (permalink / raw)
  To: 9fans

matt wrote:

> It's a verb for printing char* data as a URL encoded string.
> It also does URL decoding

Why not get really into it and handle these MIME atrocities:

    From: Mec =?iso-8859-1?q?Givr=E9?= <mec@givre.fr>




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [9fans] URL Encode/Decode Verb
  2002-08-19 15:36 ` Boyd Roberts
@ 2002-08-19 16:35   ` matt
  0 siblings, 0 replies; 3+ messages in thread
From: matt @ 2002-08-19 16:35 UTC (permalink / raw)
  To: 9fans

> Why not get really into it and handle these MIME atrocities:
>
>     From: Mec =?iso-8859-1?q?Givr=E9?= <mec@givre.fr>

can't do anything atm. KFS crashed (no swap - doh) and my fonts got
corrupted. I can't see any text.
Plus I guess that it's not only the fonts.

I do get kernel panics too, so I'm getting a bit loath to trust KFS to stay
alive.
And what with KFS long file names being so clumsy, I think it's time to
build a file server.

hmm Anyone know where I can find a  Symbios 53C8XX SCSI adapter mail-order
in the uk?

m


---
Outgoing mail is certified as a Virus.
Checked by AVG anti-virus system (http://www.grisoft.com).
Version: 6.0.381 / Virus Database: 214 - Release Date: 02/08/2002



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2002-08-19 16:35 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-08-04 22:36 [9fans] URL Encode/Decode Verb matt
2002-08-19 15:36 ` Boyd Roberts
2002-08-19 16:35   ` matt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).