From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Sat Jan 4 17:22:29 EST 2020 Received: from abbatoir.fios-router.home (pool-96-239-17-137.nycmny.fios.verizon.net [96.239.17.137]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id 499f0c3c (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Sat, 4 Jan 2020 14:22:21 -0800 (PST) Message-ID: To: 9front@9front.org Subject: Fmt with prefix regex Date: Sat, 4 Jan 2020 14:22:20 -0800 From: ori@eigenstate.org MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: STM firewall blockchain enhancement A respin of the old patch. Now, we support a regex instead of a literal string, allowing us to do something like: fmt -p '([}>][ ]*)*' Which will reflow > } quote a long line > and a response into > } quote a > } long line > and a > response Using regex actually makes the patch both more flexible and smaller. Attached below: diff -r c95260e1d096 sys/src/cmd/fmt.c --- a/sys/src/cmd/fmt.c Mon Dec 23 01:31:30 2019 +0100 +++ b/sys/src/cmd/fmt.c Sat Jan 04 14:22:02 2020 -0800 @@ -2,13 +2,14 @@ #include #include #include +#include /* * block up paragraphs, possibly with indentation */ - +Reprog *pfxpat = nil; int extraindent = 0; /* how many spaces to indent all lines */ -int indent = 0; /* current value of indent, before extra indent */ +char *prefix = nil; /* current value of indent, before extra indent */ int length = 70; /* how many columns per output line */ int join = 1; /* can lines be joined? 
*/ int maxtab = 8; @@ -21,10 +22,10 @@ { Word *next; - int indent; int length; char bol; - char text[]; + char *text; + char *pfx; }; void fmt(void); @@ -40,7 +41,7 @@ main(int argc, char **argv) { int i, f; - char *s, *err; + char *s, *pat, *err; ARGBEGIN{ case 'i': @@ -53,19 +54,29 @@ case 'l': length = atoi(EARGF(usage())); break; + case 'p': + pat = smprint("^(%s)", EARGF(usage())); + pfxpat = regcomp(pat); + free(pat); + if(pfxpat == nil) + sysfatal("regcomp: %r"); + break; default: usage(); }ARGEND - if(length <= indent){ + if(length <= extraindent){ fprint(2, "%s: line length<=indentation\n", argv0); exits("length"); } + if(pfxpat == nil) + pfxpat = regcomp("^([ \t]*)"); s=getenv("tabstop"); if(s!=nil && atoi(s)>0) maxtab=atoi(s); err = nil; + prefix = strdup(""); Binit(&bout, 1, OWRITE); if(argc <= 0){ Binit(&bin, 0, OREAD); @@ -88,41 +99,49 @@ exits(err); } +void* +emalloc(uint sz) +{ + void *p; + + if((p = malloc(sz)) == nil) + sysfatal("emalloc: %r"); + setmalloctag(p, getcallerpc(&sz)); + return p; +} + int -indentof(char *s) +width(char *s) { - int ind; + Rune r; + int w; - ind = 0; - for(; *s != '\0'; s++) - switch(*s){ - default: - return ind; - case ' ': - ind++; - break; - case '\t': - ind += maxtab; - ind -= ind%maxtab; - break; - } - - /* plain white space doesn't change the indent */ - return indent; + /* + * Doesn't handle things like accents and + * double width characters. + */ + w = 0; + while(*s){ + s += chartorune(&r, s); + if(r == '\t') + w += (w + 7) % 8; + w++; + } + return w; } Word* -newword(char *s, int n, int ind, int bol) +newword(char *s, int n, char *pfx, int bol) { Word *w; - w = malloc(sizeof(Word) + n+1); + w = emalloc(sizeof(*w), 1)) + w->text = emalloc(n + 1); + strecpy(w->text, w->text + n + 1, s); + w->pfx = pfx ? 
strdup(pfx) : nil; w->next = nil; - w->indent = ind; w->bol = bol; - memmove(w->text, s, n); - w->text[n] = 0; - w->length = utflen(w->text); + w->length = width(w->text); return w; } @@ -130,8 +149,10 @@ getword(void) { static Word *head, *tail; - char *line, *s; + char *line, *s, *p; + Resub m[2]; Word *w; + int n; w = head; if(w != nil){ @@ -142,20 +163,29 @@ if(line == nil) return nil; tail = nil; - indent = indentof(line); + free(prefix); + memset(m, 0, sizeof(m)); + if(regexec(pfxpat, line, m, nelem(m)) && m[0].sp != m[0].ep){ + n = m[0].ep - m[0].sp + 1; + p = emalloc(n); + strecpy(p, p + n, m[0].sp); + line = m[0].ep; + }else + p = strdup(""); + prefix = p; for(;;){ while(*line == ' ' || *line == '\t') line++; if(*line == '\0'){ if(head == nil) - return newword("", 0, -1, 1); + return newword("", 0, nil, 1); break; } /* how long is this word? */ for(s=line++; *line != '\0'; line++) if(*line==' ' || *line=='\t') break; - w = newword(s, line-s, indent, head==nil); + w = newword(s, line-s, prefix, head==nil); if(head == nil) head = w; else @@ -168,7 +198,7 @@ } void -printindent(int w) +printindent(int w, char *pfx) { while(w >= maxtab){ Bputc(&bout, '\t'); @@ -178,6 +208,7 @@ Bputc(&bout, ' '); w--; } + Bprint(&bout, "%s", pfx); } /* give extra space if word ends with period, etc. 
*/ @@ -196,6 +227,16 @@ return 1; } +int +zstreq(char *a, char *b) +{ + if(a == b) + return 1; + if(a && b) + return strcmp(a, b) == 0; + return 0; +} + void fmt(void) { @@ -204,15 +245,17 @@ w = getword(); while(w != nil){ - if(w->indent == -1){ + if(w->pfx == nil){ Bputc(&bout, '\n'); + free(w->text); + free(w->pfx); free(w); w = getword(); if(w == nil) break; } - col = w->indent; - printindent(extraindent+col); + col = width(w->pfx); + printindent(extraindent, w->pfx); /* emit words until overflow; always emit at least one word */ for(;;){ Bprint(&bout, "%s", w->text); @@ -221,7 +264,7 @@ w = getword(); if(w == nil) break; - if(w->indent != o->indent) + if(!zstreq(w->pfx, o->pfx)) break; /* indent change */ nsp = nspaceafter(o->text); if(col+nsp+w->length > length) @@ -232,6 +275,8 @@ Bputc(&bout, ' '); /* emit space; another word will follow */ col++; } + free(o->text); + free(o->pfx); free(o); } free(o);