* Fmt with prefix regex
@ 2020-01-04 22:22 ori
0 siblings, 0 replies; only message in thread
From: ori @ 2020-01-04 22:22 UTC (permalink / raw)
To: 9front
A respin of the old patch. Now, we support a regex instead
of a literal string, allowing us to do something like:
fmt -p '([}>][ ]*)*'
Which will reflow
> } quote a long line
> and a response
into
> } quote a
> } long line
> and a
> response
Using regex actually makes the patch both more flexible and
smaller.
Attached below:
diff -r c95260e1d096 sys/src/cmd/fmt.c
--- a/sys/src/cmd/fmt.c Mon Dec 23 01:31:30 2019 +0100
+++ b/sys/src/cmd/fmt.c Sat Jan 04 14:22:02 2020 -0800
@@ -2,13 +2,14 @@
#include <libc.h>
#include <bio.h>
#include <ctype.h>
+#include <regexp.h>
/*
* block up paragraphs, possibly with indentation
*/
-
+Reprog *pfxpat = nil; /* compiled prefix pattern; set by -p, else main() defaults it to leading blanks */
int extraindent = 0; /* how many spaces to indent all lines */
-int indent = 0; /* current value of indent, before extra indent */
+char *prefix = nil; /* prefix matched on the current input line, printed after the extra indent */
int length = 70; /* how many columns per output line */
int join = 1; /* can lines be joined? */
int maxtab = 8; /* columns per tab stop; main() overrides it from $tabstop */
@@ -21,10 +22,10 @@
{
Word *next;
- int indent;
int length;
char bol;
- char text[];
+ char *text;
+ char *pfx;
};
void fmt(void);
@@ -40,7 +41,7 @@
main(int argc, char **argv)
{
int i, f;
- char *s, *err;
+ char *s, *pat, *err;
ARGBEGIN{
case 'i':
@@ -53,19 +54,29 @@
case 'l':
length = atoi(EARGF(usage()));
break;
+ case 'p':
+ pat = smprint("^(%s)", EARGF(usage()));
+ pfxpat = regcomp(pat);
+ free(pat);
+ if(pfxpat == nil)
+ sysfatal("regcomp: %r");
+ break;
default:
usage();
}ARGEND
- if(length <= indent){
+ if(length <= extraindent){
fprint(2, "%s: line length<=indentation\n", argv0);
exits("length");
}
+ if(pfxpat == nil)
+ pfxpat = regcomp("^([ \t]*)");
s=getenv("tabstop");
if(s!=nil && atoi(s)>0)
maxtab=atoi(s);
err = nil;
+ prefix = strdup("");
Binit(&bout, 1, OWRITE);
if(argc <= 0){
Binit(&bin, 0, OREAD);
@@ -88,41 +99,49 @@
exits(err);
}
+/*
+ * malloc that never returns nil: a failed allocation is fatal.
+ * The block is tagged with the caller's pc via setmalloctag.
+ */
+void*
+emalloc(uint sz)
+{
+	void *p;
+
+	p = malloc(sz);
+	if(p == nil)
+		sysfatal("emalloc: %r");
+	setmalloctag(p, getcallerpc(&sz));
+	return p;
+}
+
int
-indentof(char *s)
+width(char *s)
{
-	int ind;
+	Rune r;
+	int w;
-	ind = 0;
-	for(; *s != '\0'; s++)
-		switch(*s){
-		default:
-			return ind;
-		case ' ':
-			ind++;
-			break;
-		case '\t':
-			ind += maxtab;
-			ind -= ind%maxtab;
-			break;
-		}
-
-	/* plain white space doesn't change the indent */
-	return indent;
+	/*
+	 * Column width of s, counting one column per rune and
+	 * moving to the next multiple of maxtab on a tab.
+	 * Doesn't handle things like accents and
+	 * double width characters.
+	 */
+	w = 0;
+	while(*s){
+		s += chartorune(&r, s);
+		if(r == '\t')
+			w += maxtab - 1 - w%maxtab;	/* the w++ below lands on the tab stop */
+		w++;
+	}
+	return w;
}
Word*
-newword(char *s, int n, int ind, int bol)
+newword(char *s, int n, char *pfx, int bol)
{
Word *w;
- w = malloc(sizeof(Word) + n+1);
+ w = emalloc(sizeof(*w), 1))
+ w->text = emalloc(n + 1);
+ strecpy(w->text, w->text + n + 1, s);
+ w->pfx = pfx ? strdup(pfx) : nil;
w->next = nil;
- w->indent = ind;
w->bol = bol;
- memmove(w->text, s, n);
- w->text[n] = 0;
- w->length = utflen(w->text);
+ w->length = width(w->text);
return w;
}
@@ -130,8 +149,10 @@
getword(void)
{
static Word *head, *tail;
- char *line, *s;
+ char *line, *s, *p;
+ Resub m[2];
Word *w;
+ int n;
w = head;
if(w != nil){
@@ -142,20 +163,29 @@
if(line == nil)
return nil;
tail = nil;
- indent = indentof(line);
+ free(prefix);
+ memset(m, 0, sizeof(m));
+ if(regexec(pfxpat, line, m, nelem(m)) && m[0].sp != m[0].ep){
+ n = m[0].ep - m[0].sp + 1;
+ p = emalloc(n);
+ strecpy(p, p + n, m[0].sp);
+ line = m[0].ep;
+ }else
+ p = strdup("");
+ prefix = p;
for(;;){
while(*line == ' ' || *line == '\t')
line++;
if(*line == '\0'){
if(head == nil)
- return newword("", 0, -1, 1);
+ return newword("", 0, nil, 1);
break;
}
/* how long is this word? */
for(s=line++; *line != '\0'; line++)
if(*line==' ' || *line=='\t')
break;
- w = newword(s, line-s, indent, head==nil);
+ w = newword(s, line-s, prefix, head==nil);
if(head == nil)
head = w;
else
@@ -168,7 +198,7 @@
}
void
-printindent(int w)
+printindent(int w, char *pfx)
{
while(w >= maxtab){
Bputc(&bout, '\t');
@@ -178,6 +208,7 @@
Bputc(&bout, ' ');
w--;
}
+ Bprint(&bout, "%s", pfx);
}
/* give extra space if word ends with period, etc. */
@@ -196,6 +227,16 @@
return 1;
}
+/*
+ * Compare two strings that may be nil: equal when both are nil
+ * or both hold the same bytes.
+ */
+int
+zstreq(char *a, char *b)
+{
+	if(a == b)
+		return 1;
+	if(!a || !b)
+		return 0;
+	return strcmp(a, b) == 0;
+}
+
void
fmt(void)
{
@@ -204,15 +245,17 @@
w = getword();
while(w != nil){
- if(w->indent == -1){
+ if(w->pfx == nil){
Bputc(&bout, '\n');
+ free(w->text);
+ free(w->pfx);
free(w);
w = getword();
if(w == nil)
break;
}
- col = w->indent;
- printindent(extraindent+col);
+ col = width(w->pfx);
+ printindent(extraindent, w->pfx);
/* emit words until overflow; always emit at least one word */
for(;;){
Bprint(&bout, "%s", w->text);
@@ -221,7 +264,7 @@
w = getword();
if(w == nil)
break;
- if(w->indent != o->indent)
+ if(!zstreq(w->pfx, o->pfx))
break; /* indent change */
nsp = nspaceafter(o->text);
if(col+nsp+w->length > length)
@@ -232,6 +275,8 @@
Bputc(&bout, ' '); /* emit space; another word will follow */
col++;
}
+ free(o->text);
+ free(o->pfx);
free(o);
}
free(o);
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-01-04 22:22 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-04 22:22 Fmt with prefix regex ori
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).