9front - general discussion about 9front
 help / color / mirror / Atom feed
* Fmt with prefix regex
@ 2020-01-04 22:22 ori
  0 siblings, 0 replies; only message in thread
From: ori @ 2020-01-04 22:22 UTC (permalink / raw)
  To: 9front

A respin of the old patch. Now, we support a regex instead
of a literal string, allowing us to do something like:

	fmt -p '([}>][ 	]*)*'

Which will reflow


	> } quote a long line
	> and a response

into

	> } quote a
	> } long line
	> and a
	> response

Using regex actually makes the patch both more flexible and
smaller.

Attached below:

diff -r c95260e1d096 sys/src/cmd/fmt.c
--- a/sys/src/cmd/fmt.c	Mon Dec 23 01:31:30 2019 +0100
+++ b/sys/src/cmd/fmt.c	Sat Jan 04 14:22:02 2020 -0800
@@ -2,13 +2,14 @@
 #include <libc.h>
 #include <bio.h>
 #include <ctype.h>
+#include <regexp.h>
 
 /*
  * block up paragraphs, possibly with indentation
  */
-
+Reprog *pfxpat = nil; 
 int extraindent = 0;		/* how many spaces to indent all lines */
-int indent = 0;			/* current value of indent, before extra indent */
+char *prefix = nil;		/* current value of indent, before extra indent */
 int length = 70;		/* how many columns per output line */
 int join = 1;			/* can lines be joined? */
 int maxtab = 8;
@@ -21,10 +22,10 @@
 {
 	Word	*next;
 
-	int	indent;
 	int	length;
 	char	bol;
-	char	text[];
+	char	*text;
+	char	*pfx;
 };
 
 void	fmt(void);
@@ -40,7 +41,7 @@
 main(int argc, char **argv)
 {
 	int i, f;
-	char *s, *err;
+	char *s, *pat, *err;
 
 	ARGBEGIN{
 	case 'i':
@@ -53,19 +54,29 @@
 	case 'l':
 		length = atoi(EARGF(usage()));
 		break;
+	case 'p':
+		pat = smprint("^(%s)", EARGF(usage()));
+		pfxpat = regcomp(pat);
+		free(pat);
+		if(pfxpat == nil)
+			sysfatal("regcomp: %r");
+		break;
 	default:
 		usage();
 	}ARGEND
 
-	if(length <= indent){
+	if(length <= extraindent){
 		fprint(2, "%s: line length<=indentation\n", argv0);
 		exits("length");
 	}
+	if(pfxpat == nil)
+		pfxpat = regcomp("^([ \t]*)");
 
 	s=getenv("tabstop");
 	if(s!=nil && atoi(s)>0)
 		maxtab=atoi(s);
 	err = nil;
+	prefix = strdup("");
 	Binit(&bout, 1, OWRITE);
 	if(argc <= 0){
 		Binit(&bin, 0, OREAD);
@@ -88,41 +99,49 @@
 	exits(err);
 }
 
+void*
+emalloc(uint sz)	/* malloc wrapper that calls sysfatal instead of returning nil */
+{
+	void *p;
+
+	if((p = malloc(sz)) == nil)
+		sysfatal("emalloc: %r");
+	setmalloctag(p, getcallerpc(&sz));	/* tag the block with the caller's pc */
+	return p;
+}
+
 int
-indentof(char *s)
+width(char *s)
 {
-	int ind;
+	Rune r;
+	int w;
 
-	ind = 0;
-	for(; *s != '\0'; s++)
-		switch(*s){
-		default:
-			return ind;
-		case ' ':
-			ind++;
-			break;
-		case '\t':
-			ind += maxtab;
-			ind -= ind%maxtab;
-			break;
-		}
-
-	/* plain white space doesn't change the indent */
-	return indent;
+	/*
+	 * Column width of s; a tab advances to the next maxtab stop.
+	 * Doesn't handle accents or double width characters.
+	 */
+	w = 0;
+	while(*s){
+		s += chartorune(&r, s);
+		if(r == '\t')
+			w += maxtab - 1 - w % maxtab;	/* w++ below lands on the stop */
+		w++;
+	}
+	return w;
 }
 
 Word*
-newword(char *s, int n, int ind, int bol)
+newword(char *s, int n, char *pfx, int bol)
 {
 	Word *w;
 
-	w = malloc(sizeof(Word) + n+1);
+	w = emalloc(sizeof(*w));	/* text is now a separate allocation */
+	w->text = emalloc(n + 1);
+	strecpy(w->text, w->text + n + 1, s);	/* first n bytes of s, NUL-terminated */
+	w->pfx = pfx ? strdup(pfx) : nil;	/* own copy; nil pfx marks a blank line */
 	w->next = nil;
-	w->indent = ind;
 	w->bol = bol;
-	memmove(w->text, s, n);
-	w->text[n] = 0;
-	w->length = utflen(w->text);
+	w->length = width(w->text);
 	return w;
 }
 
@@ -130,8 +149,10 @@
 getword(void)
 {
 	static Word *head, *tail;
-	char *line, *s;
+	char *line, *s, *p;
+	Resub m[2];
 	Word *w;
+	int n;
 	
 	w = head;
 	if(w != nil){
@@ -142,20 +163,29 @@
 	if(line == nil)
 		return nil;
 	tail = nil;
-	indent = indentof(line);
+	free(prefix);
+	memset(m, 0, sizeof(m));
+	if(regexec(pfxpat, line, m, nelem(m)) && m[0].sp != m[0].ep){
+		n = m[0].ep - m[0].sp + 1;
+		p = emalloc(n);
+		strecpy(p, p + n, m[0].sp);
+		line = m[0].ep;
+	}else
+		p = strdup("");
+	prefix = p;
 	for(;;){
 		while(*line == ' ' || *line == '\t')
 			line++;
 		if(*line == '\0'){
 			if(head == nil)
-				return newword("", 0, -1, 1);
+				return newword("", 0, nil, 1);
 			break;
 		}
 		/* how long is this word? */
 		for(s=line++; *line != '\0'; line++)
 			if(*line==' ' || *line=='\t')
 				break;
-		w = newword(s, line-s, indent, head==nil);
+		w = newword(s, line-s, prefix, head==nil);
 		if(head == nil)
 			head = w;
 		else
@@ -168,7 +198,7 @@
 }
 
 void
-printindent(int w)
+printindent(int w, char *pfx)
 {
 	while(w >= maxtab){
 		Bputc(&bout, '\t');
@@ -178,6 +208,7 @@
 		Bputc(&bout, ' ');
 		w--;
 	}
+	Bprint(&bout, "%s", pfx);
 }
 
 /* give extra space if word ends with period, etc. */
@@ -196,6 +227,16 @@
 	return 1;
 }
 
+int
+zstreq(char *a, char *b)	/* string equality where either side may be nil */
+{
+	if(a == b)
+		return 1;	/* same pointer, including both nil */
+	if(a && b)
+		return strcmp(a, b) == 0;
+	return 0;	/* exactly one side is nil */
+}
+
 void
 fmt(void)
 {
@@ -204,15 +245,17 @@
 
 	w = getword();
 	while(w != nil){
-		if(w->indent == -1){
+		if(w->pfx == nil){
 			Bputc(&bout, '\n');
+			free(w->text);
+			free(w->pfx);
 			free(w);
 			w = getword();
 			if(w == nil)
 				break;
 		}
-		col = w->indent;
-		printindent(extraindent+col);
+		col = width(w->pfx);
+		printindent(extraindent, w->pfx);
 		/* emit words until overflow; always emit at least one word */
 		for(;;){
 			Bprint(&bout, "%s", w->text);
@@ -221,7 +264,7 @@
 			w = getword();
 			if(w == nil)
 				break;
-			if(w->indent != o->indent)
+			if(!zstreq(w->pfx, o->pfx))
 				break;	/* indent change */
 			nsp = nspaceafter(o->text);
 			if(col+nsp+w->length > length)
@@ -232,6 +275,8 @@
 				Bputc(&bout, ' ');	/* emit space; another word will follow */
 				col++;
 			}
+			free(o->text);
+			free(o->pfx);
 			free(o);
 		}
 		free(o);



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-01-04 22:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-04 22:22 Fmt with prefix regex ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).