9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] sed: fix moving '^' match
@ 2023-08-25 21:46 ori
  2023-08-26  0:04 ` [9front] " Anthony Martin
  2023-08-26  9:05 ` [9front] " hiro
  0 siblings, 2 replies; 16+ messages in thread
From: ori @ 2023-08-25 21:46 UTC (permalink / raw)
  To: 9front; +Cc: k0ga

Currently, if you do something like:

	echo aabbccd | sed s/^..//g

it will output simply:

	'd'

the start of line match moving around as the
replacement progresses is unexpected, and
inconsistent with the way that other sed
implementations behave.

This happens because in our sed, we process
substitutions match by match, applying the
substitution as we go; normally, this is
unobservable, and the substitution looks
atomic, as regexp matches never look back;
the one exception is the '^' operator,
which checks if the current char is at the
start of the string or was a newline.

This patch works by adding a dummy character
at the start of the line, so we aren't at
the start of a line after the first sub.

This patch brings us in line with at least
openbsd and gnu sed, as well as reducing
the amount of surprise I experience when
I put a 'g' in a match out of habit.

Before this patch:

	echo abc | sed s/^.//g => ''
	echo abc | sed s/.$//g => 'ab'

after:

	echo abc | sed s/^.//g => 'bc'
	echo abc | sed s/.$//g => 'ab'

anyone aware of any unexpected side effects
that this may have?


diff 44a2f89a03c370940fa0f4747c2357c73984d653 uncommitted
--- a/sys/src/cmd/sed.c
+++ b/sys/src/cmd/sed.c
@@ -127,9 +127,10 @@
 Rune	*loc2;				/* End of pattern match */
 Rune	seof;				/* Pattern delimiter char */
 
-Rune	linebuf[LBSIZE+1];		/* Input data buffer */
-Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
-Rune	*spend = linebuf;		/* End of input data */
+Rune	linestor[LBSIZE+1];		/* Input data storage */
+Rune	*linebuf = linestor+1;		/* Input data buffer */
+Rune	*lbend = linestor+LBSIZE;	/* End of buffer */
+Rune	*spend = linestor;		/* End of input data */
 Rune	*cp;				/* Current scan point in linebuf */
 
 Rune	holdsp[LBSIZE+1];		/* Hold buffer */
@@ -187,7 +188,7 @@
 void	fcomp(void);
 long	getrune(void);
 Rune	*gline(Rune *);
-int	match(Reprog *, Rune *);
+int	match(Reprog *, Rune *, int);
 void	newfile(enum PTYPE, char *);
 int 	opendata(void);
 Biobuf	*open_file(char *);
@@ -980,7 +981,7 @@
 			ipc->active = 0;	/* out of range */
 			return ipc->negfl;
 		case A_RE:		/* Check for matching R.E. */
-			if (match(ipc->ad2.rp, linebuf))
+			if (match(ipc->ad2.rp, linebuf, 1))
 				ipc->active = 0;
 			return !ipc->negfl;
 		default:
@@ -1001,7 +1002,7 @@
 		}
 		break;
 	case A_RE:			/* Check R.E. */
-		if (match(ipc->ad1.rp, linebuf)) {
+		if (match(ipc->ad1.rp, linebuf, 1)) {
 			ipc->active = 1;	/* In range */
 			return !ipc->negfl;
 		}
@@ -1013,13 +1014,22 @@
 }
 
 int
-match(Reprog *pattern, Rune *buf)
+match(Reprog *pattern, Rune *buf, int first)
 {
+	Rune *p;
+
 	if (!pattern)
 		return 0;
+	/*
+	 * a regex that replaces the start of a line
+	 * with an empty string moves the location of a
+	 * '^' match, so we need to insert a dummy char
+	 * when we're not on the first match of a line.
+	 */
+	p = first ? linebuf : linestor;
 	subexp[0].rsp = buf;
 	subexp[0].ep = 0;
-	if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
+	if (rregexec(pattern, p, subexp, MAXSUB) > 0) {
 		loc1 = subexp[0].rsp;
 		loc2 = subexp[0].rep;
 		return 1;
@@ -1033,7 +1043,7 @@
 {
 	int len;
 
-	if(!match(ipc->re1, linebuf))
+	if(!match(ipc->re1, linebuf, 1))
 		return 0;
 
 	/*
@@ -1054,7 +1064,7 @@
 				loc2++;		/* bump over 0-length match */
 			if(*loc2 == 0)		/* end of string */
 				break;
-		} while(match(ipc->re1, loc2));
+		} while(match(ipc->re1, loc2, 0));
 	return 1;
 }
 


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-09-24 15:42 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-25 21:46 [9front] sed: fix moving '^' match ori
2023-08-26  0:04 ` [9front] " Anthony Martin
2023-08-26  9:14   ` hiro
2023-08-26 17:58     ` ori
2023-08-26 18:02     ` ori
2023-08-26  9:05 ` [9front] " hiro
2023-08-26  9:41   ` ieliedonge
2023-08-26 16:48     ` hiro
2023-08-26 18:00       ` ori
2023-08-26 23:23       ` ieliedonge
2023-08-27  8:33         ` hiro
2023-08-27  9:54           ` tlaronde
2023-08-27 11:19             ` ieliedonge
2023-09-23  1:35             ` ieliedonge
2023-09-23 10:30               ` hiro
2023-09-24 15:35               ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).