9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
* [9fans] sed line limit
@ 2010-03-15 13:58 erik quanstrom
  0 siblings, 0 replies; only message in thread
From: erik quanstrom @ 2010-03-15 13:58 UTC (permalink / raw)
  To: 9fans

it appears that sed won't handle an input line longer than 8k characters.
yet no diagnostic is printed.  and this is not mentioned in the
man page.

example
	for(i in `{seq 3000 3849}) x = ($x IMG_$i.JPG)
	echo $x | sed 's:IMG_([0-9]+)\.JPG:\1:g'
no error is printed, yet the output is too short and the tail
is corrupted (... 3565 IM)

i added a diagnostic for input line too long.
in trying to test this with
	cat fileof8192is | sed 's/i/j/g'
i found that a single long line can result in at least
1 "sed: Output line too long." per character of the input
line.  i also addressed this problem.  now just one
"Output line too long." message is printed per line.

the diff would be shorter if i weren't paranoid
that the output buffer could lose its null terminator.
i couldn't provoke that situation so perhaps i just haven't
read the source carefully enough.

- erik


/n/dump/2010/0315/sys/src/cmd/sed.c:161,166 - sed.c:161,167
  Rune	bad;				/* Dummy err ptr reference */
  Rune	*badp = &bad;

+ int	tlwarn;				/* during sub: have warned too long */

  char	CGMES[]	 = 	"%S command garbled: %S";
  char	TMMES[]	 = 	"Too much text: %S";
/n/dump/2010/0315/sys/src/cmd/sed.c:197,202 - sed.c:198,204
  char	*text(char *);
  Rune	*stext(Rune *, Rune *);
  int	ycomp(SedCom *);
+ void	toolong(void);
  char *	trans(int c);
  void	putline(Biobuf *bp, Rune *buf, int n);
  void	ebputc(Biobufhdr*, int);
/n/dump/2010/0315/sys/src/cmd/sed.c:697,705 - sed.c:699,708
  int
  rline(Rune *buf, Rune *end)
  {
- 	long c;
+ 	long c, w;
  	Rune r;

+ 	w = 0;
  	while ((c = getrune()) >= 0) {
  		r = c;
  		if (r == '\\') {
/n/dump/2010/0315/sys/src/cmd/sed.c:714,719 - sed.c:717,726
  		}
  		if (buf <= end)
  			*buf++ = r;
+ 		else if(w == 0){
+ 			fprint(2, "sed: Input line too long.\n");
+ 			w = 1;
+ 		}
  	}
  	*buf = '\0';
  	return -1;
/n/dump/2010/0315/sys/src/cmd/sed.c:1022,1027 - sed.c:1029,1035
  	 * bump to the character after a 0-length match to keep from looping.
  	 */
  	sflag = 1;
+ 	tlwarn = 0;
  	if(ipc->gfl == 0)			/* single substitution */
  		dosub(ipc->rhs);
  	else
/n/dump/2010/0315/sys/src/cmd/sed.c:1065,1083 - sed.c:1073,1096
  				errexit();
  			}
  		}
- 		*sp++ = c;
- 		if (sp >= &genbuf[LBSIZE])
- 			fprint(2, "sed: Output line too long.\n");
+ 		if(sp < &genbuf[LBSIZE]){
+ 			*sp++ = c;
+ 			if (sp >= &genbuf[LBSIZE])
+ 				toolong();
+ 		}
  	}
  	lp = loc2;
  	loc2 = sp - genbuf + linebuf;
  	while (*sp++ = *lp++)
- 		if (sp >= &genbuf[LBSIZE])
- 			fprint(2, "sed: Output line too long.\n");
+ 		if (sp >= &genbuf[LBSIZE]){
+ 			toolong();
+ 			break;
+ 		}
  	lp = linebuf;
  	sp = genbuf;
  	while (*lp++ = *sp++)
- 		;
+ 		if (sp >= &genbuf[LBSIZE])
+ 			break;
  	spend = lp - 1;
  }

/n/dump/2010/0315/sys/src/cmd/sed.c:1086,1097 - sed.c:1099,1120
  {
  	while (l1 < l2) {
  		*sp++ = *l1++;
- 		if (sp >= &genbuf[LBSIZE])
- 			fprint(2, "sed: Output line too long.\n");
+ 		if (sp >= &genbuf[LBSIZE]){
+ 			toolong();
+ 			break;
+ 		}
  	}
  	return sp;
  }

+ void
+ toolong(void)
+ {
+ 	if(tlwarn == 0)
+ 		fprint(2, "sed: Output line too long.\n");
+ 	tlwarn = 1;
+ }
+
  char *
  trans(int c)
  {
/n/dump/2010/0315/sys/src/cmd/sed.c:1408,1414 - sed.c:1431,1437
  Rune *
  gline(Rune *addr)
  {
- 	long c;
+ 	long c, w;
  	Rune *p;
  	static long peekc = 0;

/n/dump/2010/0315/sys/src/cmd/sed.c:1417,1422 - sed.c:1440,1446
  	sflag = 0;
  	lnum++;
  /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
+ 	w = 0;
  	do {
  		p = addr;
  		for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
/n/dump/2010/0315/sys/src/cmd/sed.c:1426,1433 - sed.c:1450,1463
  				*p = '\0';
  				return p;
  			}
- 			if (c && p < lbend)
- 				*p++ = c;
+ 			if (c) {
+ 				if (p < lbend)
+ 					*p++ = c;
+ 				else if(w == 0) {
+ 					w = 1;
+ 					fprint(2, "sed: Input line too long.\n");
+ 				}
+ 			}
  		}
  		/* return partial final line, adding implicit newline */
  		if(p != addr) {



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2010-03-15 13:58 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-15 13:58 [9fans] sed line limit erik quanstrom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).