source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* texi2mdoc: First the memmem(3) invocations to have the proper length of
@ 2015-02-25 10:02 kristaps
  0 siblings, 0 replies; only message in thread
From: kristaps @ 2015-02-25 10:02 UTC (permalink / raw)
  To: source

Log Message:
-----------
First, fix the memmem(3) invocations to have the proper length of the file.
Next, make sure that conditionally-ignored text (e.g., @ifset) allows
for nested @ifset's and corresponding @end ifset pairs.
Without this, the parser would have stopped at the first @end ifset and
let subsequent text through.

Modified Files:
--------------
    texi2mdoc:
        main.c
        texi2mdoc.1

Revision Data
-------------
Index: texi2mdoc.1
===================================================================
RCS file: /home/cvs/mdocml/texi2mdoc/texi2mdoc.1,v
retrieving revision 1.10
retrieving revision 1.11
diff -Ltexi2mdoc.1 -Ltexi2mdoc.1 -u -p -r1.10 -r1.11
--- texi2mdoc.1
+++ texi2mdoc.1
@@ -94,25 +94,27 @@ The
 utility was written by
 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
 .Sh CAVEATS
-The output of
-.Nm
-currently doesn't play nicely with
-.Xr mdoc 7 :
-it doesn't detect whether a line trailing slash is properly escaped nor
-does it properly account for how it mingles
+The output consists of superfluous
 .Sq \&Pp
-with the text.
+that cause
+.D1 % mandoc -Tlint
+to complain.
 .Pp
 Many commands that might contain useful information (such as
 .Li @footnote )
 are thrown away.
 .Pp
-Lastly, the comma in Texinfo macro arguments may at times be escaped
-with a backslash.
+The comma in Texinfo macro arguments may at times be escaped with a backslash.
 This is
 .Em not
 supported.
-.\" .Sh BUGS
+.Sh BUGS
+Macros in
+.Nm
+are assumed to be self-contained (with matching block and respective
+.Li @end
+pairs).
+In Texinfo, they aren't: they're considered as if typed in place.
 .Sh SECURITY CONSIDERATIONS
 As a security precaution,
 .Nm
@@ -123,4 +125,3 @@ or
 of absolute paths or paths with
 .Li \&..
 as directory components.
-.\" Not used in OpenBSD.
Index: main.c
===================================================================
RCS file: /home/cvs/mdocml/texi2mdoc/main.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -Lmain.c -Lmain.c -u -p -r1.44 -r1.45
--- main.c
+++ main.c
@@ -537,7 +537,7 @@ domacro(struct texi *p, enum texicmd cmd
 	start = *pos;
 	endtok = "\n@end macro\n";
 	endtoksz = strlen(endtok);
-	blk = memmem(&buf[start], sz, endtok, endtoksz);
+	blk = memmem(&buf[start], sz - start, endtok, endtoksz);
 	if (NULL == blk)
 		texierr(p, "unterminated macro body");
 	while (&buf[*pos] != blk)
@@ -566,34 +566,64 @@ static void
 doignblock(struct texi *p, enum texicmd cmd, 
 	const char *buf, size_t sz, size_t *pos)
 {
-	char		 end[32];
-	const char	*term;
-	size_t		 endsz, endpos;
+	char		 end[32], start[32];
+	const char	*endt, *startt;
+	size_t		 esz, ssz, newpos, stack;
 
 	/* 
-	 * We want to completely ignore everything in these blocks, so
-	 * simply jump to the @end block.
+	 * FIXME: this is cheating.
+	 * These tokens are supposed to begin on a newline.
+	 * However, if we do that, then we would need to check within
+	 * the loop for trailer (or leading, as the case may be)
+	 * newline, and that's just a bit too complicated right now.
+	 * This is because
+	 * 	@ifset BAR
+	 * 	@ifset FOO
+	 * 	@end ifset
+	 * 	@end ifset
+	 * won't work right now: we'd read after the first "@end ifset"
+	 * to the next line, then look for the next line after that.
 	 */
-	endsz = snprintf(end, sizeof(end), 
-		"\n@end %s\n", texitoks[cmd].tok);
-	assert(endsz < sizeof(end));
-
-	/* 
-	 * Look up where our end token occurs.
-	 * Set our end position based on the relative offset of that
-	 * from our current position, or the EOF if we don't have a
-	 * proper ending point.
+	ssz = snprintf(start, sizeof(start), 
+		"@%s", texitoks[cmd].tok);
+	assert(ssz < sizeof(start));
+	esz = snprintf(end, sizeof(end), 
+		"@end %s\n", texitoks[cmd].tok);
+	assert(esz < sizeof(end));
+	stack = 1;
+
+	/*
+	 * Here we look for the end token "end" somewhere in the file in
+	 * front of us.
+	 * It's not that easy, of course: if we have a nested block,
+	 * then there'll be an "end" token of the same kind between us.
+	 * Thus, we keep track of scopes for matching "end" blocks.
 	 */
-	term = memmem(&buf[*pos], sz, end, endsz);
-	endpos = NULL == term ? sz :
-		*pos + (size_t)(term - &buf[*pos]);
-	assert(endpos <= sz);
-	while (*pos < endpos)
-		advance(p, buf, pos);
+	while (stack > 0 && *pos < sz) {
+		if (stack > 10)
+			abort();
+		endt = memmem(&buf[*pos], sz - *pos, end, esz);
+		startt = memmem(&buf[*pos], sz - *pos, start, ssz);
+		if (NULL == endt) {
+			texiwarn(p, "unterminated \"%s\" "
+				"block", texitoks[cmd].tok);
+			*pos = sz;
+			break;
+		} 
 
-	/* Only do this if we're not already at the end. */
-	if (endpos < sz)
-		advanceto(p, buf, pos, endpos + endsz);
+		newpos = *pos;
+		if (NULL == startt || startt > endt) {
+			newpos += esz + (size_t)(endt - &buf[*pos]);
+			stack--;
+		} else {
+			newpos += ssz + (size_t)(startt - &buf[*pos]);
+			stack++;
+		}
+
+		assert(newpos <= sz);
+		while (*pos < newpos)
+			advance(p, buf, pos);
+	}
 }
 
 static void
@@ -705,29 +735,33 @@ doverbatim(struct texi *p, enum texicmd 
 	const char	*end, *term;
 	size_t		 endsz, endpos;
 
-	advanceeoln(p, buf, sz, pos, 1);
-
-	/* We end at exactly this token. */
+	/* We read from the @verbatim\n newline inclusive! */
 	end = "\n@end verbatim\n";
 	endsz = strlen(end);
+	advanceeoln(p, buf, sz, pos, 0);
+	if (*pos == sz) {
+		texiwarn(p, "unexpected end of file");
+		return;
+	}
 
-	/* 
-	 * Look up where our end token occurs.
-	 * Set our end position based on the relative offset of that
-	 * from our current position.
-	 */
-	term = memmem(&buf[*pos], sz, end, endsz);
-	endpos = NULL == term ? sz :
-		*pos + (size_t)(term - &buf[*pos]);
+	term = memmem(&buf[*pos], sz - *pos, end, endsz);
+	if (NULL == term) {
+		texiwarn(p, "unterminated verbatim block");
+		endpos = sz;
+	} else
+		endpos = *pos + (size_t)(term - &buf[*pos]);
 
-	teximacro(p, "Bd -literal -offset indent");
 	assert(endpos <= sz);
+	assert('\n' == buf[*pos]);
+	advance(p, buf, pos);
+	teximacro(p, "Bd -literal -offset indent");
 	while (*pos < endpos) {
 		texiputchar(p, buf[*pos]);
 		advance(p, buf, pos);
 	}
 	teximacro(p, "Ed");
-	advanceto(p, buf, pos, endpos + endsz);
+	if (*pos < sz)
+		advanceto(p, buf, pos, endpos + endsz);
 }
 
 static void
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-02-25 10:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-25 10:02 texi2mdoc: First the memmem(3) invocations to have the proper length of kristaps

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).