source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: Refactor, no functional change: Remove the parse point from
Date: Sat, 1 Nov 2014 00:08:43 -0400 (EDT)	[thread overview]
Message-ID: <201411010408.sA148hxZ018774@krisdoz.my.domain> (raw)

Log Message:
-----------
Refactor, no functional change: Remove the parse point from struct buf.
Some functions need multiple parse points, some need none at all,
and it varies whether any of them need to be passed around.
It is therefore better to pass them as separate arguments, and only when needed.

Modified Files:
--------------
    mdocml:
        libmandoc.h
        preconv.c
        read.c

Revision Data
-------------
Index: read.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v
retrieving revision 1.94
retrieving revision 1.95
diff -Lread.c -Lread.c -u -p -r1.94 -r1.95
--- read.c
+++ read.c
@@ -68,7 +68,7 @@ struct	mparse {
 
 static	void	  choose_parser(struct mparse *);
 static	void	  resize_buf(struct buf *, size_t);
-static	void	  mparse_buf_r(struct mparse *, struct buf, int);
+static	void	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
 static	int	  read_whole_file(struct mparse *, const char *, int,
 				struct buf *, int *);
 static	void	  mparse_end(struct mparse *);
@@ -303,27 +303,29 @@ choose_parser(struct mparse *curp)
 }
 
 /*
- * Main parse routine for an opened file.  This is called for each
- * opened file and simply loops around the full input file, possibly
- * nesting (i.e., with `so').
+ * Main parse routine for a buffer.
+ * It assumes encoding and line numbering are already set up.
+ * It can recurse directly (for invocations of user-defined
+ * macros, inline equations, and input line traps)
+ * and indirectly (for .so file inclusion).
  */
 static void
-mparse_buf_r(struct mparse *curp, struct buf blk, int start)
+mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
 {
 	const struct tbl_span	*span;
 	struct buf	 ln;
+	size_t		 pos; /* byte number in the ln buffer */
 	enum rofferr	 rr;
-	int		 i, of, rc;
-	int		 pos; /* byte number in the ln buffer */
+	int		 of, rc;
 	int		 lnn; /* line number in the real file */
 	unsigned char	 c;
 
-	memset(&ln, 0, sizeof(struct buf));
+	memset(&ln, 0, sizeof(ln));
 
 	lnn = curp->line;
 	pos = 0;
 
-	for (i = blk.offs; i < (int)blk.sz; ) {
+	while (i < blk.sz) {
 		if (0 == pos && '\0' == blk.buf[i])
 			break;
 
@@ -333,13 +335,11 @@ mparse_buf_r(struct mparse *curp, struct
 
 			if (lnn < 3 &&
 			    curp->filenc & MPARSE_UTF8 &&
-			    curp->filenc & MPARSE_LATIN1) {
-				blk.offs = i;
-				curp->filenc = preconv_cue(&blk);
-			}
+			    curp->filenc & MPARSE_LATIN1)
+				curp->filenc = preconv_cue(&blk, i);
 		}
 
-		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
+		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
 
 			/*
 			 * When finding an unescaped newline character,
@@ -347,7 +347,7 @@ mparse_buf_r(struct mparse *curp, struct
 			 * Skip a preceding carriage return, if any.
 			 */
 
-			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
+			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
 			    '\n' == blk.buf[i + 1])
 				++i;
 			if ('\n' == blk.buf[i]) {
@@ -361,7 +361,7 @@ mparse_buf_r(struct mparse *curp, struct
 			 * case of 11 bytes: "\\[u10ffff]\0"
 			 */
 
-			if (pos + 11 > (int)ln.sz)
+			if (pos + 11 > ln.sz)
 				resize_buf(&ln, 256);
 
 			/*
@@ -370,13 +370,8 @@ mparse_buf_r(struct mparse *curp, struct
 
 			c = blk.buf[i];
 			if (c & 0x80) {
-				blk.offs = i;
-				ln.offs = pos;
-				if (curp->filenc && preconv_encode(
-				    &blk, &ln, &curp->filenc)) {
-					pos = ln.offs;
-					i = blk.offs;
-				} else {
+				if ( ! (curp->filenc && preconv_encode(
+				    &blk, &i, &ln, &pos, &curp->filenc))) {
 					mandoc_vmsg(MANDOCERR_BADCHAR,
 					    curp, curp->line, pos,
 					    "0x%x", c);
@@ -400,7 +395,7 @@ mparse_buf_r(struct mparse *curp, struct
 
 			/* Trailing backslash = a plain char. */
 
-			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
+			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
 				ln.buf[pos++] = blk.buf[i++];
 				continue;
 			}
@@ -412,7 +407,7 @@ mparse_buf_r(struct mparse *curp, struct
 			 * skip that one as well.
 			 */
 
-			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
+			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
 			    '\n' == blk.buf[i + 2])
 				++i;
 			if ('\n' == blk.buf[i + 1]) {
@@ -424,7 +419,7 @@ mparse_buf_r(struct mparse *curp, struct
 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
 				i += 2;
 				/* Comment, skip to end of line */
-				for (; i < (int)blk.sz; ++i) {
+				for (; i < blk.sz; ++i) {
 					if ('\n' == blk.buf[i]) {
 						++i;
 						++lnn;
@@ -461,7 +456,7 @@ mparse_buf_r(struct mparse *curp, struct
 			ln.buf[pos++] = blk.buf[i++];
 		}
 
-		if (pos >= (int)ln.sz)
+		if (pos >= ln.sz)
 			resize_buf(&ln, 256);
 
 		ln.buf[pos] = '\0';
@@ -504,14 +499,14 @@ rerun:
 		switch (rr) {
 		case ROFF_REPARSE:
 			if (REPARSE_LIMIT >= ++curp->reparse_count)
-				mparse_buf_r(curp, ln, 0);
+				mparse_buf_r(curp, ln, of, 0);
 			else
 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
 				    curp->line, pos, NULL);
 			pos = 0;
 			continue;
 		case ROFF_APPEND:
-			pos = (int)strlen(ln.buf);
+			pos = strlen(ln.buf);
 			continue;
 		case ROFF_RERUN:
 			goto rerun;
@@ -522,8 +517,8 @@ rerun:
 			assert(MANDOCLEVEL_FATAL <= curp->file_status);
 			break;
 		case ROFF_SO:
-			if (0 == (MPARSE_SO & curp->options) &&
-			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
+			if ( ! (curp->options & MPARSE_SO) &&
+			    (i >= blk.sz || blk.buf[i] == '\0')) {
 				curp->sodest = mandoc_strdup(ln.buf + of);
 				free(ln.buf);
 				return;
@@ -650,7 +645,6 @@ read_whole_file(struct mparse *curp, con
 			return(0);
 		}
 		*with_mmap = 1;
-		fb->offs = 0;
 		fb->sz = (size_t)st.st_size;
 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
 		if (fb->buf != MAP_FAILED)
@@ -682,7 +676,6 @@ read_whole_file(struct mparse *curp, con
 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
 		if (ssz == 0) {
 			fb->sz = off;
-			fb->offs = 0;
 			return(1);
 		}
 		if (ssz == -1) {
@@ -739,6 +732,7 @@ mparse_parse_buffer(struct mparse *curp,
 {
 	struct buf	*svprimary;
 	const char	*svfile;
+	size_t		 offset;
 	static int	 recursion_depth;
 
 	if (64 < recursion_depth) {
@@ -759,11 +753,12 @@ mparse_parse_buffer(struct mparse *curp,
 	    (unsigned char)blk.buf[0] == 0xef &&
 	    (unsigned char)blk.buf[1] == 0xbb &&
 	    (unsigned char)blk.buf[2] == 0xbf) {
-		blk.offs = 3;
+		offset = 3;
 		curp->filenc &= ~MPARSE_LATIN1;
-	}
+	} else
+		offset = 0;
 
-	mparse_buf_r(curp, blk, 1);
+	mparse_buf_r(curp, blk, offset, 1);
 
 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
 		mparse_end(curp);
@@ -780,7 +775,6 @@ mparse_readmem(struct mparse *curp, cons
 
 	blk.buf = UNCONST(buf);
 	blk.sz = len;
-	blk.offs = 0;
 
 	mparse_parse_buffer(curp, blk, file);
 	return(curp->file_status);
Index: preconv.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/preconv.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -Lpreconv.c -Lpreconv.c -u -p -r1.10 -r1.11
--- preconv.c
+++ preconv.c
@@ -25,7 +25,8 @@
 #include "libmandoc.h"
 
 int
-preconv_encode(struct buf *ib, struct buf *ob, int *filenc)
+preconv_encode(struct buf *ib, size_t *ii, struct buf *ob, size_t *oi,
+    int *filenc)
 {
 	size_t		 i;
 	const long	 one = 1L;
@@ -45,7 +46,7 @@ preconv_encode(struct buf *ib, struct bu
 	if ( ! (*((const char *)(&one))))
 		be = 1;
 
-	for (i = ib->offs; i < ib->sz; i++) {
+	for (i = *ii; i < ib->sz; i++) {
 		cu = ib->buf[i];
 		if (state) {
 			if ( ! (cu & 128) || (cu & 64)) {
@@ -79,11 +80,11 @@ preconv_encode(struct buf *ib, struct bu
 					(accum << 24);
 
 			if (accum < 0x80)
-				ob->buf[ob->offs++] = accum;
+				ob->buf[(*oi)++] = accum;
 			else
-				ob->offs += snprintf(ob->buf + ob->offs,
+				*oi += snprintf(ob->buf + *oi,
 				    11, "\\[u%.4X]", accum);
-			ib->offs = i + 1;
+			*ii = i + 1;
 			*filenc &= ~MPARSE_LATIN1;
 			return(1);
 		} else {
@@ -134,21 +135,21 @@ latin:
 	if ( ! (*filenc & MPARSE_LATIN1))
 		return(0);
 
-	ob->offs += snprintf(ob->buf + ob->offs, 11,
-	    "\\[u%.4X]", (unsigned char)ib->buf[ib->offs++]);
+	*oi += snprintf(ob->buf + *oi, 11,
+	    "\\[u%.4X]", (unsigned char)ib->buf[(*ii)++]);
 
 	*filenc &= ~MPARSE_UTF8;
 	return(1);
 }
 
 int
-preconv_cue(const struct buf *b)
+preconv_cue(const struct buf *b, size_t offset)
 {
 	const char	*ln, *eoln, *eoph;
 	size_t		 sz, phsz;
 
-	ln = b->buf + b->offs;
-	sz = b->sz - b->offs;
+	ln = b->buf + offset;
+	sz = b->sz - offset;
 
 	/* Look for the end-of-line. */
 
Index: libmandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v
retrieving revision 1.46
retrieving revision 1.47
diff -Llibmandoc.h -Llibmandoc.h -u -p -r1.46 -r1.47
--- libmandoc.h
+++ libmandoc.h
@@ -33,7 +33,6 @@ enum	rofferr {
 struct	buf {
 	char	*buf;
 	size_t	 sz;
-	size_t	 offs;
 };
 
 __BEGIN_DECLS
@@ -72,8 +71,9 @@ int		 man_endparse(struct man *);
 int		 man_addspan(struct man *, const struct tbl_span *);
 int		 man_addeqn(struct man *, const struct eqn *);
 
-int		 preconv_cue(const struct buf *);
-int		 preconv_encode(struct buf *, struct buf *, int *);
+int		 preconv_cue(const struct buf *, size_t);
+int		 preconv_encode(struct buf *, size_t *,
+			struct buf *, size_t *, int *);
 
 void		 roff_free(struct roff *);
 struct roff	*roff_alloc(struct mparse *, const struct mchars *, int);
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2014-11-01  4:08 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201411010408.sA148hxZ018774@krisdoz.my.domain \
    --to=schwarze@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).