tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
* Improved handling of literals in -man.
@ 2011-07-08 13:10 Kristaps Dzonsons
  2011-07-08 18:23 ` Ingo Schwarze
  0 siblings, 1 reply; 3+ messages in thread
From: Kristaps Dzonsons @ 2011-07-08 13:10 UTC (permalink / raw)
  To: tech

[-- Attachment #1: Type: text/plain, Size: 468 bytes --]

Hi,

Enclosed is a patch that considerably simplifies and improves the 
handling of literals in -man.  I've implemented it for all frontends but 
would like a bit of testing first.

Basically, it pushes LITERAL mode handling directly into the 
print_man_node() functions instead of scattering it here and there 
(except where we need to manually unset it).  It's a lot clearer than it 
was before and, in my few tests, works much, much better.

Thoughts?

Kristaps

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 9137 bytes --]

Index: TODO
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/TODO,v
retrieving revision 1.118
diff -u -r1.118 TODO
--- TODO	7 Jul 2011 06:34:13 -0000	1.118
+++ TODO	8 Jul 2011 13:08:28 -0000
@@ -132,6 +132,8 @@
 - groff an-ext.tmac macros (.UR, .UE) occur in xine(5)
   reported by brad@  Sat, 15 Jan 2011 15:45:23 -0500
 
+- -T[x]html doesn't stipulate non-collapsing spaces in literal mode
+
 --- missing tbl features -----------------------------------------------
 
 - implement basic non-parametric .de to support e.g. sox(1)
Index: man_term.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_term.c,v
retrieving revision 1.113
diff -u -r1.113 man_term.c
--- man_term.c	3 Jul 2011 22:57:32 -0000	1.113
+++ man_term.c	8 Jul 2011 13:08:29 -0000
@@ -127,8 +127,8 @@
 	{ pre_alternate, NULL, 0 }, /* RI */
 	{ pre_ign, NULL, MAN_NOTEXT }, /* na */
 	{ pre_sp, NULL, MAN_NOTEXT }, /* sp */
-	{ pre_literal, NULL, 0 }, /* nf */
-	{ pre_literal, NULL, 0 }, /* fi */
+	{ pre_literal, NULL, MAN_NOTEXT }, /* nf */
+	{ pre_literal, NULL, MAN_NOTEXT }, /* fi */
 	{ NULL, NULL, 0 }, /* RE */
 	{ pre_RS, post_RS, 0 }, /* RS */
 	{ pre_ign, NULL, 0 }, /* DT */
@@ -247,8 +247,6 @@
 pre_literal(DECL_ARGS)
 {
 
-	term_newln(p);
-
 	if (MAN_nf == n->tok)
 		mt->fl |= MANT_LITERAL;
 	else
@@ -263,7 +261,7 @@
 {
 	enum termfont		 font[2];
 	const struct man_node	*nn;
-	int			 savelit, i;
+	int			 i;
 
 	switch (n->tok) {
 	case (MAN_RB):
@@ -294,13 +292,8 @@
 		abort();
 	}
 
-	savelit = MANT_LITERAL & mt->fl;
-	mt->fl &= ~MANT_LITERAL;
-
 	for (i = 0, nn = n->child; nn; nn = nn->next, i = 1 - i) {
 		term_fontrepl(p, font[i]);
-		if (savelit && NULL == nn->next)
-			mt->fl |= MANT_LITERAL;
 		print_man_node(p, mt, nn, m);
 		if (nn->next)
 			p->flags |= TERMP_NOSPACE;
@@ -863,43 +856,35 @@
 static void
 print_man_node(DECL_ARGS)
 {
-	size_t		 rm, rmax;
 	int		 c;
 
 	switch (n->type) {
 	case(MAN_TEXT):
 		/*
 		 * If we have a blank line, output a vertical space.
-		 * If we have a space as the first character, break
-		 * before printing the line's data.
 		 */
 		if ('\0' == *n->string) {
 			term_vspace(p);
 			return;
-		} else if (' ' == *n->string && MAN_LINE & n->flags)
-			term_newln(p);
-
-		term_word(p, n->string);
-
+		}
+		
 		/*
-		 * If we're in a literal context, make sure that words
-		 * togehter on the same line stay together.  This is a
-		 * POST-printing call, so we check the NEXT word.  Since
-		 * -man doesn't have nested macros, we don't need to be
-		 * more specific than this.
+		 * If we're not in literal mode already and have a
+		 * space as the first character, then we're in an
+		 * implied literal mode with a break before the text.
+		 *
+		 * If we're in a literal mode, break before the first
+		 * word only if we have a prior node or are on a new
+		 * line.
 		 */
-		if (MANT_LITERAL & mt->fl && 
-				(NULL == n->next || 
-				 n->next->line > n->line)) {
-			rm = p->rmargin;
-			rmax = p->maxrmargin;
-			p->rmargin = p->maxrmargin = TERM_MAXMARGIN;
-			p->flags |= TERMP_NOSPACE;
-			term_flushln(p);
-			p->flags &= ~TERMP_NOLPAD;
-			p->rmargin = rm;
-			p->maxrmargin = rmax;
-		}
+		if ( ! (MANT_LITERAL & mt->fl)) {
+			if (' ' == *n->string && MAN_LINE & n->flags)
+				term_newln(p);
+		} else if (MANT_LITERAL & mt->fl)
+			if (n->prev && n->prev->line != n->line)
+				term_newln(p);
+
+		term_word(p, n->string);
 
 		if (MAN_EOS & n->flags)
 			p->flags |= TERMP_SENTENCE;
@@ -916,6 +901,17 @@
 			term_newln(p);
 		term_tbl(p, n->span);
 		return;
+	case (MAN_ELEM):
+		/*
+		 * Break here because the MAN_TEXT break doesn't happen
+		 * if n->prev is NULL, which happens for the first child
+		 * of an element.
+		 * Only do so if we're "printing" nodes.
+		 */
+		if (MANT_LITERAL & mt->fl)
+			if ( ! (MAN_NOTEXT & termacts[n->tok].flags))
+				term_newln(p);
+		break;
 	default:
 		break;
 	}
Index: man_html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_html.c,v
retrieving revision 1.78
diff -u -r1.78 man_html.c
--- man_html.c	7 Jul 2011 13:54:36 -0000	1.78
+++ man_html.c	8 Jul 2011 13:08:29 -0000
@@ -50,7 +50,8 @@
 
 struct	htmlman {
 	int		(*pre)(MAN_ARGS);
-	int		(*post)(MAN_ARGS);
+	int		  flags;
+#define	MANH_NOTEXT	 (1 << 0) /* no printed children */
 };
 
 static	void		  print_bvspace(struct html *, 
@@ -81,39 +82,39 @@
 static	int		  man_SS_pre(MAN_ARGS);
 
 static	const struct htmlman mans[MAN_MAX] = {
-	{ man_br_pre, NULL }, /* br */
-	{ NULL, NULL }, /* TH */
-	{ man_SH_pre, NULL }, /* SH */
-	{ man_SS_pre, NULL }, /* SS */
-	{ man_IP_pre, NULL }, /* TP */
-	{ man_PP_pre, NULL }, /* LP */
-	{ man_PP_pre, NULL }, /* PP */
-	{ man_PP_pre, NULL }, /* P */
-	{ man_IP_pre, NULL }, /* IP */
-	{ man_HP_pre, NULL }, /* HP */ 
-	{ man_SM_pre, NULL }, /* SM */
-	{ man_SM_pre, NULL }, /* SB */
-	{ man_alt_pre, NULL }, /* BI */
-	{ man_alt_pre, NULL }, /* IB */
-	{ man_alt_pre, NULL }, /* BR */
-	{ man_alt_pre, NULL }, /* RB */
-	{ NULL, NULL }, /* R */
-	{ man_B_pre, NULL }, /* B */
-	{ man_I_pre, NULL }, /* I */
-	{ man_alt_pre, NULL }, /* IR */
-	{ man_alt_pre, NULL }, /* RI */
-	{ man_ign_pre, NULL }, /* na */
-	{ man_br_pre, NULL }, /* sp */
-	{ man_literal_pre, NULL }, /* nf */
-	{ man_literal_pre, NULL }, /* fi */
-	{ NULL, NULL }, /* RE */
-	{ man_RS_pre, NULL }, /* RS */
-	{ man_ign_pre, NULL }, /* DT */
-	{ man_ign_pre, NULL }, /* UC */
-	{ man_ign_pre, NULL }, /* PD */
-	{ man_ign_pre, NULL }, /* AT */
-	{ man_in_pre, NULL }, /* in */
-	{ man_ign_pre, NULL }, /* ft */
+	{ man_br_pre, MANH_NOTEXT }, /* br */
+	{ NULL, 0 }, /* TH */
+	{ man_SH_pre, 0 }, /* SH */
+	{ man_SS_pre, 0 }, /* SS */
+	{ man_IP_pre, 0 }, /* TP */
+	{ man_PP_pre, 0 }, /* LP */
+	{ man_PP_pre, 0 }, /* PP */
+	{ man_PP_pre, 0 }, /* P */
+	{ man_IP_pre, 0 }, /* IP */
+	{ man_HP_pre, 0 }, /* HP */ 
+	{ man_SM_pre, 0 }, /* SM */
+	{ man_SM_pre, 0 }, /* SB */
+	{ man_alt_pre, 0 }, /* BI */
+	{ man_alt_pre, 0 }, /* IB */
+	{ man_alt_pre, 0 }, /* BR */
+	{ man_alt_pre, 0 }, /* RB */
+	{ 0, 0 }, /* R */
+	{ man_B_pre, 0 }, /* B */
+	{ man_I_pre, 0 }, /* I */
+	{ man_alt_pre, 0 }, /* IR */
+	{ man_alt_pre, 0 }, /* RI */
+	{ man_ign_pre, MANH_NOTEXT }, /* na */
+	{ man_br_pre, MANH_NOTEXT }, /* sp */
+	{ man_literal_pre, MANH_NOTEXT }, /* nf */
+	{ man_literal_pre, MANH_NOTEXT }, /* fi */
+	{ NULL, 0 }, /* RE */
+	{ man_RS_pre, 0 }, /* RS */
+	{ man_ign_pre, 0 }, /* DT */
+	{ man_ign_pre, 0 }, /* UC */
+	{ man_ign_pre, 0 }, /* PD */
+	{ man_ign_pre, 0 }, /* AT */
+	{ man_in_pre, MANH_NOTEXT }, /* in */
+	{ man_ign_pre, MANH_NOTEXT }, /* ft */
 };
 
 /*
@@ -221,10 +222,12 @@
 			return;
 		}
 
-		if (' ' == *n->string && MAN_LINE & n->flags)
-			print_otag(h, TAG_BR, 0, NULL);
-		else if (MANH_LITERAL & mh->fl && n->prev)
-			print_otag(h, TAG_BR, 0, NULL);
+		if ( ! (MANH_LITERAL & mh->fl)) {
+			if (' ' == *n->string && MAN_LINE & n->flags)
+				print_otag(h, TAG_BR, 0, NULL);
+		} else if (MANH_LITERAL & mh->fl)
+			if (n->prev && n->prev->line != n->line)
+				print_otag(h, TAG_BR, 0, NULL);
 
 		print_text(h, n->string);
 		return;
@@ -241,6 +244,11 @@
 		 */
 		print_tbl(h, n->span);
 		return;
+	case (MAN_ELEM):
+		if (MANH_LITERAL & mh->fl)
+			if ( ! (MANH_NOTEXT & mans[n->tok].flags))
+				print_otag(h, TAG_BR, 0, NULL);
+		/* FALLTHROUGH */
 	default:
 		/* 
 		 * Close out scope of font prior to opening a macro
@@ -271,17 +279,8 @@
 	/* This will automatically close out any font scope. */
 	print_stagq(h, t);
 
-	switch (n->type) {
-	case (MAN_ROOT):
+	if (MAN_ROOT == n->type)
 		man_root_post(m, n, mh, h);
-		break;
-	case (MAN_EQN):
-		break;
-	default:
-		if (mans[n->tok].post)
-			(*mans[n->tok].post)(m, n, mh, h);
-		break;
-	}
 }
 
 
@@ -445,15 +444,10 @@
 man_alt_pre(MAN_ARGS)
 {
 	const struct man_node	*nn;
-	int		 i, savelit;
+	int		 i;
 	enum htmltag	 fp;
 	struct tag	*t;
 
-	if ((savelit = mh->fl & MANH_LITERAL)) 
-		print_otag(h, TAG_BR, 0, NULL);
-
-	mh->fl &= ~MANH_LITERAL;
-
 	for (i = 0, nn = n->child; nn; nn = nn->next, i++) {
 		t = NULL;
 		switch (n->tok) {
@@ -492,9 +486,6 @@
 			print_tagq(h, t);
 	}
 
-	if (savelit)
-		mh->fl |= MANH_LITERAL;
-
 	return(0);
 }
 
@@ -544,7 +535,8 @@
 static int
 man_IP_pre(MAN_ARGS)
 {
-	const struct man_node	*nn;
+	const struct man_node *nn;
+	int		 savelit;
 
 	if (MAN_BODY == n->type) { 
 		print_otag(h, TAG_DD, 0, NULL);
@@ -560,6 +552,9 @@
 
 	/* For IP, only print the first header element. */
 
+	if ((savelit = MANH_LITERAL & mh->fl))
+		mh->fl &= ~MANH_LITERAL;
+
 	if (MAN_IP == n->tok && n->child)
 		print_man_node(m, n->child, mh, h);
 
@@ -570,6 +565,9 @@
 			if (nn->line > n->line)
 				print_man_node(m, nn, mh, h);
 
+	if (savelit)
+		mh->fl |= MANH_LITERAL;
+
 	return(0);
 }
 
@@ -625,10 +623,9 @@
 man_literal_pre(MAN_ARGS)
 {
 
-	if (MAN_nf != n->tok) {
-		print_otag(h, TAG_BR, 0, NULL);
+	if (MAN_nf != n->tok)
 		mh->fl &= ~MANH_LITERAL;
-	} else
+	else
 		mh->fl |= MANH_LITERAL;
 
 	return(0);

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-07-09 10:10 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-08 13:10 Improved handling of literals in -man Kristaps Dzonsons
2011-07-08 18:23 ` Ingo Schwarze
2011-07-09 10:10   ` Kristaps Dzonsons

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).