source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Modified version of Ingo Schwarze's patch for hyphen-breaking.
@ 2010-05-25 12:37 kristaps
  0 siblings, 0 replies; only message in thread
From: kristaps @ 2010-05-25 12:37 UTC (permalink / raw)
  To: source

Log Message:
-----------
Modified version of Ingo Schwarze's patch for hyphen-breaking.
Breakable hyphens are cued in the back-ends (with ASCII_HYPH) and acted
upon in term.c or ignored in html.c.

Also cleaned up XML decl printing (no need for extra vars).

Modified Files:
--------------
    mdocml:
        chars.c
        chars.h
        html.c
        libmandoc.h
        mandoc.c
        mandoc.h
        mdoc.c
        term.c

Revision Data
-------------
Index: term.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/term.c,v
retrieving revision 1.139
retrieving revision 1.140
diff -Lterm.c -Lterm.c -u -p -r1.139 -r1.140
--- term.c
+++ term.c
@@ -138,6 +138,7 @@ term_flushln(struct termp *p)
 	size_t		 vend;	/* end of word visual position on output */
 	size_t		 bp;    /* visual right border position */
 	int		 j;     /* temporary loop index */
+	int		 jhy;	/* last hyphen before line overflow */
 	size_t		 maxvis, mmax;
 
 	/*
@@ -190,20 +191,23 @@ term_flushln(struct termp *p)
 		 */
 
 		/* LINTED */
-		for ( ; j < (int)p->col; j++) {
+		for (jhy = 0; j < (int)p->col; j++) {
 			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
 				break;
-			if (8 == p->buf[j])
-				vend--;
-			else
+			if (8 != p->buf[j]) {
+				if (vend > vis && vend < bp &&
+				    ASCII_HYPH == p->buf[j])
+					jhy = j;
 				vend++;
+			} else
+				vend--;
 		}
 
 		/*
 		 * Find out whether we would exceed the right margin.
 		 * If so, break to the next line.
 		 */
-		if (vend > bp && vis > 0) {
+		if (vend > bp && 0 == jhy && vis > 0) {
 			vend -= vis;
 			putchar('\n');
 			if (TERMP_NOBREAK & p->flags) {
@@ -231,6 +235,8 @@ term_flushln(struct termp *p)
 
 		/* Write out the [remaining] word. */
 		for ( ; i < (int)p->col; i++) {
+			if (vend > bp && jhy > 0 && i > jhy)
+				break;
 			if ('\t' == p->buf[i])
 				break;
 			if (' ' == p->buf[i]) {
@@ -256,7 +262,12 @@ term_flushln(struct termp *p)
 				p->viscol += vbl;
 				vbl = 0;
 			}
-			putchar(p->buf[i]);
+
+			if (ASCII_HYPH == p->buf[i])
+				putchar('-');
+			else
+				putchar(p->buf[i]);
+
 			p->viscol += 1;
 		}
 		vend += vbl;
Index: mandoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -Lmandoc.c -Lmandoc.c -u -p -r1.15 -r1.16
--- mandoc.c
+++ mandoc.c
@@ -340,3 +340,31 @@ mandoc_eos(const char *p, size_t sz)
 
 	return(0);
 }
+
+
+int
+mandoc_hyph(const char *start, const char *c)
+{
+
+	/*
+	 * Choose whether to break at a hyphenated character.  We only
+	 * do this if it's free-standing within a word.
+	 */
+
+	/* Skip first/last character of buffer. */
+	if (c == start || '\0' == *(c + 1))
+		return(0);
+	/* Skip first/last character of word. */
+	if ('\t' == *(c + 1) || '\t' == *(c - 1))
+		return(0);
+	if (' ' == *(c + 1) || ' ' == *(c - 1))
+		return(0);
+	/* Skip double invocations. */
+	if ('-' == *(c + 1) || '-' == *(c - 1))
+		return(0);
+	/* Skip escapes. */
+	if ('\\' == *(c - 1))
+		return(0);
+
+	return(1);
+}
Index: chars.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/chars.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -Lchars.c -Lchars.c -u -p -r1.17 -r1.18
--- chars.c
+++ chars.c
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "mandoc.h"
 #include "chars.h"
 
 #define	PRINT_HI	 126
Index: html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v
retrieving revision 1.99
retrieving revision 1.100
diff -Lhtml.c -Lhtml.c -u -p -r1.99 -r1.100
--- html.c
+++ html.c
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <unistd.h>
 
+#include "mandoc.h"
 #include "out.h"
 #include "chars.h"
 #include "html.h"
@@ -296,11 +297,12 @@ print_encode(struct html *h, const char 
 	int		 len, nospace;
 	const char	*seq;
 	enum roffdeco	 deco;
+	static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
 
 	nospace = 0;
 
 	for (; *p; p++) {
-		sz = strcspn(p, "\\<>&");
+		sz = strcspn(p, rejs);
 
 		fwrite(p, 1, sz, stdout);
 		p += /* LINTED */
@@ -315,6 +317,15 @@ print_encode(struct html *h, const char 
 		} else if ('&' == *p) {
 			printf("&amp;");
 			continue;
+		} else if (ASCII_HYPH == *p) {
+			/*
+			 * Note: "soft hyphens" aren't graphically
+			 * displayed when not breaking the text; we want
+			 * them to be displayed.
+			 */
+			/*printf("&#173;");*/
+			putchar('-');
+			continue;
 		} else if ('\0' == *p)
 			break;
 
@@ -443,21 +454,9 @@ print_gen_decls(struct html *h)
 static void
 print_xmltype(struct html *h)
 {
-	const char	*decl;
-
-	switch (h->type) {
-	case (HTML_XHTML_1_0_STRICT):
-		decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
-		break;
-	default:
-		decl = NULL;
-		break;
-	}
-
-	if (NULL == decl)
-		return;
 
-	printf("%s\n", decl);
+	if (HTML_XHTML_1_0_STRICT == h->type)
+		printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
 }
 
 
Index: mdoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc.c,v
retrieving revision 1.137
retrieving revision 1.138
diff -Lmdoc.c -Lmdoc.c -u -p -r1.137 -r1.138
--- mdoc.c
+++ mdoc.c
@@ -563,6 +563,10 @@ mdoc_ptext(struct mdoc *m, int line, cha
 	ws = NULL;
 	for (c = end = buf + offs; *c; c++) {
 		switch (*c) {
+		case '-':
+			if (mandoc_hyph(buf + offs, c))
+				*c = ASCII_HYPH;
+			break;
 		case ' ':
 			if (NULL == ws)
 				ws = c;
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.6
retrieving revision 1.7
diff -Lmandoc.h -Lmandoc.h -u -p -r1.6 -r1.7
--- mandoc.h
+++ mandoc.h
@@ -17,6 +17,10 @@
 #ifndef MANDOC_H
 #define MANDOC_H
 
+#define ASCII_NBRSP	 31  /* non-breaking space */
+#define	ASCII_HYPH	 30  /* breakable hyphen */
+
+
 __BEGIN_DECLS
 
 enum	mandocerr {
Index: chars.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/chars.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -Lchars.h -Lchars.h -u -p -r1.2 -r1.3
--- chars.h
+++ chars.h
@@ -17,8 +17,6 @@
 #ifndef CHARS_H
 #define CHARS_H
 
-#define ASCII_NBRSP	 31  /* non-breaking space */
-
 __BEGIN_DECLS
 
 enum	chars {
Index: libmandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -Llibmandoc.h -Llibmandoc.h -u -p -r1.5 -r1.6
--- libmandoc.h
+++ libmandoc.h
@@ -30,6 +30,7 @@ time_t		 mandoc_a2time(int, const char *
 #define		 MTIME_MDOCDATE		(1 << 2)
 #define		 MTIME_ISO_8601		(1 << 3)
 int		 mandoc_eos(const char *, size_t);
+int		 mandoc_hyph(const char *, const char *);
 
 __END_DECLS
 
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2010-05-25 12:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-25 12:37 mdocml: Modified version of Ingo Schwarze's patch for hyphen-breaking kristaps

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).