source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: If a man(7) NAME section contains macros, avoid truncated or
Date: Sun, 23 Mar 2014 08:26:58 -0400 (EDT)	[thread overview]
Message-ID: <201403231226.s2NCQweG002054@krisdoz.my.domain> (raw)

Log Message:
-----------
If a man(7) NAME section contains macros, avoid truncated or empty
entries for .Nd in mandocdb(8), instead use the macro content
recursively.  This improves indexing of more than 200 manuals
in Xenocara, i.e. more than 15%, in particular GL and some Xkb.

Modified Files:
--------------
    mdocml:
        man.c
        man.h
        mandocdb.c

Revision Data
-------------
Index: man.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man.h,v
retrieving revision 1.62
retrieving revision 1.63
diff -Lman.h -Lman.h -u -p -r1.62 -r1.63
--- man.h
+++ man.h
@@ -1,6 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -111,6 +112,7 @@ struct	man;
 const struct man_node *man_node(const struct man *);
 const struct man_meta *man_meta(const struct man *);
 const struct mparse   *man_mparse(const struct man *);
+void man_deroff(char **, const struct man_node *);
 
 __END_DECLS
 
Index: mandocdb.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v
retrieving revision 1.120
retrieving revision 1.121
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.120 -r1.121
--- mandocdb.c
+++ mandocdb.c
@@ -1263,9 +1263,9 @@ static void
 parse_man(struct mpage *mpage, const struct man_node *n)
 {
 	const struct man_node *head, *body;
-	char		*start, *sv, *title;
+	char		*start, *title;
 	char		 byte;
-	size_t		 sz, titlesz;
+	size_t		 sz;
 
 	if (NULL == n)
 		return;
@@ -1285,11 +1285,7 @@ parse_man(struct mpage *mpage, const str
 				NULL != (head = (head->child)) &&
 				MAN_TEXT == head->type &&
 				0 == strcmp(head->string, "NAME") &&
-				NULL != (body = body->child) &&
-				MAN_TEXT == body->type) {
-
-			title = NULL;
-			titlesz = 0;
+				NULL != body->child) {
 
 			/*
 			 * Suck the entire NAME section into memory.
@@ -1298,47 +1294,11 @@ parse_man(struct mpage *mpage, const str
 			 * NAME sections over many lines.
 			 */
 
-			for ( ; NULL != body; body = body->next) {
-				if (MAN_TEXT != body->type)
-					break;
-				if (0 == (sz = strlen(body->string)))
-					continue;
-				title = mandoc_realloc
-					(title, titlesz + sz + 1);
-				memcpy(title + titlesz, body->string, sz);
-				titlesz += sz + 1;
-				title[titlesz - 1] = ' ';
-			}
+			title = NULL;
+			man_deroff(&title, body);
 			if (NULL == title)
 				return;
 
-			title = mandoc_realloc(title, titlesz + 1);
-			title[titlesz] = '\0';
-
-			/* Skip leading space.  */
-
-			sv = title;
-			while (isspace((unsigned char)*sv))
-				sv++;
-
-			if (0 == (sz = strlen(sv))) {
-				free(title);
-				return;
-			}
-
-			/* Erase trailing space. */
-
-			start = &sv[sz - 1];
-			while (start > sv && isspace((unsigned char)*start))
-				*start-- = '\0';
-
-			if (start == sv) {
-				free(title);
-				return;
-			}
-
-			start = sv;
-
 			/* 
 			 * Go through a special heuristic dance here.
 			 * Conventionally, one or more manual names are
@@ -1347,6 +1307,7 @@ parse_man(struct mpage *mpage, const str
 			 * the name parts here.
 			 */
 
+			start = title;
 			for ( ;; ) {
 				sz = strcspn(start, " ,");
 				if ('\0' == start[sz])
@@ -1377,7 +1338,7 @@ parse_man(struct mpage *mpage, const str
 					start++;
 			}
 
-			if (sv == start) {
+			if (start == title) {
 				putkey(mpage, start, TYPE_Nm);
 				free(title);
 				return;
Index: man.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man.c,v
retrieving revision 1.125
retrieving revision 1.126
diff -Lman.c -Lman.c -u -p -r1.125 -r1.126
--- man.c
+++ man.c
@@ -23,6 +23,7 @@
 #include <sys/types.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -705,4 +706,43 @@ man_mparse(const struct man *man)
 
 	assert(man && man->parse);
 	return(man->parse);
+}
+
+/*
+ * Append the trimmed text at or below n to *dest (NULL or free()able).
+ */
+void
+man_deroff(char **dest, const struct man_node *n)
+{
+	char	*cp;
+	size_t	 sz;
+
+	if (MAN_TEXT != n->type) {
+		for (n = n->child; n; n = n->next)
+			man_deroff(dest, n);
+		return;
+	}
+
+	/* Skip leading whitespace. */
+	for (cp = n->string; '\0' != *cp; cp++)
+		if (0 == isspace((unsigned char)*cp))
+			break;
+
+	/* Skip trailing whitespace. */
+	for (sz = strlen(cp); sz; sz--)
+		if (0 == isspace((unsigned char)cp[sz-1]))
+			break;
+
+	/* Skip empty strings. */
+	if (0 == sz)
+		return;
+
+	if (NULL == *dest) {
+		*dest = mandoc_strndup(cp, sz);
+		return;
+	}
+	/* Use a precision, not a width, to copy only sz bytes of cp. */
+	mandoc_asprintf(&cp, "%s %.*s", *dest, (int)sz, cp);
+	free(*dest);
+	*dest = cp;
 }
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2014-03-23 12:26 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201403231226.s2NCQweG002054@krisdoz.my.domain \
    --to=schwarze@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).