From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id s2NCQxEt022435 for ; Sun, 23 Mar 2014 08:26:59 -0400 (EDT) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.5/8.14.3/Submit) id s2NCQweG002054; Sun, 23 Mar 2014 08:26:58 -0400 (EDT) Date: Sun, 23 Mar 2014 08:26:58 -0400 (EDT) Message-Id: <201403231226.s2NCQweG002054@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: If a man(7) NAME section contains macros, avoid truncated or X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- If a man(7) NAME section contains macros, avoid truncated or empty entries for .Nd in mandocdb(8), instead use the macro content recursively. This improves indexing of more than 200 manuals in Xenocara, i.e. more than 15%, in particular GL and some Xkb. 
Modified Files: -------------- mdocml: man.c man.h mandocdb.c Revision Data ------------- Index: man.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man.h,v retrieving revision 1.62 retrieving revision 1.63 diff -Lman.h -Lman.h -u -p -r1.62 -r1.63 --- man.h +++ man.h @@ -1,6 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -111,6 +112,7 @@ struct man; const struct man_node *man_node(const struct man *); const struct man_meta *man_meta(const struct man *); const struct mparse *man_mparse(const struct man *); +void man_deroff(char **, const struct man_node *); __END_DECLS Index: mandocdb.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v retrieving revision 1.120 retrieving revision 1.121 diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.120 -r1.121 --- mandocdb.c +++ mandocdb.c @@ -1263,9 +1263,9 @@ static void parse_man(struct mpage *mpage, const struct man_node *n) { const struct man_node *head, *body; - char *start, *sv, *title; + char *start, *title; char byte; - size_t sz, titlesz; + size_t sz; if (NULL == n) return; @@ -1285,11 +1285,7 @@ parse_man(struct mpage *mpage, const str NULL != (head = (head->child)) && MAN_TEXT == head->type && 0 == strcmp(head->string, "NAME") && - NULL != (body = body->child) && - MAN_TEXT == body->type) { - - title = NULL; - titlesz = 0; + NULL != body->child) { /* * Suck the entire NAME section into memory. @@ -1298,47 +1294,11 @@ parse_man(struct mpage *mpage, const str * NAME sections over many lines. 
*/ - for ( ; NULL != body; body = body->next) { - if (MAN_TEXT != body->type) - break; - if (0 == (sz = strlen(body->string))) - continue; - title = mandoc_realloc - (title, titlesz + sz + 1); - memcpy(title + titlesz, body->string, sz); - titlesz += sz + 1; - title[titlesz - 1] = ' '; - } + title = NULL; + man_deroff(&title, body); if (NULL == title) return; - title = mandoc_realloc(title, titlesz + 1); - title[titlesz] = '\0'; - - /* Skip leading space. */ - - sv = title; - while (isspace((unsigned char)*sv)) - sv++; - - if (0 == (sz = strlen(sv))) { - free(title); - return; - } - - /* Erase trailing space. */ - - start = &sv[sz - 1]; - while (start > sv && isspace((unsigned char)*start)) - *start-- = '\0'; - - if (start == sv) { - free(title); - return; - } - - start = sv; - /* * Go through a special heuristic dance here. * Conventionally, one or more manual names are @@ -1347,6 +1307,7 @@ parse_man(struct mpage *mpage, const str * the name parts here. */ + start = title; for ( ;; ) { sz = strcspn(start, " ,"); if ('\0' == start[sz]) @@ -1377,7 +1338,7 @@ parse_man(struct mpage *mpage, const str start++; } - if (sv == start) { + if (start == title) { putkey(mpage, start, TYPE_Nm); free(title); return; Index: man.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man.c,v retrieving revision 1.125 retrieving revision 1.126 diff -Lman.c -Lman.c -u -p -r1.125 -r1.126 --- man.c +++ man.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -705,4 +706,43 @@ man_mparse(const struct man *man) assert(man && man->parse); return(man->parse); +} + +void +man_deroff(char **dest, const struct man_node *n) +{ + char *cp; + size_t sz; + + if (MAN_TEXT != n->type) { + for (n = n->child; n; n = n->next) + man_deroff(dest, n); + return; + } + + /* Skip leading whitespace. 
*/ + + for (cp = n->string; '\0' != *cp; cp++) + if (0 == isspace((unsigned char)*cp)) + break; + + /* Skip trailing whitespace. */ + + for (sz = strlen(cp); sz; sz--) + if (0 == isspace((unsigned char)cp[sz-1])) + break; + + /* Skip empty strings. */ + + if (0 == sz) + return; + + if (NULL == *dest) { + *dest = mandoc_strndup(cp, sz); + return; + } + + mandoc_asprintf(&cp, "%s %.*s", *dest, (int)sz, cp); /* precision, not field width: append only the sz trimmed bytes */ + free(*dest); + *dest = cp; } -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv