From: Kristaps Dzonsons <kristaps@bsd.lv>
To: tech@mdocml.bsd.lv
Subject: Improve mandocdb catpage/man heuristics.
Date: Wed, 28 Dec 2011 01:57:44 +0200 [thread overview]
Message-ID: <4EFA5B78.5030507@bsd.lv> (raw)
[-- Attachment #1: Type: text/plain, Size: 583 bytes --]
Hi,
This improves the mandocdb(8) catpage heuristic to, well, more or less
as good as it's going to get. It now reads multiple lines into a
buffer, joining the lines with a space.
While here, I removed the 70-character limit from mandocdb. I recoded
that limit into the printf(3) statements of apropos.c and whatis.c. We
should really consider a better way: if not 70-char, to the COLUMN limit?
Lastly, I added an extra man(7) heuristic for separating names and
descriptions, namely the \-\- I observed in some POD manuals. This
cleans up "apropos -s 3p ~.*" quite a lot.
Thoughts?
Kristaps
[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 4147 bytes --]
Index: apropos.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.c,v
retrieving revision 1.24
diff -u -r1.24 apropos.c
--- apropos.c 12 Dec 2011 02:00:49 -0000 1.24
+++ apropos.c 27 Dec 2011 23:50:39 -0000
@@ -127,11 +127,11 @@
qsort(res, sz, sizeof(struct res), cmp);
for (i = 0; i < (int)sz; i++)
- printf("%s(%s%s%s) - %s\n", res[i].title,
+ printf("%s(%s%s%s) - %.*s\n", res[i].title,
res[i].cat,
*res[i].arch ? "/" : "",
*res[i].arch ? res[i].arch : "",
- res[i].desc);
+ 70, res[i].desc);
}
static int
Index: mandocdb.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v
retrieving revision 1.41
diff -u -r1.41 mandocdb.c
--- mandocdb.c 25 Dec 2011 19:31:25 -0000 1.41
+++ mandocdb.c 27 Dec 2011 23:50:39 -0000
@@ -23,6 +23,7 @@
#include <sys/types.h>
#include <assert.h>
+#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <getopt.h>
@@ -129,8 +130,8 @@
static void ofile_dirbuild(const char *, const char *,
const char *, int, struct of **);
static void ofile_free(struct of *);
-static void pformatted(DB *, struct buf *, struct buf *,
- const struct of *);
+static void pformatted(DB *, struct buf *,
+ struct buf *, const struct of *);
static int pman_node(MAN_ARGS);
static void pmdoc_node(MDOC_ARGS);
static int pmdoc_head(MDOC_ARGS);
@@ -1319,6 +1320,8 @@
if (0 == strncmp(start, "-", 1))
start += 1;
+ else if (0 == strncmp(start, "\\-\\-", 4))
+ start += 4;
else if (0 == strncmp(start, "\\-", 2))
start += 2;
else if (0 == strncmp(start, "\\(en", 4))
@@ -1349,12 +1352,12 @@
* By necessity, this involves rather crude guesswork.
*/
static void
-pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
- const struct of *of)
+pformatted(DB *hash, struct buf *buf,
+ struct buf *dbuf, const struct of *of)
{
FILE *stream;
- char *line, *p;
- size_t len, plen;
+ char *line, *p, *title;
+ size_t len, plen, titlesz;
if (NULL == (stream = fopen(of->fname, "r"))) {
if (warnings)
@@ -1387,6 +1390,32 @@
while (NULL != (line = fgetln(stream, &len)))
if ('\n' != *line && ' ' != *line)
break;
+
+ /*
+ * Read up until the next section into a buffer.
+ * Strip leading whitespace from each line read and replace its
+ * trailing newline with a space.
+ * Ignore empty (whitespace-only) lines.
+ */
+
+ titlesz = 0;
+ title = NULL;
+
+ while (NULL != (line = fgetln(stream, &len))) {
+ if (' ' != *line || '\n' != line[(int)len - 1])
+ break;
+ while (len > 0 && isspace((unsigned char)*line)) {
+ line++;
+ len--;
+ }
+ if (1 == len)
+ continue;
+ title = mandoc_realloc(title, titlesz + len);
+ memcpy(title + titlesz, line, len);
+ titlesz += len;
+ title[(int)titlesz - 1] = ' ';
+ }
+
/*
* If no page content can be found, or the input line
@@ -1395,18 +1424,19 @@
* description.
*/
- line = fgetln(stream, &len);
- if (NULL == line || ' ' != *line || '\n' != line[(int)len - 1]) {
+ if (NULL == title || '\0' == *title) {
if (warnings)
fprintf(stderr, "%s: cannot find NAME section\n",
of->fname);
buf_appendb(dbuf, buf->cp, buf->size);
hash_put(hash, buf, TYPE_Nd);
fclose(stream);
+ free(title);
return;
}
- line[(int)--len] = '\0';
+ title = mandoc_realloc(title, titlesz + 1);
+ title[(int)titlesz] = '\0';
/*
* Skip to the first dash.
@@ -1414,20 +1444,17 @@
* bytes).
*/
- if (NULL != (p = strstr(line, "- "))) {
+ if (NULL != (p = strstr(title, "- "))) {
for (p += 2; ' ' == *p || '\b' == *p; p++)
/* Skip to next word. */ ;
} else {
if (warnings)
fprintf(stderr, "%s: no dash in title line\n",
of->fname);
- p = line;
+ p = title;
}
- if ((plen = strlen(p)) > 70) {
- plen = 70;
- p[plen] = '\0';
- }
+ plen = strlen(p);
/* Strip backspace-encoding from line. */
@@ -1446,6 +1473,7 @@
buf_appendb(buf, p, plen + 1);
hash_put(hash, buf, TYPE_Nd);
fclose(stream);
+ free(title);
}
static void
next reply other threads:[~2011-12-27 23:57 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-27 23:57 Kristaps Dzonsons [this message]
2011-12-28 1:27 ` Ingo Schwarze
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4EFA5B78.5030507@bsd.lv \
--to=kristaps@bsd.lv \
--cc=tech@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).