tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: Re: mandocdb: do not use bogus files
Date: Mon, 14 Nov 2011 20:13:02 +0100	[thread overview]
Message-ID: <20111114191302.GJ27815@iris.usta.de> (raw)
In-Reply-To: <20111114000638.GK3374@iris.usta.de>

Hi,

Because this patch started getting large and unwieldy, and I'm trying
to move fast, I have just put it into OpenBSD:

 ----- 8< ----- schnipp ----- >8 ----- 8< ----- schnapp ----- >8 -----

CVSROOT:        /cvs
Module name:    src
Changes by:     schwarze@cvs.openbsd.org        2011/11/14 11:52:05

Modified files:
        usr.bin/mandoc : mandocdb.8 mandocdb.c

Log message:
Store page titles in the correct case, and by default, only
put stuff into the database that man(1) will be able to retrieve.
However, support an option to use all directories and files.

Kristaps@ agreed with the general direction and provided some feedback.

 ----- 8< ----- schnipp ----- >8 ----- 8< ----- schnapp ----- >8 -----

Of course, more tweaking will be required, and I'm open to
suggestions!

Tell me if you think I should merge to bsd.lv;
for that purpose, I'm appending the committed patch below.

Yours,
  Ingo


Index: mandocdb.8
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/mandocdb.8,v
retrieving revision 1.4
diff -u -p -r1.4 mandocdb.8
--- mandocdb.8	9 Oct 2011 17:59:56 -0000	1.4
+++ mandocdb.8	14 Nov 2011 18:36:22 -0000
@@ -22,7 +22,7 @@
 .Nd index UNIX manuals
 .Sh SYNOPSIS
 .Nm
-.Op Fl v
+.Op Fl av
 .Op Ar dir...
 .Nm
 .Op Fl v
@@ -42,8 +42,15 @@ manuals and indexes them in a
 and
 .Sx Index Database
 for fast retrieval.
+.Pp
 The arguments are as follows:
 .Bl -tag -width Ds
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+By default, directories and files
+.Xr man 1
+cannot find will be silently skipped.
 .It Fl d Ar dir
 Merge (remove and re-add)
 .Ar
Index: mandocdb.c
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/mandocdb.c,v
retrieving revision 1.5
diff -u -p -r1.5 mandocdb.c
--- mandocdb.c	13 Nov 2011 10:40:52 -0000	1.5
+++ mandocdb.c	14 Nov 2011 18:36:22 -0000
@@ -38,6 +38,9 @@
 
 struct	of {
 	char		 *fname; /* heap-allocated */
+	char		 *sec;
+	char		 *arch;
+	char		 *title;
 	struct of	 *next; /* NULL for last one */
 	struct of	 *first; /* first in list */
 };
@@ -79,13 +82,15 @@ static	void		  hash_reset(DB **);
 static	void		  index_merge(const struct of *, struct mparse *,
 				struct buf *, struct buf *,
 				DB *, DB *, const char *, 
-				DB *, const char *, int,
+				DB *, const char *, int, int,
 				recno_t, const recno_t *, size_t);
 static	void		  index_prune(const struct of *, DB *, 
 				const char *, DB *, const char *, 
 				int, recno_t *, recno_t **, size_t *);
-static	void		  ofile_argbuild(char *[], int, int, struct of **);
-static	int		  ofile_dirbuild(const char *, int, struct of **);
+static	void		  ofile_argbuild(char *[], int, int, int,
+				struct of **);
+static	int		  ofile_dirbuild(const char *, const char *,
+				const char *, int, int, struct of **);
 static	void		  ofile_free(struct of *);
 static	int		  pman_node(MAN_ARGS);
 static	void		  pmdoc_node(MDOC_ARGS);
@@ -243,6 +248,7 @@ mandocdb(int argc, char *argv[])
 	char		 ibuf[MAXPATHLEN], /* index fname */
 			 fbuf[MAXPATHLEN];  /* btree fname */
 	int		 verb, /* output verbosity */
+			 use_all, /* use all directories and files */
 			 ch, i, flags;
 	DB		*idx, /* index database */
 			*db, /* keyword database */
@@ -266,6 +272,7 @@ mandocdb(int argc, char *argv[])
 		++progname;
 
 	verb = 0;
+	use_all = 0;
 	of = NULL;
 	db = idx = NULL;
 	mp = NULL;
@@ -276,8 +283,11 @@ mandocdb(int argc, char *argv[])
 	op = OP_NEW;
 	dir = NULL;
 
-	while (-1 != (ch = getopt(argc, argv, "d:u:v")))
+	while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
 		switch (ch) {
+		case ('a'):
+			use_all = 1;
+			break;
 		case ('d'):
 			dir = optarg;
 			op = OP_UPDATE;
@@ -344,7 +354,7 @@ mandocdb(int argc, char *argv[])
 			printf("%s: Opened\n", ibuf);
 		}
 
-		ofile_argbuild(argv, argc, verb, &of);
+		ofile_argbuild(argv, argc, use_all, verb, &of);
 		if (NULL == of)
 			goto out;
 
@@ -355,8 +365,8 @@ mandocdb(int argc, char *argv[])
 
 		if (OP_UPDATE == op)
 			index_merge(of, mp, &dbuf, &buf, hash, 
-					db, fbuf, idx, ibuf, verb,
-					maxrec, recs, reccur);
+					db, fbuf, idx, ibuf, use_all,
+					verb, maxrec, recs, reccur);
 
 		goto out;
 	}
@@ -396,7 +406,8 @@ mandocdb(int argc, char *argv[])
 		ofile_free(of);
 		of = NULL;
 
-		if ( ! ofile_dirbuild(argv[i], verb, &of)) 
+		if ( ! ofile_dirbuild(argv[i], NULL, NULL,
+				use_all, verb, &of)) 
 			exit((int)MANDOCLEVEL_SYSERR);
 
 		if (NULL == of)
@@ -405,7 +416,8 @@ mandocdb(int argc, char *argv[])
 		of = of->first;
 
 		index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, 
-				idx, ibuf, verb, maxrec, recs, reccur);
+				idx, ibuf, use_all, verb,
+				maxrec, recs, reccur);
 	}
 
 out:
@@ -430,7 +442,7 @@ void
 index_merge(const struct of *of, struct mparse *mp,
 		struct buf *dbuf, struct buf *buf,
 		DB *hash, DB *db, const char *dbf, 
-		DB *idx, const char *idxf, int verb,
+		DB *idx, const char *idxf, int use_all, int verb,
 		recno_t maxrec, const recno_t *recs, size_t reccur)
 {
 	recno_t		 rec;
@@ -466,17 +478,52 @@ index_merge(const struct of *of, struct 
 		if (NULL == mdoc && NULL == man)
 			continue;
 
+		/*
+		 * Make sure the manual section and architecture
+		 * agree with the directory where the file is located
+		 * or man(1) will not be able to find it.
+		 */
+
 		msec = NULL != mdoc ? 
 			mdoc_meta(mdoc)->msec : man_meta(man)->msec;
-		mtitle = NULL != mdoc ? 
-			mdoc_meta(mdoc)->title : man_meta(man)->title;
 		arch = NULL != mdoc ? 
 			mdoc_meta(mdoc)->arch : NULL;
 
+		if (0 == use_all) {
+			assert(of->sec);
+			assert(msec);
+			if (strcmp(msec, of->sec))
+				continue;
+
+			if (NULL == arch) {
+				if (NULL != of->arch)
+					continue;
+			} else if (NULL == of->arch ||
+					strcmp(arch, of->arch))
+				continue;
+		}
+
 		if (NULL == arch)
 			arch = "";
 
 		/* 
+		 * Case is relevant for man(1), so use the file name
+		 * instead of the (usually) all caps page title,
+		 * if the two agree.
+		 */
+
+		mtitle = NULL != mdoc ? 
+			mdoc_meta(mdoc)->title : man_meta(man)->title;
+
+		assert(of->title);
+		assert(mtitle);
+
+		if (0 == strcasecmp(mtitle, of->title))
+			mtitle = of->title;
+		else if (0 == use_all)
+			continue;
+
+		/* 
 		 * The index record value consists of a nil-terminated
 		 * filename, a nil-terminated manual section, and a
 		 * nil-terminated description.  Since the description
@@ -1150,14 +1197,62 @@ pman_node(MAN_ARGS)
 }
 
 static void
-ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
+ofile_argbuild(char *argv[], int argc, int use_all, int verb,
+		struct of **of)
 {
+	char		 buf[MAXPATHLEN];
+	char		*sec, *arch, *title, *p;
 	int		 i;
 	struct of	*nof;
 
 	for (i = 0; i < argc; i++) {
+
+		/*
+		 * Analyze the path.
+		 */
+
+		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+			fprintf(stderr, "%s: Path too long\n", argv[i]);
+			continue;
+		}
+		sec = arch = title = NULL;
+		p = strrchr(buf, '\0');
+		while (p-- > buf) {
+			if (NULL == sec && '.' == *p) {
+				sec = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if ('/' != *p)
+				continue;
+			if (NULL == title) {
+				title = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if (strncmp("man", p + 1, 3))
+				arch = p + 1;
+			break;
+		}
+		if (NULL == title)
+			title = buf;
+
+		/*
+		 * Build the file structure.
+		 */
+
 		nof = mandoc_calloc(1, sizeof(struct of));
-		nof->fname = strdup(argv[i]);
+		nof->fname = mandoc_strdup(argv[i]);
+		if (NULL != sec)
+			nof->sec = mandoc_strdup(sec);
+		if (NULL != arch)
+			nof->arch = mandoc_strdup(arch);
+		nof->title = mandoc_strdup(title);
+
+		/*
+		 * Add the structure to the list.
+		 */
+
 		if (verb > 2) 
 			printf("%s: Scheduling\n", argv[i]);
 		if (NULL == *of) {
@@ -1180,12 +1275,14 @@ ofile_argbuild(char *argv[], int argc, i
  * Pass in a pointer to a NULL structure for the first invocation.
  */
 static int
-ofile_dirbuild(const char *dir, int verb, struct of **of)
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+		int use_all, int verb, struct of **of)
 {
 	char		 buf[MAXPATHLEN];
 	size_t		 sz;
 	DIR		*d;
-	const char	*fn;
+	const char	*fn, *sec, *arch;
+	char		*suffix;
 	struct of	*nof;
 	struct dirent	*dp;
 
@@ -1196,10 +1293,30 @@ ofile_dirbuild(const char *dir, int verb
 
 	while (NULL != (dp = readdir(d))) {
 		fn = dp->d_name;
+
+		if ('.' == *fn)
+			continue;
+
 		if (DT_DIR == dp->d_type) {
-			if (0 == strcmp(".", fn))
-				continue;
-			if (0 == strcmp("..", fn))
+			sec = psec;
+			arch = parch;
+
+			/*
+	 		 * Don't bother parsing directories
+			 * that man(1) won't find.
+			 */
+
+			if (NULL == sec) {
+				if(0 == strncmp("man", fn, 3))
+					sec = fn + 3;
+				else if (use_all)
+					sec = fn;
+				else
+					continue;
+			} else if (NULL == arch && (use_all ||
+					NULL == strchr(fn, '.')))
+				arch = fn;
+			else if (0 == use_all)
 				continue;
 
 			buf[0] = '\0';
@@ -1207,22 +1324,35 @@ ofile_dirbuild(const char *dir, int verb
 			strlcat(buf, "/", MAXPATHLEN);
 			sz = strlcat(buf, fn, MAXPATHLEN);
 
-			if (sz < MAXPATHLEN) {
-				if ( ! ofile_dirbuild(buf, verb, of))
-					return(0);
-				continue;
-			} else if (sz < MAXPATHLEN)
-				continue;
-
-			fprintf(stderr, "%s: Path too long\n", dir);
-			return(0);
+			if (MAXPATHLEN <= sz) {
+				fprintf(stderr, "%s: Path too long\n", dir);
+				return(0);
+			}
+ 
+			if (verb > 2)
+				printf("%s: Scanning\n", buf);
+
+			if ( ! ofile_dirbuild(buf, sec, arch,
+					use_all, verb, of))
+				return(0);
 		}
-		if (DT_REG != dp->d_type)
+		if (DT_REG != dp->d_type ||
+		    (NULL == psec && !use_all) ||
+		    !strcmp(MANDOC_DB, fn) ||
+		    !strcmp(MANDOC_IDX, fn))
 			continue;
 
-		if (0 == strcmp(MANDOC_DB, fn) ||
-				0 == strcmp(MANDOC_IDX, fn))
-			continue;
+		/*
+		 * Don't bother parsing files that man(1) won't find.
+		 */
+
+		suffix = strrchr(fn, '.');
+		if (0 == use_all) {
+			if (NULL == suffix)
+				continue;
+			if (strcmp(suffix + 1, psec))
+				continue;
+		}
 
 		buf[0] = '\0';
 		strlcat(buf, dir, MAXPATHLEN);
@@ -1235,6 +1365,13 @@ ofile_dirbuild(const char *dir, int verb
 
 		nof = mandoc_calloc(1, sizeof(struct of));
 		nof->fname = mandoc_strdup(buf);
+		if (NULL != psec)
+			nof->sec = mandoc_strdup(psec);
+		if (NULL != parch)
+			nof->arch = mandoc_strdup(parch);
+		if (NULL != suffix)
+			*suffix = '\0';
+		nof->title = mandoc_strdup(fn);
 
 		if (verb > 2)
 			printf("%s: Scheduling\n", buf);
@@ -1261,6 +1398,9 @@ ofile_free(struct of *of)
 	while (of) {
 		nof = of->next;
 		free(of->fname);
+		free(of->sec);
+		free(of->arch);
+		free(of->title);
 		free(of);
 		of = nof;
 	}
--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv

  reply	other threads:[~2011-11-14 19:13 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-13 18:42 Ingo Schwarze
2011-11-13 20:05 ` Kristaps Dzonsons
2011-11-14  0:06   ` Ingo Schwarze
2011-11-14 19:13     ` Ingo Schwarze [this message]
2011-11-14 20:07       ` Kristaps Dzonsons
2011-11-14 23:34         ` Ingo Schwarze
2011-11-24 10:10           ` Kristaps Dzonsons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111114191302.GJ27815@iris.usta.de \
    --to=schwarze@usta.de \
    --cc=tech@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).