tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: Re: mandocdb: do not use bogus files
Date: Mon, 14 Nov 2011 01:06:38 +0100	[thread overview]
Message-ID: <20111114000638.GK3374@iris.usta.de> (raw)
In-Reply-To: <4EC022F4.7060008@bsd.lv>

Hi Kristaps,

Kristaps Dzonsons wrote on Sun, Nov 13, 2011 at 09:05:08PM +0100:

> Yes, I like this as the default behaviour.

Good, so i'll plod on down that road.  :-)

> However, there must be a command-line flag not to do so,
> as I use mandocdb in my own directory structures.

You have a point, and i have added -a.

> You also need to free up the memory in ofile_free().

Sure, i planned to, but forgot to write it down before sending
the patch.  Done now.

> Furthermore, please amend mandocdb.8 to note which directories are
> visited and which are not (in the default case).

I have put some minimal text for now.

> I also think it's a good idea to note skipped directories with
> a warning that's described in DIAGNOSTICS.

Not sure i want that in weekly(8) output; in any case, deciding that
is for later, when the dust has settled.  Right now, i want to get
the stuff to do some basic work, not polish it.

> Since this is a .8, we should (in my humble opinion) be quite
> rigorous in what's done and what's not done.

In general, i agree, but i don't think now is the right time.
This is not yet stable at all, so i'd have to rewrite that
documentation multiple times.  I'm not yet sure how much
man.conf(5) will be used here.  There are several other
open questions, and what i have so far is rudimentary at
best.  So rigorous documentation is for later, too.

Here is an update:

 * -a, use_all
 * calculate the same info in ofile_argbuild as well;
   obviously, the algorithm must be rather different
 * skip all dot-files; i think that's reasonable
 * free the memory

Not asking for OKs right now, i'm planning to first use the info
in index_merge, then commit.  Of course, comments are welcome!

Yours,
  Ingo


--- mandocdb.8.orig
+++ mandocdb.8
@@ -22,7 +22,7 @@
 .Nd index UNIX manuals
 .Sh SYNOPSIS
 .Nm
-.Op Fl v
+.Op Fl av
 .Op Ar dir...
 .Nm
 .Op Fl v
@@ -42,8 +42,15 @@ manuals and indexes them in a
 and
 .Sx Index Database
 for fast retrieval.
+.Pp
 The arguments are as follows:
 .Bl -tag -width Ds
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+By default, directories and files
+.Xr man 1
+cannot find will be silently skipped.
 .It Fl d Ar dir
 Merge (remove and re-add)
 .Ar
--- mandocdb.c.orig
+++ mandocdb.c
@@ -38,6 +38,9 @@
 
 struct	of {
 	char		 *fname; /* heap-allocated */
+	char		 *sec;
+	char		 *arch;
+	char		 *title;
 	struct of	 *next; /* NULL for last one */
 	struct of	 *first; /* first in list */
 };
@@ -79,13 +82,15 @@ static	void		  hash_reset(DB **);
 static	void		  index_merge(const struct of *, struct mparse *,
 				struct buf *, struct buf *,
 				DB *, DB *, const char *, 
-				DB *, const char *, int,
+				DB *, const char *, int, int,
 				recno_t, const recno_t *, size_t);
 static	void		  index_prune(const struct of *, DB *, 
 				const char *, DB *, const char *, 
 				int, recno_t *, recno_t **, size_t *);
-static	void		  ofile_argbuild(char *[], int, int, struct of **);
-static	int		  ofile_dirbuild(const char *, int, struct of **);
+static	void		  ofile_argbuild(char *[], int, int, int,
+				struct of **);
+static	int		  ofile_dirbuild(const char *, const char *,
+				const char *, int, int, struct of **);
 static	void		  ofile_free(struct of *);
 static	int		  pman_node(MAN_ARGS);
 static	void		  pmdoc_node(MDOC_ARGS);
@@ -243,6 +248,7 @@ mandocdb(int argc, char *argv[])
 	char		 ibuf[MAXPATHLEN], /* index fname */
 			 fbuf[MAXPATHLEN];  /* btree fname */
 	int		 verb, /* output verbosity */
+			 use_all, /* use all directories and files */
 			 ch, i, flags;
 	DB		*idx, /* index database */
 			*db, /* keyword database */
@@ -266,6 +272,7 @@ mandocdb(int argc, char *argv[])
 		++progname;
 
 	verb = 0;
+	use_all = 0;
 	of = NULL;
 	db = idx = NULL;
 	mp = NULL;
@@ -276,8 +283,11 @@ mandocdb(int argc, char *argv[])
 	op = OP_NEW;
 	dir = NULL;
 
-	while (-1 != (ch = getopt(argc, argv, "d:u:v")))
+	while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
 		switch (ch) {
+		case ('a'):
+			use_all = 1;
+			break;
 		case ('d'):
 			dir = optarg;
 			op = OP_UPDATE;
@@ -344,7 +354,7 @@ mandocdb(int argc, char *argv[])
 			printf("%s: Opened\n", ibuf);
 		}
 
-		ofile_argbuild(argv, argc, verb, &of);
+		ofile_argbuild(argv, argc, use_all, verb, &of);
 		if (NULL == of)
 			goto out;
 
@@ -355,8 +365,8 @@ mandocdb(int argc, char *argv[])
 
 		if (OP_UPDATE == op)
 			index_merge(of, mp, &dbuf, &buf, hash, 
-					db, fbuf, idx, ibuf, verb,
-					maxrec, recs, reccur);
+					db, fbuf, idx, ibuf, use_all,
+					verb, maxrec, recs, reccur);
 
 		goto out;
 	}
@@ -396,7 +406,8 @@ mandocdb(int argc, char *argv[])
 		ofile_free(of);
 		of = NULL;
 
-		if ( ! ofile_dirbuild(argv[i], verb, &of)) 
+		if ( ! ofile_dirbuild(argv[i], NULL, NULL,
+				use_all, verb, &of)) 
 			exit((int)MANDOCLEVEL_SYSERR);
 
 		if (NULL == of)
@@ -405,7 +416,8 @@ mandocdb(int argc, char *argv[])
 		of = of->first;
 
 		index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, 
-				idx, ibuf, verb, maxrec, recs, reccur);
+				idx, ibuf, use_all, verb,
+				maxrec, recs, reccur);
 	}
 
 out:
@@ -430,7 +442,7 @@ void
 index_merge(const struct of *of, struct mparse *mp,
 		struct buf *dbuf, struct buf *buf,
 		DB *hash, DB *db, const char *dbf, 
-		DB *idx, const char *idxf, int verb,
+		DB *idx, const char *idxf, int use_all, int verb,
 		recno_t maxrec, const recno_t *recs, size_t reccur)
 {
 	recno_t		 rec;
@@ -1150,14 +1162,62 @@ pman_node(MAN_ARGS)
 }
 
 static void
-ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
+ofile_argbuild(char *argv[], int argc, int use_all, int verb,
+		struct of **of)
 {
+	char		 buf[MAXPATHLEN];
+	char		*sec, *arch, *title, *p;
 	int		 i;
 	struct of	*nof;
 
 	for (i = 0; i < argc; i++) {
+
+		/*
+		 * Analyze the path.
+		 */
+
+		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+			fprintf(stderr, "%s: Path too long\n", argv[i]);
+			continue;
+		}
+		sec = arch = title = NULL;
+		p = strrchr(buf, '\0');
+		while (p-- > buf) {
+			if (NULL == sec && '.' == *p) {
+				sec = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if ('/' != *p)
+				continue;
+			if (NULL == title) {
+				title = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if (strncmp("man", p + 1, 3))
+				arch = p + 1;
+			break;
+		}
+		if (NULL == title)
+			title = buf;
+
+		/*
+		 * Build the file structure.
+		 */
+
 		nof = mandoc_calloc(1, sizeof(struct of));
-		nof->fname = strdup(argv[i]);
+		nof->fname = mandoc_strdup(argv[i]);
+		if (NULL != sec)
+			nof->sec = mandoc_strdup(sec);
+		if (NULL != arch)
+			nof->arch = mandoc_strdup(arch);
+		nof->title = mandoc_strdup(title);
+
+		/*
+		 * Add the structure to the list.
+		 */
+
 		if (verb > 2) 
 			printf("%s: Scheduling\n", argv[i]);
 		if (NULL == *of) {
@@ -1180,12 +1240,14 @@ ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
  * Pass in a pointer to a NULL structure for the first invocation.
  */
 static int
-ofile_dirbuild(const char *dir, int verb, struct of **of)
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+		int use_all, int verb, struct of **of)
 {
 	char		 buf[MAXPATHLEN];
 	size_t		 sz;
 	DIR		*d;
-	const char	*fn;
+	const char	*fn, *sec, *arch;
+	char		*suffix;
 	struct of	*nof;
 	struct dirent	*dp;
 
@@ -1194,12 +1256,34 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
 		return(0);
 	}
 
+	sec = psec;
+	arch = parch;
+
 	while (NULL != (dp = readdir(d))) {
 		fn = dp->d_name;
+
+		if ('.' == *fn)
+			continue;
+
 		if (DT_DIR == dp->d_type) {
-			if (0 == strcmp(".", fn))
-				continue;
-			if (0 == strcmp("..", fn))
+
+			/*
+	 		 * Don't bother parsing directories
+			 * that man(1) won't find.
+			 */
+
+			if (NULL == psec) {
+				if(0 == strncmp("man", fn, 3))
+					sec = fn + 3;
+				else if (use_all)
+					sec = fn;
+				else
+					continue;
+				arch = NULL;
+			} else if (NULL == parch && (use_all ||
+					NULL == strchr(fn, '.')))
+				arch = fn;
+			else if (0 == use_all)
 				continue;
 
 			buf[0] = '\0';
@@ -1207,22 +1291,35 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
 			strlcat(buf, "/", MAXPATHLEN);
 			sz = strlcat(buf, fn, MAXPATHLEN);
 
-			if (sz < MAXPATHLEN) {
-				if ( ! ofile_dirbuild(buf, verb, of))
-					return(0);
-				continue;
-			} else if (sz < MAXPATHLEN)
-				continue;
+			if (MAXPATHLEN <= sz) {
+				fprintf(stderr, "%s: Path too long\n", dir);
+				return(0);
+			}
+ 
+			if (verb > 2)
+				printf("%s: Scanning\n", buf);
 
-			fprintf(stderr, "%s: Path too long\n", dir);
-			return(0);
+			if ( ! ofile_dirbuild(buf, sec, arch,
+					use_all, verb, of))
+				return(0);
 		}
-		if (DT_REG != dp->d_type)
+		if (DT_REG != dp->d_type ||
+		    (NULL == sec && !use_all) ||
+		    !strcmp(MANDOC_DB, fn) ||
+		    !strcmp(MANDOC_IDX, fn))
 			continue;
 
-		if (0 == strcmp(MANDOC_DB, fn) ||
-				0 == strcmp(MANDOC_IDX, fn))
-			continue;
+		/*
+		 * Don't bother parsing files that man(1) won't find.
+		 */
+
+		suffix = strrchr(fn, '.');
+		if (0 == use_all) {
+			if (NULL == suffix)
+				continue;
+			if (strcmp(suffix + 1, sec))
+				continue;
+		}
 
 		buf[0] = '\0';
 		strlcat(buf, dir, MAXPATHLEN);
@@ -1235,6 +1332,13 @@ ofile_dirbuild(const char *dir, int verb, struct of **of)
 
 		nof = mandoc_calloc(1, sizeof(struct of));
 		nof->fname = mandoc_strdup(buf);
+		if (NULL != sec)
+			nof->sec = mandoc_strdup(sec);
+		if (NULL != arch)
+			nof->arch = mandoc_strdup(arch);
+		if (NULL != suffix)
+			*suffix = '\0';
+		nof->title = mandoc_strdup(fn);
 
 		if (verb > 2)
 			printf("%s: Scheduling\n", buf);
@@ -1261,6 +1365,9 @@ ofile_free(struct of *of)
 	while (of) {
 		nof = of->next;
 		free(of->fname);
+		free(of->sec);
+		free(of->arch);
+		free(of->title);
 		free(of);
 		of = nof;
 	}
--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv

  reply	other threads:[~2011-11-14  0:06 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-13 18:42 Ingo Schwarze
2011-11-13 20:05 ` Kristaps Dzonsons
2011-11-14  0:06   ` Ingo Schwarze [this message]
2011-11-14 19:13     ` Ingo Schwarze
2011-11-14 20:07       ` Kristaps Dzonsons
2011-11-14 23:34         ` Ingo Schwarze
2011-11-24 10:10           ` Kristaps Dzonsons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111114000638.GK3374@iris.usta.de \
    --to=schwarze@usta.de \
    --cc=tech@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).