source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Store page titles in the correct case, and by default, only put
@ 2011-11-26 11:23 schwarze
  2011-11-26 11:54 ` Kristaps Dzonsons
  0 siblings, 1 reply; 2+ messages in thread
From: schwarze @ 2011-11-26 11:23 UTC (permalink / raw)
  To: source

Log Message:
-----------
Store page titles in the correct case, and by default, only
put stuff into the database that man(1) will be able to retrieve.
However, support an option to use all directories and files.
feedback and ok  kristaps@

Modified Files:
--------------
    mdocml:
        mandocdb.8
        mandocdb.c

Revision Data
-------------
Index: mandocdb.8
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.8,v
retrieving revision 1.6
retrieving revision 1.7
diff -Lmandocdb.8 -Lmandocdb.8 -u -p -r1.6 -r1.7
--- mandocdb.8
+++ mandocdb.8
@@ -22,7 +22,7 @@
 .Nd index UNIX manuals
 .Sh SYNOPSIS
 .Nm
-.Op Fl v
+.Op Fl av
 .Op Ar dir...
 .Nm
 .Op Fl v
@@ -42,8 +42,19 @@ manuals and indexes them in a
 and
 .Sx Index Database
 for fast retrieval.
+.Pp
 The arguments are as follows:
 .Bl -tag -width Ds
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+By default, only files matching
+.Sm off
+.Sy man Ar section Li /
+.Op Ar arch Li /
+.Ar title . section
+.Sm on
+will be used.
 .It Fl d Ar dir
 Merge (remove and re-add)
 .Ar
Index: mandocdb.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.11 -r1.12
--- mandocdb.c
+++ mandocdb.c
@@ -1,6 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -48,6 +49,9 @@
 
 struct	of {
 	char		 *fname; /* heap-allocated */
+	char		 *sec;
+	char		 *arch;
+	char		 *title;
 	struct of	 *next; /* NULL for last one */
 	struct of	 *first; /* first in list */
 };
@@ -89,13 +93,15 @@ static	void		  hash_reset(DB **);
 static	void		  index_merge(const struct of *, struct mparse *,
 				struct buf *, struct buf *,
 				DB *, DB *, const char *, 
-				DB *, const char *, int,
+				DB *, const char *, int, int,
 				recno_t, const recno_t *, size_t);
 static	void		  index_prune(const struct of *, DB *, 
 				const char *, DB *, const char *, 
 				int, recno_t *, recno_t **, size_t *);
-static	void		  ofile_argbuild(char *[], int, int, struct of **);
-static	int		  ofile_dirbuild(const char *, int, struct of **);
+static	void		  ofile_argbuild(char *[], int, int, int,
+				struct of **);
+static	int		  ofile_dirbuild(const char *, const char *,
+				const char *, int, int, struct of **);
 static	void		  ofile_free(struct of *);
 static	int		  pman_node(MAN_ARGS);
 static	void		  pmdoc_node(MDOC_ARGS);
@@ -254,16 +260,17 @@ main(int argc, char *argv[])
 	char		 ibuf[MAXPATHLEN], /* index fname */
 			 fbuf[MAXPATHLEN];  /* btree fname */
 	int		 verb, /* output verbosity */
+			 use_all, /* use all directories and files */
 			 ch, i, flags;
 	DB		*idx, /* index database */
 			*db, /* keyword database */
 			*hash; /* temporary keyword hashtable */
 	BTREEINFO	 info; /* btree configuration */
-	recno_t		 maxrec; /* supremum of all records */
-	recno_t		*recs; /* buffer of empty records */
+	recno_t		 maxrec; /* last record number in the index */
+	recno_t		*recs; /* the numbers of all empty records */
 	size_t		 sz1, sz2,
-			 recsz, /* buffer size of recs */
-			 reccur; /* valid number of recs */
+			 recsz, /* number of allocated slots in recs */
+			 reccur; /* current number of empty records */
 	struct buf	 buf, /* keyword buffer */
 			 dbuf; /* description buffer */
 	struct of	*of; /* list of files for processing */
@@ -279,6 +286,7 @@ main(int argc, char *argv[])
 	memset(&dirs, 0, sizeof(struct manpaths));
 
 	verb = 0;
+	use_all = 0;
 	of = NULL;
 	db = idx = NULL;
 	mp = NULL;
@@ -289,8 +297,11 @@ main(int argc, char *argv[])
 	op = OP_NEW;
 	dir = NULL;
 
-	while (-1 != (ch = getopt(argc, argv, "d:u:v")))
+	while (-1 != (ch = getopt(argc, argv, "ad:u:v")))
 		switch (ch) {
+		case ('a'):
+			use_all = 1;
+			break;
 		case ('d'):
 			dir = optarg;
 			op = OP_UPDATE;
@@ -347,7 +358,7 @@ main(int argc, char *argv[])
 		if (NULL == db) {
 			perror(fbuf);
 			exit((int)MANDOCLEVEL_SYSERR);
-		} else if (NULL == db) {
+		} else if (NULL == idx) {
 			perror(ibuf);
 			exit((int)MANDOCLEVEL_SYSERR);
 		}
@@ -357,7 +368,7 @@ main(int argc, char *argv[])
 			printf("%s: Opened\n", ibuf);
 		}
 
-		ofile_argbuild(argv, argc, verb, &of);
+		ofile_argbuild(argv, argc, use_all, verb, &of);
 		if (NULL == of)
 			goto out;
 
@@ -368,8 +379,8 @@ main(int argc, char *argv[])
 
 		if (OP_UPDATE == op)
 			index_merge(of, mp, &dbuf, &buf, hash, 
-					db, fbuf, idx, ibuf, verb,
-					maxrec, recs, reccur);
+					db, fbuf, idx, ibuf, use_all,
+					verb, maxrec, recs, reccur);
 
 		goto out;
 	}
@@ -411,7 +422,7 @@ main(int argc, char *argv[])
 		if (NULL == db) {
 			perror(fbuf);
 			exit((int)MANDOCLEVEL_SYSERR);
-		} else if (NULL == db) {
+		} else if (NULL == idx) {
 			perror(ibuf);
 			exit((int)MANDOCLEVEL_SYSERR);
 		}
@@ -424,7 +435,8 @@ main(int argc, char *argv[])
 		ofile_free(of);
 		of = NULL;
 
-		if ( ! ofile_dirbuild(dirs.paths[i], verb, &of)) 
+		if ( ! ofile_dirbuild(dirs.paths[i], NULL, NULL,
+				use_all, verb, &of))
 			exit((int)MANDOCLEVEL_SYSERR);
 
 		if (NULL == of)
@@ -433,7 +445,8 @@ main(int argc, char *argv[])
 		of = of->first;
 
 		index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, 
-				idx, ibuf, verb, maxrec, recs, reccur);
+				idx, ibuf, use_all, verb,
+				maxrec, recs, reccur);
 	}
 
 out:
@@ -459,7 +472,7 @@ void
 index_merge(const struct of *of, struct mparse *mp,
 		struct buf *dbuf, struct buf *buf,
 		DB *hash, DB *db, const char *dbf, 
-		DB *idx, const char *idxf, int verb,
+		DB *idx, const char *idxf, int use_all, int verb,
 		recno_t maxrec, const recno_t *recs, size_t reccur)
 {
 	recno_t		 rec;
@@ -495,17 +508,53 @@ index_merge(const struct of *of, struct 
 		if (NULL == mdoc && NULL == man)
 			continue;
 
+		/*
+		 * By default, skip a file if the manual section
+		 * and architecture given in the file disagree
+		 * with the directory where the file is located.
+		 */
+
 		msec = NULL != mdoc ? 
 			mdoc_meta(mdoc)->msec : man_meta(man)->msec;
-		mtitle = NULL != mdoc ? 
-			mdoc_meta(mdoc)->title : man_meta(man)->title;
 		arch = NULL != mdoc ? 
 			mdoc_meta(mdoc)->arch : NULL;
 
+		if (0 == use_all) {
+			assert(of->sec);
+			assert(msec);
+			if (strcmp(msec, of->sec))
+				continue;
+
+			if (NULL == arch) {
+				if (NULL != of->arch)
+					continue;
+			} else if (NULL == of->arch ||
+					strcmp(arch, of->arch))
+				continue;
+		}
+
 		if (NULL == arch)
 			arch = "";
 
 		/* 
+		 * By default, skip a file if the title given
+		 * in the file disagrees with the file name.
+		 * If both agree, use the file name as the title,
+		 * because the one in the file usually is all caps.
+		 */
+
+		mtitle = NULL != mdoc ? 
+			mdoc_meta(mdoc)->title : man_meta(man)->title;
+
+		assert(of->title);
+		assert(mtitle);
+
+		if (0 == strcasecmp(mtitle, of->title))
+			mtitle = of->title;
+		else if (0 == use_all)
+			continue;
+
+		/* 
 		 * The index record value consists of a nil-terminated
 		 * filename, a nil-terminated manual section, and a
 		 * nil-terminated description.  Since the description
@@ -1179,14 +1228,64 @@ pman_node(MAN_ARGS)
 }
 
 static void
-ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
+ofile_argbuild(char *argv[], int argc, int use_all, int verb,
+		struct of **of)
 {
+	char		 buf[MAXPATHLEN];
+	char		*sec, *arch, *title, *p;
 	int		 i;
 	struct of	*nof;
 
 	for (i = 0; i < argc; i++) {
+
+		/*
+		 * Try to infer the manual section, architecture and
+		 * page title from the path, assuming it looks like
+		 *   man*[/<arch>]/<title>.<section>
+		 */
+
+		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+			fprintf(stderr, "%s: Path too long\n", argv[i]);
+			continue;
+		}
+		sec = arch = title = NULL;
+		p = strrchr(buf, '\0');
+		while (p-- > buf) {
+			if (NULL == sec && '.' == *p) {
+				sec = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if ('/' != *p)
+				continue;
+			if (NULL == title) {
+				title = p + 1;
+				*p = '\0';
+				continue;
+			}
+			if (strncmp("man", p + 1, 3))
+				arch = p + 1;
+			break;
+		}
+		if (NULL == title)
+			title = buf;
+
+		/*
+		 * Build the file structure.
+		 */
+
 		nof = mandoc_calloc(1, sizeof(struct of));
-		nof->fname = strdup(argv[i]);
+		nof->fname = mandoc_strdup(argv[i]);
+		if (NULL != sec)
+			nof->sec = mandoc_strdup(sec);
+		if (NULL != arch)
+			nof->arch = mandoc_strdup(arch);
+		nof->title = mandoc_strdup(title);
+
+		/*
+		 * Add the structure to the list.
+		 */
+
 		if (verb > 2) 
 			printf("%s: Scheduling\n", argv[i]);
 		if (NULL == *of) {
@@ -1209,12 +1308,14 @@ ofile_argbuild(char *argv[], int argc, i
  * Pass in a pointer to a NULL structure for the first invocation.
  */
 static int
-ofile_dirbuild(const char *dir, int verb, struct of **of)
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+		int use_all, int verb, struct of **of)
 {
 	char		 buf[MAXPATHLEN];
 	size_t		 sz;
 	DIR		*d;
-	const char	*fn;
+	const char	*fn, *sec, *arch;
+	char		*suffix;
 	struct of	*nof;
 	struct dirent	*dp;
 
@@ -1225,10 +1326,30 @@ ofile_dirbuild(const char *dir, int verb
 
 	while (NULL != (dp = readdir(d))) {
 		fn = dp->d_name;
+
+		if ('.' == *fn)
+			continue;
+
 		if (DT_DIR == dp->d_type) {
-			if (0 == strcmp(".", fn))
-				continue;
-			if (0 == strcmp("..", fn))
+			sec = psec;
+			arch = parch;
+
+			/*
+			 * By default, only use directories called:
+			 *   man<section>/[<arch>/]
+			 */
+
+			if (NULL == sec) {
+				if(0 == strncmp("man", fn, 3))
+					sec = fn + 3;
+				else if (use_all)
+					sec = fn;
+				else
+					continue;
+			} else if (NULL == arch && (use_all ||
+					NULL == strchr(fn, '.')))
+				arch = fn;
+			else if (0 == use_all)
 				continue;
 
 			buf[0] = '\0';
@@ -1236,22 +1357,37 @@ ofile_dirbuild(const char *dir, int verb
 			strlcat(buf, "/", MAXPATHLEN);
 			sz = strlcat(buf, fn, MAXPATHLEN);
 
-			if (sz < MAXPATHLEN) {
-				if ( ! ofile_dirbuild(buf, verb, of))
-					return(0);
-				continue;
-			} else if (sz < MAXPATHLEN)
-				continue;
-
-			fprintf(stderr, "%s: Path too long\n", dir);
-			return(0);
+			if (MAXPATHLEN <= sz) {
+				fprintf(stderr, "%s: Path too long\n", dir);
+				return(0);
+			}
+ 
+			if (verb > 2)
+				printf("%s: Scanning\n", buf);
+
+			if ( ! ofile_dirbuild(buf, sec, arch,
+					use_all, verb, of))
+				return(0);
 		}
-		if (DT_REG != dp->d_type)
+		if (DT_REG != dp->d_type ||
+		    (NULL == psec && !use_all) ||
+		    !strcmp(MANDOC_DB, fn) ||
+		    !strcmp(MANDOC_IDX, fn))
 			continue;
 
-		if (0 == strcmp(MANDOC_DB, fn) ||
-				0 == strcmp(MANDOC_IDX, fn))
-			continue;
+		/*
+		 * By default, skip files where the file name suffix
+		 * does not agree with the section directory
+		 * they are located in.
+		 */
+
+		suffix = strrchr(fn, '.');
+		if (0 == use_all) {
+			if (NULL == suffix)
+				continue;
+			if (strcmp(suffix + 1, psec))
+				continue;
+		}
 
 		buf[0] = '\0';
 		strlcat(buf, dir, MAXPATHLEN);
@@ -1264,6 +1400,19 @@ ofile_dirbuild(const char *dir, int verb
 
 		nof = mandoc_calloc(1, sizeof(struct of));
 		nof->fname = mandoc_strdup(buf);
+		if (NULL != psec)
+			nof->sec = mandoc_strdup(psec);
+		if (NULL != parch)
+			nof->arch = mandoc_strdup(parch);
+
+		/*
+		 * Remember the file name without the extension,
+		 * to be used as the page title in the database.
+		 */
+
+		if (NULL != suffix)
+			*suffix = '\0';
+		nof->title = mandoc_strdup(fn);
 
 		if (verb > 2)
 			printf("%s: Scheduling\n", buf);
@@ -1290,6 +1439,9 @@ ofile_free(struct of *of)
 	while (of) {
 		nof = of->next;
 		free(of->fname);
+		free(of->sec);
+		free(of->arch);
+		free(of->title);
 		free(of);
 		of = nof;
 	}
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: mdocml: Store page titles in the correct case, and by default, only put
  2011-11-26 11:23 mdocml: Store page titles in the correct case, and by default, only put schwarze
@ 2011-11-26 11:54 ` Kristaps Dzonsons
  0 siblings, 0 replies; 2+ messages in thread
From: Kristaps Dzonsons @ 2011-11-26 11:54 UTC (permalink / raw)
  To: source, Ingo Schwarze

On 26/11/2011 12:23, schwarze@mdocml.bsd.lv wrote:
> Log Message:
> -----------
> Store page titles in the correct case, and by default, only
> put stuff into the database that man(1) will be able to retrieve.
> However, support an option to use all directories and files.
> feedback and ok  kristaps@

Ingo,

Some post-checkin comments.  First, I think we can make verbose and 
use_all into global statics, as they don't change and we're just 
cluttering the function prototypes with them.  No?  And second, in the 
manual, can you put the default usage following the arguments?

  Its arguments are as follows:

  -a    blah blah

  -b    blah blah

  By default, foo does this and that...

This is much easier to read and doesn't require that operators read each 
option to understand the defaults.

Thanks,

Kristaps


--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-11-26 11:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-26 11:23 mdocml: Store page titles in the correct case, and by default, only put schwarze
2011-11-26 11:54 ` Kristaps Dzonsons

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).