tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Cc: jmc@openbsd.org
Subject: mandocdb: full set of search types
Date: Wed, 16 Nov 2011 01:39:19 +0100	[thread overview]
Message-ID: <20111116003919.GD30189@iris.usta.de> (raw)

Hi,

we are close to going into production.  I have talked to espie@,
and he agrees with putting small pieces into
  /usr/src/libexec/makewhatis/Makewhatis.pm
to call out to mandocdb(8) for automated updates of the mandoc.db
files alongside the updates of the whatis.db files when installing
and removing packages, such that people running -current can easily
start testing the new apropos.

We will keep the old makewhatis/apropos-combo in place until the new
system is reasonably feature-complete and clearly better than the
old one, then install the new apropos in place of the old one and
finally remove the old components.

Before enabling mandocdb in pkg_add(8), i'd like to get the database
format complete, such that we don't force people to rebuild the
databases after upgrading to new snapshots.

Here is a patch defining TYPE_ flags for all macros that i can
imagine might be worth searching for (maybe even a few more, but i'd
rather have too many than too few).  I have left out all obsolete
macros and most physical formatting macros.

This requires switching the mask to 64 bits.  During the switch,
I have replaced the very error-prone handling of the key database
values with a new struct db_val; some memcpy() calls with normal
assignments; and some magic constants with sizeof() constructs.

OK to put this in, or do you see anything that is missing?


Right now, i will start to implement the new search types.

Yours,
  Ingo


--- apropos_db.c.orig
+++ apropos_db.c
@@ -19,6 +19,7 @@
 #include <fcntl.h>
 #include <regex.h>
 #include <stdarg.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -34,28 +35,53 @@
 
 struct	expr {
 	int		 regex;
-	int	 	 mask;
+	uint64_t 	 mask;
 	char		*v;
 	regex_t	 	 re;
 };
 
 struct	type {
-	int		 mask;
+	uint64_t	 mask;
 	const char	*name;
 };
 
 static	const struct type types[] = {
 	{ TYPE_An, "An" },
+	{ TYPE_Ar, "Ar" },
+	{ TYPE_At, "At" },
+	{ TYPE_Bsx, "Bsx" },
+	{ TYPE_Bx, "Bx" },
 	{ TYPE_Cd, "Cd" },
+	{ TYPE_Cm, "Cm" },
+	{ TYPE_Dv, "Dv" },
+	{ TYPE_Dx, "Dx" },
+	{ TYPE_Em, "Em" },
 	{ TYPE_Er, "Er" },
 	{ TYPE_Ev, "Ev" },
+	{ TYPE_Fa, "Fa" },
+	{ TYPE_Fl, "Fl" },
 	{ TYPE_Fn, "Fn" },
 	{ TYPE_Fn, "Fo" },
+	{ TYPE_Ft, "Ft" },
+	{ TYPE_Fx, "Fx" },
+	{ TYPE_Ic, "Ic" },
 	{ TYPE_In, "In" },
+	{ TYPE_Lb, "Lb" },
+	{ TYPE_Li, "Li" },
+	{ TYPE_Lk, "Lk" },
+	{ TYPE_Ms, "Ms" },
+	{ TYPE_Mt, "Mt" },
 	{ TYPE_Nd, "Nd" },
 	{ TYPE_Nm, "Nm" },
+	{ TYPE_Nx, "Nx" },
+	{ TYPE_Ox, "Ox" },
 	{ TYPE_Pa, "Pa" },
+	{ TYPE_Rs, "Rs" },
+	{ TYPE_Sh, "Sh" },
+	{ TYPE_Ss, "Ss" },
 	{ TYPE_St, "St" },
+	{ TYPE_Sy, "Sy" },
+	{ TYPE_Tn, "Tn" },
 	{ TYPE_Va, "Va" },
 	{ TYPE_Va, "Vt" },
 	{ TYPE_Xr, "Xr" },
@@ -65,7 +91,7 @@ static	const struct type types[] = {
 
 static	DB	*btree_open(void);
 static	int	 btree_read(const DBT *, const struct mchars *, char **);
-static	int	 exprexec(const struct expr *, char *, int);
+static	int	 exprexec(const struct expr *, char *, uint64_t);
 static	DB	*index_open(void);
 static	int	 index_read(const DBT *, const DBT *, 
 			const struct mchars *, struct rec *);
@@ -328,6 +354,7 @@ apropos_search(const struct opts *opts, const struct expr *expr,
 	recno_t		 rec;
 	struct rec	*recs;
 	struct rec	 srec;
+	struct db_val	*vbuf;
 
 	root	= -1;
 	leaf	= -1;
@@ -357,15 +384,15 @@ apropos_search(const struct opts *opts, const struct expr *expr,
 		 * The key must have something in it, and the value must
 		 * have the correct tags/recno mix.
 		 */
-		if (key.size < 2 || 8 != val.size) 
+		if (key.size < 2 || sizeof(struct db_val) != val.size) 
 			break;
 		if ( ! btree_read(&key, mc, &buf))
 			break;
 
-		if ( ! exprexec(expr, buf, *(int *)val.data))
+		vbuf = val.data;
+		if ( ! exprexec(expr, buf, vbuf->mask))
 			continue;
-
-		memcpy(&rec, val.data + 4, sizeof(recno_t));
+		rec = vbuf->rec;
 
 		/*
 		 * O(log n) scan for prior records.  Since a record
@@ -524,7 +551,7 @@ exprfree(struct expr *p)
 }
 
 static int
-exprexec(const struct expr *p, char *cp, int mask)
+exprexec(const struct expr *p, char *cp, uint64_t mask)
 {
 
 	if ( ! (mask & p->mask))
--- mandocdb.c.orig
+++ mandocdb.c
@@ -77,7 +77,7 @@ static	void		  buf_append(struct buf *, const char *);
 static	void		  buf_appendb(struct buf *, 
 				const void *, size_t);
 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
-static	void		  hash_put(DB *, const struct buf *, int);
+static	void		  hash_put(DB *, const struct buf *, uint64_t);
 static	void		  hash_reset(DB **);
 static	void		  index_merge(const struct of *, struct mparse *,
 				struct buf *, struct buf *,
@@ -453,7 +453,7 @@ index_merge(const struct of *of, struct mparse *mp,
 	const char	*fn, *msec, *mtitle, *arch;
 	size_t		 sv;
 	unsigned	 seq;
-	char		 vbuf[8];
+	struct db_val	 vbuf;
 
 	for (rec = 0; of; of = of->next) {
 		fn = of->fname;
@@ -552,17 +552,15 @@ index_merge(const struct of *of, struct mparse *mp,
 		 * Copy from the in-memory hashtable of pending keywords
 		 * into the database.
 		 */
-		
-		memset(vbuf, 0, sizeof(uint32_t));
-		memcpy(vbuf + 4, &rec, sizeof(uint32_t));
 
+		vbuf.rec = rec;
 		seq = R_FIRST;
 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
 			seq = R_NEXT;
 
-			memcpy(vbuf, val.data, sizeof(uint32_t));
-			val.size = sizeof(vbuf);
-			val.data = vbuf;
+			vbuf.mask = *(uint64_t *)val.data;
+			val.size = sizeof(struct db_val);
+			val.data = &vbuf;
 
 			if (verb > 1)
 				printf("%s: Added keyword: %s\n", 
@@ -607,6 +605,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
 {
 	const struct of	*of;
 	const char	*fn;
+	struct db_val	*vbuf;
 	unsigned	 seq, sseq;
 	DBT		 key, val;
 	size_t		 reccur;
@@ -639,8 +638,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
 		sseq = R_FIRST;
 		while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
 			sseq = R_NEXT;
-			assert(8 == val.size);
-			if (*maxrec != *(recno_t *)(val.data + 4))
+			assert(sizeof(struct db_val) == val.size);
+			vbuf = val.data;
+			if (*maxrec != vbuf->rec)
 				continue;
 			if (verb)
 				printf("%s: Deleted keyword: %s\n", 
@@ -1021,7 +1021,7 @@ pmdoc_Nm(MDOC_ARGS)
 }
 
 static void
-hash_put(DB *db, const struct buf *buf, int mask)
+hash_put(DB *db, const struct buf *buf, uint64_t mask)
 {
 	DBT		 key, val;
 	int		 rc;
@@ -1036,10 +1036,10 @@ hash_put(DB *db, const struct buf *buf, int mask)
 		perror("hash");
 		exit((int)MANDOCLEVEL_SYSERR);
 	} else if (0 == rc)
-		mask |= *(int *)val.data;
+		mask |= *(uint64_t *)val.data;
 
 	val.data = &mask;
-	val.size = sizeof(int); 
+	val.size = sizeof(uint64_t); 
 
 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
 		perror("hash");
--- mandocdb.h.orig
+++ mandocdb.h
@@ -15,18 +15,49 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+struct db_val {
+	uint64_t	mask;
+	uint32_t	rec;
+};
+
 #define	MANDOC_DB	"mandoc.db"
 #define	MANDOC_IDX	"mandoc.index"
 
-#define	TYPE_An		0x01
-#define	TYPE_Cd		0x02
-#define	TYPE_Er		0x04
-#define	TYPE_Ev		0x08
-#define	TYPE_Fn		0x10
-#define	TYPE_In		0x20
-#define	TYPE_Nd		0x40
-#define	TYPE_Nm		0x100
-#define	TYPE_Pa		0x200
-#define	TYPE_St		0x400
-#define	TYPE_Va		0x1000
-#define	TYPE_Xr		0x2000
+#define	TYPE_An		0x0000000000000001ULL
+#define	TYPE_Ar		0x0000000000000002ULL
+#define	TYPE_At		0x0000000000000004ULL
+#define	TYPE_Bsx	0x0000000000000008ULL
+#define	TYPE_Bx         0x0000000000000010ULL
+#define	TYPE_Cd		0x0000000000000020ULL
+#define	TYPE_Cm		0x0000000000000040ULL
+#define	TYPE_Dv		0x0000000000000080ULL
+#define	TYPE_Dx		0x0000000000000100ULL
+#define	TYPE_Em		0x0000000000000200ULL
+#define	TYPE_Er		0x0000000000000400ULL
+#define	TYPE_Ev		0x0000000000000800ULL
+#define	TYPE_Fa		0x0000000000001000ULL
+#define	TYPE_Fl		0x0000000000002000ULL
+#define	TYPE_Fn		0x0000000000004000ULL
+#define	TYPE_Ft		0x0000000000008000ULL
+#define	TYPE_Fx		0x0000000000010000ULL
+#define	TYPE_Ic		0x0000000000020000ULL
+#define	TYPE_In		0x0000000000040000ULL
+#define	TYPE_Lb		0x0000000000080000ULL
+#define	TYPE_Li		0x0000000000100000ULL
+#define	TYPE_Lk		0x0000000000200000ULL
+#define	TYPE_Ms		0x0000000000400000ULL
+#define	TYPE_Mt		0x0000000000800000ULL
+#define	TYPE_Nd		0x0000000001000000ULL
+#define	TYPE_Nm		0x0000000002000000ULL
+#define	TYPE_Nx		0x0000000004000000ULL
+#define	TYPE_Ox		0x0000000008000000ULL
+#define	TYPE_Pa		0x0000000010000000ULL
+#define	TYPE_Rs		0x0000000020000000ULL
+#define	TYPE_Sh		0x0000000040000000ULL
+#define	TYPE_Ss		0x0000000080000000ULL
+#define	TYPE_St		0x0000000100000000ULL
+#define	TYPE_Sy		0x0000000200000000ULL
+#define	TYPE_Tn		0x0000000400000000ULL
+#define	TYPE_Va		0x0000000800000000ULL
+#define	TYPE_Vt		0x0000001000000000ULL
+#define	TYPE_Xr		0x0000002000000000ULL
--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv

             reply	other threads:[~2011-11-16  0:39 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-16  0:39 Ingo Schwarze [this message]
2011-11-16  0:52 ` Kristaps Dzonsons
2011-11-16  1:50   ` Ingo Schwarze
2011-11-16 16:59   ` Ingo Schwarze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111116003919.GD30189@iris.usta.de \
    --to=schwarze@usta.de \
    --cc=jmc@openbsd.org \
    --cc=tech@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).