From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Cc: jmc@openbsd.org
Subject: mandocdb: full set of search types
Date: Wed, 16 Nov 2011 01:39:19 +0100 [thread overview]
Message-ID: <20111116003919.GD30189@iris.usta.de> (raw)
Hi,
We are close to going into production. I have talked to espie@,
and he agrees with putting small pieces into
/usr/src/libexec/makewhatis/Makewhatis.pm
to call out to mandocdb(8) for automated updates of the mandoc.db
files alongside the updates of the whatis.db files when installing
and removing packages, such that people running -current can easily
start testing the new apropos.
We will keep the old makewhatis/apropos combo in place until the new
system is reasonably feature-complete and clearly better than the
old one, then install the new apropos in place of the old one and
finally remove the old components.
Before enabling mandocdb in pkg_add(8), I'd like to get the database
format complete, so that we don't force people to rebuild the
databases after upgrading to new snapshots.
Here is a patch defining TYPE_ flags for all macros that I can
imagine might be worth searching for (maybe even a few more, but I'd
rather have too many than too few). I have left out all obsolete
macros and most physical formatting macros.
This requires switching the mask to 64 bits. During the switch,
I have replaced the very error-prone handling of the key database
values with a new struct db_val, some memcpy() calls with normal
assignments, and some magic constants with sizeof() constructs.
OK to put this in, or do you see anything that is missing?
Right now, I will start to implement the new search types.
Yours,
Ingo
--- apropos_db.c.orig
+++ apropos_db.c
@@ -19,6 +19,7 @@
#include <fcntl.h>
#include <regex.h>
#include <stdarg.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@@ -34,28 +35,53 @@
struct expr {
int regex;
- int mask;
+ uint64_t mask;
char *v;
regex_t re;
};
struct type {
- int mask;
+ uint64_t mask;
const char *name;
};
static const struct type types[] = {
{ TYPE_An, "An" },
+ { TYPE_Ar, "Ar" },
+ { TYPE_At, "At" },
+ { TYPE_Bsx, "Bsx" },
+ { TYPE_Bx, "Bx" },
{ TYPE_Cd, "Cd" },
+ { TYPE_Cm, "Cm" },
+ { TYPE_Dv, "Dv" },
+ { TYPE_Dx, "Dx" },
+ { TYPE_Em, "Em" },
{ TYPE_Er, "Er" },
{ TYPE_Ev, "Ev" },
+ { TYPE_Fa, "Fa" },
+ { TYPE_Fl, "Fl" },
{ TYPE_Fn, "Fn" },
{ TYPE_Fn, "Fo" },
+ { TYPE_Ft, "Ft" },
+ { TYPE_Fx, "Fx" },
+ { TYPE_Ic, "Ic" },
{ TYPE_In, "In" },
+ { TYPE_Lb, "Lb" },
+ { TYPE_Li, "Li" },
+ { TYPE_Lk, "Lk" },
+ { TYPE_Ms, "Ms" },
+ { TYPE_Mt, "Mt" },
{ TYPE_Nd, "Nd" },
{ TYPE_Nm, "Nm" },
+ { TYPE_Nx, "Nx" },
+ { TYPE_Ox, "Ox" },
{ TYPE_Pa, "Pa" },
+ { TYPE_Rs, "Rs" },
+ { TYPE_Sh, "Sh" },
+ { TYPE_Ss, "Ss" },
{ TYPE_St, "St" },
+ { TYPE_Sy, "Sy" },
+ { TYPE_Tn, "Tn" },
{ TYPE_Va, "Va" },
{ TYPE_Va, "Vt" },
{ TYPE_Xr, "Xr" },
@@ -65,7 +91,7 @@ static const struct type types[] = {
static DB *btree_open(void);
static int btree_read(const DBT *, const struct mchars *, char **);
-static int exprexec(const struct expr *, char *, int);
+static int exprexec(const struct expr *, char *, uint64_t);
static DB *index_open(void);
static int index_read(const DBT *, const DBT *,
const struct mchars *, struct rec *);
@@ -328,6 +354,7 @@ apropos_search(const struct opts *opts, const struct expr *expr,
recno_t rec;
struct rec *recs;
struct rec srec;
+ struct db_val *vbuf;
root = -1;
leaf = -1;
@@ -357,15 +384,15 @@ apropos_search(const struct opts *opts, const struct expr *expr,
* The key must have something in it, and the value must
* have the correct tags/recno mix.
*/
- if (key.size < 2 || 8 != val.size)
+ if (key.size < 2 || sizeof(struct db_val) != val.size)
break;
if ( ! btree_read(&key, mc, &buf))
break;
- if ( ! exprexec(expr, buf, *(int *)val.data))
+ vbuf = val.data;
+ if ( ! exprexec(expr, buf, vbuf->mask))
continue;
-
- memcpy(&rec, val.data + 4, sizeof(recno_t));
+ rec = vbuf->rec;
/*
* O(log n) scan for prior records. Since a record
@@ -524,7 +551,7 @@ exprfree(struct expr *p)
}
static int
-exprexec(const struct expr *p, char *cp, int mask)
+exprexec(const struct expr *p, char *cp, uint64_t mask)
{
if ( ! (mask & p->mask))
--- mandocdb.c.orig
+++ mandocdb.c
@@ -77,7 +77,7 @@ static void buf_append(struct buf *, const char *);
static void buf_appendb(struct buf *,
const void *, size_t);
static void dbt_put(DB *, const char *, DBT *, DBT *);
-static void hash_put(DB *, const struct buf *, int);
+static void hash_put(DB *, const struct buf *, uint64_t);
static void hash_reset(DB **);
static void index_merge(const struct of *, struct mparse *,
struct buf *, struct buf *,
@@ -453,7 +453,7 @@ index_merge(const struct of *of, struct mparse *mp,
const char *fn, *msec, *mtitle, *arch;
size_t sv;
unsigned seq;
- char vbuf[8];
+ struct db_val vbuf;
for (rec = 0; of; of = of->next) {
fn = of->fname;
@@ -552,17 +552,15 @@ index_merge(const struct of *of, struct mparse *mp,
* Copy from the in-memory hashtable of pending keywords
* into the database.
*/
-
- memset(vbuf, 0, sizeof(uint32_t));
- memcpy(vbuf + 4, &rec, sizeof(uint32_t));
+ vbuf.rec = rec;
seq = R_FIRST;
while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
seq = R_NEXT;
- memcpy(vbuf, val.data, sizeof(uint32_t));
- val.size = sizeof(vbuf);
- val.data = vbuf;
+ vbuf.mask = *(uint64_t *)val.data;
+ val.size = sizeof(struct db_val);
+ val.data = &vbuf;
if (verb > 1)
printf("%s: Added keyword: %s\n",
@@ -607,6 +605,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
{
const struct of *of;
const char *fn;
+ struct db_val *vbuf;
unsigned seq, sseq;
DBT key, val;
size_t reccur;
@@ -639,8 +638,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
sseq = R_FIRST;
while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
sseq = R_NEXT;
- assert(8 == val.size);
- if (*maxrec != *(recno_t *)(val.data + 4))
+ assert(sizeof(struct db_val) == val.size);
+ vbuf = val.data;
+ if (*maxrec != vbuf->rec)
continue;
if (verb)
printf("%s: Deleted keyword: %s\n",
@@ -1021,7 +1021,7 @@ pmdoc_Nm(MDOC_ARGS)
}
static void
-hash_put(DB *db, const struct buf *buf, int mask)
+hash_put(DB *db, const struct buf *buf, uint64_t mask)
{
DBT key, val;
int rc;
@@ -1036,10 +1036,10 @@ hash_put(DB *db, const struct buf *buf, int mask)
perror("hash");
exit((int)MANDOCLEVEL_SYSERR);
} else if (0 == rc)
- mask |= *(int *)val.data;
+ mask |= *(uint64_t *)val.data;
val.data = &mask;
- val.size = sizeof(int);
+ val.size = sizeof(uint64_t);
if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
perror("hash");
--- mandocdb.h.orig
+++ mandocdb.h
@@ -15,18 +15,49 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+struct db_val {
+ uint64_t mask;
+ uint32_t rec;
+};
+
#define MANDOC_DB "mandoc.db"
#define MANDOC_IDX "mandoc.index"
-#define TYPE_An 0x01
-#define TYPE_Cd 0x02
-#define TYPE_Er 0x04
-#define TYPE_Ev 0x08
-#define TYPE_Fn 0x10
-#define TYPE_In 0x20
-#define TYPE_Nd 0x40
-#define TYPE_Nm 0x100
-#define TYPE_Pa 0x200
-#define TYPE_St 0x400
-#define TYPE_Va 0x1000
-#define TYPE_Xr 0x2000
+#define TYPE_An 0x0000000000000001ULL
+#define TYPE_Ar 0x0000000000000002ULL
+#define TYPE_At 0x0000000000000004ULL
+#define TYPE_Bsx 0x0000000000000008ULL
+#define TYPE_Bx 0x0000000000000010ULL
+#define TYPE_Cd 0x0000000000000020ULL
+#define TYPE_Cm 0x0000000000000040ULL
+#define TYPE_Dv 0x0000000000000080ULL
+#define TYPE_Dx 0x0000000000000100ULL
+#define TYPE_Em 0x0000000000000200ULL
+#define TYPE_Er 0x0000000000000400ULL
+#define TYPE_Ev 0x0000000000000800ULL
+#define TYPE_Fa 0x0000000000001000ULL
+#define TYPE_Fl 0x0000000000002000ULL
+#define TYPE_Fn 0x0000000000004000ULL
+#define TYPE_Ft 0x0000000000008000ULL
+#define TYPE_Fx 0x0000000000010000ULL
+#define TYPE_Ic 0x0000000000020000ULL
+#define TYPE_In 0x0000000000040000ULL
+#define TYPE_Lb 0x0000000000080000ULL
+#define TYPE_Li 0x0000000000100000ULL
+#define TYPE_Lk 0x0000000000200000ULL
+#define TYPE_Ms 0x0000000000400000ULL
+#define TYPE_Mt 0x0000000000800000ULL
+#define TYPE_Nd 0x0000000001000000ULL
+#define TYPE_Nm 0x0000000002000000ULL
+#define TYPE_Nx 0x0000000004000000ULL
+#define TYPE_Ox 0x0000000008000000ULL
+#define TYPE_Pa 0x0000000010000000ULL
+#define TYPE_Rs 0x0000000020000000ULL
+#define TYPE_Sh 0x0000000040000000ULL
+#define TYPE_Ss 0x0000000080000000ULL
+#define TYPE_St 0x0000000100000000ULL
+#define TYPE_Sy 0x0000000200000000ULL
+#define TYPE_Tn 0x0000000400000000ULL
+#define TYPE_Va 0x0000000800000000ULL
+#define TYPE_Vt 0x0000001000000000ULL
+#define TYPE_Xr 0x0000002000000000ULL
--
To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv
next reply other threads:[~2011-11-16 0:39 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-16 0:39 Ingo Schwarze [this message]
2011-11-16 0:52 ` Kristaps Dzonsons
2011-11-16 1:50 ` Ingo Schwarze
2011-11-16 16:59 ` Ingo Schwarze
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111116003919.GD30189@iris.usta.de \
--to=schwarze@usta.de \
--cc=jmc@openbsd.org \
--cc=tech@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).