From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: Re: mandocdb: full set of search types
Date: Wed, 16 Nov 2011 17:59:35 +0100 [thread overview]
Message-ID: <20111116165935.GO31182@iris.usta.de> (raw)
In-Reply-To: <4EC3093E.3030504@bsd.lv>
Hi Kristaps,
Kristaps Dzonsons wrote on Wed, Nov 16, 2011 at 01:52:14AM +0100:
> On 16/11/2011 01:39, Ingo Schwarze wrote:
[...]
>> Before enabling mandocdb in pkg_add(8), i'd like to get the database
>> format complete, such that we don't force people to rebuild the
>> databases after upgrading to new snapshots.
>>
>> Here is a patch defining TYPE_ flags for all macros that i can
>> imagine might be worth searching for (maybe even a few more, but i'd
>> rather have too many than too few). I have left out all obsolete
>> macros and most physical formatting macros.
>>
>> This requires switching the mask to 64 bits. During the switch,
>> i have replaced the very error-prone handling of the key database
>> values with a new struct db_val; some memcpy() calls with normal
>> assignments; and some magic constants with sizeof() constructs.
> Just a word or two before I sleep. This approach is sound and I've
> no issues with a quick look over the patch, but will wait until
> tomorrow to do so in earnest. But first, before mandocdb goes into
> production, the database should be checked for endian-neutrality.
That's still TODO.
However, that doesn't prevent getting the TYPE_* flags right first,
so here is an updated patch, to be applied on top of my man.conf
stuff sent right before.
OK?
Ingo
--- apropos_db.c.orig
+++ apropos_db.c
@@ -19,6 +19,7 @@
#include <fcntl.h>
#include <regex.h>
#include <stdarg.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -41,7 +42,7 @@ struct rectree {
struct expr {
int regex;
int index;
- int mask;
+ uint64_t mask;
int and;
char *v;
regex_t re;
@@ -49,22 +50,47 @@ struct expr {
};
struct type {
- int mask;
+ uint64_t mask;
const char *name;
};
static const struct type types[] = {
{ TYPE_An, "An" },
+ { TYPE_Ar, "Ar" },
+ { TYPE_At, "At" },
+ { TYPE_Bsx, "Bsx" },
+ { TYPE_Bx, "Bx" },
{ TYPE_Cd, "Cd" },
+ { TYPE_Cm, "Cm" },
+ { TYPE_Dv, "Dv" },
+ { TYPE_Dx, "Dx" },
+ { TYPE_Em, "Em" },
{ TYPE_Er, "Er" },
{ TYPE_Ev, "Ev" },
+ { TYPE_Fa, "Fa" },
+ { TYPE_Fl, "Fl" },
{ TYPE_Fn, "Fn" },
{ TYPE_Fn, "Fo" },
+ { TYPE_Ft, "Ft" },
+ { TYPE_Fx, "Fx" },
+ { TYPE_Ic, "Ic" },
{ TYPE_In, "In" },
+ { TYPE_Lb, "Lb" },
+ { TYPE_Li, "Li" },
+ { TYPE_Lk, "Lk" },
+ { TYPE_Ms, "Ms" },
+ { TYPE_Mt, "Mt" },
{ TYPE_Nd, "Nd" },
{ TYPE_Nm, "Nm" },
+ { TYPE_Nx, "Nx" },
+ { TYPE_Ox, "Ox" },
{ TYPE_Pa, "Pa" },
+ { TYPE_Rs, "Rs" },
+ { TYPE_Sh, "Sh" },
+ { TYPE_Ss, "Ss" },
{ TYPE_St, "St" },
+ { TYPE_Sy, "Sy" },
+ { TYPE_Tn, "Tn" },
{ TYPE_Va, "Va" },
{ TYPE_Va, "Vt" },
{ TYPE_Xr, "Xr" },
@@ -74,9 +100,9 @@ static const struct type types[] = {
static DB *btree_open(void);
static int btree_read(const DBT *, const struct mchars *, char **);
-static int exprexecpre(const struct expr *, const char *, int);
+static int exprexecpre(const struct expr *, const char *, uint64_t);
static void exprexecpost(const struct expr *,
- const char *, int, int *, size_t);
+ const char *, uint64_t, int *, size_t);
static struct expr *exprterm(char *, int, int);
static DB *index_open(void);
static int index_read(const DBT *, const DBT *,
@@ -381,7 +407,7 @@ single_search(struct rectree *tree, const struct opts *opts,
const struct expr *expr, size_t terms,
struct mchars *mc)
{
- int root, leaf, mask;
+ int root, leaf;
DBT key, val;
DB *btree, *idx;
int ch;
@@ -389,6 +415,7 @@ single_search(struct rectree *tree, const struct opts *opts,
recno_t rec;
struct rec *recs;
struct rec srec;
+ struct db_val *vbuf;
root = -1;
leaf = -1;
@@ -412,21 +439,19 @@ single_search(struct rectree *tree, const struct opts *opts,
* The key must have something in it, and the value must
* have the correct tags/recno mix.
*/
- if (key.size < 2 || 8 != val.size)
+ if (key.size < 2 || sizeof(struct db_val) != val.size)
break;
if ( ! btree_read(&key, mc, &buf))
break;
- mask = *(int *)val.data;
-
/*
* See if this keyword record matches any of the
* expressions we have stored.
*/
- if ( ! exprexecpre(expr, buf, mask))
+ vbuf = val.data;
+ if ( ! exprexecpre(expr, buf, vbuf->mask))
continue;
-
- memcpy(&rec, val.data + 4, sizeof(recno_t));
+ rec = vbuf->rec;
/*
* O(log n) scan for prior records. Since a record
@@ -445,7 +470,7 @@ single_search(struct rectree *tree, const struct opts *opts,
if (leaf >= 0 && recs[leaf].rec == rec) {
if (0 == recs[leaf].matches[0])
exprexecpost
- (expr, buf, mask,
+ (expr, buf, vbuf->mask,
recs[leaf].matches, terms);
continue;
}
@@ -478,7 +503,7 @@ single_search(struct rectree *tree, const struct opts *opts,
mandoc_calloc(terms + 1, sizeof(int));
exprexecpost
- (expr, buf, mask,
+ (expr, buf, vbuf->mask,
recs[tree->len].matches, terms);
/* Append to our tree. */
@@ -642,7 +667,7 @@ exprfree(struct expr *p)
* Return 1 if any expression evaluates to true, else 0.
*/
static int
-exprexecpre(const struct expr *p, const char *cp, int mask)
+exprexecpre(const struct expr *p, const char *cp, uint64_t mask)
{
for ( ; NULL != p; p = p->next) {
@@ -666,7 +691,7 @@ exprexecpre(const struct expr *p, const char *cp, int mask)
*/
static void
exprexecpost(const struct expr *e, const char *cp,
- int mask, int *matches, size_t matchsz)
+ uint64_t mask, int *matches, size_t matchsz)
{
const struct expr *p;
int match;
--- mandocdb.8.orig
+++ mandocdb.8
@@ -48,9 +48,13 @@ The arguments are as follows:
.It Fl a
Use all directories and files found below
.Ar dir ... .
-By default, directories and files
-.Xr man 1
-cannot find will be silently skipped.
+By default, only files matching
+.Sm off
+.Sy man Ar section Li /
+.Op Ar arch Li /
+.Ar title . section
+.Sm on
+will be used.
.It Fl d Ar dir
Merge (remove and re-add)
.Ar
--- mandocdb.c.orig
+++ mandocdb.c
@@ -79,7 +79,7 @@ static void buf_append(struct buf *, const char *);
static void buf_appendb(struct buf *,
const void *, size_t);
static void dbt_put(DB *, const char *, DBT *, DBT *);
-static void hash_put(DB *, const struct buf *, int);
+static void hash_put(DB *, const struct buf *, uint64_t);
static void hash_reset(DB **);
static void index_merge(const struct of *, struct mparse *,
struct buf *, struct buf *,
@@ -257,11 +257,11 @@ mandocdb(int argc, char *argv[])
*db, /* keyword database */
*hash; /* temporary keyword hashtable */
BTREEINFO info; /* btree configuration */
- recno_t maxrec; /* supremum of all records */
- recno_t *recs; /* buffer of empty records */
+ recno_t maxrec; /* last record number in the index */
+ recno_t *recs; /* the numbers of all empty records */
size_t sz1, sz2,
- recsz, /* buffer size of recs */
- reccur; /* valid number of recs */
+ recsz, /* number of allocated slots in recs */
+ reccur; /* current number of empty records */
struct buf buf, /* keyword buffer */
dbuf; /* description buffer */
struct of *of; /* list of files for processing */
@@ -348,7 +348,7 @@ mandocdb(int argc, char *argv[])
if (NULL == db) {
perror(fbuf);
exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == db) {
+ } else if (NULL == idx) {
perror(ibuf);
exit((int)MANDOCLEVEL_SYSERR);
}
@@ -410,7 +410,7 @@ mandocdb(int argc, char *argv[])
if (NULL == db) {
perror(fbuf);
exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == db) {
+ } else if (NULL == idx) {
perror(ibuf);
exit((int)MANDOCLEVEL_SYSERR);
}
@@ -473,7 +473,7 @@ index_merge(const struct of *of, struct mparse *mp,
const char *fn, *msec, *mtitle, *arch;
size_t sv;
unsigned seq;
- char vbuf[8];
+ struct db_val vbuf;
for (rec = 0; of; of = of->next) {
fn = of->fname;
@@ -499,9 +499,9 @@ index_merge(const struct of *of, struct mparse *mp,
continue;
/*
- * Make sure the manual section and architecture
- * agree with the directory where the file is located
- * or man(1) will not be able to find it.
+ * By default, skip a file if the manual section
+ * and architecture given in the file disagree
+ * with the directory where the file is located.
*/
msec = NULL != mdoc ?
@@ -527,9 +527,10 @@ index_merge(const struct of *of, struct mparse *mp,
arch = "";
/*
- * Case is relevant for man(1), so use the file name
- * instead of the (usually) all caps page title,
- * if the two agree.
+ * By default, skip a file if the title given
+ * in the file disagrees with the file name.
+ * If both agree, use the file name as the title,
+ * because the one in the file usually is all caps.
*/
mtitle = NULL != mdoc ?
@@ -571,17 +572,15 @@ index_merge(const struct of *of, struct mparse *mp,
* Copy from the in-memory hashtable of pending keywords
* into the database.
*/
-
- memset(vbuf, 0, sizeof(uint32_t));
- memcpy(vbuf + 4, &rec, sizeof(uint32_t));
+ vbuf.rec = rec;
seq = R_FIRST;
while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
seq = R_NEXT;
- memcpy(vbuf, val.data, sizeof(uint32_t));
- val.size = sizeof(vbuf);
- val.data = vbuf;
+ vbuf.mask = *(uint64_t *)val.data;
+ val.size = sizeof(struct db_val);
+ val.data = &vbuf;
if (verb > 1)
printf("%s: Added keyword: %s\n",
@@ -626,6 +625,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
{
const struct of *of;
const char *fn;
+ struct db_val *vbuf;
unsigned seq, sseq;
DBT key, val;
size_t reccur;
@@ -658,8 +658,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf,
sseq = R_FIRST;
while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
sseq = R_NEXT;
- assert(8 == val.size);
- if (*maxrec != *(recno_t *)(val.data + 4))
+ assert(sizeof(struct db_val) == val.size);
+ vbuf = val.data;
+ if (*maxrec != vbuf->rec)
continue;
if (verb)
printf("%s: Deleted keyword: %s\n",
@@ -1040,7 +1041,7 @@ pmdoc_Nm(MDOC_ARGS)
}
static void
-hash_put(DB *db, const struct buf *buf, int mask)
+hash_put(DB *db, const struct buf *buf, uint64_t mask)
{
DBT key, val;
int rc;
@@ -1055,10 +1056,10 @@ hash_put(DB *db, const struct buf *buf, int mask)
perror("hash");
exit((int)MANDOCLEVEL_SYSERR);
} else if (0 == rc)
- mask |= *(int *)val.data;
+ mask |= *(uint64_t *)val.data;
val.data = &mask;
- val.size = sizeof(int);
+ val.size = sizeof(uint64_t);
if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
perror("hash");
@@ -1228,7 +1229,9 @@ ofile_argbuild(char *argv[], int argc, int use_all, int verb,
for (i = 0; i < argc; i++) {
/*
- * Analyze the path.
+ * Try to infer the manual section, architecture and
+ * page title from the path, assuming it looks like
+ * man*[/<arch>]/<title>.<section>
*/
if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
@@ -1322,8 +1325,8 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch,
arch = parch;
/*
- * Don't bother parsing directories
- * that man(1) won't find.
+ * By default, only use directories called:
+ * man<section>/[<arch>/]
*/
if (NULL == sec) {
@@ -1363,7 +1366,9 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch,
continue;
/*
- * Don't bother parsing files that man(1) won't find.
+ * By default, skip files where the file name suffix
+ * does not agree with the section directory
+ * they are located in.
*/
suffix = strrchr(fn, '.');
@@ -1389,6 +1394,12 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch,
nof->sec = mandoc_strdup(psec);
if (NULL != parch)
nof->arch = mandoc_strdup(parch);
+
+ /*
+ * Remember the file name without the extension,
+ * to be used as the page title in the database.
+ */
+
if (NULL != suffix)
*suffix = '\0';
nof->title = mandoc_strdup(fn);
--- mandocdb.h.orig
+++ mandocdb.h
@@ -15,18 +15,49 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+struct db_val {
+ uint64_t mask;
+ uint32_t rec;
+};
+
#define MANDOC_DB "mandoc.db"
#define MANDOC_IDX "mandoc.index"
-#define TYPE_An 0x01
-#define TYPE_Cd 0x02
-#define TYPE_Er 0x04
-#define TYPE_Ev 0x08
-#define TYPE_Fn 0x10
-#define TYPE_In 0x20
-#define TYPE_Nd 0x40
-#define TYPE_Nm 0x100
-#define TYPE_Pa 0x200
-#define TYPE_St 0x400
-#define TYPE_Va 0x1000
-#define TYPE_Xr 0x2000
+#define TYPE_An 0x0000000000000001ULL
+#define TYPE_Ar 0x0000000000000002ULL
+#define TYPE_At 0x0000000000000004ULL
+#define TYPE_Bsx 0x0000000000000008ULL
+#define TYPE_Bx 0x0000000000000010ULL
+#define TYPE_Cd 0x0000000000000020ULL
+#define TYPE_Cm 0x0000000000000040ULL
+#define TYPE_Dv 0x0000000000000080ULL
+#define TYPE_Dx 0x0000000000000100ULL
+#define TYPE_Em 0x0000000000000200ULL
+#define TYPE_Er 0x0000000000000400ULL
+#define TYPE_Ev 0x0000000000000800ULL
+#define TYPE_Fa 0x0000000000001000ULL
+#define TYPE_Fl 0x0000000000002000ULL
+#define TYPE_Fn 0x0000000000004000ULL
+#define TYPE_Ft 0x0000000000008000ULL
+#define TYPE_Fx 0x0000000000010000ULL
+#define TYPE_Ic 0x0000000000020000ULL
+#define TYPE_In 0x0000000000040000ULL
+#define TYPE_Lb 0x0000000000080000ULL
+#define TYPE_Li 0x0000000000100000ULL
+#define TYPE_Lk 0x0000000000200000ULL
+#define TYPE_Ms 0x0000000000400000ULL
+#define TYPE_Mt 0x0000000000800000ULL
+#define TYPE_Nd 0x0000000001000000ULL
+#define TYPE_Nm 0x0000000002000000ULL
+#define TYPE_Nx 0x0000000004000000ULL
+#define TYPE_Ox 0x0000000008000000ULL
+#define TYPE_Pa 0x0000000010000000ULL
+#define TYPE_Rs 0x0000000020000000ULL
+#define TYPE_Sh 0x0000000040000000ULL
+#define TYPE_Ss 0x0000000080000000ULL
+#define TYPE_St 0x0000000100000000ULL
+#define TYPE_Sy 0x0000000200000000ULL
+#define TYPE_Tn 0x0000000400000000ULL
+#define TYPE_Va 0x0000000800000000ULL
+#define TYPE_Vt 0x0000001000000000ULL
+#define TYPE_Xr 0x0000002000000000ULL
--
To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv
prev parent reply other threads:[~2011-11-16 16:59 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-16 0:39 Ingo Schwarze
2011-11-16 0:52 ` Kristaps Dzonsons
2011-11-16 1:50 ` Ingo Schwarze
2011-11-16 16:59 ` Ingo Schwarze [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111116165935.GO31182@iris.usta.de \
--to=schwarze@usta.de \
--cc=tech@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).