* mandocdb: full set of search types @ 2011-11-16 0:39 Ingo Schwarze 2011-11-16 0:52 ` Kristaps Dzonsons 0 siblings, 1 reply; 4+ messages in thread From: Ingo Schwarze @ 2011-11-16 0:39 UTC (permalink / raw) To: tech; +Cc: jmc Hi, we are close to going into production. I have talked to espie@, and he is d'accord with putting small pieces into /usr/src/libexec/makewhatis/Makewhatis.pm to call out to mandocdb(8) for automated updates of the mandoc.db files alongside the updates of the whatis.db files when installing and removing packages, such that people running -current can easily start testing the new apropos. We will keep the old makewhatis/apropos-combo in place until the new system is reasonably feature-complete and clearly better than the old one, then install the new apropos in place of the old one and finally remove the old components. Before enabling mandocdb in pkg_add(8), i'd like to get the database format complete, such that we don't force people to rebuild the databases after upgrading to new snapshots. Here is a patch defining TYPE_ flags for all macros that i can imagine might be worth searching for (maybe even a few more, but i'd rather have too many than too few). I have left out all obsolete macros and most physical formatting macros. This requires switching the mask to 64 bits. During the switch, i have replaced the very error-prone handling of the key database values by a new struct db_val; some memcpy() calls by normal assignments; and some magical constants by sizeof() constructs. OK to put this in, or do you see anything that is missing? Right now, i will start to implement the new search types. 
Yours, Ingo --- apropos_db.c.orig +++ apropos_db.c @@ -19,6 +19,7 @@ #include <fcntl.h> #include <regex.h> #include <stdarg.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> @@ -34,28 +35,53 @@ struct expr { int regex; - int mask; + uint64_t mask; char *v; regex_t re; }; struct type { - int mask; + uint64_t mask; const char *name; }; static const struct type types[] = { { TYPE_An, "An" }, + { TYPE_Ar, "Ar" }, + { TYPE_At, "At" }, + { TYPE_Bsx, "Bsx" }, + { TYPE_Bx, "Bx" }, { TYPE_Cd, "Cd" }, + { TYPE_Cm, "Cm" }, + { TYPE_Dv, "Dv" }, + { TYPE_Dx, "Dx" }, + { TYPE_Em, "Em" }, { TYPE_Er, "Er" }, { TYPE_Ev, "Ev" }, + { TYPE_Fa, "Fa" }, + { TYPE_Fl, "Fl" }, { TYPE_Fn, "Fn" }, { TYPE_Fn, "Fo" }, + { TYPE_Ft, "Ft" }, + { TYPE_Fx, "Fx" }, + { TYPE_Ic, "Ic" }, { TYPE_In, "In" }, + { TYPE_Lb, "Lb" }, + { TYPE_Li, "Li" }, + { TYPE_Lk, "Lk" }, + { TYPE_Ms, "Ms" }, + { TYPE_Mt, "Mt" }, { TYPE_Nd, "Nd" }, { TYPE_Nm, "Nm" }, + { TYPE_Nx, "Nx" }, + { TYPE_Ox, "Ox" }, { TYPE_Pa, "Pa" }, + { TYPE_Rs, "Rs" }, + { TYPE_Sh, "Sh" }, + { TYPE_Ss, "Ss" }, { TYPE_St, "St" }, + { TYPE_Sy, "Sy" }, + { TYPE_Tn, "Tn" }, { TYPE_Va, "Va" }, { TYPE_Va, "Vt" }, { TYPE_Xr, "Xr" }, @@ -65,7 +91,7 @@ static const struct type types[] = { static DB *btree_open(void); static int btree_read(const DBT *, const struct mchars *, char **); -static int exprexec(const struct expr *, char *, int); +static int exprexec(const struct expr *, char *, uint64_t); static DB *index_open(void); static int index_read(const DBT *, const DBT *, const struct mchars *, struct rec *); @@ -328,6 +354,7 @@ apropos_search(const struct opts *opts, const struct expr *expr, recno_t rec; struct rec *recs; struct rec srec; + struct db_val *vbuf; root = -1; leaf = -1; @@ -357,15 +384,15 @@ apropos_search(const struct opts *opts, const struct expr *expr, * The key must have something in it, and the value must * have the correct tags/recno mix. 
*/ - if (key.size < 2 || 8 != val.size) + if (key.size < 2 || sizeof(struct db_val) != val.size) break; if ( ! btree_read(&key, mc, &buf)) break; - if ( ! exprexec(expr, buf, *(int *)val.data)) + vbuf = val.data; + if ( ! exprexec(expr, buf, vbuf->mask)) continue; - - memcpy(&rec, val.data + 4, sizeof(recno_t)); + rec = vbuf->rec; /* * O(log n) scan for prior records. Since a record @@ -524,7 +551,7 @@ exprfree(struct expr *p) } static int -exprexec(const struct expr *p, char *cp, int mask) +exprexec(const struct expr *p, char *cp, uint64_t mask) { if ( ! (mask & p->mask)) --- mandocdb.c.orig +++ mandocdb.c @@ -77,7 +77,7 @@ static void buf_append(struct buf *, const char *); static void buf_appendb(struct buf *, const void *, size_t); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void hash_put(DB *, const struct buf *, int); +static void hash_put(DB *, const struct buf *, uint64_t); static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, @@ -453,7 +453,7 @@ index_merge(const struct of *of, struct mparse *mp, const char *fn, *msec, *mtitle, *arch; size_t sv; unsigned seq; - char vbuf[8]; + struct db_val vbuf; for (rec = 0; of; of = of->next) { fn = of->fname; @@ -552,17 +552,15 @@ index_merge(const struct of *of, struct mparse *mp, * Copy from the in-memory hashtable of pending keywords * into the database. 
*/ - - memset(vbuf, 0, sizeof(uint32_t)); - memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + vbuf.rec = rec; seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - memcpy(vbuf, val.data, sizeof(uint32_t)); - val.size = sizeof(vbuf); - val.data = vbuf; + vbuf.mask = *(uint64_t *)val.data; + val.size = sizeof(struct db_val); + val.data = &vbuf; if (verb > 1) printf("%s: Added keyword: %s\n", @@ -607,6 +605,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, { const struct of *of; const char *fn; + struct db_val *vbuf; unsigned seq, sseq; DBT key, val; size_t reccur; @@ -639,8 +638,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, sseq = R_FIRST; while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { sseq = R_NEXT; - assert(8 == val.size); - if (*maxrec != *(recno_t *)(val.data + 4)) + assert(sizeof(struct db_val) == val.size); + vbuf = val.data; + if (*maxrec != vbuf->rec) continue; if (verb) printf("%s: Deleted keyword: %s\n", @@ -1021,7 +1021,7 @@ pmdoc_Nm(MDOC_ARGS) } static void -hash_put(DB *db, const struct buf *buf, int mask) +hash_put(DB *db, const struct buf *buf, uint64_t mask) { DBT key, val; int rc; @@ -1036,10 +1036,10 @@ hash_put(DB *db, const struct buf *buf, int mask) perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } else if (0 == rc) - mask |= *(int *)val.data; + mask |= *(uint64_t *)val.data; val.data = &mask; - val.size = sizeof(int); + val.size = sizeof(uint64_t); if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { perror("hash"); --- mandocdb.h.orig +++ mandocdb.h @@ -15,18 +15,49 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +struct db_val { + uint64_t mask; + uint32_t rec; +}; + #define MANDOC_DB "mandoc.db" #define MANDOC_IDX "mandoc.index" -#define TYPE_An 0x01 -#define TYPE_Cd 0x02 -#define TYPE_Er 0x04 -#define TYPE_Ev 0x08 -#define TYPE_Fn 0x10 -#define TYPE_In 0x20 -#define TYPE_Nd 0x40 -#define TYPE_Nm 0x100 -#define TYPE_Pa 0x200 -#define TYPE_St 0x400 -#define TYPE_Va 0x1000 -#define TYPE_Xr 0x2000 +#define TYPE_An 0x0000000000000001ULL +#define TYPE_Ar 0x0000000000000002ULL +#define TYPE_At 0x0000000000000004ULL +#define TYPE_Bsx 0x0000000000000008ULL +#define TYPE_Bx 0x0000000000000010ULL +#define TYPE_Cd 0x0000000000000020ULL +#define TYPE_Cm 0x0000000000000040ULL +#define TYPE_Dv 0x0000000000000080ULL +#define TYPE_Dx 0x0000000000000100ULL +#define TYPE_Em 0x0000000000000200ULL +#define TYPE_Er 0x0000000000000400ULL +#define TYPE_Ev 0x0000000000000800ULL +#define TYPE_Fa 0x0000000000001000ULL +#define TYPE_Fl 0x0000000000002000ULL +#define TYPE_Fn 0x0000000000004000ULL +#define TYPE_Ft 0x0000000000008000ULL +#define TYPE_Fx 0x0000000000010000ULL +#define TYPE_Ic 0x0000000000020000ULL +#define TYPE_In 0x0000000000040000ULL +#define TYPE_Lb 0x0000000000080000ULL +#define TYPE_Li 0x0000000000100000ULL +#define TYPE_Lk 0x0000000000200000ULL +#define TYPE_Ms 0x0000000000400000ULL +#define TYPE_Mt 0x0000000000800000ULL +#define TYPE_Nd 0x0000000001000000ULL +#define TYPE_Nm 0x0000000002000000ULL +#define TYPE_Nx 0x0000000004000000ULL +#define TYPE_Ox 0x0000000008000000ULL +#define TYPE_Pa 0x0000000010000000ULL +#define TYPE_Rs 0x0000000020000000ULL +#define TYPE_Sh 0x0000000040000000ULL +#define TYPE_Ss 0x0000000080000000ULL +#define TYPE_St 0x0000000100000000ULL +#define TYPE_Sy 0x0000000200000000ULL +#define TYPE_Tn 0x0000000400000000ULL +#define TYPE_Va 0x0000000800000000ULL +#define TYPE_Vt 0x0000001000000000ULL +#define TYPE_Xr 0x0000002000000000ULL -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: mandocdb: full set of search types 2011-11-16 0:39 mandocdb: full set of search types Ingo Schwarze @ 2011-11-16 0:52 ` Kristaps Dzonsons 2011-11-16 1:50 ` Ingo Schwarze 2011-11-16 16:59 ` Ingo Schwarze 0 siblings, 2 replies; 4+ messages in thread From: Kristaps Dzonsons @ 2011-11-16 0:52 UTC (permalink / raw) To: tech On 16/11/2011 01:39, Ingo Schwarze wrote: > Hi, > > we are close to going into production. I have talked to espie@, > and he is d'accord with putting small pieces into > /usr/src/libexec/makewhatis/Makewhatis.pm > to call out to mandocdb(8) for automated updates of the mandoc.db > files alongside the updates of the whatis.db files when installing > and removing packages, such that people running -current can easily > start testing the new apropos. > > We will keep the old makewhatis/apropos-combo in place until the new > system is reasonably feature-complete and clearly better than the > old one, then install the new apropos in place of the old one and > finally remove the old components. > > Before enabling mandocdb in pkg_add(8), i'd like to get the database > format complete, such that we don't force people to rebuild the > databases after upgrading to new snapshots. > > Here is a patch defining TYPE_ flags for all macros that i can > imagine might be worth searching for (maybe even a few more, but i'd > rather have too many than too few). I have left out all obsolete > macros and most physical formatting macros. > > This requires switching the mask to 64 bits. During the switch, > i have replaced the very error-prone handling of the key database > values by a new struct db_val; some memcpy() calls by normal > assignments; and some magical constants by sizeof() constructs. > > OK to put this in, or do you see anything that is missing? > > > Right now, i will start to implement the new search types. Hi Ingo, Just a word or two before I sleep. 
This approach is sound and I've no issues with a quick look over the patch, but will wait til tomorrow to do so in earnest. But first, before mandocdb gets production, the database should be checked for endian-neutrality. Second, we'd long ago mentioned splitting SYNOPSIS-invoked macros (Fl, Fn, etc.) for querying on their SYNOPSIS or non-SYNOPSIS usage. I think an elegant method is to encode the section within the keyword database, which allows for apropos Fn~mdoc -a -s SYNOPSIS or whatever `-s' replacement operator. How does that sound? This sounds a lot more reasonable than encoding separate Fn, Nm, etc. macros for SYNOPSIS and non-SYNOPSIS invocation. Thoughts? Kristaps -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: mandocdb: full set of search types 2011-11-16 0:52 ` Kristaps Dzonsons @ 2011-11-16 1:50 ` Ingo Schwarze 2011-11-16 16:59 ` Ingo Schwarze 1 sibling, 0 replies; 4+ messages in thread From: Ingo Schwarze @ 2011-11-16 1:50 UTC (permalink / raw) To: tech Hi Kristaps, Kristaps Dzonsons wrote on Wed, Nov 16, 2011 at 01:52:14AM +0100: > Just a word or two before I sleep. This approach is sound and I've > no issues with a quick look over the patch, but will wait til > tomorrow to do so in earnest. About the same for your logical operations: The approach looks nice, and i will merge it tomorrow and read it in detail. > But first, before mandocdb gets production, the database should be > checked for endian-neutrality. Hm, i never thought about that. > Second, we'd long ago mentioned splitting SYNOPSIS-invoked macros > (Fl, Fn, etc.) for querying on their SYNOPSIS or non-SYNOPSIS usage. > I think an elegant method is to encode the section within the > keyword database, which allows for > > apropos Fn~mdoc -a -s SYNOPSIS Not -s, since -s is the other section (grrr). The database field would have to be a bitmask, or we would multiply the size of the database. The syntax is not completely logical, as the -s SYNOPSIS is a qualifier for the mdoc query string, not a stand-alone query phrase. apropos SYNOPSIS:Fn=mdoc would be more logical. If you really want -o, you have to say: apropos SYNOPSIS:any=mdoc -o Fn=mdoc Your proposal causes ambiguities: apropos Fn=mdoc -a -s SYNOPSIS -a Nm=man Is that: apropos SYNOPSIS:Fn=mdoc -a Nm=man apropos Fn=mdoc -a SYNOPSIS:Nm=man apropos SYNOPSIS:Fn=mdoc -a SYNOPSIS:Nm=man And even worse, what the heck is: apropos Fn=mdoc -o -s SYNOPSIS My proposal also has a quirk. Consider: .Sh SYNOPSIS .Nm foo .Ar mdoc .Sh DESCRIPTION .Nm mdoc That would match SYNOPSIS:Nm=mdoc. But that's unfixable, unless we drop the whole bitfield approach, which will make the database size explode. 
Or we could use a bitfield of the size 20 (sections) times 40 (macros) = 800 bits = 100 bytes, which is also very big. > or whatever `-s' replacement operator. How does that sound? This > sounds a lot more reasonable than encoding separate Fn, Nm, etc. > macros for SYNOPSIS and non-SYNOPSIS invocation. Yes, in particular since you will be looking for other macros in other sections: SEE ALSO:Xr FILES:Pa AUTHORS:An STANDARDS:St HISTORY:Bx DIAGNOSTICS:Er. And atypical queries may occasionally make sense, like STANDARDS:Fl. Hand-picking combinations seems like unreasonable implementation effort and not at all user-friendly. Maybe we don't need section restrictions at all. The only use for section restrictions would be controlling noise in searches - or do you see other uses? But seriously, how much noise do you expect from Nm outside SYNOPSIS, Xr outside SEE ALSO, Pa outside FILES, and so on? I expect little, because macros are rare outside their typical sections. On top of that, some of these atypical occurrences will contribute to the signal, so i'd recommend that people not use section restrictions by default, but only switch them on when drowning in noise - and then, honestly, it's not even likely to help much. So i'd probably suggest to not implement section restrictions right now, but reconsider this in a year or two, when we have a better feeling how the new apropos will actually be used. The database format is not set in stone for eternity, i just don't want to announce public availability and then gratuitously break the format the very next week. Yours, Ingo -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: mandocdb: full set of search types 2011-11-16 0:52 ` Kristaps Dzonsons 2011-11-16 1:50 ` Ingo Schwarze @ 2011-11-16 16:59 ` Ingo Schwarze 1 sibling, 0 replies; 4+ messages in thread From: Ingo Schwarze @ 2011-11-16 16:59 UTC (permalink / raw) To: tech Hi Kristaps, Kristaps Dzonsons wrote on Wed, Nov 16, 2011 at 01:52:14AM +0100: > On 16/11/2011 01:39, Ingo Schwarze wrote: [...] >> Before enabling mandocdb in pkg_add(8), i'd like to get the database >> format complete, such that we don't force people to rebuild the >> databases after upgrading to new snapshots. >> >> Here is a patch defining TYPE_ flags for all macros that i can >> imagine might be worth searching for (maybe even a few more, but i'd >> rather have too many than too few). I have left out all obsolete >> macros and most physical formatting macros. >> >> This requires switching the mask to 64 bits. During the switch, >> i have replaced the very error-prone handling of the key database >> values by a new struct db_val; some memcpy() calls by normal >> assignments; and some magical constants by sizeof() constructs. > Just a word or two before I sleep. This approach is sound and I've > no issues with a quick look over the patch, but will wait til > tomorrow to do so in earnest. But first, before mandocdb gets > production, the database should be checked for endian-neutrality. That's still TODO. However, that doesn't prevent getting the TYPE_* flags right first, so here is an updated patch, to be applied on top of my man.conf stuff sent right before. OK? 
Ingo --- apropos_db.c.orig +++ apropos_db.c @@ -19,6 +19,7 @@ #include <fcntl.h> #include <regex.h> #include <stdarg.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -41,7 +42,7 @@ struct rectree { struct expr { int regex; int index; - int mask; + uint64_t mask; int and; char *v; regex_t re; @@ -49,22 +50,47 @@ struct expr { }; struct type { - int mask; + uint64_t mask; const char *name; }; static const struct type types[] = { { TYPE_An, "An" }, + { TYPE_Ar, "Ar" }, + { TYPE_At, "At" }, + { TYPE_Bsx, "Bsx" }, + { TYPE_Bx, "Bx" }, { TYPE_Cd, "Cd" }, + { TYPE_Cm, "Cm" }, + { TYPE_Dv, "Dv" }, + { TYPE_Dx, "Dx" }, + { TYPE_Em, "Em" }, { TYPE_Er, "Er" }, { TYPE_Ev, "Ev" }, + { TYPE_Fa, "Fa" }, + { TYPE_Fl, "Fl" }, { TYPE_Fn, "Fn" }, { TYPE_Fn, "Fo" }, + { TYPE_Ft, "Ft" }, + { TYPE_Fx, "Fx" }, + { TYPE_Ic, "Ic" }, { TYPE_In, "In" }, + { TYPE_Lb, "Lb" }, + { TYPE_Li, "Li" }, + { TYPE_Lk, "Lk" }, + { TYPE_Ms, "Ms" }, + { TYPE_Mt, "Mt" }, { TYPE_Nd, "Nd" }, { TYPE_Nm, "Nm" }, + { TYPE_Nx, "Nx" }, + { TYPE_Ox, "Ox" }, { TYPE_Pa, "Pa" }, + { TYPE_Rs, "Rs" }, + { TYPE_Sh, "Sh" }, + { TYPE_Ss, "Ss" }, { TYPE_St, "St" }, + { TYPE_Sy, "Sy" }, + { TYPE_Tn, "Tn" }, { TYPE_Va, "Va" }, { TYPE_Va, "Vt" }, { TYPE_Xr, "Xr" }, @@ -74,9 +100,9 @@ static const struct type types[] = { static DB *btree_open(void); static int btree_read(const DBT *, const struct mchars *, char **); -static int exprexecpre(const struct expr *, const char *, int); +static int exprexecpre(const struct expr *, const char *, uint64_t); static void exprexecpost(const struct expr *, - const char *, int, int *, size_t); + const char *, uint64_t, int *, size_t); static struct expr *exprterm(char *, int, int); static DB *index_open(void); static int index_read(const DBT *, const DBT *, @@ -381,7 +407,7 @@ single_search(struct rectree *tree, const struct opts *opts, const struct expr *expr, size_t terms, struct mchars *mc) { - int root, leaf, mask; + int root, leaf; DBT key, val; DB 
*btree, *idx; int ch; @@ -389,6 +415,7 @@ single_search(struct rectree *tree, const struct opts *opts, recno_t rec; struct rec *recs; struct rec srec; + struct db_val *vbuf; root = -1; leaf = -1; @@ -412,21 +439,19 @@ single_search(struct rectree *tree, const struct opts *opts, * The key must have something in it, and the value must * have the correct tags/recno mix. */ - if (key.size < 2 || 8 != val.size) + if (key.size < 2 || sizeof(struct db_val) != val.size) break; if ( ! btree_read(&key, mc, &buf)) break; - mask = *(int *)val.data; - /* * See if this keyword record matches any of the * expressions we have stored. */ - if ( ! exprexecpre(expr, buf, mask)) + vbuf = val.data; + if ( ! exprexecpre(expr, buf, vbuf->mask)) continue; - - memcpy(&rec, val.data + 4, sizeof(recno_t)); + rec = vbuf->rec; /* * O(log n) scan for prior records. Since a record @@ -445,7 +470,7 @@ single_search(struct rectree *tree, const struct opts *opts, if (leaf >= 0 && recs[leaf].rec == rec) { if (0 == recs[leaf].matches[0]) exprexecpost - (expr, buf, mask, + (expr, buf, vbuf->mask, recs[leaf].matches, terms); continue; } @@ -478,7 +503,7 @@ single_search(struct rectree *tree, const struct opts *opts, mandoc_calloc(terms + 1, sizeof(int)); exprexecpost - (expr, buf, mask, + (expr, buf, vbuf->mask, recs[tree->len].matches, terms); /* Append to our tree. */ @@ -642,7 +667,7 @@ exprfree(struct expr *p) * Return 1 if any expression evaluates to true, else 0. 
*/ static int -exprexecpre(const struct expr *p, const char *cp, int mask) +exprexecpre(const struct expr *p, const char *cp, uint64_t mask) { for ( ; NULL != p; p = p->next) { @@ -666,7 +691,7 @@ exprexecpre(const struct expr *p, const char *cp, int mask) */ static void exprexecpost(const struct expr *e, const char *cp, - int mask, int *matches, size_t matchsz) + uint64_t mask, int *matches, size_t matchsz) { const struct expr *p; int match; --- mandocdb.8.orig +++ mandocdb.8 @@ -48,9 +48,13 @@ The arguments are as follows: .It Fl a Use all directories and files found below .Ar dir ... . -By default, directories and files -.Xr man 1 -cannot find will be silently skipped. +By default, only files matching +.Sm off +.Sy man Ar section Li / +.Op Ar arch Li / +.Ar title . section +.Sm on +will be used. .It Fl d Ar dir Merge (remove and re-add) .Ar --- mandocdb.c.orig +++ mandocdb.c @@ -79,7 +79,7 @@ static void buf_append(struct buf *, const char *); static void buf_appendb(struct buf *, const void *, size_t); static void dbt_put(DB *, const char *, DBT *, DBT *); -static void hash_put(DB *, const struct buf *, int); +static void hash_put(DB *, const struct buf *, uint64_t); static void hash_reset(DB **); static void index_merge(const struct of *, struct mparse *, struct buf *, struct buf *, @@ -257,11 +257,11 @@ mandocdb(int argc, char *argv[]) *db, /* keyword database */ *hash; /* temporary keyword hashtable */ BTREEINFO info; /* btree configuration */ - recno_t maxrec; /* supremum of all records */ - recno_t *recs; /* buffer of empty records */ + recno_t maxrec; /* last record number in the index */ + recno_t *recs; /* the numbers of all empty records */ size_t sz1, sz2, - recsz, /* buffer size of recs */ - reccur; /* valid number of recs */ + recsz, /* number of allocated slots in recs */ + reccur; /* current number of empty records */ struct buf buf, /* keyword buffer */ dbuf; /* description buffer */ struct of *of; /* list of files for processing */ @@ -348,7 
+348,7 @@ mandocdb(int argc, char *argv[]) if (NULL == db) { perror(fbuf); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == db) { + } else if (NULL == idx) { perror(ibuf); exit((int)MANDOCLEVEL_SYSERR); } @@ -410,7 +410,7 @@ mandocdb(int argc, char *argv[]) if (NULL == db) { perror(fbuf); exit((int)MANDOCLEVEL_SYSERR); - } else if (NULL == db) { + } else if (NULL == idx) { perror(ibuf); exit((int)MANDOCLEVEL_SYSERR); } @@ -473,7 +473,7 @@ index_merge(const struct of *of, struct mparse *mp, const char *fn, *msec, *mtitle, *arch; size_t sv; unsigned seq; - char vbuf[8]; + struct db_val vbuf; for (rec = 0; of; of = of->next) { fn = of->fname; @@ -499,9 +499,9 @@ index_merge(const struct of *of, struct mparse *mp, continue; /* - * Make sure the manual section and architecture - * agree with the directory where the file is located - * or man(1) will not be able to find it. + * By default, skip a file if the manual section + * and architecture given in the file disagree + * with the directory where the file is located. */ msec = NULL != mdoc ? @@ -527,9 +527,10 @@ index_merge(const struct of *of, struct mparse *mp, arch = ""; /* - * Case is relevant for man(1), so use the file name - * instead of the (usually) all caps page title, - * if the two agree. + * By default, skip a file if the title given + * in the file disagrees with the file name. + * If both agree, use the file name as the title, + * because the one in the file usually is all caps. */ mtitle = NULL != mdoc ? @@ -571,17 +572,15 @@ index_merge(const struct of *of, struct mparse *mp, * Copy from the in-memory hashtable of pending keywords * into the database. 
*/ - - memset(vbuf, 0, sizeof(uint32_t)); - memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + vbuf.rec = rec; seq = R_FIRST; while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { seq = R_NEXT; - memcpy(vbuf, val.data, sizeof(uint32_t)); - val.size = sizeof(vbuf); - val.data = vbuf; + vbuf.mask = *(uint64_t *)val.data; + val.size = sizeof(struct db_val); + val.data = &vbuf; if (verb > 1) printf("%s: Added keyword: %s\n", @@ -626,6 +625,7 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, { const struct of *of; const char *fn; + struct db_val *vbuf; unsigned seq, sseq; DBT key, val; size_t reccur; @@ -658,8 +658,9 @@ index_prune(const struct of *ofile, DB *db, const char *dbf, sseq = R_FIRST; while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { sseq = R_NEXT; - assert(8 == val.size); - if (*maxrec != *(recno_t *)(val.data + 4)) + assert(sizeof(struct db_val) == val.size); + vbuf = val.data; + if (*maxrec != vbuf->rec) continue; if (verb) printf("%s: Deleted keyword: %s\n", @@ -1040,7 +1041,7 @@ pmdoc_Nm(MDOC_ARGS) } static void -hash_put(DB *db, const struct buf *buf, int mask) +hash_put(DB *db, const struct buf *buf, uint64_t mask) { DBT key, val; int rc; @@ -1055,10 +1056,10 @@ hash_put(DB *db, const struct buf *buf, int mask) perror("hash"); exit((int)MANDOCLEVEL_SYSERR); } else if (0 == rc) - mask |= *(int *)val.data; + mask |= *(uint64_t *)val.data; val.data = &mask; - val.size = sizeof(int); + val.size = sizeof(uint64_t); if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { perror("hash"); @@ -1228,7 +1229,9 @@ ofile_argbuild(char *argv[], int argc, int use_all, int verb, for (i = 0; i < argc; i++) { /* - * Analyze the path. 
+ * Try to infer the manual section, architecture and + * page title from the path, assuming it looks like + * man*[/<arch>]/<title>.<section> */ if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { @@ -1322,8 +1325,8 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch, arch = parch; /* - * Don't bother parsing directories - * that man(1) won't find. + * By default, only use directories called: + * man<section>/[<arch>/] */ if (NULL == sec) { @@ -1363,7 +1366,9 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch, continue; /* - * Don't bother parsing files that man(1) won't find. + * By default, skip files where the file name suffix + * does not agree with the section directory + * they are located in. */ suffix = strrchr(fn, '.'); @@ -1389,6 +1394,12 @@ ofile_dirbuild(const char *dir, const char* psec, const char *parch, nof->sec = mandoc_strdup(psec); if (NULL != parch) nof->arch = mandoc_strdup(parch); + + /* + * Remember the file name without the extension, + * to be used as the page title in the database. + */ + if (NULL != suffix) *suffix = '\0'; nof->title = mandoc_strdup(fn); --- mandocdb.h.orig +++ mandocdb.h @@ -15,18 +15,49 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +struct db_val { + uint64_t mask; + uint32_t rec; +}; + #define MANDOC_DB "mandoc.db" #define MANDOC_IDX "mandoc.index" -#define TYPE_An 0x01 -#define TYPE_Cd 0x02 -#define TYPE_Er 0x04 -#define TYPE_Ev 0x08 -#define TYPE_Fn 0x10 -#define TYPE_In 0x20 -#define TYPE_Nd 0x40 -#define TYPE_Nm 0x100 -#define TYPE_Pa 0x200 -#define TYPE_St 0x400 -#define TYPE_Va 0x1000 -#define TYPE_Xr 0x2000 +#define TYPE_An 0x0000000000000001ULL +#define TYPE_Ar 0x0000000000000002ULL +#define TYPE_At 0x0000000000000004ULL +#define TYPE_Bsx 0x0000000000000008ULL +#define TYPE_Bx 0x0000000000000010ULL +#define TYPE_Cd 0x0000000000000020ULL +#define TYPE_Cm 0x0000000000000040ULL +#define TYPE_Dv 0x0000000000000080ULL +#define TYPE_Dx 0x0000000000000100ULL +#define TYPE_Em 0x0000000000000200ULL +#define TYPE_Er 0x0000000000000400ULL +#define TYPE_Ev 0x0000000000000800ULL +#define TYPE_Fa 0x0000000000001000ULL +#define TYPE_Fl 0x0000000000002000ULL +#define TYPE_Fn 0x0000000000004000ULL +#define TYPE_Ft 0x0000000000008000ULL +#define TYPE_Fx 0x0000000000010000ULL +#define TYPE_Ic 0x0000000000020000ULL +#define TYPE_In 0x0000000000040000ULL +#define TYPE_Lb 0x0000000000080000ULL +#define TYPE_Li 0x0000000000100000ULL +#define TYPE_Lk 0x0000000000200000ULL +#define TYPE_Ms 0x0000000000400000ULL +#define TYPE_Mt 0x0000000000800000ULL +#define TYPE_Nd 0x0000000001000000ULL +#define TYPE_Nm 0x0000000002000000ULL +#define TYPE_Nx 0x0000000004000000ULL +#define TYPE_Ox 0x0000000008000000ULL +#define TYPE_Pa 0x0000000010000000ULL +#define TYPE_Rs 0x0000000020000000ULL +#define TYPE_Sh 0x0000000040000000ULL +#define TYPE_Ss 0x0000000080000000ULL +#define TYPE_St 0x0000000100000000ULL +#define TYPE_Sy 0x0000000200000000ULL +#define TYPE_Tn 0x0000000400000000ULL +#define TYPE_Va 0x0000000800000000ULL +#define TYPE_Vt 0x0000001000000000ULL +#define TYPE_Xr 0x0000002000000000ULL -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-11-16 16:59 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2011-11-16 0:39 mandocdb: full set of search types Ingo Schwarze 2011-11-16 0:52 ` Kristaps Dzonsons 2011-11-16 1:50 ` Ingo Schwarze 2011-11-16 16:59 ` Ingo Schwarze
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).