From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (kristaps@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id p6EAv3AD018231 for ; Thu, 14 Jul 2011 06:57:04 -0400 (EDT) Received: (from kristaps@localhost) by krisdoz.my.domain (8.14.3/8.14.3/Submit) id p6EAv36S004455; Thu, 14 Jul 2011 06:57:03 -0400 (EDT) Date: Thu, 14 Jul 2011 06:57:03 -0400 (EDT) Message-Id: <201107141057.p6EAv36S004455@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: kristaps@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Rename makewhatis [back] into mandocdb. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Rename makewhatis [back] into mandocdb. This is to maintain consistency with OpenBSD, which is sandboxing the code for merge. It makes sense because it doesn't really make a `makewhatis' file in the traditional sense, so it may be confusing. Modified Files: -------------- mdocml: Makefile index.sgml Added Files: ----------- mdocml: mandocdb.1 mandocdb.c Revision Data ------------- --- /dev/null +++ mandocdb.1 @@ -0,0 +1,191 @@ +.\" $Id: mandocdb.1,v 1.1 2011/07/14 10:57:02 kristaps Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: July 14 2011 $ +.Dt MANDOCDB 1 +.Os +.Sh NAME +.Nm mandocdb +.Nd index UNIX manuals +.Sh SYNOPSIS +.Nm +.Op Fl ruv +.Op Fl d Ar dir +.Ar +.Sh DESCRIPTION +The +.Nm +utility extracts keywords from +.Ux +manuals and indexes them for fast retrieval. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl d Ar dir +The directory into which to write the keyword and index databases. +.It Ar +Read input from zero or more files in +.Xr mdoc 7 +or +.Xr man 7 +.Ux +manual format. +.It Fl r +Remove entries. +This will remove the index and keyword references. +If the record is not found, it is ignored. +.It Fl u +Update the record. +This will first remove the record (as in +.Fl r ) +then re-add it. +.It Fl v +Verbose output. +If specified once, prints the name of each indexed file. +If twice, prints keywords for each file. +.El +.Pp +By default, +.Nm +constructs a new +.Sx Index Database +and +.Sx Keyword Database +in the current working directory. +Existing databases are truncated. +.Pp +If fatal parse errors are encountered, the offending file is printed to +stderr, omitted from the index, and the parse continues with the next +input file. +.Ss Index Database +The index database, +.Pa mandoc.index , +is a +.Xr recno 3 +database with record values consisting of +.Pp +.Bl -enum -compact +.It +a nil-terminated filename, +.It +a nil-terminated manual section, +.It +a nil-terminated manual title, +.It +a nil-terminated architecture +.Pq this is not often available +.It +and a nil-terminated description. +.El +.Pp +Both the manual section and description may be zero-length. 
+Entries are sequentially-numbered, but the filenames are unordered. +.Ss Keyword Database +The keyword database, +.Pa mandoc.db , +is a +.Xr btree 3 +database of nil-terminated keywords (record length is non-zero string +length plus one) mapping to an 8-byte binary field consisting of the +keyword type and source +.Sx Index Database +record number. +The type, a 32-bit bit-mask in host order, consists of the following +fields: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Li 0x01 +The name of a manual page as given in the NAME section. +.It Li 0x02 +A function prototype name as given in the SYNOPSIS section. +.It Li 0x04 +A utility name as given in the SYNOPSIS section. +.It Li 0x08 +An include file as given in the SYNOPSIS section. +.It Li 0x10 +A variable name as given in the SYNOPSIS section. +.It Li 0x20 +A standard as given in the STANDARDS section. +.It Li 0x40 +An author as given in the AUTHORS section. +.It Li 0x80 +A configuration as given in the SYNOPSIS section. +.It Li 0x100 +Free-form descriptive text as given in the NAME section. +.It Li 0x200 +Cross-links between manuals. +Listed as the link name, then a period, then the link section. +If the link has no section, the period terminates the string. +.It Li 0x400 +Path reference as given in the FILES section. +.It Li 0x800 +Environment variable as given in the ENVIRONMENT section. +.It Li 0x1000 +Error codes as given in the ERRORS section. +.El +.Pp +The last four bytes are a host-ordered record number within the +.Sx Index Database . +.Pp +The +.Nm +utility is +.Ud +.Sh IMPLEMENTATION NOTES +The time to construct a new database pair grows linearly with the +number of keywords in the input. +However, removing or updating entries with +.Fl r +or +.Fl u , +respectively, grows as a multiple of the index length and input size. +.Sh FILES +.Bl -tag -width Ds +.It Pa mandoc.db +A +.Xr btree 3 +keyword database mapping keywords to a type and file reference in +.Pa mandoc.index . 
+.It Pa mandoc.index +A +.Xr recno 3 +database of indexed file-names. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +The output databases are corrupt and should be removed. +.El +.Sh SEE ALSO +.Xr mandoc 1 +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . --- /dev/null +++ mandocdb.c @@ -0,0 +1,1105 @@ +/* $Id: mandocdb.c,v 1.1 2011/07/14 10:57:02 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +# include +#else +# include +#endif + +#include "man.h" +#include "mdoc.h" +#include "mandoc.h" + +#define MANDOC_DB "mandoc.db" +#define MANDOC_IDX "mandoc.index" +#define MANDOC_BUFSZ BUFSIZ +#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR +#define MANDOC_SLOP 1024 + +/* Bit-fields. See mandocdb.1. */ + +#define TYPE_NAME 0x01 +#define TYPE_FUNCTION 0x02 +#define TYPE_UTILITY 0x04 +#define TYPE_INCLUDES 0x08 +#define TYPE_VARIABLE 0x10 +#define TYPE_STANDARD 0x20 +#define TYPE_AUTHOR 0x40 +#define TYPE_CONFIG 0x80 +#define TYPE_DESC 0x100 +#define TYPE_XREF 0x200 +#define TYPE_PATH 0x400 +#define TYPE_ENV 0x800 +#define TYPE_ERR 0x1000 + +/* Buffer for storing growable data. */ + +struct buf { + char *cp; + size_t len; + size_t size; +}; + +/* Operation we're going to perform. */ + +enum op { + OP_NEW = 0, /* new database */ + OP_UPDATE, /* update entries in existing database */ + OP_DELETE /* delete entries from existing database */ +}; + +#define MAN_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct man_node *n +#define MDOC_ARGS DB *hash, \ + struct buf *buf, \ + struct buf *dbuf, \ + const struct mdoc_node *n, \ + const struct mdoc_meta *m + +static void buf_appendmdoc(struct buf *, + const struct mdoc_node *, int); +static void buf_append(struct buf *, const char *); +static void buf_appendb(struct buf *, + const void *, size_t); +static void dbt_put(DB *, const char *, DBT *, DBT *); +static void hash_put(DB *, const struct buf *, int); +static void hash_reset(DB **); +static int pman_node(MAN_ARGS); +static void pmdoc_node(MDOC_ARGS); +static void pmdoc_An(MDOC_ARGS); +static void pmdoc_Cd(MDOC_ARGS); +static void pmdoc_Er(MDOC_ARGS); +static void pmdoc_Ev(MDOC_ARGS); +static void pmdoc_Fd(MDOC_ARGS); +static void pmdoc_In(MDOC_ARGS); +static void pmdoc_Fn(MDOC_ARGS); +static 
void pmdoc_Fo(MDOC_ARGS); +static void pmdoc_Nd(MDOC_ARGS); +static void pmdoc_Nm(MDOC_ARGS); +static void pmdoc_Pa(MDOC_ARGS); +static void pmdoc_St(MDOC_ARGS); +static void pmdoc_Vt(MDOC_ARGS); +static void pmdoc_Xr(MDOC_ARGS); +static void usage(void); + +typedef void (*pmdoc_nf)(MDOC_ARGS); + +static const pmdoc_nf mdocs[MDOC_MAX] = { + NULL, /* Ap */ + NULL, /* Dd */ + NULL, /* Dt */ + NULL, /* Os */ + NULL, /* Sh */ + NULL, /* Ss */ + NULL, /* Pp */ + NULL, /* D1 */ + NULL, /* Dl */ + NULL, /* Bd */ + NULL, /* Ed */ + NULL, /* Bl */ + NULL, /* El */ + NULL, /* It */ + NULL, /* Ad */ + pmdoc_An, /* An */ + NULL, /* Ar */ + pmdoc_Cd, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + pmdoc_Er, /* Er */ + pmdoc_Ev, /* Ev */ + NULL, /* Ex */ + NULL, /* Fa */ + pmdoc_Fd, /* Fd */ + NULL, /* Fl */ + pmdoc_Fn, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + pmdoc_In, /* In */ + NULL, /* Li */ + pmdoc_Nd, /* Nd */ + pmdoc_Nm, /* Nm */ + NULL, /* Op */ + NULL, /* Ot */ + pmdoc_Pa, /* Pa */ + NULL, /* Rv */ + pmdoc_St, /* St */ + pmdoc_Vt, /* Va */ + pmdoc_Vt, /* Vt */ + pmdoc_Xr, /* Xr */ + NULL, /* %A */ + NULL, /* %B */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + NULL, /* %N */ + NULL, /* %O */ + NULL, /* %P */ + NULL, /* %R */ + NULL, /* %T */ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + NULL, /* At */ + NULL, /* Bc */ + NULL, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + NULL, /* Bx */ + NULL, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + NULL, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + NULL, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + NULL, /* Sm */ + NULL, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + NULL, /* Ux */ + 
NULL, /* Xc */ + NULL, /* Xo */ + pmdoc_Fo, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + NULL, /* Bk */ + NULL, /* Ek */ + NULL, /* Bt */ + NULL, /* Hf */ + NULL, /* Fr */ + NULL, /* Ud */ + NULL, /* Lb */ + NULL, /* Lp */ + NULL, /* Lk */ + NULL, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + NULL, /* Es */ + NULL, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + NULL, /* br */ + NULL, /* sp */ + NULL, /* %U */ + NULL, /* Ta */ +}; + +static const char *progname; + +int +main(int argc, char *argv[]) +{ + struct mparse *mp; /* parse sequence */ + struct mdoc *mdoc; /* resulting mdoc */ + struct man *man; /* resulting man */ + enum op op; /* current operation */ + char *fn; /* current file being parsed */ + const char *msec, /* manual section */ + *mtitle, /* manual title */ + *arch, /* manual architecture */ + *dir; /* result dir (default: cwd) */ + char ibuf[MAXPATHLEN], /* index fname */ + fbuf[MAXPATHLEN], /* btree fname */ + vbuf[8]; /* stringified record number */ + int ch, seq, sseq, verb, i; + DB *idx, /* index database */ + *db, /* keyword database */ + *hash; /* temporary keyword hashtable */ + DBT key, val; + enum mandoclevel ec; /* exit status */ + size_t sv; + BTREEINFO info; /* btree configuration */ + recno_t rec, + maxrec; /* supremum of all records */ + recno_t *recs; /* buffer of empty records */ + size_t recsz, /* buffer size of recs */ + reccur; /* valid number of recs */ + struct buf buf, /* keyword buffer */ + dbuf; /* description buffer */ + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + dir = ""; + verb = 0; + db = idx = NULL; + mp = NULL; + hash = NULL; + recs = NULL; + recsz = reccur = 0; + maxrec = 0; + op = OP_NEW; + ec = MANDOCLEVEL_SYSERR; + + memset(&buf, 0, sizeof(struct buf)); + memset(&dbuf, 0, sizeof(struct buf)); + + while (-1 != (ch = getopt(argc, argv, "d:ruv"))) + switch (ch) { 
+ case ('d'): + dir = optarg; + break; + case ('r'): + op = OP_DELETE; + break; + case ('u'): + op = OP_UPDATE; + break; + case ('v'): + verb++; + break; + default: + usage(); + return((int)MANDOCLEVEL_BADARG); + } + + argc -= optind; + argv += optind; + + ibuf[0] = ibuf[MAXPATHLEN - 2] = + fbuf[0] = fbuf[MAXPATHLEN - 2] = '\0'; + + strlcat(fbuf, dir, MAXPATHLEN); + strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + + strlcat(ibuf, dir, MAXPATHLEN); + strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + + if ('\0' != fbuf[MAXPATHLEN - 2] || + '\0' != ibuf[MAXPATHLEN - 2]) { + fprintf(stderr, "%s: Path too long\n", dir); + goto out; + } + + /* + * For the keyword database, open a BTREE database that allows + * duplicates. + * For the index database, use a standard RECNO database type. + * Truncate the database if we're creating a new one. + */ + + memset(&info, 0, sizeof(BTREEINFO)); + info.flags = R_DUP; + + if (OP_NEW == op) { + db = dbopen(fbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL); + } else { + db = dbopen(fbuf, O_CREAT|O_RDWR, 0644, DB_BTREE, &info); + idx = dbopen(ibuf, O_CREAT|O_RDWR, 0644, DB_RECNO, NULL); + } + + if (NULL == db) { + perror(fbuf); + goto out; + } else if (NULL == db) { + perror(ibuf); + goto out; + } + + /* + * If we're going to delete or update a database, remove the + * entries now (both the index and all keywords pointing to it). + * This doesn't actually remove them: it only sets their record + * value lengths to zero. + * While doing so, add the empty records to a list we'll access + * later in re-adding entries to the database. 
+ */ + + if (OP_DELETE == op || OP_UPDATE == op) { + seq = R_FIRST; + while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { + seq = R_NEXT; + maxrec = *(recno_t *)key.data; + if (0 == val.size && OP_UPDATE == op) { + if (reccur >= recsz) { + recsz += MANDOC_SLOP; + recs = mandoc_realloc + (recs, recsz * sizeof(recno_t)); + } + recs[(int)reccur] = maxrec; + reccur++; + continue; + } + + fn = (char *)val.data; + for (i = 0; i < argc; i++) + if (0 == strcmp(fn, argv[i])) + break; + + if (i == argc) + continue; + + sseq = R_FIRST; + while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { + sseq = R_NEXT; + assert(8 == val.size); + if (maxrec != *(recno_t *)(val.data + 4)) + continue; + if (verb > 1) + printf("%s: Deleted keyword: %s\n", + fn, (char *)key.data); + ch = (*db->del)(db, &key, R_CURSOR); + if (ch < 0) + break; + } + if (ch < 0) { + perror(fbuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (verb) + printf("%s: Deleted index\n", fn); + + val.size = 0; + ch = (*idx->put)(idx, &key, &val, R_CURSOR); + if (ch < 0) { + perror(ibuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + if (OP_UPDATE == op) { + if (reccur >= recsz) { + recsz += MANDOC_SLOP; + recs = mandoc_realloc + (recs, recsz * sizeof(recno_t)); + } + recs[(int)reccur] = maxrec; + reccur++; + } + } + maxrec++; + } + + if (OP_DELETE == op) { + ec = MANDOCLEVEL_OK; + goto out; + } + + /* + * Add records to the database. + * Try parsing each manual given on the command line. + * If we fail, then emit an error and keep on going. + * Take resulting trees and push them down into the database code. + * Use the auto-parser and don't report any errors. 
+ */ + + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + + buf.size = dbuf.size = MANDOC_BUFSZ; + buf.cp = mandoc_malloc(buf.size); + dbuf.cp = mandoc_malloc(dbuf.size); + + for (rec = 0, i = 0; i < argc; i++) { + fn = argv[i]; + if (OP_UPDATE == op) { + if (reccur > 0) { + --reccur; + rec = recs[(int)reccur]; + } else if (maxrec > 0) { + rec = maxrec; + maxrec = 0; + } else + rec++; + } else + rec++; + + mparse_reset(mp); + hash_reset(&hash); + + if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { + fprintf(stderr, "%s: Parse failure\n", fn); + continue; + } + + mparse_result(mp, &mdoc, &man); + if (NULL == mdoc && NULL == man) + continue; + + msec = NULL != mdoc ? + mdoc_meta(mdoc)->msec : man_meta(man)->msec; + mtitle = NULL != mdoc ? + mdoc_meta(mdoc)->title : man_meta(man)->title; + arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL; + + if (NULL == arch) + arch = ""; + + /* + * The index record value consists of a nil-terminated + * filename, a nil-terminated manual section, and a + * nil-terminated description. Since the description + * may not be set, we set a sentinel to see if we're + * going to write a nil byte in its place. + */ + + dbuf.len = 0; + buf_appendb(&dbuf, fn, strlen(fn) + 1); + buf_appendb(&dbuf, msec, strlen(msec) + 1); + buf_appendb(&dbuf, mtitle, strlen(mtitle) + 1); + buf_appendb(&dbuf, arch, strlen(arch) + 1); + + sv = dbuf.len; + + /* Fix the record number in the btree value. */ + + if (mdoc) + pmdoc_node(hash, &buf, &dbuf, + mdoc_node(mdoc), mdoc_meta(mdoc)); + else + pman_node(hash, &buf, &dbuf, man_node(man)); + + /* + * Copy from the in-memory hashtable of pending keywords + * into the database. 
+ */ + + memset(vbuf, 0, sizeof(uint32_t)); + memcpy(vbuf + 4, &rec, sizeof(uint32_t)); + + seq = R_FIRST; + while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { + seq = R_NEXT; + + memcpy(vbuf, val.data, sizeof(uint32_t)); + val.size = sizeof(vbuf); + val.data = vbuf; + + if (verb > 1) + printf("%s: Added keyword: %s, 0x%x\n", + fn, (char *)key.data, + *(int *)val.data); + dbt_put(db, fbuf, &key, &val); + } + if (ch < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } + + /* + * Apply to the index. If we haven't had a description + * set, put an empty one in now. + */ + + if (dbuf.len == sv) + buf_appendb(&dbuf, "", 1); + + key.data = &rec; + key.size = sizeof(recno_t); + + val.data = dbuf.cp; + val.size = dbuf.len; + + if (verb > 0) + printf("%s: Added index\n", fn); + + dbt_put(idx, ibuf, &key, &val); + } + + ec = MANDOCLEVEL_OK; +out: + if (db) + (*db->close)(db); + if (idx) + (*idx->close)(idx); + if (hash) + (*hash->close)(hash); + if (mp) + mparse_free(mp); + + free(buf.cp); + free(dbuf.cp); + free(recs); + + return((int)ec); +} + +/* + * Grow the buffer (if necessary) and copy in a binary string. + */ +static void +buf_appendb(struct buf *buf, const void *cp, size_t sz) +{ + + /* Overshoot by MANDOC_BUFSZ. */ + + while (buf->len + sz >= buf->size) { + buf->size = buf->len + sz + MANDOC_BUFSZ; + buf->cp = mandoc_realloc(buf->cp, buf->size); + } + + memcpy(buf->cp + (int)buf->len, cp, sz); + buf->len += sz; +} + +/* + * Append a nil-terminated string to the buffer. + * This can be invoked multiple times. + * The buffer string will be nil-terminated. + * If invoked multiple times, a space is put between strings. + */ +static void +buf_append(struct buf *buf, const char *cp) +{ + size_t sz; + + if (0 == (sz = strlen(cp))) + return; + + if (buf->len) + buf->cp[(int)buf->len - 1] = ' '; + + buf_appendb(buf, cp, sz + 1); +} + +/* + * Recursively add all text from a given node. 
+ * This is optimised for general mdoc nodes in this context, which do + * not consist of subexpressions and having a recursive call for n->next + * would be wasteful. + * The "f" variable should be 0 unless called from pmdoc_Nd for the + * description buffer, which does not start at the beginning of the + * buffer. + */ +static void +buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) +{ + + for ( ; n; n = n->next) { + if (n->child) + buf_appendmdoc(buf, n->child, f); + + if (MDOC_TEXT == n->type && f) { + f = 0; + buf_appendb(buf, n->string, + strlen(n->string) + 1); + } else if (MDOC_TEXT == n->type) + buf_append(buf, n->string); + + } +} + +/* ARGSUSED */ +static void +pmdoc_An(MDOC_ARGS) +{ + + if (SEC_AUTHORS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_AUTHOR); +} + +static void +hash_reset(DB **db) +{ + DB *hash; + + if (NULL != (hash = *db)) + (*hash->close)(hash); + + *db = dbopen(NULL, MANDOC_FLAGS, 0644, DB_HASH, NULL); + if (NULL == *db) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } +} + +/* ARGSUSED */ +static void +pmdoc_Fd(MDOC_ARGS) +{ + const char *start, *end; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == (n = n->child) || MDOC_TEXT != n->type) + return; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, e.g., #define). + */ + if (strcmp("#include", n->string)) + return; + + if (NULL == (n = n->next) || MDOC_TEXT != n->type) + return; + + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. 
+ */ + + start = n->string; + if ('<' == *start || '"' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + end = &start[(int)sz - 1]; + if ('>' == *end || '"' == *end) + end--; + + assert(end >= start); + + buf_appendb(buf, start, (size_t)(end - start + 1)); + buf_appendb(buf, "", 1); + + hash_put(hash, buf, TYPE_INCLUDES); +} + +/* ARGSUSED */ +static void +pmdoc_Cd(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_CONFIG); +} + +/* ARGSUSED */ +static void +pmdoc_In(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_INCLUDES); +} + +/* ARGSUSED */ +static void +pmdoc_Fn(MDOC_ARGS) +{ + const char *cp; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + /* .Fn "struct type *arg" "foo" */ + + cp = strrchr(n->child->string, ' '); + if (NULL == cp) + cp = n->child->string; + + /* Strip away pointer symbol. 
*/ + + while ('*' == *cp) + cp++; + + buf_append(buf, cp); + hash_put(hash, buf, TYPE_FUNCTION); +} + +/* ARGSUSED */ +static void +pmdoc_St(MDOC_ARGS) +{ + + if (SEC_STANDARDS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_STANDARD); +} + +/* ARGSUSED */ +static void +pmdoc_Xr(MDOC_ARGS) +{ + + if (NULL == (n = n->child)) + return; + + buf_appendb(buf, n->string, strlen(n->string)); + + if (NULL != (n = n->next)) { + buf_appendb(buf, ".", 1); + buf_appendb(buf, n->string, strlen(n->string) + 1); + } else + buf_appendb(buf, ".", 2); + + hash_put(hash, buf, TYPE_XREF); +} + +/* ARGSUSED */ +static void +pmdoc_Vt(MDOC_ARGS) +{ + const char *start; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (MDOC_Vt == n->tok && MDOC_BODY != n->type) + return; + if (NULL == n->last || MDOC_TEXT != n->last->type) + return; + + /* + * Strip away leading pointer symbol '*' and trailing ';'. 
+ */ + + start = n->last->string; + + while ('*' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + if (';' == start[(int)sz - 1]) + sz--; + + if (0 == sz) + return; + + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); + hash_put(hash, buf, TYPE_VARIABLE); +} + +/* ARGSUSED */ +static void +pmdoc_Fo(MDOC_ARGS) +{ + + if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + buf_append(buf, n->child->string); + hash_put(hash, buf, TYPE_FUNCTION); +} + + +/* ARGSUSED */ +static void +pmdoc_Nd(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + + buf_appendmdoc(dbuf, n->child, 1); + buf_appendmdoc(buf, n->child, 0); + + hash_put(hash, buf, TYPE_DESC); +} + +/* ARGSUSED */ +static void +pmdoc_Er(MDOC_ARGS) +{ + + if (SEC_ERRORS != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_ERR); +} + +/* ARGSUSED */ +static void +pmdoc_Ev(MDOC_ARGS) +{ + + if (SEC_ENVIRONMENT != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_ENV); +} + +/* ARGSUSED */ +static void +pmdoc_Pa(MDOC_ARGS) +{ + + if (SEC_FILES != n->sec) + return; + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_PATH); +} + +/* ARGSUSED */ +static void +pmdoc_Nm(MDOC_ARGS) +{ + + if (SEC_NAME == n->sec) { + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_NAME); + return; + } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return; + + if (NULL == n->child) + buf_append(buf, m->name); + + buf_appendmdoc(buf, n->child, 0); + hash_put(hash, buf, TYPE_UTILITY); +} + +static void +hash_put(DB *db, const struct buf *buf, int mask) +{ + DBT key, val; + int rc; + + if (buf->len < 2) + return; + + key.data = buf->cp; + key.size = buf->len; + + if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } else if (0 == rc) + mask |= *(int *)val.data; + + val.data = &mask; + 
val.size = sizeof(int); + + if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { + perror("hash"); + exit((int)MANDOCLEVEL_SYSERR); + } +} + +static void +dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) +{ + + assert(key->size); + assert(val->size); + + if (0 == (*db->put)(db, key, val, 0)) + return; + + perror(dbn); + exit((int)MANDOCLEVEL_SYSERR); + /* NOTREACHED */ +} + +/* + * Call out to per-macro handlers after clearing the persistent database + * key. If the macro sets the database key, flush it to the database. + */ +static void +pmdoc_node(MDOC_ARGS) +{ + + if (NULL == n) + return; + + switch (n->type) { + case (MDOC_HEAD): + /* FALLTHROUGH */ + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_TAIL): + /* FALLTHROUGH */ + case (MDOC_BLOCK): + /* FALLTHROUGH */ + case (MDOC_ELEM): + if (NULL == mdocs[n->tok]) + break; + + buf->len = 0; + (*mdocs[n->tok])(hash, buf, dbuf, n, m); + break; + default: + break; + } + + pmdoc_node(hash, buf, dbuf, n->child, m); + pmdoc_node(hash, buf, dbuf, n->next, m); +} + +static int +pman_node(MAN_ARGS) +{ + const struct man_node *head, *body; + const char *start, *sv; + size_t sz; + + if (NULL == n) + return(0); + + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. + */ + + if (MAN_BODY == n->type && MAN_SH == n->tok) { + body = n; + assert(body->parent); + if (NULL != (head = body->parent->head) && + 1 == head->nchild && + NULL != (head = (head->child)) && + MAN_TEXT == head->type && + 0 == strcmp(head->string, "NAME") && + NULL != (body = body->child) && + MAN_TEXT == body->type) { + + assert(body->string); + start = sv = body->string; + + /* + * Go through a special heuristic dance here. + * This is why -man manuals are great! + * (I'm being sarcastic: my eyes are bleeding.) 
+ * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[(int)sz]) + break; + + buf->len = 0; + buf_appendb(buf, start, sz); + buf_appendb(buf, "", 1); + + hash_put(hash, buf, TYPE_NAME); + + if (' ' == start[(int)sz]) { + start += (int)sz + 1; + break; + } + + assert(',' == start[(int)sz]); + start += (int)sz + 1; + while (' ' == *start) + start++; + } + + buf->len = 0; + + if (sv == start) { + buf_append(buf, start); + return(1); + } + + while (' ' == *start) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; + + while (' ' == *start) + start++; + + sz = strlen(start) + 1; + buf_appendb(dbuf, start, sz); + buf_appendb(buf, start, sz); + + hash_put(hash, buf, TYPE_DESC); + } + } + + if (pman_node(hash, buf, dbuf, n->child)) + return(1); + if (pman_node(hash, buf, dbuf, n->next)) + return(1); + + return(0); +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s [-ruv] [-d path] [file...]\n", + progname); +} Index: index.sgml =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/index.sgml,v retrieving revision 1.126 retrieving revision 1.127 diff -Lindex.sgml -Lindex.sgml -u -p -r1.126 -r1.127 --- index.sgml +++ index.sgml @@ -29,7 +29,7 @@ HREF="mandoc.1.html">mandoc, which interfaces with the compiler library to format output for UNIX terminals (with support for wide-character locales), XHTML, HTML, PostScript, and PDF. It also includes preconv, for recoding multibyte manuals; and makewhatis, for indexing manuals. + HREF="mandocdb.1.html">mandocdb, for indexing manuals. It is a BSD.lv project.

@@ -40,15 +40,15 @@

mdocml is in plain-old ANSI C and should build and run on any UNIX system, although makewhatis requires mandocdb requires Berkeley Database (this is installed by default on all BSD operating systems). To compile mdocml, run make, then make install to install into /usr/local. Be aware: if you have an existing groff installation, this may overwrite its preconv binary. - The makewhatis utility is not yet linked to the build. You must run make - makewhatis to build it (it does not install). + The mandocdb utility is not yet linked to the build. You must run make + mandocdb to build it (it does not install).

The most current version of mdocml is @VERSION@, dated - makewhatis(1) + mandocdb(1) index UNIX manuals - (text | - xhtml | - pdf | - postscript) + (text | + xhtml | + pdf | + postscript) @@ -314,8 +314,8 @@ 12-07-2011: version 1.11.4

- Bug-fixes and clean-ups across all systems, especially in makewhatis (note: still not - connected to the general build and must be compiled with make makewhatis) and the mandocdb makewhatis (note: still not + connected to the general build and must be compiled with make mandocdb make makewhatis) and the man parser. This release was significantly assisted by participants in OpenBSD's c2k11. Thanks!

@@ -341,7 +341,7 @@

Corrected some installation issues in version 1.11.1. Further migration to libmandoc. - Initial public release (this utility is very much under development) of makewhatis, + Initial public release (this utility is very much under development) of mandocdbmakewhatis, initially named mandoc-db. This utility produces keyword databases of manual content mandoc-cgi, Index: Makefile =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/Makefile,v retrieving revision 1.354 retrieving revision 1.355 diff -LMakefile -LMakefile -u -p -r1.354 -r1.355 --- Makefile +++ Makefile @@ -73,8 +73,8 @@ SRCS = Makefile \ mandoc.3 \ mandoc.c \ mandoc.h \ - makewhatis.1 \ - makewhatis.c \ + mandocdb.1 \ + mandocdb.c \ mandoc_char.7 \ mdoc.h \ mdoc.7 \ @@ -227,21 +227,21 @@ $(MANDOC_OBJS) $(MANDOC_LNS): main.h man compat.o compat.ln: config.h -MAKEWHATIS_OBJS = makewhatis.o -MAKEWHATIS_LNS = makewhatis.ln +MANDOCDB_OBJS = mandocdb.o +MANDOCDB_LNS = mandocdb.ln -$(MAKEWHATIS_OBJS) $(MAKEWHATIS_LNS): mandoc.h mdoc.h man.h config.h +$(MANDOCDB_OBJS) $(MANDOCDB_LNS): mandoc.h mdoc.h man.h config.h PRECONV_OBJS = preconv.o PRECONV_LNS = preconv.ln $(PRECONV_OBJS) $(PRECONV_LNS): config.h -INDEX_MANS = makewhatis.1.html \ - makewhatis.1.xhtml \ - makewhatis.1.ps \ - makewhatis.1.pdf \ - makewhatis.1.txt \ +INDEX_MANS = mandocdb.1.html \ + mandocdb.1.xhtml \ + mandocdb.1.ps \ + mandocdb.1.pdf \ + mandocdb.1.txt \ mandoc.1.html \ mandoc.1.xhtml \ mandoc.1.ps \ @@ -304,8 +304,8 @@ lint: llib-llibmandoc.ln llib-lmandoc.ln clean: rm -f libmandoc.a $(LIBMANDOC_OBJS) rm -f llib-llibmandoc.ln $(LIBMANDOC_LNS) - rm -f makewhatis $(MAKEWHATIS_OBJS) - rm -f llib-lmakewhatis.ln $(MAKEWHATIS_LNS) + rm -f mandocdb $(MANDOCDB_OBJS) + rm -f llib-lmandocdb.ln $(MANDOCDB_LNS) rm -f preconv $(PRECONV_OBJS) rm -f llib-lpreconv.ln $(PRECONV_LNS) rm -f mandoc $(MANDOC_OBJS) @@ -353,11 +353,11 @@ llib-lmandoc.ln: $(MANDOC_LNS) $(LINT) $(LINTFLAGS) -Cmandoc 
$(MANDOC_LNS) # You'll need -ldb for Linux. -makewhatis: $(MAKEWHATIS_OBJS) libmandoc.a - $(CC) -o $@ $(MAKEWHATIS_OBJS) libmandoc.a +mandocdb: $(MANDOCDB_OBJS) libmandoc.a + $(CC) -o $@ $(MANDOCDB_OBJS) libmandoc.a -llib-lmakewhatis.ln: $(MAKEWHATIS_LNS) - $(LINT) $(LINTFLAGS) -Cmakewhatis $(MAKEWHATIS_LNS) +llib-lmandocdb.ln: $(MANDOCDB_LNS) + $(LINT) $(LINTFLAGS) -Cmandocdb $(MANDOCDB_LNS) preconv: $(PRECONV_OBJS) $(CC) -o $@ $(PRECONV_OBJS) -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv