From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp-2.sys.kth.se (smtp-2.sys.kth.se [130.237.32.160]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id pAG0eurR022136 for ; Tue, 15 Nov 2011 19:40:56 -0500 (EST) Received: from mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) by smtp-2.sys.kth.se (Postfix) with ESMTP id 175AF14D7CC for ; Wed, 16 Nov 2011 01:40:50 +0100 (CET) X-Virus-Scanned: by amavisd-new at kth.se Received: from smtp-2.sys.kth.se ([130.237.32.160]) by mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) (amavisd-new, port 10024) with LMTP id u8gWgmNActKl for ; Wed, 16 Nov 2011 01:40:46 +0100 (CET) X-KTH-Auth: kristaps [77.201.233.73] X-KTH-mail-from: kristaps@bsd.lv X-KTH-rcpt-to: tech@mdocml.bsd.lv Received: from [192.168.1.57] (73.233.201.77.rev.sfr.net [77.201.233.73]) by smtp-2.sys.kth.se (Postfix) with ESMTP id 8701D14D7CB for ; Wed, 16 Nov 2011 01:40:44 +0100 (CET) Message-ID: <4EC3068B.40101@bsd.lv> Date: Wed, 16 Nov 2011 01:40:43 +0100 From: Kristaps Dzonsons User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:7.0.1) Gecko/20110929 Thunderbird/7.0.1 X-Mailinglist: mdocml-tech Reply-To: tech@mdocml.bsd.lv MIME-Version: 1.0 To: tech@mdocml.bsd.lv Subject: AND/OR in apropos(1). Content-Type: multipart/mixed; boundary="------------060201070707020707010408" This is a multi-part message in MIME format. --------------060201070707020707010408 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi, Enclosed is a patch to allow for arbitrary AND/OR operations in apropos(1). The syntax is very similar to the existing syntax, proposed by schwarze@, except that I made case-insensitivity into an operator. This will set the tone for further unary operators (such as -S for sections). 
It goes like this: apropos Nm~foo -a Nd~bar (AND) apropos Nm~foo Nd~bar (implied OR) apropos Nm~foo -o Nd~bar (OR) apropos Nm~foo -a Nd~bar -o baz Operators follow operator(7) syntax: AND is evaluated before OR and both are left-right. The implementation is straightforward. exprcomp() compiles a set of arguments into a series of expressions. When searching, exprexecpre() pre-filters keywords over the expressions; exprexecpost() actually evaluates the full expression over all records. Thoughts? The one thing this doesn't support is grouping with (expression), which would only require expanding the expression into nested lists. The other thing that will change is hiding away the tree and match structures now in apropos_db.h. Regarding -S and -s or whatever they are, these should be unary operators for further filtering: apropos Nm~foo -o Nm~bar -a -S i386 Thanks, Kristaps --------------060201070707020707010408 Content-Type: text/plain; name="patch.txt" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="patch.txt" Index: apropos.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.c,v retrieving revision 1.13 diff -u -r1.13 apropos.c --- apropos.c 14 Nov 2011 10:07:06 -0000 1.13 +++ apropos.c 16 Nov 2011 00:38:26 -0000 @@ -38,8 +38,7 @@ main(int argc, char *argv[]) { int ch; - size_t sz; - char *buf; + size_t terms; struct opts opts; struct expr *e; extern int optind; @@ -72,32 +71,11 @@ if (0 == argc) return(EXIT_SUCCESS); - /* - * Collapse expressions into a single string. - * First count up the contained strings, adding a space at the - * end of each (plus nil-terminator). Then merge. 
- */ - - for (sz = 0, ch = 0; ch < argc; ch++) - sz += strlen(argv[ch]) + 1; - - buf = mandoc_malloc(++sz); - - for (*buf = '\0', ch = 0; ch < argc; ch++) { - strlcat(buf, argv[ch], sz); - strlcat(buf, " ", sz); - } - - buf[sz - 2] = '\0'; - - if (NULL == (e = exprcomp(buf))) { + if (NULL == (e = exprcomp(argc, argv, &terms))) { fprintf(stderr, "Bad expression\n"); - free(buf); return(EXIT_FAILURE); } - free(buf); - /* * Configure databases. * The keyword database is a btree that allows for duplicate @@ -105,7 +83,7 @@ * The index database is a recno. */ - apropos_search(&opts, e, NULL, list); + apropos_search(&opts, e, terms, NULL, list); exprfree(e); return(EXIT_SUCCESS); } Index: apropos_db.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.c,v retrieving revision 1.4 diff -u -r1.4 apropos_db.c --- apropos_db.c 14 Nov 2011 10:07:06 -0000 1.4 +++ apropos_db.c 16 Nov 2011 00:38:26 -0000 @@ -34,9 +34,12 @@ struct expr { int regex; + int index; int mask; + int and; char *v; regex_t re; + struct expr *next; }; struct type { @@ -65,13 +68,17 @@ static DB *btree_open(void); static int btree_read(const DBT *, const struct mchars *, char **); -static int exprexec(const struct expr *, char *, int); +static int exprexecpre(const struct expr *, const char *, int); +static void exprexecpost(const struct expr *, + const char *, int, int *, size_t); +static struct expr *exprterm(char *, int, int); static DB *index_open(void); static int index_read(const DBT *, const DBT *, const struct mchars *, struct rec *); static void norm_string(const char *, const struct mchars *, char **); static size_t norm_utf8(unsigned int, char[7]); +static void recfree(struct rec *); /* * Open the keyword mandoc-db database. 
@@ -317,16 +324,17 @@ */ void apropos_search(const struct opts *opts, const struct expr *expr, - void *arg, void (*res)(struct rec *, size_t, void *)) + size_t terms, void *arg, + void (*res)(struct rec *, size_t, void *)) { - int i, len, root, leaf; + int i, len, root, leaf, mask, mlen; DBT key, val; DB *btree, *idx; struct mchars *mc; int ch; char *buf; recno_t rec; - struct rec *recs; + struct rec *recs, *rrecs; struct rec srec; root = -1; @@ -362,7 +370,13 @@ if ( ! btree_read(&key, mc, &buf)) break; - if ( ! exprexec(expr, buf, *(int *)val.data)) + mask = *(int *)val.data; + + /* + * See if this keyword record matches any of the + * expressions we have stored. + */ + if ( ! exprexecpre(expr, buf, mask)) continue; memcpy(&rec, val.data + 4, sizeof(recno_t)); @@ -381,8 +395,13 @@ else break; - if (leaf >= 0 && recs[leaf].rec == rec) + if (leaf >= 0 && recs[leaf].rec == rec) { + if (0 == recs[leaf].matches[0]) + exprexecpost + (expr, buf, mask, + recs[leaf].matches, terms); continue; + } /* * Now we actually extract the manpage's metadata from @@ -408,6 +427,12 @@ (recs, (len + 1) * sizeof(struct rec)); memcpy(&recs[len], &srec, sizeof(struct rec)); + recs[len].matches = + mandoc_calloc(terms + 1, sizeof(int)); + + exprexecpost + (expr, buf, mask, + recs[len].matches, terms); /* Append to our tree. */ @@ -423,24 +448,25 @@ len++; } - if (1 == ch) - (*res)(recs, len, arg); + if (1 == ch) { + for (mlen = i = 0; i < len; i++) + if (recs[i].matches[0]) + mlen++; + rrecs = mandoc_malloc(mlen * sizeof(struct rec)); + for (mlen = i = 0; i < len; i++) + if (recs[i].matches[0]) + memcpy(&rrecs[mlen++], &recs[i], + sizeof(struct rec)); + (*res)(rrecs, mlen, arg); + free(rrecs); + } /* XXX: else? corrupt database error? 
*/ out: - for (i = 0; i < len; i++) { - free(recs[i].file); - free(recs[i].cat); - free(recs[i].title); - free(recs[i].arch); - free(recs[i].desc); - } + for (i = 0; i < len; i++) + recfree(&recs[i]); - free(srec.file); - free(srec.cat); - free(srec.title); - free(srec.arch); - free(srec.desc); + recfree(&srec); if (mc) mchars_free(mc); @@ -453,16 +479,77 @@ free(recs); } +static void +recfree(struct rec *rec) +{ + + free(rec->file); + free(rec->matches); + free(rec->cat); + free(rec->title); + free(rec->arch); + free(rec->desc); +} + struct expr * -exprcomp(char *buf) +exprcomp(int argc, char *argv[], size_t *tt) +{ + struct expr *e, *first, *next; + int pos, log; + + first = next = NULL; + (*tt) = 0; + + for (pos = 0; pos < argc; pos++) { + e = next; + log = 0; + + if (0 == strcmp("-a", argv[pos])) + log = 1; + else if (0 == strcmp("-o", argv[pos])) + log = 2; + + if (log > 0 && ++pos >= argc) + goto err; + + if (0 == strcmp("-i", argv[pos])) { + if (++pos >= argc) + goto err; + next = exprterm(argv[pos], 1, log == 1); + } else + next = exprterm(argv[pos], 0, log == 1); + + if (NULL == next) + goto err; + + next->index = (int)(*tt)++; + + if (NULL == first) { + assert(NULL == e); + first = next; + } else { + assert(NULL != e); + e->next = next; + } + } + + return(first); +err: + exprfree(first); + return(NULL); +} + +static struct expr * +exprterm(char *buf, int cs, int and) { - struct expr *p; struct expr e; + struct expr *p; char *key; - int i, icase; + int i; + + memset(&e, 0, sizeof(struct expr)); - if ('\0' == *buf) - return(NULL); + e.and = and; /* * Choose regex or substring match. @@ -480,14 +567,9 @@ * Determine the record types to search for. 
- */ - icase = 0; e.mask = 0; if (buf < e.v) { while (NULL != (key = strsep(&buf, ","))) { - if ('i' == key[0] && '\0' == key[1]) { - icase = REG_ICASE; - continue; - } i = 0; while (types[i].mask && strcmp(types[i].name, key)) @@ -498,9 +580,11 @@ if (0 == e.mask) e.mask = TYPE_Nm | TYPE_Nd; - if (e.regex && - regcomp(&e.re, e.v, REG_EXTENDED | REG_NOSUB | icase)) - return(NULL); + if (e.regex) { + i = REG_EXTENDED | REG_NOSUB | (cs ? REG_ICASE : 0); + if (regcomp(&e.re, e.v, i)) + return(NULL); + } e.v = mandoc_strdup(e.v); @@ -512,26 +596,71 @@ void exprfree(struct expr *p) { + struct expr *pp; + + while (NULL != p) { + if (p->regex) + regfree(&p->re); + free(p->v); + pp = p->next; + free(p); + p = pp; + } +} - if (NULL == p) - return; - - if (p->regex) - regfree(&p->re); +/* + * See if this expression evaluates to true for any terms. + * Return 1 if any expression evaluates to true, else 0. + */ +static int +exprexecpre(const struct expr *p, const char *cp, int mask) +{ - free(p->v); - free(p); + for ( ; NULL != p; p = p->next) { + if ( ! (mask & p->mask)) + continue; + if (p->regex) { + if (0 == regexec(&p->re, cp, 0, NULL, 0)) + return(1); + } else if (NULL != strcasestr(cp, p->v)) + return(1); + } + return(0); } -static int -exprexec(const struct expr *p, char *cp, int mask) +/* + * First, update the array of terms for which this expression evaluates + * to true. + * Second, logically evaluate all terms over the updated array of truth + * values. + * If this evaluates to true, mark the expression as satisfied. + */ +static void +exprexecpost(const struct expr *e, const char *cp, + int mask, int *matches, size_t matchsz) { + const struct expr *p; + int match; - if ( ! (mask & p->mask)) - return(0); + assert(0 == matches[0]); + + for (p = e; p; p = p->next) { + if ( ! 
(mask & p->mask)) + continue; + if (p->regex) { + if (regexec(&p->re, cp, 0, NULL, 0)) + continue; + } else if (NULL == strcasestr(cp, p->v)) + continue; + + matches[p->index + 1] = 1; + } + + for (match = 0, p = e; p && ! match; p = p->next) { + match = matches[p->index + 1]; + for ( ; p->next && p->next->and; p = p->next) + match = match && matches[p->next->index + 1]; + } - if (p->regex) - return(0 == regexec(&p->re, cp, 0, NULL, 0)); - else - return(NULL != strcasestr(cp, p->v)); + matches[0] = match; } Index: apropos_db.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.h,v retrieving revision 1.4 diff -u -r1.4 apropos_db.h --- apropos_db.h 14 Nov 2011 10:07:06 -0000 1.4 +++ apropos_db.h 16 Nov 2011 00:38:26 -0000 @@ -34,6 +34,7 @@ */ int lhs; int rhs; + int *matches; }; struct opts { @@ -46,10 +47,9 @@ struct expr; void apropos_search(const struct opts *, - const struct expr *, void *, + const struct expr *, size_t, void *, void (*)(struct rec *, size_t, void *)); - -struct expr *exprcomp(char *); +struct expr *exprcomp(int, char *[], size_t *); void exprfree(struct expr *); __END_DECLS --------------060201070707020707010408-- -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv