* AND/OR in apropos(1).
@ 2011-11-16 0:40 Kristaps Dzonsons
0 siblings, 0 replies; only message in thread
From: Kristaps Dzonsons @ 2011-11-16 0:40 UTC (permalink / raw)
To: tech
[-- Attachment #1: Type: text/plain, Size: 1216 bytes --]
Hi,
Enclosed is a patch to allow for arbitrary AND/OR operations in apropos(1).
The syntax is very similar to the existing syntax, proposed by
schwarze@, except that I made case-insensitivity into an operator. This
will set the tone for further unary operators (such as -S for sections).
It goes like this:
apropos Nm~foo -a Nd~bar (AND)
apropos Nm~foo Nd~bar (implied OR)
apropos Nm~foo -o Nd~bar (OR)
apropos Nm~foo -a Nd~bar -o baz ((Nm~foo AND Nd~bar) OR baz)
Operators follow operator(7) precedence: AND is evaluated before OR, and
both associate left to right.
The implementation is straightforward. exprcomp() compiles the argument
list into a linked list of expression terms. When searching, exprexecpre()
pre-filters each keyword against the individual terms; exprexecpost() then
evaluates the full AND/OR expression for each record that was matched.
Thoughts?
The one thing this doesn't support yet is grouping with (expression), which
would only require expanding the expression list into nested lists. The
other pending change is to hide the tree and match structures that are
currently exposed in apropos_db.h.
Regarding -S and -s or whatever they are, these should be unary
operators for further filtering:
apropos Nm~foo -o Nm~bar -a -S i386
Thanks,
Kristaps
[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 9419 bytes --]
Index: apropos.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.c,v
retrieving revision 1.13
diff -u -r1.13 apropos.c
--- apropos.c 14 Nov 2011 10:07:06 -0000 1.13
+++ apropos.c 16 Nov 2011 00:38:26 -0000
@@ -38,8 +38,7 @@
main(int argc, char *argv[])
{
int ch;
- size_t sz;
- char *buf;
+ size_t terms;
struct opts opts;
struct expr *e;
extern int optind;
@@ -72,32 +71,11 @@
if (0 == argc)
return(EXIT_SUCCESS);
- /*
- * Collapse expressions into a single string.
- * First count up the contained strings, adding a space at the
- * end of each (plus nil-terminator). Then merge.
- */
-
- for (sz = 0, ch = 0; ch < argc; ch++)
- sz += strlen(argv[ch]) + 1;
-
- buf = mandoc_malloc(++sz);
-
- for (*buf = '\0', ch = 0; ch < argc; ch++) {
- strlcat(buf, argv[ch], sz);
- strlcat(buf, " ", sz);
- }
-
- buf[sz - 2] = '\0';
-
- if (NULL == (e = exprcomp(buf))) {
+ if (NULL == (e = exprcomp(argc, argv, &terms))) {
fprintf(stderr, "Bad expression\n");
- free(buf);
return(EXIT_FAILURE);
}
- free(buf);
-
/*
* Configure databases.
* The keyword database is a btree that allows for duplicate
@@ -105,7 +83,7 @@
* The index database is a recno.
*/
- apropos_search(&opts, e, NULL, list);
+ apropos_search(&opts, e, terms, NULL, list);
exprfree(e);
return(EXIT_SUCCESS);
}
Index: apropos_db.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.c,v
retrieving revision 1.4
diff -u -r1.4 apropos_db.c
--- apropos_db.c 14 Nov 2011 10:07:06 -0000 1.4
+++ apropos_db.c 16 Nov 2011 00:38:26 -0000
@@ -34,9 +34,12 @@
struct expr {
int regex;
+ int index;
int mask;
+ int and;
char *v;
regex_t re;
+ struct expr *next;
};
struct type {
@@ -65,13 +68,17 @@
static DB *btree_open(void);
static int btree_read(const DBT *, const struct mchars *, char **);
-static int exprexec(const struct expr *, char *, int);
+static int exprexecpre(const struct expr *, const char *, int);
+static void exprexecpost(const struct expr *,
+ const char *, int, int *, size_t);
+static struct expr *exprterm(char *, int, int);
static DB *index_open(void);
static int index_read(const DBT *, const DBT *,
const struct mchars *, struct rec *);
static void norm_string(const char *,
const struct mchars *, char **);
static size_t norm_utf8(unsigned int, char[7]);
+static void recfree(struct rec *);
/*
* Open the keyword mandoc-db database.
@@ -317,16 +324,17 @@
*/
void
apropos_search(const struct opts *opts, const struct expr *expr,
- void *arg, void (*res)(struct rec *, size_t, void *))
+ size_t terms, void *arg,
+ void (*res)(struct rec *, size_t, void *))
{
- int i, len, root, leaf;
+ int i, len, root, leaf, mask, mlen;
DBT key, val;
DB *btree, *idx;
struct mchars *mc;
int ch;
char *buf;
recno_t rec;
- struct rec *recs;
+ struct rec *recs, *rrecs;
struct rec srec;
root = -1;
@@ -362,7 +370,13 @@
if ( ! btree_read(&key, mc, &buf))
break;
- if ( ! exprexec(expr, buf, *(int *)val.data))
+ mask = *(int *)val.data;
+
+ /*
+ * See if this keyword record matches any of the
+ * expressions we have stored.
+ */
+ if ( ! exprexecpre(expr, buf, mask))
continue;
memcpy(&rec, val.data + 4, sizeof(recno_t));
@@ -381,8 +395,13 @@
else
break;
- if (leaf >= 0 && recs[leaf].rec == rec)
+ if (leaf >= 0 && recs[leaf].rec == rec) {
+ if (0 == recs[leaf].matches[0])
+ exprexecpost
+ (expr, buf, mask,
+ recs[leaf].matches, terms);
continue;
+ }
/*
* Now we actually extract the manpage's metadata from
@@ -408,6 +427,12 @@
(recs, (len + 1) * sizeof(struct rec));
memcpy(&recs[len], &srec, sizeof(struct rec));
+ recs[len].matches =
+ mandoc_calloc(terms + 1, sizeof(int));
+
+ exprexecpost
+ (expr, buf, mask,
+ recs[len].matches, terms);
/* Append to our tree. */
@@ -423,24 +448,25 @@
len++;
}
- if (1 == ch)
- (*res)(recs, len, arg);
+ if (1 == ch) {
+ for (mlen = i = 0; i < len; i++)
+ if (recs[i].matches[0])
+ mlen++;
+ rrecs = mandoc_malloc(mlen * sizeof(struct rec));
+ for (mlen = i = 0; i < len; i++)
+ if (recs[i].matches[0])
+ memcpy(&rrecs[mlen++], &recs[i],
+ sizeof(struct rec));
+ (*res)(rrecs, mlen, arg);
+ free(rrecs);
+ }
/* XXX: else? corrupt database error? */
out:
- for (i = 0; i < len; i++) {
- free(recs[i].file);
- free(recs[i].cat);
- free(recs[i].title);
- free(recs[i].arch);
- free(recs[i].desc);
- }
+ for (i = 0; i < len; i++)
+ recfree(&recs[i]);
- free(srec.file);
- free(srec.cat);
- free(srec.title);
- free(srec.arch);
- free(srec.desc);
+ recfree(&srec);
if (mc)
mchars_free(mc);
@@ -453,16 +479,77 @@
free(recs);
}
+static void
+recfree(struct rec *rec)
+{
+
+ free(rec->file);
+ free(rec->matches);
+ free(rec->cat);
+ free(rec->title);
+ free(rec->arch);
+ free(rec->desc);
+}
+
struct expr *
-exprcomp(char *buf)
+exprcomp(int argc, char *argv[], size_t *tt)
+{
+ struct expr *e, *first, *next;
+ int pos, log;
+
+ first = next = NULL;
+ (*tt) = 0;
+
+ for (pos = 0; pos < argc; pos++) {
+ e = next;
+ log = 0;
+
+ if (0 == strcmp("-a", argv[pos]))
+ log = 1;
+ else if (0 == strcmp("-o", argv[pos]))
+ log = 2;
+
+ if (log > 0 && ++pos >= argc)
+ goto err;
+
+ if (0 == strcmp("-i", argv[pos])) {
+ if (++pos >= argc)
+ goto err;
+ next = exprterm(argv[pos], 1, log == 1);
+ } else
+ next = exprterm(argv[pos], 0, log == 1);
+
+ if (NULL == next)
+ goto err;
+
+ next->index = (int)(*tt)++;
+
+ if (NULL == first) {
+ assert(NULL == e);
+ first = next;
+ } else {
+ assert(NULL != e);
+ e->next = next;
+ }
+ }
+
+ return(first);
+err:
+ exprfree(first);
+ return(NULL);
+}
+
+static struct expr *
+exprterm(char *buf, int cs, int and)
{
- struct expr *p;
struct expr e;
+ struct expr *p;
char *key;
- int i, icase;
+ int i;
+
+ memset(&e, 0, sizeof(struct expr));
- if ('\0' == *buf)
- return(NULL);
+ e.and = and;
/*
* Choose regex or substring match.
@@ -480,14 +567,9 @@
* Determine the record types to search for.
*/
- icase = 0;
e.mask = 0;
if (buf < e.v) {
while (NULL != (key = strsep(&buf, ","))) {
- if ('i' == key[0] && '\0' == key[1]) {
- icase = REG_ICASE;
- continue;
- }
i = 0;
while (types[i].mask &&
strcmp(types[i].name, key))
@@ -498,9 +580,11 @@
if (0 == e.mask)
e.mask = TYPE_Nm | TYPE_Nd;
- if (e.regex &&
- regcomp(&e.re, e.v, REG_EXTENDED | REG_NOSUB | icase))
- return(NULL);
+ if (e.regex) {
+ i = REG_EXTENDED | REG_NOSUB | cs ? REG_ICASE : 0;
+ if (regcomp(&e.re, e.v, i))
+ return(NULL);
+ }
e.v = mandoc_strdup(e.v);
@@ -512,26 +596,71 @@
void
exprfree(struct expr *p)
{
+ struct expr *pp;
+
+ while (NULL != p) {
+ if (p->regex)
+ regfree(&p->re);
+ free(p->v);
+ pp = p->next;
+ free(p);
+ p = pp;
+ }
+}
- if (NULL == p)
- return;
-
- if (p->regex)
- regfree(&p->re);
+/*
+ * See if any term of the expression matches this keyword.
+ * Return 1 if at least one term matches, else 0.
+ */
+static int
+exprexecpre(const struct expr *p, const char *cp, int mask)
+{
- free(p->v);
- free(p);
+ for ( ; NULL != p; p = p->next) {
+ if ( ! (mask & p->mask))
+ continue;
+ if (p->regex) {
+ if (0 == regexec(&p->re, cp, 0, NULL, 0))
+ return(1);
+ } else if (NULL != strcasestr(cp, p->v))
+ return(1);
+ }
+ return(0);
}
-static int
-exprexec(const struct expr *p, char *cp, int mask)
+/*
+ * First, update the array of terms for which this expression evaluates
+ * to true.
+ * Second, logically evaluate all terms over the updated array of truth
+ * values.
+ * If this evaluates to true, mark the expression as satisfied.
+ */
+static void
+exprexecpost(const struct expr *e, const char *cp,
+ int mask, int *matches, size_t matchsz)
{
+ const struct expr *p;
+ int match;
- if ( ! (mask & p->mask))
- return(0);
+ assert(0 == matches[0]);
+
+ for (p = e; p; p = p->next) {
+ if ( ! (mask & p->mask))
+ continue;
+ if (p->regex) {
+ if (regexec(&p->re, cp, 0, NULL, 0))
+ continue;
+ } else if (NULL == strcasestr(cp, p->v))
+ continue;
+
+ matches[p->index + 1] = 1;
+ }
+
+ for (match = 0, p = e; p && ! match; p = p->next) {
+ match = matches[p->index + 1];
+ for ( ; p->next && p->next->and; p = p->next)
+ match = match && matches[p->next->index + 1];
+ }
- if (p->regex)
- return(0 == regexec(&p->re, cp, 0, NULL, 0));
- else
- return(NULL != strcasestr(cp, p->v));
+ matches[0] = match;
}
Index: apropos_db.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.h,v
retrieving revision 1.4
diff -u -r1.4 apropos_db.h
--- apropos_db.h 14 Nov 2011 10:07:06 -0000 1.4
+++ apropos_db.h 16 Nov 2011 00:38:26 -0000
@@ -34,6 +34,7 @@
*/
int lhs;
int rhs;
+ int *matches;
};
struct opts {
@@ -46,10 +47,9 @@
struct expr;
void apropos_search(const struct opts *,
- const struct expr *, void *,
+ const struct expr *, size_t, void *,
void (*)(struct rec *, size_t, void *));
-
-struct expr *exprcomp(char *);
+struct expr *exprcomp(int, char *[], size_t *);
void exprfree(struct expr *);
__END_DECLS
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2011-11-16 0:40 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-16 0:40 AND/OR in apropos(1) Kristaps Dzonsons
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).