tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
* AND/OR in apropos(1).
@ 2011-11-16  0:40 Kristaps Dzonsons
  0 siblings, 0 replies; only message in thread
From: Kristaps Dzonsons @ 2011-11-16  0:40 UTC (permalink / raw)
  To: tech

[-- Attachment #1: Type: text/plain, Size: 1216 bytes --]

Hi,

Enclosed is a patch to allow for arbitrary AND/OR operations in apropos(1).

The syntax is very similar to the existing syntax, proposed by
schwarze@, except that I made case-insensitivity into an operator: -i,
given immediately before the term it applies to (e.g., "apropos -i
Nm~foo").  This will set the tone for further unary operators (such as
-S for sections).

It goes like this:

  apropos Nm~foo -a Nd~bar   (AND)
  apropos Nm~foo Nd~bar      (implied OR)
  apropos Nm~foo -o Nd~bar   (OR)
  apropos Nm~foo -a Nd~bar -o baz

Operators follow operator(7) precedence: AND binds more tightly than OR,
and both associate left to right.  The last example above is therefore
evaluated as (Nm~foo AND Nd~bar) OR baz.

The implementation is straightforward.  exprcomp() compiles a set of 
arguments into a series of expressions.  When searching, exprexecpre() 
pre-filters keywords over the expressions; exprexecpost() actually 
evaluates the full expression over all records.

Thoughts?

The one thing this doesn't support is grouping with (expression), which
would only require expanding the expression into nested lists.  One
further change still to come is hiding away the tree and match
structures that are currently exposed in apropos_db.h.

Regarding -S and -s or whatever they are, these should be unary 
operators for further filtering:

  apropos Nm~foo -o Nm~bar -a -S i386

Thanks,

Kristaps

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 9419 bytes --]

Index: apropos.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.c,v
retrieving revision 1.13
diff -u -r1.13 apropos.c
--- apropos.c	14 Nov 2011 10:07:06 -0000	1.13
+++ apropos.c	16 Nov 2011 00:38:26 -0000
@@ -38,8 +38,7 @@
 main(int argc, char *argv[])
 {
 	int		 ch;
-	size_t		 sz;
-	char		*buf;
+	size_t		 terms;
 	struct opts	 opts;
 	struct expr	*e;
 	extern int	 optind;
@@ -72,32 +71,11 @@
 	if (0 == argc) 
 		return(EXIT_SUCCESS);
 
-	/* 
-	 * Collapse expressions into a single string.  
-	 * First count up the contained strings, adding a space at the
-	 * end of each (plus nil-terminator).  Then merge.
-	 */
-
-	for (sz = 0, ch = 0; ch < argc; ch++)
-		sz += strlen(argv[ch]) + 1;
-
-	buf = mandoc_malloc(++sz);
-
-	for (*buf = '\0', ch = 0; ch < argc; ch++) {
-		strlcat(buf, argv[ch], sz);
-		strlcat(buf, " ", sz);
-	}
-
-	buf[sz - 2] = '\0';
-
-	if (NULL == (e = exprcomp(buf))) {
+	if (NULL == (e = exprcomp(argc, argv, &terms))) {
 		fprintf(stderr, "Bad expression\n");
-		free(buf);
 		return(EXIT_FAILURE);
 	}
 
-	free(buf);
-
 	/*
 	 * Configure databases.
 	 * The keyword database is a btree that allows for duplicate
@@ -105,7 +83,7 @@
 	 * The index database is a recno.
 	 */
 
-	apropos_search(&opts, e, NULL, list);
+	apropos_search(&opts, e, terms, NULL, list);
 	exprfree(e);
 	return(EXIT_SUCCESS);
 }
Index: apropos_db.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.c,v
retrieving revision 1.4
diff -u -r1.4 apropos_db.c
--- apropos_db.c	14 Nov 2011 10:07:06 -0000	1.4
+++ apropos_db.c	16 Nov 2011 00:38:26 -0000
@@ -34,9 +34,12 @@
 
 struct	expr {
 	int		 regex;
+	int		 index;
 	int	 	 mask;
+	int		 and;
 	char		*v;
 	regex_t	 	 re;
+	struct expr	*next;
 };
 
 struct	type {
@@ -65,13 +68,17 @@
 
 static	DB	*btree_open(void);
 static	int	 btree_read(const DBT *, const struct mchars *, char **);
-static	int	 exprexec(const struct expr *, char *, int);
+static	int	 exprexecpre(const struct expr *, const char *, int);
+static	void	 exprexecpost(const struct expr *, 
+			const char *, int, int *, size_t);
+static	struct expr *exprterm(char *, int, int);
 static	DB	*index_open(void);
 static	int	 index_read(const DBT *, const DBT *, 
 			const struct mchars *, struct rec *);
 static	void	 norm_string(const char *,
 			const struct mchars *, char **);
 static	size_t	 norm_utf8(unsigned int, char[7]);
+static	void	 recfree(struct rec *);
 
 /*
  * Open the keyword mandoc-db database.
@@ -317,16 +324,17 @@
  */
 void
 apropos_search(const struct opts *opts, const struct expr *expr,
-		void *arg, void (*res)(struct rec *, size_t, void *))
+		size_t terms, void *arg, 
+		void (*res)(struct rec *, size_t, void *))
 {
-	int		 i, len, root, leaf;
+	int		 i, len, root, leaf, mask, mlen;
 	DBT		 key, val;
 	DB		*btree, *idx;
 	struct mchars	*mc;
 	int		 ch;
 	char		*buf;
 	recno_t		 rec;
-	struct rec	*recs;
+	struct rec	*recs, *rrecs;
 	struct rec	 srec;
 
 	root	= -1;
@@ -362,7 +370,13 @@
 		if ( ! btree_read(&key, mc, &buf))
 			break;
 
-		if ( ! exprexec(expr, buf, *(int *)val.data))
+		mask = *(int *)val.data;
+
+		/*
+		 * See if this keyword record matches any of the
+		 * expressions we have stored.
+		 */
+		if ( ! exprexecpre(expr, buf, mask))
 			continue;
 
 		memcpy(&rec, val.data + 4, sizeof(recno_t));
@@ -381,8 +395,13 @@
 			else 
 				break;
 
-		if (leaf >= 0 && recs[leaf].rec == rec)
+		if (leaf >= 0 && recs[leaf].rec == rec) {
+			if (0 == recs[leaf].matches[0])
+				exprexecpost
+					(expr, buf, mask, 
+					 recs[leaf].matches, terms);
 			continue;
+		}
 
 		/*
 		 * Now we actually extract the manpage's metadata from
@@ -408,6 +427,12 @@
 			(recs, (len + 1) * sizeof(struct rec));
 
 		memcpy(&recs[len], &srec, sizeof(struct rec));
+		recs[len].matches = 
+			mandoc_calloc(terms + 1, sizeof(int));
+
+		exprexecpost
+			(expr, buf, mask, 
+			 recs[len].matches, terms);
 
 		/* Append to our tree. */
 
@@ -423,24 +448,25 @@
 		len++;
 	}
 
-	if (1 == ch)
-		(*res)(recs, len, arg);
+	if (1 == ch) {
+		for (mlen = i = 0; i < len; i++)
+			if (recs[i].matches[0])
+				mlen++;
+		rrecs = mandoc_malloc(mlen * sizeof(struct rec));
+		for (mlen = i = 0; i < len; i++)
+			if (recs[i].matches[0])
+				memcpy(&rrecs[mlen++], &recs[i], 
+						sizeof(struct rec));
+		(*res)(rrecs, mlen, arg);
+		free(rrecs);
+	}
 
 	/* XXX: else?  corrupt database error? */
 out:
-	for (i = 0; i < len; i++) {
-		free(recs[i].file);
-		free(recs[i].cat);
-		free(recs[i].title);
-		free(recs[i].arch);
-		free(recs[i].desc);
-	}
+	for (i = 0; i < len; i++)
+		recfree(&recs[i]);
 
-	free(srec.file);
-	free(srec.cat);
-	free(srec.title);
-	free(srec.arch);
-	free(srec.desc);
+	recfree(&srec);
 
 	if (mc)
 		mchars_free(mc);
@@ -453,16 +479,77 @@
 	free(recs);
 }
 
+static void
+recfree(struct rec *rec)
+{
+
+	free(rec->file);
+	free(rec->matches);
+	free(rec->cat);
+	free(rec->title);
+	free(rec->arch);
+	free(rec->desc);
+}
+
 struct expr *
-exprcomp(char *buf)
+exprcomp(int argc, char *argv[], size_t *tt)
+{
+	struct expr	*e, *first, *next;
+	int		 pos, log;
+
+	first = next = NULL;
+	(*tt) = 0;
+
+	for (pos = 0; pos < argc; pos++) {
+		e = next;
+		log = 0;
+
+		if (0 == strcmp("-a", argv[pos]))
+			log = 1;			
+		else if (0 == strcmp("-o", argv[pos]))
+			log = 2;
+
+		if (log > 0 && ++pos >= argc)
+			goto err;
+
+		if (0 == strcmp("-i", argv[pos])) {
+			if (++pos >= argc)
+				goto err;
+			next = exprterm(argv[pos], 1, log == 1);
+		} else
+			next = exprterm(argv[pos], 0, log == 1);
+
+		if (NULL == next)
+			goto err;
+
+		next->index = (int)(*tt)++;
+
+		if (NULL == first) {
+			assert(NULL == e);
+			first = next;
+		} else {
+			assert(NULL != e);
+			e->next = next;
+		}
+	}
+
+	return(first);
+err:
+	exprfree(first);
+	return(NULL);
+}
+
+static struct expr *
+exprterm(char *buf, int cs, int and)
 {
-	struct expr	*p;
 	struct expr	 e;
+	struct expr	*p;
 	char		*key;
-	int		 i, icase;
+	int		 i;
+
+	memset(&e, 0, sizeof(struct expr));
 
-	if ('\0' == *buf)
-		return(NULL);
+	e.and = and;
 
 	/*
 	 * Choose regex or substring match.
@@ -480,14 +567,9 @@
 	 * Determine the record types to search for.
 	 */
 
-	icase = 0;
 	e.mask = 0;
 	if (buf < e.v) {
 		while (NULL != (key = strsep(&buf, ","))) {
-			if ('i' == key[0] && '\0' == key[1]) {
-				icase = REG_ICASE;
-				continue;
-			}
 			i = 0;
 			while (types[i].mask &&
 					strcmp(types[i].name, key))
@@ -498,9 +580,11 @@
 	if (0 == e.mask)
 		e.mask = TYPE_Nm | TYPE_Nd;
 
-	if (e.regex &&
-	    regcomp(&e.re, e.v, REG_EXTENDED | REG_NOSUB | icase))
-		return(NULL);
+	if (e.regex) {
+		i = REG_EXTENDED | REG_NOSUB | cs ? REG_ICASE : 0;
+		if (regcomp(&e.re, e.v, i))
+			return(NULL);
+	}
 
 	e.v = mandoc_strdup(e.v);
 
@@ -512,26 +596,71 @@
 void
 exprfree(struct expr *p)
 {
+	struct expr	*pp;
+	
+	while (NULL != p) {
+		if (p->regex)
+			regfree(&p->re);
+		free(p->v);
+		pp = p->next;
+		free(p);
+		p = pp;
+	}
+}
 
-	if (NULL == p)
-		return;
-
-	if (p->regex)
-		regfree(&p->re);
+/*
+ * See if this expression evaluates to true for any terms.
+ * Return 1 if any expression evaluates to true, else 0.
+ */
+static int
+exprexecpre(const struct expr *p, const char *cp, int mask)
+{
 
-	free(p->v);
-	free(p);
+	for ( ; NULL != p; p = p->next) {
+		if ( ! (mask & p->mask))
+			continue;
+		if (p->regex) {
+			if (0 == regexec(&p->re, cp, 0, NULL, 0))
+				return(1);
+		} else if (NULL != strcasestr(cp, p->v))
+			return(1);
+	}
+	return(0);
 }
 
-static int
-exprexec(const struct expr *p, char *cp, int mask)
+/*
+ * First, update the array of terms for which this expression evaluates
+ * to true.
+ * Second, logically evaluate all terms over the updated array of truth
+ * values.
+ * If this evaluates to true, mark the expression as satisfied.
+ */
+static void
+exprexecpost(const struct expr *e, const char *cp, 
+		int mask, int *matches, size_t matchsz)
 {
+	const struct expr *p;
+	int		   match;
 
-	if ( ! (mask & p->mask))
-		return(0);
+	assert(0 == matches[0]);
+
+	for (p = e; p; p = p->next) {
+		if ( ! (mask & p->mask))
+			continue;
+		if (p->regex) {
+			if (regexec(&p->re, cp, 0, NULL, 0))
+				continue;
+		} else if (NULL == strcasestr(cp, p->v))
+			continue;
+
+		matches[p->index + 1] = 1;
+	}
+
+	for (match = 0, p = e; p && ! match; p = p->next) {
+		match = matches[p->index + 1];
+		for ( ; p->next && p->next->and; p = p->next)
+			match = match && matches[p->next->index + 1];
+	}
 
-	if (p->regex)
-		return(0 == regexec(&p->re, cp, 0, NULL, 0));
-	else
-		return(NULL != strcasestr(cp, p->v));
+	matches[0] = match;
 }
Index: apropos_db.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.h,v
retrieving revision 1.4
diff -u -r1.4 apropos_db.h
--- apropos_db.h	14 Nov 2011 10:07:06 -0000	1.4
+++ apropos_db.h	16 Nov 2011 00:38:26 -0000
@@ -34,6 +34,7 @@
 	 */
 	int		 lhs;
 	int		 rhs;
+	int		*matches;
 };
 
 struct	opts {
@@ -46,10 +47,9 @@
 struct	expr;
 
 void	 	 apropos_search(const struct opts *, 
-			const struct expr *, void *, 
+			const struct expr *, size_t, void *, 
 			void (*)(struct rec *, size_t, void *));
-
-struct	expr	*exprcomp(char *);
+struct	expr	*exprcomp(int, char *[], size_t *);
 void		 exprfree(struct expr *);
 
 __END_DECLS

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-11-16  0:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-16  0:40 AND/OR in apropos(1) Kristaps Dzonsons

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).