tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: Re: overhaul apropos(1) interface
Date: Sun, 13 Nov 2011 00:54:10 +0100	[thread overview]
Message-ID: <20111112235410.GC16229@iris.usta.de> (raw)
In-Reply-To: <20111109013044.GA25679@iris.usta.de>

Hi,

here is a new version of my overhaul after Kristaps' changes.
The patch given below is not likely to apply cleanly to anybody's
repo, it requires the apropos_db.* rename first.  It is meant
for quick review; if you agree with the direction, i'll figure
out how to get this in cleanly.

So, here is what the overhaul does:

 * Remove -I from the main program.
   Logically, that's not a global option,
   but a per-expression thingy.
 * Sort the arguments of exprcomp.
   All the world uses (argc, argv),
   why should we suddenly go for (argv, argc)?
 * I agree with dropping MATCH_EXACT:
   That's rarely needed and can easily be constructed
   as MATCH_REGEX with ^...$.
 * For the same reason, let's drop case-sensitive MATCH_STR:
   It's rarely needed and can easily be constructed with MATCH_REGEX.
 * Keeping MATCH_STR seems OK: It is needed as the default
   when the type is unspecified.
 * The MATCH_REGEXCASE enum item is unused already now,
   since the iflag is compiled into the regex object.
   So drop that one as well.
 * Only two enum items remain; that's better and more easily
   expressed by a single boolean integer (expr.regex).
 * The exprexec() function requires a mask argument,
   or all search keys will act as "any" (important bug fix
   along the way!).

The most massive changes are in exprcomp().
I strongly dislike the proposed interface.
It is cumbersome and requires too much typing.
The -eq and -re arguments are exceedingly ugly, and the
implementation is hard to get right - if i remember
correctly, not all cases work properly right now.

Thus, i have simplified my interface proposal to just this:

  apropos [-s section] [-S arch] query_phrase [...]

  query_phrase ::= [[macro[,...]](=|~)]query_value

So, the value to be searched for can optionally
be preceded by '=' (for string search) or '~' (for regex search),
and that can optionally be preceded by one or more macro names,
joined by commas.  Including "i" among the macros switches
regex searches to case-insensitive and has no effect on
string searches.

OK?
  Ingo


--- apropos.c.orig
+++ apropos.c
@@ -33,7 +33,7 @@ static	char	*progname;
 int
 apropos(int argc, char *argv[])
 {
-	int		 ch, cs;
+	int		 ch;
 	struct opts	 opts;
 	struct expr	*e;
 	extern int	 optind;
@@ -47,9 +47,7 @@ apropos(int argc, char *argv[])
 	else
 		++progname;
 
-	cs = 0;
-
-	while (-1 != (ch = getopt(argc, argv, "S:s:I"))) 
+	while (-1 != (ch = getopt(argc, argv, "S:s:"))) 
 		switch (ch) {
 		case ('S'):
 			opts.arch = optarg;
@@ -57,9 +55,6 @@ apropos(int argc, char *argv[])
 		case ('s'):
 			opts.cat = optarg;
 			break;
-		case ('I'):
-			cs = 1;
-			break;
 		default:
 			usage();
 			return(EXIT_FAILURE);
@@ -71,7 +66,7 @@ apropos(int argc, char *argv[])
 	if (0 == argc) 
 		return(EXIT_SUCCESS);
 
-	if (NULL == (e = exprcomp(cs, argv, argc))) {
+	if (NULL == (e = exprcomp(argc, argv))) {
 		fprintf(stderr, "Bad expression\n");
 		return(EXIT_FAILURE);
 	}
--- apropos_db.c.orig
+++ apropos_db.c
@@ -1,6 +1,7 @@
 /*	$Id: db.c,v 1.1 2011/11/09 01:24:23 kristaps Exp $ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -31,15 +32,8 @@
 #include "apropos_db.h"
 #include "mandoc.h"
 
-enum	match {
-	MATCH_REGEX,
-	MATCH_REGEXCASE,
-	MATCH_STR,
-	MATCH_STRCASE
-};
-
 struct	expr {
-	enum match	 match;
+	int		 regex;
 	int	 	 mask;
 	char		*v;
 	regex_t	 	 re;
@@ -71,7 +65,7 @@ static	const struct type types[] = {
 
 static	DB	*btree_open(void);
 static	int	 btree_read(const DBT *, const struct mchars *, char **);
-static	int	 exprexec(const struct expr *, char *);
+static	int	 exprexec(const struct expr *, char *, int);
 static	DB	*index_open(void);
 static	int	 index_read(const DBT *, const DBT *, 
 			const struct mchars *, struct rec *);
@@ -368,7 +362,7 @@ apropos_search(const struct opts *opts, const struct expr *expr,
 		if ( ! btree_read(&key, mc, &buf))
 			break;
 
-		if ( ! exprexec(expr, buf))
+		if ( ! exprexec(expr, buf, *(int *)val.data))
 			continue;
 
 		memcpy(&rec, val.data + 4, sizeof(recno_t));
@@ -460,55 +454,55 @@ out:
 }
 
 struct expr *
-exprcomp(int cs, char *argv[], int argc)
+exprcomp(int argc, char *argv[])
 {
 	struct expr	*p;
 	struct expr	 e;
-	int		 i, pos, ch;
+	char		*key;
+	int		 i, icase;
 
-	pos = 0;
-
-	if (pos > argc)
+	if (0 >= argc)
 		return(NULL);
 
-	for (i = 0; 0 != types[i].mask; i++)
-		if (0 == strcmp(types[i].name, argv[pos]))
-			break;
-
-	if (0 == (e.mask = types[i].mask))
-		return(NULL);
-
-	if (++pos > argc--)
-		return(NULL);
+	/*
+	 * Choose regex or substring match.
+	 */
 
-	if ('-' != *argv[pos]) 
-		e.match = cs ? MATCH_STRCASE : MATCH_STR;
-	else if (0 == strcmp("-eq", argv[pos]))
-		e.match = cs ? MATCH_STRCASE : MATCH_STR;
-	else if (0 == strcmp("-ieq", argv[pos]))
-		e.match = MATCH_STRCASE;
-	else if (0 == strcmp("-re", argv[pos]))
-		e.match = cs ? MATCH_REGEXCASE : MATCH_REGEX;
-	else if (0 == strcmp("-ire", argv[pos]))
-		e.match = MATCH_REGEXCASE;
-	else
-		return(NULL);
+	if (NULL == (e.v = strpbrk(*argv, "=~"))) {
+		e.regex = 0;
+		e.v = *argv;
+	} else {
+		e.regex = '~' == *e.v;
+		*e.v++ = '\0';
+	}
 
-	if ('-' == *argv[pos])
-		pos++;
+	/*
+	 * Determine the record types to search for.
+	 */
+
+	icase = 0;
+	e.mask = 0;
+	if (*argv < e.v) {
+		while (NULL != (key = strsep(argv, ","))) {
+			if ('i' == key[0] && '\0' == key[1]) {
+				icase = REG_ICASE;
+				continue;
+			}
+			i = 0;
+			while (types[i].mask &&
+			    strcmp(types[i].name, key))
+				i++;
+			e.mask |= types[i].mask;
+		}
+	}
+	if (0 == e.mask)
+		e.mask = TYPE_Nm | TYPE_Nd;
 
-	if (pos > argc--)
+	if (e.regex &&
+	    regcomp(&e.re, e.v, REG_EXTENDED | REG_NOSUB | icase))
 		return(NULL);
 
-	e.v = mandoc_strdup(argv[pos]);
-
-	if (MATCH_REGEX == e.match || MATCH_REGEXCASE == e.match) {
-		ch = REG_EXTENDED | REG_NOSUB;
-		if (MATCH_REGEXCASE == e.match)
-			ch |= REG_ICASE;
-		if (regcomp(&e.re, e.v, ch))
-			return(NULL);
-	}
+	e.v = mandoc_strdup(e.v);
 
 	p = mandoc_calloc(1, sizeof(struct expr));
 	memcpy(p, &e, sizeof(struct expr));
@@ -522,7 +516,7 @@ exprfree(struct expr *p)
 	if (NULL == p)
 		return;
 
-	if (MATCH_REGEX == p->match)
+	if (p->regex)
 		regfree(&p->re);
 
 	free(p->v);
@@ -530,14 +524,14 @@ exprfree(struct expr *p)
 }
 
 static int
-exprexec(const struct expr *p, char *cp)
+exprexec(const struct expr *p, char *cp, int mask)
 {
 
-	if (MATCH_STR == p->match)
-		return(0 == strcmp(p->v, cp));
-	else if (MATCH_STRCASE == p->match)
-		return(0 == strcasecmp(p->v, cp));
+	if ( ! (mask & p->mask))
+		return(0);
 
-	assert(MATCH_REGEX == p->match);
-	return(0 == regexec(&p->re, cp, 0, NULL, 0));
+	if (p->regex)
+		return(0 == regexec(&p->re, cp, 0, NULL, 0));
+	else
+		return(NULL != strcasestr(cp, p->v));
 }
--- apropos_db.h.orig
+++ apropos_db.h
@@ -49,7 +49,7 @@ void	 	 apropos_search(const struct opts *,
 			const struct expr *, void *, 
 			void (*)(struct rec *, size_t, void *));
 
-struct	expr	*exprcomp(int, char *[], int);
+struct	expr	*exprcomp(int, char *[]);
 void		 exprfree(struct expr *);
 
 __END_DECLS
--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv

  parent reply	other threads:[~2011-11-12 23:54 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-09  1:30 Ingo Schwarze
2011-11-09 10:19 ` Kristaps Dzonsons
2011-11-12 23:54 ` Ingo Schwarze [this message]
2011-11-13  0:07   ` Kristaps Dzonsons
2011-11-13  0:39     ` Ingo Schwarze
2011-11-13  9:23       ` Kristaps Dzonsons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111112235410.GC16229@iris.usta.de \
    --to=schwarze@usta.de \
    --cc=tech@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).