source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: Fix whatis(1) to correctly match words instead of any substrings
Date: Sat, 12 Jul 2014 10:00:26 -0400 (EDT)	[thread overview]
Message-ID: <201407121400.s6CE0PRl020859@krisdoz.my.domain> (raw)

Log Message:
-----------
Fix whatis(1) to correctly match words instead of any substrings.
While here, also provide an internal mode (MANSEARCH_MAN) to match
complete names, to be used by man.cgi(8).

Modified Files:
--------------
    mdocml:
        mansearch.c
        mansearch.h

Revision Data
-------------
Index: mansearch.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.h,v
retrieving revision 1.13
retrieving revision 1.14
diff -Lmansearch.h -Lmansearch.h -u -p -r1.13 -r1.14
--- mansearch.h
+++ mansearch.h
@@ -82,7 +82,8 @@ struct	mansearch {
 	const char	*sec; /* mansection/NULL */
 	uint64_t	 deftype; /* type if no key  */
 	int		 flags;
-#define	MANSEARCH_WHATIS 0x01 /* whatis mode: equality, no key */
+#define	MANSEARCH_WHATIS 0x01 /* whatis(1) mode: whole words, no keys */
+#define	MANSEARCH_MAN    0x02 /* man(1) mode: string equality, no keys */
 };
 
 int	mansearch_setup(int);
Index: mansearch.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.c,v
retrieving revision 1.37
retrieving revision 1.38
diff -Lmansearch.c -Lmansearch.c -u -p -r1.37 -r1.38
--- mansearch.c
+++ mansearch.c
@@ -64,13 +64,14 @@ extern const char *const mansearch_keyna
 	} while (0)
 
 struct	expr {
-	uint64_t	 bits;    /* type-mask */
-	const char	*substr;  /* to search for, if applicable */
 	regex_t		 regexp;  /* compiled regexp, if applicable */
+	const char	*substr;  /* to search for, if applicable */
+	struct expr	*next;    /* next in sequence */
+	uint64_t	 bits;    /* type-mask */
+	int		 equal;   /* equality, not substring match */
 	int		 open;    /* opening parentheses before */
 	int		 and;	  /* logical AND before */
 	int		 close;   /* closing parentheses after */
-	struct expr	*next;    /* next in sequence */
 };
 
 struct	match {
@@ -561,6 +562,9 @@ sql_statement(const struct expr *e)
 		    ? (NULL == e->substr
 			? "pageid IN (SELECT pageid FROM names "
 			  "WHERE name REGEXP ?)"
+			: e->equal
+			? "pageid IN (SELECT pageid FROM names "
+			  "WHERE name = ?)"
 			: "pageid IN (SELECT pageid FROM names "
 			  "WHERE name MATCH ?)")
 		    : (NULL == e->substr
@@ -702,7 +706,7 @@ exprterm(const struct mansearch *search,
 {
 	char		 errbuf[BUFSIZ];
 	struct expr	*e;
-	char		*key, *v;
+	char		*key, *val;
 	uint64_t	 iterbit;
 	int		 i, irc;
 
@@ -711,40 +715,64 @@ exprterm(const struct mansearch *search,
 
 	e = mandoc_calloc(1, sizeof(struct expr));
 
-	/*"whatis" mode uses an opaque string and default fields. */
-
-	if (MANSEARCH_WHATIS & search->flags) {
-		e->substr = buf;
+	if (MANSEARCH_MAN & search->flags) {
 		e->bits = search->deftype;
+		e->substr = buf;
+		e->equal = 1;
 		return(e);
 	}
 
 	/*
-	 * If no =~ is specified, search with equality over names and
-	 * descriptions.
-	 * If =~ begins the phrase, use name and description fields.
+	 * Look for an '=' or '~' operator,
+	 * unless forced to some fixed macro keys.
 	 */
 
-	if (NULL == (v = strpbrk(buf, "=~"))) {
-		e->substr = buf;
-		e->bits = search->deftype;
-		return(e);
-	} else if (v == buf)
+	if (MANSEARCH_WHATIS & search->flags)
+		val = NULL;
+	else
+		val = strpbrk(buf, "=~");
+
+	if (NULL == val) {
 		e->bits = search->deftype;
+		e->substr = buf;
 
-	if ('~' == *v++) {
+	/*
+	 * Found an operator.
+	 * Regexp search is requested by !e->substr.
+	 */
+
+	} else {
+		if (val == buf)
+			e->bits = search->deftype;
+		if ('=' == *val)
+			e->substr = val + 1;
+		*val++ = '\0';
 		if (NULL != strstr(buf, "arch"))
 			cs = 0;
-		if (0 != (irc = regcomp(&e->regexp, v,
-		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
+	}
+
+	/* Compile regular expressions. */
+
+	if (MANSEARCH_WHATIS & search->flags) {
+		e->substr = NULL;
+		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
+	}
+
+	if (NULL == e->substr) {
+		irc = regcomp(&e->regexp, val,
+		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
+		if (MANSEARCH_WHATIS & search->flags)
+			free(val);
+		if (irc) {
 			regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
 			fprintf(stderr, "regcomp: %s\n", errbuf);
 			free(e);
 			return(NULL);
 		}
-	} else
-		e->substr = v;
-	v[-1] = '\0';
+	}
+
+	if (e->bits)
+		return(e);
 
 	/*
 	 * Parse out all possible fields.
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2014-07-12 14:00 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201407121400.s6CE0PRl020859@krisdoz.my.domain \
    --to=schwarze@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).