From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: New implementation of complex search criteria using \(, \), -a
Date: Sat, 4 Jan 2014 18:43:53 -0500 (EST) [thread overview]
Message-ID: <201401042343.s04Nhrl1028056@krisdoz.my.domain> (raw)
Log Message:
-----------
New implementation of complex search criteria using \(, \), -a because
the old implementation got lost in the Berkeley to SQLite switch.
Note that this is not just feature creep, but required for upcoming
database format cleanup and simplification.
Modified Files:
--------------
mdocml:
mansearch.c
Revision Data
-------------
Index: mansearch.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -Lmansearch.c -Lmansearch.c -u -p -r1.12 -r1.13
--- mansearch.c
+++ mansearch.c
@@ -1,7 +1,7 @@
/* $Id$ */
/*
* Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -62,6 +62,9 @@ struct expr {
uint64_t bits; /* type-mask */
const char *substr; /* to search for, if applicable */
regex_t regexp; /* compiled regexp, if applicable */
+ int open; /* opening parentheses before */
+ int and; /* logical AND before */
+ int close; /* closing parentheses after */
struct expr *next; /* next in sequence */
};
@@ -131,6 +134,8 @@ static struct expr *exprcomp(const struc
int, char *[]);
static void exprfree(struct expr *);
static struct expr *exprterm(const struct mansearch *, char *, int);
+static void sql_append(char **sql, size_t *sz,
+ const char *newstr, int count);
static void sql_match(sqlite3_context *context,
int argc, sqlite3_value **argv);
static void sql_regexp(sqlite3_context *context,
@@ -276,7 +281,7 @@ mansearch(const struct mansearch *search
* distribution of buckets in the table.
*/
while (SQLITE_ROW == (c = sqlite3_step(s))) {
- id = sqlite3_column_int64(s, 0);
+ id = sqlite3_column_int64(s, 5);
idx = ohash_lookup_memory
(&htab, (char *)&id,
sizeof(uint64_t), (uint32_t)id);
@@ -287,10 +292,10 @@ mansearch(const struct mansearch *search
mp = mandoc_calloc(1, sizeof(struct match));
mp->id = id;
mp->file = mandoc_strdup
- ((char *)sqlite3_column_text(s, 3));
+ ((char *)sqlite3_column_text(s, 0));
mp->desc = mandoc_strdup
- ((char *)sqlite3_column_text(s, 4));
- mp->form = sqlite3_column_int(s, 5);
+ ((char *)sqlite3_column_text(s, 3));
+ mp->form = sqlite3_column_int(s, 4);
ohash_insert(&htab, idx, mp);
}
@@ -454,55 +459,57 @@ sql_regexp(sqlite3_context *context, int
0, NULL, 0));
}
+static void
+sql_append(char **sql, size_t *sz, const char *newstr, int count)
+{
+ size_t newsz;
+
+ newsz = 1 < count ? (size_t)count : strlen(newstr);
+ *sql = mandoc_realloc(*sql, *sz + newsz + 1);
+ if (1 < count)
+ memset(*sql + *sz, *newstr, (size_t)count);
+ else
+ memcpy(*sql + *sz, newstr, newsz);
+ *sz += newsz;
+ (*sql)[*sz] = '\0';
+}
+
/*
* Prepare the search SQL statement.
- * We search for any of the words specified in our match expression.
- * We filter the per-doc AND expressions when collecting results.
*/
static char *
sql_statement(const struct expr *e, const char *arch, const char *sec)
{
char *sql;
- const char *substr = "(key MATCH ? AND bits & ?)";
- const char *regexp = "(key REGEXP ? AND bits & ?)";
- const char *andarch = "arch = ? AND ";
- const char *andsec = "sec = ? AND ";
- size_t substrsz;
- size_t regexpsz;
size_t sz;
+ int needop;
- sql = mandoc_strdup
- ("SELECT pageid,bits,key,file,desc,form,sec,arch "
- "FROM keys "
- "INNER JOIN mpages ON mpages.id=keys.pageid "
- "WHERE ");
+ sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
sz = strlen(sql);
- substrsz = strlen(substr);
- regexpsz = strlen(regexp);
-
- if (NULL != arch) {
- sz += strlen(andarch) + 1;
- sql = mandoc_realloc(sql, sz);
- strlcat(sql, andarch, sz);
- }
- if (NULL != sec) {
- sz += strlen(andsec) + 1;
- sql = mandoc_realloc(sql, sz);
- strlcat(sql, andsec, sz);
- }
-
- sz += 2;
- sql = mandoc_realloc(sql, sz);
- strlcat(sql, "(", sz);
-
- for ( ; NULL != e; e = e->next) {
- sz += (NULL == e->substr ? regexpsz : substrsz) +
- (NULL == e->next ? 3 : 5);
- sql = mandoc_realloc(sql, sz);
- strlcat(sql, NULL == e->substr ? regexp : substr, sz);
- strlcat(sql, NULL == e->next ? ");" : " OR ", sz);
+ if (NULL != arch)
+ sql_append(&sql, &sz, "arch = ? AND ", 1);
+ if (NULL != sec)
+ sql_append(&sql, &sz, "sec = ? AND ", 1);
+ sql_append(&sql, &sz, "(", 1);
+
+ for (needop = 0; NULL != e; e = e->next) {
+ if (e->and)
+ sql_append(&sql, &sz, " AND ", 1);
+ else if (needop)
+ sql_append(&sql, &sz, " OR ", 1);
+ if (e->open)
+ sql_append(&sql, &sz, "(", e->open);
+ sql_append(&sql, &sz, NULL == e->substr ?
+ "id IN (SELECT pageid FROM keys "
+ "WHERE key REGEXP ? AND bits & ?)" :
+ "id IN (SELECT pageid FROM keys "
+ "WHERE key MATCH ? AND bits & ?)", 1);
+ if (e->close)
+ sql_append(&sql, &sz, ")", e->close);
+ needop = 1;
}
+ sql_append(&sql, &sz, ")", 1);
return(sql);
}
@@ -515,31 +522,60 @@ sql_statement(const struct expr *e, cons
static struct expr *
exprcomp(const struct mansearch *search, int argc, char *argv[])
{
- int i, cs;
+ int i, toopen, logic, igncase, toclose;
struct expr *first, *next, *cur;
first = cur = NULL;
+ toopen = logic = igncase = toclose = 0;
for (i = 0; i < argc; i++) {
- if (0 == strcmp("-i", argv[i])) {
- if (++i >= argc)
- return(NULL);
- cs = 0;
- } else
- cs = 1;
- next = exprterm(search, argv[i], cs);
- if (NULL == next) {
- exprfree(first);
- return(NULL);
+ if (0 == strcmp("(", argv[i])) {
+ if (igncase)
+ goto fail;
+ toopen++;
+ toclose++;
+ continue;
+ } else if (0 == strcmp(")", argv[i])) {
+ if (toopen || logic || igncase || NULL == cur)
+ goto fail;
+ cur->close++;
+ if (0 > --toclose)
+ goto fail;
+ continue;
+ } else if (0 == strcmp("-a", argv[i])) {
+ if (toopen || logic || igncase || NULL == cur)
+ goto fail;
+ logic = 1;
+ continue;
+ } else if (0 == strcmp("-o", argv[i])) {
+ if (toopen || logic || igncase || NULL == cur)
+ goto fail;
+ logic = 2;
+ continue;
+ } else if (0 == strcmp("-i", argv[i])) {
+ if (igncase)
+ goto fail;
+ igncase = 1;
+ continue;
}
+ next = exprterm(search, argv[i], !igncase);
+ if (NULL == next)
+ goto fail;
+ next->open = toopen;
+ next->and = (1 == logic);
if (NULL != first) {
cur->next = next;
cur = next;
} else
cur = first = next;
+ toopen = logic = igncase = 0;
}
-
- return(first);
+ if ( ! (toopen || logic || igncase || toclose))
+ return(first);
+fail:
+ if (NULL != first)
+ exprfree(first);
+ return(NULL);
}
static struct expr *
--
To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv
reply | other threads: [~2014-01-04 23:43 UTC | newest]
Thread overview: [no followups] | expand [flat | nested] | mbox.gz | Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201401042343.s04Nhrl1028056@krisdoz.my.domain \
--to=schwarze@mdocml.bsd.lv \
--cc=source@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).