From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id s04Nhrxx004436 for ; Sat, 4 Jan 2014 18:43:53 -0500 (EST) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.5/8.14.3/Submit) id s04Nhrl1028056; Sat, 4 Jan 2014 18:43:53 -0500 (EST) Date: Sat, 4 Jan 2014 18:43:53 -0500 (EST) Message-Id: <201401042343.s04Nhrl1028056@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: New implementation of complex search criteria using \(, \), -a X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- New implementation of complex search criteria using \(, \), -a because the old implementation got lost in the Berkeley to SQLite switch. Note that this is not just feature creep, but required for upcoming database format cleanup and simplification. Modified Files: -------------- mdocml: mansearch.c Revision Data ------------- Index: mansearch.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.c,v retrieving revision 1.12 retrieving revision 1.13 diff -Lmansearch.c -Lmansearch.c -u -p -r1.12 -r1.13 --- mansearch.c +++ mansearch.c @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2012 Kristaps Dzonsons - * Copyright (c) 2013 Ingo Schwarze + * Copyright (c) 2013, 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -62,6 +62,9 @@ struct expr { uint64_t bits; /* type-mask */ const char *substr; /* to search for, if applicable */ regex_t regexp; /* compiled regexp, if applicable */ + int open; /* opening parentheses before */ + int and; /* logical AND before */ + int close; /* closing parentheses after */ struct expr *next; /* next in sequence */ }; @@ -131,6 +134,8 @@ static struct expr *exprcomp(const struc int, char *[]); static void exprfree(struct expr *); static struct expr *exprterm(const struct mansearch *, char *, int); +static void sql_append(char **sql, size_t *sz, + const char *newstr, int count); static void sql_match(sqlite3_context *context, int argc, sqlite3_value **argv); static void sql_regexp(sqlite3_context *context, @@ -276,7 +281,7 @@ mansearch(const struct mansearch *search * distribution of buckets in the table. */ while (SQLITE_ROW == (c = sqlite3_step(s))) { - id = sqlite3_column_int64(s, 0); + id = sqlite3_column_int64(s, 5); idx = ohash_lookup_memory (&htab, (char *)&id, sizeof(uint64_t), (uint32_t)id); @@ -287,10 +292,10 @@ mansearch(const struct mansearch *search mp = mandoc_calloc(1, sizeof(struct match)); mp->id = id; mp->file = mandoc_strdup - ((char *)sqlite3_column_text(s, 3)); + ((char *)sqlite3_column_text(s, 0)); mp->desc = mandoc_strdup - ((char *)sqlite3_column_text(s, 4)); - mp->form = sqlite3_column_int(s, 5); + ((char *)sqlite3_column_text(s, 3)); + mp->form = sqlite3_column_int(s, 4); ohash_insert(&htab, idx, mp); } @@ -454,55 +459,57 @@ sql_regexp(sqlite3_context *context, int 0, NULL, 0)); } +static void +sql_append(char **sql, size_t *sz, const char *newstr, int count) +{ + size_t newsz; + + newsz = 1 < count ? (size_t)count : strlen(newstr); + *sql = mandoc_realloc(*sql, *sz + newsz + 1); + if (1 < count) + memset(*sql + *sz, *newstr, (size_t)count); + else + memcpy(*sql + *sz, newstr, newsz); + *sz += newsz; + (*sql)[*sz] = '\0'; +} + /* * Prepare the search SQL statement. - * We search for any of the words specified in our match expression. - * We filter the per-doc AND expressions when collecting results. */ static char * sql_statement(const struct expr *e, const char *arch, const char *sec) { char *sql; - const char *substr = "(key MATCH ? AND bits & ?)"; - const char *regexp = "(key REGEXP ? AND bits & ?)"; - const char *andarch = "arch = ? AND "; - const char *andsec = "sec = ? AND "; - size_t substrsz; - size_t regexpsz; size_t sz; + int needop; - sql = mandoc_strdup - ("SELECT pageid,bits,key,file,desc,form,sec,arch " - "FROM keys " - "INNER JOIN mpages ON mpages.id=keys.pageid " - "WHERE "); + sql = mandoc_strdup("SELECT * FROM mpages WHERE "); sz = strlen(sql); - substrsz = strlen(substr); - regexpsz = strlen(regexp); - - if (NULL != arch) { - sz += strlen(andarch) + 1; - sql = mandoc_realloc(sql, sz); - strlcat(sql, andarch, sz); - } - if (NULL != sec) { - sz += strlen(andsec) + 1; - sql = mandoc_realloc(sql, sz); - strlcat(sql, andsec, sz); - } - - sz += 2; - sql = mandoc_realloc(sql, sz); - strlcat(sql, "(", sz); - - for ( ; NULL != e; e = e->next) { - sz += (NULL == e->substr ? regexpsz : substrsz) + - (NULL == e->next ? 3 : 5); - sql = mandoc_realloc(sql, sz); - strlcat(sql, NULL == e->substr ? regexp : substr, sz); - strlcat(sql, NULL == e->next ? ");" : " OR ", sz); + if (NULL != arch) + sql_append(&sql, &sz, "arch = ? AND ", 1); + if (NULL != sec) + sql_append(&sql, &sz, "sec = ? AND ", 1); + sql_append(&sql, &sz, "(", 1); + + for (needop = 0; NULL != e; e = e->next) { + if (e->and) + sql_append(&sql, &sz, " AND ", 1); + else if (needop) + sql_append(&sql, &sz, " OR ", 1); + if (e->open) + sql_append(&sql, &sz, "(", e->open); + sql_append(&sql, &sz, NULL == e->substr ? + "id IN (SELECT pageid FROM keys " + "WHERE key REGEXP ? AND bits & ?)" : + "id IN (SELECT pageid FROM keys " + "WHERE key MATCH ? AND bits & ?)", 1); + if (e->close) + sql_append(&sql, &sz, ")", e->close); + needop = 1; } + sql_append(&sql, &sz, ")", 1); return(sql); } @@ -515,31 +522,60 @@ sql_statement(const struct expr *e, cons static struct expr * exprcomp(const struct mansearch *search, int argc, char *argv[]) { - int i, cs; + int i, toopen, logic, igncase, toclose; struct expr *first, *next, *cur; first = cur = NULL; + toopen = logic = igncase = toclose = 0; for (i = 0; i < argc; i++) { - if (0 == strcmp("-i", argv[i])) { - if (++i >= argc) - return(NULL); - cs = 0; - } else - cs = 1; - next = exprterm(search, argv[i], cs); - if (NULL == next) { - exprfree(first); - return(NULL); + if (0 == strcmp("(", argv[i])) { + if (igncase) + goto fail; + toopen++; + toclose++; + continue; + } else if (0 == strcmp(")", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + cur->close++; + if (0 > --toclose) + goto fail; + continue; + } else if (0 == strcmp("-a", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + logic = 1; + continue; + } else if (0 == strcmp("-o", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + logic = 2; + continue; + } else if (0 == strcmp("-i", argv[i])) { + if (igncase) + goto fail; + igncase = 1; + continue; } + next = exprterm(search, argv[i], !igncase); + if (NULL == next) + goto fail; + next->open = toopen; + next->and = (1 == logic); if (NULL != first) { cur->next = next; cur = next; } else cur = first = next; + toopen = logic = igncase = 0; } - - return(first); + if ( ! (toopen || logic || igncase || toclose)) + return(first); +fail: + if (NULL != first) + exprfree(first); + return(NULL); } static struct expr * -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv