source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Next speed optimization step for the new apropos(1).
@ 2014-04-10  2:46 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2014-04-10  2:46 UTC (permalink / raw)
  To: source

Log Message:
-----------
Next speed optimization step for the new apropos(1).
Split manual names out of the common "keys" table into their 
own "names" table.  This reduces standard apropos(1) search 
times (i.e. searching for names and descriptions only) by 
typically about 70% for the full /usr/share/man database.      
(Yes, that multiplies with the previous optimization step,
so both together have reduced search times by a factor of 
more than six.  I'm not done yet, expect more to come.)

Even with the minimal databases built with makewhatis(8) -Q, 
this step still reduces search times by 15-20%.  For both cases,
database sizes and build times hardly change (+/-2%).

Modified Files:
--------------
    mdocml:
        mandocdb.c
        mansearch.c
        mansearch.h
        mansearch_const.c

Revision Data
-------------
Index: mansearch.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.h,v
retrieving revision 1.11
retrieving revision 1.12
diff -Lmansearch.h -Lmansearch.h -u -p -r1.11 -r1.12
--- mansearch.h
+++ mansearch.h
@@ -20,47 +20,53 @@
 
 #define	MANDOC_DB	 "mandoc.db"
 
-#define	TYPE_NAME	 0x0000000000000001ULL
-#define	TYPE_Nm		 0x0000000000000002ULL
-#define	TYPE_arch	 0x0000000000000004ULL
-#define	TYPE_sec	 0x0000000000000008ULL
-#define	TYPE_Xr		 0x0000000000000010ULL
-#define	TYPE_Ar		 0x0000000000000020ULL
-#define	TYPE_Fa		 0x0000000000000040ULL
-#define	TYPE_Fl		 0x0000000000000080ULL
-#define	TYPE_Dv		 0x0000000000000100ULL
-#define	TYPE_Fn		 0x0000000000000200ULL
-#define	TYPE_Ic		 0x0000000000000400ULL
-#define	TYPE_Pa		 0x0000000000000800ULL
-#define	TYPE_Cm		 0x0000000000001000ULL
-#define	TYPE_Li		 0x0000000000002000ULL
-#define	TYPE_Em		 0x0000000000004000ULL
-#define	TYPE_Cd		 0x0000000000008000ULL
-#define	TYPE_Va		 0x0000000000010000ULL
-#define	TYPE_Ft		 0x0000000000020000ULL
-#define	TYPE_Tn		 0x0000000000040000ULL
-#define	TYPE_Er		 0x0000000000080000ULL
-#define	TYPE_Ev		 0x0000000000100000ULL
-#define	TYPE_Sy		 0x0000000000200000ULL
-#define	TYPE_Sh		 0x0000000000400000ULL
-#define	TYPE_In		 0x0000000000800000ULL
-#define	TYPE_Ss		 0x0000000001000000ULL
-#define	TYPE_Ox		 0x0000000002000000ULL
-#define	TYPE_An		 0x0000000004000000ULL
-#define	TYPE_Mt		 0x0000000008000000ULL
-#define	TYPE_St		 0x0000000010000000ULL
-#define	TYPE_Bx		 0x0000000020000000ULL
-#define	TYPE_At		 0x0000000040000000ULL
-#define	TYPE_Nx		 0x0000000080000000ULL
-#define	TYPE_Fx		 0x0000000100000000ULL
-#define	TYPE_Lk		 0x0000000200000000ULL
-#define	TYPE_Ms		 0x0000000400000000ULL
-#define	TYPE_Bsx	 0x0000000800000000ULL
-#define	TYPE_Dx		 0x0000001000000000ULL
-#define	TYPE_Rs		 0x0000002000000000ULL
-#define	TYPE_Vt		 0x0000004000000000ULL
-#define	TYPE_Lb		 0x0000008000000000ULL
-#define	TYPE_Nd		 0x0000010000000000ULL
+#define	TYPE_arch	 0x0000000000000001ULL
+#define	TYPE_sec	 0x0000000000000002ULL
+#define	TYPE_Xr		 0x0000000000000004ULL
+#define	TYPE_Ar		 0x0000000000000008ULL
+#define	TYPE_Fa		 0x0000000000000010ULL
+#define	TYPE_Fl		 0x0000000000000020ULL
+#define	TYPE_Dv		 0x0000000000000040ULL
+#define	TYPE_Fn		 0x0000000000000080ULL
+#define	TYPE_Ic		 0x0000000000000100ULL
+#define	TYPE_Pa		 0x0000000000000200ULL
+#define	TYPE_Cm		 0x0000000000000400ULL
+#define	TYPE_Li		 0x0000000000000800ULL
+#define	TYPE_Em		 0x0000000000001000ULL
+#define	TYPE_Cd		 0x0000000000002000ULL
+#define	TYPE_Va		 0x0000000000004000ULL
+#define	TYPE_Ft		 0x0000000000008000ULL
+#define	TYPE_Tn		 0x0000000000010000ULL
+#define	TYPE_Er		 0x0000000000020000ULL
+#define	TYPE_Ev		 0x0000000000040000ULL
+#define	TYPE_Sy		 0x0000000000080000ULL
+#define	TYPE_Sh		 0x0000000000100000ULL
+#define	TYPE_In		 0x0000000000200000ULL
+#define	TYPE_Ss		 0x0000000000400000ULL
+#define	TYPE_Ox		 0x0000000000800000ULL
+#define	TYPE_An		 0x0000000001000000ULL
+#define	TYPE_Mt		 0x0000000002000000ULL
+#define	TYPE_St		 0x0000000004000000ULL
+#define	TYPE_Bx		 0x0000000008000000ULL
+#define	TYPE_At		 0x0000000010000000ULL
+#define	TYPE_Nx		 0x0000000020000000ULL
+#define	TYPE_Fx		 0x0000000040000000ULL
+#define	TYPE_Lk		 0x0000000080000000ULL
+#define	TYPE_Ms		 0x0000000100000000ULL
+#define	TYPE_Bsx	 0x0000000200000000ULL
+#define	TYPE_Dx		 0x0000000400000000ULL
+#define	TYPE_Rs		 0x0000000800000000ULL
+#define	TYPE_Vt		 0x0000001000000000ULL
+#define	TYPE_Lb		 0x0000002000000000ULL
+#define	TYPE_Nm		 0x0000004000000000ULL
+#define	TYPE_Nd		 0x0000008000000000ULL
+
+#define	NAME_SYN	 0x0000004000000001ULL
+#define	NAME_FILE	 0x0000004000000002ULL
+#define	NAME_TITLE	 0x000000400000000cULL
+#define	NAME_FIRST	 0x0000004000000008ULL
+#define	NAME_HEAD	 0x0000004000000010ULL
+#define	NAME_MASK	 0x000000000000001fULL
 
 __BEGIN_DECLS
 
Index: mansearch.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.c,v
retrieving revision 1.26
retrieving revision 1.27
diff -Lmansearch.c -Lmansearch.c -u -p -r1.26 -r1.27
--- mansearch.c
+++ mansearch.c
@@ -221,7 +221,7 @@ mansearch(const struct mansearch *search
 				SQL_BIND_BLOB(db, s, j, ep->regexp);
 			} else
 				SQL_BIND_TEXT(db, s, j, ep->substr);
-			if (0 == (TYPE_Nd & ep->bits))
+			if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
 				SQL_BIND_INT64(db, s, j, ep->bits);
 		}
 
@@ -504,6 +504,12 @@ sql_statement(const struct expr *e)
 		    ? (NULL == e->substr
 			? "desc REGEXP ?"
 			: "desc MATCH ?")
+		    : TYPE_Nm == e->bits
+		    ? (NULL == e->substr
+			? "id IN (SELECT pageid FROM names "
+			  "WHERE name REGEXP ?)"
+			: "id IN (SELECT pageid FROM names "
+			  "WHERE name MATCH ?)")
 		    : (NULL == e->substr
 			? "id IN (SELECT pageid FROM keys "
 			  "WHERE key REGEXP ? AND bits & ?)"
@@ -525,8 +531,9 @@ sql_statement(const struct expr *e)
 static struct expr *
 exprcomp(const struct mansearch *search, int argc, char *argv[])
 {
+	uint64_t	 mask;
 	int		 i, toopen, logic, igncase, toclose;
-	struct expr	*first, *next, *cur;
+	struct expr	*first, *prev, *cur, *next;
 
 	first = cur = NULL;
 	logic = igncase = toclose = 0;
@@ -569,6 +576,7 @@ exprcomp(const struct mansearch *search,
 			first = next;
 		else
 			cur->next = next;
+		prev = cur = next;
 
 		/*
 		 * Searching for descriptions must be split out
@@ -576,18 +584,23 @@ exprcomp(const struct mansearch *search,
 		 * not in the keys table.
 		 */
 
-		if (TYPE_Nd & next->bits && ~TYPE_Nd & next->bits) {
-			cur = mandoc_calloc(1, sizeof(struct expr));
-			memcpy(cur, next, sizeof(struct expr));
-			next->open = 1;
-			next->bits = TYPE_Nd;
-			next->next = cur;
-			cur->bits &= ~TYPE_Nd;
+		for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
+			if (mask & cur->bits && ~mask & cur->bits) {
+				next = mandoc_calloc(1,
+				    sizeof(struct expr));
+				memcpy(next, cur, sizeof(struct expr));
+				prev->open = 1;
+				cur->bits = mask;
+				cur->next = next;
+				cur = next;
+				cur->bits &= ~mask;
+			}
+		}
+		prev->and = (1 == logic);
+		prev->open += toopen;
+		if (cur != prev)
 			cur->close = 1;
-		} else
-			cur = next;
-		next->and = (1 == logic);
-		next->open += toopen;
+
 		toopen = logic = igncase = 0;
 	}
 	if (toopen || logic || igncase || toclose)
Index: mandocdb.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v
retrieving revision 1.132
retrieving revision 1.133
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.132 -r1.133
--- mandocdb.c
+++ mandocdb.c
@@ -126,6 +126,7 @@ enum	stmt {
 	STMT_DELETE_PAGE = 0,	/* delete mpage */
 	STMT_INSERT_PAGE,	/* insert mpage */
 	STMT_INSERT_LINK,	/* insert mlink */
+	STMT_INSERT_NAME,	/* insert name */
 	STMT_INSERT_KEY,	/* insert parsed key */
 	STMT__MAX
 };
@@ -188,9 +189,11 @@ static	enum op	  	 op; /* operational mo
 static	char		 basedir[PATH_MAX]; /* current base directory */
 static	struct ohash	 mpages; /* table of distinct manual pages */
 static	struct ohash	 mlinks; /* table of directory entries */
+static	struct ohash	 names; /* table of all names */
 static	struct ohash	 strings; /* table of all strings */
 static	sqlite3		*db = NULL; /* current database */
 static	sqlite3_stmt	*stmts[STMT__MAX]; /* current statements */
+static	uint64_t	 name_mask;
 
 static	const struct mdoc_handler mdocs[MDOC_MAX] = {
 	{ NULL, 0 },  /* Ap */
@@ -963,7 +966,7 @@ mlink_check(struct mpage *mpage, struct 
 
 	/*
 	 * XXX
-	 * parse_cat() doesn't set TYPE_Nm and TYPE_NAME yet.
+	 * parse_cat() doesn't set NAME_TITLE yet.
 	 */
 
 	if (FORM_CAT == mpage->form)
@@ -974,10 +977,10 @@ mlink_check(struct mpage *mpage, struct 
 	 * appears as a name in the NAME section.
 	 */
 
-	slot = ohash_qlookup(&strings, mlink->name);
-	str = ohash_find(&strings, slot);
+	slot = ohash_qlookup(&names, mlink->name);
+	str = ohash_find(&names, slot);
 	assert(NULL != str);
-	if ( ! (TYPE_NAME & str->mask))
+	if ( ! (NAME_TITLE & str->mask))
 		say(mlink->file, "Name missing in NAME section");
 }
 
@@ -1021,6 +1024,8 @@ mpages_merge(struct mchars *mc, struct m
 			continue;
 		}
 
+		name_mask = NAME_MASK;
+		ohash_init(&names, 4, &str_info);
 		ohash_init(&strings, 6, &str_info);
 		mparse_reset(mp);
 		mdoc = NULL;
@@ -1146,12 +1151,12 @@ mpages_merge(struct mchars *mc, struct m
 				putkey(mpage, mlink->fsec, TYPE_sec);
 			putkey(mpage, '\0' == *mlink->arch ?
 			    any : mlink->arch, TYPE_arch);
-			putkey(mpage, mlink->name, TYPE_Nm);
+			putkey(mpage, mlink->name, NAME_FILE);
 		}
 
 		if (NULL != mdoc) {
 			if (NULL != (cp = mdoc_meta(mdoc)->name))
-				putkey(mpage, cp, TYPE_Nm);
+				putkey(mpage, cp, NAME_HEAD);
 			assert(NULL == mpage->desc);
 			parse_mdoc(mpage, mdoc_node(mdoc));
 			if (NULL == mpage->desc)
@@ -1187,6 +1192,7 @@ nextpage:
 			}
 		}
 		ohash_delete(&strings);
+		ohash_delete(&names);
 		mpage = ohash_next(&mpages, &pslot);
 	}
 
@@ -1204,11 +1210,11 @@ names_check(void)
 
 	sqlite3_prepare_v2(db,
 	  "SELECT name, sec, arch, key FROM ("
-	    "SELECT key, pageid FROM keys "
+	    "SELECT name AS key, pageid FROM names "
 	    "WHERE bits & ? AND NOT EXISTS ("
 	      "SELECT pageid FROM mlinks "
-	      "WHERE mlinks.pageid == keys.pageid "
-	      "AND mlinks.name == keys.key"
+	      "WHERE mlinks.pageid == names.pageid "
+	      "AND mlinks.name == names.name"
 	    ")"
 	  ") JOIN ("
 	    "SELECT * FROM mlinks GROUP BY pageid"
@@ -1216,7 +1222,7 @@ names_check(void)
 	  -1, &stmt, NULL);
 
 	i = 1;
-	SQL_BIND_INT64(stmt, i, TYPE_NAME);
+	SQL_BIND_INT64(stmt, i, NAME_TITLE);
 
 	while (SQLITE_ROW == (irc = sqlite3_step(stmt))) {
 		name = sqlite3_column_text(stmt, 0);
@@ -1441,7 +1447,7 @@ parse_man(struct mpage *mpage, const str
 				    ('\\' == start[0] && '-' == start[1]))
 					break;
 
-				putkey(mpage, start, TYPE_NAME | TYPE_Nm);
+				putkey(mpage, start, NAME_TITLE);
 
 				if (' ' == byte) {
 					start += sz + 1;
@@ -1455,7 +1461,7 @@ parse_man(struct mpage *mpage, const str
 			}
 
 			if (start == title) {
-				putkey(mpage, start, TYPE_NAME | TYPE_Nm);
+				putkey(mpage, start, NAME_TITLE);
 				free(title);
 				return;
 			}
@@ -1632,9 +1638,9 @@ parse_mdoc_Nm(struct mpage *mpage, const
 {
 
 	if (SEC_NAME == n->sec)
-		putmdockey(mpage, n->child, TYPE_NAME | TYPE_Nm);
+		putmdockey(mpage, n->child, NAME_TITLE);
 	else if (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type)
-		putmdockey(mpage, n->child, TYPE_Nm);
+		putmdockey(mpage, n->child, NAME_SYN);
 	return(0);
 }
 
@@ -1668,28 +1674,35 @@ static void
 putkeys(const struct mpage *mpage,
 	const char *cp, size_t sz, uint64_t v)
 {
+	struct ohash	*htab;
 	struct str	*s;
 	const char	*end;
-	uint64_t	 mask;
 	unsigned int	 slot;
 	int		 i;
 
 	if (0 == sz)
 		return;
 
-	if (debug > 1) {
-		for (i = 0, mask = 1;
-		     i < mansearch_keymax;
-		     i++, mask <<= 1)
-			if (mask & v)
-				break;
-		say(mpage->mlinks->file, "Adding key %s=%*s",
-		    mansearch_keynames[i], sz, cp);
+	if (TYPE_Nm & v) {
+		htab = &names;
+		v &= name_mask;
+		name_mask &= ~NAME_FIRST;
+		if (debug > 1)
+			say(mpage->mlinks->file,
+			    "Adding name %*s", sz, cp);
+	} else {
+		htab = &strings;
+		if (debug > 1)
+		    for (i = 0; i < mansearch_keymax; i++)
+			if (1 << i & v)
+			    say(mpage->mlinks->file,
+				"Adding key %s=%*s",
+				mansearch_keynames[i], sz, cp);
 	}
 
 	end = cp + sz;
-	slot = ohash_qlookupi(&strings, cp, &end);
-	s = ohash_find(&strings, slot);
+	slot = ohash_qlookupi(htab, cp, &end);
+	s = ohash_find(htab, slot);
 
 	if (NULL != s && mpage == s->mpage) {
 		s->mask |= v;
@@ -1697,7 +1710,7 @@ putkeys(const struct mpage *mpage,
 	} else if (NULL == s) {
 		s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
 		memcpy(s->key, cp, sz);
-		ohash_insert(&strings, slot, s);
+		ohash_insert(htab, slot, s);
 	}
 	s->mpage = mpage;
 	s->mask = v;
@@ -1945,6 +1958,21 @@ dbadd(struct mpage *mpage, struct mchars
 		mlink = mlink->next;
 	}
 
+	for (key = ohash_first(&names, &slot); NULL != key;
+	     key = ohash_next(&names, &slot)) {
+		assert(key->mpage == mpage);
+		if (NULL == key->rendered)
+			render_key(mc, key);
+		i = 1;
+		SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask);
+		SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->rendered);
+		SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->recno);
+		SQL_STEP(stmts[STMT_INSERT_NAME]);
+		sqlite3_reset(stmts[STMT_INSERT_NAME]);
+		if (key->rendered != key->key)
+			free(key->rendered);
+		free(key);
+	}
 	for (key = ohash_first(&strings, &slot); NULL != key;
 	     key = ohash_next(&strings, &slot)) {
 		assert(key->mpage == mpage);
@@ -2160,6 +2188,13 @@ create_tables:
 		"ON DELETE CASCADE\n"
 	      ");\n"
 	      "\n"
+	      "CREATE TABLE \"names\" (\n"
+	      " \"bits\" INTEGER NOT NULL,\n"
+	      " \"name\" TEXT NOT NULL,\n"
+	      " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+		"ON DELETE CASCADE\n"
+	      ");\n"
+	      "\n"
 	      "CREATE TABLE \"keys\" (\n"
 	      " \"bits\" INTEGER NOT NULL,\n"
 	      " \"key\" TEXT NOT NULL,\n"
@@ -2185,6 +2220,9 @@ prepare_statements:
 	sql = "INSERT INTO mlinks "
 		"(sec,arch,name,pageid) VALUES (?,?,?,?)";
 	sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
+	sql = "INSERT INTO names "
+		"(bits,name,pageid) VALUES (?,?,?)";
+	sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL);
 	sql = "INSERT INTO keys "
 		"(bits,key,pageid) VALUES (?,?,?)";
 	sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
Index: mansearch_const.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch_const.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -Lmansearch_const.c -Lmansearch_const.c -u -p -r1.3 -r1.4
--- mansearch_const.c
+++ mansearch_const.c
@@ -20,13 +20,12 @@
 #include "manpath.h"
 #include "mansearch.h"
 
-const int mansearch_keymax = 41;
+const int mansearch_keymax = 40;
 
-const char *const mansearch_keynames[41] = {
-	"NAME",	"Nm",	"arch", "sec",	"Xr",	"Ar",	"Fa",	"Fl",
-	"Dv",	"Fn",	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",
-	"Va",	"Ft",	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",
-	"Ss",	"Ox",	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",
-	"Fx",	"Lk",	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",
-	"Nd"
+const char *const mansearch_keynames[40] = {
+	"arch", "sec",	"Xr",	"Ar",	"Fa",	"Fl",	"Dv",	"Fn",
+	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",	"Va",	"Ft",
+	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",	"Ss",	"Ox",
+	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",	"Fx",	"Lk",
+	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",	"Nm",	"Nd"
 };
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-04-10  2:46 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-10  2:46 mdocml: Next speed optimization step for the new apropos(1) schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).