source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: After careful gprof(1)ing of the new apropos(1), move the
@ 2014-04-09 21:50 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2014-04-09 21:50 UTC (permalink / raw)
  To: source

Log Message:
-----------
After careful gprof(1)ing of the new apropos(1), move the descriptions
back from the keys table to the mpages table:  I found a good way
to still use them in searches without complicating the code.

On my notebook, this reduces typical apropos(1) search times by about 40%,
it reduces /usr/share/man database size by 6% in makewhatis(8) -Q mode
and by 2% in standard mode (less overhead storing pointers to mpages),
and it doesn't measurably change database build times (may even be 
going down by a percent or so because less data is being copied 
around in ohashes).

Modified Files:
--------------
    mdocml:
        mandocdb.c
        mansearch.c
        mansearch.h
        mansearch_const.c

Revision Data
-------------
Index: mansearch.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -Lmansearch.h -Lmansearch.h -u -p -r1.10 -r1.11
--- mansearch.h
+++ mansearch.h
@@ -22,45 +22,45 @@
 
 #define	TYPE_NAME	 0x0000000000000001ULL
 #define	TYPE_Nm		 0x0000000000000002ULL
-#define	TYPE_Nd		 0x0000000000000004ULL
-#define	TYPE_arch	 0x0000000000000008ULL
-#define	TYPE_sec	 0x0000000000000010ULL
-#define	TYPE_Xr		 0x0000000000000020ULL
-#define	TYPE_Ar		 0x0000000000000040ULL
-#define	TYPE_Fa		 0x0000000000000080ULL
-#define	TYPE_Fl		 0x0000000000000100ULL
-#define	TYPE_Dv		 0x0000000000000200ULL
-#define	TYPE_Fn		 0x0000000000000400ULL
-#define	TYPE_Ic		 0x0000000000000800ULL
-#define	TYPE_Pa		 0x0000000000001000ULL
-#define	TYPE_Cm		 0x0000000000002000ULL
-#define	TYPE_Li		 0x0000000000004000ULL
-#define	TYPE_Em		 0x0000000000008000ULL
-#define	TYPE_Cd		 0x0000000000010000ULL
-#define	TYPE_Va		 0x0000000000020000ULL
-#define	TYPE_Ft		 0x0000000000040000ULL
-#define	TYPE_Tn		 0x0000000000080000ULL
-#define	TYPE_Er		 0x0000000000100000ULL
-#define	TYPE_Ev		 0x0000000000200000ULL
-#define	TYPE_Sy		 0x0000000000400000ULL
-#define	TYPE_Sh		 0x0000000000800000ULL
-#define	TYPE_In		 0x0000000001000000ULL
-#define	TYPE_Ss		 0x0000000002000000ULL
-#define	TYPE_Ox		 0x0000000004000000ULL
-#define	TYPE_An		 0x0000000008000000ULL
-#define	TYPE_Mt		 0x0000000010000000ULL
-#define	TYPE_St		 0x0000000020000000ULL
-#define	TYPE_Bx		 0x0000000040000000ULL
-#define	TYPE_At		 0x0000000080000000ULL
-#define	TYPE_Nx		 0x0000000100000000ULL
-#define	TYPE_Fx		 0x0000000200000000ULL
-#define	TYPE_Lk		 0x0000000400000000ULL
-#define	TYPE_Ms		 0x0000000800000000ULL
-#define	TYPE_Bsx	 0x0000001000000000ULL
-#define	TYPE_Dx		 0x0000002000000000ULL
-#define	TYPE_Rs		 0x0000004000000000ULL
-#define	TYPE_Vt		 0x0000008000000000ULL
-#define	TYPE_Lb		 0x0000010000000000ULL
+#define	TYPE_arch	 0x0000000000000004ULL
+#define	TYPE_sec	 0x0000000000000008ULL
+#define	TYPE_Xr		 0x0000000000000010ULL
+#define	TYPE_Ar		 0x0000000000000020ULL
+#define	TYPE_Fa		 0x0000000000000040ULL
+#define	TYPE_Fl		 0x0000000000000080ULL
+#define	TYPE_Dv		 0x0000000000000100ULL
+#define	TYPE_Fn		 0x0000000000000200ULL
+#define	TYPE_Ic		 0x0000000000000400ULL
+#define	TYPE_Pa		 0x0000000000000800ULL
+#define	TYPE_Cm		 0x0000000000001000ULL
+#define	TYPE_Li		 0x0000000000002000ULL
+#define	TYPE_Em		 0x0000000000004000ULL
+#define	TYPE_Cd		 0x0000000000008000ULL
+#define	TYPE_Va		 0x0000000000010000ULL
+#define	TYPE_Ft		 0x0000000000020000ULL
+#define	TYPE_Tn		 0x0000000000040000ULL
+#define	TYPE_Er		 0x0000000000080000ULL
+#define	TYPE_Ev		 0x0000000000100000ULL
+#define	TYPE_Sy		 0x0000000000200000ULL
+#define	TYPE_Sh		 0x0000000000400000ULL
+#define	TYPE_In		 0x0000000000800000ULL
+#define	TYPE_Ss		 0x0000000001000000ULL
+#define	TYPE_Ox		 0x0000000002000000ULL
+#define	TYPE_An		 0x0000000004000000ULL
+#define	TYPE_Mt		 0x0000000008000000ULL
+#define	TYPE_St		 0x0000000010000000ULL
+#define	TYPE_Bx		 0x0000000020000000ULL
+#define	TYPE_At		 0x0000000040000000ULL
+#define	TYPE_Nx		 0x0000000080000000ULL
+#define	TYPE_Fx		 0x0000000100000000ULL
+#define	TYPE_Lk		 0x0000000200000000ULL
+#define	TYPE_Ms		 0x0000000400000000ULL
+#define	TYPE_Bsx	 0x0000000800000000ULL
+#define	TYPE_Dx		 0x0000001000000000ULL
+#define	TYPE_Rs		 0x0000002000000000ULL
+#define	TYPE_Vt		 0x0000004000000000ULL
+#define	TYPE_Lb		 0x0000008000000000ULL
+#define	TYPE_Nd		 0x0000010000000000ULL
 
 __BEGIN_DECLS
 
Index: mansearch.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -Lmansearch.c -Lmansearch.c -u -p -r1.25 -r1.26
--- mansearch.c
+++ mansearch.c
@@ -74,6 +74,7 @@ struct	expr {
 
 struct	match {
 	uint64_t	 id; /* identifier in database */
+	char		*desc; /* manual page description */
 	int		 form; /* 0 == catpage */
 };
 
@@ -220,7 +221,8 @@ mansearch(const struct mansearch *search
 				SQL_BIND_BLOB(db, s, j, ep->regexp);
 			} else
 				SQL_BIND_TEXT(db, s, j, ep->substr);
-			SQL_BIND_INT64(db, s, j, ep->bits);
+			if (0 == (TYPE_Nd & ep->bits))
+				SQL_BIND_INT64(db, s, j, ep->bits);
 		}
 
 		memset(&htab, 0, sizeof(struct ohash));
@@ -235,9 +237,9 @@ mansearch(const struct mansearch *search
 		 * distribution of buckets in the table.
 		 */
 		while (SQLITE_ROW == (c = sqlite3_step(s))) {
-			id = sqlite3_column_int64(s, 1);
+			id = sqlite3_column_int64(s, 2);
 			idx = ohash_lookup_memory
-				(&htab, (char *)&id, 
+				(&htab, (char *)&id,
 				 sizeof(uint64_t), (uint32_t)id);
 
 			if (NULL != ohash_find(&htab, idx))
@@ -245,7 +247,10 @@ mansearch(const struct mansearch *search
 
 			mp = mandoc_calloc(1, sizeof(struct match));
 			mp->id = id;
-			mp->form = sqlite3_column_int(s, 0);
+			mp->form = sqlite3_column_int(s, 1);
+			if (TYPE_Nd == outbit)
+				mp->desc = mandoc_strdup(
+				    sqlite3_column_text(s, 0));
 			ohash_insert(&htab, idx, mp);
 		}
 
@@ -279,7 +284,8 @@ mansearch(const struct mansearch *search
 			mpage->form = mp->form;
 			buildnames(mpage, db, s, mp->id,
 			    paths->paths[i], mp->form);
-			mpage->output = outbit ?
+			mpage->output = TYPE_Nd & outbit ?
+			    mp->desc : outbit ?
 			    buildoutput(db, s2, mp->id, outbit) : NULL;
 
 			free(mp);
@@ -493,11 +499,16 @@ sql_statement(const struct expr *e)
 			sql_append(&sql, &sz, " OR ", 1);
 		if (e->open)
 			sql_append(&sql, &sz, "(", e->open);
-		sql_append(&sql, &sz, NULL == e->substr ?
-		    "id IN (SELECT pageid FROM keys "
-		    "WHERE key REGEXP ? AND bits & ?)" :
-		    "id IN (SELECT pageid FROM keys "
-		    "WHERE key MATCH ? AND bits & ?)", 1);
+		sql_append(&sql, &sz,
+		    TYPE_Nd & e->bits
+		    ? (NULL == e->substr
+			? "desc REGEXP ?"
+			: "desc MATCH ?")
+		    : (NULL == e->substr
+			? "id IN (SELECT pageid FROM keys "
+			  "WHERE key REGEXP ? AND bits & ?)"
+			: "id IN (SELECT pageid FROM keys "
+			  "WHERE key MATCH ? AND bits & ?)"), 1);
 		if (e->close)
 			sql_append(&sql, &sz, ")", e->close);
 		needop = 1;
@@ -554,13 +565,29 @@ exprcomp(const struct mansearch *search,
 		next = exprterm(search, argv[i], !igncase);
 		if (NULL == next)
 			goto fail;
-		next->open = toopen;
-		next->and = (1 == logic);
-		if (NULL != first) {
+		if (NULL == first)
+			first = next;
+		else
 			cur->next = next;
-			cur = next;
+
+		/*
+		 * Searching for descriptions must be split out
+		 * because they are stored in the mpages table,
+		 * not in the keys table.
+		 */
+
+		if (TYPE_Nd & next->bits && ~TYPE_Nd & next->bits) {
+			cur = mandoc_calloc(1, sizeof(struct expr));
+			memcpy(cur, next, sizeof(struct expr));
+			next->open = 1;
+			next->bits = TYPE_Nd;
+			next->next = cur;
+			cur->bits &= ~TYPE_Nd;
+			cur->close = 1;
 		} else
-			cur = first = next;
+			cur = next;
+		next->and = (1 == logic);
+		next->open += toopen;
 		toopen = logic = igncase = 0;
 	}
 	if (toopen || logic || igncase || toclose)
Index: mandocdb.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandocdb.c,v
retrieving revision 1.131
retrieving revision 1.132
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.131 -r1.132
--- mandocdb.c
+++ mandocdb.c
@@ -224,7 +224,7 @@ static	const struct mdoc_handler mdocs[M
 	{ NULL, TYPE_Ic },  /* Ic */
 	{ NULL, TYPE_In },  /* In */
 	{ NULL, TYPE_Li },  /* Li */
-	{ parse_mdoc_Nd, TYPE_Nd },  /* Nd */
+	{ parse_mdoc_Nd, 0 },  /* Nd */
 	{ parse_mdoc_Nm, 0 },  /* Nm */
 	{ NULL, 0 },  /* Op */
 	{ NULL, 0 },  /* Ot */
@@ -1154,8 +1154,9 @@ mpages_merge(struct mchars *mc, struct m
 				putkey(mpage, cp, TYPE_Nm);
 			assert(NULL == mpage->desc);
 			parse_mdoc(mpage, mdoc_node(mdoc));
-			putkey(mpage, NULL != mpage->desc ?
-			    mpage->desc : mpage->mlinks->name, TYPE_Nd);
+			if (NULL == mpage->desc)
+				mpage->desc = mandoc_strdup(
+				    mpage->mlinks->name);
 		} else if (NULL != man)
 			parse_man(mpage, man_node(man));
 		else
@@ -1298,7 +1299,6 @@ parse_cat(struct mpage *mpage, int fd)
 			    "Cannot find NAME section");
 		assert(NULL == mpage->desc);
 		mpage->desc = mandoc_strdup(mpage->mlinks->name);
-		putkey(mpage, mpage->mlinks->name, TYPE_Nd);
 		fclose(stream);
 		free(title);
 		return;
@@ -1339,7 +1339,6 @@ parse_cat(struct mpage *mpage, int fd)
 
 	assert(NULL == mpage->desc);
 	mpage->desc = mandoc_strdup(p);
-	putkey(mpage, mpage->desc, TYPE_Nd);
 	fclose(stream);
 	free(title);
 }
@@ -1480,7 +1479,6 @@ parse_man(struct mpage *mpage, const str
 
 			assert(NULL == mpage->desc);
 			mpage->desc = mandoc_strdup(start);
-			putkey(mpage, mpage->desc, TYPE_Nd);
 			free(title);
 			return;
 		}
@@ -1928,16 +1926,7 @@ dbadd(struct mpage *mpage, struct mchars
 			if (NULL != mlink)
 				fputs(", ", stdout);
 		}
-		for (key = ohash_first(&strings, &slot); NULL != key;
-		     key = ohash_next(&strings, &slot)) {
-			if (TYPE_Nd & key->mask) {
-				if (NULL == key->rendered)
-					render_key(mc, key);
-				printf(" - %s", key->rendered);
-				break;
-			}
-		}
-		putchar('\n');
+		printf(" - %s\n", mpage->desc);
 		return;
 	}
 
@@ -1945,6 +1934,7 @@ dbadd(struct mpage *mpage, struct mchars
 		say(mlink->file, "Adding to database");
 
 	i = 1;
+	SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->desc);
 	SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
 	SQL_STEP(stmts[STMT_INSERT_PAGE]);
 	mpage->recno = sqlite3_last_insert_rowid(db);
@@ -2157,6 +2147,7 @@ dbopen(int real)
 
 create_tables:
 	sql = "CREATE TABLE \"mpages\" (\n"
+	      " \"desc\" TEXT NOT NULL,\n"
 	      " \"form\" INTEGER NOT NULL,\n"
 	      " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
 	      ");\n"
@@ -2189,7 +2180,7 @@ prepare_statements:
 		"sec=? AND arch=? AND name=?)";
 	sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
 	sql = "INSERT INTO mpages "
-		"(form) VALUES (?)";
+		"(desc,form) VALUES (?,?)";
 	sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
 	sql = "INSERT INTO mlinks "
 		"(sec,arch,name,pageid) VALUES (?,?,?,?)";
Index: mansearch_const.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mansearch_const.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -Lmansearch_const.c -Lmansearch_const.c -u -p -r1.2 -r1.3
--- mansearch_const.c
+++ mansearch_const.c
@@ -23,10 +23,10 @@
 const int mansearch_keymax = 41;
 
 const char *const mansearch_keynames[41] = {
-	"NAME",	"Nm",	"Nd",	"arch", "sec",	"Xr",	"Ar",	"Fa",
-	"Fl",	"Dv",	"Fn",	"Ic",	"Pa",	"Cm",	"Li",	"Em",
-	"Cd",	"Va",	"Ft",	"Tn",	"Er",	"Ev",	"Sy",	"Sh",
-	"In",	"Ss",	"Ox",	"An",	"Mt",	"St",	"Bx",	"At",
-	"Nx",	"Fx",	"Lk",	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",
-	"Lb"
+	"NAME",	"Nm",	"arch", "sec",	"Xr",	"Ar",	"Fa",	"Fl",
+	"Dv",	"Fn",	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",
+	"Va",	"Ft",	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",
+	"Ss",	"Ox",	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",
+	"Fx",	"Lk",	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",
+	"Nd"
 };
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-04-09 21:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-09 21:50 mdocml: After careful gprof(1)ing of the new apropos(1), move the schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).