source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: kristaps@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: Use a binary tree (for now, unbalanced) for deduping the records
Date: Sun, 9 Oct 2011 06:35:12 -0400 (EDT)	[thread overview]
Message-ID: <201110091035.p99AZCp6001336@krisdoz.my.domain> (raw)

Log Message:
-----------
Use a binary tree (for now, unbalanced) for deduping the records in the
results array.  This is much faster than the previous method, a linear
search, at a small cost.  Note that array offsets are used instead of
storing the res pointer because we may realloc the results vector.

Modified Files:
--------------
    mdocml:
        apropos.c

Revision Data
-------------
Index: apropos.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -Lapropos.c -Lapropos.c -u -p -r1.4 -r1.5
--- apropos.c
+++ apropos.c
@@ -103,6 +103,14 @@ struct	res {
 	char		*title; /* manual section */
 	char		*uri; /* formatted uri of file */
 	recno_t		 rec; /* unique id of underlying manual */
+	/*
+	 * Maintain a binary tree for checking the uniqueness of `rec'
+	 * when adding elements to the results array.
+	 * Since the results array is dynamic, use offset in the array
+	 * instead of a pointer to the structure.
+	 */
+	int		 lhs;
+	int		 rhs;
 };
 
 struct	state {
@@ -282,7 +290,7 @@ out:
 static void
 state_search(struct state *p, const struct opts *opts, char *q)
 {
-	int		 i, len, ch, rflags, dflag;
+	int		 leaf, root, len, ch, rflags, dflag;
 	struct mchars	*mc;
 	char		*buf;
 	size_t		 bufsz;
@@ -295,6 +303,7 @@ state_search(struct state *p, const stru
 	char		 filebuf[10];
 	struct rec	 record;
 
+	root = leaf = -1;
 	res = NULL;
 	len = 0;
 	buf = NULL;
@@ -400,13 +409,20 @@ state_search(struct state *p, const stru
 		if (opts->arch && strcasecmp(opts->arch, record.arch))
 			continue;
 
-		/* FIXME: this needs to be changed.  Ugh.  Linear. */
+		/* 
+		 * Do a binary search to dedupe the results tree of the
+		 * same record: we don't print the same file.
+		 */
 
-		for (i = 0; i < len; i++)
-			if (res[i].rec == record.rec)
+		for (leaf = root; leaf >= 0; )
+			if (rec > res[leaf].rec && res[leaf].rhs >= 0)
+				leaf = res[leaf].rhs;
+			else if (rec < res[leaf].rec && res[leaf].lhs >= 0)
+				leaf = res[leaf].lhs;
+			else 
 				break;
 
-		if (i < len)
+		if (leaf >= 0 && res[leaf].rec == rec)
 			continue;
 
 		res = mandoc_realloc
@@ -424,6 +440,7 @@ state_search(struct state *p, const stru
 
 		res[len].rec = record.rec;
 		res[len].types = fl;
+		res[len].lhs = res[len].rhs = -1;
 
 		buf_dup(mc, &res[len].keyword, buf);
 		buf_dup(mc, &res[len].uri, filebuf);
@@ -431,6 +448,15 @@ state_search(struct state *p, const stru
 		buf_dup(mc, &res[len].arch, record.arch);
 		buf_dup(mc, &res[len].title, record.title);
 		buf_dup(mc, &res[len].desc, record.desc);
+
+		if (leaf >= 0) {
+			if (record.rec > res[leaf].rec)
+				res[leaf].rhs = len;
+			else
+				res[leaf].lhs = len;
+		} else
+			root = len;
+
 		len++;
 	}
 
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2011-10-09 10:35 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201110091035.p99AZCp6001336@krisdoz.my.domain \
    --to=kristaps@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).