tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Wesley Moore <wes@wezm.net>
To: tech@mandoc.bsd.lv
Cc: Wesley Moore <wes@wezm.net>
Subject: [PATCH 1/3] Use ohash for strtab
Date: Mon, 13 Nov 2023 11:42:43 +1000	[thread overview]
Message-ID: <20231113014330.2247710-2-wes@wezm.net> (raw)
In-Reply-To: <20231113014330.2247710-1-wes@wezm.net>

---
 roff.c     | 190 +++++++++++++++++++++++++++++++++++++++++++++--------
 roff_int.h |   6 ++
 2 files changed, 170 insertions(+), 26 deletions(-)

diff --git a/roff.c b/roff.c
index bdb0210..4e5bbf9 100644
--- a/roff.c
+++ b/roff.c
@@ -71,6 +71,14 @@ struct	roffkv {
 	struct roffkv	*next; /* next in list */
 };
 
+/*
+ * A key-value string pair used as an entry in an ohash.
+ */
+struct	roff_entry {
+	struct roffstr	 val;
+	char		 key[];
+};
+
 /*
  * A single number register as part of a singly-linked list.
  */
@@ -106,8 +114,8 @@ struct	roff {
 	int		*rstack; /* stack of inverted `ie' values */
 	struct ohash	*reqtab; /* request lookup table */
 	struct roffreg	*regtab; /* number registers */
-	struct roffkv	*strtab; /* user-defined strings & macros */
 	struct roffkv	*rentab; /* renamed strings & macros */
+	struct ohash	*strtab; /* user-defined strings & macros */
 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
 	struct roffstr	*xtab; /* single-byte trans table (`tr') */
 	const char	*current_string; /* value of last called user macro */
@@ -204,6 +212,7 @@ static	void		 roff_expand_patch(struct buf *, int,
 static	void		 roff_free1(struct roff *);
 static	void		 roff_freereg(struct roffreg *);
 static	void		 roff_freestr(struct roffkv *);
+static	void		 roff_free_entry(struct roff_entry *entry);
 static	size_t		 roff_getname(struct roff *, char **, int, int);
 static	int		 roff_getnum(const char *, int *, int *, int);
 static	int		 roff_getop(const char *, int *, char *);
@@ -244,6 +253,8 @@ static	void		 roff_setstr(struct roff *,
 				const char *, const char *, int);
 static	void		 roff_setstrn(struct roffkv **, const char *,
 				size_t, const char *, size_t, int);
+static	void		 roff_setentry(struct ohash *r, const char *name,
+				size_t namesz, const char *string, size_t stringsz, int append);
 static	int		 roff_shift(ROFF_ARGS);
 static	int		 roff_so(ROFF_ARGS);
 static	int		 roff_tr(ROFF_ARGS);
@@ -687,6 +698,55 @@ roffhash_find(struct ohash *htab, const char *name, size_t sz)
 	return req == NULL ? TOKEN_NONE : req->tok;
 }
 
+/* --- key-value table ---------------------------------------------------- */
+
+struct ohash *
+roff_strhash_alloc(void)
+{
+	struct ohash	*htab;
+
+	htab = mandoc_malloc(sizeof(*htab));
+	mandoc_ohash_init(htab, 6, offsetof(struct roff_entry, key));
+	return htab;
+}
+
+void
+roff_strhash_free(struct ohash *htab)
+{
+	struct roff_entry	*entry;
+	unsigned int	 slot;
+
+	if (htab == NULL)
+		return;
+	for (entry = ohash_first(htab, &slot); entry != NULL;
+	     entry = ohash_next(htab, &slot))
+		roff_free_entry(entry);
+	ohash_delete(htab);
+	free(htab);
+}
+
+struct roff_entry	*
+roff_strhash_find(struct ohash *htab, const char *name, size_t sz)
+{
+	struct roff_entry	*entry;
+	const char	*end;
+
+	if (sz) {
+		end = name + sz;
+		entry = ohash_find(htab, ohash_qlookupi(htab, name, &end));
+	} else
+		entry = ohash_find(htab, ohash_qlookup(htab, name));
+	return entry;
+}
+
+void
+roff_strhash_insert(struct ohash *htab, struct roff_entry *entry) {
+	unsigned int slot;
+
+	slot = ohash_qlookup(htab, entry->key);
+	ohash_insert(htab, slot, entry);
+}
+
 /* --- stack of request blocks -------------------------------------------- */
 
 /*
@@ -757,10 +817,11 @@ roff_free1(struct roff *r)
 	roff_freereg(r->regtab);
 	r->regtab = NULL;
 
-	roff_freestr(r->strtab);
+	roff_strhash_free(r->strtab);
 	roff_freestr(r->rentab);
 	roff_freestr(r->xmbtab);
-	r->strtab = r->rentab = r->xmbtab = NULL;
+	r->strtab = NULL;
+	r->rentab = r->xmbtab = NULL;
 
 	if (r->xtab)
 		for (i = 0; i < 128; i++)
@@ -773,6 +834,7 @@ void
 roff_reset(struct roff *r)
 {
 	roff_free1(r);
+	r->strtab = roff_strhash_alloc();
 	r->options |= MPARSE_COMMENT;
 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
 	r->control = '\0';
@@ -803,6 +865,7 @@ roff_alloc(int options)
 
 	r = mandoc_calloc(1, sizeof(struct roff));
 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
+	r->strtab = roff_strhash_alloc();
 	r->options = options | MPARSE_COMMENT;
 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
 	r->mstackpos = -1;
@@ -1458,7 +1521,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
 			if (iendarg - iarg == 2 &&
 			    buf->buf[iarg] == '.' &&
 			    buf->buf[iarg + 1] == 'T') {
-				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
+				roff_setentry(r->strtab, ".T", 2, NULL, 0, 0);
 				pos = iend;
 				continue;
 			}
@@ -2021,8 +2084,8 @@ roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
 		*pos = cp - buf;
 	else if (deftype == ROFFDEF_UNDEF) {
 		/* Using an undefined macro defines it to be empty. */
-		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
+		roff_setentry(r->strtab, mac, maclen, "", 0, 0);
 	}
 	return t;
 }
@@ -2189,21 +2252,21 @@ roff_block(ROFF_ARGS)
 	 */
 
 	if (tok == ROFF_de || tok == ROFF_dei) {
-		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
+		roff_setentry(r->strtab, name, namesz, "", 0, 0);
 	} else if (tok == ROFF_am || tok == ROFF_ami) {
 		deftype = ROFFDEF_ANY;
 		value = roff_getstrn(r, iname, namesz, &deftype);
 		switch (deftype) {  /* Before appending, ... */
 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
-			roff_setstrn(&r->strtab, name, namesz,
+			roff_setentry(r->strtab, name, namesz,
 			    value, strlen(value), 0);
 			break;
 		case ROFFDEF_REN: /* call original standard macro. */
 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
 			    (int)strlen(value), value);
-			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
+			roff_setentry(r->strtab, name, namesz, call, csz, 0);
 			free(call);
 			break;
 		case ROFFDEF_STD:  /* rename and call standard macro. */
@@ -2211,7 +2274,7 @@ roff_block(ROFF_ARGS)
 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
 			    (int)rsz, rname);
-			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
+			roff_setentry(r->strtab, name, namesz, call, csz, 0);
 			free(call);
 			free(rname);
 			break;
@@ -2787,7 +2850,7 @@ roff_ds(ROFF_ARGS)
 		string++;
 
 	/* The rest is the value. */
-	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
+	roff_setentry(r->strtab, name, namesz, string, strlen(string),
 	    ROFF_as == tok);
 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
 	return ROFF_IGN;
@@ -3218,8 +3281,8 @@ roff_rm(ROFF_ARGS)
 	while (*cp != '\0') {
 		name = cp;
 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
-		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
+		roff_setentry(r->strtab, name, namesz, NULL, 0, 0);
 		if (name[namesz] == '\\' || name[namesz] == '\t')
 			break;
 	}
@@ -3565,8 +3628,8 @@ roff_als(ROFF_ARGS)
 
 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
 	    (int)oldsz, oldn);
-	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
+	roff_setentry(r->strtab, newn, newsz, value, valsz, 0);
 	free(value);
 	return ROFF_IGN;
 }
@@ -3849,26 +3912,26 @@ roff_rn(ROFF_ARGS)
 	value = roff_getstrn(r, oldn, oldsz, &deftype);
 	switch (deftype) {
 	case ROFFDEF_USER:
-		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
-		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
+		roff_setentry(r->strtab, newn, newsz, value, strlen(value), 0);
+		roff_setentry(r->strtab, oldn, oldsz, NULL, 0, 0);
 		break;
 	case ROFFDEF_PRE:
-		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
+		roff_setentry(r->strtab, newn, newsz, value, strlen(value), 0);
 		break;
 	case ROFFDEF_REN:
 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
-		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
+		roff_setentry(r->strtab, newn, newsz, NULL, 0, 0);
 		break;
 	case ROFFDEF_STD:
 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
-		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
+		roff_setentry(r->strtab, newn, newsz, NULL, 0, 0);
 		break;
 	default:
-		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
+		roff_setentry(r->strtab, newn, newsz, NULL, 0, 0);
 		break;
 	}
 	return ROFF_IGN;
@@ -4102,7 +4165,7 @@ roff_setstr(struct roff *r, const char *name, const char *string,
 	size_t	 namesz;
 
 	namesz = strlen(name);
-	roff_setstrn(&r->strtab, name, namesz, string,
+	roff_setentry(r->strtab, name, namesz, string,
 	    string ? strlen(string) : 0, append);
 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
 }
@@ -4179,25 +4242,93 @@ roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
 	n->val.sz = (int)(c - n->val.p);
 }
 
+static void
+roff_setentry(struct ohash *r, const char *name, size_t namesz,
+		const char *string, size_t stringsz, int append)
+{
+	struct roff_entry	*entry;
+	char			*c;
+	int			 i;
+	size_t			 oldch, newch;
+
+	/* Search for an existing string with the same name. */
+	entry = roff_strhash_find(r, name, namesz);
+	if (NULL == entry) {
+		/* Create a new string table entry. */
+		entry = mandoc_malloc(sizeof(*entry) + namesz + 1);
+		memcpy(entry->key, name, namesz);
+		entry->key[namesz] = '\0';
+
+		entry->val.p = NULL;
+		entry->val.sz = 0;
+		roff_strhash_insert(r, entry);
+	} else if (0 == append) {
+		/* string is already in hash table, free it in
+		 * preparation for updating.
+		 */
+		free(entry->val.p);
+		entry->val.p = NULL;
+		entry->val.sz = 0;
+	}
+
+	if (NULL == string)
+		return;
+
+	/*
+	 * One additional byte for the '\n' in multiline mode,
+	 * and one for the terminating '\0'.
+	 */
+	newch = stringsz + (1 < append ? 2u : 1u);
+
+	if (NULL == entry->val.p) {
+		entry->val.p = mandoc_malloc(newch);
+		*entry->val.p = '\0';
+		oldch = 0;
+	} else {
+		oldch = entry->val.sz;
+		entry->val.p = mandoc_realloc(entry->val.p, oldch + newch);
+	}
+
+	/* Skip existing content in the destination buffer. */
+	c = entry->val.p + (int)oldch;
+
+	/* Append new content to the destination buffer. */
+	i = 0;
+	while (i < (int)stringsz) {
+		/*
+		 * Rudimentary roff copy mode:
+		 * Handle escaped backslashes.
+		 */
+		if ('\\' == string[i] && '\\' == string[i + 1])
+			i++;
+		*c++ = string[i++];
+	}
+
+	/* Append terminating bytes. */
+	if (1 < append)
+		*c++ = '\n';
+
+	*c = '\0';
+	entry->val.sz = (int)(c - entry->val.p);
+}
+
 static const char *
 roff_getstrn(struct roff *r, const char *name, size_t len,
     int *deftype)
 {
 	const struct roffkv	*n;
 	int			 found, i;
+	const struct roff_entry	*entry;
 	enum roff_tok		 tok;
 
 	found = 0;
-	for (n = r->strtab; n != NULL; n = n->next) {
-		if (strncmp(name, n->key.p, len) != 0 ||
-		    n->key.p[len] != '\0' || n->val.p == NULL)
-			continue;
+	entry = roff_strhash_find(r->strtab, name, len);
+	if (entry != NULL && entry->val.p != NULL) {
 		if (*deftype & ROFFDEF_USER) {
 			*deftype = ROFFDEF_USER;
-			return n->val.p;
+			return entry->val.p;
 		} else {
 			found = 1;
-			break;
 		}
 	}
 	for (n = r->rentab; n != NULL; n = n->next) {
@@ -4265,8 +4396,8 @@ roff_getstrn(struct roff *r, const char *name, size_t len,
 
 		/* Using an undefined string defines it to be empty. */
 
-		roff_setstrn(&r->strtab, name, len, "", 0, 0);
 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
+		roff_setentry(r->strtab, name, len, "", 0, 0);
 	}
 
 	*deftype = 0;
@@ -4286,6 +4417,13 @@ roff_freestr(struct roffkv *r)
 	}
 }
 
+static void
+roff_free_entry(struct roff_entry *entry)
+{
+	free(entry->val.p);
+	free(entry);
+}
+
 /* --- accessors and utility functions ------------------------------------ */
 
 /*
diff --git a/roff_int.h b/roff_int.h
index a26afa9..b18a782 100644
--- a/roff_int.h
+++ b/roff_int.h
@@ -19,6 +19,7 @@
  */
 
 struct	ohash;
+struct	roff_entry;
 struct	roff_node;
 struct	roff_meta;
 struct	roff;
@@ -82,6 +83,11 @@ struct ohash	 *roffhash_alloc(enum roff_tok, enum roff_tok);
 enum roff_tok	  roffhash_find(struct ohash *, const char *, size_t);
 void		  roffhash_free(struct ohash *);
 
+struct ohash	 *roff_strhash_alloc(void);
+struct roff_entry*roff_strhash_find(struct ohash *htab, const char *name, size_t sz);
+void		  roff_strhash_insert(struct ohash *htab, struct roff_entry *entry);
+void		  roff_strhash_free(struct ohash *htab);
+
 enum mandoc_esc	  roff_escape(const char *, const int, const int,
 			int *, int *, int *, int *, int *);
 void		  roff_state_reset(struct roff_man *);
-- 
2.42.1

--
 To unsubscribe send an email to tech+unsubscribe@mandoc.bsd.lv


  reply	other threads:[~2023-11-13  1:44 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-13  1:42 Improve performance of makewhatis Wesley Moore
2023-11-13  1:42 ` Wesley Moore [this message]
2023-11-13  1:42 ` [PATCH 2/3] Use ohash for rentab Wesley Moore
2023-11-13  1:42 ` [PATCH 3/3] Use ohash for predefined strings Wesley Moore
2023-11-15  1:26 ` Improve performance of makewhatis Ingo Schwarze
2023-11-15  8:29   ` Wesley Moore
2023-11-15 17:36     ` Ingo Schwarze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231113014330.2247710-2-wes@wezm.net \
    --to=wes@wezm.net \
    --cc=tech@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).