source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mandoc.bsd.lv
To: source@mandoc.bsd.lv
Subject: mandoc: 1.
Date: Fri, 24 Nov 2023 05:02:21 +0000 (UTC)	[thread overview]
Message-ID: <d679f07a4926f7e2@mandoc.bsd.lv> (raw)

Log Message:
-----------
1. Do not put ASCII_HYPH (0x1c) into the tag file.
That happened when tagging a string containing '-' on an input text line,
most commonly in the next-line scope of a man(7) .TP macro.
2. Do not let "\-" end the tag.
In both cases, translate ASCII_HYPH and "\-" to plain '-' for output.
For example, this improves handling of unbound.conf(5).

These two bugs were found thanks to a posting by weerd@.

Modified Files:
--------------
    mandoc:
        TODO
        tag.c
    mandoc/regress/mdoc/Cm:
        tag.out_html
        tag.out_tag

Revision Data
-------------
Index: tag.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/tag.c,v
retrieving revision 1.37
retrieving revision 1.38
diff -Ltag.c -Ltag.c -u -p -r1.37 -r1.38
--- tag.c
+++ tag.c
@@ -1,6 +1,6 @@
 /* $Id$ */
 /*
- * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022
+ * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
  *               Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -26,11 +26,13 @@
 #include <limits.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "mandoc_aux.h"
 #include "mandoc_ohash.h"
+#include "mandoc.h"
 #include "roff.h"
 #include "mdoc.h"
 #include "roff_int.h"
@@ -88,9 +90,11 @@ tag_put(const char *s, int prio, struct 
 {
 	struct tag_entry	*entry;
 	struct roff_node	*nold;
-	const char		*se;
+	const char		*se, *src;
+	char			*cpy;
 	size_t			 len;
 	unsigned int		 slot;
+	int			 changed;
 
 	assert(prio <= TAG_FALLBACK);
 
@@ -106,6 +110,7 @@ tag_put(const char *s, int prio, struct 
 
 	/* Determine the implicit tag. */
 
+	changed = 1;
 	if (s == NULL) {
 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
 			return;
@@ -122,27 +127,53 @@ tag_put(const char *s, int prio, struct 
 				s += 2;
 				break;
 			default:
-				break;
+				return;
 			}
 			break;
 		default:
+			changed = 0;
 			break;
 		}
 	}
 
 	/*
+	 * Translate \- and ASCII_HYPH to plain '-'.
 	 * Skip whitespace and escapes and whatever follows,
 	 * and if there is any, downgrade the priority.
 	 */
 
-	len = strcspn(s, " \t\\");
+	cpy = mandoc_malloc(strlen(s) + 1);
+	for (src = s, len = 0; *src != '\0'; src++, len++) {
+		switch (*src) {
+		case '\t':
+		case ' ':
+			changed = 1;
+			break;
+		case ASCII_HYPH:
+			cpy[len] = '-';
+			changed = 1;
+			continue;
+		case '\\':
+			if (src[1] != '-')
+				break;
+			src++;
+			changed = 1;
+			/* FALLTHROUGH */
+		default:
+			cpy[len] = *src;
+			continue;
+		}
+		break;
+	}
 	if (len == 0)
-		return;
+		goto out;
+	cpy[len] = '\0';
 
-	se = s + len;
-	if (*se != '\0' && prio < TAG_WEAK)
+	if (*src != '\0' && prio < TAG_WEAK)
 		prio = TAG_WEAK;
 
+	s = cpy;
+	se = cpy + len;
 	slot = ohash_qlookupi(&tag_data, s, &se);
 	entry = ohash_find(&tag_data, slot);
 
@@ -150,8 +181,7 @@ tag_put(const char *s, int prio, struct 
 
 	if (entry == NULL) {
 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
-		memcpy(entry->s, s, len);
-		entry->s[len] = '\0';
+		memcpy(entry->s, s, len + 1);
 		entry->nodes = NULL;
 		entry->maxnodes = entry->nnodes = 0;
 		ohash_insert(&tag_data, slot, entry);
@@ -163,7 +193,7 @@ tag_put(const char *s, int prio, struct 
 	 */
 
 	else if (entry->prio < prio)
-		return;
+		goto out;
 
 	/*
 	 * If the existing entry is worse, clear it.
@@ -180,7 +210,7 @@ tag_put(const char *s, int prio, struct 
 		}
 		if (prio == TAG_FALLBACK) {
 			entry->prio = TAG_DELETE;
-			return;
+			goto out;
 		}
 	}
 
@@ -194,10 +224,13 @@ tag_put(const char *s, int prio, struct 
 	entry->nodes[entry->nnodes++] = n;
 	entry->prio = prio;
 	n->flags |= NODE_ID;
-	if (n->child == NULL || n->child->string != s || *se != '\0') {
+	if (changed) {
 		assert(n->tag == NULL);
 		n->tag = mandoc_strndup(s, len);
 	}
+
+ out:
+	free(cpy);
 }
 
 int
Index: TODO
===================================================================
RCS file: /home/cvs/mandoc/mandoc/TODO,v
retrieving revision 1.333
retrieving revision 1.334
diff -LTODO -LTODO -u -p -r1.333 -r1.334
--- TODO
+++ TODO
@@ -76,11 +76,6 @@ are mere guesses, and some may be wrong.
   to Nab 8 Aug 2023 20:05:32 +0200 Subject: if/ie d condition always true
   loc **  exist ***  algo ***  size **  imp *
 
-- tag.c, tag_put() should not put ASCII_HYPH into the tag file,
-  which happens when the tag contains "-" on the input side
-  weerd@ 28 Sep 2021 12:44:07 +0200
-  loc *  exist *  algo *  size *  imp ***
-
 
 ************************************************************************
 * missing features
@@ -323,12 +318,6 @@ are mere guesses, and some may be wrong.
   this may be feasible using fts_set(FTS_FOLLOW)
   mail to sternenseemann 19 Aug 2021 19:11:50 +0200
   loc *  exist **  algo **  size *  imp **
-
-- tag.c, tag_put() and callers like man_validate.c, check_tag()
-  should not mistake "\-" as a word-ending escape sequence but
-  instead translate it to plain "-" in the tag name
-  weerd@ 28 Sep 2021 12:44:07 +0200
-  loc **  exist *  algo *  size *  imp ***
 
 - handle Unicode letters in tags in both HTML and terminal output
   thread "section headers with diacritics" starting with
Index: tag.out_tag
===================================================================
RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Cm/tag.out_tag,v
retrieving revision 1.4
retrieving revision 1.5
diff -Lregress/mdoc/Cm/tag.out_tag -Lregress/mdoc/Cm/tag.out_tag -u -p -r1.4 -r1.5
--- regress/mdoc/Cm/tag.out_tag
+++ regress/mdoc/Cm/tag.out_tag
@@ -4,6 +4,6 @@ one tag.mandoc_ascii 9
 two tag.mandoc_ascii 9
 three tag.mandoc_ascii 12
 hyphen tag.mandoc_ascii 14
-minus tag.mandoc_ascii 17
+minus-sign tag.mandoc_ascii 17
 backslash tag.mandoc_ascii 20
 four tag.mandoc_ascii 22
Index: tag.out_html
===================================================================
RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Cm/tag.out_html,v
retrieving revision 1.3
retrieving revision 1.4
diff -Lregress/mdoc/Cm/tag.out_html -Lregress/mdoc/Cm/tag.out_html -u -p -r1.3 -r1.4
--- regress/mdoc/Cm/tag.out_html
+++ regress/mdoc/Cm/tag.out_html
@@ -7,7 +7,7 @@
   <dd>text</dd>
   <dt id="hyphen"><a class="permalink" href="#hyphen"><code class="Cm">-hyphen</code></a></dt>
   <dd>text</dd>
-  <dt id="minus"><a class="permalink" href="#minus"><code class="Cm">-minus-sign</code></a></dt>
+  <dt id="minus-sign"><a class="permalink" href="#minus-sign"><code class="Cm">-minus-sign</code></a></dt>
   <dd>text</dd>
   <dt id="backslash"><a class="permalink" href="#backslash"><code class="Cm">\backslash</code></a></dt>
   <dd>text</dd>
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv


             reply	other threads:[~2023-11-24  5:02 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-24  5:02 schwarze [this message]
  -- strict thread matches above, loose matches on Subject: below --
2017-07-08 14:51 schwarze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d679f07a4926f7e2@mandoc.bsd.lv \
    --to=schwarze@mandoc.bsd.lv \
    --cc=source@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).