source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mandoc.bsd.lv
To: source@mandoc.bsd.lv
Subject: mandoc: Repair more of the issues that i found in filescan() while
Date: Sun, 26 Jan 2020 16:26:11 -0500 (EST)	[thread overview]
Message-ID: <ba40c633d2d5c643@mandoc.bsd.lv> (raw)

Log Message:
-----------
Repair more of the issues that i found in filescan() while investigating 
the report from <Andreas dot Kahari at abc dot se> on ports@:

For a symlink, use the first of the following names that is available:
1. In -t mode, the symlink itself (unchanged).
2. When the (unresolved) symlink already resides inside the manpath,
just strip the manpath and use the rest (unchanged).
3. When prefix(es) of the unresolved symlink point to the manpath,
strip the longest such prefix and use the rest (new); this fixes
situations where the manpath or one of its parent directories is a
symlink and at the same time contains symlinks to manual pages.
4. Fall back to the fully resolved symlink, with the manpath stripped
(new); this may for example happen when the command line passes
symlinks from outside the manpath that point to manual pages inside
the manpath, or if manual page trees contain symlinks to symlinks and
not all of them are given on the command line.

The fallback (4) isn't perfect.  You can construct symlink spaghetti
in such a way that this algorithm will not enter all manual page
names into the database that a human would be able to deduce.  But
i do not expect such spaghetti to actually occur in practice (not
even in ports), and a full fix would require re-implementing
realpath(3) in terms of step-by-step readlink(2) calls, repeating
the complicated algorithm (3) after each step.

While here, also stop using PATH_MAX as the size of a static buffer 
in filescan(); on some systems, it can be unreasonably large.
Instead, allocate path strings dynamically.

Modified Files:
--------------
    mandoc:
        mandocdb.c

Revision Data
-------------
Index: mandocdb.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mandocdb.c,v
retrieving revision 1.265
retrieving revision 1.266
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.265 -r1.266
--- mandocdb.c
+++ mandocdb.c
@@ -778,17 +778,17 @@ treescan(void)
  * See treescan() for the fts(3) version of this.
  */
 static void
-filescan(const char *file)
+filescan(const char *infile)
 {
-	char		 buf[PATH_MAX];
 	struct stat	 st;
 	struct mlink	*mlink;
-	char		*p, *start;
+	char		*linkfile, *p, *realdir, *start, *usefile;
+	size_t		 realdir_len;
 
 	assert(use_all);
 
-	if (strncmp(file, "./", 2) == 0)
-		file += 2;
+	if (strncmp(infile, "./", 2) == 0)
+		infile += 2;
 
 	/*
 	 * We have to do lstat(2) before realpath(3) loses
@@ -797,13 +797,13 @@ filescan(const char *file)
 	 * we want to use the orginal file name, while for
 	 * regular files, we want to use the real path.
 	 */
-	if (lstat(file, &st) == -1) {
+	if (lstat(infile, &st) == -1) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "&lstat");
+		say(infile, "&lstat");
 		return;
 	} else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "Not a regular file");
+		say(infile, "Not a regular file");
 		return;
 	}
 
@@ -811,23 +811,24 @@ filescan(const char *file)
 	 * We have to resolve the file name to the real path
 	 * in any case for the base directory check.
 	 */
-	if (realpath(file, buf) == NULL) {
+	if ((usefile = realpath(infile, NULL)) == NULL) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "&realpath");
+		say(infile, "&realpath");
 		return;
 	}
 
 	if (op == OP_TEST)
-		start = buf;
-	else if (strncmp(buf, basedir, basedir_len) == 0)
-		start = buf + basedir_len;
+		start = usefile;
+	else if (strncmp(usefile, basedir, basedir_len) == 0)
+		start = usefile + basedir_len;
 #ifdef HOMEBREWDIR
-	else if (strncmp(buf, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
-		start = buf;
+	else if (strncmp(usefile, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
+		start = usefile;
 #endif
 	else {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say("", "%s: outside base directory", buf);
+		say("", "%s: outside base directory", infile);
+		free(usefile);
 		return;
 	}
 
@@ -835,25 +836,72 @@ filescan(const char *file)
 	 * Now we are sure the file is inside our tree.
 	 * If it is a symbolic link, ignore the real path
 	 * and use the original name.
-	 * This implies passing stuff like "cat1/../man1/foo.1"
-	 * on the command line won't work.  So don't do that.
-	 * Note the stat(2) can still fail if the link target
-	 * doesn't exist.
 	 */
-	if (S_ISLNK(st.st_mode)) {
-		if (stat(buf, &st) == -1) {
+	do {
+		if (S_ISLNK(st.st_mode) == 0)
+			break;
+
+		/*
+		 * Some implementations of realpath(3) may succeed
+		 * even if the target of the link does not exist,
+		 * so check again for extra safety.
+		 */
+		if (stat(usefile, &st) == -1) {
 			exitcode = (int)MANDOCLEVEL_BADARG;
-			say(file, "&stat");
+			say(infile, "&stat");
+			free(usefile);
 			return;
 		}
-		if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
-			say(file, "Filename too long");
-			return;
+		linkfile = mandoc_strdup(infile);
+		if (op == OP_TEST) {
+			free(usefile);
+			start = usefile = linkfile;
+			break;
 		}
-		start = buf;
-		if (op != OP_TEST && strncmp(buf, basedir, basedir_len) == 0)
-			start += basedir_len;
-	}
+		if (strncmp(infile, basedir, basedir_len) == 0) {
+			free(usefile);
+			usefile = linkfile;
+			start = usefile + basedir_len;
+			break;
+		}
+
+		/*
+		 * This symbolic link points into the basedir
+		 * from the outside.  Let's see whether any of
+		 * the parent directories resolve to the basedir.
+		 */
+		p = strchr(linkfile, '\0');
+		do {
+			while (*--p != '/')
+				continue;
+			*p = '\0';
+			if ((realdir = realpath(linkfile, NULL)) == NULL) {
+				exitcode = (int)MANDOCLEVEL_BADARG;
+				say(infile, "&realpath");
+				free(linkfile);
+				free(usefile);
+				return;
+			}
+			realdir_len = strlen(realdir) + 1;
+			free(realdir);
+			*p = '/';
+		} while (realdir_len > basedir_len);
+
+		/*
+		 * If one of the directories resolves to the basedir,
+		 * use the rest of the original name.
+		 * Otherwise, the best we can do
+		 * is to use the filename pointed to.
+		 */
+		if (realdir_len == basedir_len) {
+			free(usefile);
+			usefile = linkfile;
+			start = p + 1;
+		} else {
+			free(linkfile);
+			start = usefile + basedir_len;
+		}
+	} while (/* CONSTCOND */ 0);
 
 	mlink = mandoc_calloc(1, sizeof(struct mlink));
 	mlink->dform = FORM_NONE;
@@ -861,6 +909,7 @@ filescan(const char *file)
 	    sizeof(mlink->file)) {
 		say(start, "Filename too long");
 		free(mlink);
+		free(usefile);
 		return;
 	}
 
@@ -869,13 +918,13 @@ filescan(const char *file)
 	 * but outside our tree, guess the base directory.
 	 */
 
-	if (op == OP_TEST || (start == buf && *start == '/')) {
-		if (strncmp(buf, "man/", 4) == 0)
-			start = buf + 4;
-		else if ((start = strstr(buf, "/man/")) != NULL)
+	if (op == OP_TEST || (start == usefile && *start == '/')) {
+		if (strncmp(usefile, "man/", 4) == 0)
+			start = usefile + 4;
+		else if ((start = strstr(usefile, "/man/")) != NULL)
 			start += 5;
 		else
-			start = buf;
+			start = usefile;
 	}
 
 	/*
@@ -925,6 +974,7 @@ filescan(const char *file)
 		*p = '\0';
 	}
 	mlink_add(mlink, &st);
+	free(usefile);
 }
 
 static void
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

                 reply	other threads:[~2020-01-26 21:26 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ba40c633d2d5c643@mandoc.bsd.lv \
    --to=schwarze@mandoc.bsd.lv \
    --cc=source@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).