source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mandoc: Repair more of the issues that i found in filescan() while
@ 2020-01-26 21:26 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2020-01-26 21:26 UTC (permalink / raw)
  To: source

Log Message:
-----------
Repair more of the issues that i found in filescan() while investigating 
the report from <Andreas dot Kahari at abc dot se> on ports@:

For a symlink, use the first of the following names that is available:
1. In -t mode, the symlink itself (unchanged).
2. When the (unresolved) symlink already resides inside the manpath,
just strip the manpath and use the rest (unchanged).
3. When prefix(es) of the unresolved symlink point to the manpath,
strip the longest such prefix and use the rest (new); this fixes
situations where the manpath or one of its parent directories is a
symlink and at the same time contains symlinks to manual pages.
4. Fall back to the fully resolved symlink, with the manpath stripped
(new); this may for example happen when the command line passes
symlinks from outside the manpath that point to manual pages inside
the manpath, or if manual page trees contain symlinks to symlinks and
not all of them are given on the command line.

The fallback (4) isn't perfect.  You can construct symlink spaghetti
in such a way that this algorithm will not enter all manual page
names into the database that a human would be able to deduce.  But
i do not expect such spaghetti to actually occur in practice (not
even in ports), and a full fix would require re-implementing
realpath(3) in terms of step-by-step readlink(2) calls, repeating
the complicated algorithm (3) after each step.

While here, also stop using PATH_MAX as the size of a static buffer 
in filescan(); on some systems, it can be unreasonably large.
Instead, allocate path strings dynamically.

Modified Files:
--------------
    mandoc:
        mandocdb.c

Revision Data
-------------
Index: mandocdb.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mandocdb.c,v
retrieving revision 1.265
retrieving revision 1.266
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.265 -r1.266
--- mandocdb.c
+++ mandocdb.c
@@ -778,17 +778,17 @@ treescan(void)
  * See treescan() for the fts(3) version of this.
  */
 static void
-filescan(const char *file)
+filescan(const char *infile)
 {
-	char		 buf[PATH_MAX];
 	struct stat	 st;
 	struct mlink	*mlink;
-	char		*p, *start;
+	char		*linkfile, *p, *realdir, *start, *usefile;
+	size_t		 realdir_len;
 
 	assert(use_all);
 
-	if (strncmp(file, "./", 2) == 0)
-		file += 2;
+	if (strncmp(infile, "./", 2) == 0)
+		infile += 2;
 
 	/*
 	 * We have to do lstat(2) before realpath(3) loses
@@ -797,13 +797,13 @@ filescan(const char *file)
 	 * we want to use the orginal file name, while for
 	 * regular files, we want to use the real path.
 	 */
-	if (lstat(file, &st) == -1) {
+	if (lstat(infile, &st) == -1) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "&lstat");
+		say(infile, "&lstat");
 		return;
 	} else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "Not a regular file");
+		say(infile, "Not a regular file");
 		return;
 	}
 
@@ -811,23 +811,24 @@ filescan(const char *file)
 	 * We have to resolve the file name to the real path
 	 * in any case for the base directory check.
 	 */
-	if (realpath(file, buf) == NULL) {
+	if ((usefile = realpath(infile, NULL)) == NULL) {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say(file, "&realpath");
+		say(infile, "&realpath");
 		return;
 	}
 
 	if (op == OP_TEST)
-		start = buf;
-	else if (strncmp(buf, basedir, basedir_len) == 0)
-		start = buf + basedir_len;
+		start = usefile;
+	else if (strncmp(usefile, basedir, basedir_len) == 0)
+		start = usefile + basedir_len;
 #ifdef HOMEBREWDIR
-	else if (strncmp(buf, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
-		start = buf;
+	else if (strncmp(usefile, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0)
+		start = usefile;
 #endif
 	else {
 		exitcode = (int)MANDOCLEVEL_BADARG;
-		say("", "%s: outside base directory", buf);
+		say("", "%s: outside base directory", infile);
+		free(usefile);
 		return;
 	}
 
@@ -835,25 +836,72 @@ filescan(const char *file)
 	 * Now we are sure the file is inside our tree.
 	 * If it is a symbolic link, ignore the real path
 	 * and use the original name.
-	 * This implies passing stuff like "cat1/../man1/foo.1"
-	 * on the command line won't work.  So don't do that.
-	 * Note the stat(2) can still fail if the link target
-	 * doesn't exist.
 	 */
-	if (S_ISLNK(st.st_mode)) {
-		if (stat(buf, &st) == -1) {
+	do {
+		if (S_ISLNK(st.st_mode) == 0)
+			break;
+
+		/*
+		 * Some implementations of realpath(3) may succeed
+		 * even if the target of the link does not exist,
+		 * so check again for extra safety.
+		 */
+		if (stat(usefile, &st) == -1) {
 			exitcode = (int)MANDOCLEVEL_BADARG;
-			say(file, "&stat");
+			say(infile, "&stat");
+			free(usefile);
 			return;
 		}
-		if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
-			say(file, "Filename too long");
-			return;
+		linkfile = mandoc_strdup(infile);
+		if (op == OP_TEST) {
+			free(usefile);
+			start = usefile = linkfile;
+			break;
 		}
-		start = buf;
-		if (op != OP_TEST && strncmp(buf, basedir, basedir_len) == 0)
-			start += basedir_len;
-	}
+		if (strncmp(infile, basedir, basedir_len) == 0) {
+			free(usefile);
+			usefile = linkfile;
+			start = usefile + basedir_len;
+			break;
+		}
+
+		/*
+		 * This symbolic link points into the basedir
+		 * from the outside.  Let's see whether any of
+		 * the parent directories resolve to the basedir.
+		 */
+		p = strchr(linkfile, '\0');
+		do {
+			while (*--p != '/')
+				continue;
+			*p = '\0';
+			if ((realdir = realpath(linkfile, NULL)) == NULL) {
+				exitcode = (int)MANDOCLEVEL_BADARG;
+				say(infile, "&realpath");
+				free(linkfile);
+				free(usefile);
+				return;
+			}
+			realdir_len = strlen(realdir) + 1;
+			free(realdir);
+			*p = '/';
+		} while (realdir_len > basedir_len);
+
+		/*
+		 * If one of the directories resolves to the basedir,
+		 * use the rest of the original name.
+		 * Otherwise, the best we can do
+		 * is to use the filename pointed to.
+		 */
+		if (realdir_len == basedir_len) {
+			free(usefile);
+			usefile = linkfile;
+			start = p + 1;
+		} else {
+			free(linkfile);
+			start = usefile + basedir_len;
+		}
+	} while (/* CONSTCOND */ 0);
 
 	mlink = mandoc_calloc(1, sizeof(struct mlink));
 	mlink->dform = FORM_NONE;
@@ -861,6 +909,7 @@ filescan(const char *file)
 	    sizeof(mlink->file)) {
 		say(start, "Filename too long");
 		free(mlink);
+		free(usefile);
 		return;
 	}
 
@@ -869,13 +918,13 @@ filescan(const char *file)
 	 * but outside our tree, guess the base directory.
 	 */
 
-	if (op == OP_TEST || (start == buf && *start == '/')) {
-		if (strncmp(buf, "man/", 4) == 0)
-			start = buf + 4;
-		else if ((start = strstr(buf, "/man/")) != NULL)
+	if (op == OP_TEST || (start == usefile && *start == '/')) {
+		if (strncmp(usefile, "man/", 4) == 0)
+			start = usefile + 4;
+		else if ((start = strstr(usefile, "/man/")) != NULL)
 			start += 5;
 		else
-			start = buf;
+			start = usefile;
 	}
 
 	/*
@@ -925,6 +974,7 @@ filescan(const char *file)
 		*p = '\0';
 	}
 	mlink_add(mlink, &st);
+	free(usefile);
 }
 
 static void
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-01-26 21:26 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-26 21:26 mandoc: Repair more of the issues that i found in filescan() while schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).