From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (mandoc.bsd.lv [local]) by mandoc.bsd.lv (OpenSMTPD) with ESMTPA id ceac4abb for ; Sun, 26 Jan 2020 16:26:11 -0500 (EST) Date: Sun, 26 Jan 2020 16:26:11 -0500 (EST) X-Mailinglist: mandoc-source Reply-To: source@mandoc.bsd.lv MIME-Version: 1.0 From: schwarze@mandoc.bsd.lv To: source@mandoc.bsd.lv Subject: mandoc: Repair more of the issues that i found in filescan() while X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Message-ID: Log Message: ----------- Repair more of the issues that i found in filescan() while investigating the report from on ports@: For a symlink, use the first of the following names that is available: 1. In -t mode, the symlink itself (unchanged). 2. When the (unresolved) symlink already resides inside the manpath, just strip the manpath and use the rest (unchanged). 3. When prefix(es) of the unresolved symlink point to the manpath, strip the longest such prefix and use the rest (new); this fixes situations where the manpath or one of its parent directories is a symlink and at the same time contains symlinks to manual pages. 4. Fall back to the fully resolved symlink, with the manpath stripped (new); this may for example happen when the command line passes symlinks from outside the manpath that point to manual pages inside the manpath, or if manual page trees contain symlinks to symlinks and not all of them are given on the command line. The fallback (4) isn't perfect. You can construct symlink spaghetti in such a way that this algorithm will not enter all manual page names into the database that a human would be able to deduce. But i do not expect such spaghetti to actually occur in practice (not even in ports), and a full fix would require re-implementing realpath(3) in terms of step-by-step readlink(2) calls, repeating the complicated algorithm (3) after each step. While here, also stop using PATH_MAX as the size of a static buffer in filescan(); on some systems, it can be unreasonably large. Instead, allocate path strings dynamically. Modified Files: -------------- mandoc: mandocdb.c Revision Data ------------- Index: mandocdb.c =================================================================== RCS file: /home/cvs/mandoc/mandoc/mandocdb.c,v retrieving revision 1.265 retrieving revision 1.266 diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.265 -r1.266 --- mandocdb.c +++ mandocdb.c @@ -778,17 +778,17 @@ treescan(void) * See treescan() for the fts(3) version of this. */ static void -filescan(const char *file) +filescan(const char *infile) { - char buf[PATH_MAX]; struct stat st; struct mlink *mlink; - char *p, *start; + char *linkfile, *p, *realdir, *start, *usefile; + size_t realdir_len; assert(use_all); - if (strncmp(file, "./", 2) == 0) - file += 2; + if (strncmp(infile, "./", 2) == 0) + infile += 2; /* * We have to do lstat(2) before realpath(3) loses @@ -797,13 +797,13 @@ filescan(const char *file) * we want to use the orginal file name, while for * regular files, we want to use the real path. */ - if (lstat(file, &st) == -1) { + if (lstat(infile, &st) == -1) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, "&lstat"); + say(infile, "&lstat"); return; } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, "Not a regular file"); + say(infile, "Not a regular file"); return; } @@ -811,23 +811,24 @@ filescan(const char *file) * We have to resolve the file name to the real path * in any case for the base directory check. */ - if (realpath(file, buf) == NULL) { + if ((usefile = realpath(infile, NULL)) == NULL) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, "&realpath"); + say(infile, "&realpath"); return; } if (op == OP_TEST) - start = buf; - else if (strncmp(buf, basedir, basedir_len) == 0) - start = buf + basedir_len; + start = usefile; + else if (strncmp(usefile, basedir, basedir_len) == 0) + start = usefile + basedir_len; #ifdef HOMEBREWDIR - else if (strncmp(buf, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0) - start = buf; + else if (strncmp(usefile, HOMEBREWDIR, strlen(HOMEBREWDIR)) == 0) + start = usefile; #endif else { exitcode = (int)MANDOCLEVEL_BADARG; - say("", "%s: outside base directory", buf); + say("", "%s: outside base directory", infile); + free(usefile); return; } @@ -835,25 +836,72 @@ filescan(const char *file) * Now we are sure the file is inside our tree. * If it is a symbolic link, ignore the real path * and use the original name. - * This implies passing stuff like "cat1/../man1/foo.1" - * on the command line won't work. So don't do that. - * Note the stat(2) can still fail if the link target - * doesn't exist. */ - if (S_ISLNK(st.st_mode)) { - if (stat(buf, &st) == -1) { + do { + if (S_ISLNK(st.st_mode) == 0) + break; + + /* + * Some implementations of realpath(3) may succeed + * even if the target of the link does not exist, + * so check again for extra safety. + */ + if (stat(usefile, &st) == -1) { exitcode = (int)MANDOCLEVEL_BADARG; - say(file, "&stat"); + say(infile, "&stat"); + free(usefile); return; } - if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { - say(file, "Filename too long"); - return; + linkfile = mandoc_strdup(infile); + if (op == OP_TEST) { + free(usefile); + start = usefile = linkfile; + break; } - start = buf; - if (op != OP_TEST && strncmp(buf, basedir, basedir_len) == 0) - start += basedir_len; - } + if (strncmp(infile, basedir, basedir_len) == 0) { + free(usefile); + usefile = linkfile; + start = usefile + basedir_len; + break; + } + + /* + * This symbolic link points into the basedir + * from the outside. Let's see whether any of + * the parent directories resolve to the basedir. + */ + p = strchr(linkfile, '\0'); + do { + while (*--p != '/') + continue; + *p = '\0'; + if ((realdir = realpath(linkfile, NULL)) == NULL) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "&realpath"); + free(linkfile); + free(usefile); + return; + } + realdir_len = strlen(realdir) + 1; + free(realdir); + *p = '/'; + } while (realdir_len > basedir_len); + + /* + * If one of the directories resolves to the basedir, + * use the rest of the original name. + * Otherwise, the best we can do + * is to use the filename pointed to. + */ + if (realdir_len == basedir_len) { + free(usefile); + usefile = linkfile; + start = p + 1; + } else { + free(linkfile); + start = usefile + basedir_len; + } + } while (/* CONSTCOND */ 0); mlink = mandoc_calloc(1, sizeof(struct mlink)); mlink->dform = FORM_NONE; @@ -861,6 +909,7 @@ filescan(const char *file) sizeof(mlink->file)) { say(start, "Filename too long"); free(mlink); + free(usefile); return; } @@ -869,13 +918,13 @@ filescan(const char *file) * but outside our tree, guess the base directory. */ - if (op == OP_TEST || (start == buf && *start == '/')) { - if (strncmp(buf, "man/", 4) == 0) - start = buf + 4; - else if ((start = strstr(buf, "/man/")) != NULL) + if (op == OP_TEST || (start == usefile && *start == '/')) { + if (strncmp(usefile, "man/", 4) == 0) + start = usefile + 4; + else if ((start = strstr(usefile, "/man/")) != NULL) start += 5; else - start = buf; + start = usefile; } /* @@ -925,6 +974,7 @@ filescan(const char *file) *p = '\0'; } mlink_add(mlink, &st); + free(usefile); } static void -- To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv