tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: Soeren Tempel <soeren@soeren-tempel.net>
Cc: tech@mandoc.bsd.lv
Subject: Re: Discrepancy in mansearch and fs_lookup behavior
Date: Thu, 2 Sep 2021 21:09:37 +0200	[thread overview]
Message-ID: <20210902190937.GI14065@athene.usta.de> (raw)
In-Reply-To: <20210901203523.GB15706@athene.usta.de>

Hi Soeren,

Ingo Schwarze wrote on Wed, Sep 01, 2021 at 10:35:23PM +0200:

> Now finally, fs_search().
> 
>   rm -f Test/mandoc.db
>   man -M Test open      -> POSIX; random
>   man -M Test 3p open   -> No entry; because directory does not exist
>   man -M Test 3pm open  -> No entry; because directory does not exist
> 
> I think the globbing in the second part of fs_lookup could be relaxed
> to allow "manpath/man3*/name.[01-9]*" for sec == 3*, and multiple
> paths could be added to *res.  Then the main program would do the
> usual prioritization.

See below for a patch resulting in

    man -M Test open      -> POSIX; random
    man -M Test 3p open   -> POSIX because exact match of file name extension
    man -M Test 3pm open  -> Perl because man3/open.3p does not match at all

[...]
> They cannot be equivalent: fs_search() needs to be *much* simpler
> because we cannot possibly duplicate all the mandocdb.c logic in main.c.
> That would be too much code and too slow.  Besides, main.c cannot look
> at file contents.
> 
> When it doesn't cause too much complexity, making the two more similar
> to each other may be be worthwhile for particular cases that matter
> for practical purposes, though.

This was trickier than anticipated.  When given "man -M Test 3p open",
my first try was allowing "Test/man3*/name.[01-9]*" which worked well
enough for finding the desired files, but blew up quite badly elsewhere
with lots of false positives and fallout all over the place.

To keep false positives at bay, it is definitely required that (unless
both match in the first place) either the directory name or the file
name extension match the requested section number.

What i'm proposing here is the following list of priorities,
described using the example of "man 3p open":

 1. man3p/open.3p          # double exact match
 2. cat3p/open.0           # preformatted exact match
 3. man3p/<arch>/open.3p   # architecture dependent page
 4. man3p/open.[01-9]*     # directory + name match
 5. man3/open.3p*          # name + extension match

Does the patch make sense to you and work for your use case?

Yours,
  Ingo


Index: main.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/main.c,v
retrieving revision 1.356
diff -u -p -r1.356 main.c
--- main.c	14 Aug 2021 13:53:08 -0000	1.356
+++ main.c	2 Sep 2021 18:36:49 -0000
@@ -94,9 +94,11 @@ struct	outstate {
 int			  mandocdb(int, char *[]);
 
 static	void		  check_xr(struct manpaths *);
-static	int		  fs_lookup(const struct manpaths *,
-				size_t ipath, const char *,
-				const char *, const char *,
+static	void		  fs_append(char **, size_t, int,
+				size_t, const char *, enum form,
+				struct manpage **, size_t *);
+static	int		  fs_lookup(const struct manpaths *, size_t,
+				const char *, const char *, const char *,
 				struct manpage **, size_t *);
 static	int		  fs_search(const struct mansearch *,
 				const struct manpaths *, const char *,
@@ -700,6 +702,30 @@ glob_esc(char **dst, const char *src, co
 		*(*dst)++ = *suffix++;
 }
 
+static void
+fs_append(char **file, size_t filesz, int copy, size_t ipath,
+    const char *sec, enum form form, struct manpage **res, size_t *ressz)
+{
+	struct manpage	*page;
+
+	*res = mandoc_reallocarray(*res, *ressz + filesz, sizeof(**res));
+	page = *res + *ressz;
+	*ressz += filesz;
+	for (;;) {
+		page->file = copy ? mandoc_strdup(*file) : *file;
+		page->names = NULL;
+		page->output = NULL;
+		page->bits = NAME_FILE & NAME_MASK;
+		page->ipath = ipath;
+		page->sec = (*sec >= '1' && *sec <= '9') ? *sec - '1' + 1 : 10;
+		page->form = form;
+		if (--filesz == 0)
+			break;
+		file++;
+		page++;
+	}
+}
+
 static int
 fs_lookup(const struct manpaths *paths, size_t ipath,
 	const char *sec, const char *arch, const char *name,
@@ -707,16 +733,19 @@ fs_lookup(const struct manpaths *paths, 
 {
 	struct stat	 sb;
 	glob_t		 globinfo;
-	struct manpage	*page;
-	char		*file, *cp;
+	char		*file, *cp, secnum[2];
 	int		 globres;
 	enum form	 form;
 
 	const char *const slman = "/man";
 	const char *const slash = "/";
 	const char *const sglob = ".[01-9]*";
+	const char *const dot   = ".";
+	const char *const aster = "*";
 
+	memset(&globinfo, 0, sizeof(globinfo));
 	form = FORM_SRC;
+
 	mandoc_asprintf(&file, "%s/man%s/%s.%s",
 	    paths->paths[ipath], sec, name, sec);
 	if (stat(file, &sb) != -1)
@@ -751,14 +780,34 @@ fs_lookup(const struct manpaths *paths, 
 		mandoc_msg(MANDOCERR_GLOB, 0, 0,
 		    "%s: %s", file, strerror(errno));
 	free(file);
+	file = NULL;
 	if (globres == 0)
-		file = mandoc_strdup(*globinfo.gl_pathv);
+		goto found;
 	globfree(&globinfo);
-	if (globres == 0) {
-		if (stat(file, &sb) != -1)
-			goto found;
+
+	if (sec[1] != '\0' && *ressz == 0) {
+		secnum[0] = sec[0];
+		secnum[1] = '\0';
+		cp = file = mandoc_malloc(strlen(paths->paths[ipath]) * 2 +
+		    strlen(slman) + strlen(secnum) * 2 + strlen(slash) +
+		    strlen(name) * 2 + strlen(dot) +
+		    strlen(sec) * 2 + strlen(aster) + 1);
+		glob_esc(&cp, paths->paths[ipath], slman);
+		glob_esc(&cp, secnum, slash);
+		glob_esc(&cp, name, dot);
+		glob_esc(&cp, sec, aster);
+		*cp = '\0';
+		globres = glob(file, 0, NULL, &globinfo);
+		if (globres != 0 && globres != GLOB_NOMATCH)
+			mandoc_msg(MANDOCERR_GLOB, 0, 0,
+			    "%s: %s", file, strerror(errno));
 		free(file);
+		file = NULL;
+		if (globres == 0)
+			goto found;
+		globfree(&globinfo);
 	}
+
 	if (res != NULL || ipath + 1 != paths->sz)
 		return -1;
 
@@ -770,19 +819,14 @@ fs_lookup(const struct manpaths *paths, 
 found:
 	warnx("outdated mandoc.db lacks %s(%s) entry, run %s %s",
 	    name, sec, BINM_MAKEWHATIS, paths->paths[ipath]);
-	if (res == NULL) {
+	if (res == NULL)
 		free(file);
-		return 0;
-	}
-	*res = mandoc_reallocarray(*res, ++*ressz, sizeof(**res));
-	page = *res + (*ressz - 1);
-	page->file = file;
-	page->names = NULL;
-	page->output = NULL;
-	page->bits = NAME_FILE & NAME_MASK;
-	page->ipath = ipath;
-	page->sec = (*sec >= '1' && *sec <= '9') ? *sec - '1' + 1 : 10;
-	page->form = form;
+	else if (file == NULL)
+		fs_append(globinfo.gl_pathv, globinfo.gl_pathc, 1,
+		    ipath, sec, form, res, ressz);
+	else
+		fs_append(&file, 1, 0, ipath, sec, form, res, ressz);
+	globfree(&globinfo);
 	return 0;
 }
 
--
 To unsubscribe send an email to tech+unsubscribe@mandoc.bsd.lv


  reply	other threads:[~2021-09-02 19:09 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-30 19:26 Sören Tempel
2021-09-01 20:35 ` Ingo Schwarze
2021-09-02 19:09   ` Ingo Schwarze [this message]
2021-09-04 10:12   ` Sören Tempel
2021-09-04 13:18     ` Ingo Schwarze
2021-09-04 16:16       ` Ingo Schwarze
2021-09-04 17:51         ` Sören Tempel
2021-09-05 12:47           ` Ingo Schwarze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210902190937.GI14065@athene.usta.de \
    --to=schwarze@usta.de \
    --cc=soeren@soeren-tempel.net \
    --cc=tech@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).