tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
* Emulating apropos/man.cgi behaviour.
@ 2011-12-09  0:25 Kristaps Dzonsons
  2011-12-09  2:13 ` Ingo Schwarze
  0 siblings, 1 reply; 3+ messages in thread
From: Kristaps Dzonsons @ 2011-12-09  0:25 UTC (permalink / raw)
  To: tech

[-- Attachment #1: Type: text/plain, Size: 1518 bytes --]

Hi,

I'm trying to duplicate the behaviour of OpenBSD's apropos(1) and 
man.cgi.  I find they're somewhat inconsistent, but enclosed is a patch 
that does so.  My notes follow.

First, OpenBSD's apropos(1) handles `-S' by printing manuals that 
have no architecture alongside those that specify the requested one. 
Try apropos -S amd64 intro for an example.  I'd think an architecture 
would select ONLY those manuals with that architecture, but...

Second, man.cgi outright throws away architecture and section when 
running apropos.  Er...?

In this patch, I first implement (1) in apropos_db.c, adding a note to 
whatis.1 and apropos.1.  Then for (2) in cgi.c, I detect "legacy" mode 
(if the "apropos" query key is passed in) and do the same, otherwise I 
pass the architecture and section to apropos as might be expected.  I 
print out a "legacy mode" message if applicable.  I'm afraid the patch 
has some churn in cgi.c (sorry).

Does this approach sound reasonable?

Note that my man.cgi is a bit different from OpenBSD's.  Mine has a 
"whatis" or "apropos" search type, which returns the same results as 
the command-line utility of the same name.  If, however, there's only 
one result, that result is displayed.  My reasoning stems from a 
personal distaste for how man(1) unintuitively handles multiple manuals 
matching the same query (e.g., man intro): it seems more intuitive to 
display all matches and let the user decide, instead of picking the 
first one and hoping it matches PATH order.

Thoughts?

Kristaps

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 15175 bytes --]

Index: apropos.1
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos.1,v
retrieving revision 1.12
diff -u -r1.12 apropos.1
--- apropos.1	29 Nov 2011 10:59:19 -0000	1.12
+++ apropos.1	9 Dec 2011 00:22:32 -0000
@@ -50,7 +50,8 @@
 databases.
 Invalid paths, or paths without manual databases, are ignored.
 .It Fl S Ar arch
-Search only for a particular architecture.
+Search only a particular architecture (only applies to manuals
+specifying an architecture).
 .It Fl s Ar cat
 Search only for a manual section.
 See
Index: whatis.1
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/whatis.1,v
retrieving revision 1.3
diff -u -r1.3 whatis.1
--- whatis.1	29 Nov 2011 10:59:19 -0000	1.3
+++ whatis.1	9 Dec 2011 00:22:32 -0000
@@ -49,7 +49,8 @@
 databases.
 Invalid paths, or paths without manual databases, are ignored.
 .It Fl S Ar arch
-Search only for a particular architecture.
+Search only a particular architecture (only applies to manuals
+specifying an architecture).
 .It Fl s Ar cat
 Search only for a manual section.
 See
Index: apropos_db.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/apropos_db.c,v
retrieving revision 1.21
diff -u -r1.21 apropos_db.c
--- apropos_db.c	3 Dec 2011 18:47:09 -0000	1.21
+++ apropos_db.c	9 Dec 2011 00:22:34 -0000
@@ -541,8 +541,10 @@
 
 		if (opts->cat && strcasecmp(opts->cat, r.res.cat))
 			continue;
-		if (opts->arch && strcasecmp(opts->arch, r.res.arch))
-			continue;
+
+		if (opts->arch && *r.res.arch)
+			if (strcasecmp(opts->arch, r.res.arch))
+				continue;
 
 		tree->node = rs = mandoc_realloc
 			(rs, (tree->len + 1) * sizeof(struct rec));
Index: cgi.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/cgi.c,v
retrieving revision 1.19
diff -u -r1.19 cgi.c
--- cgi.c	8 Dec 2011 22:47:09 -0000	1.19
+++ cgi.c	9 Dec 2011 00:22:35 -0000
@@ -54,6 +54,18 @@
 	PAGE__MAX
 };
 
+/*
+ * A query as passed to the search function.
+ * See kval_query() on how this is parsed.
+ */
+struct	query {
+	const char	*arch; /* architecture */
+	const char	*sec; /* manual section */
+	const char	*expr; /* unparsed expression string */
+	int		 whatis; /* whether whatis mode */
+	int		 legacy; /* whether legacy mode */
+};
+
 struct	kval {
 	char		*key;
 	char		*val;
@@ -72,8 +84,10 @@
 static	void		 html_print(const char *);
 static	void		 html_putchar(char);
 static	int 		 kval_decode(char *);
-static	void		 kval_parse(struct kval **, size_t *, char *);
 static	void		 kval_free(struct kval *, size_t);
+static	void		 kval_parse(struct kval **, size_t *, char *);
+static	void		 kval_query(struct query *, 
+				const struct kval *, size_t);
 static	void		 pg_index(const struct manpaths *,
 				const struct req *, char *);
 static	void		 pg_search(const struct manpaths *,
@@ -102,6 +116,54 @@
 };
 
 /*
+ * Initialise and parse a query structure from input.
+ * This accommodates mdocml's man.cgi and also legacy man.cgi
+ * input keys ("sektion" and "apropos").
+ * Note that legacy mode has some quirks: if apropos legacy mode is
+ * detected, we unset the section and architecture string.
+ */
+static void
+kval_query(struct query *q, const struct kval *fields, size_t sz)
+{
+	int		 i, legacy;
+
+	memset(q, 0, sizeof(struct query));
+	legacy = -1;
+
+	for (i = 0; i < (int)sz; i++)
+		if (0 == strcmp(fields[i].key, "expr"))
+			q->expr = fields[i].val;
+		else if (0 == strcmp(fields[i].key, "query"))
+			q->expr = fields[i].val;
+		else if (0 == strcmp(fields[i].key, "sec"))
+			q->sec = fields[i].val;
+		else if (0 == strcmp(fields[i].key, "sektion"))
+			q->sec = fields[i].val;
+		else if (0 == strcmp(fields[i].key, "arch"))
+			q->arch = fields[i].val;
+		else if (0 == strcmp(fields[i].key, "apropos"))
+			legacy = 0 == strcmp
+				(fields[i].val, "0");
+		else if (0 == strcmp(fields[i].key, "op"))
+			q->whatis = 0 == strcasecmp
+				(fields[i].val, "whatis");
+
+	/* Compatibility mode takes precedence! */
+
+	if (legacy == 0) {
+		q->sec = q->arch = NULL;
+		q->whatis = 0;
+		q->legacy = 1;
+	} else if (legacy > 0) {
+		q->legacy = 1;
+		q->whatis = 1;
+	}
+
+	if (legacy >= 0 && NULL != q->sec && 0 == strcmp(q->sec, "0"))
+		q->sec = NULL;
+}
+
+/*
  * This is just OpenBSD's strtol(3) suggestion.
  * I use it instead of strtonum(3) for portability's sake.
  */
@@ -124,6 +186,10 @@
 	return(1);
 }
 
+/*
+ * Print a character, escaping HTML along the way.
+ * This will pass non-ASCII straight to output: be warned!
+ */
 static void
 html_putchar(char c)
 {
@@ -148,8 +214,8 @@
 }
 
 /*
- * Print a word, escaping HTML along the way.
- * This will pass non-ASCII straight to output: be warned!
+ * Call through to html_putchar().
+ * Accepts NULL strings.
  */
 static void
 html_print(const char *p)
@@ -277,9 +343,9 @@
 	if (200 != code)
 		printf("Status: %d %s\n", code, msg);
 
-	puts("Content-Type: text/html; charset=utf-8"		"\n"
-	     "Cache-Control: no-cache"				"\n"
-	     "Pragma: no-cache"					"\n"
+	puts("Content-Type: text/html; charset=utf-8\n"
+	     "Cache-Control: no-cache\n"
+	     "Pragma: no-cache\n"
 	     "");
 
 	fflush(stdout);
@@ -291,18 +357,18 @@
 
 	resp_begin_http(code, msg);
 
-	puts("<!DOCTYPE HTML PUBLIC "				"\n"
-	     " \"-//W3C//DTD HTML 4.01//EN\""			"\n"
-	     " \"http://www.w3.org/TR/html4/strict.dtd\">"	"\n"
-	     "<HTML>"						"\n"
-	     " <HEAD>"						"\n"
-	     "  <META HTTP-EQUIV=\"Content-Type\" "		"\n"
-	     "        CONTENT=\"text/html; charset=utf-8\">"	"\n"
-	     "  <LINK REL=\"stylesheet\" HREF=\"/man.cgi.css\""	"\n"
-	     "        TYPE=\"text/css\" media=\"all\">"		"\n"
-	     "  <TITLE>System Manpage Reference</TITLE>"	"\n"
-	     " </HEAD>"						"\n"
-	     " <BODY>"						"\n"
+	puts("<!DOCTYPE HTML PUBLIC "
+	     " \"-//W3C//DTD HTML 4.01//EN\""
+	     " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+	     "<HTML>\n"
+	     "<HEAD>\n"
+	     "<META HTTP-EQUIV=\"Content-Type\""
+	     " CONTENT=\"text/html; charset=utf-8\">\n"
+	     "<LINK REL=\"stylesheet\" HREF=\"/man.cgi.css\""
+	     " TYPE=\"text/css\" media=\"all\">\n"
+	     "<TITLE>System Manpage Reference</TITLE>\n"
+	     "</HEAD>\n"
+	     "<BODY>\n"
 	     "<!-- Begin page content. //-->");
 }
 
@@ -310,31 +376,16 @@
 resp_end_html(void)
 {
 
-	puts(" </BODY>\n</HTML>");
+	puts("</BODY>\n"
+	     "</HTML>");
 }
 
 static void
 resp_searchform(const struct req *req)
 {
-	int	 	 i;
-	const char	*expr, *sec, *arch;
-
-	expr = sec = arch = "";
-
-	for (i = 0; i < (int)req->fieldsz; i++)
-		if (0 == strcmp(req->fields[i].key, "expr"))
-			expr = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "query"))
-			expr = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sec"))
-			sec = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sektion"))
-			sec = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "arch"))
-			arch = req->fields[i].val;
+	struct query	 q;
 
-	if (NULL != sec && 0 == strcmp(sec, "0"))
-		sec = NULL;
+	kval_query(&q, req->fields, req->fieldsz);
 
 	puts("<!-- Begin search form. //-->");
 	printf("<FORM ACTION=\"");
@@ -342,25 +393,30 @@
 	printf("/search.html\" METHOD=\"get\">\n");
 	printf("<FIELDSET>\n"
 	       "<LEGEND>Search Parameters</LEGEND>\n"
-	       "<INPUT TYPE=\"submit\" NAME=\"op\" "
-	        "VALUE=\"Whatis\"> or \n"
-	       "<INPUT TYPE=\"submit\" NAME=\"op\" "
-	        "VALUE=\"apropos\"> for manuals satisfying \n"
+	       "<INPUT TYPE=\"submit\" NAME=\"op\""
+	       " VALUE=\"Whatis\"> or \n"
+	       "<INPUT TYPE=\"submit\" NAME=\"op\""
+	       " VALUE=\"apropos\"> for manuals satisfying \n"
 	       "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"");
-	html_print(expr);
+	html_print(q.expr ? q.expr : "");
 	printf("\">, section "
-	       "<INPUT TYPE=\"text\" "
-	        "SIZE=\"4\" NAME=\"sec\" VALUE=\"");
-	html_print(sec);
+	       "<INPUT TYPE=\"text\""
+	       " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
+	html_print(q.sec ? q.sec : "");
 	printf("\">, arch "
-	       "<INPUT TYPE=\"text\" "
-	        "SIZE=\"8\" NAME=\"arch\" VALUE=\"");
-	html_print(arch);
+	       "<INPUT TYPE=\"text\""
+	       " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
+	html_print(q.arch ? q.arch : "");
 	puts("\">.\n"
 	     "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
 	     "</FIELDSET>\n"
-	     "</FORM>\n"
-	     "<!-- End search form. //-->");
+	     "</FORM>");
+	if (q.legacy && ! q.whatis)
+		puts("<P>\n"
+		     "Using legacy apropos search mode.\n"
+		     "(Discarding architecture and section.)\n"
+		     "</P>");
+	puts("<!-- End search form. //-->");
 }
 
 static void
@@ -379,9 +435,9 @@
 	resp_begin_html(400, "Query Malformed");
 	printf("<H1>Malformed Query</H1>\n"
 	       "<P>\n"
-	       "  The query your entered was malformed.\n"
-	       "  Try again from the\n"
-	       "  <A HREF=\"%s/index.html\">main page</A>\n"
+	       "The query your entered was malformed.\n"
+	       "Try again from the\n"
+	       "<A HREF=\"%s/index.html\">main page</A>\n"
 	       "</P>", progname);
 	resp_end_html();
 }
@@ -393,13 +449,13 @@
 	resp_begin_html(404, "Not Found");
 	puts("<H1>Page Not Found</H1>\n"
 	     "<P>\n"
-	     "  The page you're looking for, ");
-	printf("  <B>");
+	     "The page you're looking for, ");
+	printf("<B>");
 	html_print(page);
 	printf("</B>,\n"
-	       "  could not be found.\n"
-	       "  Try searching from the\n"
-	       "  <A HREF=\"%s/index.html\">main page</A>\n"
+	       "could not be found.\n"
+	       "Try searching from the\n"
+	       "<A HREF=\"%s/index.html\">main page</A>\n"
 	       "</P>", progname);
 	resp_end_html();
 }
@@ -424,13 +480,10 @@
 static void
 resp_search(struct res *r, size_t sz, void *arg)
 {
-	int		 i, whatis;
-	const char	*ep, *sec, *arch;
+	int		  i;
+	struct query	  q;
 	const struct req *req;
 
-	whatis = 1;
-	ep = sec = arch = NULL;
-
 	if (1 == sz) {
 		/*
 		 * If we have just one result, then jump there now
@@ -444,43 +497,26 @@
 		return;
 	}
 
-	req = (const struct req *)arg;
-
-	for (i = 0; i < (int)req->fieldsz; i++)
-		if (0 == strcmp(req->fields[i].key, "expr"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "query"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sec"))
-			sec = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sektion"))
-			sec = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "arch"))
-			arch = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "apropos"))
-			whatis = 0 == strcmp
-				(req->fields[i].val, "0");
-		else if (0 == strcmp(req->fields[i].key, "op"))
-			whatis = 0 == strcasecmp
-				(req->fields[i].val, "whatis");
-
 	qsort(r, sz, sizeof(struct res), cmp);
 
 	resp_begin_html(200, NULL);
+
+	req = (const struct req *)arg;
 	resp_searchform(req);
+	kval_query(&q, req->fields, req->fieldsz);
 
 	if (0 == sz) {
 		puts("<P>\n"
 		     "No results found.");
-		if (whatis) {
+		if (q.whatis) {
 			printf("(Try <A HREF=\"");
 			html_print(progname);
 			printf("/search.html?op=apropos&amp;expr=");
-			html_print(ep ? ep : "");
+			html_print(q.expr ? q.expr : "");
 			printf("&amp;sec=");
-			html_print(sec ? sec : "");
+			html_print(q.sec ? q.sec : "");
 			printf("&amp;arch=");
-			html_print(arch ? arch : "");
+			html_print(q.arch ? q.arch : "");
 			puts("\">apropos</A>?)");
 		}
 		puts("</P>");
@@ -492,7 +528,9 @@
 	     "<TABLE>");
 
 	for (i = 0; i < (int)sz; i++) {
-		printf("<TR><TD CLASS=\"title\"><A HREF=\"");
+		printf("<TR>\n"
+		       "<TD CLASS=\"title\">\n"
+		       "<A HREF=\"");
 		html_print(progname);
 		printf("/show/%u/%u.html\">", r[i].volume, r[i].rec);
 		html_print(r[i].title);
@@ -502,13 +540,15 @@
 			putchar('/');
 			html_print(r[i].arch);
 		}
-		printf(")</A></TD><TD CLASS=\"desc\">");
+		printf(")</A>\n"
+		       "</TD>\n"
+		       "<TD CLASS=\"desc\">");
 		html_print(r[i].desc);
-		puts("</TD></TR>");
+		puts("</TD>\n"
+		     "</TR>");
 	}
 
 	puts("</TABLE>");
-
 	resp_end_html();
 }
 
@@ -535,21 +575,21 @@
 	}
 
 	resp_begin_http(200, NULL);
-	puts("<!DOCTYPE HTML PUBLIC "				"\n"
-	     " \"-//W3C//DTD HTML 4.01//EN\""			"\n"
-	     " \"http://www.w3.org/TR/html4/strict.dtd\">"	"\n"
-	     "<HTML>"						"\n"
-	     " <HEAD>"						"\n"
-	     "  <META HTTP-EQUIV=\"Content-Type\" "		"\n"
-	     "        CONTENT=\"text/html; charset=utf-8\">"	"\n"
-	     "  <LINK REL=\"stylesheet\" HREF=\"/catman.css\""	"\n"
-	     "        TYPE=\"text/css\" media=\"all\">"		"\n"
-	     "  <TITLE>System Manpage Reference</TITLE>"	"\n"
-	     " </HEAD>"						"\n"
-	     " <BODY>"						"\n"
-	     "<!-- Begin page content. //-->");
+	puts("<!DOCTYPE HTML PUBLIC "
+	     " \"-//W3C//DTD HTML 4.01//EN\""
+	     " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+	     "<HTML>\n"
+	     "<HEAD>\n"
+	     "<META HTTP-EQUIV=\"Content-Type\""
+	     " CONTENT=\"text/html; charset=utf-8\">\n"
+	     "<LINK REL=\"stylesheet\" HREF=\"/catman.css\""
+	     " TYPE=\"text/css\" media=\"all\">\n"
+	     "<TITLE>System Manpage Reference</TITLE>\n"
+	     "</HEAD>\n"
+	     "<BODY>\n"
+	     "<!-- Begin page content. //-->\n"
+	     "<PRE>");
 
-	puts("<PRE>");
 	while (NULL != (p = fgetln(f, &len))) {
 		bold = italic = 0;
 		for (i = 0; i < (int)len - 1; i++) {
@@ -784,40 +824,22 @@
 pg_search(const struct manpaths *ps, const struct req *req, char *path)
 {
 	size_t		  tt;
-	int		  i, sz, rc, whatis;
+	int		  i, sz, rc;
 	const char	 *ep, *start;
 	char		**cp;
 	struct opts	  opt;
 	struct expr	 *expr;
+	struct query	  q;
 
-	expr = NULL;
-	cp = NULL;
-	ep = NULL;
-	sz = 0;
-	whatis = 1;
-
+	kval_query(&q, req->fields, req->fieldsz);
 	memset(&opt, 0, sizeof(struct opts));
 
-	for (sz = i = 0; i < (int)req->fieldsz; i++)
-		if (0 == strcmp(req->fields[i].key, "expr"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "query"))
-			ep = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sec"))
-			opt.cat = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "sektion"))
-			opt.cat = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "arch"))
-			opt.arch = req->fields[i].val;
-		else if (0 == strcmp(req->fields[i].key, "apropos"))
-			whatis = 0 == strcmp
-				(req->fields[i].val, "0");
-		else if (0 == strcmp(req->fields[i].key, "op"))
-			whatis = 0 == strcasecmp
-				(req->fields[i].val, "whatis");
-
-	if (NULL != opt.cat && 0 == strcmp(opt.cat, "0"))
-		opt.cat = NULL;
+	ep 	 = q.expr;
+	opt.arch = q.arch;
+	opt.cat  = q.sec;
+	rc 	 = -1;
+	sz 	 = 0;
+	cp	 = NULL;
 
 	/*
 	 * Poor man's tokenisation.
@@ -840,15 +862,13 @@
 			ep++;
 	}
 
-	rc = -1;
-
 	/*
 	 * Pump down into apropos backend.
 	 * The resp_search() function is called with the results.
 	 */
 
-	expr = whatis ? termcomp(sz, cp, &tt) :
-		        exprcomp(sz, cp, &tt);
+	expr = q.whatis ? termcomp(sz, cp, &tt) :
+		          exprcomp(sz, cp, &tt);
 
 	if (NULL != expr)
 		rc = apropos_search

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-12-09 11:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-09  0:25 Emulating apropos/man.cgi behaviour Kristaps Dzonsons
2011-12-09  2:13 ` Ingo Schwarze
2011-12-09 11:44   ` Kristaps Dzonsons

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).