From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (kristaps@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id pB70N6Et008222 for ; Tue, 6 Dec 2011 19:23:06 -0500 (EST) Received: (from kristaps@localhost) by krisdoz.my.domain (8.14.3/8.14.3/Submit) id pB70N4l3000576; Tue, 6 Dec 2011 19:23:04 -0500 (EST) Date: Tue, 6 Dec 2011 19:23:04 -0500 (EST) Message-Id: <201112070023.pB70N4l3000576@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: kristaps@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Add cat2html functionality. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Add cat2html functionality. This keeps track of italic/bold mode per line and properly handles some funny troff-isms we've exposed. I originally wanted to use man2html.c (found on W3's website with no known author) but the code is dodgy. This will need some more work (links, etc.) but does a decent job thusfar. Note: I think it's better style NOT to use
<pre>, and instead have each
line employ <br>
afterward. This allows browsers to break the lines if necessary. This can be changed trivially (replacing the newline and pre tags with the <br>
and new tag). Modified Files: -------------- mdocml: cgi.c Revision Data ------------- Index: cgi.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/cgi.c,v retrieving revision 1.9 retrieving revision 1.10 diff -Lcgi.c -Lcgi.c -u -p -r1.9 -r1.10 --- cgi.c +++ cgi.c @@ -69,6 +69,7 @@ static int atou(const char *, unsigned static void catman(const char *); static void format(const char *); static void html_print(const char *); +static void html_putchar(char); static int kval_decode(char *); static void kval_parse(struct kval **, size_t *, char *); static void kval_free(struct kval *, size_t); @@ -80,9 +81,8 @@ static void pg_show(const struct manpa const struct req *, char *); static void resp_bad(void); static void resp_baddb(void); -static void resp_badexpr(const struct req *); -static void resp_badmanual(void); -static void resp_badpage(void); +static void resp_error400(void); +static void resp_error404(const char *); static void resp_begin_html(int, const char *); static void resp_begin_http(int, const char *); static void resp_end_html(void); @@ -123,6 +123,29 @@ atou(const char *buf, unsigned *v) return(1); } +static void +html_putchar(char c) +{ + + switch (c) { + case ('"'): + printf(""e;"); + break; + case ('&'): + printf("&"); + break; + case ('>'): + printf(">"); + break; + case ('<'): + printf("<"); + break; + default: + putchar((unsigned char)c); + break; + } +} + /* * Print a word, escaping HTML along the way. * This will pass non-ASCII straight to output: be warned! 
@@ -130,29 +153,11 @@ atou(const char *buf, unsigned *v) static void html_print(const char *p) { - char c; if (NULL == p) return; - while ('\0' != *p) - switch ((c = *p++)) { - case ('"'): - printf(""e;"); - break; - case ('&'): - printf("&"); - break; - case ('>'): - printf(">"); - break; - case ('<'): - printf("<"); - break; - default: - putchar((unsigned char)c); - break; - } + html_putchar(*p++); } static void @@ -290,6 +295,8 @@ resp_begin_html(int code, const char *ms " \"http://www.w3.org/TR/html4/strict.dtd\">" "\n" "" "\n" " " "\n" + " " "\n" " System Manpage Reference" "\n" " " "\n" " " "\n" @@ -323,7 +330,7 @@ resp_searchform(const struct req *req) printf("
\n"); - puts("
" "\n" + puts("
\n" " "); printf(" Terms: Page not found.

"); + resp_begin_html(400, "Query Malformed"); + puts("

Malformed Query

\n" + "

\n" + " The query your entered was malformed.\n" + " Try again from the\n" + " main page\n" + "

"); resp_end_html(); } static void -resp_badmanual(void) +resp_error404(const char *page) { resp_begin_html(404, "Not Found"); - puts("

Requested manual not found.

"); - resp_end_html(); -} - -static void -resp_badexpr(const struct req *req) -{ - - resp_begin_html(200, NULL); - resp_searchform(req); - puts("

Your search didn't work.

"); + puts("

Page Not Found

\n" + "

\n" + " The page you're looking for, "); + printf(" "); + html_print(page); + puts(",\n" + " could not be found.\n" + " Try searching from the\n" + " main page\n" + "

"); resp_end_html(); } @@ -448,24 +459,134 @@ pg_index(const struct manpaths *ps, cons static void catman(const char *file) { - int fd; - char buf[BUFSIZ]; - ssize_t ssz; + FILE *f; + size_t len; + int i; + char *p; + int italic, bold; - if (-1 == (fd = open(file, O_RDONLY, 0))) { + if (NULL == (f = fopen(file, "r"))) { resp_baddb(); return; } - resp_begin_http(200, NULL); + resp_begin_html(200, NULL); - while ((ssz = read(fd, buf, BUFSIZ)) > 0) - write(STDOUT_FILENO, buf, (size_t)ssz); + puts("
");
+	while (NULL != (p = fgetln(f, &len))) {
+		bold = italic = 0;
+		for (i = 0; i < (int)len - 1; i++) {
+			/* 
+			 * This means that the catpage is out of state.
+			 * Ignore it and keep going (although the
+			 * catpage is bogus).
+			 */
+
+			if ('\b' == p[i] || '\n' == p[i])
+				continue;
+
+			/*
+			 * Print a regular character.
+			 * Close out any bold/italic scopes.
+			 * If we're in back-space mode, make sure we'll
+			 * have something to enter when we backspace.
+			 */
+
+			if ('\b' != p[i + 1]) {
+				if (italic)
+					printf("");
+				if (bold)
+					printf("");
+				italic = bold = 0;
+				html_putchar(p[i]);
+				continue;
+			} else if (i + 2 >= (int)len)
+				continue;
+
+			/* Italic mode. */
+
+			if ('_' == p[i]) {
+				if (bold)
+					printf("");
+				if ( ! italic)
+					printf("");
+				bold = 0;
+				italic = 1;
+				i += 2;
+				html_putchar(p[i]);
+				continue;
+			}
 
-	if (ssz < 0)
-		perror(file);
+			/* 
+			 * Handle funny behaviour troff-isms.
+			 * These grok'd from the original man2html.c.
+			 */
+
+			if (('+' == p[i] && 'o' == p[i + 2]) ||
+					('o' == p[i] && '+' == p[i + 2]) ||
+					('|' == p[i] && '=' == p[i + 2]) ||
+					('=' == p[i] && '|' == p[i + 2]) ||
+					('*' == p[i] && '=' == p[i + 2]) ||
+					('=' == p[i] && '*' == p[i + 2]) ||
+					('*' == p[i] && '|' == p[i + 2]) ||
+					('|' == p[i] && '*' == p[i + 2]))  {
+				if (italic)
+					printf("");
+				if (bold)
+					printf("");
+				italic = bold = 0;
+				putchar('*');
+				i += 2;
+				continue;
+			} else if (('|' == p[i] && '-' == p[i + 2]) ||
+					('-' == p[i] && '|' == p[i + 1]) ||
+					('+' == p[i] && '-' == p[i + 1]) ||
+					('-' == p[i] && '+' == p[i + 1]) ||
+					('+' == p[i] && '|' == p[i + 1]) ||
+					('|' == p[i] && '+' == p[i + 1]))  {
+				if (italic)
+					printf("");
+				if (bold)
+					printf("");
+				italic = bold = 0;
+				putchar('+');
+				i += 2;
+				continue;
+			}
 
-	close(fd);
+			/* Bold mode. */
+			
+			if (italic)
+				printf("");
+			if ( ! bold)
+				printf("");
+			bold = 1;
+			italic = 0;
+			i += 2;
+			html_putchar(p[i]);
+		}
+
+		/* 
+		 * Clean up the last character.
+		 * We can get to a newline; don't print that. 
+		 */
+
+		if (italic)
+			printf("");
+		if (bold)
+			printf("");
+
+		if (i == (int)len - 1 && '\n' != p[i])
+			html_putchar(p[i]);
+
+		putchar('\n');
+	}
+
+	puts("
\n" + "\n" + ""); + + fclose(f); } static void @@ -477,6 +598,7 @@ format(const char *file) struct man *man; void *vp; enum mandoclevel rc; + char opts[MAXPATHLEN + 128]; if (-1 == (fd = open(file, O_RDONLY, 0))) { resp_baddb(); @@ -492,8 +614,13 @@ format(const char *file) return; } + snprintf(opts, sizeof(opts), "style=/style.css," + "man=%s/search.html?sec=%%S&expr=%%N," + "includes=/cgi-bin/man.cgi/usr/include/%%I", + progname); + mparse_result(mp, &mdoc, &man); - vp = html_alloc(NULL); + vp = html_alloc(opts); if (NULL != mdoc) { resp_begin_http(200, NULL); @@ -520,19 +647,19 @@ pg_show(const struct manpaths *ps, const DBT key, val; if (NULL == path) { - resp_badmanual(); + resp_error400(); return; } else if (NULL == (sub = strrchr(path, '/'))) { - resp_badmanual(); + resp_error400(); return; } else *sub++ = '\0'; if ( ! (atou(path, &vol) && atou(sub, &rec))) { - resp_badmanual(); + resp_error400(); return; } else if (vol >= (unsigned int)ps->sz) { - resp_badmanual(); + resp_error400(); return; } @@ -551,7 +678,7 @@ pg_show(const struct manpaths *ps, const key.size = 4; if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) { - rc < 0 ? resp_baddb() : resp_badmanual(); + rc < 0 ? resp_baddb() : resp_error400(); goto out; } @@ -639,7 +766,7 @@ pg_search(const struct manpaths *ps, con if (0 == rc) resp_baddb(); else if (-1 == rc) - resp_badexpr(req); + resp_search(NULL, 0, (void *)req); for (i = 0; i < sz; i++) free(cp[i]); @@ -729,7 +856,7 @@ main(void) pg_show(&paths, &req, subpath); break; default: - resp_badpage(); + resp_error404(path); break; } -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv