source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Do not fork and exec gunzip(1), just link with libz instead.
@ 2015-07-19  6:05 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2015-07-19  6:05 UTC (permalink / raw)
  To: source

Log Message:
-----------
Do not fork and exec gunzip(1), just link with libz instead.
As discussed with deraadt@, that's cleaner and will help tame(2).
Something like this was also suggested earlier by bapt at FreeBSD.
Minus 50 lines of code, deleting one interface function (mparse_wait),
no functional change intended.

Modified Files:
--------------
    mdocml:
        INSTALL
        Makefile
        configure
        main.c
        mandoc.3
        mandoc.h
        mandocdb.c
        read.c

Revision Data
-------------
Index: mandocdb.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandocdb.c,v
retrieving revision 1.194
retrieving revision 1.195
diff -Lmandocdb.c -Lmandocdb.c -u -p -r1.194 -r1.195
--- mandocdb.c
+++ mandocdb.c
@@ -1246,10 +1246,6 @@ mpages_merge(struct mparse *mp)
 		mlink = mpage->mlinks;
 
 nextpage:
-		if (mparse_wait(mp) != MANDOCLEVEL_OK) {
-			exitcode = (int)MANDOCLEVEL_SYSERR;
-			say(mlink->file, "&wait gunzip");
-		}
 		ohash_delete(&strings);
 		ohash_delete(&names);
 		mpage = ohash_next(&mpages, &pslot);
Index: read.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/read.c,v
retrieving revision 1.139
retrieving revision 1.140
diff -Lread.c -Lread.c -u -p -r1.139 -r1.140
--- read.c
+++ read.c
@@ -23,19 +23,18 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #endif
-#include <sys/wait.h>
 
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <signal.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <zlib.h>
 
 #include "mandoc_aux.h"
 #include "mandoc.h"
@@ -60,10 +59,10 @@ struct	mparse {
 	enum mandoclevel  file_status; /* status of current parse */
 	enum mandoclevel  wlevel; /* ignore messages below this */
 	int		  options; /* parser options */
+	int		  gzip; /* current input file is gzipped */
 	int		  filenc; /* encoding of the current file */
 	int		  reparse_count; /* finite interp. stack */
 	int		  line; /* line number in the file */
-	pid_t		  child; /* the gunzip(1) process */
 };
 
 static	void	  choose_parser(struct mparse *);
@@ -327,7 +326,6 @@ mparse_buf_r(struct mparse *curp, struct
 	int		 of;
 	int		 lnn; /* line number in the real file */
 	int		 fd;
-	pid_t		 save_child;
 	unsigned char	 c;
 
 	memset(&ln, 0, sizeof(ln));
@@ -539,7 +537,6 @@ rerun:
 			if (curp->secondary)
 				curp->secondary->sz -= pos + 1;
 			save_file = curp->file;
-			save_child = curp->child;
 			if (mparse_open(curp, &fd, ln.buf + of) ==
 			    MANDOCLEVEL_OK) {
 				mparse_readfd(curp, fd, ln.buf + of);
@@ -557,7 +554,6 @@ rerun:
 				of = 0;
 				mparse_buf_r(curp, ln, of, 0);
 			}
-			curp->child = save_child;
 			pos = 0;
 			continue;
 		default:
@@ -611,6 +607,7 @@ static int
 read_whole_file(struct mparse *curp, const char *file, int fd,
 		struct buf *fb, int *with_mmap)
 {
+	gzFile		 gz;
 	size_t		 off;
 	ssize_t		 ssz;
 
@@ -628,7 +625,7 @@ read_whole_file(struct mparse *curp, con
 	 * concerned that this is going to tank any machines.
 	 */
 
-	if (S_ISREG(st.st_mode)) {
+	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
 		if (st.st_size > 0x7fffffff) {
 			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
 			return(0);
@@ -641,6 +638,14 @@ read_whole_file(struct mparse *curp, con
 	}
 #endif
 
+	if (curp->gzip) {
+		if ((gz = gzdopen(fd, "rb")) == NULL) {
+			perror(file);
+			exit((int)MANDOCLEVEL_SYSERR);
+		}
+	} else
+		gz = NULL;
+
 	/*
 	 * If this isn't a regular file (like, say, stdin), then we must
 	 * go the old way and just read things in bit by bit.
@@ -659,7 +664,9 @@ read_whole_file(struct mparse *curp, con
 			}
 			resize_buf(fb, 65536);
 		}
-		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
+		ssz = curp->gzip ?
+		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
+		    read(fd, fb->buf + (int)off, fb->sz - off);
 		if (ssz == 0) {
 			fb->sz = off;
 			return(1);
@@ -773,99 +780,42 @@ mparse_readfd(struct mparse *curp, int f
 	if (fd != STDIN_FILENO && close(fd) == -1)
 		perror(file);
 
-	mparse_wait(curp);
 	return(curp->file_status);
 }
 
 enum mandoclevel
 mparse_open(struct mparse *curp, int *fd, const char *file)
 {
-	int		  pfd[2];
-	int		  save_errno;
 	char		 *cp;
 
 	curp->file = file;
+	cp = strrchr(file, '.');
+	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
 
-	/* Unless zipped, try to just open the file. */
+	/* First try to use the filename as it is. */
 
-	if ((cp = strrchr(file, '.')) == NULL ||
-	    strcmp(cp + 1, "gz")) {
-		curp->child = 0;
-		if ((*fd = open(file, O_RDONLY)) != -1)
-			return(MANDOCLEVEL_OK);
+	if ((*fd = open(file, O_RDONLY)) != -1)
+		return(MANDOCLEVEL_OK);
 
-		/* Open failed; try to append ".gz". */
+	/*
+	 * If that doesn't work and the filename doesn't
+	 * already end in .gz, try appending .gz.
+	 */
 
+	if ( ! curp->gzip) {
 		mandoc_asprintf(&cp, "%s.gz", file);
-		file = cp;
-	} else
-		cp = NULL;
-
-	/* Before forking, make sure the file can be read. */
-
-	save_errno = errno;
-	if (access(file, R_OK) == -1) {
-		if (cp != NULL)
-			errno = save_errno;
+		*fd = open(file, O_RDONLY);
 		free(cp);
-		*fd = -1;
-		curp->child = 0;
-		mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
-		return(MANDOCLEVEL_ERROR);
-	}
-
-	/* Run gunzip(1). */
-
-	if (pipe(pfd) == -1) {
-		perror("pipe");
-		exit((int)MANDOCLEVEL_SYSERR);
-	}
-
-	switch (curp->child = fork()) {
-	case -1:
-		perror("fork");
-		exit((int)MANDOCLEVEL_SYSERR);
-	case 0:
-		close(pfd[0]);
-		if (dup2(pfd[1], STDOUT_FILENO) == -1) {
-			perror("dup");
-			exit((int)MANDOCLEVEL_SYSERR);
+		if (*fd != -1) {
+			curp->gzip = 1;
+			return(MANDOCLEVEL_OK);
 		}
-		signal(SIGPIPE, SIG_DFL);
-		execlp("gunzip", "gunzip", "-c", file, NULL);
-		perror("exec");
-		exit((int)MANDOCLEVEL_SYSERR);
-	default:
-		close(pfd[1]);
-		*fd = pfd[0];
-		return(MANDOCLEVEL_OK);
 	}
-}
 
-enum mandoclevel
-mparse_wait(struct mparse *curp)
-{
-	int	  status;
-
-	if (curp->child == 0)
-		return(MANDOCLEVEL_OK);
+	/* Neither worked, give up. */
 
-	if (waitpid(curp->child, &status, 0) == -1) {
-		perror("wait");
-		exit((int)MANDOCLEVEL_SYSERR);
-	}
-	curp->child = 0;
-	if (WIFSIGNALED(status)) {
-		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
-		    "gunzip died from signal %d", WTERMSIG(status));
-		return(MANDOCLEVEL_ERROR);
-	}
-	if (WEXITSTATUS(status)) {
-		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
-		    "gunzip failed with code %d", WEXITSTATUS(status));
-		return(MANDOCLEVEL_ERROR);
-	}
-	return(MANDOCLEVEL_OK);
+	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
+	return(MANDOCLEVEL_ERROR);
 }
 
 struct mparse *
Index: mandoc.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.h,v
retrieving revision 1.203
retrieving revision 1.204
diff -Lmandoc.h -Lmandoc.h -u -p -r1.203 -r1.204
--- mandoc.h
+++ mandoc.h
@@ -438,6 +438,5 @@ void		  mparse_result(struct mparse *,
 const char	 *mparse_getkeep(const struct mparse *);
 const char	 *mparse_strerror(enum mandocerr);
 const char	 *mparse_strlevel(enum mandoclevel);
-enum mandoclevel  mparse_wait(struct mparse *);
 
 __END_DECLS
Index: configure
===================================================================
RCS file: /home/cvs/mdocml/mdocml/configure,v
retrieving revision 1.26
retrieving revision 1.27
diff -Lconfigure -Lconfigure -u -p -r1.26 -r1.27
--- configure
+++ configure
@@ -239,9 +239,9 @@ fi
 
 # --- DBLIB ---
 if [ ${BUILD_DB} -eq 0 ]; then
-	DBLIB=
+	DBLIB="-lz"
 elif [ -z "${DBLIB}" ]; then
-	DBLIB="${DETECTLIB}"
+	DBLIB="${DETECTLIB} -lz"
 	echo "DBLIB=\"${DBLIB}\"" 1>&2
 	echo "DBLIB=\"${DBLIB}\"" 1>&3
 	echo 1>&3
Index: Makefile
===================================================================
RCS file: /home/cvs/mdocml/mdocml/Makefile,v
retrieving revision 1.465
retrieving revision 1.466
diff -LMakefile -LMakefile -u -p -r1.465 -r1.466
--- Makefile
+++ Makefile
@@ -403,7 +403,7 @@ man.cgi: $(CGI_OBJS) libmandoc.a
 	$(CC) $(LDFLAGS) $(STATIC) -o $@ $(CGI_OBJS) libmandoc.a $(DBLIB)
 
 demandoc: $(DEMANDOC_OBJS) libmandoc.a
-	$(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a
+	$(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a -lz
 
 soelim: $(SOELIM_OBJS) compat_reallocarray.o
 	$(CC) $(LDFLAGS) -o $@ $(SOELIM_OBJS) compat_reallocarray.o
Index: INSTALL
===================================================================
RCS file: /home/cvs/mdocml/mdocml/INSTALL,v
retrieving revision 1.11
retrieving revision 1.12
diff -LINSTALL -LINSTALL -u -p -r1.11 -r1.12
--- INSTALL
+++ INSTALL
@@ -84,9 +84,10 @@ manual page source.
 
 Understanding mandoc dependencies
 ---------------------------------
-The mandoc(1), man(1), and demandoc(1) utilities have no external
-dependencies, but makewhatis(8) and apropos(1) depend on the
-following software:
+The mandoc(1), man(1), and demandoc(1) utilities only depend
+on the zlib library for decompressing gzipped manual pages,
+but makewhatis(8) and apropos(1) depend on the following
+additional software:
 
 1. The SQLite database system, see <http://sqlite.org/>.
 The recommended version of SQLite is 3.8.4.3 or newer.  The mandoc
Index: main.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/main.c,v
retrieving revision 1.241
retrieving revision 1.242
diff -Lmain.c -Lmain.c -u -p -r1.241 -r1.242
--- main.c
+++ main.c
@@ -458,10 +458,6 @@ main(int argc, char *argv[])
 				passthrough(resp->file, fd,
 				    conf.output.synopsisonly);
 
-			rctmp = mparse_wait(curp.mp);
-			if (rc < rctmp)
-				rc = rctmp;
-
 			if (argc > 1 && curp.outtype <= OUTT_UTF8)
 				ascii_sepline(curp.outdata);
 		}
Index: mandoc.3
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.3,v
retrieving revision 1.31
retrieving revision 1.32
diff -Lmandoc.3 -Lmandoc.3 -u -p -r1.31 -r1.32
--- mandoc.3
+++ mandoc.3
@@ -37,7 +37,6 @@
 .Nm mparse_result ,
 .Nm mparse_strerror ,
 .Nm mparse_strlevel
-.Nm mparse_wait ,
 .Nd mandoc macro compiler library
 .Sh SYNOPSIS
 .In sys/types.h
@@ -106,10 +105,6 @@
 .Fo mparse_strlevel
 .Fa "enum mandoclevel"
 .Fc
-.Ft "enum mandoclevel"
-.Fo mparse_wait
-.Fa "struct mparse *parse"
-.Fc
 .In sys/types.h
 .In mandoc.h
 .In mdoc.h
@@ -392,20 +387,14 @@ Declared in
 implemented in
 .Pa read.c .
 .It Fn mparse_open
-If the
+Open the file for reading.
+If that fails and
 .Fa fname
-ends in
-.Pa .gz ,
-open with
-.Xr gunzip 1 ;
-otherwise, with
-.Xr open 2 .
-If
-.Xr open 2
-fails, append
-.Pa .gz
-and try with
-.Xr gunzip 1 .
+does not already end in
+.Ql .gz ,
+try again after appending
+.Ql .gz .
+Remember whether the file is gzipped or not.
 Return a file descriptor open for reading in
 .Fa fd ,
 or -1 on failure.
@@ -423,9 +412,6 @@ or
 .Fn mparse_open .
 Pass the associated filename in
 .Va fname .
-Calls
-.Fn mparse_wait
-before returning.
 This function may be called multiple times with different parameters; however,
 .Fn mparse_reset
 should be invoked between parses.
@@ -456,28 +442,6 @@ implemented in
 .Pa read.c .
 .It Fn mparse_strlevel
 Return a statically-allocated string representation of a level code.
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa read.c .
-.It Fn mparse_wait
-Bury a
-.Xr gunzip 1
-child process that was spawned with
-.Fn mparse_open .
-To be called after the parse sequence is complete.
-Not needed after
-.Fn mparse_readfd ,
-but does no harm in that case, either.
-Returns
-.Dv MANDOCLEVEL_OK
-on success and
-.Dv MANDOCLEVEL_SYSERR
-on failure, that is, when
-.Xr wait 2
-fails, or when
-.Xr gunzip 1
-died from a signal or exited with non-zero status.
 Declared in
 .In mandoc.h ,
 implemented in
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-07-19  6:05 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-19  6:05 mdocml: Do not fork and exec gunzip(1), just link with libz instead schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).