zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: job control debug
@ 2018-09-05 20:07 ` Peter Stephenson
  2018-09-06  8:09   ` Peter Stephenson
       [not found]   ` <20180906090902.1f344e9f@camnpupstephen.cam.scsc.local>
  0 siblings, 2 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-05 20:07 UTC (permalink / raw)
  To: Zsh hackers list

This should help debug Rimmerworld pipeline problems.

There may be more that can be added, but the different patterns of
setting process group leaders and attaching the terminal to a process
group has certainly got a lot to do with it.

There appears to be an obscure error where it prints out a bogus
attachtty() location index.

pws


diff --git a/Src/Modules/zpty.c b/Src/Modules/zpty.c
index 2f83f7ce6..f8a5a9534 100644
--- a/Src/Modules/zpty.c
+++ b/Src/Modules/zpty.c
@@ -356,7 +356,7 @@ newptycmd(char *nam, char *pname, char **args, int echo, int nblock)
 	if (get_pty(0, &slave))
 	    exit(1);
 	SHTTY = slave;
-	attachtty(mypid);
+	ATTACHTTY(mypid, 17);
 #ifdef TIOCGWINSZ
 	/* Set the window size before associating with the terminal *
 	 * so that we don't get hit with a SIGWINCH.  I'm paranoid. */
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c
index db70e7d7e..b57245708 100644
--- a/Src/Zle/zle_main.c
+++ b/Src/Zle/zle_main.c
@@ -233,7 +233,7 @@ zsetterm(void)
     shttyinfo.lmodes &= ~LFLUSHO;
 #endif
 
-    attachtty(mypgrp);
+    ATTACHTTY(mypgrp, 18);
     ti = shttyinfo;
 #ifdef HAS_TIO
     if (unset(FLOWCONTROL))
@@ -917,7 +917,7 @@ getbyte(long do_keytmout, int *timeout)
 	    } else if (errno == EIO && !die) {
 		ret = opts[MONITOR];
 		opts[MONITOR] = 1;
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 19);
 		zrefresh();	/* kludge! */
 		opts[MONITOR] = ret;
 		die = 1;
diff --git a/Src/builtin.c b/Src/builtin.c
index 93fa9112c..02bcd854d 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -6129,7 +6129,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 	    if (unset(INTERACTIVE))
 		gettyinfo(&shttyinfo);
 	    /* attach to the tty */
-	    attachtty(mypgrp);
+	    ATTACHTTY(mypgrp, 1);
 	    if (!isem)
 		setcbreak();
 	    readfd = SHTTY;
diff --git a/Src/exec.c b/Src/exec.c
index 615a5086f..9af6ea8d1 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -1020,11 +1020,12 @@ entersubsh(int flags)
 	if (jobtab[list_pipe_job].gleader && (list_pipe || list_pipe_child)) {
 	    if (setpgrp(0L, jobtab[list_pipe_job].gleader) == -1 ||
 		killpg(jobtab[list_pipe_job].gleader, 0) == -1) {
-		jobtab[list_pipe_job].gleader =
-		    jobtab[thisjob].gleader = (list_pipe_child ? mypgrp : getpid());
+		SET_GLEADER(list_pipe_job,
+			    (list_pipe_child ? mypgrp : getpid()), 1);
+		SET_GLEADER(thisjob, jobtab[list_pipe_job].gleader, 2);
 		setpgrp(0L, jobtab[list_pipe_job].gleader);
 		if (!(flags & ESUB_ASYNC))
-		    attachtty(jobtab[thisjob].gleader);
+		    ATTACHTTY(jobtab[thisjob].gleader, 2);
 	    }
 	}
 	else if (!jobtab[thisjob].gleader ||
@@ -1039,13 +1040,13 @@ entersubsh(int flags)
 	     * the operation is allowed to work (in addition to not
 	     * causing the shell to be suspended).
 	     */
-	    jobtab[thisjob].gleader = getpid();
+	    SET_GLEADER(thisjob, getpid(), 3);
 	    if (list_pipe_job != thisjob &&
 		!jobtab[list_pipe_job].gleader)
-		jobtab[list_pipe_job].gleader = jobtab[thisjob].gleader;
+		SET_GLEADER(list_pipe_job, jobtab[thisjob].gleader, 4);
 	    setpgrp(0L, jobtab[thisjob].gleader);
 	    if (!(flags & ESUB_ASYNC))
-		attachtty(jobtab[thisjob].gleader);
+		ATTACHTTY(jobtab[thisjob].gleader, 3);
 	}
     }
     if (!(flags & ESUB_FAKE))
@@ -1683,7 +1684,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 		    /* If the super-job contains only the sub-shell, the
 		       sub-shell is the group leader. */
 		    if (!jn->procs->next || lpforked == 2) {
-			jn->gleader = list_pipe_pid;
+			SET_GLEADER(jn->gleader, list_pipe_pid, 5);
 			jn->stat |= STAT_SUBLEADER;
 			/*
 			 * Pick up any subjob that's still lying around
@@ -1806,7 +1807,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 			    jn->stat |= STAT_SUBJOB | STAT_NOPRINT;
 			    jn->other = list_pipe_pid;	/* see zsh.h */
 			    if (hasprocs(list_pipe_job))
-				jn->gleader = jobtab[list_pipe_job].gleader;
+				SET_GLEADER(jn-jobtab, jobtab[list_pipe_job].gleader, 6);
 			}
 			if ((list_pipe || last1) && hasprocs(list_pipe_job))
 			    killpg(jobtab[list_pipe_job].gleader, SIGSTOP);
diff --git a/Src/hist.c b/Src/hist.c
index dbdc1e4e5..4e50b75e2 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -1116,7 +1116,7 @@ hbegin(int dohist)
 	hist_ring->ftim = time(NULL);
     if ((dohist == 2 || (interact && isset(SHINSTDIN))) && !strin) {
 	histactive = HA_ACTIVE;
-	attachtty(mypgrp);
+	ATTACHTTY(mypgrp, 4);
 	linkcurline();
 	defev = addhistnum(curhist, -1, HIST_FOREIGN);
     } else
diff --git a/Src/jobs.c b/Src/jobs.c
index 38b3d896b..397ef9727 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -260,7 +260,7 @@ handle_sub(int job, int fg)
 		       killpg(jn->gleader, 0) == -1))) {
 		Process p;
 		for (p = jn->procs; p->next; p = p->next);
-		jn->gleader = p->pid;
+		SET_GLEADER(jn-jobtab, p->pid, 7);
 	    }
 	    /* This deleted the job too early if the parent
 	       shell waited for a command in a list that will
@@ -275,7 +275,7 @@ handle_sub(int job, int fg)
 	       now. */
 	    if ((fg || thisjob == job) &&
 		(!jn->procs->next || cp || jn->procs->pid != jn->gleader))
-		attachtty(jn->gleader);
+		ATTACHTTY(jn->gleader, 5);
 	    kill(sj->other, SIGCONT);
 	    if (jn->stat & STAT_DISOWN)
 	    {
@@ -495,7 +495,7 @@ update_job(Job jn)
 	    (jn->gleader == pgrp || (pgrp > 1 && kill(-pgrp, 0) == -1))) {
 	    if (list_pipe) {
 		if (somestopped || (pgrp > 1 && kill(-pgrp, 0) == -1)) {
-		    attachtty(mypgrp);
+		    ATTACHTTY(mypgrp, 6);
 		    /* check window size and adjust if necessary */
 		    adjustwinsize(0);
 		} else {
@@ -522,7 +522,7 @@ update_job(Job jn)
 		    inerrflush();
 		}
 	    } else {
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 7);
 		/* check window size and adjust if necessary */
 		adjustwinsize(0);
 	    }
@@ -1326,7 +1326,8 @@ freejob(Job jn, int deleting)
 	    freejob(jobtab + jn->other, 0);
 	jn = jobtab + job;
     }
-    jn->gleader = jn->other = 0;
+    SET_GLEADER(jn-jobtab, 0, 8);
+    jn->other = 0;
     jn->stat = jn->stty_in_env = 0;
     jn->filelist = NULL;
     jn->ty = NULL;
@@ -1353,7 +1354,7 @@ deletejob(Job jn, int disowning)
 {
     deletefilelist(jn->filelist, disowning);
     if (jn->stat & STAT_ATTACH) {
-	attachtty(mypgrp);
+	ATTACHTTY(mypgrp, 8);
 	adjustwinsize(0);
     }
     if (jn->stat & STAT_SUPERJOB) {
@@ -1395,7 +1396,7 @@ addproc(pid_t pid, char *text, int aux, struct timeval *bgtime)
 	/* if this is the first process we are adding to *
 	 * the job, then it's the group leader.          */
 	if (!jobtab[thisjob].gleader)
-	    jobtab[thisjob].gleader = pid;
+	    SET_GLEADER(thisjob, pid, 9);
 	/* attach this process to end of process list of current job */
 	pnlist = &jobtab[thisjob].procs;
     }
@@ -1643,7 +1644,7 @@ static int initnewjob(int i)
 	zsfree(jobtab[i].pwd);
 	jobtab[i].pwd = NULL;
     }
-    jobtab[i].gleader = 0;
+    SET_GLEADER(i, 0, 10);
 
     if (i > maxjob)
 	maxjob = i;
@@ -2384,9 +2385,9 @@ bin_fg(char *name, char **argv, Options ops, int func)
 			  (jobtab[job].stat & STAT_SUBLEADER) ||
 			  killpg(jobtab[job].gleader, 0) == -1)) &&
 			jobtab[jobtab[job].other].gleader)
-			attachtty(jobtab[jobtab[job].other].gleader);
+			ATTACHTTY(jobtab[jobtab[job].other].gleader, 9);
 		    else
-			attachtty(jobtab[job].gleader);
+			ATTACHTTY(jobtab[job].gleader, 10);
 		}
 	    }
 	    if (stopped) {
@@ -2848,11 +2849,15 @@ acquire_pgrp(void)
 	oldset = signal_block(blockset);
 	while ((ttpgrp = gettygrp()) != -1 && ttpgrp != mypgrp) {
 	    mypgrp = GETPGRP();
+#ifdef DEBUG_JOB_CONTROL
+	    fprintf(stderr, "mypgrp(1) %ld -> %d, pid %d, mypid %ld\n",
+		    lastpgrp, mypgrp, getpid(), mypid);
+#endif
 	    if (mypgrp == mypid) {
 		if (!interact)
 		    break; /* attachtty() will be a no-op, give up */
 		signal_setmask(oldset);
-		attachtty(mypgrp); /* Might generate SIGT* */
+		ATTACHTTY(mypgrp, 11); /* Might generate SIGT* */
 		signal_block(blockset);
 	    }
 	    if (mypgrp == gettygrp())
@@ -2861,6 +2866,10 @@ acquire_pgrp(void)
 	    if (read(0, NULL, 0) != 0) {} /* Might generate SIGT* */
 	    signal_block(blockset);
 	    mypgrp = GETPGRP();
+#ifdef DEBUG_JOB_CONTROL
+	    fprintf(stderr, "mypgrp(2) %ld -> %d, pid %d, mypid %ld\n",
+		    lastpgrp, mypgrp, getpid(), mypid);
+#endif
 	    if (mypgrp == lastpgrp && !interact)
 		break; /* Unlikely that pgrp will ever change */
 	    lastpgrp = mypgrp;
@@ -2868,7 +2877,11 @@ acquire_pgrp(void)
 	if (mypgrp != mypid) {
 	    if (setpgrp(0, 0) == 0) {
 		mypgrp = mypid;
-		attachtty(mypgrp);
+#ifdef DEBUG_JOB_CONTROL
+		fprintf(stderr, "mypgrp(3) %ld -> %d, pid %d, mypid %ld\n",
+			lastpgrp, mypgrp, getpid(), mypid);
+#endif
+		ATTACHTTY(mypgrp, 12);
 	    } else
 		opts[MONITOR] = 0;
 	}
@@ -2886,9 +2899,13 @@ release_pgrp(void)
     if (origpgrp != mypgrp) {
 	/* in linux pid namespaces, origpgrp may never have been set */
 	if (origpgrp) {
-	    attachtty(origpgrp);
+	    ATTACHTTY(origpgrp, 13);
 	    setpgrp(0, origpgrp);
 	}
+#ifdef DEBUG_JOB_CONTROL
+	fprintf(stderr, "mypgrp(4) %d -> %d, pid %d, mypid %ld\n",
+		mypgrp, origpgrp, getpid(), mypid);
+#endif
 	mypgrp = origpgrp;
     }
 }
diff --git a/Src/params.c b/Src/params.c
index a1c299f60..66687fbf3 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2586,7 +2586,7 @@ assignstrvalue(Value v, char *val, int flags)
 		Param pm = v->pm;
                 /* Size doesn't change, can limit actions to only
                  * overwriting bytes in already allocated string */
-                strncpy(z + v->start, val, vlen);
+                memcpy(z + v->start, val, vlen);
 		/* Implement remainder of strsetfn */
 		if (!(pm->node.flags & PM_HASHELEM) &&
 		    ((pm->node.flags & PM_NAMEDDIR) ||
diff --git a/Src/signals.c b/Src/signals.c
index 20c6fdf4a..868764dd9 100644
--- a/Src/signals.c
+++ b/Src/signals.c
@@ -540,7 +540,7 @@ wait_for_processes(void)
 		if (WIFEXITED(status) &&
 		    pn->pid == jn->gleader &&
 		    killpg(pn->pid, 0) == -1) {
-		    jn->gleader = 0;
+		    SET_GLEADER(jn-jobtab, 0, 11);
 		    if (!(jn->stat & STAT_NOSTTY)) {
 			/*
 			 * This PID was in control of the terminal;
@@ -549,7 +549,7 @@ wait_for_processes(void)
 			 * process of this job will become group
 			 * leader, however.
 			 */
-			attachtty(mypgrp);
+			ATTACHTTY(mypgrp, 14);
 		    }
 		}
 	    }
diff --git a/Src/utils.c b/Src/utils.c
index 075d27241..df077f1da 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2952,7 +2952,7 @@ getquery(char *valid_chars, int purge)
     int isem = !strcmp(term, "emacs");
     struct ttyinfo ti;
 
-    attachtty(mypgrp);
+    ATTACHTTY(mypgrp, 15);
 
     gettyinfo(&ti);
 #ifdef HAS_TIO
@@ -4639,13 +4639,21 @@ setcbreak(void)
 
 /* give the tty to some process */
 
-/**/
+/* note: deliberately not automatically prototyped */
 mod_export void
-attachtty(pid_t pgrp)
+attachtty(pid_t pgrp
+#ifdef DEBUG_JOB_CONTROL	  
+	  , int index
+#endif
+    )
 {
     static int ep = 0;
 
     if (jobbing && interact) {
+#ifdef DEBUG_JOB_CONTROL
+	fprintf(stderr, "attachtty(%d): pgrp = %d, mypgrp = %d\n",
+		index, pgrp, mypgrp);
+#endif
 #ifdef HAVE_TCSETPGRP
 	if (SHTTY != -1 && tcsetpgrp(SHTTY, pgrp) == -1 && !ep)
 #else
@@ -4659,7 +4667,7 @@ attachtty(pid_t pgrp)
 #endif
 	{
 	    if (pgrp != mypgrp && kill(-pgrp, 0) == -1)
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 16);
 	    else {
 		if (errno != ENOTTY)
 		{
@@ -4673,6 +4681,18 @@ attachtty(pid_t pgrp)
     }
 }
 
+/**/
+#ifdef DEBUG_JOB_CONTROL
+
+void set_gleader(int job, int pid, int index)
+{
+    jobtab[job].gleader = pid;
+    fprintf(stderr, "set_gleader(%d): %d = %d\n", index, job, pid);
+}
+
+/**/
+#endif /* DEBUG_JOB_CONTROL */
+
 /* get the process group associated with the tty */
 
 /**/
diff --git a/Src/zsh.h b/Src/zsh.h
index 8e7f20b2c..1ec89e56f 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -3292,7 +3292,7 @@ typedef int convchar_t;
 #define MB_METASTRLEN2(str, widthp)	ztrlen(str)
 #define MB_METASTRLEN2END(str, widthp, eptr)	ztrlenend(str, eptr)
 
-#define MB_CHARINIT()
+#define MBT_CHARINIT()
 #define MB_CHARLENCONV(str, len, cp) charlenconv((str), (len), (cp))
 #define MB_CHARLEN(str, len) ((len) ? 1 : 0)
 
@@ -3303,3 +3303,20 @@ typedef int convchar_t;
 #define ZWS(s)	s
 
 #endif /* MULTIBYTE_SUPPORT */
+
+
+/* Uncomment to debug problems with job control */
+/*#define DEBUG_JOB_CONTROL*/
+
+#ifdef DEBUG_JOB_CONTROL
+#define ATTACHTTY(pgrp, index) attachtty(pgpr, index)
+void attachtty(pid_t pgrp, int index);
+
+#define SET_GLEADER(job, pid, index) set_gleader(job, pid, index)
+void set_gleader(int job, int pid, int index);
+#else
+#define ATTACHTTY(pgrp, index) attachtty(pgrp)
+
+#define SET_GLEADER(job, pid, index) (jobtab[(job)].gleader = (pid))
+void attachtty(pid_t pgrp);
+#endif

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: job control debug
  2018-09-05 20:07 ` PATCH: job control debug Peter Stephenson
@ 2018-09-06  8:09   ` Peter Stephenson
       [not found]   ` <20180906090902.1f344e9f@camnpupstephen.cam.scsc.local>
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-06  8:09 UTC (permalink / raw)
  To: Zsh hackers list

On Wed, 5 Sep 2018 21:07:40 +0100
Peter Stephenson <p.w.stephenson@ntlworld.com> wrote:
> This should help debug Rimmerworld pipeline problems.
> +/* Uncomment to debug problems with job control */
> +/*#define DEBUG_JOB_CONTROL*/
> +
> +#ifdef DEBUG_JOB_CONTROL
> +#define ATTACHTTY(pgrp, index) attachtty(pgpr, index)
                                              ^^

Sorry, small typo.

I won't check this in since there seems to be at least one (other)
oddity as you'll see from the loctaion indexes below.

Here's the output of working and non-working cases: nothing glaring
stands out, and indeed there's no key difference between the
attachtty() calls.


Working:

camnpupstephen% echo foo | { sleep 0.01 | more }
attachtty(7233728): pgrp = 10503, mypgrp = 10503
set_gleader(10): 1 = 0
set_gleader(3): 1 = 10511
attachtty(3): pgrp = 10511, mypgrp = 10503
set_gleader(10): 1 = 0
attachtty(7233728): pgrp = 10503, mypgrp = 10503
set_gleader(10): 1 = 0
set_gleader(9): 1 = 10511
set_gleader(10): 2 = 0
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 10512
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 10512
set_gleader(11): 2 = 0
attachtty(14): pgrp = 10503, mypgrp = 10503
set_gleader(8): 2 = 0
attachtty(8): pgrp = 10503, mypgrp = 10503
set_gleader(8): 1 = 0
attachtty(4): pgrp = 10503, mypgrp = 10503


Non-working:

camnpupstephen% echo foo | { : | more }
attachtty(7233728): pgrp = 10503, mypgrp = 10503
set_gleader(10): 1 = 0
set_gleader(3): 1 = 10514
attachtty(3): pgrp = 10514, mypgrp = 10503
set_gleader(10): 1 = 0
attachtty(7233728): pgrp = 10503, mypgrp = 10503
set_gleader(10): 1 = 0
set_gleader(9): 1 = 10514
set_gleader(10): 2 = 0
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 10515
set_gleader(10): 1 = 0
zsh: done                    echo foo | 
zsh: suspended (tty output)  { : | more; }
set_gleader(10): 2 = 0
set_gleader(9): 2 = 10515
set_gleader(11): 2 = 0
attachtty(14): pgrp = 10503, mypgrp = 10503
set_gleader(6): 2 = 10514
attachtty(4): pgrp = 10503, mypgrp = 10503

parent shell = 10503
more = 10516

I can also see another zsh job while this is suspended --- probably a
"superjob" or something like that.


pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: job control debug
       [not found]   ` <20180906090902.1f344e9f@camnpupstephen.cam.scsc.local>
@ 2018-09-06  9:22     ` Peter Stephenson
  2018-09-07  3:18       ` Bart Schaefer
  0 siblings, 1 reply; 17+ messages in thread
From: Peter Stephenson @ 2018-09-06  9:22 UTC (permalink / raw)
  To: Zsh hackers list

Typo fixed, now with setpgrp debugging.

Starting to think the key difference may be set_gleader(11) --- transfer
of job ownership on exit, which happens too early for that to be
possible in the bad case.

pws


Good:

% echo | { sleep 0.01 | more }
attachtty(7233728): pgrp = 15378, mypgrp = 15378
set_gleader(10): 1 = 0
set_gleader(3): 1 = 15379
setpgrp(4): pid 0, pgid 15379, current pid 15379, ret 0
attachtty(3): pgrp = 15379, mypgrp = 15378
set_gleader(10): 1 = 0
attachtty(7233728): pgrp = 15378, mypgrp = 15378
set_gleader(10): 1 = 0
set_gleader(9): 1 = 15379
set_gleader(10): 2 = 0
setpgrp(1): pid 0, pgid 15379, current pid 15380, ret 0
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 15380
setpgrp(1): pid 0, pgid 15379, current pid 15381, ret 0
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 15380
set_gleader(11): 2 = 0
attachtty(14): pgrp = 15378, mypgrp = 15378
set_gleader(8): 2 = 0
attachtty(8): pgrp = 15378, mypgrp = 15378
set_gleader(8): 1 = 0
attachtty(4): pgrp = 15378, mypgrp = 15378


Bad ("sleep 0" is the same, should perhaps use that for consistency):

camnpupstephen% echo | { : | more }
attachtty(7233728): pgrp = 15378, mypgrp = 15378
set_gleader(10): 1 = 0
set_gleader(3): 1 = 15384
setpgrp(4): pid 0, pgid 15384, current pid 15384, ret 0
attachtty(3): pgrp = 15384, mypgrp = 15378
set_gleader(10): 1 = 0
attachtty(7233728): pgrp = 15378, mypgrp = 15378
set_gleader(10): 1 = 0
set_gleader(9): 1 = 15384
set_gleader(10): 2 = 0
setpgrp(1): pid 0, pgid 15384, current pid 15385, ret 0
set_gleader(10): 1 = 0
set_gleader(10): 2 = 0
set_gleader(9): 2 = 15385
setpgrp(1): pid 0, pgid 15384, current pid 15386, ret 0
set_gleader(10): 1 = 0
zsh: done                    echo |
zsh: suspended (tty output)  { : | more; }
set_gleader(10): 2 = 0
set_gleader(9): 2 = 15385
set_gleader(11): 2 = 0
attachtty(14): pgrp = 15378, mypgrp = 15378
set_gleader(6): 2 = 15384
attachtty(4): pgrp = 15378, mypgrp = 15378


diff --git a/Src/Modules/zpty.c b/Src/Modules/zpty.c
index 2f83f7c..f8a5a95 100644
--- a/Src/Modules/zpty.c
+++ b/Src/Modules/zpty.c
@@ -356,7 +356,7 @@ newptycmd(char *nam, char *pname, char **args, int echo, int nblock)
 	if (get_pty(0, &slave))
 	    exit(1);
 	SHTTY = slave;
-	attachtty(mypid);
+	ATTACHTTY(mypid, 17);
 #ifdef TIOCGWINSZ
 	/* Set the window size before associating with the terminal *
 	 * so that we don't get hit with a SIGWINCH.  I'm paranoid. */
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c
index 3487b5d..770c0c4 100644
--- a/Src/Zle/zle_main.c
+++ b/Src/Zle/zle_main.c
@@ -233,7 +233,7 @@ zsetterm(void)
     shttyinfo.lmodes &= ~LFLUSHO;
 #endif
 
-    attachtty(mypgrp);
+    ATTACHTTY(mypgrp, 18);
     ti = shttyinfo;
 #ifdef HAS_TIO
     if (unset(FLOWCONTROL))
@@ -917,7 +917,7 @@ getbyte(long do_keytmout, int *timeout, int full)
 	    } else if (errno == EIO && !die) {
 		ret = opts[MONITOR];
 		opts[MONITOR] = 1;
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 19);
 		zrefresh();	/* kludge! */
 		opts[MONITOR] = ret;
 		die = 1;
diff --git a/Src/builtin.c b/Src/builtin.c
index 93fa911..02bcd85 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -6129,7 +6129,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 	    if (unset(INTERACTIVE))
 		gettyinfo(&shttyinfo);
 	    /* attach to the tty */
-	    attachtty(mypgrp);
+	    ATTACHTTY(mypgrp, 1);
 	    if (!isem)
 		setcbreak();
 	    readfd = SHTTY;
diff --git a/Src/exec.c b/Src/exec.c
index 09ee132..e00fae0 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -1022,17 +1022,18 @@ entersubsh(int flags)
 	}
     } else if (thisjob != -1 && (flags & ESUB_PGRP)) {
 	if (jobtab[list_pipe_job].gleader && (list_pipe || list_pipe_child)) {
-	    if (setpgrp(0L, jobtab[list_pipe_job].gleader) == -1 ||
+	    if (SETPGRP(0L, jobtab[list_pipe_job].gleader, 1) == -1 ||
 		killpg(jobtab[list_pipe_job].gleader, 0) == -1) {
-		jobtab[list_pipe_job].gleader =
-		    jobtab[thisjob].gleader = (list_pipe_child ? mypgrp : getpid());
-		setpgrp(0L, jobtab[list_pipe_job].gleader);
+		SET_GLEADER(list_pipe_job,
+			    (list_pipe_child ? mypgrp : getpid()), 1);
+		SET_GLEADER(thisjob, jobtab[list_pipe_job].gleader, 2);
+		SETPGRP(0L, jobtab[list_pipe_job].gleader, 2);
 		if (!(flags & ESUB_ASYNC))
-		    attachtty(jobtab[thisjob].gleader);
+		    ATTACHTTY(jobtab[thisjob].gleader, 2);
 	    }
 	}
 	else if (!jobtab[thisjob].gleader ||
-		 setpgrp(0L, jobtab[thisjob].gleader) == -1) {
+		 SETPGRP(0L, jobtab[thisjob].gleader, 3) == -1) {
 	    /*
 	     * This is the standard point at which a newly started
 	     * process gets put into the foreground by taking over
@@ -1043,13 +1044,13 @@ entersubsh(int flags)
 	     * the operation is allowed to work (in addition to not
 	     * causing the shell to be suspended).
 	     */
-	    jobtab[thisjob].gleader = getpid();
+	    SET_GLEADER(thisjob, getpid(), 3);
 	    if (list_pipe_job != thisjob &&
 		!jobtab[list_pipe_job].gleader)
-		jobtab[list_pipe_job].gleader = jobtab[thisjob].gleader;
-	    setpgrp(0L, jobtab[thisjob].gleader);
+		SET_GLEADER(list_pipe_job, jobtab[thisjob].gleader, 4);
+	    SETPGRP(0L, jobtab[thisjob].gleader, 4);
 	    if (!(flags & ESUB_ASYNC))
-		attachtty(jobtab[thisjob].gleader);
+		ATTACHTTY(jobtab[thisjob].gleader, 3);
 	}
     }
     if (!(flags & ESUB_FAKE))
@@ -1687,7 +1688,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 		    /* If the super-job contains only the sub-shell, the
 		       sub-shell is the group leader. */
 		    if (!jn->procs->next || lpforked == 2) {
-			jn->gleader = list_pipe_pid;
+			SET_GLEADER(jn->gleader, list_pipe_pid, 5);
 			jn->stat |= STAT_SUBLEADER;
 			/*
 			 * Pick up any subjob that's still lying around
@@ -1810,7 +1811,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 			    jn->stat |= STAT_SUBJOB | STAT_NOPRINT;
 			    jn->other = list_pipe_pid;	/* see zsh.h */
 			    if (hasprocs(list_pipe_job))
-				jn->gleader = jobtab[list_pipe_job].gleader;
+				SET_GLEADER(jn-jobtab, jobtab[list_pipe_job].gleader, 6);
 			}
 			if ((list_pipe || last1) && hasprocs(list_pipe_job))
 			    killpg(jobtab[list_pipe_job].gleader, SIGSTOP);
@@ -1833,7 +1834,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 			 * do anything other than the following, but no
 			 * doubt we'll find out...
 			 */
-			setpgrp(0L, mypgrp = getpid());
+			SETPGRP(0L, mypgrp = getpid(), 5);
 			close(synch[1]);
 			kill(getpid(), SIGSTOP);
 			list_pipe = 0;
diff --git a/Src/hist.c b/Src/hist.c
index dbdc1e4..4e50b75 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -1116,7 +1116,7 @@ hbegin(int dohist)
 	hist_ring->ftim = time(NULL);
     if ((dohist == 2 || (interact && isset(SHINSTDIN))) && !strin) {
 	histactive = HA_ACTIVE;
-	attachtty(mypgrp);
+	ATTACHTTY(mypgrp, 4);
 	linkcurline();
 	defev = addhistnum(curhist, -1, HIST_FOREIGN);
     } else
diff --git a/Src/jobs.c b/Src/jobs.c
index 38b3d89..1edb065 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -260,7 +260,7 @@ handle_sub(int job, int fg)
 		       killpg(jn->gleader, 0) == -1))) {
 		Process p;
 		for (p = jn->procs; p->next; p = p->next);
-		jn->gleader = p->pid;
+		SET_GLEADER(jn-jobtab, p->pid, 7);
 	    }
 	    /* This deleted the job too early if the parent
 	       shell waited for a command in a list that will
@@ -275,7 +275,7 @@ handle_sub(int job, int fg)
 	       now. */
 	    if ((fg || thisjob == job) &&
 		(!jn->procs->next || cp || jn->procs->pid != jn->gleader))
-		attachtty(jn->gleader);
+		ATTACHTTY(jn->gleader, 5);
 	    kill(sj->other, SIGCONT);
 	    if (jn->stat & STAT_DISOWN)
 	    {
@@ -495,7 +495,7 @@ update_job(Job jn)
 	    (jn->gleader == pgrp || (pgrp > 1 && kill(-pgrp, 0) == -1))) {
 	    if (list_pipe) {
 		if (somestopped || (pgrp > 1 && kill(-pgrp, 0) == -1)) {
-		    attachtty(mypgrp);
+		    ATTACHTTY(mypgrp, 6);
 		    /* check window size and adjust if necessary */
 		    adjustwinsize(0);
 		} else {
@@ -522,7 +522,7 @@ update_job(Job jn)
 		    inerrflush();
 		}
 	    } else {
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 7);
 		/* check window size and adjust if necessary */
 		adjustwinsize(0);
 	    }
@@ -1326,7 +1326,8 @@ freejob(Job jn, int deleting)
 	    freejob(jobtab + jn->other, 0);
 	jn = jobtab + job;
     }
-    jn->gleader = jn->other = 0;
+    SET_GLEADER(jn-jobtab, 0, 8);
+    jn->other = 0;
     jn->stat = jn->stty_in_env = 0;
     jn->filelist = NULL;
     jn->ty = NULL;
@@ -1353,7 +1354,7 @@ deletejob(Job jn, int disowning)
 {
     deletefilelist(jn->filelist, disowning);
     if (jn->stat & STAT_ATTACH) {
-	attachtty(mypgrp);
+	ATTACHTTY(mypgrp, 8);
 	adjustwinsize(0);
     }
     if (jn->stat & STAT_SUPERJOB) {
@@ -1395,7 +1396,7 @@ addproc(pid_t pid, char *text, int aux, struct timeval *bgtime)
 	/* if this is the first process we are adding to *
 	 * the job, then it's the group leader.          */
 	if (!jobtab[thisjob].gleader)
-	    jobtab[thisjob].gleader = pid;
+	    SET_GLEADER(thisjob, pid, 9);
 	/* attach this process to end of process list of current job */
 	pnlist = &jobtab[thisjob].procs;
     }
@@ -1643,7 +1644,7 @@ static int initnewjob(int i)
 	zsfree(jobtab[i].pwd);
 	jobtab[i].pwd = NULL;
     }
-    jobtab[i].gleader = 0;
+    SET_GLEADER(i, 0, 10);
 
     if (i > maxjob)
 	maxjob = i;
@@ -2384,9 +2385,9 @@ bin_fg(char *name, char **argv, Options ops, int func)
 			  (jobtab[job].stat & STAT_SUBLEADER) ||
 			  killpg(jobtab[job].gleader, 0) == -1)) &&
 			jobtab[jobtab[job].other].gleader)
-			attachtty(jobtab[jobtab[job].other].gleader);
+			ATTACHTTY(jobtab[jobtab[job].other].gleader, 9);
 		    else
-			attachtty(jobtab[job].gleader);
+			ATTACHTTY(jobtab[job].gleader, 10);
 		}
 	    }
 	    if (stopped) {
@@ -2848,11 +2849,15 @@ acquire_pgrp(void)
 	oldset = signal_block(blockset);
 	while ((ttpgrp = gettygrp()) != -1 && ttpgrp != mypgrp) {
 	    mypgrp = GETPGRP();
+#ifdef DEBUG_JOB_CONTROL
+	    fprintf(stderr, "mypgrp(1) %ld -> %d, pid %d, mypid %ld\n",
+		    lastpgrp, mypgrp, getpid(), mypid);
+#endif
 	    if (mypgrp == mypid) {
 		if (!interact)
 		    break; /* attachtty() will be a no-op, give up */
 		signal_setmask(oldset);
-		attachtty(mypgrp); /* Might generate SIGT* */
+		ATTACHTTY(mypgrp, 11); /* Might generate SIGT* */
 		signal_block(blockset);
 	    }
 	    if (mypgrp == gettygrp())
@@ -2861,14 +2866,22 @@ acquire_pgrp(void)
 	    if (read(0, NULL, 0) != 0) {} /* Might generate SIGT* */
 	    signal_block(blockset);
 	    mypgrp = GETPGRP();
+#ifdef DEBUG_JOB_CONTROL
+	    fprintf(stderr, "mypgrp(2) %ld -> %d, pid %d, mypid %ld\n",
+		    lastpgrp, mypgrp, getpid(), mypid);
+#endif
 	    if (mypgrp == lastpgrp && !interact)
 		break; /* Unlikely that pgrp will ever change */
 	    lastpgrp = mypgrp;
 	}
 	if (mypgrp != mypid) {
-	    if (setpgrp(0, 0) == 0) {
+	    if (SETPGRP(0, 0, 6) == 0) {
 		mypgrp = mypid;
-		attachtty(mypgrp);
+#ifdef DEBUG_JOB_CONTROL
+		fprintf(stderr, "mypgrp(3) %ld -> %d, pid %d, mypid %ld\n",
+			lastpgrp, mypgrp, getpid(), mypid);
+#endif
+		ATTACHTTY(mypgrp, 12);
 	    } else
 		opts[MONITOR] = 0;
 	}
@@ -2886,9 +2899,13 @@ release_pgrp(void)
     if (origpgrp != mypgrp) {
 	/* in linux pid namespaces, origpgrp may never have been set */
 	if (origpgrp) {
-	    attachtty(origpgrp);
-	    setpgrp(0, origpgrp);
+	    ATTACHTTY(origpgrp, 13);
+	    SETPGRP(0, origpgrp, 7);
 	}
+#ifdef DEBUG_JOB_CONTROL
+	fprintf(stderr, "mypgrp(4) %d -> %d, pid %d, mypid %ld\n",
+		mypgrp, origpgrp, getpid(), mypid);
+#endif
 	mypgrp = origpgrp;
     }
 }
diff --git a/Src/params.c b/Src/params.c
index a1c299f..66687fb 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2586,7 +2586,7 @@ assignstrvalue(Value v, char *val, int flags)
 		Param pm = v->pm;
                 /* Size doesn't change, can limit actions to only
                  * overwriting bytes in already allocated string */
-                strncpy(z + v->start, val, vlen);
+                memcpy(z + v->start, val, vlen);
 		/* Implement remainder of strsetfn */
 		if (!(pm->node.flags & PM_HASHELEM) &&
 		    ((pm->node.flags & PM_NAMEDDIR) ||
diff --git a/Src/signals.c b/Src/signals.c
index 20c6fdf..868764d 100644
--- a/Src/signals.c
+++ b/Src/signals.c
@@ -540,7 +540,7 @@ wait_for_processes(void)
 		if (WIFEXITED(status) &&
 		    pn->pid == jn->gleader &&
 		    killpg(pn->pid, 0) == -1) {
-		    jn->gleader = 0;
+		    SET_GLEADER(jn-jobtab, 0, 11);
 		    if (!(jn->stat & STAT_NOSTTY)) {
 			/*
 			 * This PID was in control of the terminal;
@@ -549,7 +549,7 @@ wait_for_processes(void)
 			 * process of this job will become group
 			 * leader, however.
 			 */
-			attachtty(mypgrp);
+			ATTACHTTY(mypgrp, 14);
 		    }
 		}
 	    }
diff --git a/Src/utils.c b/Src/utils.c
index 075d272..42f6ab7 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2952,7 +2952,7 @@ getquery(char *valid_chars, int purge)
     int isem = !strcmp(term, "emacs");
     struct ttyinfo ti;
 
-    attachtty(mypgrp);
+    ATTACHTTY(mypgrp, 15);
 
     gettyinfo(&ti);
 #ifdef HAS_TIO
@@ -4639,13 +4639,21 @@ setcbreak(void)
 
 /* give the tty to some process */
 
-/**/
+/* note: deliberately not automatically prototyped */
 mod_export void
-attachtty(pid_t pgrp)
+attachtty(pid_t pgrp
+#ifdef DEBUG_JOB_CONTROL
+	  , int index
+#endif
+    )
 {
     static int ep = 0;
 
     if (jobbing && interact) {
+#ifdef DEBUG_JOB_CONTROL
+	fprintf(stderr, "attachtty(%d): pgrp = %d, mypgrp = %d\n",
+		index, pgrp, mypgrp);
+#endif
 #ifdef HAVE_TCSETPGRP
 	if (SHTTY != -1 && tcsetpgrp(SHTTY, pgrp) == -1 && !ep)
 #else
@@ -4658,8 +4666,11 @@ attachtty(pid_t pgrp)
 # endif
 #endif
 	{
+#ifdef DEBUG_JOB_CONTROL
+	    fprintf(stderr, "attachtty failed\n");
+#endif
 	    if (pgrp != mypgrp && kill(-pgrp, 0) == -1)
-		attachtty(mypgrp);
+		ATTACHTTY(mypgrp, 16);
 	    else {
 		if (errno != ENOTTY)
 		{
@@ -4673,6 +4684,26 @@ attachtty(pid_t pgrp)
     }
 }
 
+/**/
+#ifdef DEBUG_JOB_CONTROL
+
+void set_gleader(int job, int pid, int index)
+{
+    jobtab[job].gleader = pid;
+    fprintf(stderr, "set_gleader(%d): %d = %d\n", index, job, pid);
+}
+
+int setpgrp_debug(int pid, int pgid, int index)
+{
+    int ret = setpgrp(pid, pgid);
+    fprintf(stderr, "setpgrp(%d): pid %d, pgid %d, current pid %d, ret %d\n",
+	    index, pid, pgid, getpid(), ret);
+    return ret;
+}
+
+/**/
+#endif /* DEBUG_JOB_CONTROL */
+
 /* get the process group associated with the tty */
 
 /**/
diff --git a/Src/zsh.h b/Src/zsh.h
index 8e7f20b..bcad47b 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -3292,7 +3292,7 @@ typedef int convchar_t;
 #define MB_METASTRLEN2(str, widthp)	ztrlen(str)
 #define MB_METASTRLEN2END(str, widthp, eptr)	ztrlenend(str, eptr)
 
-#define MB_CHARINIT()
+#define MBT_CHARINIT()
 #define MB_CHARLENCONV(str, len, cp) charlenconv((str), (len), (cp))
 #define MB_CHARLEN(str, len) ((len) ? 1 : 0)
 
@@ -3303,3 +3303,26 @@ typedef int convchar_t;
 #define ZWS(s)	s
 
 #endif /* MULTIBYTE_SUPPORT */
+
+
+/* Uncomment to debug problems with job control */
+/*#define DEBUG_JOB_CONTROL*/
+
+#ifdef DEBUG_JOB_CONTROL
+#define ATTACHTTY(pgrp, index) attachtty(pgrp, index)
+void attachtty(pid_t pgrp, int index);
+
+#define SET_GLEADER(job, pid, index) set_gleader(job, pid, index)
+void set_gleader(int job, int pid, int index);
+
+#define SETPGRP(pid, pgid, index) setpgrp_debug(pid, pgid, index)
+int setpgrp_debug(int pid, int pgid, int index);
+#else
+
+#define ATTACHTTY(pgrp, index) attachtty(pgrp)
+
+#define SET_GLEADER(job, pid, index) (jobtab[(job)].gleader = (pid))
+void attachtty(pid_t pgrp);
+
+#define SETPGRP(pid, pgid, index) setpgrp(pid, pgid)
+#endif

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: job control debug
  2018-09-06  9:22     ` Peter Stephenson
@ 2018-09-07  3:18       ` Bart Schaefer
  2018-09-07  9:18         ` Peter Stephenson
       [not found]         ` <20180907101852.62415ff9@camnpupstephen.cam.scsc.local>
  0 siblings, 2 replies; 17+ messages in thread
From: Bart Schaefer @ 2018-09-07  3:18 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

On Thu, Sep 6, 2018 at 2:22 AM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
> Typo fixed, now with setpgrp debugging.
>
> Starting to think the key difference may be set_gleader(11) --- transfer
> of job ownership on exit, which happens too early for that to be
> possible in the bad case.

I don't think that can be it, at least not directly.  set_gleader()
only updates internal data structures.  Something has to be either
changing the tty process group too soon, or not soon enough, in order
for a TTOU signal to be sent.  The other possibility is that in the
"good" case the TTOU is sent but the process is ignoring it, whereas
in the bad case the process is allowing it.  You may need to add some
debugging for that.

In particular entersubsh() does some juggling of SIGTTOU -- which I
think predates this bug appearing, but there may be an interaction of
that with some later change.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: job control debug
  2018-09-07  3:18       ` Bart Schaefer
@ 2018-09-07  9:18         ` Peter Stephenson
       [not found]         ` <20180907101852.62415ff9@camnpupstephen.cam.scsc.local>
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-07  9:18 UTC (permalink / raw)
  To: Zsh hackers list

On Thu, 6 Sep 2018 20:18:14 -0700
Bart Schaefer <schaefer@brasslantern.com> wrote:
> In particular entersubsh() does some juggling of SIGTTOU -- which I
> think predates this bug appearing, but there may be an interaction of
> that with some later change.

That's definitely a good tip.  We usually get here:

    if (!job_control_ok) {
	/*
	 * If this process is not going to be doing job control,
	 * we don't want to do special things with the corresponding
	 * signals.  If it is, we need to keep the special behaviour:
	 * see note about attachtty() above.
	 */
	signal_default(SIGTTOU);
	signal_default(SIGTTIN);
	signal_default(SIGTSTP);
    }

Sure enough, disabling this stops the problem happening.  Note this is
not the case where there is no job control at all --- that's a separate
test.  This is if we think we can't do job control in the current
subprocess even if we could in the parent.

What do you think of the following?  If we are in list_pipe_job land, aka
Rimmerworld, and we're attached to the terminal, keep the current signal
behaviour.

My best guess about what's changed is a newly exposed race.

diff --git a/Src/exec.c b/Src/exec.c
index 09ee132..4861ae5 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -1029,6 +1029,13 @@ entersubsh(int flags)
 		setpgrp(0L, jobtab[list_pipe_job].gleader);
 		if (!(flags & ESUB_ASYNC))
 		    attachtty(jobtab[thisjob].gleader);
+	    } else if (gettygrp() == GETPGRP()) {
+		/*
+		 * There are races where if the process is attached
+		 * to the terminal blocking SIGTTOU causes errors.
+		 * So just leaves signals alone.
+		 */
+		job_control_ok = 1;
 	    }
 	}
 	else if (!jobtab[thisjob].gleader ||

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: job control debug
       [not found]         ` <20180907101852.62415ff9@camnpupstephen.cam.scsc.local>
@ 2018-09-07 11:21           ` Peter Stephenson
       [not found]           ` <20180907122145.2af5bcba@camnpupstephen.cam.scsc.local>
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-07 11:21 UTC (permalink / raw)
  To: Zsh hackers list

On Fri, 7 Sep 2018 10:18:52 +0100
Peter Stephenson <p.stephenson@samsung.com> wrote:
> What do you think of the following?  If we are in list_pipe_job land, aka
> Rimmerworld, and we're attached to the terminal, keep the current signal
> behaviour.

No, don't believe this.  We get to this code way too often for this to
be useful.  (Why?  Is list_pipe_job being set too often or this how it's
supposed to work?)

> diff --git a/Src/exec.c b/Src/exec.c
> index 09ee132..4861ae5 100644
> --- a/Src/exec.c
> +++ b/Src/exec.c
> @@ -1029,6 +1029,13 @@ entersubsh(int flags)
>  		setpgrp(0L, jobtab[list_pipe_job].gleader);
>  		if (!(flags & ESUB_ASYNC))
>  		    attachtty(jobtab[thisjob].gleader);
> +	    } else if (gettygrp() == GETPGRP()) {
> +		/*
> +		 * There are races where if the process is attached
> +		 * to the terminal blocking SIGTTOU causes errors.
> +		 * So just leaves signals alone.
> +		 */
> +		job_control_ok = 1;
>  	    }
>  	}
>  	else if (!jobtab[thisjob].gleader ||

I'm seeing the terminal being attached to the first forked process which
is the group leader for the list pipe job.  So at this point  (the if
just above the patch) we're checking and finding that process is still
there and we're in the right process group, so everything is OK.

But it isn't OK.

I even tried moving this check after the point where we set the signal,
to see if there was a race with that process exiting after the check.
But that drew a blank, too --- it still thinks that process is there and
attached to the terminal and we're in the same process group.

But in that case we won't get SIGTTOU.  I can't see any further
setpgrp() that would put us in a different one, and I can't see any
attachtty() that would reassign the terminal.

Once we're in the process group and attached to the terminal, it
shouldn't matter if the other process exits.  This only gets confusing
if the shell has to decide who has grabbed the terminal from outside,
which shouldn't be a problem here --- it's just letting the 
synchronous processed manage themselves.

So I'm really confused.

Debug: I have pushed the job_control_debug branch.  Problems I was
seeing before were (i) because I was using an inconsistent ZLE library
that didn't instrument attachtty() (ii) because I wasn't flushing stderr
so I was seeing multiple occurrences of the same debug output.

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* PATCH: Rimmerworld pipeline race
       [not found]           ` <20180907122145.2af5bcba@camnpupstephen.cam.scsc.local>
@ 2018-09-07 14:01             ` Peter Stephenson
       [not found]             ` <20180907150140.46a05880@camnpupstephen.cam.scsc.local>
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-07 14:01 UTC (permalink / raw)
  To: Zsh hackers list

OK, this is *much* closer and possibly correct.

When adding a new process to the list in the main shell, we looked to
see if the group leader for that job was empty and if so marked the new
process as the group leader.

This conflicts with entersubsh(), where there are hairy list-pipe tests
to determine what will actually be used as the process group leader
(look in the block controlled by the ESUB_PGRP test --- that's where I
was fiddling in the last patch, but this bit isn't the one that's
wrong).

Most of the time this isn't a problem, but if we decide in the main
shell that that process exited while that group was in control of the terminal,
the main shell will think it's time for it to reclaim the terminal.
Whatever happens to be running in the foreground will then find it's
been pushed into the background.

This showed up in this case

  echo help | { cat | more }

because

- The echo had set the process group in a list_pipe_job fashion.

- The cat was in that process group and the bogus logic made the parent
shell think it had its own process group.

- The more put itself in the same process group, too.

- The cat exited.  The following logic

		if (WIFEXITED(status) &&
		    pn->pid == jn->gleader &&
		    killpg(pn->pid, 0) == -1) {
		    jn->gleader = 0;
		    if (!(jn->stat & STAT_NOSTTY)) {
			/*
			 * This PID was in control of the terminal;
			 * reclaim terminal now it has exited.
			 * It's still possible some future forked
			 * process of this job will become group
			 * leader, however.
			 */
			attachtty(mypgrp);
		    }
		}

  then triggered, grabbing back the terminal.  The more found itself
  without a terminal.

The fix is to second-guess the test the process itself makes when
setting the group leader and to add some clear comments.  Then
jn->gleader in the hunk above correctly reflects the actual foreground
process group, so the shell doesn't reclaim the terminal yet.

Neither of the two related bits, in entersubsh() or addproc(), is new,
increasing my suspicion that the problem isn't fundamentally new but a
newly exposed race.

It's hard to think how fixing this can make things worse (although I
always say this).  How about committing it, making a new test build,
encouraging EVERYONE to try it out, and seeing if that is releasable?

pws


diff --git a/Src/exec.c b/Src/exec.c
index 09ee132..8062f0c 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -994,6 +994,31 @@ enum {
     ESUB_JOB_CONTROL = 0x40
 };
 
+
+/*
+ * If the list_pipe magic is in effect, its process group controls
+ * the terminal: return this.  Otherwise, return -1.
+ *
+ * C.f. the test in entersubsh() for process group of forked subshell.
+ */
+
+/**/
+int
+get_list_pipe_process_group(void)
+{
+    int gleader;
+    if (!list_pipe && !list_pipe_child)
+	return -1;
+    gleader = jobtab[list_pipe_job].gleader;
+
+    if (gleader &&
+	killpg(gleader, 0) != -1 &&
+	gettygrp() == gleader)
+	return gleader;
+
+    return -1;
+}
+
 /**/
 static void
 entersubsh(int flags)
diff --git a/Src/jobs.c b/Src/jobs.c
index 38b3d89..c841150 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -1392,10 +1392,21 @@ addproc(pid_t pid, char *text, int aux, struct timeval *bgtime)
     if (!aux)
     {
 	pn->bgtime = *bgtime;
-	/* if this is the first process we are adding to *
-	 * the job, then it's the group leader.          */
-	if (!jobtab[thisjob].gleader)
-	    jobtab[thisjob].gleader = pid;
+	/*
+	 * If this is the first process we are adding to
+	 * the job, then it's the group leader.
+	 *
+	 * Exception: if the list_pipe_job pipeline madness is in
+	 * effect, we need to use the process group that's already
+	 * controlling the terminal or we'll attach to something
+	 * bogus when that process exits.  This test mimics the
+	 * test the process itself makes in entersubsh(), so the
+	 * two should be kept in sync.
+	 */
+	if (!jobtab[thisjob].gleader) {
+	    int gleader = get_list_pipe_process_group();
+	    jobtab[thisjob].gleader = (gleader == -1) ? pid : gleader;
+	}
 	/* attach this process to end of process list of current job */
 	pnlist = &jobtab[thisjob].procs;
     }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
       [not found]             ` <20180907150140.46a05880@camnpupstephen.cam.scsc.local>
@ 2018-09-07 14:58               ` Peter Stephenson
  2018-09-07 16:46                 ` Peter Stephenson
  2018-09-10 22:55                 ` Axel Beckert
  0 siblings, 2 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-07 14:58 UTC (permalink / raw)
  To: Zsh hackers list

On Fri, 7 Sep 2018 15:01:40 +0100
Peter Stephenson <p.stephenson@samsung.com> wrote:
> OK, this is *much* closer and possibly correct.

Here's a more sophisticated fix, though I'll take votes on which to use
for now.

Use the existing subshell syncronisation mechanism to return the
group leader actually used by the subshell, if it picks one.  No second
guessing involved --- just argument passing and a read/write between
the processes that was already there but used just to terminate in
an error when the write side was closed.

pws


diff --git a/Src/exec.c b/Src/exec.c
index 09ee132..074265f 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -994,9 +994,15 @@ enum {
     ESUB_JOB_CONTROL = 0x40
 };
 
+/*
+ * gleaderp may be NULL.  Otherwise, *gleaderp is set to point to the
+ * group leader of the job of the new process if this is assigned.  Else
+ * it is left alone: it is initialised to -1.
+ */
+
 /**/
 static void
-entersubsh(int flags)
+entersubsh(int flags, int *gleaderp)
 {
     int i, sig, monitor, job_control_ok;
 
@@ -1030,6 +1036,8 @@ entersubsh(int flags)
 		if (!(flags & ESUB_ASYNC))
 		    attachtty(jobtab[thisjob].gleader);
 	    }
+	    if (gleaderp)
+		*gleaderp = jobtab[list_pipe_job].gleader;
 	}
 	else if (!jobtab[thisjob].gleader ||
 		 setpgrp(0L, jobtab[thisjob].gleader) == -1) {
@@ -1050,6 +1058,8 @@ entersubsh(int flags)
 	    setpgrp(0L, jobtab[thisjob].gleader);
 	    if (!(flags & ESUB_ASYNC))
 		attachtty(jobtab[thisjob].gleader);
+	    if (gleaderp)
+		*gleaderp = jobtab[thisjob].gleader;
 	}
     }
     if (!(flags & ESUB_FAKE))
@@ -1682,7 +1692,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 		    curjob = newjob;
 		    DPUTS(!list_pipe_pid, "invalid list_pipe_pid");
 		    addproc(list_pipe_pid, list_pipe_text, 0,
-			    &list_pipe_start);
+			    &list_pipe_start, -1);
 
 		    /* If the super-job contains only the sub-shell, the
 		       sub-shell is the group leader. */
@@ -1818,7 +1828,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 		    }
 		    else {
 			close(synch[0]);
-			entersubsh(ESUB_ASYNC);
+			entersubsh(ESUB_ASYNC, NULL);
 			/*
 			 * At this point, we used to attach this process
 			 * to the process group of list_pipe_job (the
@@ -2175,7 +2185,7 @@ closemn(struct multio **mfds, int fd, int type)
 	    }
 	    mn->ct = 1;
 	    mn->fds[0] = fd;
-	    addproc(pid, NULL, 1, &bgtime);
+	    addproc(pid, NULL, 1, &bgtime, -1);
 	    child_unblock();
 	    return;
 	}
@@ -2676,7 +2686,7 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
 {
     pid_t pid;
     int synch[2], flags;
-    char dummy;
+    int gleader = -1;
     struct timeval bgtime;
 
     child_block();
@@ -2693,7 +2703,7 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
     }
     if (pid) {
 	close(synch[1]);
-	read_loop(synch[0], &dummy, 1);
+	read_loop(synch[0], (char *)&gleader, sizeof(gleader));
 	close(synch[0]);
 	if (how & Z_ASYNC) {
 	    lastpid = (zlong) pid;
@@ -2711,7 +2721,7 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
 		      3 : WC_ASSIGN_NUM(ac) + 2);
 	    }
 	}
-	addproc(pid, text, 0, &bgtime);
+	addproc(pid, text, 0, &bgtime, gleader);
 	if (oautocont >= 0)
 	    opts[AUTOCONTINUE] = oautocont;
 	pipecleanfilelist(jobtab[thisjob].filelist, 1);
@@ -2726,7 +2736,8 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
     if (type == WC_SUBSH && !(how & Z_ASYNC))
 	flags |= ESUB_JOB_CONTROL;
     *filelistp = jobtab[thisjob].filelist;
-    entersubsh(flags);
+    entersubsh(flags, &gleader);
+    write(synch[1], &gleader, sizeof(gleader));
     close(synch[1]);
     zclose(close_if_forked);
 
@@ -3842,7 +3853,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 	    if ((do_exec || (type >= WC_CURSH && last1 == 1))
 		&& !forked)
 		flags |= ESUB_REVERTPGRP;
-	    entersubsh(flags);
+	    entersubsh(flags, NULL);
 	}
 	if (type == WC_FUNCDEF) {
 	    Eprog redir_prog;
@@ -4604,7 +4615,7 @@ getoutput(char *cmd, int qt)
     child_unblock();
     zclose(pipes[0]);
     redup(pipes[1], 1);
-    entersubsh(ESUB_PGRP|ESUB_NOMONITOR);
+    entersubsh(ESUB_PGRP|ESUB_NOMONITOR, NULL);
     cmdpush(CS_CMDSUBST);
     execode(prog, 0, 1, "cmdsubst");
     cmdpop();
@@ -4798,7 +4809,7 @@ getoutputfile(char *cmd, char **eptr)
 
     /* pid == 0 */
     redup(fd, 1);
-    entersubsh(ESUB_PGRP|ESUB_NOMONITOR);
+    entersubsh(ESUB_PGRP|ESUB_NOMONITOR, NULL);
     cmdpush(CS_CMDSUBST);
     execode(prog, 0, 1, "equalsubst");
     cmdpop();
@@ -4865,7 +4876,7 @@ getproc(char *cmd, char **eptr)
 	if (pid == -1)
 	    return NULL;
 	if (!out)
-	    addproc(pid, NULL, 1, &bgtime);
+	    addproc(pid, NULL, 1, &bgtime, -1);
 	procsubstpid = pid;
 	return pnam;
     }
@@ -4875,7 +4886,7 @@ getproc(char *cmd, char **eptr)
 	zerr("can't open %s: %e", pnam, errno);
 	_exit(1);
     }
-    entersubsh(ESUB_ASYNC|ESUB_PGRP);
+    entersubsh(ESUB_ASYNC|ESUB_PGRP, NULL);
     redup(fd, out);
 #else /* PATH_DEV_FD */
     int pipes[2], fd;
@@ -4902,12 +4913,12 @@ getproc(char *cmd, char **eptr)
 	addfilelist(NULL, fd);
 	if (!out)
 	{
-	    addproc(pid, NULL, 1, &bgtime);
+	    addproc(pid, NULL, 1, &bgtime, -1);
 	}
 	procsubstpid = pid;
 	return pnam;
     }
-    entersubsh(ESUB_ASYNC|ESUB_PGRP);
+    entersubsh(ESUB_ASYNC|ESUB_PGRP, NULL);
     redup(pipes[out], out);
     closem(FDT_UNUSED, 0);   /* this closes pipes[!out] as well */
 #endif /* PATH_DEV_FD */
@@ -4954,11 +4965,11 @@ getpipe(char *cmd, int nullexec)
 	    return -1;
 	}
 	if (!nullexec)
-	    addproc(pid, NULL, 1, &bgtime);
+	    addproc(pid, NULL, 1, &bgtime, -1);
 	procsubstpid = pid;
 	return pipes[!out];
     }
-    entersubsh(ESUB_PGRP);
+    entersubsh(ESUB_PGRP, NULL);
     redup(pipes[out], out);
     closem(FDT_UNUSED, 0);	/* this closes pipes[!out] as well */
     cmdpush(CS_CMDSUBST);
diff --git a/Src/jobs.c b/Src/jobs.c
index 38b3d89..ba87a17 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -1375,7 +1375,7 @@ deletejob(Job jn, int disowning)
 
 /**/
 void
-addproc(pid_t pid, char *text, int aux, struct timeval *bgtime)
+addproc(pid_t pid, char *text, int aux, struct timeval *bgtime, int gleader)
 {
     Process pn, *pnlist;
 
@@ -1392,10 +1392,15 @@ addproc(pid_t pid, char *text, int aux, struct timeval *bgtime)
     if (!aux)
     {
 	pn->bgtime = *bgtime;
-	/* if this is the first process we are adding to *
-	 * the job, then it's the group leader.          */
+	/*
+	 * if this is the first process we are adding to
+	 * the job, then it's the group leader.
+	 *
+	 * Exception: if the forked subshell reported its own group
+	 * leader, set that.
+	 */
 	if (!jobtab[thisjob].gleader)
-	    jobtab[thisjob].gleader = pid;
+	    jobtab[thisjob].gleader = (gleader != -1) ? gleader : pid;
 	/* attach this process to end of process list of current job */
 	pnlist = &jobtab[thisjob].procs;
     }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-07 14:58               ` Peter Stephenson
@ 2018-09-07 16:46                 ` Peter Stephenson
  2018-09-07 23:40                   ` Bart Schaefer
  2018-09-10 22:55                 ` Axel Beckert
  1 sibling, 1 reply; 17+ messages in thread
From: Peter Stephenson @ 2018-09-07 16:46 UTC (permalink / raw)
  To: Peter Stephenson, Zsh hackers list

> On 07 September 2018 at 15:58 Peter Stephenson <p.stephenson@samsung.com> wrote:
> 
> 
> On Fri, 7 Sep 2018 15:01:40 +0100
> Peter Stephenson <p.stephenson@samsung.com> wrote:
> > OK, this is *much* closer and possibly correct.
> 
> Here's a more sophisticated fix, though I'll take votes on which to use
> for now.

(Don't all shout at once.)

Actually, I can't see a good reason why I shouldn't simply commit this.  It's much
the neatest way of keeping the states aligned.  I'll do that.

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-07 16:46                 ` Peter Stephenson
@ 2018-09-07 23:40                   ` Bart Schaefer
  2018-09-08 17:37                     ` Peter Stephenson
  0 siblings, 1 reply; 17+ messages in thread
From: Bart Schaefer @ 2018-09-07 23:40 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Peter Stephenson, Zsh hackers list

On Fri, Sep 7, 2018 at 9:46 AM, Peter Stephenson
<p.w.stephenson@ntlworld.com> wrote:
>>
>> Here's a more sophisticated fix, though I'll take votes on which to use
>> for now.
>
> (Don't all shout at once.)

Timezone difference and mornings are $REALJOB busy for me, so I
couldn't answer right away.

> Actually, I can't see a good reason why I shouldn't simply commit this.  It's much
> the neatest way of keeping the states aligned.  I'll do that.

I was going to ask whether this replaces or adds to the previous
patch, but either way, yes, do this.

Is there a point to replacing &dummy with &gleader in the call to
read_loop(), other than to save declaring "int dummy"?  It doesn't
appear that read_loop() puts returns anything useful via &gleader.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-07 23:40                   ` Bart Schaefer
@ 2018-09-08 17:37                     ` Peter Stephenson
  2018-09-08 18:40                       ` Peter Stephenson
  0 siblings, 1 reply; 17+ messages in thread
From: Peter Stephenson @ 2018-09-08 17:37 UTC (permalink / raw)
  To: Zsh hackers list

On Fri, 7 Sep 2018 16:40:11 -0700
Bart Schaefer <schaefer@brasslantern.com> wrote:
> Timezone difference and mornings are $REALJOB busy for me, so I
> couldn't answer right away.

Sure, was just hoping *someone* was at least vaguely interested in
getting the bug fixed.

> Is there a point to replacing &dummy with &gleader in the call to
> read_loop(), other than to save declaring "int dummy"?  It doesn't
> appear that read_loop() puts returns anything useful via &gleader.

It gets passed into addproc().  That's how the parent shell knows for
sure which group leader the subshell has picked without any second
guessing.

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-08 17:37                     ` Peter Stephenson
@ 2018-09-08 18:40                       ` Peter Stephenson
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-08 18:40 UTC (permalink / raw)
  To: Zsh hackers list

Here's a regression test that seems to work, though the job control
tests are rather brittle.  Not needed for 5.6.1 as I'm happy this is
currently working.

pws


diff --git a/Test/W02jobs.ztst b/Test/W02jobs.ztst
index 1e7ac76c6..98c9f82be 100644
--- a/Test/W02jobs.ztst
+++ b/Test/W02jobs.ztst
@@ -191,6 +191,14 @@
 *>\[2]  ? interrupt*sleep*
 *>\[1]  ? kill*sleep*
 
+  # Use the shell itself to get interactive output to a terminal.
+  # It's bizarrely difficult to find a standard programme to do this.
+  zpty_start
+  zpty_input "echo | { : | $ZTST_testdir/../Src/zsh -ifc 'print Working'; }"
+  zpty_stop	
+0:Regression test for race with process group setting in nested pipeline
+>Working
+
 %clean
 
   zmodload -ui zsh/zpty

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-07 14:58               ` Peter Stephenson
  2018-09-07 16:46                 ` Peter Stephenson
@ 2018-09-10 22:55                 ` Axel Beckert
  2018-09-11  8:58                   ` Peter Stephenson
  1 sibling, 1 reply; 17+ messages in thread
From: Axel Beckert @ 2018-09-10 22:55 UTC (permalink / raw)
  To: zsh-workers

Hi,

On Fri, Sep 07, 2018 at 03:58:26PM +0100, Peter Stephenson wrote:
> On Fri, 7 Sep 2018 15:01:40 +0100 Peter Stephenson <p.stephenson@samsung.com> wrote:
> > OK, this is *much* closer and possibly correct.
> 
> Here's a more sophisticated fix, though I'll take votes on which to use
> for now.

Does this fix Vincent Lefevre's problem discussed in the "zsh 5.6
regression: a pipe sometimes yields a TTOU signal" thread starting at
zsh-workers 43379?

		Kind regards, Axel
-- 
PGP: 2FF9CD59612616B5      /~\  Plain Text Ribbon Campaign, http://arc.pasp.de/
Mail: abe@deuxchevaux.org  \ /  Say No to HTML in E-Mail and Usenet
Mail+Jabber: abe@noone.org  X
https://axel.beckert.ch/   / \  I love long mails: https://email.is-not-s.ms/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-10 22:55                 ` Axel Beckert
@ 2018-09-11  8:58                   ` Peter Stephenson
  2018-09-11 14:40                     ` Vincent Lefevre
  0 siblings, 1 reply; 17+ messages in thread
From: Peter Stephenson @ 2018-09-11  8:58 UTC (permalink / raw)
  To: zsh-workers

On Tue, 11 Sep 2018 00:55:30 +0200
Axel Beckert <abe@deuxchevaux.org> wrote:
> On Fri, Sep 07, 2018 at 03:58:26PM +0100, Peter Stephenson wrote:
> > On Fri, 7 Sep 2018 15:01:40 +0100 Peter Stephenson <p.stephenson@samsung.com> wrote:  
> > > OK, this is *much* closer and possibly correct.  
> > 
> > Here's a more sophisticated fix, though I'll take votes on which to use
> > for now.  
> 
> Does this fix Vincent Lefevre's problem discussed in the "zsh 5.6
> regression: a pipe sometimes yields a TTOU signal" thread starting at
> zsh-workers 43379?

Yes, it's that problem.  Probably a newly exposed race rather than
a regression as this is the first time the code has been joined up
like this.

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-11  8:58                   ` Peter Stephenson
@ 2018-09-11 14:40                     ` Vincent Lefevre
  2018-09-11 15:03                       ` Peter Stephenson
       [not found]                       ` <20180911160326.33dfd575@camnpupstephen.cam.scsc.local>
  0 siblings, 2 replies; 17+ messages in thread
From: Vincent Lefevre @ 2018-09-11 14:40 UTC (permalink / raw)
  To: zsh-workers

On 2018-09-11 09:58:25 +0100, Peter Stephenson wrote:
> On Tue, 11 Sep 2018 00:55:30 +0200
> Axel Beckert <abe@deuxchevaux.org> wrote:
> > Does this fix Vincent Lefevre's problem discussed in the "zsh 5.6
> > regression: a pipe sometimes yields a TTOU signal" thread starting at
> > zsh-workers 43379?
> 
> Yes, it's that problem.  Probably a newly exposed race rather than
> a regression as this is the first time the code has been joined up
> like this.

Apparently, it is not entirely fixed, or perhaps this is another
problem:

cventin% for i in 1 2 3; do : | { : | tput init } done
zsh: suspended (tty output)  for i in 1 2 3; do; : | { : | tput init; }; done

-- 
Vincent Lefèvre <vincent@vinc17.net> - Web: <https://www.vinc17.net/>
100% accessible validated (X)HTML - Blog: <https://www.vinc17.net/blog/>
Work: CR INRIA - computer arithmetic / AriC project (LIP, ENS-Lyon)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
  2018-09-11 14:40                     ` Vincent Lefevre
@ 2018-09-11 15:03                       ` Peter Stephenson
       [not found]                       ` <20180911160326.33dfd575@camnpupstephen.cam.scsc.local>
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-11 15:03 UTC (permalink / raw)
  To: zsh-workers

On Tue, 11 Sep 2018 16:40:46 +0200
Vincent Lefevre <vincent@vinc17.net> wrote:
> Apparently, it is not entirely fixed, or perhaps this is another
> problem:
> 
> cventin% for i in 1 2 3; do : | { : | tput init } done
> zsh: suspended (tty output)  for i in 1 2 3; do; : | { : | tput init; }; done

You can get it with any outer current-shell construct.

{ : | { : | more } }

Looks a bit different to me.  Not happening in the instrumented build
on the job_control_debug branch, either, so even harder to debug.

pws

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: Rimmerworld pipeline race
       [not found]                       ` <20180911160326.33dfd575@camnpupstephen.cam.scsc.local>
@ 2018-09-11 15:32                         ` Peter Stephenson
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Stephenson @ 2018-09-11 15:32 UTC (permalink / raw)
  To: zsh-workers

On Tue, 11 Sep 2018 16:03:26 +0100
Peter Stephenson <p.stephenson@samsung.com> wrote:

> On Tue, 11 Sep 2018 16:40:46 +0200
> Vincent Lefevre <vincent@vinc17.net> wrote:
> > Apparently, it is not entirely fixed, or perhaps this is another
> > problem:
> > 
> > cventin% for i in 1 2 3; do : | { : | tput init } done
> > zsh: suspended (tty output)  for i in 1 2 3; do; : | { : | tput init; }; done  
> 
> You can get it with any outer current-shell construct.
> 
> { : | { : | more } }
> 
> Looks a bit different to me.  Not happening in the instrumented build
> on the job_control_debug branch, either, so even harder to debug.

OK, suppose we also pass back information about list_pipe_job itself,
not just the group leader?  This seems to work for me.

I'm in two minds whether to pass the whole structure into addproc(): it
seems to be abusing the interface.

pws

diff --git a/Src/exec.c b/Src/exec.c
index 074265f..b9af9ea 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -1002,7 +1002,7 @@ enum {
 
 /**/
 static void
-entersubsh(int flags, int *gleaderp)
+entersubsh(int flags, struct entersubsh_ret *retp)
 {
     int i, sig, monitor, job_control_ok;
 
@@ -1036,8 +1036,10 @@ entersubsh(int flags, int *gleaderp)
 		if (!(flags & ESUB_ASYNC))
 		    attachtty(jobtab[thisjob].gleader);
 	    }
-	    if (gleaderp)
-		*gleaderp = jobtab[list_pipe_job].gleader;
+	    if (retp) {
+		retp->gleader = jobtab[list_pipe_job].gleader;
+		retp->list_pipe_job = list_pipe_job;
+	    }
 	}
 	else if (!jobtab[thisjob].gleader ||
 		 setpgrp(0L, jobtab[thisjob].gleader) == -1) {
@@ -1058,8 +1060,11 @@ entersubsh(int flags, int *gleaderp)
 	    setpgrp(0L, jobtab[thisjob].gleader);
 	    if (!(flags & ESUB_ASYNC))
 		attachtty(jobtab[thisjob].gleader);
-	    if (gleaderp)
-		*gleaderp = jobtab[thisjob].gleader;
+	    if (retp) {
+		retp->gleader = jobtab[thisjob].gleader;
+		if (list_pipe_job != thisjob)
+		    retp->list_pipe_job = list_pipe_job;
+	    }
 	}
     }
     if (!(flags & ESUB_FAKE))
@@ -1692,7 +1697,7 @@ execpline(Estate state, wordcode slcode, int how, int last1)
 		    curjob = newjob;
 		    DPUTS(!list_pipe_pid, "invalid list_pipe_pid");
 		    addproc(list_pipe_pid, list_pipe_text, 0,
-			    &list_pipe_start, -1);
+			    &list_pipe_start, -1, -1);
 
 		    /* If the super-job contains only the sub-shell, the
 		       sub-shell is the group leader. */
@@ -2185,7 +2190,7 @@ closemn(struct multio **mfds, int fd, int type)
 	    }
 	    mn->ct = 1;
 	    mn->fds[0] = fd;
-	    addproc(pid, NULL, 1, &bgtime, -1);
+	    addproc(pid, NULL, 1, &bgtime, -1, -1);
 	    child_unblock();
 	    return;
 	}
@@ -2686,10 +2691,12 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
 {
     pid_t pid;
     int synch[2], flags;
-    int gleader = -1;
+    struct entersubsh_ret esret;
     struct timeval bgtime;
 
     child_block();
+    esret.gleader = -1;
+    esret.list_pipe_job = -1;
 
     if (pipe(synch) < 0) {
 	zerr("pipe failed: %e", errno);
@@ -2703,7 +2710,7 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
     }
     if (pid) {
 	close(synch[1]);
-	read_loop(synch[0], (char *)&gleader, sizeof(gleader));
+	read_loop(synch[0], (char *)&esret, sizeof(esret));
 	close(synch[0]);
 	if (how & Z_ASYNC) {
 	    lastpid = (zlong) pid;
@@ -2721,7 +2728,7 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
 		      3 : WC_ASSIGN_NUM(ac) + 2);
 	    }
 	}
-	addproc(pid, text, 0, &bgtime, gleader);
+	addproc(pid, text, 0, &bgtime, esret.gleader, esret.list_pipe_job);
 	if (oautocont >= 0)
 	    opts[AUTOCONTINUE] = oautocont;
 	pipecleanfilelist(jobtab[thisjob].filelist, 1);
@@ -2736,8 +2743,8 @@ execcmd_fork(Estate state, int how, int type, Wordcode varspc,
     if (type == WC_SUBSH && !(how & Z_ASYNC))
 	flags |= ESUB_JOB_CONTROL;
     *filelistp = jobtab[thisjob].filelist;
-    entersubsh(flags, &gleader);
-    write(synch[1], &gleader, sizeof(gleader));
+    entersubsh(flags, &esret);
+    write(synch[1], &esret, sizeof(esret));
     close(synch[1]);
     zclose(close_if_forked);
 
@@ -4876,7 +4883,7 @@ getproc(char *cmd, char **eptr)
 	if (pid == -1)
 	    return NULL;
 	if (!out)
-	    addproc(pid, NULL, 1, &bgtime, -1);
+	    addproc(pid, NULL, 1, &bgtime, -1, -1);
 	procsubstpid = pid;
 	return pnam;
     }
@@ -4913,7 +4920,7 @@ getproc(char *cmd, char **eptr)
 	addfilelist(NULL, fd);
 	if (!out)
 	{
-	    addproc(pid, NULL, 1, &bgtime, -1);
+	    addproc(pid, NULL, 1, &bgtime, -1, -1);
 	}
 	procsubstpid = pid;
 	return pnam;
@@ -4965,7 +4972,7 @@ getpipe(char *cmd, int nullexec)
 	    return -1;
 	}
 	if (!nullexec)
-	    addproc(pid, NULL, 1, &bgtime, -1);
+	    addproc(pid, NULL, 1, &bgtime, -1, -1);
 	procsubstpid = pid;
 	return pipes[!out];
     }
diff --git a/Src/jobs.c b/Src/jobs.c
index ba87a17..db2e87e 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -1375,7 +1375,8 @@ deletejob(Job jn, int disowning)
 
 /**/
 void
-addproc(pid_t pid, char *text, int aux, struct timeval *bgtime, int gleader)
+addproc(pid_t pid, char *text, int aux, struct timeval *bgtime,
+	int gleader, int list_pipe_job_used)
 {
     Process pn, *pnlist;
 
@@ -1397,10 +1398,15 @@ addproc(pid_t pid, char *text, int aux, struct timeval *bgtime, int gleader)
 	 * the job, then it's the group leader.
 	 *
 	 * Exception: if the forked subshell reported its own group
-	 * leader, set that.
+	 * leader, set that.  If it reported the use of list_pipe_job,
+	 * set it for that, too.
 	 */
-	if (!jobtab[thisjob].gleader)
-	    jobtab[thisjob].gleader = (gleader != -1) ? gleader : pid;
+	if (gleader != -1) {
+	    jobtab[thisjob].gleader = gleader;
+	    if (list_pipe_job_used != -1)
+		jobtab[list_pipe_job_used].gleader = gleader;
+	} else if (!jobtab[thisjob].gleader)
+		jobtab[thisjob].gleader = pid;
 	/* attach this process to end of process list of current job */
 	pnlist = &jobtab[thisjob].procs;
     }
diff --git a/Src/zsh.h b/Src/zsh.h
index 8e7f20b..b81db15 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -505,6 +505,14 @@ enum {
     ZCONTEXT_PARSE      = (1<<2)
 };
 
+/* Report from entersubsh() to pass subshell info to addproc */
+struct entersubsh_ret {
+    /* Process group leader chosen by subshell, else -1 */
+    int gleader;
+    /* list_pipe_job setting used by subshell, else -1 */
+    int list_pipe_job;
+};
+
 /**************************/
 /* Abstract types for zsh */
 /**************************/

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-09-11 15:32 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20180905200816epcas5p18ce6c49baa637e7d83a769e97c4364fb@epcas5p1.samsung.com>
2018-09-05 20:07 ` PATCH: job control debug Peter Stephenson
2018-09-06  8:09   ` Peter Stephenson
     [not found]   ` <20180906090902.1f344e9f@camnpupstephen.cam.scsc.local>
2018-09-06  9:22     ` Peter Stephenson
2018-09-07  3:18       ` Bart Schaefer
2018-09-07  9:18         ` Peter Stephenson
     [not found]         ` <20180907101852.62415ff9@camnpupstephen.cam.scsc.local>
2018-09-07 11:21           ` Peter Stephenson
     [not found]           ` <20180907122145.2af5bcba@camnpupstephen.cam.scsc.local>
2018-09-07 14:01             ` PATCH: Rimmerworld pipeline race Peter Stephenson
     [not found]             ` <20180907150140.46a05880@camnpupstephen.cam.scsc.local>
2018-09-07 14:58               ` Peter Stephenson
2018-09-07 16:46                 ` Peter Stephenson
2018-09-07 23:40                   ` Bart Schaefer
2018-09-08 17:37                     ` Peter Stephenson
2018-09-08 18:40                       ` Peter Stephenson
2018-09-10 22:55                 ` Axel Beckert
2018-09-11  8:58                   ` Peter Stephenson
2018-09-11 14:40                     ` Vincent Lefevre
2018-09-11 15:03                       ` Peter Stephenson
     [not found]                       ` <20180911160326.33dfd575@camnpupstephen.cam.scsc.local>
2018-09-11 15:32                         ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).