PATCH: print -S uses lexical history

zsh-workers
 help / color / mirror / code / Atom feed

* PATCH: print -S uses lexical history
@ 2011-05-31 20:37 Peter Stephenson
  2011-06-01  6:24 ` Bart Schaefer
  0 siblings, 1 reply; 3+ messages in thread
From: Peter Stephenson @ 2011-05-31 20:37 UTC (permalink / raw)
  To: Zsh hackers list

As we've had all the pain of the HIST_LEX_WORDS option, which almost no
one is using, to split a history line into words using the lexical
analyser, it's worth making this available directly using a relative of
"print -s" imaginatively called "print -S".  This just takes a single
argument which will be split in the same way as if read in from a
history file with HIST_LEX_WORDS on.  This doesn't give you very much
beyond print -s with a (z)plit parameter, but as the code's already
there it's worth having.

I've split the core code out into a new function, histsplitwords().

Index: Doc/Zsh/builtins.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/builtins.yo,v
retrieving revision 1.135
diff -p -u -r1.135 builtins.yo
--- Doc/Zsh/builtins.yo	19 May 2011 16:10:47 -0000	1.135
+++ Doc/Zsh/builtins.yo	31 May 2011 20:06:27 -0000
@@ -914,7 +914,7 @@ and the new directory stack is not print
 tt(popd) that do not change the environment seen by an interactive user.
 )
 findex(print)
-xitem(tt(print) [ tt(-abcDilmnNoOpPrsz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
+xitem(tt(print) [ tt(-abcDilmnNoOpPrsSz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
 item(  [ tt(-R) [ tt(-en) ]] [ var(arg) ... ])(
 With the `tt(-f)' option the arguments are printed as described by tt(printf).
 With no flags or with the flag `tt(-)', the arguments are printed on
@@ -994,6 +994,14 @@ tt(-R); all other arguments and options 
 )
 item(tt(-s))(
 Place the results in the history list instead of on the standard output.
+Each argument to the tt(print) command is treated as a single word in the
+history, regardless of its content.
+)
+item(tt(-S))(
+Place the results in the history list instead of on the standard output.
+In this case only a single argument is allowed; it will be split into
+words by analysing the line as if it had been read from the history
+file with the tt(HIST_LEX_WORDS) option.
 )
 item(tt(-u) var(n))(
 Print the arguments to file descriptor var(n).
Index: Src/builtin.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v
retrieving revision 1.251
diff -p -u -r1.251 builtin.c
--- Src/builtin.c	9 May 2011 09:49:09 -0000	1.251
+++ Src/builtin.c	31 May 2011 20:06:27 -0000
@@ -99,7 +99,7 @@ static struct builtin builtins[] =
 #endif
 
     BUILTIN("popd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 1, BIN_POPD, "q", NULL),
-    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsu:z-", NULL),
+    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:z-", NULL),
     BUILTIN("printf", 0, bin_print, 1, -1, BIN_PRINTF, NULL, NULL),
     BUILTIN("pushd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 2, BIN_PUSHD, "qsPL", NULL),
     BUILTIN("pushln", 0, bin_print, 0, -1, BIN_PRINT, NULL, "-nz"),
@@ -3965,25 +3965,45 @@ bin_print(char *name, char **args, Optio
 	    return 0;
 	}
 	/* -s option -- add the arguments to the history list */
-	if (OPT_ISSET(ops,'s')) {
+	if (OPT_ISSET(ops,'s') || OPT_ISSET(ops,'S')) {
 	    int nwords = 0, nlen, iwords;
 	    char **pargs = args;
 
 	    queue_signals();
-	    ent = prepnexthistent();
 	    while (*pargs++)
 		nwords++;
-	    if ((ent->nwords = nwords)) {
-		ent->words = (short *)zalloc(nwords*2*sizeof(short));
-		nlen = iwords = 0;
-		for (pargs = args; *pargs; pargs++) {
-		    ent->words[iwords++] = nlen;
-		    nlen += strlen(*pargs);
-		    ent->words[iwords++] = nlen;
-		    nlen++;
+	    if (nwords) {
+		if (OPT_ISSET(ops,'S')) {
+		    int wordsize;
+		    short *words;
+		    if (nwords > 1) {
+			zwarnnam(name, "option -S takes a single argument");
+			return 1;
+		    }
+		    words = NULL;
+		    wordsize = 0;
+		    histsplitwords(*args, &words, &wordsize, &nwords, 1);
+		    ent = prepnexthistent();
+		    ent->words = (short *)zalloc(nwords*sizeof(short));
+		    memcpy(ent->words, words, nwords*sizeof(short));
+		    free(words);
+		    ent->nwords = nwords/2;
+		} else {
+		    ent = prepnexthistent();
+		    ent->words = (short *)zalloc(nwords*2*sizeof(short));
+		    ent->nwords = nwords;
+		    nlen = iwords = 0;
+		    for (pargs = args; *pargs; pargs++) {
+			ent->words[iwords++] = nlen;
+			nlen += strlen(*pargs);
+			ent->words[iwords++] = nlen;
+			nlen++;
+		    }
 		}
-	    } else
+	    } else {
+		ent = prepnexthistent();
 		ent->words = (short *)NULL;
+	    }
 	    ent->node.nam = zjoin(args, ' ', 0);
 	    ent->stim = ent->ftim = time(NULL);
 	    ent->node.flags = 0;
Index: Src/hist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/hist.c,v
retrieving revision 1.111
diff -p -u -r1.111 hist.c
--- Src/hist.c	27 May 2011 21:56:14 -0000	1.111
+++ Src/hist.c	31 May 2011 20:06:27 -0000
@@ -2338,110 +2338,11 @@ readhistfile(char *fn, int err, int read
 	    /*
 	     * Divide up the words.
 	     */
-	    nwordpos = 0;
 	    start = pt;
 	    uselex = isset(HISTLEXWORDS) && !(readflags & HFILE_FAST);
-	    if (uselex) {
-		/*
-		 * Attempt to do this using the lexer.
-		 */
-		LinkList wordlist = bufferwords(NULL, pt, NULL,
-						LEXFLAGS_COMMENTS_KEEP);
-		LinkNode wordnode;
-		int nwords_max;
-		nwords_max = 2 * countlinknodes(wordlist);
-		if (nwords_max > nwords) {
-		    nwords = nwords_max;
-		    words = (short *)realloc(words, nwords*sizeof(short));
-		}
-		for (wordnode = firstnode(wordlist);
-		     wordnode;
-		     incnode(wordnode)) {
-		    char *word = getdata(wordnode);
-
-		    for (;;) {
-			/*
-			 * Not really an oddity: "\\\n" is
-			 * removed from input as if whitespace.
-			 */
-			if (inblank(*pt))
-			    pt++;
-			else if (pt[0] == '\\' && pt[1] == '\n')
-			    pt += 2;
-			else
-			    break;
-		    }
-		    if (!strpfx(word, pt)) {
-			int bad = 0;
-			/*
-			 * Oddity 1: newlines turn into semicolons.
-			 */
-			if (!strcmp(word, ";"))
-			    continue;
-			while (*pt) {
-			    if (!*word) {
-				bad = 1;
-				break;
-			    }
-			    /*
-			     * Oddity 2: !'s turn into |'s.
-			     */
-			    if (*pt == *word ||
-				(*pt == '!' && *word == '|')) {
-				pt++;
-				word++;
-			    } else {
-				bad = 1;
-				break;
-			    }
-			}
-			if (bad) {
-#ifdef DEBUG
-			    dputs(ERRMSG("bad wordsplit reading history: "
-					 "%s\nat: %s\nword: %s"),
-				  start, pt, word);
-#endif
-			    pt = start;
-			    nwordpos = 0;
-			    uselex = 0;
-			    break;
-			}
-		    } else if (!strcmp(word, ";") && strpfx(";;", pt)) {
-			/*
-			 * Don't get confused between a semicolon that's
-			 * probably really a newline and a double
-			 * semicolon that's terminating a case.
-			 */
-			continue;
-		    }
-		    words[nwordpos++] = pt - start;
-		    pt += strlen(word);
-		    words[nwordpos++] = pt - start;
-		}
+	    histsplitwords(pt, &words, &nwords, &nwordpos, uselex);
+	    if (uselex)
 		freeheap();
-	    }
-	    if (!uselex) {
-		do {
-		    for (;;) {
-			if (inblank(*pt))
-			    pt++;
-			else if (pt[0] == '\\' && pt[1] == '\n')
-			    pt += 2;
-			else
-			    break;
-		    }
-		    if (*pt) {
-			if (nwordpos >= nwords)
-			    words = (short *)
-				realloc(words, (nwords += 64)*sizeof(short));
-			words[nwordpos++] = pt - start;
-			while (*pt && !inblank(*pt))
-			    pt++;
-			words[nwordpos++] = pt - start;
-		    }
-		} while (*pt);
-
-	    }
 
 	    he->nwords = nwordpos/2;
 	    if (he->nwords) {
@@ -3141,6 +3042,142 @@ bufferwords(LinkList list, char *buf, in
     return list;
 }
 
+/*
+ * Split up a line into words for use in a history file.
+ *
+ * lineptr is the line to be split.
+ *
+ * *wordsp and *nwordsp are an array already allocated to hold words
+ * and its length.  The array holds both start and end positions,
+ * so *nwordsp actually counts twice the number of words in the
+ * original string.  *nwordsp may be zero in which case the array
+ * will be allocated.
+ *
+ * *nwordposp returns the used length of *wordsp in the same units as
+ * *nwordsp, i.e. twice the number of words in the input line.
+ *
+ * If uselex is 1, attempt to do this using the lexical analyser.
+ * This is more accurate, but slower; for reading history files it's
+ * controlled by the option HISTLEXWORDS.  If this failed (which
+ * indicates a bug in the shell) it falls back to whitespace-separated
+ * strings, printing a message if in debug mode.
+ *
+ * If uselex is 0, just look for whitespace-separated words; the only
+ * special handling is for a backslash-newline combination as used
+ * by the history file format to save multiline buffers.
+ */
+/**/
+mod_export void
+histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
+	       int uselex)
+{
+    int nwords = *nwordsp, nwordpos = 0;
+    short *words = *wordsp;
+    char *start = lineptr;
+
+    if (uselex) {
+	LinkList wordlist = bufferwords(NULL, lineptr, NULL,
+					LEXFLAGS_COMMENTS_KEEP);
+	LinkNode wordnode;
+	int nwords_max;
+
+	nwords_max = 2 * countlinknodes(wordlist);
+	if (nwords_max > nwords) {
+	    *nwordsp = nwords = nwords_max;
+	    *wordsp = words = (short *)zrealloc(words, nwords*sizeof(short));
+	}
+	for (wordnode = firstnode(wordlist);
+	     wordnode;
+	     incnode(wordnode)) {
+	    char *word = getdata(wordnode);
+
+	    for (;;) {
+		/*
+		 * Not really an oddity: "\\\n" is
+		 * removed from input as if whitespace.
+		 */
+		if (inblank(*lineptr))
+		    lineptr++;
+		else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+		    lineptr += 2;
+		else
+		    break;
+	    }
+	    if (!strpfx(word, lineptr)) {
+		int bad = 0;
+		/*
+		 * Oddity 1: newlines turn into semicolons.
+		 */
+		if (!strcmp(word, ";"))
+		    continue;
+		while (*lineptr) {
+		    if (!*word) {
+			bad = 1;
+			break;
+		    }
+		    /*
+		     * Oddity 2: !'s turn into |'s.
+		     */
+		    if (*lineptr == *word ||
+			(*lineptr == '!' && *word == '|')) {
+			lineptr++;
+			word++;
+		    } else {
+			bad = 1;
+			break;
+		    }
+		}
+		if (bad) {
+#ifdef DEBUG
+		    dputs(ERRMSG("bad wordsplit reading history: "
+				 "%s\nat: %s\nword: %s"),
+			  start, lineptr, word);
+#endif
+		    lineptr = start;
+		    nwordpos = 0;
+		    uselex = 0;
+		    break;
+		}
+	    } else if (!strcmp(word, ";") && strpfx(";;", lineptr)) {
+		/*
+		 * Don't get confused between a semicolon that's
+		 * probably really a newline and a double
+		 * semicolon that's terminating a case.
+		 */
+		continue;
+	    }
+	    words[nwordpos++] = lineptr - start;
+	    lineptr += strlen(word);
+	    words[nwordpos++] = lineptr - start;
+	}
+    }
+    if (!uselex) {
+	do {
+	    for (;;) {
+		if (inblank(*lineptr))
+		    lineptr++;
+		else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+		    lineptr += 2;
+		else
+		    break;
+	    }
+	    if (*lineptr) {
+		if (nwordpos >= nwords) {
+		    *nwordsp = nwords = nwords + 64;
+		    *wordsp = words = (short *)
+			zrealloc(words, nwords*sizeof(*words));
+		}
+		words[nwordpos++] = lineptr - start;
+		while (*lineptr && !inblank(*lineptr))
+		    lineptr++;
+		words[nwordpos++] = lineptr - start;
+	    }
+	} while (*lineptr);
+    }
+
+    *nwordposp = nwordpos;
+}
+
 /* Move the current history list out of the way and prepare a fresh history
  * list using hf for HISTFILE, hs for HISTSIZE, and shs for SAVEHIST.  If
  * the hf value is an empty string, HISTFILE will be unset from the new


-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PATCH: print -S uses lexical history
  2011-05-31 20:37 PATCH: print -S uses lexical history Peter Stephenson
@ 2011-06-01  6:24 ` Bart Schaefer
  2011-06-01 18:45   ` Peter Stephenson
  0 siblings, 1 reply; 3+ messages in thread
From: Bart Schaefer @ 2011-06-01  6:24 UTC (permalink / raw)
  To: Zsh hackers list

On May 31,  9:37pm, Peter Stephenson wrote:
}
} As we've had all the pain of the HIST_LEX_WORDS option, which almost no
} one is using, to split a history line into words using the lexical
} analyser, it's worth making this available directly using a relative of
} "print -s" imaginatively called "print -S".

This is nice, but I have to ask ... why not just make the behavior of
"print -s" dependent on the current setting of the option?  In what
circumstances would one wish to (be able to choose at the level of
individual commands to) have part of the history in lexed form and
part of it not?

I guess strictly speaking one already gets that effect when the option
is off, as the loaded history will be unlexed and the interactively
appended remainder lexed; so one could also argue that "print -s"
should simply always go the lexing route, but I suppose there are also
cases where speed might be a factor.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PATCH: print -S uses lexical history
  2011-06-01  6:24 ` Bart Schaefer
@ 2011-06-01 18:45   ` Peter Stephenson
  0 siblings, 0 replies; 3+ messages in thread
From: Peter Stephenson @ 2011-06-01 18:45 UTC (permalink / raw)
  To: Zsh hackers list

On Tue, 31 May 2011 23:24:36 -0700
Bart Schaefer <schaefer@brasslantern.com> wrote:
> On May 31,  9:37pm, Peter Stephenson wrote:
> }
> } As we've had all the pain of the HIST_LEX_WORDS option, which almost no
> } one is using, to split a history line into words using the lexical
> } analyser, it's worth making this available directly using a relative of
> } "print -s" imaginatively called "print -S".
> 
> This is nice, but I have to ask ... why not just make the behavior of
> "print -s" dependent on the current setting of the option?  In what
> circumstances would one wish to (be able to choose at the level of
> individual commands to) have part of the history in lexed form and
> part of it not?

"print -s" allows you to pick your own words; the different arguments
become the words in the history.  "print -S", on the other hand, takes a
single argument which is the entire command line that will be split.  It
didn't seem appropriate to me to change the behaviour of "print -s" to
do automatic splitting.

What would be a bit neater would be to be able to have the option for an
option, i.e. -s with a variant rather than a completely separate
option.  But we don't have anything like that.

-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-06-01 19:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-31 20:37 PATCH: print -S uses lexical history Peter Stephenson
2011-06-01  6:24 ` Bart Schaefer
2011-06-01 18:45   ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).