From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 1328 invoked by alias); 31 May 2011 20:37:48 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 29413 Received: (qmail 6092 invoked from network); 31 May 2011 20:37:42 -0000 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00,RCVD_IN_DNSWL_NONE autolearn=ham version=3.3.1 Received-SPF: pass (ns1.primenet.com.au: SPF record at ntlworld.com designates 81.103.221.48 as permitted sender) From: Peter Stephenson To: zsh-workers@zsh.org (Zsh hackers list) Subject: PATCH: print -S uses lexical history X-Mailer: MH-E 8.2; nmh 1.3; GNU Emacs 23.2.1 Date: Tue, 31 May 2011 21:37:17 +0100 Message-ID: <18618.1306874237@pws-pc.ntlworld.com> X-Cloudmark-Analysis: v=1.1 cv=R50lirqlHffDPPkwUlkuVa99MrvKdVWo//yz83qex8g= c=1 sm=0 a=spWAfVVtWG8A:10 a=uObrxnre4hsA:10 a=NLZqzBF-AAAA:8 a=OqE5uPhCcLq0UjLHZuIA:9 a=UtLB7NNEIW8pyoDlSoEA:7 a=_dQi-Dcv4p4A:10 a=UXVmiyVkT6AA:10 a=zvWTluKU_V2J305S:21 a=3FQVMx3Iimjf3Qp-:21 a=HpAAvcLHHh0Zw7uRqdWCyQ==:117 As we've had all the pain of the HIST_LEX_WORDS option, which almost no one is using, to split a history line into words using the lexical analyser, it's worth making this available directly using a relative of "print -s" imaginatively called "print -S". This just takes a single argument which will be split in the same way as if read in from a history file with HIST_LEX_WORDS on. This doesn't give you very much beyond print -s with a (z)plit parameter, but as the code's already there it's worth having. I've split the core code out into a new function, histsplitwords(). 
Index: Doc/Zsh/builtins.yo =================================================================== RCS file: /cvsroot/zsh/zsh/Doc/Zsh/builtins.yo,v retrieving revision 1.135 diff -p -u -r1.135 builtins.yo --- Doc/Zsh/builtins.yo 19 May 2011 16:10:47 -0000 1.135 +++ Doc/Zsh/builtins.yo 31 May 2011 20:06:27 -0000 @@ -914,7 +914,7 @@ and the new directory stack is not print tt(popd) that do not change the environment seen by an interactive user. ) findex(print) -xitem(tt(print) [ tt(-abcDilmnNoOpPrsz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ]) +xitem(tt(print) [ tt(-abcDilmnNoOpPrsSz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ]) item( [ tt(-R) [ tt(-en) ]] [ var(arg) ... ])( With the `tt(-f)' option the arguments are printed as described by tt(printf). With no flags or with the flag `tt(-)', the arguments are printed on @@ -994,6 +994,14 @@ tt(-R); all other arguments and options ) item(tt(-s))( Place the results in the history list instead of on the standard output. +Each argument to the tt(print) command is treated as a single word in the +history, regardless of its content. +) +item(tt(-S))( +Place the results in the history list instead of on the standard output. +In this case only a single argument is allowed; it will be split into +words by analysing the line as if it had been read from the history +file with the tt(HIST_LEX_WORDS) option set. ) item(tt(-u) var(n))( Print the arguments to file descriptor var(n). 
Index: Src/builtin.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v retrieving revision 1.251 diff -p -u -r1.251 builtin.c --- Src/builtin.c 9 May 2011 09:49:09 -0000 1.251 +++ Src/builtin.c 31 May 2011 20:06:27 -0000 @@ -99,7 +99,7 @@ static struct builtin builtins[] = #endif BUILTIN("popd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 1, BIN_POPD, "q", NULL), - BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsu:z-", NULL), + BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:z-", NULL), BUILTIN("printf", 0, bin_print, 1, -1, BIN_PRINTF, NULL, NULL), BUILTIN("pushd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 2, BIN_PUSHD, "qsPL", NULL), BUILTIN("pushln", 0, bin_print, 0, -1, BIN_PRINT, NULL, "-nz"), @@ -3965,25 +3965,45 @@ bin_print(char *name, char **args, Optio return 0; } /* -s option -- add the arguments to the history list */ - if (OPT_ISSET(ops,'s')) { + if (OPT_ISSET(ops,'s') || OPT_ISSET(ops,'S')) { int nwords = 0, nlen, iwords; char **pargs = args; queue_signals(); - ent = prepnexthistent(); while (*pargs++) nwords++; - if ((ent->nwords = nwords)) { - ent->words = (short *)zalloc(nwords*2*sizeof(short)); - nlen = iwords = 0; - for (pargs = args; *pargs; pargs++) { - ent->words[iwords++] = nlen; - nlen += strlen(*pargs); - ent->words[iwords++] = nlen; - nlen++; + if (nwords) { + if (OPT_ISSET(ops,'S')) { + int wordsize; + short *words; + if (nwords > 1) { + zwarnnam(name, "option -S takes a single argument"); + return 1; + } + words = NULL; + wordsize = 0; + histsplitwords(*args, &words, &wordsize, &nwords, 1); + ent = prepnexthistent(); + ent->words = (short *)zalloc(nwords*sizeof(short)); + memcpy(ent->words, words, nwords*sizeof(short)); + free(words); + ent->nwords = nwords/2; + } else { + ent = prepnexthistent(); + ent->words = (short *)zalloc(nwords*2*sizeof(short)); + 
ent->nwords = nwords; + nlen = iwords = 0; + for (pargs = args; *pargs; pargs++) { + ent->words[iwords++] = nlen; + nlen += strlen(*pargs); + ent->words[iwords++] = nlen; + nlen++; + } } - } else + } else { + ent = prepnexthistent(); ent->words = (short *)NULL; + } ent->node.nam = zjoin(args, ' ', 0); ent->stim = ent->ftim = time(NULL); ent->node.flags = 0; Index: Src/hist.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/hist.c,v retrieving revision 1.111 diff -p -u -r1.111 hist.c --- Src/hist.c 27 May 2011 21:56:14 -0000 1.111 +++ Src/hist.c 31 May 2011 20:06:27 -0000 @@ -2338,110 +2338,11 @@ readhistfile(char *fn, int err, int read /* * Divide up the words. */ - nwordpos = 0; start = pt; uselex = isset(HISTLEXWORDS) && !(readflags & HFILE_FAST); - if (uselex) { - /* - * Attempt to do this using the lexer. - */ - LinkList wordlist = bufferwords(NULL, pt, NULL, - LEXFLAGS_COMMENTS_KEEP); - LinkNode wordnode; - int nwords_max; - nwords_max = 2 * countlinknodes(wordlist); - if (nwords_max > nwords) { - nwords = nwords_max; - words = (short *)realloc(words, nwords*sizeof(short)); - } - for (wordnode = firstnode(wordlist); - wordnode; - incnode(wordnode)) { - char *word = getdata(wordnode); - - for (;;) { - /* - * Not really an oddity: "\\\n" is - * removed from input as if whitespace. - */ - if (inblank(*pt)) - pt++; - else if (pt[0] == '\\' && pt[1] == '\n') - pt += 2; - else - break; - } - if (!strpfx(word, pt)) { - int bad = 0; - /* - * Oddity 1: newlines turn into semicolons. - */ - if (!strcmp(word, ";")) - continue; - while (*pt) { - if (!*word) { - bad = 1; - break; - } - /* - * Oddity 2: !'s turn into |'s. - */ - if (*pt == *word || - (*pt == '!' 
&& *word == '|')) { - pt++; - word++; - } else { - bad = 1; - break; - } - } - if (bad) { -#ifdef DEBUG - dputs(ERRMSG("bad wordsplit reading history: " - "%s\nat: %s\nword: %s"), - start, pt, word); -#endif - pt = start; - nwordpos = 0; - uselex = 0; - break; - } - } else if (!strcmp(word, ";") && strpfx(";;", pt)) { - /* - * Don't get confused between a semicolon that's - * probably really a newline and a double - * semicolon that's terminating a case. - */ - continue; - } - words[nwordpos++] = pt - start; - pt += strlen(word); - words[nwordpos++] = pt - start; - } + histsplitwords(pt, &words, &nwords, &nwordpos, uselex); + if (uselex) freeheap(); - } - if (!uselex) { - do { - for (;;) { - if (inblank(*pt)) - pt++; - else if (pt[0] == '\\' && pt[1] == '\n') - pt += 2; - else - break; - } - if (*pt) { - if (nwordpos >= nwords) - words = (short *) - realloc(words, (nwords += 64)*sizeof(short)); - words[nwordpos++] = pt - start; - while (*pt && !inblank(*pt)) - pt++; - words[nwordpos++] = pt - start; - } - } while (*pt); - - } he->nwords = nwordpos/2; if (he->nwords) { @@ -3141,6 +3042,142 @@ bufferwords(LinkList list, char *buf, in return list; } +/* + * Split up a line into words for use in a history file. + * + * lineptr is the line to be split. + * + * *wordsp and *nwordsp are an array already allocated to hold words + * and its length. The array holds both start and end positions, + * so *nwordsp actually counts twice the number of words in the + * original string. *nwordsp may be zero in which case the array + * will be allocated. + * + * *nwordposp returns the used length of *wordsp in the same units as + * *nwordsp, i.e. twice the number of words in the input line. + * + * If uselex is 1, attempt to do this using the lexical analyser. + * This is more accurate, but slower; for reading history files it's + * controlled by the option HISTLEXWORDS. 
If this failed (which + * indicates a bug in the shell) it falls back to whitespace-separated + * strings, printing a message if in debug mode. + * + * If uselex is 0, just look for whitespace-separated words; the only + * special handling is for a backslash-newline combination as used + * by the history file format to save multiline buffers. + */ +/**/ +mod_export void +histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp, + int uselex) +{ + int nwords = *nwordsp, nwordpos = 0; + short *words = *wordsp; + char *start = lineptr; + + if (uselex) { + LinkList wordlist = bufferwords(NULL, lineptr, NULL, + LEXFLAGS_COMMENTS_KEEP); + LinkNode wordnode; + int nwords_max; + + nwords_max = 2 * countlinknodes(wordlist); + if (nwords_max > nwords) { + *nwordsp = nwords = nwords_max; + *wordsp = words = (short *)zrealloc(words, nwords*sizeof(short)); + } + for (wordnode = firstnode(wordlist); + wordnode; + incnode(wordnode)) { + char *word = getdata(wordnode); + + for (;;) { + /* + * Not really an oddity: "\\\n" is + * removed from input as if whitespace. + */ + if (inblank(*lineptr)) + lineptr++; + else if (lineptr[0] == '\\' && lineptr[1] == '\n') + lineptr += 2; + else + break; + } + if (!strpfx(word, lineptr)) { + int bad = 0; + /* + * Oddity 1: newlines turn into semicolons. + */ + if (!strcmp(word, ";")) + continue; + while (*lineptr) { + if (!*word) { + bad = 1; + break; + } + /* + * Oddity 2: !'s turn into |'s. + */ + if (*lineptr == *word || + (*lineptr == '!' && *word == '|')) { + lineptr++; + word++; + } else { + bad = 1; + break; + } + } + if (bad) { +#ifdef DEBUG + dputs(ERRMSG("bad wordsplit reading history: " + "%s\nat: %s\nword: %s"), + start, lineptr, word); +#endif + lineptr = start; + nwordpos = 0; + uselex = 0; + break; + } + } else if (!strcmp(word, ";") && strpfx(";;", lineptr)) { + /* + * Don't get confused between a semicolon that's + * probably really a newline and a double + * semicolon that's terminating a case. 
+ */ + continue; + } + words[nwordpos++] = lineptr - start; + lineptr += strlen(word); + words[nwordpos++] = lineptr - start; + } + } + if (!uselex) { + do { + for (;;) { + if (inblank(*lineptr)) + lineptr++; + else if (lineptr[0] == '\\' && lineptr[1] == '\n') + lineptr += 2; + else + break; + } + if (*lineptr) { + if (nwordpos >= nwords) { + *nwordsp = nwords = nwords + 64; + *wordsp = words = (short *) + zrealloc(words, nwords*sizeof(*words)); + } + words[nwordpos++] = lineptr - start; + while (*lineptr && !inblank(*lineptr)) + lineptr++; + words[nwordpos++] = lineptr - start; + } + } while (*lineptr); + } + + *nwordposp = nwordpos; +} + /* Move the current history list out of the way and prepare a fresh history * list using hf for HISTFILE, hs for HISTSIZE, and shs for SAVEHIST. If * the hf value is an empty string, HISTFILE will be unset from the new -- Peter Stephenson Web page now at http://homepage.ntlworld.com/p.w.stephenson/