zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: expand tabs
@ 2015-06-01 15:39 Peter Stephenson
  2015-06-04 16:32 ` Peter Stephenson
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Stephenson @ 2015-06-01 15:39 UTC (permalink / raw)
  To: Zsh Hackers' List

I thought it was about time print had the ability to expand tabs
internally.  The main intended use for this is to get reasonably
formatted function bodies from shell builtins and special variables
without having to pipe through an external command.  It could be
attached to a whence option with a bit of wiring in hashtable.c and
support for metafied strings.

diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 1fcc7c2..6fa603a 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1106,7 +1106,7 @@ tt(popd) that do not change the environment seen by an interactive user.
 )
 findex(print)
 xitem(tt(print )[ tt(-abcDilmnNoOpPrsSz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
-item(SPACES()[ tt(-R) [ tt(-en) ]] [ var(arg) ... ])(
+item(SPACES()[ tt(-xX) var(tab-stop) ] [ tt(-R) [ tt(-en) ]] [ var(arg) ... ])(
 With the `tt(-f)' option the arguments are printed as described by tt(printf).
 With no flags or with the flag `tt(-)', the arguments are printed on
 the standard output as described by tt(echo), with the following differences:
@@ -1201,6 +1201,27 @@ tt(HIST_LEX_WORDS) option active.
 item(tt(-u) var(n))(
 Print the arguments to file descriptor var(n).
 )
+item(tt(-x) var(tab-stop))(
+Expand leading tabs on each line of output in the printed string
+assuming a tab stop every var(tab-stop) characters.  This is appropriate
+for formatting code that may be indented with tabs.  Note that leading
+tabs of any argument to print, not just the first, are expanded, even if
+tt(print) is using spaces to separate arguments (the column count
+is maintained across arguments but may be incorrect on output
+owing to previous unexpanded tabs).
+
+The start of the output of each print command is assumed to be aligned
+with a tab stop.  Widths of multibyte characters are handled if the
+option tt(MULTIBYTE) is in effect.  This option is ignored if other
+formatting options are in effect, namely column alignment or
+tt(printf) style, or if output is to a special location such as shell
+history or the command line editor.
+)
+item(tt(-X) var(tab-stop))(
+This is similar to tt(-x), except that all tabs in the printed string
+are expanded.  This is appropriate if tabs in the arguments are
+being used to produce a table format.
+)
 item(tt(-z))(
 Push the arguments onto the editing buffer stack, separated by spaces.
 )
diff --git a/Src/builtin.c b/Src/builtin.c
index 9358e8b..4b08146 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -99,7 +99,7 @@ static struct builtin builtins[] =
 #endif
 
     BUILTIN("popd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 1, BIN_POPD, "q", NULL),
-    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:z-", NULL),
+    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:x:X:z-", NULL),
     BUILTIN("printf", 0, bin_print, 1, -1, BIN_PRINTF, NULL, NULL),
     BUILTIN("pushd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 2, BIN_PUSHD, "qsPL", NULL),
     BUILTIN("pushln", 0, bin_print, 0, -1, BIN_PRINT, NULL, "-nz"),
@@ -4208,11 +4208,40 @@ bin_print(char *name, char **args, Options ops, int func)
 	    return 0;
 	}
 
-	for (; *args; args++, len++) {
-	    fwrite(*args, *len, 1, fout);
-	    if (args[1])
-		fputc(OPT_ISSET(ops,'l') ? '\n' :
-		      OPT_ISSET(ops,'N') ? '\0' : ' ', fout);
+	if (OPT_HASARG(ops, 'x') || OPT_HASARG(ops, 'X')) {
+	    char *eptr;
+	    int expand, startpos = 0;
+	    int all = OPT_HASARG(ops, 'X');
+	    char *xarg = all ? OPT_ARG(ops, 'X') : OPT_ARG(ops, 'x');
+
+	    expand = (int)zstrtol(xarg, &eptr, 10);
+	    if (*eptr || expand <= 0) {
+		zwarnnam(name, "positive integer expected after -%c: %s", 'x',
+			 xarg);
+		return 1;
+	    }
+	    for (; *args; args++, len++) {
+		startpos = zexpandtabs(*args, *len, expand, startpos, fout,
+				       all);
+		if (args[1]) {
+		    if (OPT_ISSET(ops, 'l')) {
+			fputc('\n', fout);
+			startpos = 0;
+		    } else if (OPT_ISSET(ops,'N')) {
+			fputc('\0', fout);
+		    } else {
+			fputc(' ', fout);
+			startpos++;
+		    }
+		}
+	    }
+	} else {
+	    for (; *args; args++, len++) {
+		fwrite(*args, *len, 1, fout);
+		if (args[1])
+		    fputc(OPT_ISSET(ops,'l') ? '\n' :
+			  OPT_ISSET(ops,'N') ? '\0' : ' ', fout);
+	    }
 	}
 	if (!(OPT_ISSET(ops,'n') || nnl))
 	    fputc(OPT_ISSET(ops,'N') ? '\0' : '\n', fout);
diff --git a/Src/utils.c b/Src/utils.c
index 271c800..ddfe480 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4964,6 +4964,108 @@ metacharlenconv(const char *x, int *c)
 /**/
 #endif /* MULTIBYTE_SUPPORT */
 
+/*
+ * Expand tabs to given width, with given starting position on line.
+ * len is length of unmetafied string in bytes.
+ * Output to fout.
+ * Return the end position on the line, i.e. if this is 0 modulo width
+ * the next character is aligned with a tab stop.
+ *
+ * If all is set, all tabs are expanded, else only leading tabs.
+ */
+
+/**/
+mod_export int
+zexpandtabs(const char *s, int len, int width, int startpos, FILE *fout,
+	    int all)
+{
+    int at_start = 1;
+
+#ifdef MULTIBYTE_SUPPORT
+    mbstate_t mbs;
+    size_t ret;
+    wchar_t wc;
+
+    memset(&mbs, 0, sizeof(mbs));
+#endif
+
+    while (len) {
+	if (*s == '\t') {
+	    if (all || at_start) {
+		s++;
+		len--;
+		if (width <= 0 || !(startpos % width)) {
+		    /* always output at least one space */
+		    fputc(' ', fout);
+		    startpos++;
+		}
+		if (width <= 0)
+		    continue;	/* paranoia */
+		while (startpos % width) {
+		    fputc(' ', fout);
+		    startpos++;
+		}
+	    } else {
+		/*
+		 * Leave tab alone.
+		 * Guess width to apply... we might get this wrong.
+		 * This is only needed if there's a following string
+		 * that needs tabs expanding, which is unusual.
+		 */
+		startpos += width - startpos % width;
+		s++;
+		len--;
+		fputc('\t', fout);
+	    }
+	    continue;
+	} else if (*s == '\n' || *s == '\r') {
+	    fputc(*s, fout);
+	    s++;
+	    len--;
+	    startpos = 0;
+	    at_start = 1;
+	    continue;
+	}
+
+	at_start = 0;
+#ifdef MULTIBYTE_SUPPORT
+	if (isset(MULTIBYTE)) {
+	    const char *sstart = s;
+	    ret = mbrtowc(&wc, s, len, &mbs);
+	    if (ret == MB_INVALID) {
+		/* Assume single character per character */
+		memset(&mbs, 0, sizeof(mbs));
+		s++;
+		len--;
+	    } else if (ret == MB_INCOMPLETE) {
+		/* incomplete at end --- assume likewise, best we've got */
+		s++;
+		len--;
+	    } else {
+		s += ret;
+		len -= (int)ret;
+	    }
+	    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
+		startpos++;
+	    } else {
+		int wcw = WCWIDTH(wc);
+		if (wcw > 0)	/* paranoia */
+		    startpos += wcw;
+	    }
+	    fwrite(sstart, s - sstart, 1, fout);
+
+	    continue;
+	}
+#endif /* MULTIBYTE_SUPPORT */
+	fputc(*s, fout);
+	s++;
+	len--;
+	startpos++;
+    }
+
+    return startpos;
+}
+
 /* check for special characters in the string */
 
 /**/
diff --git a/Test/B03print.ztst b/Test/B03print.ztst
index 48574c2..54d6350 100644
--- a/Test/B03print.ztst
+++ b/Test/B03print.ztst
@@ -284,3 +284,16 @@
 >610062
 >6100
 >61
+
+ foo=$'one\ttwo\tthree\tfour\n'
+ foo+=$'\tone\ttwo\tthree\tfour\n'
+ foo+=$'\t\tone\t\ttwo\t\tthree\t\tfour'
+ print -x4 $foo
+ print -X4 $foo
+0:Tab expansion by print
+>one	two	three	four
+>    one	two	three	four
+>        one		two		three		four
+>one two three   four
+>    one two three   four
+>        one     two     three       four


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: expand tabs
  2015-06-01 15:39 PATCH: expand tabs Peter Stephenson
@ 2015-06-04 16:32 ` Peter Stephenson
  2015-06-05 14:15   ` Oliver Kiddle
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Stephenson @ 2015-06-04 16:32 UTC (permalink / raw)
  To: Zsh Hackers' List

On Mon, 1 Jun 2015 16:39:26 +0100
Peter Stephenson <p.stephenson@samsung.com> wrote:
> I thought it was about time print had the ability to expand tabs
> internally.  The main intended use for this is to get reasonably
> formatted function bodies from shell builtins and special variables
> without having to pipe through an external command.  It could be
> attached to a whence option with a bit of wiring in hashtable.c and
> support for metafied strings.

This is what I thought I was going to be doing... so I smartened up some
multibyte and non-multibyte code to be able to take the length and width
of prefixes of metafied strings.  That's useful in the pattern code, so
I've left it.

However, then I realised that expanding tabs after generating the text
representation ran the risk of replacing tabs in strings with newlines
(the same danger obviously exists when piping through "expand", so
actually there's no external safe way of expanding tabs).

Then I looked at the code that generates the textual representation and,
amazingly, for the first time in the history of the zsh project, the
code has been made properly modular --- tabs get added in exactly one
place.  (Well, if you don't count the "foo() { ... }" wrapper added by
the function printing code.)  So expanding the tabs at source with a
guaranteed pukka effect is trivial.

I've added a -x option where this makes sense.  This has propagated as
far up as allowing "zed -f -x 2" to edit functions in ZLE with the
indentation looking like it does for contributed functions.

pws

diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 6fa603a..53b6682 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -791,11 +791,18 @@ Equivalent to tt(typeset -E), except that options irrelevant to floating
 point numbers are not permitted.
 )
 findex(functions)
-xitem(tt(functions) [ {tt(PLUS())|tt(-)}tt(UkmtTuz) ] [ var(name) ... ])
+xitem(tt(functions) [ {tt(PLUS())|tt(-)}tt(UkmtTuz) ] [ tt(-x) var(num) ] [ var(name) ... ])
 xitem(tt(functions -M) var(mathfn) [ var(min) [ var(max) [ var(shellfn) ] ] ])
 xitem(tt(functions -M) [ tt(-m) var(pattern) ... ])
 item(tt(functions +M) [ tt(-m) ] var(mathfn) ... )(
-Equivalent to tt(typeset -f), with the exception of the tt(-M) option.
+Equivalent to tt(typeset -f), with the exception of the tt(-x) and
+tt(-M) options.
+
+The tt(-x) option indicates that any functions output will have
+each leading tab for indentation, added by the shell to show syntactic
+structure, expanded to the given number var(num) of spaces.  var(num)
+can also be 0 to suppress all indentation.
+
 Use of the tt(-M) option may not be combined with any of the options
 handled by tt(typeset -f).
 
@@ -1927,6 +1934,9 @@ function is first referenced; see noderef(Functions). The tt(-k) and
 tt(-z) flags make the function be loaded using ksh-style or zsh-style
 autoloading respectively. If neither is given, the setting of the
 tt(KSH_AUTOLOAD) option determines how the function is loaded.
+
+Note that the builtin tt(functions) provides the same basic capabilities
+as tt(typeset -f) but gives access to a few extra options.
 )
 item(tt(-h))(
 Hide: only useful for special parameters (those marked `<S>' in the table in
@@ -2180,7 +2190,7 @@ the user is potentially interested in both, so this problem is intrinsic
 to process IDs.
 )
 findex(whence)
-item(tt(whence) [ tt(-vcwfpamsS) ] var(name) ...)(
+item(tt(whence) [ tt(-vcwfpamsS) ] [ tt(-x) var(num) ] var(name) ...)(
 For each var(name), indicate how it would be interpreted if used as a
 command name.
 
@@ -2233,14 +2243,19 @@ As tt(-s), but if the pathname had to be resolved by following
 multiple symlinks, the intermediate steps are printed, too.  The
 symlink resolved at each step might be anywhere in the path.
 )
+item(tt(-x) var(num))(
+Expand tabs when outputting shell functions using the tt(-c) option.
+This has the same effect as the tt(-x) option to the tt(functions)
+builtin.
+)
 enditem()
 )
 findex(where)
-item(tt(where) [ tt(-wpmsS) ] var(name) ...)(
+item(tt(where) [ tt(-wpmsS) ] [ tt(-x) var(num) ] var(name) ...)(
 Equivalent to tt(whence -ca).
 )
 findex(which)
-item(tt(which) [ tt(-wpamsS) ] var(name) ...)(
+item(tt(which) [ tt(-wpamsS) ] [ tt(-x) var(num) ] var(name) ...)(
 Equivalent to tt(whence -c).
 )
 findex(zcompile)
diff --git a/Doc/Zsh/contrib.yo b/Doc/Zsh/contrib.yo
index 8b6b7d3..323bf0f 100644
--- a/Doc/Zsh/contrib.yo
+++ b/Doc/Zsh/contrib.yo
@@ -3561,7 +3561,7 @@ set to the ANSI terminal escapes that turn off all attributes and turn on
 bold intensity, respectively.
 )
 findex(fned)
-item(tt(fned) var(name))(
+item(tt(fned) [ tt(-x) var(num) ] var(name))(
 Same as tt(zed -f).  This function does not appear in the zsh
 distribution, but can be created by linking tt(zed) to the name tt(fned)
 in some directory in your tt(fpath).
@@ -3749,7 +3749,7 @@ the difference in function between tt(zargs) and tt(xargs)) or run
 tt(zargs) with the tt(-)tt(-help) option.
 )
 findex(zed)
-xitem(tt(zed) [ tt(-f) ] var(name))
+xitem(tt(zed) [ tt(-f) [ tt(-x) var(num) ] var(name))
 item(tt(zed -b))(
 This function uses the ZLE editor to edit a file or function.
 
@@ -3758,7 +3758,10 @@ If the tt(-f) option is given, the name is taken to be that of
 a function; if the function is marked for autoloading, tt(zed) searches
 for it in the tt(fpath) and loads it.  Note that functions edited this way
 are installed into the current shell, but em(not) written back to the
-autoload file.
+autoload file.  In this case the tt(-x) option specifies that leading
+tabs indenting the function according to syntax should be converted into
+the given number of spaces; `tt(-x 2)' is consistent with the layout
+of functions distributed with the shell.
 
 Without tt(-f), var(name) is the path name of the file to edit, which need
 not exist; it is created on write, if necessary.
diff --git a/Functions/Misc/zed b/Functions/Misc/zed
index c2caaf3..010b69b 100644
--- a/Functions/Misc/zed
+++ b/Functions/Misc/zed
@@ -9,8 +9,9 @@
 local var opt zed_file_name
 # We do not want timeout while we are editing a file
 integer TMOUT=0 okargs=1 fun bind
+local -a expand
 
-while getopts "fb" opt; do
+while getopts "fbx:" opt; do
   case $opt in
     (f)
     fun=1
@@ -19,6 +20,14 @@ while getopts "fb" opt; do
     (b)
     bind=1
     ;;
+
+    (x)
+    if [[ $OPTARG != <-> ]]; then
+      print -r "Integer expected after -x: $OPTARG" >&2
+      return 1
+    fi
+    expand=(-x $OPTARG)
+    ;;
   esac
 done
 shift $(( OPTIND - 1 ))
@@ -29,8 +38,8 @@ shift $(( OPTIND - 1 ))
 if (( $# != okargs )); then
     echo 'Usage:
 zed filename
-zed -f function
-zed -b'
+zed -f [ -x N ] function
+zed -b' >&2
     return 1
 fi
 
@@ -71,7 +80,7 @@ fi
 setopt localoptions nobanghist
 
 if ((fun)) then
-  var="$(functions $1)"
+  var="$(functions $expand $1)"
   # If function is undefined but autoloadable, load it
   if [[ $var = *\#\ undefined* ]] then
     var="$(autoload +X $1; functions $1)"
diff --git a/Src/builtin.c b/Src/builtin.c
index 4b08146..0d1d00e 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -72,7 +72,7 @@ static struct builtin builtins[] =
     BUILTIN("fc", 0, bin_fc, 0, -1, BIN_FC, "aAdDe:EfiIlLmnpPrRt:W", NULL),
     BUILTIN("fg", 0, bin_fg, 0, -1, BIN_FG, NULL, NULL),
     BUILTIN("float", BINF_PLUSOPTS | BINF_MAGICEQUALS | BINF_PSPECIAL, bin_typeset, 0, -1, 0, "E:%F:%HL:%R:%Z:%ghlprtux", "E"),
-    BUILTIN("functions", BINF_PLUSOPTS, bin_functions, 0, -1, 0, "kmMtTuUz", NULL),
+    BUILTIN("functions", BINF_PLUSOPTS, bin_functions, 0, -1, 0, "kmMtTuUx:z", NULL),
     BUILTIN("getln", 0, bin_read, 0, -1, 0, "ecnAlE", "zr"),
     BUILTIN("getopts", 0, bin_getopts, 2, -1, 0, NULL, NULL),
     BUILTIN("hash", BINF_MAGICEQUALS, bin_hash, 0, -1, 0, "Ldfmrv", NULL),
@@ -128,9 +128,9 @@ static struct builtin builtins[] =
     BUILTIN("unset", BINF_PSPECIAL, bin_unset, 1, -1, 0, "fmv", NULL),
     BUILTIN("unsetopt", 0, bin_setopt, 0, -1, BIN_UNSETOPT, NULL, NULL),
     BUILTIN("wait", 0, bin_fg, 0, -1, BIN_WAIT, NULL, NULL),
-    BUILTIN("whence", 0, bin_whence, 0, -1, 0, "acmpvfsSw", NULL),
-    BUILTIN("where", 0, bin_whence, 0, -1, 0, "pmsSw", "ca"),
-    BUILTIN("which", 0, bin_whence, 0, -1, 0, "ampsSw", "c"),
+    BUILTIN("whence", 0, bin_whence, 0, -1, 0, "acmpvfsSwx:", NULL),
+    BUILTIN("where", 0, bin_whence, 0, -1, 0, "pmsSwx:", "ca"),
+    BUILTIN("which", 0, bin_whence, 0, -1, 0, "ampsSwx:", "c"),
     BUILTIN("zmodload", 0, bin_zmodload, 0, -1, 0, "AFRILP:abcfdilmpue", NULL),
     BUILTIN("zcompile", 0, bin_zcompile, 0, -1, 0, "tUMRcmzka", NULL),
 };
@@ -2749,7 +2749,7 @@ bin_functions(char *name, char **argv, Options ops, int func)
     Patprog pprog;
     Shfunc shf;
     int i, returnval = 0;
-    int on = 0, off = 0, pflags = 0, roff;
+    int on = 0, off = 0, pflags = 0, roff, expand = 0;
 
     /* Do we have any flags defined? */
     if (OPT_PLUS(ops,'u'))
@@ -2785,11 +2785,23 @@ bin_functions(char *name, char **argv, Options ops, int func)
     }
 
     if ((off & PM_UNDEFINED) || (OPT_ISSET(ops,'k') && OPT_ISSET(ops,'z')) ||
+	(OPT_ISSET(ops,'x') && !OPT_HASARG(ops,'x')) ||
 	(OPT_MINUS(ops,'X') && (OPT_ISSET(ops,'m') || *argv || !scriptname))) {
 	zwarnnam(name, "invalid option(s)");
 	return 1;
     }
 
+    if (OPT_ISSET(ops,'x')) {
+	char *eptr;
+	expand = (int)zstrtol(OPT_ARG(ops,'x'), &eptr, 10);
+	if (*eptr) {
+	    zwarnnam(name, "number expected after -x");
+	    return 1;
+	}
+	if (expand == 0)	/* no indentation at all */
+	    expand = -1;
+    }
+
     if (OPT_PLUS(ops,'f') || roff || OPT_ISSET(ops,'+'))
 	pflags |= PRINT_NAMEONLY;
 
@@ -2948,8 +2960,8 @@ bin_functions(char *name, char **argv, Options ops, int func)
 	} else {
 	    if (OPT_ISSET(ops,'U') && !OPT_ISSET(ops,'u'))
 		on &= ~PM_UNDEFINED;
-	    scanhashtable(shfunctab, 1, on|off, DISABLED, shfunctab->printnode,
-			  pflags);
+	    scanshfunc(1, on|off, DISABLED, shfunctab->printnode,
+		       pflags, expand);
 	}
 	unqueue_signals();
 	return ret;
@@ -2965,8 +2977,8 @@ bin_functions(char *name, char **argv, Options ops, int func)
 		/* with no options, just print all functions matching the glob pattern */
 		queue_signals();
 		if (!(on|off) && !OPT_ISSET(ops,'X')) {
-		    scanmatchtable(shfunctab, pprog, 1, 0, DISABLED,
-				   shfunctab->printnode, pflags);
+		    scanmatchshfunc(pprog, 1, 0, DISABLED,
+				   shfunctab->printnode, pflags, expand);
 		} else {
 		    /* apply the options to all functions matching the glob pattern */
 		    for (i = 0; i < shfunctab->hsize; i++) {
@@ -3008,7 +3020,7 @@ bin_functions(char *name, char **argv, Options ops, int func)
 		    returnval = 1;
 	    } else
 		/* no flags, so just print */
-		shfunctab->printnode(&shf->node, pflags);
+		printshfuncexpand(&shf->node, pflags, expand);
 	} else if (on & PM_UNDEFINED) {
 	    int signum = -1, ok = 1;
 
@@ -3222,6 +3234,7 @@ bin_whence(char *nam, char **argv, Options ops, int func)
     int aliasflags;
     int csh, all, v, wd;
     int informed = 0;
+    int expand = 0;
     char *cnam, **allmatched = 0;
 
     /* Check some option information */
@@ -3230,6 +3243,17 @@ bin_whence(char *nam, char **argv, Options ops, int func)
     all = OPT_ISSET(ops,'a');
     wd  = OPT_ISSET(ops,'w');
 
+    if (OPT_ISSET(ops,'x')) {
+	char *eptr;
+	expand = (int)zstrtol(OPT_ARG(ops,'x'), &eptr, 10);
+	if (*eptr) {
+	    zwarnnam(nam, "number expected after -x");
+	    return 1;
+	}
+	if (expand == 0)	/* no indentation at all */
+	    expand = -1;
+    }
+
     if (OPT_ISSET(ops,'w'))
 	printflags |= PRINT_WHENCE_WORD;
     else if (OPT_ISSET(ops,'c'))
@@ -3286,8 +3310,8 @@ bin_whence(char *nam, char **argv, Options ops, int func)
 
 		/* and shell functions... */
 		informed +=
-		scanmatchtable(shfunctab, pprog, 1, 0, DISABLED,
-			       shfunctab->printnode, printflags);
+		scanmatchshfunc(pprog, 1, 0, DISABLED,
+			       shfunctab->printnode, printflags, expand);
 
 		/* and builtins. */
 		informed +=
@@ -3342,7 +3366,7 @@ bin_whence(char *nam, char **argv, Options ops, int func)
 	    }
 	    /* Look for shell function */
 	    if ((hn = shfunctab->getnode(shfunctab, *argv))) {
-		shfunctab->printnode(hn, printflags);
+		printshfuncexpand(hn, printflags, expand);
 		informed = 1;
 		if (!all)
 		    continue;
diff --git a/Src/hashtable.c b/Src/hashtable.c
index ab381cc..2b55249 100644
--- a/Src/hashtable.c
+++ b/Src/hashtable.c
@@ -937,13 +937,17 @@ printshfuncnode(HashNode hn, int printflags)
  
     quotedzputs(f->node.nam, stdout);
     if (f->funcdef || f->node.flags & PM_UNDEFINED) {
-	printf(" () {\n\t");
-	if (f->node.flags & PM_UNDEFINED)
-	    printf("%c undefined\n\t", hashchar);
-	else
+	printf(" () {\n");
+	zoutputtab(stdout);
+	if (f->node.flags & PM_UNDEFINED) {
+	    printf("%c undefined\n", hashchar);
+	    zoutputtab(stdout);
+	} else
 	    t = getpermtext(f->funcdef, NULL, 1);
-	if (f->node.flags & (PM_TAGGED|PM_TAGGED_LOCAL))
-	    printf("%c traced\n\t", hashchar);
+	if (f->node.flags & (PM_TAGGED|PM_TAGGED_LOCAL)) {
+	    printf("%c traced\n", hashchar);
+	    zoutputtab(stdout);
+	}
 	if (!t) {
 	    char *fopt = "UtTkz";
 	    int flgs[] = {
@@ -959,11 +963,12 @@ printshfuncnode(HashNode hn, int printflags)
 	    zputs(t, stdout);
 	    zsfree(t);
 	    if (f->funcdef->flags & EF_RUN) {
-		printf("\n\t");
+		printf("\n");
+		zoutputtab(stdout);
 		quotedzputs(f->node.nam, stdout);
 		printf(" \"$@\"");
 	    }
-	}   
+	}
 	printf("\n}");
     } else {
 	printf(" () { }");
@@ -979,6 +984,59 @@ printshfuncnode(HashNode hn, int printflags)
     putchar('\n');
 }
 
+/*
+ * Wrap scanmatchtable for shell functions with optional
+ * expansion of leading tabs.
+ * expand = 0 is standard: use hard tabs.
+ * expand > 0 uses that many spaces.
+ * expand < 0 uses no identation.
+ *
+ * Note this function and the following two are called with
+ * interrupts queued, so saving and restoring text_expand_tabs
+ * is safe.
+ */
+
+/**/
+mod_export int
+scanmatchshfunc(Patprog pprog, int sorted, int flags1, int flags2,
+		ScanFunc scanfunc, int scanflags, int expand)
+{
+    int ret, save_expand;
+
+    save_expand = text_expand_tabs;
+    text_expand_tabs = expand;
+    ret = scanmatchtable(shfunctab, pprog, sorted, flags1, flags2,
+			scanfunc, scanflags);
+    text_expand_tabs = save_expand;
+
+    return ret;
+}
+
+/* Wrap scanhashtable to expand tabs for shell functions */
+
+/**/
+mod_export int
+scanshfunc(int sorted, int flags1, int flags2,
+	      ScanFunc scanfunc, int scanflags, int expand)
+{
+    return scanmatchshfunc(NULL, sorted, flags1, flags2,
+			   scanfunc, scanflags, expand);
+}
+
+/* Wrap shfunctab->printnode to expand tabs */
+
+/**/
+mod_export void
+printshfuncexpand(HashNode hn, int printflags, int expand)
+{
+    int save_expand;
+
+    save_expand = text_expand_tabs;
+    text_expand_tabs = expand;
+    shfunctab->printnode(hn, printflags);
+    text_expand_tabs = save_expand;
+}
+
 /**************************************/
 /* Reserved Word Hash Table Functions */
 /**************************************/
diff --git a/Src/pattern.c b/Src/pattern.c
index 4e5e8a1..7e07548 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2202,20 +2202,15 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
 		if ((patglobflags & GF_MATCHREF) &&
 		    !(patflags & PAT_FILE)) {
 		    char *str = ztrduppfx(patinstart, patinlen);
-		    char *ptr = patinstart;
-		    int mlen = 0;
+		    int mlen;
 
 		    /*
 		     * Count the characters.  We're not using CHARSUB()
-		     * because the string is still metafied.  We're
-		     * not using mb_metastrlen() because that expects
-		     * the string to be null terminated.
+		     * because the string is still metafied.
 		     */
 		    MB_METACHARINIT();
-		    while (ptr < patinstart + patinlen) {
-			mlen++;
-			ptr += MB_METACHARLEN(ptr);
-		    }
+		    mlen = MB_METASTRLEN2END(patinstart, 0,
+					     patinstart + patinlen);
 
 		    setsparam("MATCH", str);
 		    setiparam("MBEGIN",
diff --git a/Src/text.c b/Src/text.c
index 958303c..8508796 100644
--- a/Src/text.c
+++ b/Src/text.c
@@ -30,6 +30,16 @@
 #include "zsh.mdh"
 #include "text.pro"
 
+/*
+ * If non-zero, expand syntactically significant leading tabs in text
+ * to this number of spaces.
+ *
+ * If negative, don't output leading whitespace at all.
+ */
+
+/**/
+int text_expand_tabs;
+
 static char *tptr, *tbuf, *tlim, *tpending;
 static int tsiz, tindent, tnewlins, tjob;
 
@@ -156,8 +166,16 @@ taddnl(int no_semicolon)
     if (tnewlins) {
 	tdopending();
 	taddchr('\n');
-	for (t0 = 0; t0 != tindent; t0++)
-	    taddchr('\t');
+	for (t0 = 0; t0 != tindent; t0++) {
+	    if (text_expand_tabs >= 0) {
+		if (text_expand_tabs) {
+		    int t1;
+		    for (t1 = 0; t1 < text_expand_tabs; t1++)
+			taddchr(' ');
+		} else
+		    taddchr('\t');
+	    }
+	}
     } else if (no_semicolon) {
 	taddstr(" ");
     } else {
@@ -165,6 +183,30 @@ taddnl(int no_semicolon)
     }
 }
 
+/*
+ * Output a tab that may be expanded as part of a leading set.
+ * Note this is not part of the text framework; it's for
+ * code that needs to output its own tabs that are to be
+ * consistent with those from getpermtext().
+ *
+ * Note these tabs are only expected to be useful at the
+ * start of the line, so we make no attempt to count columns.
+ */
+
+/**/
+void
+zoutputtab(FILE *outf)
+{
+    if (text_expand_tabs < 0)
+	return;
+    if (text_expand_tabs) {
+	int i;
+	for (i = 0; i < text_expand_tabs; i++)
+	    fputc(' ', outf);
+    } else
+	fputc('\t', outf);
+}
+
 /* get a permanent textual representation of n */
 
 /**/
diff --git a/Src/utils.c b/Src/utils.c
index 7409dc8..c33c16d 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4471,9 +4471,37 @@ ztrlen(char const *s)
     for (l = 0; *s; l++) {
 	if (*s++ == Meta) {
 #ifdef DEBUG
-	    if (! *s)
+	    if (! *s) {
 		fprintf(stderr, "BUG: unexpected end of string in ztrlen()\n");
-	    else
+		break;
+	    } else
+#endif
+	    s++;
+	}
+    }
+    return l;
+}
+
+#ifndef MULTIBYTE_SUPPORT
+/*
+ * ztrlen() but with explicit end point for non-null-terminated
+ * segments.  eptr may not be NULL.
+ */
+
+/**/
+mod_export int
+ztrlenend(char const *s, char const *eptr)
+{
+    int l;
+
+    for (l = 0; s < eptr; l++) {
+	if (*s++ == Meta) {
+#ifdef DEBUG
+	    if (! *s) {
+		fprintf(stderr,
+			"BUG: unexpected end of string in ztrlenend()\n");
+		break;
+	    } else
 #endif
 	    s++;
 	}
@@ -4481,6 +4509,8 @@ ztrlen(char const *s)
     return l;
 }
 
+#endif /* MULTIBYTE_SUPPORT */
+
 /* Subtract two pointers in a metafied string. */
 
 /**/
@@ -4879,11 +4909,16 @@ mb_metacharlenconv(const char *s, wint_t *wcp)
  * If width is 1, return total character width rather than number.
  * If width is greater than 1, return 1 if character has non-zero width,
  * else 0.
+ *
+ * Ends if either *ptr is '\0', the normal case (eptr may be NULL for
+ * this), or ptr is eptr (i.e.  *eptr is where the null would be if null
+ * terminated) for strings not delimited by nulls --- note these are
+ * still metafied.
  */
 
 /**/
 mod_export int
-mb_metastrlen(char *ptr, int width)
+mb_metastrlenend(char *ptr, int width, char *eptr)
 {
     char inchar, *laststart;
     size_t ret;
@@ -4898,7 +4933,7 @@ mb_metastrlen(char *ptr, int width)
     num = num_in_char = 0;
 
     memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
-    while (*ptr) {
+    while (*ptr && !(eptr && ptr >= eptr)) {
 	if (*ptr == Meta)
 	    inchar = *++ptr ^ 32;
 	else
diff --git a/Src/zsh.h b/Src/zsh.h
index f6e08e2..c88c2e7 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2926,9 +2926,11 @@ enum {
 typedef wint_t convchar_t;
 #define MB_METACHARLENCONV(str, cp)	mb_metacharlenconv((str), (cp))
 #define MB_METACHARLEN(str)	mb_metacharlenconv(str, NULL)
-#define MB_METASTRLEN(str)	mb_metastrlen(str, 0)
-#define MB_METASTRWIDTH(str)	mb_metastrlen(str, 1)
-#define MB_METASTRLEN2(str, widthp)	mb_metastrlen(str, widthp)
+#define MB_METASTRLEN(str)	mb_metastrlenend(str, 0, NULL)
+#define MB_METASTRWIDTH(str)	mb_metastrlenend(str, 1, NULL)
+#define MB_METASTRLEN2(str, widthp)	mb_metastrlenend(str, widthp, NULL)
+#define MB_METASTRLEN2END(str, widthp, eptr)	\
+    mb_metastrlenend(str, widthp, eptr)
 
 /*
  * We replace broken implementations with one that uses Unicode
@@ -3011,6 +3013,7 @@ typedef int convchar_t;
 #define MB_METASTRLEN(str)	ztrlen(str)
 #define MB_METASTRWIDTH(str)	ztrlen(str)
 #define MB_METASTRLEN2(str, widthp)	ztrlen(str)
+#define MB_METASTRLEN2END(str, widthp, eptr)	ztrlenend(str, eptr)
 
 #define WCWIDTH_WINT(c)	(1)
 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: expand tabs
  2015-06-04 16:32 ` Peter Stephenson
@ 2015-06-05 14:15   ` Oliver Kiddle
  2015-06-05 21:11     ` Peter Stephenson
  0 siblings, 1 reply; 4+ messages in thread
From: Oliver Kiddle @ 2015-06-05 14:15 UTC (permalink / raw)
  To: Zsh Hackers' List

Peter wrote:
> I've added a -x option where this makes sense.  This has propagated as
> far up as allowing "zed -f -x 2" to edit functions in ZLE with the
> indentation looking like it does for contributed functions.

Is definitely nice to be able to get function definitions with just the
two spaces of indentation. I have an expand-function zle widget which
has had to pipe the output through expand.

This will cause me to add a bunch of aliases to have -x2 be the default.
Would it perhaps make sense to allow a default to be specified in a
special variable. This could also affect the values for the functions
association. It might also be more flexible in the long term if we want
to allow more aspects of the output format to be customisable: different
indentation for continuation lines, ksh style function definitions or
whatever. Having the user's preference in one place might also be used
in future for other contexts such as perhaps doing smart indentation
from zle.

Oliver


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: expand tabs
  2015-06-05 14:15   ` Oliver Kiddle
@ 2015-06-05 21:11     ` Peter Stephenson
  0 siblings, 0 replies; 4+ messages in thread
From: Peter Stephenson @ 2015-06-05 21:11 UTC (permalink / raw)
  To: Zsh Hackers' List

On Fri, 05 Jun 2015 16:15:23 +0200
Oliver Kiddle <okiddle@yahoo.co.uk> wrote:
> This will cause me to add a bunch of aliases to have -x2 be the default.
> Would it perhaps make sense to allow a default to be specified in a
> special variable. This could also affect the values for the functions
> association. It might also be more flexible in the long term if we want
> to allow more aspects of the output format to be customisable: different
> indentation for continuation lines, ksh style function definitions or
> whatever. Having the user's preference in one place might also be used
> in future for other contexts such as perhaps doing smart indentation
> from zle.

Possibly, though it'll take some careful thinking to ensure this is easy
to use an extensible.  A special has would be the obvious choice.

pws


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-06-05 21:16 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-01 15:39 PATCH: expand tabs Peter Stephenson
2015-06-04 16:32 ` Peter Stephenson
2015-06-05 14:15   ` Oliver Kiddle
2015-06-05 21:11     ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).