zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: sort ordering by function
@ 2009-01-26 16:34 Peter Stephenson
  2009-01-30  4:54 ` Bart Schaefer
  0 siblings, 1 reply; 5+ messages in thread
From: Peter Stephenson @ 2009-01-26 16:34 UTC (permalink / raw)
  To: Zsh hackers list

I wrote this because I needed it.

I am faced with a set of directories named <day>-<month>-<year>,
e.g. 26-01-08.  The modification times on the directories aren't useful.
I want these in time order.

After the patch, the solution (assuming EXTENDED_GLOB is on and
KSH_ARRAYS isn't; this isn't supposed to be portable, it's just a
trivial example) is

  sd() {
    local -a match mbegin mend
    [[ $REPLY = (#b)(*)-(*)-(*) ]] && REPLY="$match[3]-$match[2]-$match[1]"
  }
  print -l *(o+sd)

Comments welcome.

Test code should appear when I have time, though if anyone has time to
write test code for this or anything else, that would be fantastic.

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.100
diff -u -r1.100 expn.yo
--- Doc/Zsh/expn.yo	17 Nov 2008 16:56:42 -0000	1.100
+++ Doc/Zsh/expn.yo	26 Jan 2009 16:20:05 -0000
@@ -2229,7 +2229,7 @@
 item(tt(o)var(c))(
 specifies how the names of the files should be sorted. If var(c) is
 tt(n) they are sorted by name (the default); if it is tt(L) they
-are sorted depending on the size (length) of the files; if tt(l) 
+are sorted depending on the size (length) of the files; if tt(l)
 they are sorted by the number of links; if tt(a), tt(m), or tt(c)
 they are sorted by the time of the last access, modification, or
 inode change respectively; if tt(d), files in subdirectories appear before
@@ -2242,6 +2242,16 @@
 so `tt(*(^-oL))' gives a list of all files sorted by file size in descending
 order, following any symbolic links.  Unless tt(oN) is used, multiple order
 specifiers may occur to resolve ties.
+
+tt(oe) and tt(o+) are special cases; they are each followed by shell code,
+delimited as for the tt(e) glob qualifier and the tt(+) glob qualifier
+respectively (see above).  The code is executed for each matched file with
+the parameter tt(REPLY) set to the name of the file on entry.  The code
+should modify the parameter tt(REPLY) in some fashion.  On return, the value
+of the parameter is used instead of the file name as the string on which to
+sort.  Unlike other sort operators, tt(oe) and tt(o+) may be repeated, but
+note that the maximum number of sort operators of any kind that may appear
+in any glob expression is 12.
 )
 item(tt(O)var(c))(
 like `tt(o)', but sorts in descending order; i.e. `tt(*(^oc))' is the
Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.68
diff -u -r1.68 glob.c
--- Src/glob.c	8 Nov 2008 06:31:02 -0000	1.68
+++ Src/glob.c	26 Jan 2009 16:20:06 -0000
@@ -42,6 +42,11 @@
 
 struct gmatch {
     char *name;
+    /*
+     * Array of sort strings:  one for each GS_EXEC sort type in
+     * the glob qualifiers.
+     */
+    char **sortstrs;
     off_t size ALIGN64;
     long atime;
     long mtime;
@@ -68,8 +73,9 @@
 
 #define GS_NAME   1
 #define GS_DEPTH  2
+#define GS_EXEC	  4
 
-#define GS_SHIFT_BASE	4
+#define GS_SHIFT_BASE	8
 
 #define GS_SIZE  (GS_SHIFT_BASE)
 #define GS_ATIME (GS_SHIFT_BASE << 1)
@@ -135,6 +141,17 @@
 /**/
 mod_export char *glob_pre, *glob_suf;
 
+/* Element of a glob sort */
+struct globsort {
+    /* Sort type */
+    int tp;
+    /* Sort code to eval, if type is GS_EXEC */
+    char *exec;
+};
+
+/* Maximum entries in sort array */
+#define MAX_SORTS	(12)
+
 /* struct to easily save/restore current state */
 
 struct globdata {
@@ -157,7 +174,8 @@
     int gd_range, gd_amc, gd_units;
     int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
     int gd_gf_numsort;
-    int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts, gd_gf_sortlist[11];
+    int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
+    struct globsort gd_gf_sortlist[MAX_SORTS];
 
     char *gd_glob_pre, *gd_glob_suf;
 };
@@ -880,11 +898,13 @@
 static int
 gmatchcmp(Gmatch a, Gmatch b)
 {
-    int i, *s;
+    int i;
     off_t r = 0L;
+    struct globsort *s;
+    char **asortstrp = NULL, **bsortstrp = NULL;
 
     for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
-	switch (*s & ~GS_DESC) {
+	switch (s->tp & ~GS_DESC) {
 	case GS_NAME:
 	    r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
 	    break;
@@ -910,6 +930,17 @@
 		r = slasha - slashb;
 	    }
 	    break;
+	case GS_EXEC:
+	    if (!asortstrp) {
+		asortstrp = a->sortstrs;
+		bsortstrp = b->sortstrs;
+	    } else {
+		asortstrp++;
+		bsortstrp++;
+	    }
+	    r = zstrcmp(*bsortstrp, *asortstrp,
+			gf_numsort ? SORTIT_NUMERICALLY : 0);
+	    break;
 	case GS_SIZE:
 	    r = b->size - a->size;
 	    break;
@@ -966,7 +997,7 @@
 	    break;
 	}
 	if (r)
-	    return (int) ((*s & GS_DESC) ? -r : r);
+	    return (int) ((s->tp & GS_DESC) ? -r : r);
     }
     return 0;
 }
@@ -1000,6 +1031,49 @@
     return qfirst;
 }
 
+
+/*
+ * Get a glob string for execution, following e or + qualifiers.
+ * Pointer is character after the e or +.
+ */
+
+/**/
+static char *
+glob_exec_string(char **sp)
+{
+    char sav, *tt, *sdata, *s = *sp;
+    int plus;
+
+    if (s[-1] == '+') {
+	plus = 0;
+	tt = itype_end(s, IIDENT, 0);
+	if (tt == s)
+	{
+	    zerr("missing identifier after `+'");
+	    return NULL;
+	}
+    } else {
+	tt = get_strarg(s, &plus);
+	if (!*tt)
+	{
+	    zerr("missing end of string");
+	    return NULL;
+	}
+    }
+
+    sav = *tt;
+    *tt = '\0';
+    sdata = dupstring(s + plus);
+    untokenize(sdata);
+    *tt = sav;
+    if (sav)
+	*sp = tt + plus;
+    else
+	*sp = tt;
+
+    return sdata;
+}
+
 /* Main entry point to the globbing code for filename globbing. *
  * np points to a node in the list list which will be expanded  *
  * into a series of nodes.                                      */
@@ -1449,7 +1523,16 @@
 		case 'O':
 		{
 		    int t;
+		    char *send;
 
+		    if (gf_nsorts == MAX_SORTS) {
+			zerr("too many glob sort specifiers");
+			restore_globstate(saved);
+			return;
+		    }
+
+		    /* usually just one character */
+		    send = s+1;
 		    switch (*s) {
 		    case 'n': t = GS_NAME; break;
 		    case 'L': t = GS_SIZE; break;
@@ -1459,60 +1542,50 @@
 		    case 'c': t = GS_CTIME; break;
 		    case 'd': t = GS_DEPTH; break;
 		    case 'N': t = GS_NONE; break;
+		    case 'e':
+		    case '+':
+		    {
+			t = GS_EXEC;
+			if ((gf_sortlist[gf_nsorts].exec =
+			     glob_exec_string(&send)) == NULL)
+			{
+			    restore_globstate(saved);
+			    return;
+			}
+			break;
+		    }
 		    default:
 			zerr("unknown sort specifier");
 			restore_globstate(saved);
 			return;
 		    }
-		    if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
-			t <<= GS_SHIFT;
-		    if (gf_sorts & t) {
-			zerr("doubled sort specifier");
-			restore_globstate(saved);
-			return;
+		    if (t != GS_EXEC) {
+			if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
+			    t <<= GS_SHIFT; /* HERE: GS_EXEC? */
+			if (gf_sorts & t) {
+			    zerr("doubled sort specifier");
+			    restore_globstate(saved);
+			    return;
+			}
 		    }
 		    gf_sorts |= t;
-		    gf_sortlist[gf_nsorts++] = t |
+		    gf_sortlist[gf_nsorts++].tp = t |
 			(((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
-		    s++;
+		    s = send;
 		    break;
 		}
 		case '+':
 		case 'e':
 		{
-		    char sav, *tt;
-		    int plus;
+		    char *tt;
 
-		    if (s[-1] == '+') {
-			plus = 0;
-			tt = itype_end(s, IIDENT, 0);
-			if (tt == s)
-			{
-			    zerr("missing identifier after `+'");
-			    tt = NULL;
-			}
-		    } else {
-			tt = get_strarg(s, &plus);
-			if (!*tt)
-			{
-			    zerr("missing end of string");
-			    tt = NULL;
-			}
-		    }
+		    tt = glob_exec_string(&s);
 
 		    if (tt == NULL) {
 			data = 0;
 		    } else {
-			sav = *tt;
-			*tt = '\0';
 			func = qualsheval;
-			sdata = dupstring(s + plus);
-			untokenize(sdata);
-			*tt = sav;
-			if (sav)
-			    s = tt + plus;
-			else
-			    s = tt;
+			sdata = tt;
 		    }
 		    break;
 		}
@@ -1632,7 +1705,7 @@
 	return;
     }
     if (!gf_nsorts) {
-	gf_sortlist[0] = gf_sorts = GS_NAME;
+	gf_sortlist[0].tp = gf_sorts = GS_NAME;
 	gf_nsorts = 1;
     }
     /* Initialise receptacle for matched files, *
@@ -1665,7 +1738,65 @@
 	}
     }
 
-    if (!(gf_sortlist[0] & GS_NONE)) {
+    if (!(gf_sortlist[0].tp & GS_NONE)) {
+	/*
+	 * Get the strings to use for sorting by executing
+	 * the code chunk.  We allow more than one of these.
+	 */
+	int nexecs = 0;
+	struct globsort *sortp;
+	struct globsort *lastsortp = gf_sortlist + gf_nsorts;
+
+	/* First find out if there are any GS_EXECs, counting them. */
+	for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+	{
+	    if (sortp->tp & GS_EXEC)
+		nexecs++;
+	}
+
+	if (nexecs) {
+	    Gmatch tmpptr;
+	    int iexec = 0;
+
+	    /* Yes; allocate enough space for strings for each */
+	    for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+		tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
+
+	    /* Loop over each one, incrementing iexec */
+	    for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+	    {
+		/* Ignore unless this is a GS_EXEC */
+		if (sortp->tp & GS_EXEC) {
+		    Eprog prog;
+
+		    if ((prog = parse_string(sortp->exec, 0))) {
+			int ef = errflag, lv = lastval, ret;
+
+			/* Parsed OK, execute for each name */
+			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
+			    setsparam("REPLY", ztrdup(tmpptr->name));
+			    execode(prog, 1, 0);
+			    if (!errflag)
+				tmpptr->sortstrs[iexec] =
+				    dupstring(getsparam("REPLY"));
+			    else
+				tmpptr->sortstrs[iexec] = tmpptr->name;
+			}
+
+			ret = lastval;
+			errflag = ef;
+			lastval = lv;
+		    } else {
+			/* Failed, let's be safe */
+			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+			    tmpptr->sortstrs[iexec] = tmpptr->name;
+		    }
+
+		    iexec++;
+		}
+	    }
+	}
+
 	/* Sort arguments in to lexical (and possibly numeric) order. *
 	 * This is reversed to facilitate insertion into the list.    */
 	qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
@@ -1682,7 +1813,7 @@
     else if (end > matchct)
 	end = matchct;
     if ((end -= first) > 0) {
-	if (gf_sortlist[0] & GS_NONE) {
+	if (gf_sortlist[0].tp & GS_NONE) {
 	    /* Match list was never reversed, so insert back to front. */
 	    matchptr = matchbuf + matchct - first - 1;
 	    while (end-- > 0) {


-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH: sort ordering by function
  2009-01-26 16:34 PATCH: sort ordering by function Peter Stephenson
@ 2009-01-30  4:54 ` Bart Schaefer
  2009-01-30  9:47   ` Peter Stephenson
  0 siblings, 1 reply; 5+ messages in thread
From: Bart Schaefer @ 2009-01-30  4:54 UTC (permalink / raw)
  To: Zsh hackers list

More catch-up ...

On Jan 26,  4:34pm, Peter Stephenson wrote:
}
} Comments welcome.

Very nice.  I recall trying to hack up something like this before with
the (e...) qualifier and finding it quite difficult to get the execution
order of modify $REPLY -> sort -> restore file name to work properly.
 
} +note that the maximum number of sort operators of any kind that may appear
} +in any glob expression is 12.

Where did 12 come from?


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH: sort ordering by function
  2009-01-30  4:54 ` Bart Schaefer
@ 2009-01-30  9:47   ` Peter Stephenson
  2009-01-30 15:04     ` Vin Shelton
  0 siblings, 1 reply; 5+ messages in thread
From: Peter Stephenson @ 2009-01-30  9:47 UTC (permalink / raw)
  To: Zsh hackers list

On Thu, 29 Jan 2009 20:54:23 -0800
Bart Schaefer <schaefer@brasslantern.com> wrote:
> } +note that the maximum number of sort operators of any kind that may appear
> } +in any glob expression is 12.
> 
> Where did 12 come from?

It's a bit random.  There used to be 11 sort operators; you could have as many
as you wanted in order, but once only, so the limit was 11.  Having added a
new operator, I extended it to 12---but then realised that actually the new
operator was powerful enough that allowing it to be run once only
wasn't very helpful.  So I removed that restriction, but left the overall limit.
It wouldn't take too much work to remove but in practice it's not going to
hurt anyone.  Hitting it should be gracefully handled.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH: sort ordering by function
  2009-01-30  9:47   ` Peter Stephenson
@ 2009-01-30 15:04     ` Vin Shelton
  2009-01-30 15:54       ` Richard Hartmann
  0 siblings, 1 reply; 5+ messages in thread
From: Vin Shelton @ 2009-01-30 15:04 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

On Fri, Jan 30, 2009 at 4:47 AM, Peter Stephenson <pws@csr.com> wrote:
> On Thu, 29 Jan 2009 20:54:23 -0800
> Bart Schaefer <schaefer@brasslantern.com> wrote:
>> } +note that the maximum number of sort operators of any kind that may appear
>> } +in any glob expression is 12.
>>
>> Where did 12 come from?
>
> It's a bit random.  There used be 11 sort operators; you could have as many
> as you wanted in order, but once only, so the limit was 11.  Having added a
> new operator, I extended it to 12

Peter -

I think you misunderstood why the old limit was 11:

    Marty:  Why don't you just make ten louder and make ten be the
top...number...and make that a little louder?
    Nigel:  ...these go to eleven.

  :-)

( In case you're not a Spinal Tap fan:
http://corky.net/scripts/ThisIsSpinalTap.html )

Warm regards on a cold day,
  Vin


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH: sort ordering by function
  2009-01-30 15:04     ` Vin Shelton
@ 2009-01-30 15:54       ` Richard Hartmann
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Hartmann @ 2009-01-30 15:54 UTC (permalink / raw)
  To: Vin Shelton; +Cc: Peter Stephenson, Zsh hackers list

On Fri, Jan 30, 2009 at 16:04, Vin Shelton <acs@alumni.princeton.edu> wrote:

> I think you misunderstood why the old limit was 11:

So you're saying it should go over 9000 now?


scnr,
Richard


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-01-30 15:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-26 16:34 PATCH: sort ordering by function Peter Stephenson
2009-01-30  4:54 ` Bart Schaefer
2009-01-30  9:47   ` Peter Stephenson
2009-01-30 15:04     ` Vin Shelton
2009-01-30 15:54       ` Richard Hartmann

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).