zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.stephenson@samsung.com>
To: zsh-workers@zsh.org
Subject: Re: Substitution ${...///} slows down when certain UTF character occurs
Date: Wed, 30 Sep 2015 09:59:50 +0100	[thread overview]
Message-ID: <20150930095950.6c9c583b@pwslap01u.europe.root.pri> (raw)
In-Reply-To: <150929122356.ZM30421@torch.brasslantern.com>

On Tue, 29 Sep 2015 12:23:56 -0700
Bart Schaefer <schaefer@brasslantern.com> wrote:
> On Sep 29,  7:37pm, Peter Stephenson wrote:
> }
> } This uses the new interface.  I haven't done any testing apart from the
> } normal test suite.
> 
> I ran my looping version of Sebastian's test program and got comparable
> (fast!) times for all of his samples.

OK, that suggests we're now allocating memory infrequently enough (as
intended) that it's safe to put it on the heap, which simplifies things
further.  One other unnecessary chunk removed.

pws

diff --git a/Src/glob.c b/Src/glob.c
index d998663..24e60d0 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2478,9 +2478,6 @@ get_match_ret(Imatchdata imd, int b, int e)
 	if (imeta(*p))
 	    add++;
     e += add;
-    for (; p < imd->ustr + imd->ulen; p++)
-	if (imeta(*p))
-	    add++;
 
     /* Everything now refers to metafied lengths. */
     if (replstr || (fl & SUB_LIST)) {
@@ -2808,7 +2805,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    imd.replstr = NULL;
 	}
 	*sp = get_match_ret(&imd, 0, umltot);
-	patfreestr(&patstralloc);
 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
 	    return 0;
 	return 1;
@@ -2856,7 +2852,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, 0, mlen);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2884,13 +2879,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    }
 	    if (tmatch) {
 		*sp = get_match_ret(&imd, tmatch - s, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
 					       &patstralloc, ioff)) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2904,7 +2897,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		set_pat_start(p, t-s);
 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
 		    *sp = get_match_ret(&imd, t-s, umltot);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 		if (fl & SUB_START)
@@ -2914,7 +2906,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
 					       &patstralloc, ioff)) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2926,7 +2917,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		pattrylen(p, send, 0, 0, &patstralloc, 0) &&
 		!--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    } /* fall through */
 	case (SUB_SUBSTR|SUB_LONG):
@@ -2984,7 +2974,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 				umlen -= iincchar(&t, send - t);
 				continue;
 			    } else {
-				patfreestr(&patstralloc);
 				return 1;
 			    }
 			}
@@ -3011,7 +3000,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3024,7 +3012,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
 		    !--n) {
 		    *sp = get_match_ret(&imd, umltot, umltot);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3081,7 +3068,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, tmatch-s, mpos-s);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    set_pat_start(p, l);
@@ -3089,7 +3075,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 					     &patstralloc, umltot) &&
 		!--n) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3134,11 +3119,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    start[lleft] = '\0';
 	    *sp = (char *)start;
 	}
-	patfreestr(&patstralloc);
 	return 1;
     }
     if (fl & SUB_LIST) {	/* safety: don't think this can happen */
-	patfreestr(&patstralloc);
 	return 0;
     }
 
@@ -3146,7 +3129,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
     imd.replstr = NULL;
     imd.repllist = NULL;
     *sp = get_match_ret(&imd, 0, 0);
-    patfreestr(&patstralloc);
     return (fl & SUB_RETFAIL) ? 0 : 1;
 }
 
@@ -3244,7 +3226,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, 0, mlen);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3357,7 +3338,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3369,7 +3349,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		set_pat_start(p, l);
 		if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
 		    *sp = get_match_ret(&imd, uml, uml);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3394,7 +3373,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 			}
 		    }
 		    *sp = get_match_ret(&imd, t-s, mpos-s);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3403,7 +3381,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 					     &patstralloc, uml) &&
 		!--n) {
 		*sp = get_match_ret(&imd, uml, uml);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3445,7 +3422,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	memcpy(t, s + i, l - i);
 	start[lleft] = '\0';
 	*sp = (char *)start;
-	patfreestr(&patstralloc);
 	return 1;
     }
 
@@ -3453,7 +3429,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
     imd.replstr = NULL;
     imd.repllist = NULL;
     *sp = get_match_ret(&imd, 0, 0);
-    patfreestr(&patstralloc);
     return 1;
 }
 
diff --git a/Src/pattern.c b/Src/pattern.c
index 8de372c..68a3409 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2028,8 +2028,8 @@ pattrystart(void)
  *
  * Unmetafy a trial string for use in pattern matching, if needed.
  *
- * If it is needed, returns a zalloc()'d string; if not needed, returns
- * NULL.
+ * If it is needed, returns a heap allocated string; if not needed,
+ * returns NULL.
  *
  * prog is the pattern to be executed.
  * string is the metafied trial string.
@@ -2046,7 +2046,7 @@ pattrystart(void)
  *  unmetalenp is the umetafied length of a path segment preceeding
  *    the trial string needed for file mananagement; it is calculated as
  *    needed so does not need to be initialised.
- *  alloced is the memory allocated --- same as return value from
+ *  alloced is the memory allocated on the heap --- same as return value from
  *    function.
  */
 /**/
@@ -2097,7 +2097,7 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
 	int i, icopy, ncopy;
 
 	dst = patstralloc->alloced =
-	    zalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
+	    zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
 
 	if (needfullpath) {
 	    /* loop twice, copy path buffer first time */
@@ -2134,20 +2134,6 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
 
 
 /*
- * Free memory allocated by patallocstr().
- */
-
-/**/
-mod_export
-void patfreestr(Patstralloc patstralloc)
-{
-    if (patstralloc->alloced)
-	zfree(patstralloc->alloced,
-	      patstralloc->unmetalen + patstralloc->unmetalenp);
-}
-
-
-/*
  * Test prog against null-terminated, metafied string.
  */
 
@@ -2189,8 +2175,9 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
  * done if there is no path prefix (pathpos == 0) as otherwise the path
  * buffer and unmetafied string may not match.  To do this,
  * patallocstr() is callled (use force = 1 to ensure it is alway
- * unmetafied); paststralloc points to existing storage.  When all
- * pattern matching is done, patfreestr() is called.
+ * unmetafied); patstralloc points to existing storage. Memory is
+ * on the heap.
+ *
  * patstralloc->alloced and patstralloc->unmetalen contain the
  * unmetafied string and its length.  In that case, the rules for the
  * earlier arguments change:
@@ -2387,8 +2374,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	    }
 	}
 
-	if (patstralloc == &patstralloc_struct)
-	    patfreestr(patstralloc);
 	return ret;
     } else {
 	int q = queue_signal_level();
@@ -2425,8 +2410,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	    }
 	}
 	if (!ret) {
-	    if (patstralloc == &patstralloc_struct)
-		patfreestr(patstralloc);
 	    return 0;
 	}
 
@@ -2583,9 +2566,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 
 	restore_queue_signals(q);
 
-	if (patstralloc == &patstralloc_struct)
-	    patfreestr(patstralloc);
-
 	return ret;
     }
 }


  reply	other threads:[~2015-09-30  9:10 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-26 12:19 Sebastian Gniazdowski
2015-09-26 20:44 ` Bart Schaefer
2015-09-27  8:13   ` Sebastian Gniazdowski
2015-09-27 16:11     ` Bart Schaefer
2015-09-28  8:51       ` Peter Stephenson
2015-09-28 11:30         ` Peter Stephenson
2015-09-28 19:23         ` Peter Stephenson
2015-09-29  8:44           ` Peter Stephenson
2015-09-29 18:37             ` Peter Stephenson
2015-09-29 19:23               ` Bart Schaefer
2015-09-30  8:59                 ` Peter Stephenson [this message]
2015-09-30 14:04                   ` Peter Stephenson
2015-09-30 21:19                     ` Bart Schaefer
2015-10-01  8:41                       ` Peter Stephenson
2015-10-01 14:28                         ` Heap corruption [the thread formerly known as substitution] Peter Stephenson
2015-10-01 15:07                           ` Bart Schaefer
2015-10-01 15:13                           ` Peter Stephenson
2015-10-03 18:59                             ` Peter Stephenson
2015-10-01 13:45       ` Substitution ${...///} slows down when certain UTF character occurs Sebastian Gniazdowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150930095950.6c9c583b@pwslap01u.europe.root.pri \
    --to=p.stephenson@samsung.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).