zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: Unicode additions, next phase.
@ 2005-01-25 14:46 Peter Stephenson
  2005-01-25 16:02 ` Clint Adams
  2005-01-26  2:29 ` Clint Adams
  0 siblings, 2 replies; 7+ messages in thread
From: Peter Stephenson @ 2005-01-25 14:46 UTC (permalink / raw)
  To: Zsh hackers list

Here are some more changes.  ZLE_UNICODE_SUPPORT is still not complete
enough to be turned on, so it hasn't even been compiled with that in
effect, hence there may be arbitrarily many arbitrarily glaring bugs.
I've only tested it in the old-fashioned case so far by compiling and
trying some simple editing.

- Added a few extra definitions: characters and sizes.  Quite possibly
  '\0' will convert smoothly to L'\0' without a definition, but (since
  we are currently testing for full Unicode compiler support) I don't
  see any point in risking it.

- Generalised zlegetline into a new zlelineasstring function and added
  its inverse, stringaszleline.  zlegetline is now used to produce the
  return value of zlereadline; this already returns allocated memory, so
  the callers don't need changing.  Be careful when freeing: as an
  optimisation the functions don't bother shrinking the allocation to the
  minimum needed size, so you can't use zfree with the length of the array.

- Undo/redo should now use the right format for command line chunks.

- The cutbuffer and the kill ring should use the right format for killed
  text, including the zle variables.

- Went through zle_main.c and zle_utils.c fixing things up (hence the
  previous two items).  They still may need changing if more things need
  to be passed as ZLE_STRING_T instead of char */unsigned char *.  (I
  don't like all the casts between char * and unsigned char * but it's
  about a dozen years too late for that.)

The key missing bits are screen output from zle_refresh.c and key
input, but there will be lots of little things in the other files I
haven't yet looked at.  For example, we need multibyte/wide-character
support for word separator characters, as well as extending use of
ctype macros to iswupper etc.  Those are interesting projects
for anyone who wants a relatively self-contained chunk.

We may need to widen some uses of wchar_t to wint_t (which is defined to
be able to hold a WEOF, which replaces EOF).  We probably need more
configure tests for such things eventually.

Index: Src/system.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/system.h,v
retrieving revision 1.22
diff -u -r1.22 system.h
--- Src/system.h	14 Jan 2005 13:05:22 -0000	1.22
+++ Src/system.h	25 Jan 2005 14:40:56 -0000
@@ -709,6 +709,7 @@
 #ifdef ZLE_UNICODE_SUPPORT
 typedef wchar_t ZLE_CHAR_T;
 typedef wchar_t *ZLE_STRING_T;
+#define ZLE_CHAR_SIZE	sizeof(wchar_t)
 
 /*
  * MB_CUR_MAX is the maximum number of bytes that a single wide
@@ -720,7 +721,14 @@
 #ifndef MB_CUR_MAX
 #define MB_CUR_MAX 6
 #endif
+
+#define ZLENL	L'\n'
+#define ZLENUL	L'\0'
 #else
 typedef int ZLE_CHAR_T;
 typedef unsigned char *ZLE_STRING_T;
+#define ZLE_CHAR_SIZE	sizeof(unsigned char)
+
+#define ZLENL	'\n'
+#define ZLENUL	'\0'
 #endif
Index: Src/Zle/zle.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle.h,v
retrieving revision 1.4
diff -u -r1.4 zle.h
--- Src/Zle/zle.h	14 Jan 2005 13:05:23 -0000	1.4
+++ Src/Zle/zle.h	25 Jan 2005 14:40:57 -0000
@@ -99,8 +99,10 @@
     int flags;			/* see below */
     int hist;			/* history line being changed */
     int off;			/* offset of the text changes */
-    char *del;			/* characters to delete (metafied) */
-    char *ins;			/* characters to insert (metafied) */
+    ZLE_STRING_T del;		/* characters to delete */
+    int dell;			/* no. of characters in del */
+    ZLE_STRING_T ins;		/* characters to insert */
+    int insl;			/* no. of characters in ins */
     int old_cs, new_cs;		/* old and new cursor positions */
 };
 
@@ -123,12 +125,15 @@
 
 #define removesuffix() iremovesuffix(256, 0)
 
-/* Cut/kill buffer type.  The buffer itself is purely binary data, *
- * not NUL-terminated.  len is a length count.  flags uses the     *
- * CUTBUFFER_* constants defined below.                            */
+/*
+ * Cut/kill buffer type.  The buffer itself is purely binary data, not
+ * NUL-terminated.  len is a length count (N.B. number of characters,
+ * not size in bytes).  flags uses the CUTBUFFER_* constants defined
+ * below.
+ */
 
 struct cutbuffer {
-    char *buf;
+    ZLE_STRING_T buf;
     size_t len;
     char flags;
 };
Index: Src/Zle/zle_main.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_main.c,v
retrieving revision 1.56
diff -u -r1.56 zle_main.c
--- Src/Zle/zle_main.c	14 Jan 2005 13:05:23 -0000	1.56
+++ Src/Zle/zle_main.c	25 Jan 2005 14:40:57 -0000
@@ -719,7 +719,7 @@
 	    handleprefixes();
 	    /* for vi mode, make sure the cursor isn't somewhere illegal */
 	    if (invicmdmode() && zlecs > findbol() &&
-		(zlecs == zlell || zleline[zlecs] == '\n'))
+		(zlecs == zlell || zleline[zlecs] == ZLENL))
 		zlecs--;
 	    if (undoing)
 		handleundo();
@@ -819,8 +819,8 @@
     zlecontext = context;
     histline = curhist;
     undoing = 1;
-    zleline = (unsigned char *)zalloc((linesz = 256) + 2);
-    *zleline = '\0';
+    zleline = (unsigned char *)zalloc(((linesz = 256) + 2) * ZLE_CHAR_SIZE);
+    *zleline = ZLENUL;
     virangeflag = lastcmd = done = zlecs = zlell = mark = 0;
     vichgflag = 0;
     viinsbegin = 0;
@@ -877,15 +877,16 @@
 
     freeundo();
     if (eofsent) {
-	free(zleline);
-	zleline = NULL;
+	s = NULL;
     } else {
-	zleline[zlell++] = '\n';
-	zleline = (unsigned char *) metafy((char *) zleline, zlell, META_REALLOC);
+	zleline[zlell++] = ZLENL;
+	s = zlegetline(NULL, NULL);
     }
+    free(zleline);
+    zleline = NULL;
     forget_edits();
     errno = old_errno;
-    return zleline;
+    return s;
 }
 
 /* execute a widget */
@@ -1512,10 +1513,10 @@
     free_isrch_spots();
     if (rdstrs)
         freelinklist(rdstrs, freestr);
-    zfree(cutbuf.buf, cutbuf.len);
+    free(cutbuf.buf);
     if (kring) {
 	for(i = kringsize; i--; )
-	    zfree(kring[i].buf, kring[i].len);
+	    free(kring[i].buf);
 	zfree(kring, kringsize * sizeof(struct cutbuffer));
     }
     for(i = 35; i--; )
Index: Src/Zle/zle_misc.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_misc.c,v
retrieving revision 1.13
diff -u -r1.13 zle_misc.c
--- Src/Zle/zle_misc.c	14 Jan 2005 13:05:23 -0000	1.13
+++ Src/Zle/zle_misc.c	25 Jan 2005 14:40:57 -0000
@@ -358,7 +358,8 @@
     while (n--) {
 	kct = -1;
 	spaceinline(kctbuf->len);
-	memcpy((char *)zleline + zlecs, kctbuf->buf, kctbuf->len);
+	memcpy((char *)(zleline + zlecs), (char *)kctbuf->buf,
+	       kctbuf->len * ZLE_CHAR_SIZE);
 	zlecs += kctbuf->len;
 	yanke = zlecs;
     }
@@ -412,13 +413,13 @@
 	 *    was full, we could loop round and round it, otherwise
 	 *    we just stopped when we hit the first empty buffer.
 	 */
-    } while (!buf->buf || !*buf->buf);
+    } while (!buf->buf || *buf->buf == ZLENUL);
 
     zlecs = yankb;
     foredel(yanke - yankb);
     cc = buf->len;
     spaceinline(cc);
-    memcpy((char *)zleline + zlecs, buf->buf, cc);
+    memcpy((char *)(zleline + zlecs), (char *)buf->buf, cc * ZLE_CHAR_SIZE);
     zlecs += cc;
     yanke = zlecs;
     return 0;
Index: Src/Zle/zle_params.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_params.c,v
retrieving revision 1.20
diff -u -r1.20 zle_params.c
--- Src/Zle/zle_params.c	14 Jan 2005 13:05:24 -0000	1.20
+++ Src/Zle/zle_params.c	25 Jan 2005 14:40:57 -0000
@@ -418,7 +418,8 @@
 get_cutbuffer(UNUSED(Param pm))
 {
     if (cutbuf.buf)
-	return metafy(cutbuf.buf, cutbuf.len, META_HEAPDUP);
+	return (char *)
+	    zlelineasstring(cutbuf.buf, cutbuf.len, 0, NULL, NULL, 1);
     else
 	return "";
 }
@@ -433,10 +434,8 @@
     cutbuf.flags = 0;
     if (x) {
 	int n;
-	unmetafy(x, &n);
+	cutbuf.buf = stringaszleline((unsigned char *)x, &n, NULL);
 	cutbuf.len = n;
-	cutbuf.buf = zalloc(cutbuf.len);
-	memcpy((char *)cutbuf.buf, x, cutbuf.len);
 	free(x);
     } else {
 	cutbuf.buf = NULL;
@@ -469,7 +468,7 @@
     if (kring) {
 	for (kptr = kring, kcnt = 0; kcnt < kringsize; kcnt++, kptr++)
 	    if (kptr->buf)
-		zfree(kptr->buf, kptr->len);
+		free(kptr->buf);
 	zfree(kring, kringsize * sizeof(struct cutbuffer));
 	kring = NULL;
 	kringsize = kringnum = 0;
@@ -489,10 +488,10 @@
 	for (p = x; *p; p++) {
 	    int n, len = strlen(*p);
 	    kptr = kring + kpos;
-	    unmetafy(*p, &n);
+
+	    kptr->buf = stringaszleline((unsigned char *)*p, &n, NULL);
 	    kptr->len = n;
-	    kptr->buf = (char *)zalloc(kptr->len);
-	    memcpy(kptr->buf, *p, kptr->len);
+
 	    zfree(*p, len+1);
 	    kpos = (kpos + kringsize -1 ) % kringsize;
 	}
@@ -524,11 +523,9 @@
 	Cutbuffer kptr = kring + kpos;
 	if (kptr->buf)
 	{
-	    /*
-	     * Need to use HEAPDUP to make sure there's room for the
-	     * terminating NULL.
-	     */
-	    *p++ = metafy((char *)kptr->buf, kptr->len, META_HEAPDUP);
+	    /* Allocate on heap. */
+	    *p++ = (char *)zlelineasstring(kptr->buf, kptr->len,
+					   0, NULL, NULL, 1);
 	}
 	else
 	    *p++ = dupstring("");
Index: Src/Zle/zle_utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_utils.c,v
retrieving revision 1.14
diff -u -r1.14 zle_utils.c
--- Src/Zle/zle_utils.c	22 Jan 2005 16:26:43 -0000	1.14
+++ Src/Zle/zle_utils.c	25 Jan 2005 14:40:57 -0000
@@ -51,7 +51,7 @@
 /* the line before last mod (for undo purposes) */
 
 /**/
-char *lastline;
+ZLE_STRING_T lastline;
 /**/
 int lastlinesz, lastll, lastcs;
 
@@ -67,7 +67,9 @@
 sizeline(int sz)
 {
     while (sz > linesz)
-	zleline = (ZLE_STRING_T)realloc(zleline, (linesz *= 4) + 2);
+	zleline = 
+	    (ZLE_STRING_T)realloc(zleline,
+				  ((linesz *= 4) + 2) * ZLE_CHAR_SIZE);
 }
 
 /*
@@ -82,22 +84,36 @@
     zleline[zlecs++] = chr;
 }
 
+/*
+ * Input a line in internal zle format, possibly using wide characters,
+ * possibly not, together with its length and the cursor position.
+ * Output an ordinary string, using multibyte characters instead of wide
+ * characters where appropriate and with the contents metafied.
+ *
+ * If outll is non-NULL, the new length is stored there.  If outcs is
+ * non-NULL, the new cursor position is stored there.
+ *
+ * If useheap is non-zero, the result is allocated on the heap; otherwise
+ * it is allocated for the caller to free later.
+ */
+
 /**/
 mod_export unsigned char *
-zlegetline(int *ll, int *cs)
+zlelineasstring(ZLE_STRING_T instr, int inll, int incs, int *outll,
+		int *outcs, int useheap)
 {
-    char *s;
 #ifdef ZLE_UNICODE_SUPPORT
+    char *s;
     char *mb_cursor;
     int i, j;
     size_t mb_len = 0;
 
-    mb_cursor = s = zalloc(zlell * MB_CUR_MAX);
+    mb_cursor = s = zalloc(inll * MB_CUR_MAX);
 
-    for(i=0;i<=zlell;i++) {
-	if (i == zlecs)
-	    *cs = mb_len;
-	j = wctomb(mb_cursor, zleline[i]);
+    for(i=0;i<=inll;i++) {
+	if (outcs != NULL && i == incs)
+	    *outcs = mb_len;
+	j = wctomb(mb_cursor, instr[i]);
 	if (j == -1) {
 	    /* invalid char; what to do? */
 	} else {
@@ -105,14 +121,130 @@
 	}
     }
 
-    *ll = mb_len;
+    if (outll != NULL)
+	*outll = mb_len;
+    if (useheap)
+    {
+	unsigned char *ret =
+	    (unsigned char *) metafy((char *) s, mb_len, META_HEAPDUP);
+
+	zfree((char *)s, inll * MB_CUR_MAX);
+
+	return ret;
+    }
+    else
+    {
+	return (unsigned char *) metafy((char *) s, mb_len, META_REALLOC);
+    }
 #else
-    *ll = zlell;
-    *cs = zlecs;
+    if (outll != NULL)
+	*outll = inll;
+    if (outcs != NULL)
+	*outcs = incs;
+
+    return (unsigned char *) metafy((char *) instr, inll,
+				    useheap ? META_HEAPDUP : META_DUP);
+#endif
+}
+
+
+/*
+ * Input a NUL-terminated metafied string instr.
+ * Output a line in internal zle format, together with its length
+ * in the appropriate character units.  Note that outll may not be NULL.
+ *
+ * If outsz is non-NULL, the number of allocated characters in the
+ * string is written there.  For compatibility with use of the linesz
+ * variable (allocated size of zleline), at least two characters are
+ * allocated more than needed for immediate use.  (The extra characters
+ * may take a newline and a null at a later stage.)  These are not
+ * included in *outsz.
+ *
+ * Note that instr is modified in place, hence should be copied
+ * first if necessary.
+ *
+ * Memory for the returned string is permanently allocated.  *outsz may
+ * be longer than the *outll returned.  Hence it should be freed with
+ * zfree(outstr, *outsz) or free(outstr), not zfree(outstr, *outll).
+ */
+
+/**/
+mod_export ZLE_STRING_T
+stringaszleline(unsigned char *instr, int *outll, int *outsz)
+{
+    ZLE_STRING_T outstr;
+    int ll, sz;
+#ifdef ZLE_UNICODE_SUPPORT
+    int cll;
+    mbstate_t ps;
+#endif
+
+    unmetafy(instr, &ll);
+
+    /*
+     * ll is the maximum number of characters there can be in
+     * the output string; the closer to ASCII the string, the
+     * better the guess.  For the 2 see above.
+     */
+    sz = (ll + 2) * ZLE_CHAR_SIZE;
+    if (outsz)
+	*outsz = ll;
+    outstr = (ZLE_STRING_T)zalloc(sz);
+
+#ifdef ZLE_UNICODE_SUPPORT
+    if (ll) {
+	/* reset shift state by converting null. */
+	char cnull = '\0';
+	char *inptr = (char *)instr;
+	wchar_t *outptr = outstr;
+
+	mbrtowc(outstr, &cnull, 1, &ps);
+
+	while (ll) {
+	    size_t ret = mbrtowc(outptr, inptr, ll, &ps);
+
+	    /*
+	     * At this point we don't handle either incomplete (-2) or
+	     * invalid (-1) multibyte sequences.  Use the current length
+	     * and return.
+	     */
+	    if (ret == (size_t)-1 || ret == (size_t)-2)
+		break;
 
-    s = ztrdup(zleline);
+	    /*
+	     * Careful: converting a wide NUL returns zero, but we
+	     * want to treat NULs as regular characters.
+	     * The NUL does get converted, however, so test that.
+	     * Assume it was represented by a single ASCII NUL;
+	     * certainly true for Unicode and unlikely to be false
+	     * in any non-pathological multibyte representation.
+	     */
+	    if (*outptr == L'\0' && ret == 0)
+		ret = 1;
+
+	    inptr += ret;
+	    outptr++;
+	    ll -= ret;
+	}
+	*outll = outptr - outstr;
+    }
+    else
+	*outll = 0;
+#else
+    memcpy((char *)outstr, (char *)instr, ll);
+    *outll = ll;
 #endif
-    return (unsigned char *) metafy((char *) s, zlell, META_REALLOC);
+
+    return outstr;
+}
+
+
+
+/**/
+mod_export unsigned char *
+zlegetline(int *ll, int *cs)
+{
+    return zlelineasstring(zleline, zlell, zlecs, ll, cs, 0);
 }
 
 
@@ -128,7 +260,7 @@
     for (i = zlell; --i >= zlecs;)
 	zleline[i + ct] = zleline[i];
     zlell += ct;
-    zleline[zlell] = '\0';
+    zleline[zlell] = ZLENUL;
 
     if (mark > zlecs)
 	mark += ct;
@@ -147,7 +279,7 @@
 	zleline[to] = zleline[to + cnt];
 	to++;
     }
-    zleline[zlell = to] = '\0';
+    zleline[zlell = to] = ZLENUL;
 }
 
 /**/
@@ -181,9 +313,9 @@
 	struct cutbuffer *b = &vibuf[zmod.vibuf];
 
 	if (!(zmod.flags & MOD_VIAPP) || !b->buf) {
-	    zfree(b->buf, b->len);
-	    b->buf = (char *)zalloc(ct);
-	    memcpy(b->buf, (char *) zleline + i, ct);
+	    free(b->buf);
+	    b->buf = (ZLE_STRING_T)zalloc(ct * ZLE_CHAR_SIZE);
+	    memcpy((char *)b->buf, (char *)(zleline + i), ct * ZLE_CHAR_SIZE);
 	    b->len = ct;
 	    b->flags = vilinerange ? CUTBUFFER_LINE : 0;
 	} else {
@@ -191,26 +323,32 @@
 
 	    if(vilinerange)
 		b->flags |= CUTBUFFER_LINE;
-	    b->buf = realloc(b->buf, ct + len + !!(b->flags & CUTBUFFER_LINE));
+	    b->buf = (ZLE_STRING_T)
+		realloc((char *)b->buf,
+			(ct + len + !!(b->flags & CUTBUFFER_LINE))
+			* ZLE_CHAR_SIZE);
 	    if (b->flags & CUTBUFFER_LINE)
-		b->buf[len++] = '\n';
-	    memcpy(b->buf + len, (char *) zleline + i, ct);
+		b->buf[len++] = ZLENL;
+	    memcpy((char *)(b->buf + len), (char *)(zleline + i),
+		   ct * ZLE_CHAR_SIZE);
 	    b->len = len + ct;
 	}
 	return;
     } else {
 	/* Save in "1, shifting "1-"8 along to "2-"9 */
 	int n;
-	zfree(vibuf[34].buf, vibuf[34].len);
+	free(vibuf[34].buf);
 	for(n=34; n>26; n--)
 	    vibuf[n] = vibuf[n-1];
-	vibuf[26].buf = (char *)zalloc(ct);
-	memcpy(vibuf[26].buf, (char *) zleline + i, ct);
+	vibuf[26].buf = (ZLE_STRING_T)zalloc(ct * ZLE_CHAR_SIZE);
+	memcpy((char *)vibuf[26].buf, (char *)(zleline + i),
+	       ct * ZLE_CHAR_SIZE);
 	vibuf[26].len = ct;
 	vibuf[26].flags = vilinerange ? CUTBUFFER_LINE : 0;
     }
     if (!cutbuf.buf) {
-	cutbuf.buf = ztrdup("");
+	cutbuf.buf = (ZLE_STRING_T)zalloc(ZLE_CHAR_SIZE);
+	cutbuf.buf[0] = ZLENUL;
 	cutbuf.len = cutbuf.flags = 0;
     } else if (!(lastcmd & ZLE_KILL)) {
 	Cutbuffer kptr;
@@ -221,22 +359,26 @@
 	    kringnum = (kringnum + 1) % kringsize;
 	kptr = kring + kringnum;
 	if (kptr->buf)
-	    zfree(kptr->buf, kptr->len);
+	    free(kptr->buf);
 	*kptr = cutbuf;
-	cutbuf.buf = ztrdup("");
+	cutbuf.buf = (ZLE_STRING_T)zalloc(ZLE_CHAR_SIZE);
+	cutbuf.buf[0] = ZLENUL;
 	cutbuf.len = cutbuf.flags = 0;
     }
     if (dir) {
-	char *s = (char *)zalloc(cutbuf.len + ct);
+	ZLE_STRING_T s = (ZLE_STRING_T)zalloc((cutbuf.len + ct)*ZLE_CHAR_SIZE);
 
-	memcpy(s, (char *) zleline + i, ct);
-	memcpy(s + ct, cutbuf.buf, cutbuf.len);
+	memcpy(s, (char *) (zleline + i), ct * ZLE_CHAR_SIZE);
+	memcpy((char *)(s + ct), (char *)cutbuf.buf,
+	       cutbuf.len * ZLE_CHAR_SIZE);
 	free(cutbuf.buf);
 	cutbuf.buf = s;
 	cutbuf.len += ct;
     } else {
-	cutbuf.buf = realloc(cutbuf.buf, cutbuf.len + ct);
-	memcpy(cutbuf.buf + cutbuf.len, (char *) zleline + i, ct);
+	cutbuf.buf = realloc((char *)cutbuf.buf,
+			     (cutbuf.len + ct) * ZLE_CHAR_SIZE);
+	memcpy((char *)(cutbuf.buf + cutbuf.len), (char *) (zleline + i),
+	       ct * ZLE_CHAR_SIZE);
 	cutbuf.len += ct;
     }
     if(vilinerange)
@@ -263,11 +405,19 @@
 void
 setline(char const *s)
 {
-    sizeline(strlen(s));
-    strcpy((char *) zleline, s);
-    unmetafy((char *) zleline, &zlell);
+    char *scp = ztrdup(s);
+    /*
+     * TBD: we could make this more efficient by passing the existing
+     * allocated line to stringaszleline.
+     */
+    free(zleline);
+
+    zleline = stringaszleline(scp, &zlell, &linesz);
+
     if ((zlecs = zlell) && invicmdmode())
 	zlecs--;
+
+    free(scp);
 }
 
 /**/
@@ -276,7 +426,7 @@
 {
     int x = zlecs;
 
-    while (x > 0 && zleline[x - 1] != '\n')
+    while (x > 0 && zleline[x - 1] != ZLENL)
 	x--;
     return x;
 }
@@ -287,7 +437,7 @@
 {
     int x = zlecs;
 
-    while (x != zlell && zleline[x] != '\n')
+    while (x != zlell && zleline[x] != ZLENL)
 	x++;
     return x;
 }
@@ -328,15 +478,18 @@
     return NULL;
 }
 
-/* Query the user, and return a single character response.  The *
- * question is assumed to have been printed already, and the    *
- * cursor is left immediately after the response echoed.        *
- * (Might cause a problem if this takes it onto the next line.) *
- * If yesno is non-zero:                                        *
- * <Tab> is interpreted as 'y'; any other control character is  *
- * interpreted as 'n'.  If there are any characters in the      *
- * buffer, this is taken as a negative response, and no         *
- * characters are read.  Case is folded.                        */
+/*
+ * Query the user, and return a single character response.  The question
+ * is assumed to have been printed already, and the cursor is left
+ * immediately after the response echoed.  (Might cause a problem if
+ * this takes it onto the next line.)  If yesno is non-zero: <Tab> is
+ * interpreted as 'y'; any other control character is interpreted as
+ * 'n'.  If there are any characters in the buffer, this is taken as a
+ * negative response, and no characters are read.  Case is folded.
+ *
+ * TBD: this may need extending to return a wchar_t or possibly
+ * a wint_t.
+ */
 
 /**/
 mod_export int
@@ -496,8 +649,9 @@
     changes = curchange = zalloc(sizeof(*curchange));
     curchange->prev = curchange->next = NULL;
     curchange->del = curchange->ins = NULL;
-    lastline = zalloc(lastlinesz = linesz);
-    memcpy(lastline, zleline, lastll = zlell);
+    curchange->dell = curchange->insl = 0;
+    lastline = zalloc((lastlinesz = linesz) * ZLE_CHAR_SIZE);
+    memcpy(lastline, zleline, (lastll = zlell) * ZLE_CHAR_SIZE);
     lastcs = zlecs;
 }
 
@@ -518,8 +672,8 @@
 
     for(; p; p = n) {
 	n = p->next;
-	zsfree(p->del);
-	zsfree(p->ins);
+	free(p->del);
+	free(p->ins);
 	zfree(p, sizeof(*p));
     }
 }
@@ -537,9 +691,10 @@
     if(curchange->next) {
 	freechanges(curchange->next);
 	curchange->next = NULL;
-	zsfree(curchange->del);
-	zsfree(curchange->ins);
+	free(curchange->del);
+	free(curchange->ins);
 	curchange->del = curchange->ins = NULL;
+	curchange->dell = curchange->insl = 0;
     }
     nextchanges->prev = curchange->prev;
     if(curchange->prev)
@@ -561,7 +716,7 @@
     int sh = zlell < lastll ? zlell : lastll;
     struct change *ch;
 
-    if(lastll == zlell && !memcmp(lastline, zleline, zlell))
+    if(lastll == zlell && !memcmp(lastline, zleline, zlell * ZLE_CHAR_SIZE))
 	return;
     for(pre = 0; pre < sh && zleline[pre] == lastline[pre]; )
 	pre++;
@@ -574,14 +729,24 @@
     ch->off = pre;
     ch->old_cs = lastcs;
     ch->new_cs = zlecs;
-    if(suf + pre == lastll)
+    if(suf + pre == lastll) {
 	ch->del = NULL;
-    else
-	ch->del = metafy(lastline + pre, lastll - pre - suf, META_DUP);
-    if(suf + pre == zlell)
+	ch->dell = 0;
+    } else {
+	ch->dell = lastll - pre - suf;
+	ch->del = (ZLE_STRING_T)zalloc(ch->dell * ZLE_CHAR_SIZE);
+	memcpy((char *)ch->del, (char *)(lastline + pre),
+	       ch->dell * ZLE_CHAR_SIZE);
+    }
+    if(suf + pre == zlell) {
 	ch->ins = NULL;
-    else
-	ch->ins = metafy((char *)zleline + pre, zlell - pre - suf, META_DUP);
+	ch->insl = 0;
+    } else {
+	ch->insl = zlell - pre - suf;
+	ch->ins = (ZLE_STRING_T)zalloc(ch->insl * ZLE_CHAR_SIZE);
+	memcpy((char *)ch->ins, (char *)(zleline + pre),
+	       ch->insl * ZLE_CHAR_SIZE);
+    }
     if(nextchanges) {
 	ch->flags = CH_PREV;
 	ch->prev = endnextchanges;
@@ -602,8 +767,8 @@
 setlastline(void)
 {
     if(lastlinesz != linesz)
-	lastline = realloc(lastline, lastlinesz = linesz);
-    memcpy(lastline, zleline, lastll = zlell);
+	lastline = realloc(lastline, (lastlinesz = linesz) * ZLE_CHAR_SIZE);
+    memcpy(lastline, zleline, (lastll = zlell) * ZLE_CHAR_SIZE);
     lastcs = zlecs;
 }
 
@@ -637,16 +802,12 @@
     }
     zlecs = ch->off;
     if(ch->ins)
-	foredel(ztrlen(ch->ins));
+	foredel(ch->insl);
     if(ch->del) {
-	char *c = ch->del;
-
-	spaceinline(ztrlen(c));
-	for(; *c; c++)
-	    if(*c == Meta)
-		zleline[zlecs++] = STOUC(*++c) ^ 32;
-	    else
-		zleline[zlecs++] = STOUC(*c);
+	spaceinline(ch->dell);
+	memcpy((char *)(zleline + zlecs), (char *)ch->del,
+	       ch->dell * ZLE_CHAR_SIZE);
+	zlecs += ch->dell;
     }
     zlecs = ch->old_cs;
     return 1;
@@ -682,16 +843,12 @@
     }
     zlecs = ch->off;
     if(ch->del)
-	foredel(ztrlen(ch->del));
+	foredel(ch->dell);
     if(ch->ins) {
-	char *c = ch->ins;
-
-	spaceinline(ztrlen(c));
-	for(; *c; c++)
-	    if(*c == Meta)
-		zleline[zlecs++] = STOUC(*++c) ^ 32;
-	    else
-		zleline[zlecs++] = STOUC(*c);
+	spaceinline(ch->insl);
+	memcpy((char *)(zleline + zlecs), (char *)ch->ins,
+	       ch->insl * ZLE_CHAR_SIZE);
+	zlecs += ch->insl;
     }
     zlecs = ch->new_cs;
     return 1;

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

This footnote also confirms that this email message has been swept by
MIMEsweeper for the presence of computer viruses.

www.mimesweeper.com
**********************************************************************


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
  2005-01-25 14:46 PATCH: Unicode additions, next phase Peter Stephenson
@ 2005-01-25 16:02 ` Clint Adams
       [not found]   ` <clint@zsh.org>
  2005-01-26  2:29 ` Clint Adams
  1 sibling, 1 reply; 7+ messages in thread
From: Clint Adams @ 2005-01-25 16:02 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

> Here are some more changes.  ZLE_UNICODE_SUPPORT is still not complete

Did you mean to commit that?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
       [not found]   ` <clint@zsh.org>
@ 2005-01-25 16:37     ` Peter Stephenson
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Stephenson @ 2005-01-25 16:37 UTC (permalink / raw)
  To: Peter Stephenson, Zsh hackers list

Clint Adams wrote:
> > Here are some more changes.  ZLE_UNICODE_SUPPORT is still not complete
> 
> Did you mean to commit that?

I was going to see how badly broken it was for a bit first, but since
any problems look like they're fairly obscure I'll commit it.

pws


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

This footnote also confirms that this email message has been swept by
MIMEsweeper for the presence of computer viruses.

www.mimesweeper.com
**********************************************************************


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
  2005-01-25 14:46 PATCH: Unicode additions, next phase Peter Stephenson
  2005-01-25 16:02 ` Clint Adams
@ 2005-01-26  2:29 ` Clint Adams
  2005-01-26 10:33   ` Peter Stephenson
  1 sibling, 1 reply; 7+ messages in thread
From: Clint Adams @ 2005-01-26  2:29 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

> The key missing bits are screen output from zle_refresh.c and key

Here's a stab at that, though any line containing the string "error" is
broken.

M  Src/Zle/zle_refresh.c
M  Src/system.h

* modified files

--- orig/Src/Zle/zle_refresh.c
+++ mod/Src/Zle/zle_refresh.c
@@ -86,7 +86,7 @@
  * add non-editable text to that being displayed.
  */
 /**/
-unsigned char *predisplay, *postdisplay;
+ZLE_STRING_T predisplay, postdisplay;
 /**/
 int predisplaylen, postdisplaylen;
 
@@ -284,13 +284,17 @@
 	t0 = -1,		/* tmp					     */
 	tosln = 0;		/* tmp in statusline stuff		     */
     unsigned char *s,		/* pointer into the video buffer	     */
-	*t,			/* pointer into the real buffer		     */
 	*sen,			/* pointer to end of the video buffer (eol)  */
+	*u;			/* pointer for status line stuff */
+    ZLE_STRING_T t,		/* pointer into the real buffer		     */
 	*scs;			/* pointer to cursor position in real buffer */
     char **qbuf;		/* tmp					     */
-    unsigned char *tmpline;	/* line with added pre/post text */
+    ZLE_STRING_T tmpline;	/* line with added pre/post text */
     int tmpcs, tmpll;		/* ditto cursor position and line length */
     int tmpalloced;		/* flag to free tmpline when finished */
+#ifdef ZLE_UNICODE_SUPPORT
+    mbstate_t shiftstate;	/* wcrtomb shift state */
+#endif
 
     if (trashedzle)
 	reexpandprompt();
@@ -304,13 +308,13 @@
 
     if (predisplaylen || postdisplaylen) {
 	/* There is extra text to display at the start or end of the line */
-	tmpline = zalloc(zlell + predisplaylen + postdisplaylen);
+	tmpline = zalloc((zlell + predisplaylen + postdisplaylen)*ZLE_CHAR_SIZE);
 	if (predisplaylen)
-	    memcpy(tmpline, predisplay, predisplaylen);
+	    ZS_memcpy(tmpline, predisplay, predisplaylen);
 	if (zlell)
-	    memcpy(tmpline+predisplaylen, zleline, zlell);
+	    ZS_memcpy(tmpline+predisplaylen, zleline, zlell);
 	if (postdisplaylen)
-	    memcpy(tmpline+predisplaylen+zlell, postdisplay, postdisplaylen);
+	    ZS_memcpy(tmpline+predisplaylen+zlell, postdisplay, postdisplaylen);
 	tmpcs = zlecs + predisplaylen;
 	tmpll = predisplaylen + zlell + postdisplaylen;
 	tmpalloced = 1;
@@ -455,10 +459,10 @@
 	if (t == scs)			/* if cursor is here, remember it */
 	    nvcs = s - (unsigned char *)(nbuf[nvln = ln]);
 
-	if (*t == '\n')	{		/* newline */
+	if (*t == ZLENL){		/* newline */
 	    nbuf[ln][winw + 1] = '\0';	/* text not wrapped */
 	    nextline
-	} else if (*t == '\t') {		/* tab */
+	} else if (*t == ZLETAB) {		/* tab */
 	    t0 = (char *)s - nbuf[ln];
 	    if ((t0 | 7) + 1 >= winw) {
 		nbuf[ln][winw + 1] = '\n';	/* text wrapped */
@@ -467,15 +471,31 @@
 		do
 		    *s++ = ' ';
 		while ((++t0) & 7);
-	} else if (icntrl(*t)) {	/* other control character */
+	} else if (ZC_icntrl(*t)) {	/* other control character */
 	    *s++ = '^';
 	    if (s == sen) {
 		nbuf[ln][winw + 1] = '\n';	/* text wrapped */
 		nextline
 	    }
+#ifdef ZLE_UNICODE_SUPPORT
+# error What to do here
+#else
 	    *s++ = (*t == 127) ? '?' : (*t | '@');
-	} else				/* normal character */
+#endif
+	} else {			/* normal character */
+#ifdef ZLE_UNICODE_SUPPORT
+	    size_t i;
+
+	    i = wcrtomb(s, *t, &shiftstate);
+	    if (i == -1) {
+		/* error; what to do? */
+	    } else {
+		s += i;
+	    }
+#else
 	    *s++ = *t;
+#endif
+	}
 	if (s == sen) {
 	    nbuf[ln][winw + 1] = '\n';	/* text wrapped */
 	    nextline
@@ -501,17 +521,17 @@
 	tosln = ln + 1;
 	nbuf[ln][winw + 1] = '\0';	/* text not wrapped */
 	snextline
-	t = (unsigned char *)statusline;
-	for (; t < (unsigned char *)statusline + statusll; t++) {
-	    if (icntrl(*t)) {	/* simplified processing in the status line */
+	u = (unsigned char *)statusline;
+	for (; u < (unsigned char *)statusline + statusll; u++) {
+	    if (icntrl(*u)) {	/* simplified processing in the status line */
 		*s++ = '^';
 		if (s == sen) {
 		    nbuf[ln][winw + 1] = '\n';	/* text wrapped */
 		    snextline
 		}
-		*s++ = (*t == 127) ? '?' : (*t | '@');
+		*s++ = (*u == 127) ? '?' : (*u | '@');
 	    } else
-		*s++ = *t;
+		*s++ = *u;
 	    if (s == sen) {
 		nbuf[ln][winw + 1] = '\n';	/* text wrapped */
 		snextline


--- orig/Src/system.h
+++ mod/Src/system.h
@@ -724,6 +724,9 @@
 
 #define ZLENL	L'\n'
 #define ZLENUL	L'\0'
+#define ZLETAB	L'\t'
+#define ZS_memcpy wmemcpy
+#define ZC_icntrl iswcntrl
 #else
 typedef int ZLE_CHAR_T;
 typedef unsigned char *ZLE_STRING_T;
@@ -731,4 +734,7 @@
 
 #define ZLENL	'\n'
 #define ZLENUL	'\0'
+#define ZLETAB	'\t'
+#define ZS_memcpy memcpy
+#define ZC_icntrl icntrl
 #endif




^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
  2005-01-26  2:29 ` Clint Adams
@ 2005-01-26 10:33   ` Peter Stephenson
  2005-01-26 14:26     ` Clint Adams
  2005-01-27 11:30     ` Matthias B.
  0 siblings, 2 replies; 7+ messages in thread
From: Peter Stephenson @ 2005-01-26 10:33 UTC (permalink / raw)
  To: Zsh hackers list

Clint Adams wrote:
> > The key missing bits are screen output from zle_refresh.c and key
> 
> Here's a stab at that, though any line containing the string "error" is
> broken.

Plus we'll probably need to have wcwidth on the menu at some
point... not sure if that helps with wide characters that are control
characters (do these exist?), probably not.  I'd be tempted to see if
control characters were converted to a single byte string, if they do
print them as at present, and if they don't just skip them; same for
conversion failures, although that shouldn't happen for characters that
convert to 7-bit ASCII anyway.

Terminal emulators never treat 8-bit characters as special (apart from
extended character sets), do they?  I presume someone thought of that
before now.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
The information transmitted is intended only for the person or
entity to which it is addressed and may contain confidential 
and/or privileged material. 
Any review, retransmission, dissemination or other use of, or
taking of any action in reliance upon, this information by 
persons or entities other than the intended recipient is 
prohibited. 
If you received this in error, please contact the sender and 
delete the material from any computer.
**********************************************************************


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
  2005-01-26 10:33   ` Peter Stephenson
@ 2005-01-26 14:26     ` Clint Adams
  2005-01-27 11:30     ` Matthias B.
  1 sibling, 0 replies; 7+ messages in thread
From: Clint Adams @ 2005-01-26 14:26 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

> Plus we'll probably need to have wcwidth on the menu at some

Oh dear... also, any memory allocations based on winw will have to be modified.

> point... not sure if that helps with wide characters that are control
> characters (do these exist?), probably not.  I'd be tempted to see if
> control characters were converted to a single byte string, if they do
> print them as at present, and if they don't just skip them; same for
> conversion failures, although that shouldn't happen for characters that
> convert to 7-bit ASCII anyway.

This uses '?' for both eventualities (control characters that are 127 or
above 255, and wcrtomb conversion failures).  That might obscure character
conversion errors.

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.15
diff -u -r1.15 zle_refresh.c
--- Src/Zle/zle_refresh.c	26 Jan 2005 13:39:52 -0000	1.15
+++ Src/Zle/zle_refresh.c	26 Jan 2005 14:24:23 -0000
@@ -478,7 +478,7 @@
 		nextline
 	    }
 #ifdef ZLE_UNICODE_SUPPORT
-# error What to do here
+	    *s++ = ((*t == 127) || (*t > 255)) ? '?' : (*t | '@');
 #else
 	    *s++ = (*t == 127) ? '?' : (*t | '@');
 #endif
@@ -488,7 +488,7 @@
 
 	    i = wcrtomb(s, *t, &shiftstate);
 	    if (i == -1) {
-		/* error; what to do? */
+		*s++ = '?';
 	    } else {
 		s += i;
 	    }


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Unicode additions, next phase.
  2005-01-26 10:33   ` Peter Stephenson
  2005-01-26 14:26     ` Clint Adams
@ 2005-01-27 11:30     ` Matthias B.
  1 sibling, 0 replies; 7+ messages in thread
From: Matthias B. @ 2005-01-27 11:30 UTC (permalink / raw)
  To: Zsh hackers list

On Wed, 26 Jan 2005 10:33:47 +0000 Peter Stephenson <pws@csr.com> wrote:

> Terminal emulators never treat 8-bit characters as special (apart from
> extended character sets), do they?  

Maybe I'm misunderstanding you, but try this in an xterm:

echo $'\x9a'

I don't know what it does, but it certainly triggers some kind of control
function in xterm that causes it to print out some weird code.

MSB

-- 
Better small and real than big and fake.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2005-01-27 11:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-01-25 14:46 PATCH: Unicode additions, next phase Peter Stephenson
2005-01-25 16:02 ` Clint Adams
     [not found]   ` <clint@zsh.org>
2005-01-25 16:37     ` Peter Stephenson
2005-01-26  2:29 ` Clint Adams
2005-01-26 10:33   ` Peter Stephenson
2005-01-26 14:26     ` Clint Adams
2005-01-27 11:30     ` Matthias B.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).