zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: displaying wide characters
@ 2005-10-19 20:31 Peter Stephenson
  2005-10-19 20:41 ` Peter Stephenson
  2005-10-19 22:47 ` Peter Stephenson
  0 siblings, 2 replies; 20+ messages in thread
From: Peter Stephenson @ 2005-10-19 20:31 UTC (permalink / raw)
  To: Zsh hackers list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 20771 bytes --]

First pass at displaying wide characters correctly (with multibyte
support active, obviously).  It uses the WEOF hack I mentioned before.
All I can promise is it didn't seem to be too horribly broken when I
tried it briefly (it was when I forgot to decrement len in
ZR_memset()...)  There's a lot of code which I'll look through again,
but meanwhile it would be good to have it tried out.

I suspect a whole load of horrible corner cases (i) at the ends of lines
(ii) when optimising changes to the display.

Oliver pointed out there are wide ASCII characters at the top of the
basic multilingual plane, from U+ff00 onward, which can be used for
testing.  I'd be happy to add these to define-composed-chars for
convenience but couldn't offhand see how to do it intuitively without
clashing with RFC1345.  (I've just realised I could use ^a etc. since
RFC1345 doesn't assume you've got ^ on the keyboard, but after three
solid hours at this I need a rest (nearly wrote "reset").)  For now,
"print -z '\uff38'" etc. is the way to do it.  These didn't work on my
(fairly recent) xterm, but did in both gnome-terminal and konsole.

complist.c and anything else using wcs_nicechar are already supposed to
count widths properly but I haven't tested completion displays yet.

Please report compiler warnings in zle_refresh.c:  there could be systems
where my assumptions about types are wrong.

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.29
diff -u -r1.29 zle_refresh.c
--- Src/Zle/zle_refresh.c	19 Oct 2005 18:23:07 -0000	1.29
+++ Src/Zle/zle_refresh.c	19 Oct 2005 20:18:07 -0000
@@ -28,6 +28,67 @@
  */
 
 #include "zle.mdh"
+
+#ifdef ZLE_UNICODE_SUPPORT
+/*
+ * We use a wint_t here, since we need an invalid character as a
+ * placeholder and wint_t guarantees that we can use WEOF to do this.
+ */
+typedef wint_t *REFRESH_STRING;
+
+/*
+ * Unfortunately, that means the pointer is the wrong type for
+ * wmemset and friends.
+ */
+static void
+ZR_memset(wint_t *dst, wchar_t wc, int len)
+{
+    while (len--)
+	*dst++ = wc;
+}
+#define ZR_memcpy(d, s, l)  memcpy((d), (s), (l)*sizeof(wint_t))
+static void
+ZR_strcpy(wint_t *dst, wint_t *src)
+{
+    while ((*dst++ = *src++) != L'\0')
+	;
+}
+static size_t
+ZR_strlen(wint_t *wstr)
+{
+    int len = 0;
+
+    while (*wstr++ != L'\0')
+	len++;
+
+    return len;
+}
+/*
+ * Simplified strcmp: we don't need the sign, just whether
+ * the strings are equal.
+ */
+static int
+ZR_strncmp(wint_t *wstr1, wint_t *wstr2, int len)
+{
+    while (len--) {
+	if (!*wstr1 || !*wstr2)
+	    return (*wstr1 == *wstr2) ? 0 : 1;
+	if (*wstr1++ != *wstr2++)
+	    return 1;
+    }
+
+    return 0;
+}
+#else
+typedef char *REFRESH_STRING;
+
+#define ZR_memset	memset
+#define ZR_memcpy	memcpy
+#define ZR_strcpy	strcpy
+#define ZR_strlen	strlen
+#define ZR_strncmp	strncmp
+#endif
+
 #include "zle_refresh.pro"
 
 /*
@@ -112,26 +173,27 @@
 #endif
 
 /**/
-int
-zwcputc(ZLE_CHAR_T c)
+void
+zwcputc(ZLE_INT_T c)
 {
 #ifdef ZLE_UNICODE_SUPPORT
     char mbtmp[MB_CUR_MAX + 1];
     mbstate_t mbstate;
     int i;
 
-    wcrtomb(NULL, L'\0', &mbstate);
-    if ((i = wcrtomb(mbtmp, c, &mbstate)) > 0)
-	return fwrite(mbtmp, i, 1, shout);
-    /* TODO conversion failed; what should we output? */
-    return 0;
+    if (c == WEOF)
+	return;
+
+    memset(&mbstate, 0, sizeof(mbstate_t));
+    if ((i = wcrtomb(mbtmp, (wchar_t)c, &mbstate)) > 0)
+	fwrite(mbtmp, i, 1, shout);
 #else
-    return fputc(c, shout);
+    fputc(c, shout);
 #endif
 }
 
 static int
-zwcwrite(ZLE_STRING_T s, size_t i)
+zwcwrite(REFRESH_STRING s, size_t i)
 {
 #ifdef ZLE_UNICODE_SUPPORT
     size_t j;
@@ -149,7 +211,8 @@
    I've put my fingers into just about every routine in here -
    any queries about updates to mason@primenet.com.au */
 
-static ZLE_STRING_T *nbuf = NULL,/* new video buffer line-by-line char array */
+static REFRESH_STRING 
+    *nbuf = NULL,		/* new video buffer line-by-line char array */
     *obuf = NULL;		/* old video buffer line-by-line char array */
 static int more_start,		/* more text before start of screen?	    */
     more_end,			/* more stuff after end of screen?	    */
@@ -187,10 +250,10 @@
 	    free(nbuf);
 	    free(obuf);
 	}
-	nbuf = (ZLE_STRING_T *)zshcalloc((winh + 1) * sizeof(*nbuf));
-	obuf = (ZLE_STRING_T *)zshcalloc((winh + 1) * sizeof(*obuf));
-	nbuf[0] = (ZLE_STRING_T)zalloc((winw + 2) * sizeof(**nbuf));
-	obuf[0] = (ZLE_STRING_T)zalloc((winw + 2) * sizeof(**obuf));
+	nbuf = (REFRESH_STRING *)zshcalloc((winh + 1) * sizeof(*nbuf));
+	obuf = (REFRESH_STRING *)zshcalloc((winh + 1) * sizeof(*obuf));
+	nbuf[0] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**nbuf));
+	obuf[0] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**obuf));
 
 	lwinw = winw;
 	lwinh = winh;
@@ -221,8 +284,8 @@
     }
 
     if (lpromptw) {
-    	ZS_memset(nbuf[0], ZWC(' '), lpromptw);
-	ZS_memset(obuf[0], ZWC(' '), lpromptw);
+    	ZR_memset(nbuf[0], ZWC(' '), lpromptw);
+	ZR_memset(obuf[0], ZWC(' '), lpromptw);
 	nbuf[0][lpromptw] = obuf[0][lpromptw] = ZWC('\0');
     }
 
@@ -242,7 +305,7 @@
 scrollwindow(int tline)
 {
     int t0;
-    ZLE_STRING_T s;
+    REFRESH_STRING s;
 
     s = nbuf[tline];
     for (t0 = tline; t0 < winh - 1; t0++)
@@ -263,8 +326,8 @@
     int nvcs;			/* video cursor column */
     int nvln;			/* video cursor line */
     int tosln;			/* tmp in statusline stuff */
-    ZLE_STRING_T s;		/* pointer into the video buffer */
-    ZLE_STRING_T sen;		/* pointer to end of the video buffer (eol) */
+    REFRESH_STRING s;		/* pointer into the video buffer */
+    REFRESH_STRING sen;		/* pointer to end of the video buffer (eol) */
 };
 typedef struct rparams *Rparams;
 
@@ -278,10 +341,14 @@
 /*
  * Go to the next line in the main display area.  Return 1 if we should abort
  * processing the line loop at this point, else 0.
+ *
+ * If wrapped is non-zero, text wrapped, so output newline.
+ * Otherwise, text not wrapped, so output null.
  */
 static int
-nextline(Rparams rpms)
+nextline(Rparams rpms, int wrapped)
 {
+    nbuf[rpms->ln][winw+1] = wrapped ? ZWC('\n') : ZWC('\0');
     *rpms->s = ZWC('\0');
     if (rpms->ln != winh - 1)
 	rpms->ln++;
@@ -300,7 +367,7 @@
 	    rpms->nvln--;
     }
     if (!nbuf[rpms->ln])
-	nbuf[rpms->ln] = (ZLE_STRING_T)zalloc((winw + 2) * sizeof(**nbuf));
+	nbuf[rpms->ln] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**nbuf));
     rpms->s = nbuf[rpms->ln];
     rpms->sen = rpms->s + winw;
 
@@ -339,7 +406,7 @@
 	    scrollwindow(rpms->tosln + 1);
 	}
     if (!nbuf[rpms->ln])
-	nbuf[rpms->ln] = (ZLE_STRING_T)zalloc((winw + 2) * sizeof(**nbuf));
+	nbuf[rpms->ln] = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**nbuf));
     rpms->s = nbuf[rpms->ln];
     rpms->sen = rpms->s + winw;
 }
@@ -356,14 +423,14 @@
     static int inlist;		/* avoiding recursion                        */
     int iln;			/* current line as index in loops            */
     int t0 = -1;		/* tmp					     */
-    ZLE_STRING_T t,		/* pointer into the real buffer		     */
+    ZLE_STRING_T tmpline,	/* line with added pre/post text             */
+	t,			/* pointer into the real buffer		     */
 	scs,			/* pointer to cursor position in real buffer */
-	u;			/* pointer for status line stuff */
-    ZLE_STRING_T tmpline,	/* line with added pre/post text */
-	*qbuf;			/* tmp					     */
-    int tmpcs, tmpll;		/* ditto cursor position and line length */
-    int tmpalloced;		/* flag to free tmpline when finished */
-    int remetafy;		/* flag that zle line is metafied */
+	u;			/* pointer for status line stuff             */
+    REFRESH_STRING 	*qbuf;	/* tmp					     */
+    int tmpcs, tmpll;		/* ditto cursor position and line length     */
+    int tmpalloced;		/* flag to free tmpline when finished        */
+    int remetafy;		/* flag that zle line is metafied            */
     struct rparams rpms;
     
     if (trashedzle)
@@ -395,7 +462,9 @@
 	if (zlell)
 	    ZS_memcpy(tmpline+predisplaylen, zleline, zlell);
 	if (postdisplaylen)
-	    ZS_memcpy(tmpline+predisplaylen+zlell, postdisplay, postdisplaylen);
+	    ZS_memcpy(tmpline+predisplaylen+zlell, postdisplay,
+		      postdisplaylen);
+
 	tmpcs = zlecs + predisplaylen;
 	tmpll = predisplaylen + zlell + postdisplaylen;
 	tmpalloced = 1;
@@ -409,7 +478,7 @@
     if (clearlist && listshown > 0) {
 	if (tccan(TCCLEAREOD)) {
 	    int ovln = vln, ovcs = vcs;
-	    ZLE_STRING_T nb = nbuf[vln];
+	    REFRESH_STRING nb = nbuf[vln];
 
 	    nbuf[vln] = obuf[vln];
 	    moveto(nlnct, 0);
@@ -531,7 +600,7 @@
 
     /* Deemed necessary by PWS 1995/05/15 due to kill-line problems */
     if (!*nbuf)
-	*nbuf = (ZLE_STRING_T)zalloc((winw + 2) * sizeof(**nbuf));
+	*nbuf = (REFRESH_STRING)zalloc((winw + 2) * sizeof(**nbuf));
 
     memset(&rpms, 0, sizeof(rpms));
     rpms.nvln = -1;
@@ -544,24 +613,56 @@
 	    rpms.nvcs = rpms.s - (nbuf[rpms.nvln = rpms.ln]);
 
 	if (*t == ZWC('\n')){		/* newline */
-	    nbuf[rpms.ln][winw + 1] = ZWC('\0');	/* text not wrapped */
-	    if (nextline(&rpms))
+	    /* text not wrapped */
+	    if (nextline(&rpms, 0))
 		break;
 	} else if (*t == ZWC('\t')) {		/* tab */
 	    t0 = rpms.s - nbuf[rpms.ln];
 	    if ((t0 | 7) + 1 >= winw) {
-		nbuf[rpms.ln][winw + 1] = ZWC('\n');	/* text wrapped */
-		if (nextline(&rpms))
+		/* text wrapped */
+		if (nextline(&rpms, 1))
 		    break;
 	    } else
 		do
 		    *rpms.s++ = ZWC(' ');
 		while ((++t0) & 7);
-	} else if (ZC_icntrl(*t)) {	/* other control character */
+	}
+#ifdef ZLE_UNICODE_SUPPORT
+	else if (iswprint(*t)) {
+	    int width = wcwidth(*t), break2 = 0;
+	    *rpms.s++ = *t;
+	    while (--width > 0) {
+		/*
+		 * Character is wider than a single position.
+		 * Put WEOF into the positions above 1 as placeholders.
+		 * This keeps the indexing into the video buffer correct.
+		 */
+		if (rpms.s == rpms.sen) {
+		    /*
+		     * Text wrapped.
+		     *
+		     * TODO: hmm, what is the terminal emulator going to
+		     * do?  Let's assume some kind of automatic margin
+		     * behaviour, implying we continue the procedure on the
+		     * next line.  Wrapping behaviour has always been
+		     * problematic.  I foresee interesting times...
+		     */
+		    if (nextline(&rpms, 1)) {
+			break2 = 1;
+			break;
+		    }
+		}
+		*rpms.s++ = WEOF;
+	    }
+	    if (break2)
+		break;
+	}
+#endif
+	else if (ZC_icntrl(*t)) {	/* other control character */
 	    *rpms.s++ = ZWC('^');
 	    if (rpms.s == rpms.sen) {
-		nbuf[rpms.ln][winw + 1] = ZWC('\n');	/* text wrapped */
-		if (nextline(&rpms))
+		/* text wrapped */
+		if (nextline(&rpms, 1))
 		    break;
 	    }
 #ifdef ZLE_UNICODE_SUPPORT
@@ -574,17 +675,17 @@
 	    *rpms.s++ = *t;
 	}
 	if (rpms.s == rpms.sen) {
-	    nbuf[rpms.ln][winw + 1] = ZWC('\n');	/* text wrapped */
-	    if (nextline(&rpms))
+	    /* text wrapped */
+	    if (nextline(&rpms, 1))
 		break;
 	}
     }
 
 /* if we're really on the next line, don't fake it; do everything properly */
-    if (t == scs && (rpms.nvcs = rpms.s - (nbuf[rpms.nvln = rpms.ln]))
-	== winw) {
-	nbuf[rpms.ln][winw + 1] = ZWC('\n');	/* text wrapped */
-	(void)nextline(&rpms);
+    if (t == scs &&
+	(rpms.nvcs = rpms.s - (nbuf[rpms.nvln = rpms.ln])) == winw) {
+	/* text wrapped */
+	(void)nextline(&rpms, 1);
 	*rpms.s = ZWC('\0');
 	rpms.nvcs = 0;
 	rpms.nvln++;
@@ -599,6 +700,21 @@
 	snextline(&rpms);
 	u = statusline;
 	for (; u < statusline + statusll; u++) {
+#ifdef ZLE_UNICODE_SUPPORT
+	    if (iswprint(*u)) {
+		int width = wcwidth(*u);
+		*rpms.s++ = *u;
+		while (--width > 0) {
+		    /* Wide character, handled as above */
+		    if (rpms.s == rpms.sen) {
+			nbuf[rpms.ln][winw + 1] = ZWC('\n');
+			snextline(&rpms);
+		    }
+		    *rpms.s++ = WEOF;
+		}
+	    }
+	    else
+#endif
 	    if (ZC_icntrl(*u)) { /* simplified processing in the status line */
 		*rpms.s++ = ZWC('^');
 		if (rpms.s == rpms.sen) {
@@ -613,12 +729,20 @@
 		snextline(&rpms);
 	    }
 	}
-	if (rpms.s == rpms.sen)
+	if (rpms.s == rpms.sen) {
+	    /*
+	     * I suppose we don't modify nbuf[rpms.ln][winw+1] here
+	     * since we're right at the end?
+	     */
 	    snextline(&rpms);
+	}
     }
     *rpms.s = ZWC('\0');
 
 /* insert <.... at end of last line if there is more text past end of screen */
+/* TODO: if we start overwriting in the middle of a wide character, mayhem
+ * will ensue.
+ */
     if (more_end) {
 	if (!statusline)
 	    rpms.tosln = winh;
@@ -631,7 +755,7 @@
 		break;
 	    }
 	}
-	ZS_strncpy(rpms.sen, ZWS(" <.... "), 7);
+	ZR_memcpy(rpms.sen, ZWS(" <.... "), 7);
 	nbuf[rpms.tosln - 1][winw] = nbuf[rpms.tosln - 1][winw + 1]
 	    = ZWC('\0');
     }
@@ -647,7 +771,7 @@
 		break;
 	    }
 	}
-	ZS_strncpy(rpms.sen, ZWS(" <....> "), 8);
+	ZR_memcpy(rpms.sen, ZWS(" <....> "), 8);
 	nbuf[rpms.tosln][winw] = nbuf[rpms.tosln][winw + 1] = ZWC('\0');
     }
 
@@ -664,14 +788,14 @@
 	else
 	    put_rpmpt = rprompth == 1 && rpromptbuf[0] &&
 		!strchr(rpromptbuf, '\t') &&
-		(int)ZS_strlen(nbuf[0]) + rpromptw < winw - 1;
+		(int)ZR_strlen(nbuf[0]) + rpromptw < winw - 1;
     } else {
 /* insert >.... on first line if there is more text before start of screen */
 	memset(nbuf[0], ZWC(' '), lpromptw);
 	t0 = winw - lpromptw;
 	t0 = t0 > 5 ? 5 : t0;
-	ZS_strncpy(nbuf[0] + lpromptw, ZWS(">...."), t0);
-	ZS_memset(nbuf[0] + lpromptw + t0, ZWC(' '), winw - t0 - lpromptw);
+	ZR_memcpy(nbuf[0] + lpromptw, ZWS(">...."), t0);
+	ZR_memset(nbuf[0] + lpromptw + t0, ZWC(' '), winw - t0 - lpromptw);
 	nbuf[0][winw] = nbuf[0][winw + 1] = ZWC('\0');
     }
 
@@ -686,10 +810,10 @@
 	if (!clearf && iln > 0 && iln < olnct - 1 &&
 	    !(hasam && vcs == winw) &&
 	    nbuf[iln] && obuf[iln] &&
-	    ZS_strncmp(nbuf[iln], obuf[iln], 16)) {
+	    ZR_strncmp(nbuf[iln], obuf[iln], 16)) {
 	    if (tccan(TCDELLINE) && obuf[iln + 1] &&
 		obuf[iln + 1][0] && nbuf[iln] &&
-		!ZS_strncmp(nbuf[iln], obuf[iln + 1], 16)) {
+		!ZR_strncmp(nbuf[iln], obuf[iln + 1], 16)) {
 		moveto(iln, 0);
 		tcout(TCDELLINE);
 		zfree(obuf[iln], (winw + 2) * sizeof(**obuf));
@@ -702,8 +826,8 @@
 	   go off the end of the screen. */
 
 	    else if (tccan(TCINSLINE) && olnct < vmaxln && nbuf[iln + 1] &&
-		     obuf[iln] && !ZS_strncmp(nbuf[iln + 1], 
-						  obuf[iln], 16)) {
+		     obuf[iln] && !ZR_strncmp(nbuf[iln + 1], 
+					      obuf[iln], 16)) {
 		moveto(iln, 0);
 		tcout(TCINSLINE);
 		for (t0 = olnct; t0 != iln; t0--)
@@ -815,13 +939,13 @@
 
 /* TODO remove it when pfxlen is fixed */
 static int
-wpfxlen(ZLE_STRING_T s, ZLE_STRING_T t)
+wpfxlen(REFRESH_STRING s, REFRESH_STRING t)
 {
-        int i = 0;
+    int i = 0;
 
-	    while (*s && *s == *t)
-		        s++, t++, i++;
-	        return i;
+    while (*s && *s == *t)
+	s++, t++, i++;
+    return i;
 }
 
 /* refresh one line, using whatever speed-up tricks are provided by the tty */
@@ -830,7 +954,7 @@
 static void
 refreshline(int ln)
 {
-    ZLE_STRING_T nl, ol, p1;	/* line buffer pointers			 */
+    REFRESH_STRING nl, ol, p1;	/* line buffer pointers			 */
     int ccs = 0,		/* temporary count for cursor position	 */
 	char_ins = 0,		/* number of characters inserted/deleted */
 	col_cleareol,		/* clear to end-of-line from this column */
@@ -841,9 +965,16 @@
 
 /* 0: setup */
     nl = nbuf[ln];
-    rnllen = nllen = nl ? ZS_strlen(nl) : 0;
-    ol = obuf[ln] ? obuf[ln] : ZWS("");
-    ollen = ZS_strlen(ol);
+    rnllen = nllen = nl ? ZR_strlen(nl) : 0;
+    if (obuf[ln]) {
+	ol = obuf[ln];
+	ollen = ZR_strlen(ol);
+    }
+    else {
+	static ZLE_INT_T nullchr = ZWC('\0');
+	ol = &nullchr;
+	ollen = 0;
+    }
 
 /* optimisation: can easily happen for clearing old lines.  If the terminal has
    the capability, then this is the easiest way to skip unnecessary stuff */
@@ -862,19 +993,19 @@
 	|| (ln == 0 && (put_rpmpt != oput_rpmpt))) {	/* prompt changed */
 	p1 = zhalloc((winw + 2) * sizeof(*p1));
 	if (nllen)
-	    ZS_strncpy(p1, nl, nllen);
-	ZS_memset(p1 + nllen, ZWC(' '), winw - nllen);
+	    ZR_memcpy(p1, nl, nllen);
+	ZR_memset(p1 + nllen, ZWC(' '), winw - nllen);
 	p1[winw] = ZWC('\0');
 	p1[winw + 1] = (nllen < winw) ? ZWC('\0') : nl[winw + 1];
 	if (ln && nbuf[ln])
-	    ZS_memcpy(nl, p1, winw + 2);	/* next time obuf will be up-to-date */
+	    ZR_memcpy(nl, p1, winw + 2);	/* next time obuf will be up-to-date */
 	else
 	    nl = p1;		/* don't keep the padding for prompt line */
 	nllen = winw;
     } else if (ollen > nllen) { /* make new line at least as long as old */
 	p1 = zhalloc((ollen + 1) * sizeof(*p1));
-	ZS_strncpy(p1, nl, nllen);
-	ZS_memset(p1 + nllen, ZWC(' '), ollen - nllen);
+	ZR_memcpy(p1, nl, nllen);
+	ZR_memset(p1 + nllen, ZWC(' '), ollen - nllen);
 	p1[ollen] = ZWC('\0');
 	nl = p1;
 	nllen = ollen;
@@ -903,9 +1034,9 @@
     if (hasam && vcs == winw) {
 	if (nbuf[vln] && nbuf[vln][vcs + 1] == ZWC('\n')) {
 	    vln++, vcs = 1;
-            if (nbuf[vln]  && *nbuf[vln])
+            if (nbuf[vln]  && *nbuf[vln]) {
 		zputc(*nbuf[vln]);
-	    else
+	    } else
 		zputc(ZWC(' '));  /* I don't think this should happen */
 	    if (ln == vln) {	/* better safe than sorry */
 		nl++;
@@ -925,7 +1056,7 @@
 
     if (ln == 0 && lpromptw) {
 	i = lpromptw - ccs;
-	j = ZS_strlen(ol);
+	j = ZR_strlen(ol);
 	nl += i;
 	ol += (i > j ? j : i);	/* if ol is too short, point it to '\0' */
 	ccs = lpromptw;
@@ -1049,7 +1180,7 @@
 void
 moveto(int ln, int cl)
 {
-    ZLE_CHAR_T c;
+    ZLE_INT_T c;
 
     if (vcs == winw) {
 	vln++, vcs = 0;
@@ -1126,7 +1257,7 @@
     int cl,			/* ``desired'' absolute horizontal position */
 	i = vcs,		/* cursor position after initial movements  */
 	j;
-    ZLE_STRING_T t;
+    REFRESH_STRING t;
 
     cl = ct + vcs;
 
@@ -1261,14 +1392,18 @@
 static void
 singlerefresh(ZLE_STRING_T tmpline, int tmpll, int tmpcs)
 {
-    ZLE_STRING_T vbuf, vp,	/* video buffer and pointer    */
+    REFRESH_STRING vbuf, vp,	/* video buffer and pointer    */
 	*qbuf,			/* tmp			       */
 	refreshop = *obuf;	/* pointer to old video buffer */
     int t0,			/* tmp			       */
 	vsiz,			/* size of new video buffer    */
 	nvcs = 0;		/* new video cursor column     */
 #ifdef ZLE_UNICODE_SUPPORT
-    ZLE_STRING_T lpwbuf, lpwp;	/* converted lprompt and pointer */
+    /*
+     * converted lprompt and pointer: no WEOF hack here since
+     * we always output the full prompt and count its width.
+     */
+    ZLE_STRING_T lpwbuf, lpwp;
     char *lpptr,		/* pointer into multibyte lprompt */
 	*lpend;			/* end of multibyte lprompt */
     mbstate_t ps;		/* shift state */
@@ -1279,9 +1414,13 @@
     for (vsiz = 1 + lpromptw, t0 = 0; t0 != tmpll; t0++, vsiz++)
 	if (tmpline[t0] == ZWC('\t'))
 	    vsiz = (vsiz | 7) + 1;
+#ifdef ZLE_UNICODE_SUPPORT
+	else if (iswprint(tmpline[t0]))
+	    vsiz += wcwidth(tmpline[t0]);
+#endif
 	else if (ZC_icntrl(tmpline[t0]))
 	    vsiz++;
-    vbuf = (ZLE_STRING_T)zalloc(vsiz * sizeof(*vbuf));
+    vbuf = (REFRESH_STRING)zalloc(vsiz * sizeof(*vbuf));
 
     if (tmpcs < 0) {
 #ifdef DEBUG
@@ -1318,12 +1457,12 @@
     }
     if (lpwp - lpwbuf < lpromptw) {
 	/* Not enough characters for lpromptw. */
-	ZS_memcpy(vbuf, lpwbuf, lpwp - lpwbuf);
+	ZR_memcpy(vbuf, lpwbuf, lpwp - lpwbuf);
 	vp = vbuf + (lpwp - lpwbuf);
 	while (vp < vbuf + lpromptw)
 	    *vp++ = ZWC(' ');
     } else {
-	ZS_memcpy(vbuf, lpwp - lpromptw, lpromptw);
+	ZR_memcpy(vbuf, lpwp - lpromptw, lpromptw);
 	vp = vbuf + lpromptw;
     }
     *vp = ZWC('\0');
@@ -1341,8 +1480,16 @@
 	} else if (tmpline[t0] == ZWC('\n')) {
 	    *vp++ = ZWC('\\');
 	    *vp++ = ZWC('n');
+#ifdef ZLE_UNICODE_SUPPORT
+	} else if (iswprint(tmpline[t0])) {
+	    int width;
+	    *vp++ = tmpline[t0];
+	    width = wcwidth(tmpline[t0]);
+	    while (--width > 0)
+		*vp++ = WEOF;
+#endif
 	} else if (ZC_icntrl(tmpline[t0])) {
-	    ZLE_CHAR_T t = tmpline[++t0];
+	    ZLE_INT_T t = tmpline[++t0];
 
 	    *vp++ = ZWC('^');
 	    /* FIXME is it portable? */
@@ -1363,11 +1510,11 @@
     }
     if (winpos)
 	vbuf[winpos] = ZWC('<');	/* line continues to the left */
-    if ((int)ZS_strlen(vbuf + winpos) > (winw - hasam)) {
+    if ((int)ZR_strlen(vbuf + winpos) > (winw - hasam)) {
 	vbuf[winpos + winw - hasam - 1] = ZWC('>');	/* line continues to right */
 	vbuf[winpos + winw - hasam] = ZWC('\0');
     }
-    ZS_strcpy(nbuf[0], vbuf + winpos);
+    ZR_strcpy(nbuf[0], vbuf + winpos);
     zfree(vbuf, vsiz * sizeof(*vbuf));
     nvcs -= winpos;
 
@@ -1382,7 +1529,7 @@
 	singmoveto(t0);		/* move to where we do all output from */
 
 	if (!*refreshop) {
-	    if ((t0 = ZS_strlen(vp)))
+	    if ((t0 = ZR_strlen(vp)))
 		zwrite(vp, t0);
 	    vcs += t0;
 	    break;

-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page still at http://www.pwstephenson.fsnet.co.uk/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-19 20:31 PATCH: displaying wide characters Peter Stephenson
@ 2005-10-19 20:41 ` Peter Stephenson
  2005-10-20  0:39   ` Mikael Magnusson
  2005-10-19 22:47 ` Peter Stephenson
  1 sibling, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-19 20:41 UTC (permalink / raw)
  To: Zsh hackers list

> First pass at displaying wide characters correctly (with multibyte
> support active, obviously).

By "wide characters", I meant of course "characters with a display width
greater than a single character cell", which is not the same thing.

-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page still at http://www.pwstephenson.fsnet.co.uk/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-19 20:31 PATCH: displaying wide characters Peter Stephenson
  2005-10-19 20:41 ` Peter Stephenson
@ 2005-10-19 22:47 ` Peter Stephenson
  1 sibling, 0 replies; 20+ messages in thread
From: Peter Stephenson @ 2005-10-19 22:47 UTC (permalink / raw)
  To: Zsh hackers list

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2559 bytes --]

> Oliver pointed out there are wide ASCII characters at the top of the
> basic multilingual plane, from U+ff00 onward, which can be used for
> testing.  I'd be happy to add these to define-composed-chars for
> convenience but couldn't offhand see how to do it intuitively without
> clashing with RFC1345.  (I've just realised I could use ^a etc. since
> RFC1345 doesn't assume you've got ^ on the keyboard, but after three
> solid hours at this I need a rest (nearly wrote "reset").)

Here they are.

Index: Doc/Zsh/contrib.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/contrib.yo,v
retrieving revision 1.49
diff -u -r1.49 contrib.yo
--- Doc/Zsh/contrib.yo	17 Oct 2005 09:51:48 -0000	1.49
+++ Doc/Zsh/contrib.yo	19 Oct 2005 22:47:16 -0000
@@ -719,6 +719,11 @@
 
 The most common characters from the Arabic, Cyrillic, Greek and Hebrew
 alphabets are available; consult RFC 1345 for the appropriate sequences.
+In addition, a set of two letter codes not in RFC 1345 are available for
+the double-width characters corresponding to ASCII characters from tt(!)
+to tt(~) (0x21 to 0x7e) by preceding the character with tt(^), for
+example tt(^A) for a double-width tt(A).
+
 The following other two-character sequences are understood.
 
 startitem()
Index: Functions/Zle/define-composed-chars
===================================================================
RCS file: /cvsroot/zsh/zsh/Functions/Zle/define-composed-chars,v
retrieving revision 1.2
diff -u -r1.2 define-composed-chars
--- Functions/Zle/define-composed-chars	17 Oct 2005 09:51:48 -0000	1.2
+++ Functions/Zle/define-composed-chars	19 Oct 2005 22:47:17 -0000
@@ -1,6 +1,9 @@
 # This is not a widget function, it is only a helper for insert-composed-char
 # to cut down on resident memory use.
 
+emulate -L zsh
+setopt cbases
+
 # The associative array zsh_accent_chars is indexed by the
 # accent.  The values are sets of character / Unicode pairs for
 # the character with the given accent.  The Unicode value is
@@ -249,6 +252,15 @@
 a=h
 z[$a]+=" S 5e9"
 
+typeset -i 16 -Z 4 ia
+typeset -i 16 -Z 6 iuni
+# Extended width characters ^A, ^B, ... (not RFC1345)
+for (( ia = 0x21; ia < 0x7f; ia++ )); do
+  (( iuni = ia + 0xff00 - 0x20 ))
+  eval a="\$'\\x${ia##0x}'"
+  z[$a]+=" ^ ${iuni##0x}"
+done
+
 # Card suits: here first character is the interesting one
 for a b in S 2660 H 2661 D 2662 C 2663; do
   z[$a]+=" c $b"


-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page still at http://www.pwstephenson.fsnet.co.uk/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-19 20:41 ` Peter Stephenson
@ 2005-10-20  0:39   ` Mikael Magnusson
  2005-10-20  9:39     ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-20  0:39 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

On 10/19/05, Peter Stephenson <p.w.stephenson@ntlworld.com> wrote:
> > First pass at displaying wide characters correctly (with multibyte
> > support active, obviously).
>
> By "wide characters", I meant of course "characters with a display width
> greater than a single character cell", which is not the same thing.

This seems to work fine so far, and the prompt truncation also seems
to be fixed. I just noticed the following while testing when I missed
the space bar,
% echoちとしはちとしはち
zsh: command not found:
echo\M-c\M-^A\M-!\M-c\M-^A\M-(\M-c\M-^A\M-^W\M-c\M-^A\M-/\M-c\M-^A\M-!\M-c\M-^A\M-(\M-c\M-^A\M-^W\M-c\M-^A\M-/\M-c\M-^A\M-!

Maybe the error message could show multibyte printable characters too?
--
Mikael Magnusson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-20  0:39   ` Mikael Magnusson
@ 2005-10-20  9:39     ` Peter Stephenson
  2005-10-20 15:02       ` Bart Schaefer
  2005-10-24  8:40       ` Mikael Magnusson
  0 siblings, 2 replies; 20+ messages in thread
From: Peter Stephenson @ 2005-10-20  9:39 UTC (permalink / raw)
  To: Zsh hackers list

Mikael Magnusson wrote:
> This seems to work fine so far, and the prompt truncation also seems
> to be fixed. 

Phew.  I think I may have messed up --disable-multibyte, I'll check that...

> I just noticed the following while testing when i missed
> the space bar,
>   [error messages are output using the old nicechar method]
> 
> Maybe the error message could show multibyte printable characters too?

Yes, we're approaching the time when we should start to update stuff
in the main shell (and the ZLE_UNICODE_SUPPORT definition is no longer
appropriate, as Oliver predicted; it probably needs to be MULTIBYTE_SUPPORT
now).  I'm not expecting every little bit (such as pattern matching)
to support multibyte characters before the first 4.3 release, but
things like error messages should now be relatively straightforward with
the code that's been added to utils.c

pws


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-20  9:39     ` Peter Stephenson
@ 2005-10-20 15:02       ` Bart Schaefer
  2005-10-21  9:03         ` Peter Stephenson
  2005-10-24  8:40       ` Mikael Magnusson
  1 sibling, 1 reply; 20+ messages in thread
From: Bart Schaefer @ 2005-10-20 15:02 UTC (permalink / raw)
  To: Zsh hackers list

On Oct 20, 10:39am, Peter Stephenson wrote:
}
} I think I may have messed up --disable-multibyte, I'll check that...

Looks like an off-by-one error somewhere.

*** /tmp/zsh.ztst.out.4543      Thu Oct 20 08:00:11 2005
--- /tmp/zsh.ztst.tout.4543     Thu Oct 20 08:00:11 2005
***************
*** 1,2 ****
! start ...d at 10 Not truncated ...
! start truncat... Not truncated ...
--- 1,2 ----
! start ...d at 10 Not truncated ..
! start truncat... Not truncated ..
Test ../../zsh-4.0/Test/D01prompt.ztst failed: output differs from expected as
shown above for:
  print -P 'start %10<...<truncated at 10%<< Not truncated%3< ...<Not shown'
  print -P 'start %10>...>truncated at 10%>> Not truncated%3> ...>Not shown'
Was testing: prompt truncation


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-20 15:02       ` Bart Schaefer
@ 2005-10-21  9:03         ` Peter Stephenson
  2005-10-21 14:39           ` Bart Schaefer
  0 siblings, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-21  9:03 UTC (permalink / raw)
  To: zsh-workers

Bart Schaefer <schaefer@brasslantern.com> wrote:
> Looks like an off-by-one error somewhere.
> 
> *** /tmp/zsh.ztst.out.4543      Thu Oct 20 08:00:11 2005
> --- /tmp/zsh.ztst.tout.4543     Thu Oct 20 08:00:11 2005
> ***************
> *** 1,2 ****
> ! start ...d at 10 Not truncated ...
> ! start truncat... Not truncated ...
> --- 1,2 ----
> ! start ...d at 10 Not truncated ..
> ! start truncat... Not truncated ..
> Test ../../zsh-4.0/Test/D01prompt.ztst failed: output differs from expected as
> shown above for:
>   print -P 'start %10<...<truncated at 10%<< Not truncated%3< ...<Not shown'
>   print -P 'start %10>...>truncated at 10%>> Not truncated%3> ...>Not shown'
> Was testing: prompt truncation

It looks like it, but it isn't.  I didn't change the old code here, and
this feature is in 4.2.  I was going by the manual:

     %<string<
     %>string>
     %[xstring]
...
          If the string is longer than the  specified  truncation
          length,  it  will  appear in full, completely replacing
          the truncated string.

which is unambiguous, so I presume there is a bug in the old code.  Even
more confusingly, it does the following (in both 4.2 and the non-multibyte
code):

% print -P '%10<0123456789<truncated at 10'
0123456789
% print -P '%10>0123456789abcde>truncated at 10'
0123456789
% print -P '%10<0123456789abcdef<truncated at 10'
truncated at 100

Maybe I should rewrite the code in the other branch to be more like the
code I just added.  Unless anyone wants to argue the manual is wrong?  (The
last output is certainly a bug.)  I suspect it's too minor to worry much
about and I should just fix the old code.  The only time I can think of
where this is significant is if you have a string with a variable
truncation length or truncation indicator so can't guarantee in advance
this won't happen, which must be fairly unlikely.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-21  9:03         ` Peter Stephenson
@ 2005-10-21 14:39           ` Bart Schaefer
  2005-10-21 22:29             ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Bart Schaefer @ 2005-10-21 14:39 UTC (permalink / raw)
  To: zsh-workers

On Oct 21, 10:03am, Peter Stephenson wrote:
} Subject: Re: PATCH: displaying wide characters
}
} It looks like it, but it isn't.  I didn't change the old code here, and
} this feature is in 4.2.

Ah, so this is a newly-added test that's failing.  I missed that part.

}           If the string is longer than the  specified  truncation
}           length,  it  will  appear in full, completely replacing
}           the truncated string.
} 
} which is unambiguous, so I presume there is a bug in the old code.

Either that or an intentional change that failed to make it into the
documentation.  The prompt truncation code was completely rewritten a
while back (prior to 4.0, so *quite* a while back).

} I suspect it's too minor to worry much about and I should just fix the
} old code.  The only time I can think of where this is significant is
} if you have a string with a variable truncation length or truncation
} indicator so can't guarantee in advance this won't happen, which must
} be fairly unlikely.

Actually that's used in some of the themed prompts that are distributed
with the shell, including adam2 and (ahem) bart, so it's not as unlikely
as you might think.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-21 14:39           ` Bart Schaefer
@ 2005-10-21 22:29             ` Peter Stephenson
  0 siblings, 0 replies; 20+ messages in thread
From: Peter Stephenson @ 2005-10-21 22:29 UTC (permalink / raw)
  To: zsh-workers

Bart Schaefer wrote:
> }           If the string is longer than the  specified  truncation
> }           length,  it  will  appear in full, completely replacing
> }           the truncated string.
> } 
> } which is unambiguous, so I presume there is a bug in the old code.
> 
> Either that or an intentional change that failed to make it into the
> documentation.  The prompt truncation code was completely rewritten a
> while back (prior to 4.0, so *quite* a while back).

I'll assume the manual page is still correct.  I don't remember any
 ^   suggestion of a change.
 |
 Look! ASCII apostrophes!

This change makes the whole thing much more maintainable...
I've also removed a few TODOs which no longer apply.

Index: Src/prompt.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/prompt.c,v
retrieving revision 1.28
diff -u -r1.28 prompt.c
--- Src/prompt.c	19 Oct 2005 21:59:36 -0000	1.28
+++ Src/prompt.c	21 Oct 2005 22:23:18 -0000
@@ -1038,18 +1038,18 @@
 	     * Note that if the truncation string is longer than the
 	     * truncation length (twidth > truncwidth), the truncation
 	     * string is used in full.
-	     *
-	     * TODO: we don't take account of multibyte characters
-	     * in the string we're truncating.
 	     */
 	    char *t = truncstr;
 	    int fullen = bp - ptr;
 	    int twidth, maxwidth;
-#ifdef ZLE_UNICODE_SUPPORT
 	    int ntrunc = strlen(t);
 
+#ifdef ZLE_UNICODE_SUPPORT
 	    /* Use screen width of string */
 	    twidth = mb_width(t);
+#else
+	    twidth = ztrlen(t);
+#endif
 	    if (twidth < truncwidth) {
 		maxwidth = truncwidth - twidth;
 		/*
@@ -1110,6 +1110,7 @@
 				fulltextptr++;
 			    }
 			} else {
+#ifdef ZLE_UNICODE_SUPPORT
 			    /*
 			     * Normal text: build up a multibyte character.
 			     */
@@ -1143,6 +1144,13 @@
 				    remw -= wcwidth(cc);
 				}
 			    }
+#else
+			    /* Single byte character */
+			    if (*fulltextptr == Meta)
+				fulltextptr++;
+			    fulltextptr++;
+			    remw--;
+#endif
 			}
 		    }
 
@@ -1170,6 +1178,7 @@
 			    for (; *skiptext != Outpar && *skiptext;
 				 skiptext++);
 			} else {
+#ifdef ZLE_UNICODE_SUPPORT
 			    char inchar;
 			    wchar_t cc;
 			    int ret;
@@ -1194,6 +1203,12 @@
 				    maxwidth -= wcwidth(cc);
 				}
 			    }
+#else
+			    if (*skiptext == Meta)
+				skiptext++;
+			    skiptext++;
+			    maxwidth--;
+#endif
 			}
 		    }
 		    /*
@@ -1240,96 +1255,6 @@
 		bp = ptr;
 	    }
 	    *bp = '\0';
-#else
-	    twidth = ztrlen(t);
-	    maxwidth = twidth < truncwidth ? truncwidth - twidth : 0;
-	    if (w < fullen) {
-		/* Invisible substrings, lots of shuffling. */
-		int n = strlen(t);
-		char *p = ptr, *q = buf;
-		addbufspc(n);
-		ptr = buf + (p - q); /* addbufspc() may have realloc()'d */
-
-		if (truncatleft) {
-		    p = ptr + n;
-		    q = p;
-
-		    /*
-		     * I don't think we need n and the test below since
-		     * we must have enough space (we are using a subset
-		     * of the existing text with no repetition) and the
-		     * string is null-terminated, so I haven't copied it
-		     * to the ZLE_UNICODE_SUPPORT section.
-		     */
-		    n = fullen - w;
-
-		    /* Shift the whole string right, then *
-		     * selectively copy to the left.      */
-		    memmove(p, ptr, fullen);
-		    while (w > 0 || n > 0) {
-			if (*p == Inpar)
-			    do {
-				*q++ = *p;
-				--n;
-			    } while (*p++ != Outpar && *p && n);
-			else if (w) {
-			    if (--w < maxwidth)
-				*q++ = *p;
-			    ++p;
-			}
-		    }
-		    bp = q;
-		} else {
-		    /* Truncate on the right, selectively */
-		    q = ptr + fullen;
-
-		    /* First skip over as much as will "fit". */
-		    while (w > 0 && maxwidth > 0) {
-			if (*ptr == Inpar)
-			    while (*ptr++ != Outpar && *ptr) {;}
-			else
-			    ++ptr, --w, --maxwidth;
-		    }
-		    if (ptr < q) {
-			/* We didn't reach the end of the string. *
-			 * In case there are more invisible bits, *
-			 * insert the truncstr and keep looking.  */
-			memmove(ptr + n, ptr, q - ptr);
-			q = ptr + n;
-			while (*t)
-			    *ptr++ = *t++;
-			while (*q) {
-			    if (*q == Inpar)
-				do {
-				    *ptr++ = *q;
-				} while (*q++ != Outpar && *q);
-			    else
-				++q;
-			}
-			bp = ptr;
-			*bp = 0;
-		    } else
-			bp = ptr + n;
-		}
-	    } else {
-		/* No invisible substrings. */
-		if (twidth > fullen) {
-		    addbufspc(twidth - fullen);
-		    ptr = bp;	/* addbufspc() may have realloc()'d buf */
-		    bp += twidth - fullen;
-		} else
-		    bp -= fullen - truncwidth;
-		if (truncatleft) {
-		    if (maxwidth)
-			memmove(ptr + strlen(t), ptr + fullen - maxwidth,
-				maxwidth);
-		} else
-		    ptr += maxwidth;
-	    }
-	    /* Finally, copy the truncstr into place. */
-	    while (*t)
-		*ptr++ = *t++;
-#endif
 	}
 	zsfree(truncstr);
 	truncwidth = 0;
Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.31
diff -u -r1.31 zle_refresh.c
--- Src/Zle/zle_refresh.c	19 Oct 2005 23:45:06 -0000	1.31
+++ Src/Zle/zle_refresh.c	21 Oct 2005 22:23:19 -0000
@@ -413,11 +413,7 @@
     rpms->sen = rpms->s + winw;
 }
 
-/*
- * TODO currently it assumes sceenwidth 1 for every character
- * (except for characters in the prompt which are correctly handled
- * by wcwidth()).
- */
+
 /**/
 mod_export void
 zrefresh(void)

-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page still at http://www.pwstephenson.fsnet.co.uk/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-20  9:39     ` Peter Stephenson
  2005-10-20 15:02       ` Bart Schaefer
@ 2005-10-24  8:40       ` Mikael Magnusson
  2005-10-24  9:07         ` Peter Stephenson
  2005-10-25 23:11         ` Peter Stephenson
  1 sibling, 2 replies; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-24  8:40 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh hackers list

On 10/20/05, Peter Stephenson <pws@csr.com> wrote:
> Mikael Magnusson wrote:
> > This seems to work fine so far, and the prompt truncation also seems
> > to be fixed.

Found one issue with editing wide characters on the command line:
input some double-width characters, erase one near the left end, then
go to the right and erase another. The second time, the cursor will
only move one character to the left instead of two, and the cursor
display will no longer correspond with reality. For some reason it only
seems to happen with 6 characters or more; the ones I'm using are
hiragana. It also seems to matter a bit which characters they are; this
string exhibits the problem
ちとしはきく
when you delete the first character, and then move one to the right
and delete the second. Seems just pressing delete twice at the
beginning also triggers the bug.
ちちちちちち doesn't trigger the bug for example.

--
Mikael Magnusson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-24  8:40       ` Mikael Magnusson
@ 2005-10-24  9:07         ` Peter Stephenson
  2005-10-24 12:41           ` Mikael Magnusson
  2005-10-25 23:11         ` Peter Stephenson
  1 sibling, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-24  9:07 UTC (permalink / raw)
  To: zsh-workers

Mikael Magnusson <mikachu@gmail.com> wrote:
> Found one issue with editing wide characters on the command line,
> input some doublewidth characters, erase one near the left end, then
> go to the right and erase another.

I think this is to do with the redisplay optimisations, which aren't
aware of the dummy WEOFs.  I've also had problems at the end of the
line:  I think the right thing to do there is first output enough spaces
so that the wide character appears at the start of the next line.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-24  9:07         ` Peter Stephenson
@ 2005-10-24 12:41           ` Mikael Magnusson
  2005-10-24 12:47             ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-24 12:41 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: zsh-workers

On 10/24/05, Peter Stephenson <pws@csr.com> wrote:
> Mikael Magnusson <mikachu@gmail.com> wrote:
> > Found one issue with editing wide characters on the command line,
> > input some doublewidth characters, erase one near the left end, then
> > go to the right and erase another.
>
> I think this is to do with the redisplay optimisations, which aren't
> aware of the dummy WEOFs.  I've also had problems at the end of the
> line:  I think the right thing to do there is first output enough spaces
> so that the wide character appears at the start of the next line.

My terminal (rxvt-unicode) automatically breaks a wide character to
the next line if you try to print it in the last (singlewidth) column
of a line. Zsh does seem to get confused when you get to the second
line then.

--
Mikael Magnusson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-24 12:41           ` Mikael Magnusson
@ 2005-10-24 12:47             ` Peter Stephenson
  2005-10-24 16:53               ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-24 12:47 UTC (permalink / raw)
  To: Zsh hackers list

Mikael Magnusson wrote:
> My terminal (rxvt-unicode) automatically breaks a wide character to
> the next line if you try to print it in the last (singlewidth) column
> of a line. Zsh does seem to get confused when you get to the second
> line then.

Yes, I think this is fairly standard, hence adding spaces in front ought
to be the right thing to do.  However, I bet there's one that likes to
be different...

pws


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-24 12:47             ` Peter Stephenson
@ 2005-10-24 16:53               ` Peter Stephenson
  0 siblings, 0 replies; 20+ messages in thread
From: Peter Stephenson @ 2005-10-24 16:53 UTC (permalink / raw)
  To: zsh-workers

Peter Stephenson <pws@csr.com> wrote:
> Mikael Magnusson wrote:
> > My terminal (rxvt-unicode) automatically breaks a wide character to
> > the next line if you try to print it in the last (singlewidth) column
> > of a line. Zsh does seem to get confused when you get to the second
> > line then.
> 
> Yes, I think this is fairly standard, hence adding spaces in front ought
> to be the right thing to do.

I think this fixes the end-of-line problem, which is the easier one.  If
the screen's just too narrow for the character it simply prints a '?';
that's a very unusual case where safety is probably best.  However, maybe
some other character might make the user more aware of what's going on, '>'
for example?

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.31
diff -u -r1.31 zle_refresh.c
--- Src/Zle/zle_refresh.c	19 Oct 2005 23:45:06 -0000	1.31
+++ Src/Zle/zle_refresh.c	24 Oct 2005 16:50:20 -0000
@@ -612,7 +612,7 @@
     rpms.sen = *nbuf + winw;
     for (; t < tmpline+tmpll; t++) {
 	if (t == scs)			/* if cursor is here, remember it */
-	    rpms.nvcs = rpms.s - (nbuf[rpms.nvln = rpms.ln]);
+	    rpms.nvcs = rpms.s - nbuf[rpms.nvln = rpms.ln];
 
 	if (*t == ZWC('\n')){		/* newline */
 	    /* text not wrapped */
@@ -631,33 +631,33 @@
 	}
 #ifdef ZLE_UNICODE_SUPPORT
 	else if (iswprint(*t)) {
-	    int width = wcwidth(*t), break2 = 0;
-	    *rpms.s++ = *t;
-	    while (--width > 0) {
+	    int width = wcwidth(*t);
+	    if (width > rpms.sen - rpms.s) {
 		/*
-		 * Character is wider than a single position.
-		 * Put WEOF into the positions above 1 as placeholders.
-		 * This keeps the indexing into the video buffer correct.
+		 * Too wide to fit.  Insert spaces to end of current line.
 		 */
-		if (rpms.s == rpms.sen) {
-		    /*
-		     * Text wrapped.
-		     *
-		     * TODO: hmm, what is the terminal emulator going to
-		     * do?  Let's assume some kind of automatic margin
-		     * behaviour, implying we continue the procedure on the
-		     * next line.  Wrapping behaviour has always been
-		     * problematic.  I foresee interesting times...
-		     */
-		    if (nextline(&rpms, 1)) {
-			break2 = 1;
-			break;
-		    }
+		do {
+		    *rpms.s++ = ZWC(' ');
+		} while (rpms.s < rpms.sen);
+		if (nextline(&rpms, 1))
+		    break;
+		if (t == scs) {
+		    /* Update cursor to this point */
+		    rpms.nvcs = rpms.s - nbuf[rpms.nvln = rpms.ln];
 		}
-		*rpms.s++ = WEOF;
 	    }
-	    if (break2)
-		break;
+	    if (width > rpms.sen - rpms.s) {
+		/*
+		 * The screen width is too small to fit even one
+		 * occurrence.
+		 */
+		*rpms.s++ = ZWC('?');
+	    } else {
+		/* We can fit it without reaching the end of the line. */
+		*rpms.s++ = *t;
+		while (--width > 0)
+		    *rpms.s++ = WEOF;
+	    }
 	}
 #endif
 	else if (ZC_icntrl(*t)) {	/* other control character */
@@ -705,14 +705,20 @@
 #ifdef ZLE_UNICODE_SUPPORT
 	    if (iswprint(*u)) {
 		int width = wcwidth(*u);
-		*rpms.s++ = *u;
-		while (--width > 0) {
-		    /* Wide character, handled as above */
-		    if (rpms.s == rpms.sen) {
-			nbuf[rpms.ln][winw + 1] = ZWC('\n');
-			snextline(&rpms);
-		    }
-		    *rpms.s++ = WEOF;
+		/* Handle wide characters as above */
+		if (width > rpms.sen - rpms.s) {
+		    do {
+			*rpms.s++ = ZWC(' ');
+		    } while (rpms.s < rpms.sen);
+		    nbuf[rpms.ln][winw + 1] = ZWC('\n');
+		    snextline(&rpms);
+		}
+		if (width > rpms.sen - rpms.s) {
+		    *rpms.s++ = ZWC('?');
+		} else {
+		    *rpms.s++ = *u;
+		    while (--width > 0)
+			*rpms.s++ = WEOF;
 		}
 	    }
 	    else


-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-24  8:40       ` Mikael Magnusson
  2005-10-24  9:07         ` Peter Stephenson
@ 2005-10-25 23:11         ` Peter Stephenson
  2005-10-26  5:19           ` Mikael Magnusson
  1 sibling, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-25 23:11 UTC (permalink / raw)
  To: Mikael Magnusson; +Cc: pws, zsh-workers

On Mon, 24 Oct 2005 10:40:42 +0200
Mikael Magnusson <mikachu@gmail.com> wrote:
> Found one issue with editing wide characters on the command line,
> input some doublewidth characters, erase one near the left end, then
> go to the right and erase another. The second time the cursor will
> only move one character to the left instead of two and cursor display
> will not correspond with reality anymore. For some reason it only
> seems to happen with 6 characters or more, the ones i'm using are
> hiragana.

I think the following patch fixes this issue, at least what I was
seeing.  I'm quite sure there are plenty of other oddities like this.
I also think that, while you can now type a line of double-width
characters that wraps without the display going wrong, there are likely
to be problems when you edit the part of the line before the wrap.

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.33
diff -u -r1.33 zle_refresh.c
--- Src/Zle/zle_refresh.c	24 Oct 2005 16:58:43 -0000	1.33
+++ Src/Zle/zle_refresh.c	25 Oct 2005 23:09:09 -0000
@@ -1138,6 +1138,12 @@
 			tc_delchars(i);
 			ol += i;
 			char_ins -= i;
+#ifdef ZLE_UNICODE_SUPPORT
+			while (*ol == WEOF) {
+			    ol++;
+			    char_ins--;
+			}
+#endif
 			i = 0;
 			break;
 		    }



-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page still at http://www.pwstephenson.fsnet.co.uk/


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-25 23:11         ` Peter Stephenson
@ 2005-10-26  5:19           ` Mikael Magnusson
  2005-10-26  9:17             ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-26  5:19 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: zsh-workers

On 10/26/05, Peter Stephenson <p.w.stephenson@ntlworld.com> wrote:
> On Mon, 24 Oct 2005 10:40:42 +0200
> Mikael Magnusson <mikachu@gmail.com> wrote:
> > Found one issue with editing wide characters on the command line,
> > input some doublewidth characters, erase one near the left end, then
> > go to the right and erase another. The second time the cursor will
> > only move one character to the left instead of two and cursor display
> > will not correspond with reality anymore. For some reason it only
> > seems to happen with 6 characters or more, the ones i'm using are
> > hiragana.
>
> I think the following patch fixes this issue, at least what I was
> seeing.  I'm quite sure there are plenty of other oddities like this.
> I also think that, while you can now type a line of double-width
> characters that wraps without the display going, there are likely to be
> problems when you edit the part of the line before the wrap.

This didn't fix the problem for me, but it is acting differently now.
Not sure how to describe it accurately, I'll paste a few lines where
all I've done is press delete at the first character:
% ちとしはきくま
% と しはきく
%   はきくま
% はきくま
% きくま
from this point they display correctly

--
Mikael Magnusson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-26  5:19           ` Mikael Magnusson
@ 2005-10-26  9:17             ` Peter Stephenson
  2005-10-26 14:31               ` Mikael Magnusson
  0 siblings, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-26  9:17 UTC (permalink / raw)
  To: zsh-workers

Mikael Magnusson <mikachu@gmail.com> wrote:
> Content-Type: text/plain; charset=ISO-2022-JP

This doesn't display on my system.  Are you able to show it with
UTF-8?

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-26  9:17             ` Peter Stephenson
@ 2005-10-26 14:31               ` Mikael Magnusson
  2005-10-28 12:03                 ` Peter Stephenson
  0 siblings, 1 reply; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-26 14:31 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: zsh-workers

On 10/26/05, Peter Stephenson <pws@csr.com> wrote:
> Mikael Magnusson <mikachu@gmail.com> wrote:
> > Content-Type: text/plain; charset=ISO-2022-JP
>
> This doesn't display on my system.  Are you able to show it with
> UTF-8?

Hm, I have no idea why Gmail decided to send that in another encoding;
maybe it will use UTF-8 if I include some Swedish characters. (Maybe you
can also run iconv -f ISO-2022-JP -t UTF-8 on it.)

åäö
This didn't fix the problem for me, but it is acting differently now.
Not sure how to describe it accurately, I'll paste a few lines where
all I've done is press delete at the first character:
% ちとしはきくま
% と しはきく
%   はきくま
% はきくま
% きくま
from this point they display correctly.

--
Mikael Magnusson

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-26 14:31               ` Mikael Magnusson
@ 2005-10-28 12:03                 ` Peter Stephenson
  2005-10-28 17:19                   ` Mikael Magnusson
  0 siblings, 1 reply; 20+ messages in thread
From: Peter Stephenson @ 2005-10-28 12:03 UTC (permalink / raw)
  To: zsh-workers

Mikael Magnusson <mikachu@gmail.com> wrote:
> This didn't fix the problem for me, but it is acting differently now.
> Not sure how to describe it accurately, I'll paste a few lines where
> all I've done is press delete at the first character:

I've found a problem with deletions but for some reason it didn't show up
with the characters you were using.  Anyway, this code certainly fixes
something, so let's see if it's yours.

Much of the following may be simply paranoia; the last fix is the key one.
What happened before was we only overwrote part of the last displayed
character.

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.34
diff -u -r1.34 zle_refresh.c
--- Src/Zle/zle_refresh.c	25 Oct 2005 23:13:41 -0000	1.34
+++ Src/Zle/zle_refresh.c	28 Oct 2005 12:02:18 -0000
@@ -1069,9 +1069,22 @@
 /* 3: main display loop - write out the buffer using whatever tricks we can */
 
     for (;;) {
-	if (*nl && *ol && nl[1] == ol[1]) /* skip only if second chars match */
+	if (*nl && *ol && nl[1] == ol[1]) {
+	    /* skip only if second chars match */
+#ifdef ZLE_UNICODE_SUPPORT
+	    int ccs_was = ccs;
+#endif
 	/* skip past all matching characters */
 	    for (; *nl && (*nl == *ol); nl++, ol++, ccs++) ;
+#ifdef ZLE_UNICODE_SUPPORT
+	    /* Make sure ol and nl are pointing to real characters */
+	    while ((*nl == WEOF || *ol == WEOF) && ccs > ccs_was) {
+		nl--;
+		ol--;
+		ccs--;
+	    }
+#endif
+	}
 
 	if (!*nl) {
 	    if (ccs == winw && hasam && char_ins > 0 && ins_last
@@ -1125,7 +1138,8 @@
 
     /* inserting & deleting chars: we can if there's no right-prompt */
 	if ((ln || !put_rpmpt || !oput_rpmpt) 
-	    && (nl[1] && ol[1] && nl[1] != ol[1])) { 
+	    && (nl[1] && ol[1] && nl[1] != ol[1])
+	    && *ol != WEOF && *nl != WEOF) { 
 
 	/* deleting characters - see if we can find a match series that
 	   makes it cheaper to delete intermediate characters
@@ -1177,9 +1191,19 @@
 	}
     /* we can't do any fancy tricks, so just dump the single character
        and keep on trying */
-	zputc(*nl);
-	nl++, ol++;
-	ccs++, vcs++;
+#ifdef ZLE_UNICODE_SUPPORT
+	do {
+#endif
+	    zputc(*nl);
+	    nl++, ol++;
+	    ccs++, vcs++;
+#ifdef ZLE_UNICODE_SUPPORT
+	    /*
+	     * Make sure we always overwrite the complete width of
+	     * a character that was there before.
+	     */
+	} while (*ol == WEOF && *nl);
+#endif
     }
 }
 


-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: PATCH: displaying wide characters
  2005-10-28 12:03                 ` Peter Stephenson
@ 2005-10-28 17:19                   ` Mikael Magnusson
  0 siblings, 0 replies; 20+ messages in thread
From: Mikael Magnusson @ 2005-10-28 17:19 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: zsh-workers

On 10/28/05, Peter Stephenson <pws@csr.com> wrote:
> Mikael Magnusson <mikachu@gmail.com> wrote:
> > This didn't fix the problem for me, but it is acting differently now.
> > Not sure how to describe it accurately, I'll paste a few lines where
> > all I've done is press delete at the first character:
>
> I've found a problem with deletions but for some reason it didn't show up
> with the characters you were using.  Anyway, this code certainly fixes
> something, so let's see if it's yours.
>
> Much of the following may be simply paranoia; the last fix is the key one.
> What happened before was we only overwrote part of the last displayed
> character.

This seems to have fixed my problem, I'll let you know if anything
strange happens.

--
Mikael Magnusson


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2005-10-28 17:19 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-10-19 20:31 PATCH: displaying wide characters Peter Stephenson
2005-10-19 20:41 ` Peter Stephenson
2005-10-20  0:39   ` Mikael Magnusson
2005-10-20  9:39     ` Peter Stephenson
2005-10-20 15:02       ` Bart Schaefer
2005-10-21  9:03         ` Peter Stephenson
2005-10-21 14:39           ` Bart Schaefer
2005-10-21 22:29             ` Peter Stephenson
2005-10-24  8:40       ` Mikael Magnusson
2005-10-24  9:07         ` Peter Stephenson
2005-10-24 12:41           ` Mikael Magnusson
2005-10-24 12:47             ` Peter Stephenson
2005-10-24 16:53               ` Peter Stephenson
2005-10-25 23:11         ` Peter Stephenson
2005-10-26  5:19           ` Mikael Magnusson
2005-10-26  9:17             ` Peter Stephenson
2005-10-26 14:31               ` Mikael Magnusson
2005-10-28 12:03                 ` Peter Stephenson
2005-10-28 17:19                   ` Mikael Magnusson
2005-10-19 22:47 ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).