zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.stephenson@samsung.com>
To: Zsh Hackers' List <zsh-workers@zsh.org>
Subject: PATCH: self-insert key binding for multibyte sequences
Date: Tue, 15 Dec 2015 12:25:56 +0000	[thread overview]
Message-ID: <20151215122556.0b6aa98b@pwslap01u.europe.root.pri> (raw)

This looks like it goes a long way to resolving the remaining problems
with dealing consistently with bindings for multibytes sequences.

What I haven't looked at is how much the remaining uses of getrestchar()
that work at a level higher than the key buffer need tweaking or are
needed at all.  If I do work it out getrestchar_keybuf may well
disappear inside zle_keymap.c as a static.

(UTF-8 in the following.  Not crucial to description.)

Top tip: on my GB keyboard, which isn't far from a basic English
language one, AltGr + i generates a right arrow, →, which has a
character in it that needs Meta.  This is useful for testing e.g. I'd
forgotten that keybuf despite having an explicit length was already
metafied.  In X03zlebindkey I've continued to use Mikael's Special
Character for meta effects since it looks nice.

pws

diff --git a/Src/Zle/zle_keymap.c b/Src/Zle/zle_keymap.c
index d6d116b..382eb8d 100644
--- a/Src/Zle/zle_keymap.c
+++ b/Src/Zle/zle_keymap.c
@@ -1449,6 +1449,104 @@ default_bindings(void)
 /*************************/
 /* reading key sequences */
 /*************************/
+/**/
+#ifdef MULTIBYTE_SUPPORT
+/*
+ * Get the remainder of a character if we support multibyte
+ * input strings.  It may not require any more input, but
+ * we haven't yet checked.  What's read in so far is available
+ * in keybuf; if we read more we will top keybuf up.
+ *
+ * This version is used when we are still resolving the input key stream
+ * into bindings.  Once that has been done this function shouldn't be
+ * used: instead, see getrestchar() in zle_main.c.
+ *
+ * This supports a self-insert binding at any stage of a key sequence.
+ * Typically we handle 8-bit characters by having only the first byte
+ * bound to self insert; then we immediately get here and read in as
+ * many further bytes as necessary.  However, it's possible that any set
+ * of bytes up to full character is bound to self-insert; then we get
+ * here later and read as much as possible, which could be a complete
+ * character, from keybuf before attempting further input.
+ *
+ * At the end of the process, the full multibyte character is available
+ * in keybuf, so the return value may be superfluous.
+ */
+
+/**/
+mod_export ZLE_INT_T
+getrestchar_keybuf(void)
+{
+    char c;
+    wchar_t outchar;
+    int inchar, timeout, bufind = 0, buflen = keybuflen;
+    static mbstate_t mbs;
+    size_t cnt;
+
+    /*
+     * We are guaranteed to set a valid wide last character,
+     * although it may be WEOF (which is technically not
+     * a wide character at all...)
+     */
+    lastchar_wide_valid = 1;
+    memset(&mbs, 0, sizeof mbs);
+
+    /*
+     * Return may be zero if we have a NULL; handle this like
+     * any other character.
+     */
+    while (1) {
+	if (bufind < buflen) {
+	    c = STOUC(keybuf[bufind++]);
+	    if (c == Meta) {
+		DPUTS(bufind == buflen, "Meta at end of keybuf");
+		c = STOUC(keybuf[bufind++]) ^ 32;
+	    }
+	} else {
+	    /*
+	     * Always apply KEYTIMEOUT to the remains of the input
+	     * character.  The parts of a multibyte character should
+	     * arrive together.  If we don't do this the input can
+	     * get stuck if an invalid byte sequence arrives.
+	     */
+	    inchar = getbyte(1L, &timeout);
+	    /* getbyte deliberately resets lastchar_wide_valid */
+	    lastchar_wide_valid = 1;
+	    if (inchar == EOF) {
+		memset(&mbs, 0, sizeof mbs);
+		if (timeout)
+		{
+		    /*
+		     * This case means that we got a valid initial byte
+		     * (since we tested for EOF above), but the followup
+		     * timed out.  This probably indicates a duff character.
+		     * Return a '?'.
+		     */
+		    lastchar = '?';
+		    return lastchar_wide = L'?';
+		}
+		else
+		    return lastchar_wide = WEOF;
+	    }
+	    c = inchar;
+	    addkeybuf(inchar);
+	}
+
+	cnt = mbrtowc(&outchar, &c, 1, &mbs);
+	if (cnt == MB_INVALID) {
+	    /*
+	     * Invalid input.  Hmm, what's the right thing to do here?
+	     */
+	    memset(&mbs, 0, sizeof mbs);
+	    return lastchar_wide = WEOF;
+	}
+	if (cnt != MB_INCOMPLETE)
+	    break;
+    }
+    return lastchar_wide = (ZLE_INT_T)outchar;
+}
+/**/
+#endif
 
 /* read a sequence of keys that is bound to some command in a keymap */
 
@@ -1504,15 +1602,8 @@ getkeymapcmd(Keymap km, Thingy *funcp, char **strp)
 #ifdef MULTIBYTE_SUPPORT
 	    if ((f == Th(z_selfinsert) || f == Th(z_selfinsertunmeta)) &&
 		!lastchar_wide_valid && !ispfx) {
-		int len;
-		VARARR(char, mbc, MB_CUR_MAX);
-		ZLE_INT_T inchar = getrestchar(lastchar, mbc, &len);
-		if (inchar != WEOF && len) {
-		    char *ptr = mbc;
-		    while (len--)
-			addkeybuf(STOUC(*ptr++));
-		    lastlen = keybuflen;
-		}
+		(void)getrestchar_keybuf();
+		lastlen = keybuflen;
 	    }
 #endif
 	}
diff --git a/Test/X03zlebindkey.ztst b/Test/X03zlebindkey.ztst
index 70c42f9..e6fead5 100644
--- a/Test/X03zlebindkey.ztst
+++ b/Test/X03zlebindkey.ztst
@@ -1,13 +1,12 @@
 # Tests of the vi mode of ZLE
 
 %prep
-  mb_ok=
+  ZSH_TEST_LANG=
   langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
 	 $(locale -a 2>/dev/null | egrep 'utf8|UTF-8'))
   for LANG in $langs; do
     if [[ é = ? ]]; then
       ZSH_TEST_LANG=$LANG
-      mb_ok=1
       break;
     fi
   done
@@ -29,7 +28,7 @@
 >BUFFER: foo
 >CURSOR: 3
 
-  if [[ -z $mb_ok ]]; then
+  if [[ -z $ZSH_TEST_LANG ]]; then
     ZTST_skip="bindkey multibyte test skipped"
   else
     zpty_run 'alias unbind="bindkey -r ホ"'
@@ -37,9 +36,17 @@
     zletest 'ホ'
     zpty_run unbind
     zletest 'ホ'
+    zpty_run 'bindkey ホ self-insert'
+    zletest 'ホ'
+    zpty_run unbind
+    zletest 'ホ'
   fi
 0:bindkey -s multibyte characters
 >BUFFER: bar
 >CURSOR: 3
 >BUFFER: ホ
 >CURSOR: 1
+>BUFFER: ホ
+>CURSOR: 1
+>BUFFER: ホ
+>CURSOR: 1


                 reply	other threads:[~2015-12-15 12:26 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151215122556.0b6aa98b@pwslap01u.europe.root.pri \
    --to=p.stephenson@samsung.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).