Gnus development mailing list
 help / color / mirror / Atom feed
* Re: sanitized mm-string-to-multibyte
       [not found] <8763jkg7cu.fsf@liv.ac.uk>
@ 2009-02-09 10:28 ` Katsumi Yamaoka
  2009-02-09 23:27   ` Dave Love
  0 siblings, 1 reply; 3+ messages in thread
From: Katsumi Yamaoka @ 2009-02-09 10:28 UTC (permalink / raw)
  To: Dave Love; +Cc: bugs, ding

>>>>> Dave Love wrote:
> I found some IMAP messages were crashing Emacs, and I was led to
> mm-string-to-multibyte.  I'm not sure exactly what the crash was due to,
> but the function isn't very sane in Emacs 21.  This version doesn't cons
> a string for each character.  Various uses of the function are at least
> dubious, and I'll send patches later.  mm-with-preserved-unibyte is
> useful for those changes and elsewhere.

> 2009-02-08  Dave Love  <fx@gnu.org>

> 	* mm-util.el (mm-identity-nat, mm-with-preserved-unibyte): New.
> 	(mm-string-to-multibyte): Use them.

Your version of `mm-string-to-multibyte' doesn't seem to convert
a unibyte string to a multibyte string.  In Emacs 21.1~21.4 I got:

(let* ((s1 (string-as-unibyte "a"))
       (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
  (list (multibyte-string-p s1) (multibyte-string-p s2)))
 => (nil nil)

(let* ((s1 (string-as-multibyte "a"))
       (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
  (list (multibyte-string-p s1) (multibyte-string-p s2)))
 => (t t)

Did I miss something?

> Index: mm-util.el
> ===================================================================
> RCS file: /usr/local/cvsroot/gnus/lisp/mm-util.el,v
> retrieving revision 7.91
> diff -u -r7.91 mm-util.el
> --- mm-util.el	14 Jan 2009 00:52:01 -0000	7.91
> +++ mm-util.el	8 Feb 2009 17:27:12 -0000
> @@ -202,6 +202,22 @@
>      (defalias 'mm-decode-coding-region 'decode-coding-region)
>      (defalias 'mm-encode-coding-region 'encode-coding-region)))

> +(defconst mm-identity-nat (let (l)
> +			    (dotimes (i 256)
> +			      (push (cons i i) l))
> +			    (make-translation-table l))
> +  "Translation table that applies the identity translation.")
> +
> +(defmacro mm-with-preserved-unibyte (&rest body)
> +  "Execute BODY forms while preserving unibyte characters.
> +Such characters are not converted automatically to multibyte ones
> +when, for instance, inserted into a multibyte buffer within the
> +BODY forms."
> +  `(let ((nonascii-translation-table mm-identity-nat))
> +     ,@body))
> +(put 'mm-with-preserved-unibyte 'lisp-indent-function 0)
> +(put 'mm-with-preserved-unibyte 'edebug-form-spec '(body))
> +
>  ;; `string-to-multibyte' is available only in Emacs 22.1 or greater.
>  (defalias 'mm-string-to-multibyte
>    (cond
> @@ -210,11 +226,8 @@
>     ((fboundp 'string-to-multibyte)
>      'string-to-multibyte)
>     (t
> -    (lambda (string)
> -      "Return a multibyte string with the same individual chars as STRING."
> -      (mapconcat
> -       (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
> -       string "")))))
> +    (lambda (s)
> +      (mm-with-preserved-unibyte (string-make-multibyte s))))))

>  ;; `char-or-char-int-p' is an XEmacs function, not available in Emacs.
>  (eval-and-compile



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: sanitized mm-string-to-multibyte
  2009-02-09 10:28 ` sanitized mm-string-to-multibyte Katsumi Yamaoka
@ 2009-02-09 23:27   ` Dave Love
  2009-02-10  0:12     ` Katsumi Yamaoka
  0 siblings, 1 reply; 3+ messages in thread
From: Dave Love @ 2009-02-09 23:27 UTC (permalink / raw)
  To: Katsumi Yamaoka; +Cc: bugs, ding

Katsumi Yamaoka <yamaoka@jpl.org> writes:

> Your version of `mm-string-to-multibyte' doesn't seem to convert
> a unibyte string to a multibyte string.  In Emacs 21.1~21.4 I got:
>
> (let* ((s1 (string-as-unibyte "a"))
>        (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
>   (list (multibyte-string-p s1) (multibyte-string-p s2)))
>  => (nil nil)
>
> (let* ((s1 (string-as-multibyte "a"))
>        (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
>   (list (multibyte-string-p s1) (multibyte-string-p s2)))
>  => (t t)
>
> Did I miss something?

I think it doesn't cons a new string in the trivial case like that, when
it won't matter.  Use this version if you want always to cons a
multibyte string.

Index: mm-util.el
===================================================================
RCS file: /usr/local/cvsroot/gnus/lisp/mm-util.el,v
retrieving revision 7.91
diff -u -r7.91 mm-util.el
--- mm-util.el	14 Jan 2009 00:52:01 -0000	7.91
+++ mm-util.el	9 Feb 2009 23:26:20 -0000
@@ -202,6 +202,22 @@
     (defalias 'mm-decode-coding-region 'decode-coding-region)
     (defalias 'mm-encode-coding-region 'encode-coding-region)))
 
+(defconst mm-identity-nat (let (l)
+			    (dotimes (i 256)
+			      (push (cons i i) l))
+			    (make-translation-table l))
+  "Non-ASCII translation table that applies the identity translation.")
+
+(defmacro mm-with-preserved-unibyte (&rest body)
+  "Execute BODY forms while preserving unibyte characters.
+Such characters are not converted automatically to multibyte ones
+when, for instance, inserted into a multibyte buffer within the
+BODY forms."
+  `(let ((nonascii-translation-table mm-identity-nat))
+     ,@body))
+(put 'mm-with-preserved-unibyte 'lisp-indent-function 0)
+(put 'mm-with-preserved-unibyte 'edebug-form-spec '(body))
+
 ;; `string-to-multibyte' is available only in Emacs 22.1 or greater.
 (defalias 'mm-string-to-multibyte
   (cond
@@ -210,11 +226,9 @@
    ((fboundp 'string-to-multibyte)
     'string-to-multibyte)
    (t
-    (lambda (string)
-      "Return a multibyte string with the same individual chars as STRING."
-      (mapconcat
-       (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
-       string "")))))
+    (lambda (s)
+      (mm-with-preserved-unibyte
+	(concat s (eval-when-compile (string-as-multibyte ""))))))))
 
 ;; `char-or-char-int-p' is an XEmacs function, not available in Emacs.
 (eval-and-compile



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: sanitized mm-string-to-multibyte
  2009-02-09 23:27   ` Dave Love
@ 2009-02-10  0:12     ` Katsumi Yamaoka
  0 siblings, 0 replies; 3+ messages in thread
From: Katsumi Yamaoka @ 2009-02-10  0:12 UTC (permalink / raw)
  To: Dave Love; +Cc: bugs, ding

>>>>> Dave Love wrote:
> Katsumi Yamaoka <yamaoka@jpl.org> writes:

>> Your version of `mm-string-to-multibyte' doesn't seem to convert
>> a unibyte string to a multibyte string.  In Emacs 21.1~21.4 I got:
>>
>> (let* ((s1 (string-as-unibyte "a"))
>>        (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
>>   (list (multibyte-string-p s1) (multibyte-string-p s2)))
>>  => (nil nil)
>>
>> (let* ((s1 (string-as-multibyte "a"))
>>        (s2 (mm-with-preserved-unibyte (string-make-multibyte s1))))
>>   (list (multibyte-string-p s1) (multibyte-string-p s2)))
>>  => (t t)
>>
>> Did I miss something?

> I think it doesn't cons a new string in the trivial case like that, when
> it won't matter.  Use this version if you want always to cons a
> multibyte string.

I verified how (eval-when-compile (string-as-multibyte ""))
behaves with this test.el file:

(defun test ()
  (let ((s1 "")
	(s2 (eval-when-compile (string-as-multibyte "")))
	(s3 (eval-when-compile (string-as-unibyte ""))))
    (message "%s %s %s"
	     (multibyte-string-p s1)
	     (multibyte-string-p s2)
	     (multibyte-string-p s3))))
(test)

$ emacs-21.4 -batch -q -l ./test.el
 => nil t nil
$ emacs-21.4 -batch -q -f batch-byte-compile ./test.el
$ emacs-21.4 -batch -q -l ./test.elc
 => nil nil nil

So there seems to be no difference between those ""s in the
byte-compiled file.

> Index: mm-util.el
> ===================================================================
> RCS file: /usr/local/cvsroot/gnus/lisp/mm-util.el,v
> retrieving revision 7.91
> diff -u -r7.91 mm-util.el
> --- mm-util.el	14 Jan 2009 00:52:01 -0000	7.91
> +++ mm-util.el	9 Feb 2009 23:26:20 -0000
> @@ -202,6 +202,22 @@
>      (defalias 'mm-decode-coding-region 'decode-coding-region)
>      (defalias 'mm-encode-coding-region 'encode-coding-region)))

> +(defconst mm-identity-nat (let (l)
> +			    (dotimes (i 256)
> +			      (push (cons i i) l))
> +			    (make-translation-table l))
> +  "Non-ASCII translation table that applies the identity translation.")
> +
> +(defmacro mm-with-preserved-unibyte (&rest body)
> +  "Execute BODY forms while preserving unibyte characters.
> +Such characters are not converted automatically to multibyte ones
> +when, for instance, inserted into a multibyte buffer within the
> +BODY forms."
> +  `(let ((nonascii-translation-table mm-identity-nat))
> +     ,@body))
> +(put 'mm-with-preserved-unibyte 'lisp-indent-function 0)
> +(put 'mm-with-preserved-unibyte 'edebug-form-spec '(body))
> +
>  ;; `string-to-multibyte' is available only in Emacs 22.1 or greater.
>  (defalias 'mm-string-to-multibyte
>    (cond
> @@ -210,11 +226,9 @@
>     ((fboundp 'string-to-multibyte)
>      'string-to-multibyte)
>     (t
> -    (lambda (string)
> -      "Return a multibyte string with the same individual chars as STRING."
> -      (mapconcat
> -       (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
> -       string "")))))
> +    (lambda (s)
> +      (mm-with-preserved-unibyte
> +	(concat s (eval-when-compile (string-as-multibyte ""))))))))

>  ;; `char-or-char-int-p' is an XEmacs function, not available in Emacs.
>  (eval-and-compile



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-02-10  0:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <8763jkg7cu.fsf@liv.ac.uk>
2009-02-09 10:28 ` sanitized mm-string-to-multibyte Katsumi Yamaoka
2009-02-09 23:27   ` Dave Love
2009-02-10  0:12     ` Katsumi Yamaoka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).