From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.io/gmane.emacs.gnus.general/66251 Path: news.gmane.org!not-for-mail From: Miles Bader Newsgroups: gmane.emacs.gnus.general Subject: Emacs unicode merge changes to Gnus Date: Tue, 05 Feb 2008 21:30:32 -0500 Message-ID: <61r6fqbyuv.fsf@fencepost.gnu.org> NNTP-Posting-Host: lo.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Trace: ger.gmane.org 1202265109 20638 80.91.229.12 (6 Feb 2008 02:31:49 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Wed, 6 Feb 2008 02:31:49 +0000 (UTC) To: ding@gnus.org Original-X-From: ding-owner+M14742@lists.math.uh.edu Wed Feb 06 03:32:11 2008 Return-path: Envelope-to: ding-account@gmane.org Original-Received: from util0.math.uh.edu ([129.7.128.18]) by lo.gmane.org with esmtp (Exim 4.50) id 1JMa51-0003qK-74 for ding-account@gmane.org; Wed, 06 Feb 2008 03:32:07 +0100 Original-Received: from localhost ([127.0.0.1] helo=lists.math.uh.edu) by util0.math.uh.edu with smtp (Exim 4.63) (envelope-from ) id 1JMa3g-0000dJ-G7; Tue, 05 Feb 2008 20:30:44 -0600 Original-Received: from mx1.math.uh.edu ([129.7.128.32]) by util0.math.uh.edu with esmtps (TLSv1:AES256-SHA:256) (Exim 4.63) (envelope-from ) id 1JMa3e-0000d2-PG for ding@lists.math.uh.edu; Tue, 05 Feb 2008 20:30:42 -0600 Original-Received: from quimby.gnus.org ([80.91.231.51]) by mx1.math.uh.edu with esmtp (Exim 4.67) (envelope-from ) id 1JMa3X-0001k5-At for ding@lists.math.uh.edu; Tue, 05 Feb 2008 20:30:42 -0600 Original-Received: from fencepost.gnu.org ([140.186.70.10]) by quimby.gnus.org with esmtp (Exim 3.35 #1 (Debian)) id 1JMa3a-0000pb-00 for ; Wed, 06 Feb 2008 03:30:38 +0100 Original-Received: from miles by fencepost.gnu.org with local (Exim 4.67) (envelope-from ) id 1JMa3U-00064K-Ir; Tue, 05 Feb 2008 21:30:32 -0500 System-Type: x86_64-unknown-linux-gnu Blat: Foop X-Spam-Score: -2.6 (--) List-ID: Precedence: bulk Xref: news.gmane.org gmane.emacs.gnus.general:66251 Archived-At: I am merging the Emacs trunk changes to Gnus, and as the Emacs unicode branch was merged to the trunk, some rather old changes from that branch have shown up. I've appended a patch file below; let me know if you see anything suspicious. [I eliminated some changes to contrib/ (unicode branch changes to sendmail.el, deletion of ucs-tables.el), because as I understand it those files are intended for people with old versions of emacs, so changes specific to new versions of Emacs should not be applied to them.] Thanks, -Miles 2008-02-01 Zhang Wei * rfc2047.el (rfc2047-charset-encoding-alist): Add gbk and GB18030. * mm-util.el (mm-mime-mule-charset-alist): Add gbk and GB18030. 2008-02-01 Miles Bader * mml.el (mml-parse-1): Remove apparently vestigial use of `mm-hack-charsets'. * mm-bodies.el (mm-encode-body): Likewise. 2008-02-01 Kenichi Handa * rfc2104.el (rfc2104-hexstring-to-byte-list): Renamed from rfc2104-hexstring-to-bitstring and changed to return a byte list. (rfc2104-hash): Convert the result of concat to unibyte string. 2008-02-01 Dave Love * gnus-start.el (gnus-read-newsrc-el-file): Don't bind coding-system-for-read. (gnus-gnus-to-quick-newsrc-format): Insert coding cookie. * mm-util.el (mm-hack-charsets, mm-iso-8859-15-compatible) (mm-iso-8859-x-to-15-table, mm-iso-8859-x-to-15-region): Deleted. (mm-find-mime-charset-region): Remove hack-charsets stuff. M {arch}/=tagline-rules M lisp/ChangeLog M lisp/mm-bodies.el M lisp/mml.el M lisp/mm-util.el M lisp/rfc2047.el M lisp/rfc2104.el M lisp/gnus-start.el --- orig/lisp/ChangeLog +++ mod/lisp/ChangeLog @@ -1,3 +1,31 @@ +2008-02-01 Zhang Wei + + * rfc2047.el (rfc2047-charset-encoding-alist): Add gbk and GB18030. + + * mm-util.el (mm-mime-mule-charset-alist): Add gbk and GB18030. + +2008-02-01 Miles Bader + + * mml.el (mml-parse-1): Remove apparently vestigial use of + `mm-hack-charsets'. + * mm-bodies.el (mm-encode-body): Likewise. + +2008-02-01 Kenichi Handa + + * rfc2104.el (rfc2104-hexstring-to-byte-list): Renamed from + rfc2104-hexstring-to-bitstring and changed to return a byte list. + (rfc2104-hash): Convert the result of concat to unibyte string. + +2008-02-01 Dave Love + + * gnus-start.el (gnus-read-newsrc-el-file): Don't bind + coding-system-for-read. + (gnus-gnus-to-quick-newsrc-format): Insert coding cookie. + + * mm-util.el (mm-hack-charsets, mm-iso-8859-15-compatible) + (mm-iso-8859-x-to-15-table, mm-iso-8859-x-to-15-region): Deleted. + (mm-find-mime-charset-region): Remove hack-charsets stuff. + 2008-02-03 Reiner Steib * gnus.el (gnus-group-startup-message): Add `find-image' call before --- orig/lisp/gnus-start.el +++ mod/lisp/gnus-start.el @@ -463,6 +463,8 @@ ;;; Internal variables +;; Fixme: deal with old emacs-mule when mm-universal-coding-system is +;; utf-8-emacs. (defvar gnus-ding-file-coding-system mm-universal-coding-system "Coding system for ding file.") @@ -2404,8 +2406,7 @@ ;; We always, always read the .eld file. (gnus-message 5 "Reading %s..." ding-file) (let (gnus-newsrc-assoc) - (let ((coding-system-for-read gnus-ding-file-coding-system)) - (gnus-load ding-file)) + (gnus-load ding-file) ;; Older versions of `gnus-format-specs' are no longer valid ;; in Oort Gnus 0.01. (let ((version @@ -2837,7 +2838,8 @@ (defun gnus-gnus-to-quick-newsrc-format (&optional minimal name &rest specific-variables) "Print Gnus variables such as `gnus-newsrc-alist' in Lisp format." - (princ ";; -*- emacs-lisp -*-\n") + (princ (format ";; -*- mode:emacs-lisp; coding: %s; -*-\n" + gnus-ding-file-coding-system)) (if name (princ (format ";; %s\n" name)) (princ ";; Gnus startup file.\n")) --- orig/lisp/mm-bodies.el +++ mod/lisp/mm-bodies.el @@ -104,8 +104,7 @@ (mm-charset-to-coding-system charset)) charset) (goto-char (point-min)) - (let ((charsets (mm-find-mime-charset-region (point-min) (point-max) - mm-hack-charsets))) + (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)))) (cond ;; No encoding. ((null charsets) --- orig/lisp/mm-util.el +++ mod/lisp/mm-util.el @@ -492,6 +492,10 @@ (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) (euc-kr korean-ksc5601) (gb2312 chinese-gb2312) + (gbk chinese-gbk) + (gb18030 gb18030-2-byte + gb18030-4-byte-bmp gb18030-4-byte-smp + gb18030-4-byte-ext-1 gb18030-4-byte-ext-2) (big5 chinese-big5-1 chinese-big5-2) (tibetan tibetan) (thai-tis620 thai-tis620) @@ -560,7 +564,7 @@ cs mime mule alist) (while css (setq cs (pop css) - mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode) + mime (or (coding-system-get cs :mime-charset); Emacs 23 (unicode) (coding-system-get cs 'mime-charset))) (when (and mime (not (eq t (setq mule @@ -569,36 +573,6 @@ (push (cons mime (delq 'ascii mule)) alist))) (setq mm-mime-mule-charset-alist (nreverse alist))))) -(defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2) - "A list of special charsets. -Valid elements include: -`iso-8859-15' convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists. -`iso-2022-jp-2' convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists." -) - -(defvar mm-iso-8859-15-compatible - '((iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE") - (iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE")) - "ISO-8859-15 exchangeable coding systems and inconvertible characters.") - -(defvar mm-iso-8859-x-to-15-table - (and (fboundp 'coding-system-p) - (mm-coding-system-p 'iso-8859-15) - (mapcar - (lambda (cs) - (if (mm-coding-system-p (car cs)) - (let ((c (string-to-char - (decode-coding-string "\341" (car cs))))) - (cons (char-charset c) - (cons - (- (string-to-char - (decode-coding-string "\341" 'iso-8859-15)) c) - (string-to-list (decode-coding-string (car (cdr cs)) - (car cs)))))) - '(gnus-charset 0))) - mm-iso-8859-15-compatible)) - "A table of the difference character between ISO-8859-X and ISO-8859-15.") - (defcustom mm-coding-system-priorities (if (boundp 'current-language-environment) (let ((lang (symbol-value 'current-language-environment))) @@ -852,27 +826,6 @@ default-enable-multibyte-characters t))) -(defun mm-iso-8859-x-to-15-region (&optional b e) - (if (fboundp 'char-charset) - (let (charset item c inconvertible) - (save-restriction - (if e (narrow-to-region b e)) - (goto-char (point-min)) - (skip-chars-forward "\0-\177") - (while (not (eobp)) - (cond - ((not (setq item (assq (char-charset (setq c (char-after))) - mm-iso-8859-x-to-15-table))) - (forward-char)) - ((memq c (cdr (cdr item))) - (setq inconvertible t) - (forward-char)) - (t - (insert-before-markers (prog1 (+ c (car (cdr item))) - (delete-char 1))))) - (skip-chars-forward "\0-\177"))) - (not inconvertible)))) - (defun mm-sort-coding-systems-predicate (a b) (let ((priorities (mapcar (lambda (cs) @@ -1012,32 +965,14 @@ ;; Otherwise, we'll get nil, and the next setq will get invoked. (setq charsets (mm-xemacs-find-mime-charset b e)) + ;; Fixme: won't work for unibyte Emacs 23: + ;; We're not multibyte, or a single coding system won't cover it. (setq charsets (mm-delete-duplicates (mapcar 'mm-mime-charset (delq 'ascii (mm-find-charset-region b e)))))) - (if (and (> (length charsets) 1) - (memq 'iso-8859-15 charsets) - (memq 'iso-8859-15 hack-charsets) - (save-excursion (mm-iso-8859-x-to-15-region b e))) - (dolist (x mm-iso-8859-15-compatible) - (setq charsets (delq (car x) charsets)))) - (if (and (memq 'iso-2022-jp-2 charsets) - (memq 'iso-2022-jp-2 hack-charsets)) - (setq charsets (delq 'iso-2022-jp charsets))) - ;; Attempt to reduce the number of charsets if utf-8 is available. - (if (and (featurep 'xemacs) - (> (length charsets) 1) - (mm-coding-system-p 'utf-8)) - (let ((mm-coding-system-priorities - (cons 'utf-8 mm-coding-system-priorities))) - (setq charsets - (mm-delete-duplicates - (mapcar 'mm-mime-charset - (delq 'ascii - (mm-find-charset-region b e))))))) charsets)) (defmacro mm-with-unibyte-buffer (&rest forms) --- orig/lisp/mml.el +++ mod/lisp/mml.el @@ -284,8 +284,7 @@ (list (intern (downcase (cdr (assq 'charset tag)))))) (t - (mm-find-mime-charset-region point (point) - mm-hack-charsets)))) + (mm-find-mime-charset-region point (point))))) (when (and (not raw) (memq nil charsets)) (if (or (memq 'unknown-encoding mml-confirmation-set) (message-options-get 'unknown-encoding) --- orig/lisp/rfc2047.el +++ mod/lisp/rfc2047.el @@ -78,6 +78,8 @@ (iso-2022-jp . B) (iso-2022-kr . B) (gb2312 . B) + (gbk . B) + (gb18030 . B) (big5 . B) (cn-big5 . B) (cn-gb . B) @@ -344,7 +346,7 @@ ;; (make-char-table 'syntax-table '(2)) only works in Emacs. (let ((table (make-syntax-table))) ;; The following is done to work for setting all elements of the table - ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way. + ;; in Emacs 21-23 and XEmacs; it appears to be the cleanest way. ;; Play safe and don't assume the form of the word syntax entry -- ;; copy it from ?a. (if (fboundp 'set-char-table-range) ; Emacs --- orig/lisp/rfc2104.el +++ mod/lisp/rfc2104.el @@ -53,6 +53,7 @@ ;;; 1999-10-23 included in pgnus ;;; 2000-08-15 `rfc2104-hexstring-to-bitstring' ;;; 2000-05-12 added sha-1 example, added test case reference +;;; 2003-11-13 change rfc2104-hexstring-to-bitstring to ...-byte-list ;;; Code: @@ -88,12 +89,12 @@ (rfc2104-hex-to-int (reverse (append str nil)))) 0)) -(defun rfc2104-hexstring-to-bitstring (str) +(defun rfc2104-hexstring-to-byte-list (str) (let (out) (while (< 0 (length str)) (push (rfc2104-hex-to-int (substring str -2)) out) (setq str (substring str 0 -2))) - (concat out))) + out)) (defun rfc2104-hash (hash block-length hash-length key text) (let* (;; if key is longer than B, reset it to HASH(key) @@ -110,9 +111,12 @@ (setq k_ipad (mapcar (lambda (c) (logxor c rfc2104-ipad)) k_ipad)) (setq k_opad (mapcar (lambda (c) (logxor c rfc2104-opad)) k_opad)) ;; perform outer hash - (funcall hash (concat k_opad (rfc2104-hexstring-to-bitstring - ;; perform inner hash - (funcall hash (concat k_ipad text))))))) + (funcall hash + (encode-coding-string + (concat k_opad (rfc2104-hexstring-to-byte-list + ;; perform inner hash + (funcall hash (concat k_ipad text)))) + 'iso-latin-1)))) (provide 'rfc2104)