*** spam-stat.el.orig Sat Jan 10 11:33:03 2004 --- spam-stat.el Sat Jan 10 11:33:25 2004 *************** *** 171,176 **** --- 171,182 ---- :type 'number :group 'spam-stat) + (defcustom spam-treat-mime-function 'spam-treat-mime + "Function to treat MIME articles." + :group 'spam-stat + :type 'function) + + (defvar spam-stat-syntax-table (let ((table (copy-syntax-table text-mode-syntax-table))) (modify-syntax-entry ?- "w" table) *************** *** 461,466 **** --- 467,473 ---- (progn (set-buffer spam-stat-buffer) (goto-char (point-min)) + (spam-treat-article) (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshhold) (when (boundp 'nnmail-split-trace) (mapc (lambda (entry) *************** *** 485,490 **** --- 492,498 ---- (setq count (1+ count)) (message "Reading %s: %.2f%%" dir (/ count max)) (insert-file-contents f) + (spam-treat-article) (funcall func) (erase-buffer)))))) *************** *** 503,519 **** (interactive) (hash-table-count spam-stat)) ! (defun spam-stat-test-directory (dir) "Test all the regular files in directory DIR for spam. If the result is 1.0, then all files are considered spam. ! If the result is 0.0, non of the files is considered spam. You can use this to determine error rates." (interactive "D") (let* ((files (directory-files dir t "^[^.]")) (total (length files)) (score 0.0); float (max (/ total 100.0)); float ! (count 0)) (with-temp-buffer (dolist (f files) (when (and (file-readable-p f) --- 511,531 ---- (interactive) (hash-table-count spam-stat)) ! (defun spam-stat-test-directory (dir &optional sp) "Test all the regular files in directory DIR for spam. If the result is 1.0, then all files are considered spam. ! If the result is 0.0, none of the files is considered spam. ! If SP eq spam, print the list of spam files. Otherwise if SP is non-nil ! print the non-spam files. You can use this to determine error rates." (interactive "D") (let* ((files (directory-files dir t "^[^.]")) (total (length files)) (score 0.0); float (max (/ total 100.0)); float ! (count 0) ! (spamf '()) ! (nspamf '())) (with-temp-buffer (dolist (f files) (when (and (file-readable-p f) *************** *** 523,531 **** (message "Reading %.2f%%, score %.2f%%" (/ count max) (/ score count)) (insert-file-contents f) (when (> (spam-stat-score-buffer) 0.9) ! (setq score (1+ score))) (erase-buffer)))) (message "Final score: %d / %d = %f" score total (/ score total)))) ;; Shrinking the dictionary --- 535,548 ---- (message "Reading %.2f%%, score %.2f%%" (/ count max) (/ score count)) (insert-file-contents f) + (push f nspamf) + (spam-treat-article) (when (> (spam-stat-score-buffer) 0.9) ! (setq score (1+ score)) ! (pop nspamf) ! (push f spamf)) (erase-buffer)))) + (when sp (print (if (equal sp "spam") spamf nspamf))) (message "Final score: %d / %d = %f" score total (/ score total)))) ;; Shrinking the dictionary *************** *** 562,567 **** --- 579,685 ---- (remove-hook 'gnus-select-article-hook 'spam-stat-store-gnus-article-buffer)) + (defun spam-treat-article () + "Treat the current buffer prior to spam analysis." + (interactive) + (spam-decode) + (let ((gnus-inhibit-treatment t) + (gnus-treatment-function-alist nil) + (gnus-article-image-alist nil) + (gnus-article-wash-types nil) + (gnus-article-buffer (current-buffer)) + (buffer-read-only nil)) + (when spam-treat-mime-function + (funcall spam-treat-mime-function)))) + + (defun spam-treat-mime (&optional ihandles) + "Treat MIME parts." + (save-excursion + (save-selected-window + (let ((window (get-buffer-window gnus-article-buffer)) + (point (point))) + (when window + (select-window window) + ;; We have to do this since selecting the window + ;; may change the point. So we set the window point. + (set-window-point window point))) + (let* ((handles (or ihandles + (mm-dissect-buffer nil gnus-article-loose-mime) + (and gnus-article-emulate-mime + (mm-uu-dissect)))) + buffer-read-only handle name type b e display) + (when (and (not ihandles) + (not gnus-displaying-mime)) + ;; Top-level call; we clean up. + (when gnus-article-mime-handles + (mm-destroy-parts gnus-article-mime-handles) + (setq gnus-article-mime-handle-alist nil));; A trick. + (setq gnus-article-mime-handles handles) + ;; We allow users to glean info from the handles. + (when gnus-article-mime-part-function + (gnus-mime-part-function handles))) + (if (and handles + (or (not (stringp (car handles))) + (cdr handles))) + (progn + (when (and (not ihandles) + (not gnus-displaying-mime)) + ;; Clean up for mime parts. + (article-goto-body) + (delete-region (point) (point-max))) + (let ((gnus-displaying-mime t)) + (spam-treat-parts handles)))))))) + + (defun spam-treat-parts (handle) + (if (stringp (car handle)) + (mapcar 'spam-treat-parts (cdr handle)) + (if (bufferp (car handle)) + (save-restriction + (narrow-to-region (point) (point)) + (when (let ((case-fold-search t)) + (string-match "text" (car (mm-handle-type handle)))) + (mm-insert-part handle)) + (goto-char (point-max))) + (mapcar 'spam-treat-parts handle)))) + + + (defun spam-decode () + "Translate a quoted-printable-encoded or base64 article." + (interactive) + (save-excursion + (let ((buffer-read-only nil) type charset) + (if (gnus-buffer-live-p (current-buffer)) + (with-current-buffer (current-buffer) + (setq type + (gnus-fetch-field "content-transfer-encoding")) + (let* ((ct (gnus-fetch-field "content-type")) + (ctl (and ct + (ignore-errors + (mail-header-parse-content-type ct))))) + (setq charset (and ctl + (mail-content-type-get ctl 'charset))) + (if (stringp charset) + (setq charset (intern (downcase charset))))))) + (unless charset + (setq charset gnus-newsgroup-charset)) + (if (and type (let ((case-fold-search t)) + (string-match "quoted-printable" type))) + (progn + (article-goto-body) + (quoted-printable-decode-region + (point) (point-max) (mm-charset-to-coding-system charset))) + (progn + (when (and type (let ((case-fold-search t)) + (string-match "base64" type))) + (article-goto-body) + (save-restriction + (narrow-to-region (point) (point-max)) + (ignore-errors (base64-decode-region (point-min) (point-max)) + (mm-decode-coding-region + (point-min) (point-max) + (mm-charset-to-coding-system charset)))))))))) + + (provide 'spam-stat) ;;; spam-stat.el ends here