Gnus development mailing list
 help / color / mirror / Atom feed
From: Andreas Seltenreich <andreas+ding@gate450.dyndns.org>
Subject: Re: nnweb + Gmane search
Date: Fri, 24 Feb 2006 01:49:57 +0100	[thread overview]
Message-ID: <874q2p1sy2.fsf@gate450.dyndns.org> (raw)
In-Reply-To: <slrndvsfu8.fc6.olly@msgid.survex.com> (Olly Betts's message of "Thu, 23 Feb 2006 23:04:22 +0000 (UTC)")

[-- Attachment #1: Type: text/plain, Size: 1056 bytes --]

Olly Betts writes:

> On 2006-02-23, Reiner Steib <reinersteib+gmane@imap.cc> wrote:
>> Please do, unless Olly isn't happy with the current output.  We can
>> easily adjust the URL in Gnus later if Olly doesn't want to include
>> FMT=nov in the default CGI script yet.  But it would be nice to have a
>> permanent URL.
>
> I'm happy with the output (apart from the extra newline at the end but
> that's a very minor issue).

Ok, I've attached a patch. Future changes to anything in the output
besides the Xref header should be transparent to the code. E.g., if
users wanted an extra "Newsgroups:" header, it should "just work" as
long as the output is valid NOV.

regards,
andreas

2006-02-24  Andreas Seltenreich  <uwi7@stud.uni-karlsruhe.de>

	* nnweb.el (nnweb-type-definition, nnweb-gmane-create-mapping,
        nnweb-gmane-wash-article, nnweb-gmane-search): Fix Gmane web
        groups.  Kudos to Olly Betts for providing NOV output on the
        server side.
        (nnweb-google-create-mapping): Update regexps and add some
        progress indication.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: nnweb.patch --]
[-- Type: text/x-patch, Size: 7042 bytes --]

Index: nnweb.el
===================================================================
RCS file: /usr/local/cvsroot/gnus/lisp/nnweb.el,v
retrieving revision 7.15
diff -c -r7.15 nnweb.el
*** nnweb.el	13 Feb 2006 13:32:28 -0000	7.15
--- nnweb.el	23 Feb 2006 19:39:02 -0000
***************
*** 27,35 ****
  
  ;; Note: You need to have `w3' installed for some functions to work.
  
- ;; FIXME: Due to changes in the HTML output of Gmane, stuff related to Gmane
- ;; web groups (`gnus-group-make-web-group') doesn't work anymore.
- 
  ;;; Code:
  
  (eval-when-compile (require 'cl))
--- 27,32 ----
***************
*** 82,88 ****
       (reference . identity)
       (map . nnweb-gmane-create-mapping)
       (search . nnweb-gmane-search)
!      (address . "http://gmane.org/")
       (identifier . nnweb-gmane-identity)))
    "Type-definition alist.")
  
--- 79,85 ----
       (reference . identity)
       (map . nnweb-gmane-create-mapping)
       (search . nnweb-gmane-search)
!      (address . "http://search.gmane.org/cgi-bin/omega.cgi")
       (identifier . nnweb-gmane-identity)))
    "Type-definition alist.")
  
***************
*** 407,412 ****
--- 404,410 ----
    (save-excursion
      (set-buffer nnweb-buffer)
      (erase-buffer)
+     (nnheader-message 7 "Searching google...")
      (when (funcall (nnweb-definition 'search) nnweb-search)
  	(let ((more t)
  	      (i 0))
***************
*** 417,431 ****
  	    (goto-char (point-min))
  	    (incf i 100)
  	    (if (or (not (re-search-forward
! 			  "<td><a href=\"\n\\([^>\"]+\\)\"><img src=\"/img/nav_next" nil t))
  		    (>= i nnweb-max-hits))
  		(setq more nil)
  	      ;; Yup, there are more articles
  	      (setq more (concat (nnweb-definition 'base) (match-string 1)))
  	    (when more
  	      (erase-buffer)
  	      (mm-url-insert more))))
  	  ;; Return the articles in the right order.
  	  (setq nnweb-articles
  		(sort nnweb-articles 'car-less-than-car))))))
  
--- 415,432 ----
  	    (goto-char (point-min))
  	    (incf i 100)
  	    (if (or (not (re-search-forward
! 			  "<a href=\"\n\\([^>\"]+\\)\"><img src=\"[^\"]+next"
! 			  nil t))
  		    (>= i nnweb-max-hits))
  		(setq more nil)
  	      ;; Yup, there are more articles
  	      (setq more (concat (nnweb-definition 'base) (match-string 1)))
  	    (when more
  	      (erase-buffer)
+ 	      (nnheader-message 7 "Searching google...(%d)" i)
  	      (mm-url-insert more))))
  	  ;; Return the articles in the right order.
+ 	  (nnheader-message 7 "Searching google...done")
  	  (setq nnweb-articles
  		(sort nnweb-articles 'car-less-than-car))))))
  
***************
*** 458,503 ****
    "Perform the search and create a number-to-url alist."
    (save-excursion
      (set-buffer nnweb-buffer)
!     (erase-buffer)
!     (when (funcall (nnweb-definition 'search) nnweb-search)
!       (let ((more t)
! 	    (case-fold-search t)
! 	    (active (or (cadr (assoc nnweb-group nnweb-group-alist))
! 			(cons 1 0)))
! 	    subject group url
! 	    map)
! 	  ;; Remove stuff from the beginning of results
! 	(goto-char (point-min))
! 	(search-forward "Search Results</h1><ul>" nil t)
! 	(delete-region (point-min) (point))
  	(goto-char (point-min))
! 	;; Iterate over the actual hits
! 	(while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t)
! 	    (setq url (concat "http://gmane.org/" (match-string 1)))
! 	    (setq subject (match-string 2))
! 	  (unless (nnweb-get-hashtb url)
! 	    (push
! 	     (list
! 	      (incf (cdr active))
! 	      (make-full-mail-header
! 	       (cdr active) (concat  "(" group ") " subject) nil nil
! 	       nil nil 0 0 url))
! 	     map)
! 	    (nnweb-set-hashtb (cadar map) (car map))))
! 	;; Return the articles in the right order.
! 	(setq nnweb-articles
! 	      (sort (nconc nnweb-articles map) 'car-less-than-car))))))
  
  (defun nnweb-gmane-wash-article ()
    (let ((case-fold-search t))
      (goto-char (point-min))
!     (search-forward "<!--X-Head-of-Message-->" nil t)
!     (delete-region (point-min) (point))
!     (goto-char (point-min))
!     (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
!       (replace-match "\\1\\2" t)
!       (forward-line 1))
!     (mm-url-remove-markup)))
  
  (defun nnweb-gmane-search (search)
    (mm-url-insert
--- 459,519 ----
    "Perform the search and create a number-to-url alist."
    (save-excursion
      (set-buffer nnweb-buffer)
!     (let ((case-fold-search t)
! 	  (active (or (cadr (assoc nnweb-group nnweb-group-alist))
! 		      (cons 1 0)))
! 	  map)
!       (erase-buffer)
!       (nnheader-message 7 "Searching Gmane..." )
!       (when (funcall (nnweb-definition 'search) nnweb-search)
  	(goto-char (point-min))
! 	;; Skip the status line
! 	(forward-line 1)
! 	;; Thanks to Olly Betts we now have NOV lines in our buffer!
! 	(while (not (eobp))
! 	  (unless (eolp)
! 	    (let ((header (nnheader-parse-nov)))
! 	      (let ((xref (mail-header-xref header))
! 		    (from (mail-header-from header))
! 		    (subject (mail-header-subject header))
! 		    (rfc2047-encoding-type 'mime))
! 		(when (string-match " \\([^:]+\\):\\([0-9]+\\)" xref)
! 		  (mail-header-set-xref
! 		   header
! 		   (format "http://article.gmane.org/%s/%s/raw"
! 			   (match-string 1 xref)
! 			   (match-string 2 xref))))
! 
! 		;; Add host part to gmane-encrypted addresses
! 		(when (string-match "@$" from)
! 		  (mail-header-set-from header
! 					(concat from "public.gmane.org")))
! 
! 		(mail-header-set-subject header
! 					 (rfc2047-encode-string subject))
! 
! 		(unless (nnweb-get-hashtb (mail-header-xref header))
! 		  (push
! 		   (list
! 		    (incf (cdr active))
! 		    header)
! 		   map)
! 		  (nnweb-set-hashtb (cadar map) (car map))))))
! 	  (forward-line 1)))
!       (nnheader-message 7 "Searching Gmane...done")
!       (setq nnweb-articles
! 	    (sort (nconc nnweb-articles map) 'car-less-than-car)))))
  
  (defun nnweb-gmane-wash-article ()
    (let ((case-fold-search t))
      (goto-char (point-min))
!     (when (search-forward "<!--X-Head-of-Message-->" nil t)
!       (delete-region (point-min) (point))
!       (goto-char (point-min))
!       (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
! 	(replace-match "\\1\\2" t)
! 	(forward-line 1))
!       (mm-url-remove-markup))))
  
  (defun nnweb-gmane-search (search)
    (mm-url-insert
***************
*** 505,514 ****
      (nnweb-definition 'address)
      "?"
      (mm-url-encode-www-form-urlencoded
!      `(("query" . ,search)))))
    (setq buffer-file-name nil)
    t)
- 
  
  (defun nnweb-gmane-identity (url)
    "Return a unique identifier based on URL."
--- 521,533 ----
      (nnweb-definition 'address)
      "?"
      (mm-url-encode-www-form-urlencoded
!      `(("query" . ,search)
!        ("FMT" . "nov")
!        ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits))))))
    (setq buffer-file-name nil)
+   (set-buffer-multibyte t)
+   (mm-decode-coding-region (point-min) (point-max) 'utf-8)
    t)
  
  (defun nnweb-gmane-identity (url)
    "Return a unique identifier based on URL."

  reply	other threads:[~2006-02-24  0:49 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-31  9:42 nnweb fix Andreas Seltenreich
2005-12-31 10:48 ` Andreas Seltenreich
2006-01-04  0:18   ` Reiner Steib
2006-01-04 12:25     ` Andreas Seltenreich
2006-01-27  8:35     ` Andreas Seltenreich
2006-01-30 15:08       ` Reiner Steib
2006-01-31  5:48         ` Andreas Seltenreich
2006-01-31 16:03           ` Reiner Steib
2006-01-31 17:06             ` Andreas Seltenreich
2006-02-03 13:20               ` Reiner Steib
2006-02-12  6:34                 ` Andreas Seltenreich
2006-02-13 13:35                   ` Reiner Steib
2006-02-08  5:56             ` nnweb + Gmane search (was: nnweb fix) Andreas Seltenreich
2006-02-08  7:56               ` Olly Betts
2006-02-09  4:47                 ` nnweb + Gmane search Andreas Seltenreich
2006-02-09 10:41                   ` Olly Betts
2006-02-09 12:28                     ` Olly Betts
2006-02-11  9:23                       ` Andreas Seltenreich
2006-02-13 10:23                         ` Olly Betts
2006-02-14 19:55                           ` Andreas Seltenreich
2006-02-23 17:53                             ` Reiner Steib
2006-02-23 23:04                               ` Olly Betts
2006-02-24  0:49                                 ` Andreas Seltenreich [this message]
2006-02-24 12:07                                 ` Olly Betts
2006-02-24 14:31                                   ` Reiner Steib
2006-02-24 15:34                                     ` Olly Betts
2006-02-24 21:58                                     ` Andreas Seltenreich
2006-02-24 23:24                                       ` Reiner Steib

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874q2p1sy2.fsf@gate450.dyndns.org \
    --to=andreas+ding@gate450.dyndns.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).