Gnus development mailing list
 help / color / mirror / Atom feed
From: Andreas Seltenreich <andreas+ding@gate450.dyndns.org>
To: ding@gnus.org
Subject: Re: Using nnrss very actively
Date: Fri, 17 Oct 2008 16:00:34 +0200	[thread overview]
Message-ID: <87hc7bcoe5.fsf@gate450.dyndns.org> (raw)
In-Reply-To: <m3myh4cvga.fsf@quimbies.gnus.org> (Lars Magne Ingebrigtsen's message of "Thu, 16 Oct 2008 19:15:49 +0200")

Lars Magne Ingebrigtsen writes:

> But can't Atom support just be folded into nnrss?  The difference
> between Atom and RSS is kinda...  not extremely big, if I recall
> correctly. 

I tried the folding approach about a year ago but didn't like the
result[1]. The difference is, IIRC, that Atom allows for more
sophisticated content structure, and for nnrss to support it properly,
it would have to create articles with arbitrary MIME layout instead of
just the plain text/html multipart/alternative articles. So, while the
attached patch worked tolerably with all Atom feeds I threw at it, it
isn't a proper solution, which would require a more invasive rewrite.

regards,
andreas

Footnotes: 
[1]  

Index: nnrss.el
===================================================================
RCS file: /usr/local/cvsroot/gnus/lisp/nnrss.el,v
retrieving revision 7.59
diff -c -r7.59 nnrss.el
*** nnrss.el	11 Jun 2008 14:19:44 -0000	7.59
--- nnrss.el	17 Oct 2008 13:21:49 -0000
***************
*** 696,704 ****
         nnrss-file-coding-system))
  
  (defun nnrss-check-group (group server)
!   (let (file xml subject url extra changed author date feed-subject
! 	     enclosure comments rss-ns rdf-ns content-ns dc-ns
! 	     hash-index)
      (if (and nnrss-use-local
  	     (file-exists-p (setq file (expand-file-name
  					(nnrss-translate-file-chars
--- 696,702 ----
         nnrss-file-coding-system))
  
  (defun nnrss-check-group (group server)
!   (let (file xml url changed)
      (if (and nnrss-use-local
  	     (file-exists-p (setq file (expand-file-name
  					(nnrss-translate-file-chars
***************
*** 720,732 ****
  	    (push (list group nnrss-group-max url) nnrss-server-data)))
  	(setq changed t))
        (setq xml (nnrss-fetch url)))
!     ;; See
!     ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
!     ;; for more RSS namespaces.
!     (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/")
! 	  rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
! 	  rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
! 	  content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
      (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
        (when (and (listp item)
  		 (string= (concat rss-ns "item") (car item))
--- 718,747 ----
  	    (push (list group nnrss-group-max url) nnrss-server-data)))
  	(setq changed t))
        (setq xml (nnrss-fetch url)))
!     (let ((changed nnrss-group-data))
!       (cond ((nnrss-rss-p xml) (nnrss-check-group-rss xml))
! 	    ((nnrss-atom-p xml) (nnrss-check-group-atom xml)))
!       (unless (eq changed nnrss-group-data)
! 	(nnrss-save-group-data group server)
! 	(let ((pair (assoc group nnrss-server-data)))
! 	  (if pair
! 	      (setcar (cdr pair) nnrss-group-max)
! 	    (push (list group nnrss-group-max) nnrss-server-data)))
! 	(nnrss-save-server-data server)))))
! 
! (defun nnrss-check-group-rss (xml)
!   ;; See
!   ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
!   ;; for more RSS namespaces.
!   (let ((dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/"))
! 	(rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
! 	(rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/"))
! 	(content-ns (nnrss-get-namespace-prefix xml
! 						"http://purl.org/rss/1.0/modules/content/"))
! 
! 	subject url extra author date feed-subject
! 	enclosure comments rss-ns rdf-ns content-ns dc-ns
! 	hash-index)
      (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
        (when (and (listp item)
  		 (string= (concat rss-ns "item") (car item))
***************
*** 780,795 ****
  	  comments
  	  hash-index)
  	 nnrss-group-data)
! 	(puthash hash-index t nnrss-group-hashtb)
! 	(setq changed t))
        (setq extra nil))
!     (when changed
!       (nnrss-save-group-data group server)
!       (let ((pair (assoc group nnrss-server-data)))
! 	(if pair
! 	    (setcar (cdr pair) nnrss-group-max)
! 	  (push (list group nnrss-group-max) nnrss-server-data)))
!       (nnrss-save-server-data server))))
  
  (declare-function gnus-group-make-rss-group "gnus-group" (&optional url))
  
--- 795,872 ----
  	  comments
  	  hash-index)
  	 nnrss-group-data)
! 	(puthash hash-index t nnrss-group-hashtb))
        (setq extra nil))
!     ))
! 
! (defun nnrss-check-group-atom (xml)
!   (let ((atom-ns (nnrss-get-namespace-prefix
! 		  xml "http://www.w3.org/2005/Atom"))
! 	(xhtml-ns (nnrss-get-namespace-prefix
! 		   xml "http://www.w3.org/1999/xhtml"))
! 	hash-index)
!     (dolist (entry (nreverse (xml-get-children
! 			      (assoc 'feed xml)
! 			      (intern (concat atom-ns "entry")))))
!       (when (and (string= (concat atom-ns "entry") (car-safe entry))
! 		 (progn (setq hash-index
! 			      (md5 (nnrss-node-text atom-ns 'id entry)))
! 			(not (gethash hash-index nnrss-group-hashtb))))
! 	(let ((title (nnrss-node-text atom-ns 'title entry))
! 	      (content
! 	       (let* ((el (car (or
! 				(xml-get-children
! 				 entry
! 				 (intern (concat atom-ns "content")))
! 				(xml-get-children
! 				 entry
! 				 (intern (concat atom-ns "summary"))))))
! 		      (ctype (xml-get-attribute-or-nil el 'type)))
! 		 (if (string= ctype "xhtml")
! 		     (with-temp-buffer
! 		       (xml-debug-print (list (caddr (cdr el))))
! 		       (buffer-string))
! 		   (nnrss-decode-entities-string
! 		    (nnrss-node-just-text el)))))
! 	      (url
! 	       (cdr (assoc 'href (cadr (assoc (intern (concat atom-ns "link"))
! 					      entry)))))
! 	      (from (mapconcat (lambda (author)
! 				 (nnrss-atom-parse-author atom-ns author))
! 			       (or (xml-get-children
! 				    entry
! 				    (intern (concat atom-ns "author")))
! 				   (xml-get-children
! 				    (assoc 'feed xml)
! 				    (intern (concat atom-ns "author"))))
! 			       ", "))
! 	      (date (nnrss-normalize-date
! 		     (nnrss-node-text atom-ns 'updated entry))))
! 	  (push
! 	   (list
! 	    (incf nnrss-group-max)
! 	    (current-time)
! 	    url
! 	    (and title (nnrss-mime-encode-string title))
! 	    (and from (nnrss-mime-encode-string from))
! 	    date
! 	    content
! 	    nil
! 	    nil
! 	    hash-index)
! 	   nnrss-group-data)
! 	  (puthash hash-index t nnrss-group-hashtb))))))
! 
! (defun nnrss-atom-parse-author (atom-ns el)
!   "Format an Atom author element EL into a proper rfc822 `From' header."
!   (let ((name (nnrss-node-text atom-ns 'name el))
! 	(email (nnrss-node-text atom-ns 'email el)))
!     ;; FIXME: Add proper quoting
!     (if name
! 	(format "\"%s\" <%s>"
! 		name
! 		(or email "nil@nil.invalid"))
!       email)))
  
  (declare-function gnus-group-make-rss-group "gnus-group" (&optional url))
  
***************
*** 1014,1054 ****
  
    (let ((parsed-page (nnrss-fetch url)))
  
! ;;    1. if this url is the rss, use it.
!     (if (nnrss-rss-p parsed-page)
! 	(let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
! 	  (nnrss-rss-title-description rss-ns parsed-page url))
! 
  ;;    2. look for the <link rel="alternate"
  ;;    type="application/rss+xml" and use that if it is there.
!       (let ((links (nnrss-get-rsslinks parsed-page)))
! 	(if links
! 	    (let* ((xml (nnrss-fetch
! 			 (cdr (assoc 'href (cadar links)))))
! 		   (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")))
! 	      (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links)))))
  
  ;;    3. look for links on the site in the following order:
  ;;       - onsite links ending in .rss, .rdf, or .xml
  ;;       - onsite links containing any of the above
  ;;       - offsite links ending in .rss, .rdf, or .xml
  ;;       - offsite links containing any of the above
! 	  (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
! 				  (match-string 0 url)))
! 		 (hrefs (nnrss-order-hrefs
! 			 base-uri (nnrss-extract-hrefs parsed-page)))
! 		 (rss-link nil))
! 	    (while (and (eq rss-link nil) (not (eq hrefs nil)))
! 	      (let ((href-data (nnrss-fetch (car hrefs))))
! 		(if (nnrss-rss-p href-data)
! 		    (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
! 		      (setq rss-link (nnrss-rss-title-description
! 				      rss-ns href-data (car hrefs))))
! 		  (setq hrefs (cdr hrefs)))))
! 	    (if rss-link rss-link
  
  ;;    4. check syndic8
! 	      (nnrss-find-rss-via-syndic8 url))))))))
  
  (defun nnrss-find-rss-via-syndic8 (url)
    "Query syndic8 for the rss feeds it has for URL."
--- 1091,1134 ----
  
    (let ((parsed-page (nnrss-fetch url)))
  
! ;;    1. if this url is the feed, use it.
!     (cond ((nnrss-rss-p parsed-page)
! 	   (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
! 	     (nnrss-rss-title-description rss-ns parsed-page url)))
! 	  ((nnrss-atom-p parsed-page)
! 	   (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://www.w3.org/2005/Atom")))
! 	     (nnrss-atom-title-description rss-ns parsed-page url)))
! 	  (t
  ;;    2. look for the <link rel="alternate"
  ;;    type="application/rss+xml" and use that if it is there.
! 	   (let ((links (nnrss-get-rsslinks parsed-page)))
! 	     (if links
! 		 (let* ((xml (nnrss-fetch
! 			      (cdr (assoc 'href (cadar links)))))
! 			(rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")))
! 		   (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links)))))
  
  ;;    3. look for links on the site in the following order:
  ;;       - onsite links ending in .rss, .rdf, or .xml
  ;;       - onsite links containing any of the above
  ;;       - offsite links ending in .rss, .rdf, or .xml
  ;;       - offsite links containing any of the above
! 	       (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
! 				       (match-string 0 url)))
! 		      (hrefs (nnrss-order-hrefs
! 			      base-uri (nnrss-extract-hrefs parsed-page)))
! 		      (rss-link nil))
! 		 (while (and (eq rss-link nil) (not (eq hrefs nil)))
! 		   (let ((href-data (nnrss-fetch (car hrefs))))
! 		     (if (nnrss-rss-p href-data)
! 			 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
! 			   (setq rss-link (nnrss-rss-title-description
! 					   rss-ns href-data (car hrefs))))
! 		       (setq hrefs (cdr hrefs)))))
! 		 (if rss-link rss-link
  
  ;;    4. check syndic8
! 		   (nnrss-find-rss-via-syndic8 url)))))))))
  
  (defun nnrss-find-rss-via-syndic8 (url)
    "Query syndic8 for the rss feeds it has for URL."
***************
*** 1103,1108 ****
--- 1183,1192 ----
    (or (eq (caar data) 'rss)
        (eq (caar data) 'rdf:RDF)))
  
+ (defun nnrss-atom-p (data)
+   "Test if DATA is an Atom feed."
+   (eq (caar data) 'feed))
+ 
  (defun nnrss-rss-title-description (rss-namespace data url)
    "Return the title of an RSS feed."
    (if (nnrss-rss-p data)
***************
*** 1115,1120 ****
--- 1199,1215 ----
  	 (cons 'title (caddr (nth 0 (nnrss-find-el title channel))))
  	 (cons 'href url)))))
  
+ (defun nnrss-atom-title-description (atom-ns data url)
+   "Return the title of an Atom feed."
+   (let ((title (intern (concat atom-ns "title")))
+ 	(link (intern (concat atom-ns "link")))
+ 	(subtitle (intern (concat atom-ns "subtitle")))
+ 	(feed (cddr (assoc 'feed data))))
+     (list
+      (cons 'title (caddr (assoc title feed)))
+      (cons 'description (caddr (assoc subtitle feed)))
+      (cons 'href url))))
+ 
  (defun nnrss-get-namespace-prefix (el uri)
    "Given EL (containing a parsed element) and URI (containing a string
  that gives the URI for which you want to retrieve the namespace



  parent reply	other threads:[~2008-10-17 14:00 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-16 13:00 Lars Magne Ingebrigtsen
2008-10-16 13:15 ` David Engster
2008-10-16 13:27   ` Lars Magne Ingebrigtsen
2008-10-16 13:43     ` David Engster
2008-10-16 13:52       ` Lars Magne Ingebrigtsen
2008-10-16 16:27         ` Mark Plaksin
2008-10-16 17:15           ` Lars Magne Ingebrigtsen
2008-10-17  8:45             ` Paul R
2008-10-17  8:57               ` David Engster
2008-10-17  9:18                 ` Paul R
2008-10-17 14:00             ` Andreas Seltenreich [this message]
2008-10-16 17:46           ` Robert D. Crawford
2008-10-16 19:27             ` David Engster
2008-10-20 17:13               ` Mark Plaksin
2008-10-21 15:53                 ` Ted Zlatanov
2008-10-21 16:24                 ` Adam Sjøgren
2008-10-28  0:04                 ` Mark Plaksin
2008-10-28  5:20                   ` Andreas Seltenreich
2008-10-28 12:11                     ` Mark Plaksin
2008-10-31  8:00                       ` nnrss-ignore-article-fields for more than just fields jidanni
2008-10-31 17:39                         ` Ted Zlatanov
2008-11-10 22:04                           ` Ted Zlatanov
2008-11-11  7:37                             ` Reiner Steib
2008-11-11 14:38                               ` Ted Zlatanov
2008-11-19 22:41                             ` Adam Sjøgren
2008-11-20 16:40                               ` Ted Zlatanov
2008-10-16 13:17 ` Using nnrss very actively Robert D. Crawford
2008-10-16 13:18 ` Adam Sjøgren
2008-10-18 23:10 ` Sebastian Krause
2008-10-18 23:16 ` Kevin Ryde
2008-10-20 20:20   ` Lars Magne Ingebrigtsen
2008-10-21 15:56     ` Ted Zlatanov
2008-10-25  0:03     ` Kevin Ryde
2008-11-03 23:43       ` Content-Location for w3m display (was: Using nnrss very actively) Kevin Ryde

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87hc7bcoe5.fsf@gate450.dyndns.org \
    --to=andreas+ding@gate450.dyndns.org \
    --cc=ding@gnus.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).