Gnus development mailing list
 help / color / mirror / Atom feed
* Statiscics'r'Us
@ 1998-10-04  6:00 Lars Magne Ingebrigtsen
  1998-10-05  0:17 ` Statiscics'r'Us Joerg Plate
  0 siblings, 1 reply; 3+ messages in thread
From: Lars Magne Ingebrigtsen @ 1998-10-04  6:00 UTC (permalink / raw)


Below is a lisp file that requests lots of headers and generates
newsreader stats.  I've run it on a couple of local hierarchies, and
it seems to work OK.  Cross-posted articles are counted many times,
though.

Anyway.  You'd probably run it with something like:

nohup nice -11 emacs-20.3 -batch -l ~/.emacs --eval '(push "~/pgnus/lisp" load-path)' -l ~/.gnus.el -f gnus -l ~/lisp/gnus-stats.elc -f gnus-batch-statistics "^ifi\\." &

I'd be interested in seeing results (not the raw ones; the compiled
ones) for some local hierarchies.  I'll be running it myself on comp
and soc and no and ifi.  Be kind to your newsserver; don't run it when
the server is busy doing useful things. 

;;; gnus-stats.el --- functions for generating newsreader statistics
;; Copyright (C) 1998 Lars Magne Ingebrigtsen

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: news

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;;; Code:

(require 'gnus)
(require 'gnus-sum)
(require 'cl)


(defun gnus-stats-header-value (header)
  "Return the value of HEADER in the accessible part of the buffer, or nil.
HEADER is the header name without the trailing colon, e.g. \"from\".
The search starts at `point-min', requires the header to start a line
\(it looks for a preceding newline), and obeys `case-fold-search'.
Point is left after the header name on success."
  (goto-char (point-min))
  (when (search-forward (concat "\n" header ": ") nil t)
    (nnheader-header-value)))

(defun gnus-stats-group (group year month)
  "Insert raw newsreader data for GROUP into the current buffer.
The article heads are read from `nntp-server-buffer', where each head
is terminated by a line containing a single dot.  Only articles whose
Date header parses (via `parse-time-string') to YEAR and MONTH are
considered.

A line \"GROUP YEAR MONTH\" is inserted, followed by one printed list
per matching article of the form (FROM NEWSREADER ID).  NEWSREADER is
the value of the first of X-Newsreader, X-Mailer or User-Agent that is
present; ID is the Message-ID and is only recorded when none of those
headers exists.  Missing values are recorded as empty strings."
  (let ((case-fold-search t)
        data date newsreader beg)
    (with-current-buffer nntp-server-buffer
      (goto-char (point-min))
      (setq beg (point))
      (while (search-forward "\n.\n" nil t)
        (let ((end (point)))
          ;; `save-restriction' guarantees the buffer is widened again
          ;; even if parsing one of the heads signals an error.
          (save-restriction
            (narrow-to-region beg end)
            (setq date (gnus-stats-header-value "date"))
            (when date
              (setq date (parse-time-string date))
              (when (and (eq year (nth 5 date))
                         (eq month (nth 4 date)))
                (setq newsreader
                      (or (gnus-stats-header-value "x-newsreader")
                          (gnus-stats-header-value "x-mailer")
                          (gnus-stats-header-value "user-agent")))
                (push (list
                       (or (gnus-stats-header-value "from") "")
                       (or newsreader "")
                       ;; The Message-ID is only used as a fallback
                       ;; fingerprint.  It is read only after a
                       ;; successful search, so a head without a
                       ;; Message-ID yields "" instead of text taken
                       ;; from stale match data.
                       (if newsreader
                           ""
                         (or (gnus-stats-header-value "message-id") "")))
                      data))))
          ;; The next head starts where this one ended.
          (setq beg end)
          (goto-char end))))
    (insert "\n" group " " (number-to-string year) " "
            (number-to-string month) "\n")
    ;; DATA was built with `push', so reverse it to keep article order.
    (dolist (d (nreverse data))
      (prin1 d (current-buffer))
      (insert "\n"))))

(defun gnus-generate-statistics (regexp)
  "Generate raw newsreader statistics for groups that match REGEXP.
The buffer *Raw Data* is popped to and erased, a header line naming
REGEXP and the sampled year and month is inserted, and then the heads
of all articles in every matching active group are fetched and handed
to `gnus-stats-group'.

The sampled period is the calendar month preceding the current one;
when run in January this is December of the previous year."
  (interactive "sGroups to statisticify (regexp): ")
  ;; First we make sure that we have really read the active file.
  (unless (gnus-read-active-file-p)
    (let ((gnus-read-active-file t))
      (gnus-read-active-file)))
  (pop-to-buffer "*Raw Data*")
  (erase-buffer)
  ;; Find all groups and sort them.
  (let* ((groups
          (sort
           (let (list)
             (mapatoms
              (lambda (sym)
                (and (boundp sym)
                     (symbol-value sym)
                     (consp (symbol-value sym))
                     (string-match regexp (symbol-name sym))
                     (push (symbol-name sym) list)))
              gnus-active-hashtb)
             list)
           'string<))
         ;; Bound to t while the heads are retrieved below.
         (gnus-nov-is-evil t)
         (now (decode-time (current-time)))
         (this-month (nth 4 now))
         ;; Step back one month, wrapping January to December of the
         ;; previous year instead of producing month 0.
         (month (if (= this-month 1) 12 (1- this-month)))
         (year (if (= this-month 1) (1- (nth 5 now)) (nth 5 now)))
         group articles)
    (insert (format "Statistics for %s; year %d; month %d\n\n\n"
                    regexp year month))
    (while (setq group (pop groups))
      (message "Doing %s; %d groups left..." group (length groups))
      (setq articles (gnus-uncompress-range (gnus-active group)))
      (when (gnus-retrieve-headers articles group)
        (gnus-stats-group group year month)))))

(defvar gnus-stat-newsreaders
  (list "Gnus" "Mozilla" "Eudora" "Outlook Express"
        "Agent" "Xpress" "XFMail" "slrn"
        "Microsoft Internet News"
        "Thor" "Marcel" "MacSOUP" "Gravity" "knews" "UKA_PPP"
        "Yarn" "WinVN" "xrn" "NewsWatcher" "NN" "MicroDot"
        "ICE Crisis" "NewsAutomaten" "PMINews"
        "Exchange Internet News" "VSoup" "BBBS/L"
        "Internet Adventurer"
        "News for Windows" "Cyberdog" "YelloWeb"
        "tin" "trn")
  "List of known newsreader names.
`gnus-analyze-statistics' passes each element to `string-match' against
the recorded newsreader header, trying the elements in list order and
using the first one that matches as the newsreader type.")

(defun gnus-analyze-statistics ()
  "Summarize the raw data in the *Raw Data* buffer.
Every line in that buffer that starts with an open parenthesis is read
as a list (FROM NEWSREADER ID).  NEWSREADER is classified by matching
it against the elements of `gnus-stat-newsreaders' in order; if none
matches, ID is checked for \"Pine\" and \"dejanews.com>\", and
otherwise the article is counted as \"Unknown\".

The result is written to the buffer *Newsreader Statistics*, which is
popped to and erased first: the first line of the raw data, the total
article count, and then for each type (most frequent first) its count
and share of the total, followed by a breakdown per distinct
newsreader string within that type."
  (interactive)
  (save-excursion
    (set-buffer "*Raw Data*")
    (goto-char (point-min))
    (let ((total 0)
          elem reader readers entry type spec entries this e id ident)
      ;; The first line of the raw data identifies the sample.
      (setq ident (buffer-substring (point) (gnus-point-at-eol)))
      (while (search-forward "\n(" nil t)
        (incf total)
        (beginning-of-line)
        (setq elem (read (current-buffer)))
        (setq reader (cadr elem))
        (setq readers gnus-stat-newsreaders)
        ;; TYPE becomes the first known name that matches READER, or
        ;; nil when the loop runs off the end of the list.
        (unless (setq type
                      (catch 'found
                        (while readers
                          (when (string-match (car readers) reader)
                            (throw 'found (car readers)))
                          (pop readers))))
          ;; No known newsreader header; fall back to fingerprinting
          ;; the Message-ID.
          (setq id (caddr elem))
          (cond
           ((string-match "Pine" id)
            (setq type "Pine"
                  reader "Pine"))
           ((string-match "dejanews.com>" id)
            (setq type "DejaNews"
                  reader "DejaNews"))
           (t
            (setq type "Unknown"))))
        ;; Each element of ENTRIES is (TYPE COUNT (READER N)...).
        (if (not (setq entry (assoc type entries)))
            (push (list type 1 (list reader 1)) entries)
          (incf (cadr entry))
          ;; Looking READER up in ENTRY itself finds the (READER N)
          ;; sublists that follow TYPE and COUNT.
          (if (not (setq spec (assoc reader entry)))
              (nconc entry (list (list reader 1)))
            (incf (cadr spec)))))
      (pop-to-buffer "*Newsreader Statistics*")
      (erase-buffer)
      (insert ident "\n\n")
      ;; Most frequent type first.
      (setq entries (sort entries (lambda (e1 e2) (> (cadr e1) (cadr e2)))))
      (insert (format "Total number of articles in sample: %d\n\n" total))
      (while (setq entry (pop entries))
        (insert (format "%s: %d (%.2f%%)\n"
                        (or (car entry) "Unknown")
                        (setq this (cadr entry))
                        (* 100 (/ (float this) total))))
        ;; From here on ENTRY holds only the per-reader breakdown,
        ;; most frequent reader string first.
        (setq entry (sort (cddr entry)
                          (lambda (e1 e2) (> (cadr e1) (cadr e2)))))
        (while (setq e (pop entry))
          (insert (format "%4d (%5.2f%%): %s\n" (cadr e)
                          (* 100 (/ (float (cadr e)) this))
                          (car e))))
        (insert "\n")))))
		
(defun gnus-batch-statistics ()
  "Generate and save newsreader statistics from the command line.
The group regexp is taken from the first element of
`command-line-args-left', i.e. the argument that follows
\"-f gnus-batch-statistics\" on the Emacs command line.  An error is
signaled if there is no such argument.

The compiled statistics are written to ~/newsreader-statistics-REGEXP
and the raw data to ~/newsreader-statistics-REGEXP.raw."
  (interactive)
  (let ((regexp (car command-line-args-left)))
    (unless regexp
      (error "No group regexp given on the command line"))
    ;; Consume the argument so that Emacs does not go on to treat the
    ;; regexp as the name of a file to visit.
    (setq command-line-args-left (cdr command-line-args-left))
    (gnus-slave)
    (gnus-generate-statistics regexp)
    (gnus-analyze-statistics)
    (set-buffer "*Newsreader Statistics*")
    (write-region (point-min) (point-max)
                  (format "~/newsreader-statistics-%s" regexp))
    (set-buffer "*Raw Data*")
    (write-region (point-min) (point-max)
                  (format "~/newsreader-statistics-%s.raw" regexp))))
   
;;; gnus-stats.el ends here


-- 
(domestic pets only, the antidote for overdose, milk.)
  larsi@ifi.uio.no * Lars Magne Ingebrigtsen


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Statiscics'r'Us
  1998-10-04  6:00 Statiscics'r'Us Lars Magne Ingebrigtsen
@ 1998-10-05  0:17 ` Joerg Plate
  1998-10-05 10:09   ` Statiscics'r'Us Lars Balker Rasmussen
  0 siblings, 1 reply; 3+ messages in thread
From: Joerg Plate @ 1998-10-05  0:17 UTC (permalink / raw)


> I've run it on a couple of local hierarchies
I've run it on the "de.*" hierarchy. The results are on
<URL:http://www.provi.de/~plate/newsreader-statistics-de.gz>

-- 
"i'm working on it"


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Statiscics'r'Us
  1998-10-05  0:17 ` Statiscics'r'Us Joerg Plate
@ 1998-10-05 10:09   ` Lars Balker Rasmussen
  0 siblings, 0 replies; 3+ messages in thread
From: Lars Balker Rasmussen @ 1998-10-05 10:09 UTC (permalink / raw)


Joerg Plate <plate@psyche.kn-bremen.de> writes:
> > I've run it on a couple of local hierarchies
> I've run it on the "de.*" hierarchy. The results are on
> <URL:http://www.provi.de/~plate/newsreader-statistics-de.gz>

And dk.*:
<URL:http://www.mjolner.dk/~lbr/newsreader-statistics-dk>
-- 
Lars Balker Rasmussen, Software Engineer, Mjolner Informatics ApS
lbr@mjolner.dk


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~1998-10-05 10:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1998-10-04  6:00 Statiscics'r'Us Lars Magne Ingebrigtsen
1998-10-05  0:17 ` Statiscics'r'Us Joerg Plate
1998-10-05 10:09   ` Statiscics'r'Us Lars Balker Rasmussen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).