#!/usr/bin/python
"""Print "url referer" for each interesting request in the Apache access logs.

Reads the rotated log followed by the current log, drops requests that
``skip`` rejects, and writes one ``url referer`` pair per line to stdout.
Runs under both Python 2 and Python 3.
"""
import itertools
import re

# URLs ending in one of these suffixes are filtered out of the report.
skipRE = re.compile(r'(js|png|gif|jpg|css|rss091|atom|ico)$')

# Older (rotated) log first, so output follows chronological order.
_LOG_PATHS = ("/var/log/apache2/access.log.1", "/var/log/apache2/access.log")

# Positions of the request URL and the referer after whitespace-splitting a
# log line.  NOTE(review): these assume Apache's "combined" log format
# (host ident user [date tz] "METHOD url PROTO" status bytes "referer" ...);
# confirm against the server's LogFormat directive.
_URL_FIELD = 6
_REFERER_FIELD = 10


def skip(url, referer):
    """Return True if the request should be left out of the report.

    A request is skipped when ``url`` ends with one of the suffixes in
    ``skipRE``, when ``referer`` is ``'-'``, or when ``referer`` starts
    with ``http://www.somebits.com``.
    """
    return (skipRE.search(url) is not None
            or referer == '-'
            or referer.startswith('http://www.somebits.com'))


def parse_line(line):
    """Split one log line into ``(url, referer)``.

    Returns None when the line has too few whitespace-separated fields to
    contain a referer.  Surrounding double quotes are stripped from the
    referer.
    """
    fields = line.split()
    if len(fields) <= _REFERER_FIELD:
        return None
    return fields[_URL_FIELD], fields[_REFERER_FIELD].strip('"')


def _read_lines(path):
    """Yield the lines of ``path``, closing the file when exhausted."""
    with open(path) as log:
        for line in log:
            yield line


def main():
    """Print the url and referer of every request that is not skipped."""
    all_lines = itertools.chain.from_iterable(
        _read_lines(path) for path in _LOG_PATHS)
    for line in all_lines:
        parsed = parse_line(line)
        if parsed is None:
            # Malformed or truncated line: nothing to report.
            continue
        url, referer = parsed
        if not skip(url, referer):
            # Single formatted string so the output is the same on 2 and 3.
            print("%s %s" % (url, referer))


if __name__ == "__main__":
    main()
The output is then piped through a Python script I wrote years ago which counts up the most common lines.
One remaining mystery: the large majority of my pageviews have a referer of -, or empty. I know folks aren’t pasting URLs in by hand, so where does this come from? Do redirectors somehow clear Referer?