| 1 |
"""A wrapper for Analog, a weblog analyzer and reporting tool.""" |
|---|
| 2 |
|
|---|
| 3 |
import ftplib |
|---|
| 4 |
import datetime |
|---|
| 5 |
import os |
|---|
| 6 |
import re |
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
def _retrieve_ftp_file(ftp, remote_name, local_path):
    """Download one remote file, discarding the local copy if it fails."""
    completed = False
    out = open(local_path, 'wb')
    try:
        ftp.retrbinary('RETR %s' % remote_name, out.write)
        completed = True
    finally:
        out.close()
        if not completed:
            # A truncated file would be taken for a finished download on
            # the next run (existing files are skipped), so remove it.
            os.remove(local_path)


def mirror_ftp_dir(site, user, pw, workdir, destdir):
    """Mirror a remote FTP folder to a local folder.

    Every name returned by the server's NLST for ``workdir`` is downloaded
    into ``destdir`` unless a local entry of that name already exists.
    The entry named ``access_log.YYYYMMDD`` for today's date is always
    skipped.

    Progress is written to stdout as space-separated markers: ``=`` for a
    name that already exists locally, ``+`` for a name being downloaded.

    Args:
        site: FTP host name, passed to ``ftplib.FTP``.
        user: Login name.
        pw: Login password.
        workdir: Remote directory to mirror.
        destdir: Local directory to mirror into; created if missing.

    Raises:
        Whatever ftplib or the local file operations raise. A download
        that fails part-way removes its partial local file first.
    """
    import sys  # local import: only needed for the progress markers

    if not os.path.exists(destdir):
        os.makedirs(destdir)

    now = datetime.date.today()
    todays_log = "access_log.%s%02d%02d" % (now.year, now.month, now.day)

    ftp = ftplib.FTP(site)
    separator = ""
    try:
        ftp.login(user, pw)
        ftp.cwd(workdir)
        for name in ftp.nlst():
            if name == todays_log:
                continue
            copyloc = os.path.join(destdir, name)
            already_present = os.path.exists(copyloc)
            if already_present:
                marker = "="
            else:
                marker = "+"
            # The marker goes out before the transfer starts, so a stalled
            # download is visible as a trailing "+".
            sys.stdout.write(separator + marker)
            sys.stdout.flush()
            separator = " "
            if not already_present:
                _retrieve_ftp_file(ftp, name, copyloc)
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # The connection is already unusable; do not let a failed QUIT
            # replace the exception that got us here.
            ftp.close()
        sys.stdout.write("\n")
    sys.stdout.write("\n")
|---|
| 34 |
|
|---|
| 35 |
|
|---|
| 36 |
# Names of Analog configuration commands.  Analog.write_config looks each
# one up as an attribute of the Analog instance and writes it to the config
# file when it is set (not None).  Order here is the order of output.
options = [
    "BARSTYLE",
    "DEBUG",
    "DIREXCLUDE",
    "DNS",
    "DNSFILE",
    "HOSTNAME",
    "IMAGEDIR",
    "PAGEEXCLUDE",
    "OUTFILE",
    "REFEXCLUDE",
    "REQEXCLUDE",
    "SETTINGS",
    "WEEKBEGINSON",
]

# Names of Analog report commands, handled by Analog.write_config in the
# same way as the options above and written after them.
reports = [
    'BROWSERREP', 'BROWSERSUM',
    'DAILYREP', 'DAILYSUM', 'DIRECTORY', 'DOMAIN',
    'FAILURE', 'FAILUSER', 'FAILREF', 'FAILHOST', 'FAILVHOST',
    'FILETYPE', 'FIVEREP', 'FIVESUM',
    'GENERAL',
    'HOST', 'HOURLYREP', 'HOURLYSUM',
    'INTSEARCHQUERY', 'INTSEARCHWORD',
    'MONTHLY',
    'ORGANISATION', 'OSREP',
    'PROCTIME',
    'QUARTERLY', 'QUARTERREP', 'QUARTERSUM',
    'REDIR', 'REDIRHOST', 'REDIRREF', 'REDIRUSER', 'REDIRVHOST',
    'REFERRER', 'REFSITE', 'REQUEST',
    'SEARCHQUERY', 'SEARCHWORD', 'SIZE', 'STATUS',
    'USER',
    'VHOST',
    'WEEKHOUR', 'WEEKLY',
    'YEARLY',
]
|---|
| 64 |
|
|---|
| 65 |
|
|---|
| 66 |
def assert_path(path):
    """Make sure the folder that will contain ``path`` exists.

    ``path`` is treated as a file path: its last component is dropped and
    the remaining folder is created (with any missing parents) if it does
    not exist yet.  A ``path`` with no folder part, such as a bare file
    name, refers to the current directory and needs nothing created.

    Raises:
        OSError: if the folder cannot be created.
    """
    folder = os.path.dirname(path)
    if not folder or os.path.isdir(folder):
        return
    try:
        os.makedirs(folder)
    except OSError:
        # Another process may have created it between the check and the
        # call; only a genuinely missing folder is an error.
        if not os.path.isdir(folder):
            raise
|---|
| 71 |
|
|---|
| 72 |
|
|---|
| 73 |
class Log(object):
    """A set of logfiles with the same format(s).

    Attributes:
        filenames: list of the logfile names given to the constructor.
        formats: list of format strings accumulated through ``format()``.
        defaultformat: initialised to None; nothing in this class sets or
            reads it afterwards.
    """

    # "Any string" type for isinstance checks: Python 2 has the common
    # base class basestring, Python 3 only has str.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, *filenames):
        self.filenames = list(filenames)
        self.formats = []
        self.defaultformat = None

    def format(self, newformat):
        """Add one format string, or every item of an iterable of them."""
        if isinstance(newformat, self._string_types):
            # A string is itself iterable; extend() would add it one
            # character at a time.
            self.formats.append(newformat)
        else:
            self.formats.extend(newformat)
|---|
| 91 |
|
|---|
| 92 |
|
|---|
| 93 |
class Analog(object):
    """A wrapper for Analog, a weblog analyzer and reporting tool.

    Settings are supplied by assigning attributes whose names appear in
    the module-level ``options`` and ``reports`` lists, for example
    ``analog.OUTFILE = "report.html"``.  ``write_config`` emits every such
    attribute that is not None.

    Attributes:
        analog_app: command used to launch Analog.
        logs: ``Log`` objects added through ``add_logs``.
        includes: file names written out as ``CONFIGFILE`` lines.
        configfile: path of the config file, or None until one is chosen.
    """

    def __init__(self, analog_app):
        self.analog_app = analog_app
        self.logs = []
        self.includes = []
        self.configfile = None

    def add_logs(self, *filenames):
        """Register a new ``Log`` for ``filenames`` and return it."""
        log = Log(*filenames)
        self.logs.append(log)
        return log

    def write_config(self, *fileparts):
        """Write the Analog config file.

        Args:
            *fileparts: optional path components, joined with
                ``os.path.join`` to become the new ``self.configfile``.
                When omitted, the current ``self.configfile`` is used.

        Raises:
            TypeError: if no config file name is available.
        """
        if fileparts:
            self.configfile = os.path.join(*fileparts)

        if not self.configfile:
            raise TypeError("No config filename supplied.")
        assert_path(self.configfile)

        # Build the whole text first so that a formatting error cannot
        # leave a half-written config file behind.
        lines = []
        for log in self.logs:
            for logformat in log.formats:
                lines.append("LOGFORMAT %s\n" % logformat)
            for fname in log.filenames:
                lines.append("LOGFILE %s\n" % fname)

        for name in options + reports:
            value = getattr(self, name, None)
            if value is None:
                continue
            if isinstance(value, bool):
                # Analog spells booleans ON / OFF.
                if value:
                    value = "ON"
                else:
                    value = "OFF"
            lines.append("%s %s\n" % (name, value))

        for fname in self.includes:
            lines.append("CONFIGFILE %s\n" % fname)

        payload = "".join(lines)
        if not isinstance(payload, bytes):
            # The file is opened in binary mode so line endings stay "\n"
            # on every platform; text (Python 3 str, Python 2 unicode)
            # must be encoded before it can be written.
            payload = payload.encode("utf-8")

        f = open(self.configfile, 'wb')
        try:
            f.write(payload)
        finally:
            f.close()

    def execute(self):
        """Run Analog against ``self.configfile``, echoing its output.

        The child's stdout and stderr are merged and printed line by line.

        Returns:
            The exit status of the Analog process.

        Raises:
            AttributeError: if no config file has been set.
        """
        import subprocess  # local import: only this method needs it

        if not self.configfile:
            raise AttributeError("Config file has not been written.")

        print("Producing %s" % self.configfile)
        cmd = r'%s -G +g"%s"' % (self.analog_app, self.configfile)
        print(cmd)
        # NOTE: cmd is handed to the shell as one string, so analog_app
        # and configfile must come from a trusted source.
        proc = subprocess.Popen(cmd, shell=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        try:
            # Nothing is sent to the child; closing stdin gives it EOF
            # instead of an open pipe it could block on.
            proc.stdin.close()
            for line in proc.stdout:
                print(line.strip('\n'))
        finally:
            proc.stdout.close()
            proc.wait()
        return proc.returncode

    def analyze(self, reportdir):
        """Report on all self.logs into reportdir/analog.html.

        Sets ``OUTFILE`` and ``configfile`` (``reportdir/analog.cfg``),
        writes the config file and runs Analog.

        Returns:
            The exit status reported by ``execute``.
        """
        self.OUTFILE = os.path.join(reportdir, "analog.html")
        self.configfile = os.path.join(reportdir, "analog.cfg")
        self.write_config()
        return self.execute()
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 |
|
|---|
| 159 |
|
|---|
| 160 |
def files(dirname):
    """Return the names of the non-directory entries directly in dirname.

    Subdirectories are not descended into.  If ``dirname`` does not exist
    or cannot be listed, ``os.walk`` yields nothing and an empty list is
    returned.
    """
    for _root, _dirs, filenames in os.walk(dirname):
        # The first tuple produced by os.walk describes dirname itself,
        # which is the only level wanted.
        return filenames
    return []
|---|
| 163 |
|
|---|
| 164 |
def add_dates_to_log(dir, filename):
    """Rewrite a log file in place so every timed line also carries a date.

    The date is taken from ``filename``: characters 2-3, 4-5 and 6-7 are
    read as two-digit year, month and day, and the century is fixed at
    20 (so a name shaped like ``xxYYMMDD...`` gives ``20YY-MM-DD``).
    NOTE(review): presumably the names follow a fixed log-naming scheme;
    the characters are not validated.

    Two edits are made to the file's text:
      * a ``#Fields: time`` header becomes ``#Fields: date time``;
      * every line that starts with two digits and a colon gets the date
        and a space put in front of it.

    Lines that already start with a date no longer match, so running the
    function again leaves the file unchanged.

    Args:
        dir: folder containing the log file.
        filename: name of the log file inside ``dir``.
    """
    loc = os.path.join(dir, filename)
    date = "20%s-%s-%s " % (filename[2:4], filename[4:6], filename[6:8])

    f = open(loc, 'r')
    try:
        content = f.read()
    finally:
        f.close()

    content = re.sub(r'(?m)^#Fields: time', r'#Fields: date time', content)
    # The lookahead matches the empty position before "HH:", so the date
    # is inserted rather than replacing anything.
    content = re.sub(r'(?m)^(?=\d\d:)', date, content)

    f = open(loc, 'w')
    try:
        f.write(content)
    finally:
        f.close()
|---|
| 175 |
|
|---|