Contact: fumanchu@aminus.org

Log in as guest/misc to create tickets

root/pyanalog/__init__.py

Revision 17 (checked in by fumanchu, 7 years ago)

Set eol-style:native on all .py files.

  • Property svn:eol-style set to native
Line 
1 """A wrapper for Analog, a weblog analyzer and reporting tool."""
2
3 import ftplib
4 import datetime
5 import os
6 import re
7
8
9 def mirror_ftp_dir(site, user, pw, workdir, destdir):
10     """Mirror a remote FTP folder to a local folder."""
11     if not os.path.exists(destdir):
12         os.makedirs(destdir)
13     today = datetime.date.today()
14     today = "access_log.%s%02d%02d" % (today.year, today.month, today.day)
15     ftp = ftplib.FTP(site)
16     try:
17         ftp.login(user, pw)
18         ftp.cwd(workdir)
19         files = ftp.nlst()
20         for line in files:
21             # Skip today, since it is probably not yet written completely.
22             if line != today:
23                 copyloc = os.path.join(destdir, line)
24                 if os.path.exists(copyloc):
25                     print "=",
26                 else:
27                     print "+",
28                     ftp.retrbinary('RETR %s' % line,
29                                    open(copyloc, 'wb').write)
30     finally:
31         ftp.quit()
32         print
33         print
34
35
# Setting names that Analog.write_config looks up as attributes on the
# Analog instance; each one that is set is written to the config file.
# The order here is the order in which they are written.
options = """
    BARSTYLE
    DEBUG DIREXCLUDE DNS DNSFILE
    HOSTNAME
    IMAGEDIR
    PAGEEXCLUDE
    OUTFILE
    REFEXCLUDE REQEXCLUDE
    SETTINGS
    WEEKBEGINSON
""".split()

# Report names, handled by Analog.write_config exactly like `options`
# (they are written after all of the options).
reports = """
    BROWSERREP BROWSERSUM
    DAILYREP DAILYSUM DIRECTORY DOMAIN
    FAILURE FAILUSER FAILREF FAILHOST FAILVHOST FILETYPE FIVEREP FIVESUM
    GENERAL
    HOST HOURLYREP HOURLYSUM
    INTSEARCHQUERY INTSEARCHWORD
    MONTHLY
    ORGANISATION OSREP
    PROCTIME
    QUARTERLY QUARTERREP QUARTERSUM
    REDIR REDIRHOST REDIRREF REDIRUSER REDIRVHOST REFERRER REFSITE REQUEST
    SEARCHQUERY SEARCHWORD SIZE STATUS
    USER
    VHOST
    WEEKHOUR WEEKLY
    YEARLY
""".split()
64
65
def assert_path(path):
    """assert_path(path) -> Make sure the folder that contains path exists.

    `path` is the location of a file; only its folder part is looked at.
    Missing folders (including intermediate ones) are created. The file
    itself is neither created nor touched.

    A path without a folder part (a bare filename) needs no folder to be
    created, so it is accepted and nothing happens.
    """
    folder = os.path.dirname(path)
    # An empty folder part must be skipped: os.makedirs("") raises OSError.
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
71
72
class Log(object):
    """A group of log files that all share the same format(s)."""

    def __init__(self, *filenames):
        # "If you specify several formats, analog tries to match each line to
        # the first format first, then if that fails the next, and so on,
        # so the order of the formats is important. Usually you want to
        # specify the most common one first, to minimise the time spent
        # trying to match lines to inappropriate formats."
        self.formats = []
        self.defaultformat = None
        # Keep a private, mutable copy of the names that were passed in.
        self.filenames = list(filenames)

    def format(self, newformat):
        """Register one format (a string) or several (an iterable of them).

        New formats are added after the ones registered earlier, so the
        order of the calls is the order of self.formats.
        """
        if not isinstance(newformat, basestring):
            self.formats.extend(newformat)
            return
        self.formats.append(newformat)
91
92
93 class Analog(object):
94     """A wrapper for Analog, a weblog analyzer and reporting tool."""
95    
96     def __init__(self, analog_app):
97         self.analog_app = analog_app
98         self.logs = []
99         self.includes = []
100         self.configfile = None
101    
102     def add_logs(self, *filenames):
103         log = Log(*filenames)
104         self.logs.append(log)
105         return log
106    
107     def write_config(self, *fileparts):
108         if fileparts:
109             self.configfile = os.path.join(*fileparts)
110        
111         if not self.configfile:
112             raise TypeError("No config filename supplied.")
113         assert_path(self.configfile)
114        
115         f = open(self.configfile, 'wb')
116         try:
117             for log in self.logs:
118                 for format in log.formats:
119                     f.write("LOGFORMAT %s\n" % format)
120                 for fname in log.filenames:
121                     f.write("LOGFILE %s\n" % fname)
122            
123             for name in options + reports:
124                 value = getattr(self, name, None)
125                 if value is not None:
126                     if isinstance(value, bool):
127                         if value:
128                             f.write("%s ON\n" % name)
129                         else:
130                             f.write("%s OFF\n" % name)
131                     else:
132                         f.write("%s %s\n" % (name, value))
133            
134             for fname in self.includes:
135                 f.write("CONFIGFILE %s\n" % fname)
136         finally:
137             f.close()
138    
139     def execute(self):
140         if self.configfile:
141             print "Producing %s" % self.configfile
142             cmd = r'%s -G +g"%s"' % (self.analog_app, self.configfile)
143             print cmd
144             stdin, stdouterr = os.popen4(cmd)
145             for l in stdouterr.xreadlines():
146                 print l.strip('\n')
147         else:
148             raise AttributeError("Config file has not been written.")
149    
150     def analyze(self, reportdir):
151         """Report on all self.logfiles into reportdir/analog.html."""
152         self.OUTFILE = os.path.join(reportdir, "analog.html")
153         self.configfile = os.path.join(reportdir, "analog.cfg")
154         self.write_config()
155         self.execute()
156
157
158 # IIS format tools
159
def files(dirname):
    """Return the names of the files directly inside `dirname`.

    Only plain names are returned (no folder prefix); sub-folders are
    neither listed nor descended into. If os.walk yields nothing for
    `dirname` (for example because the folder does not exist), an empty
    list is returned.
    """
    # os.walk works top-down by default, so the first triple it yields
    # describes `dirname` itself; return straight from the first pass.
    for root, dirs, filenames in os.walk(dirname):
        return filenames
    return []
163
def add_dates_to_log(dir, filename):
    """Add a date column to a log file that only records times.

    The file dir/filename is rewritten in place:

      * a header line starting with "#Fields: time" becomes
        "#Fields: date time";
      * every line starting with two digits and a colon gets
        "20YY-MM-DD " put in front of it, where YY, MM and DD are the
        characters 2-3, 4-5 and 6-7 of `filename`.

    NOTE(review): the slicing presumably targets IIS-style names such as
    "exYYMMDD.log" - confirm against the callers.
    """
    loc = os.path.join(dir, filename)

    f = open(loc, 'r')
    try:
        content = f.read()
    finally:
        # Release the handle even if reading fails.
        f.close()

    date = "20%s-%s-%s " % (filename[2:4], filename[4:6], filename[6:8])
    content = re.sub(r'(?m)^#Fields: time', r'#Fields: date time', content)
    # The zero-width lookahead inserts the date before the time and
    # leaves the time itself untouched.
    content = re.sub(r'(?m)^(?=\d\d:)', date, content)

    f = open(loc, 'w')
    try:
        f.write(content)
    finally:
        f.close()
175
Note: See TracBrowser for help on using the browser.