Commit 06ad4d0e authored by Arthur Lutz
Browse files

refactoring and documentation

parent e0da87dae080
Summary
-------
cubicweb integrated awstats frontend
......@@ -19,6 +19,12 @@ from cubicweb.cwctl import CWCTL
from utils import SECTIONSPEC, extract_stats_dict, eid_from_url
def url_count_from_stats(stats_dict):
'''
parse most visited urls in stats_dict generated from awstats txt file
returns two dictionnaries with eid as key and sequence of values as value
one for normal navigation, the other for rdf navigation
'''
visit_count_dict = {}
visit_count_rdf_dict = {}
for item in stats_dict['SIDER'].values():
......@@ -44,28 +50,34 @@ class UpdateWebstatsCommand(Command):
min_args = 1
def get_current_stats_period(self, session, chosendate):
    """Return the StatPeriod matching the month of `chosendate`.

    The period boundaries are computed with first_day() and last_day().
    When no StatPeriod entity with exactly these boundaries is found, a
    new one is created.

    :param session: object providing execute() and create_entity()
    :param chosendate: date used to compute the period boundaries
    :return: the existing or newly created StatPeriod entity
    """
    start = first_day(chosendate)
    end = last_day(start)
    # Hand the dates over as query arguments instead of interpolating them
    # into the RQL text: no dependency on how the dates are rendered as
    # strings and no quoting issue.
    rql = ('Any P WHERE P is StatPeriod, '
           'P start %(start_date)s, P stop %(end_date)s')
    rset = session.execute(rql, {'start_date': start, 'end_date': end})
    if rset:
        return rset.get_entity(0, 0)
    return session.create_entity('StatPeriod', start=start, stop=end)
def update_stats(self, session, args):
''' parses awstats and creates or updates the corresponding
data in the cubicweb instance'''
if args:
chosendate = datetime.strptime(args[0], '%m/%Y')
else:
chosendate = datetime.now()
stats_period = self.get_current_stats_period(session, chosendate)
awstatsdir = session.vreg.config.get('awstats-dir', '/var/lib/awstats')
domain = session.vreg.config.get('awstats-domain', '')
filename = 'awstats%s%s.txt' % (chosendate.strftime('%m%Y'), domain and '.%s' % domain)
stats_dict = extract_stats_dict(awstatsdir, filename)
normal_dict, rdf_dict = url_count_from_stats(stats_dict)
is_rdf = False
rset = session.execute('Any N WHERE X relation_type R, R name "stats_about", X to_entity Y, Y name N')
rql = 'Any N WHERE X relation_type R, R name "stats_about", X to_entity Y, Y name N'
rset = session.execute(rql)
allowed_types = [item[0] for item in rset]
update_stats = {'updated':0,
'created':0,
......@@ -77,12 +89,12 @@ class UpdateWebstatsCommand(Command):
for eid, values in count_dict.items():
visit_count = visit_count_rdf = 0
total_hits = sum([item[0] for item in values])
stats_period = self.get_current_stats_period(session, chosendate)
entity = session.entity_from_eid(eid)
if not entity.__regid__ in allowed_types:
update_stats['skipped'] += 1
continue
rql = 'Any X,V WHERE X is Hits, X count V, X hit_type "%(hit_type)s", X stats_about E, E eid %(e)s, X period P, P eid %(sp)s'
rql = 'Any X,V WHERE X is Hits, X count V, X hit_type "%(hit_type)s",' \
'X stats_about E, E eid %(e)s, X period P, P eid %(sp)s'
rset = session.execute(rql % {'e':eid,
'sp':stats_period.eid,
'hit_type': is_rdf and 'rdf' or 'normal'})
......@@ -99,8 +111,10 @@ class UpdateWebstatsCommand(Command):
else:
print 'create', entity
update_stats['created'] += 1
session.create_entity('Hits', count = total_hits, period=stats_period,
stats_about = entity, hit_type=is_rdf and u'rdf' or u'normal')
session.create_entity('Hits', count = total_hits,
period=stats_period,
stats_about = entity,
hit_type=is_rdf and u'rdf' or u'normal')
print update_stats
......
# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import os.path as osp
......@@ -33,6 +49,45 @@ SECTIONSPEC = {
'SIDER_404': ['urls with 404 errors', 'hits', 'last url referer'],
}
# Display title of each awstats section, keyed by section name.
SECTIONLABELS = {
    'TIME': 'Visits by hour',
    'VISITOR': 'Top visitors (by host)',
    'DAY': 'Visits by days of the month',
    'DOMAIN': 'Visitors domains/countries',
    'LOGIN': 'logged in users',
    'ROBOT': 'Robots/Spiders visitors',
    'WORMS': 'Worm visits',
    'EMAILSENDER': 'email sender',
    'EMAILRECEIVER': 'email receiver',
    'SESSION': 'Visits duration',
    'SIDER': 'Most visited URLs',
    'FILETYPES': 'Visited file types',
    'OS': 'Visiting operating systems',
    'BROWSER': 'Visiting browsers',
    'SCREENSIZE': 'Hits by Screen size',
    'UNKNOWNREFERER': 'Unknown referer os',
    'UNKNOWNREFERERBROWSER': 'Unknown referer browser',
    'ORIGIN': 'Origin of hits',
    'SEREFERRALS': 'Search engine referers hits',
    'PAGEREFS': 'Main external page referers',
    'SEARCHWORDS': 'Hits from search keyphrases',
    'KEYWORDS': 'Hits from search keywords',
    #'MISC': ['misc id', 'pages', 'hits', 'bandwidth'],
    'ERRORS': 'HTTP Status codes',
    'CLUSTER': 'Visits by cluster id',
    'SIDER_404': 'Hits with 404 errors',
}
# Display text for each 'FromN' origin code.
ORIGIN_LABELS = {
    'From0': 'Direct address / Bookmark / Link in email...',
    'From1': 'Unknown Origin',
    'From2': 'Links from an Internet Search Engine',
    'From3': 'Links from an external page (other web sites except search engines)',
    'From4': 'Internal Link',
}
def extract_stats_dict(awstats_dir, filename):
''' from an awstats file extract structured data into a dict
......
# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from cubicweb.web import action
from cubicweb.selectors import match_user_groups
class AwstatsAccessAction(action.Action):
    """Action of the 'manage' category pointing to the awstats view.

    Selected through match_user_groups('managers').
    """
    __regid__ = 'awstats-action'
    __select__ = match_user_groups('managers')

    title = _('awstats statistics')
    category = 'manage'
    order = 11

    def url(self):
        """Return the URL built by the request for '?vid=awstats'."""
        target = '?vid=awstats'
        return self._cw.build_url(target)
def registration_callback(vreg):
    """Register AwstatsAccessAction on `vreg`."""
    action_class = AwstatsAccessAction
    vreg.register(action_class)
......@@ -18,9 +18,9 @@
import os
import os.path as osp
import re
from datetime import datetime
import urllib
from logilab.mtconverter import xml_escape
from logilab.common.textutils import BYTE_UNITS
......@@ -29,21 +29,21 @@ from cubicweb.view import StartupView
from cubicweb.web.views import forms
from cubicweb.web.formfields import StringField
from cubicweb.web import formwidgets as fwdgs
from cubicweb import tags, Unauthorized
from cubes.awstats.utils import SECTIONSPEC, extract_stats_dict
from cubes.awstats.utils import SECTIONSPEC, SECTIONLABELS, \
extract_stats_dict, ORIGIN_LABELS
# FIXME - find a clean way to attach or pass this to form so it is available in form choices
# FIXME - find a clean way to attach or pass this to form so
# it is available in form choices
AWSTATS_DIR = '/var/lib/awstats'
#AWSTATS_DIR = '/home/arthur/local/awstats'
#AWSTATS_DIR = self._cw.vreg.config['awstats-dir']
def extract_available_months(form, **attrs):
""" extract available months from list of awstats files """
months = []
selected_domain = form.req.form.get('domain', '')
#rint 'XXX', form.domain.choices[0]
for filename in os.listdir(AWSTATS_DIR):
match = re.search('awstats(\d{6})\.?%s.txt' % selected_domain, filename)
if match:
......@@ -52,6 +52,7 @@ def extract_available_months(form, **attrs):
return months
def extract_available_domains(form, **attrs):
""" extract available domains from list of awstats files """
domains = []
for filename in os.listdir(AWSTATS_DIR):
match = re.search('awstats(\d{2})(\d{4})\.?(.*).txt', filename)
......@@ -61,6 +62,7 @@ def extract_available_domains(form, **attrs):
return domains
def use_as_sort_key(value):
""" use value as sort value, try it as an int, else just use value """
try:
return int(value)
except ValueError:
......@@ -68,6 +70,7 @@ def use_as_sort_key(value):
def specific_format(header, value):
""" guess from a header and value how to display it"""
if value is None:
return
elif header == 'bandwidth':
......@@ -81,16 +84,18 @@ def specific_format(header, value):
return datetime.strptime(value, '%Y%m%d').strftime('%d/%m/%Y')
except ValueError:
pass
return xml_escape(value)
return xml_escape(urllib.unquote(value).decode('utf8'))
def convert_to_bytes(value, units=None):
    """Display bandwidth data using a human readable notation.

    The largest unit whose size fits at least once into `value` is used and
    the result is truncated to a whole number of that unit.

    :param value: numeric amount to format
    :param units: optional mapping of unit label -> unit size; defaults to
        BYTE_UNITS
    :return: a string such as '2 KB'; when no unit fits (e.g. value is 0)
        the value is reported in the smallest unit; None only when the unit
        mapping is empty
    """
    if units is None:
        units = BYTE_UNITS
    # Largest unit first, so that the first unit that fits is the best one.
    ordered = sorted(((size, label) for label, size in units.items()),
                     reverse=True)
    for size, label in ordered:
        # Floor division explicitly: a plain '/' is only an integer division
        # on Python 2 and would make every unit "fit" elsewhere.
        quotient = value // size
        if quotient != 0:
            return '%s %s' % (quotient, label)
    if ordered:
        # Nothing fits: fall back to the smallest unit rather than
        # returning None.
        return '%s %s' % (value, ordered[-1][1])
    return None
class AwstatsRefreshForm(forms.FieldsForm):
"""Form to filter and select what stats are being displayed"""
__regid__ = 'select-awstats'
action = '/?vid=awstats'
domain = StringField(widget=fwdgs.Select(attrs={'onchange':'this.form.submit()'}),
......@@ -104,14 +109,16 @@ class AwstatsRefreshForm(forms.FieldsForm):
choices=[10,25,50,100])
section = StringField(widget=fwdgs.Select(attrs={'onchange':'this.form.submit()'}),
label=_('Show section :'),
choices=['',]+SECTIONSPEC.keys())
choices=[('',''),]+[(label, value) for value, label in SECTIONLABELS.items()])
form_buttons = [fwdgs.SubmitButton(label=_('Apply'))]
class AwstatsView(StartupView):
""" Simple HTML export of the stats in awstats files """
__regid__ = 'awstats'
def call(self):
""" main call """
req = self._cw
form = self._cw.vreg['forms'].select('select-awstats', self._cw)
......@@ -125,67 +132,75 @@ class AwstatsView(StartupView):
try:
stats_dict = extract_stats_dict(AWSTATS_DIR, filename)
except IOError:
filename = 'awstats%s%s.txt' % (extract_available_months(form)[0], domain and '.%s' % domain)
filename = 'awstats%s%s.txt' % (extract_available_months(form)[0],
domain and '.%s' % domain)
stats_dict = extract_stats_dict(AWSTATS_DIR, filename)
self.w(u'<div id="awstats">')
self.w(u'<h1>%s : %s</h1>' % (_('Domain'), domain or 'default'))
self.w(u'<h2>%s : %s</h2>' % (_('Time period'), '%s/%s' % (month[:2], month[2:]) ))
self.w(u'<h2>%s : %s</h2>' % (_('Time period'),
'%s/%s' % (month[:2], month[2:]) ))
if req.form.get('section'):
self.generic_table(req.form.get('section'), stats_dict, limit)
else:
self.render_navigation(stats_dict)
for key, value in SECTIONSPEC.items():
self.generic_table(key, stats_dict, limit)
#if value[1] == 'hits':
# self.simple_hits_display(key, stats_dict, limit)
self.w(u'</div>')
def render_navigation(self, stats_dict):
    """Render navigation according to which sections are present.

    Writes, through self.w, a list holding one anchor link per section of
    SECTIONSPEC that has values in `stats_dict`.

    :param stats_dict: mapping of section name -> mapping of section rows
    """
    write = self.w
    write(u'<div>')
    # FIXME - have inline list using css (better : in cubicweb)
    write(u'<ul>')
    for section in SECTIONSPEC:
        if not stats_dict[section].values():
            continue
        # Fall back to the raw section key when no label is declared, so
        # that an unlabelled section does not break the whole page.
        label = SECTIONLABELS.get(section, section)
        write(u'<li><a href="#%s">%s</a></li>' % (section, label))
    write(u'</ul>')
    write(u'</div>')
def generic_table(self, section_name, stats_dict, limit):
    """Write a generic HTML table from a section in awstats.

    Nothing is written when the section has no values. Otherwise an anchor,
    a title, a header row built from SECTIONSPEC[section_name] and at most
    `limit` data rows (in the order given by order_values()) are written
    through self.w.

    :param section_name: key of the section in stats_dict and SECTIONSPEC
    :param stats_dict: mapping of section name -> mapping of section rows
    :param limit: maximum number of rows to write
    """
    if not stats_dict[section_name].values():
        return
    write = self.w
    headers = SECTIONSPEC[section_name]
    write(u'<a name="%s"/>' % section_name)
    # Fall back to the section name when no label is declared for it.
    write(u'<h3>%s</h3>' % SECTIONLABELS.get(section_name, section_name))
    write(u'<div><table class="listing">')
    write(u'<tr class="header">')
    for header in headers:
        write(u'<th scope="col">%s</th>' % xml_escape(header))
    write(u'</tr><tbody>')
    ordered_values = self.order_values(section_name, stats_dict)
    for index, (_sort_value, item) in enumerate(ordered_values):
        # Check the limit before writing the row: testing 'index > limit'
        # after the row was written let limit + 2 rows through.
        if index >= limit:
            break
        write(u'<tr>')
        for tdindex, header in enumerate(headers):
            value = item.get(header)
            if tdindex:
                cell = u'<td class="data">%s</td>'
            else:
                # The first column is the row header.
                cell = u'<td scope="row">%s</td>'
                if header == 'origin':
                    # Unknown origin codes are displayed as they are
                    # instead of raising KeyError.
                    value = ORIGIN_LABELS.get(value, value)
            write(cell % specific_format(header, value))
        write(u'</tr>')
    write(u'</tbody></table></div><br/>')
def simple_hits_display(self, section_name, stats_dict, limit):
    """Write a nested list of the section rows that have a 'hits' value.

    Rows are sorted in decreasing order of (hits, label); only the first
    `limit` ones are listed, while the title shows the total row count.
    Nothing is written when no row has a 'hits' entry.
    """
    key = SECTIONSPEC[section_name][0]
    pairs = []
    for row in stats_dict[section_name].values():
        if 'hits' in row:
            pairs.append((use_as_sort_key(row['hits']), row[key]))
    if not pairs:
        return
    pairs.sort(reverse=True)
    entries = ['<li>%s (%s)</li>' % (xml_escape(label), hits)
               for hits, label in pairs[:limit]]
    self.w(u'<ul><li><h2>%s (%s)</h2></li><ul> %s</ul></ul>'
           % (key, len(pairs), ''.join(entries)))
def order_values(self, section_name, stats_dict):
    """Extract the rows of a section in ordered fashion.

    The sort column is 'hour' (ascending) when the section has one, else
    'hits' (descending), else the second column of the section
    (descending).

    :param section_name: key of the section in stats_dict and SECTIONSPEC
    :param stats_dict: mapping of section name -> mapping of section rows
    :return: list of (sort value as int, row) pairs, sorted
    :raises KeyError: when a row lacks the sort column
    :raises ValueError: when a sort value cannot be converted to int
    """
    columns = SECTIONSPEC[section_name]
    if "hour" in columns:
        order_key, reverse = "hour", False
    elif "hits" in columns:
        order_key, reverse = "hits", True
    else:
        order_key, reverse = columns[1], True
    ordered_values = [(int(item[order_key]), item)
                      for item in stats_dict[section_name].values()]
    # Sort on the numeric value only: comparing whole tuples falls back to
    # comparing the row dicts on ties, which is arbitrary on Python 2 and a
    # TypeError on Python 3.
    ordered_values.sort(key=lambda pair: pair[0], reverse=reverse)
    return ordered_values
def registration_callback(vreg):
    """Register AwstatsView on `vreg`."""
    view_class = AwstatsView
    vreg.register(view_class)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment