Commit c03a1d25 authored by Adrien Di Mascio
Browse files

autopep8

parent 063a9da8bde5
......@@ -24,4 +24,4 @@ classifiers = [
'Framework :: CubicWeb',
'Programming Language :: Python',
'Programming Language :: JavaScript',
]
]
......@@ -20,7 +20,7 @@ from cubicweb.toolsutils import Command
from cubicweb.cwctl import CWCTL
from .utils import SECTIONSPEC, extract_stats_dict, eid_from_url, \
get_or_create_statperiod, compress_old_hits
get_or_create_statperiod, compress_old_hits
def url_count_from_stats(cnx, stats_dict):
......@@ -50,7 +50,7 @@ def url_count_from_stats(cnx, stats_dict):
def parse_input_date(date, periodicity):
input_formats = {'month':'%m/%Y',
input_formats = {'month': '%m/%Y',
'day': '%d/%m/%Y',
'hour': '%d/%m/%Y-%Hh'}
try:
......@@ -72,6 +72,7 @@ def track_progress(iterable, nb_ops=None, pb_size=20, pb_title=''):
class StatsUpdater(object):
def __init__(self, cnx, start, stop):
self.cnx = cnx
self.config = cnx.vreg.config
......@@ -90,12 +91,12 @@ class StatsUpdater(object):
hit_key = (hit.stats_about[0].eid, hit.period[0].eid, hit.hit_type)
self.all_hits[hit_key] = hit
## internal utilities #####################################################
# internal utilities #####################################################
def awstats_filepath(self, date):
config = self.config
date_formats = {'month': '%m%Y',
'day': '%m%Y%d',
'hour':'%m%Y%d%H'}
'hour': '%m%Y%d%H'}
domain = config['awstats-domain']
if config['awstats-domain']:
domain_ext = '.' + config['awstats-domain']
......@@ -114,13 +115,16 @@ class StatsUpdater(object):
stop = last_day(start)
elif periodicity == 'day':
start = datetime(chosendate.year, chosendate.month, chosendate.day)
stop = datetime(chosendate.year, chosendate.month, chosendate.day, 23, 59, 59)
stop = datetime(
chosendate.year, chosendate.month, chosendate.day, 23, 59, 59)
elif periodicity == 'hour':
start = datetime(chosendate.year, chosendate.month, chosendate.day, chosendate.hour)
stop = datetime(chosendate.year, chosendate.month, chosendate.day, chosendate.hour, 59, 59)
start = datetime(
chosendate.year, chosendate.month, chosendate.day, chosendate.hour)
stop = datetime(
chosendate.year, chosendate.month, chosendate.day, chosendate.hour, 59, 59)
return get_or_create_statperiod(self.cnx, start, stop, stats_report)
## update API #############################################################
# update API #############################################################
def update_stats(self, skip_compress=False):
''' parses awstats and creates or updates the corresponding
data in the cubicweb instance
......@@ -131,7 +135,7 @@ class StatsUpdater(object):
stats_report = dict.fromkeys(('updated', 'created', 'exists no change',
'skipped', 'ignored', 'periods', 'compressed'), 0)
for chosendate in track_progress(date_range(self.start, self.stop),
(self.stop-self.start).days,
(self.stop - self.start).days,
pb_size=70, pb_title='Import'):
self._update_stats_for_date(chosendate, stats_report)
if not skip_compress:
......@@ -162,9 +166,10 @@ class StatsUpdater(object):
return 'ignored'
try:
hit = self.all_hits[(eid, stats_period.eid, hit_type)]
except KeyError: # no hit yet, create one
except KeyError: # no hit yet, create one
status = 'created'
hit = self.cnx.create_entity('Hits', count=total_hits, hit_type=hit_type,
hit = self.cnx.create_entity(
'Hits', count=total_hits, hit_type=hit_type,
period=stats_period, stats_about=entity)
# append it to the cache
self.all_hits[(eid, stats_period.eid, hit_type)] = hit
......@@ -178,6 +183,7 @@ class StatsUpdater(object):
class UpdateWebstatsCommand(Command):
""" Update cubicweb web stats from awstats processed files.
If startdate is not entered, the update will be done on the previous
......@@ -199,12 +205,12 @@ class UpdateWebstatsCommand(Command):
max_args = 3
options = [
("skip-compress", {"action": 'store_true',
'help' : u'Skip the compression of old daily hits into month stats'}),
'help': u'Skip the compression of old daily hits into month stats'}),
("today", {"action": 'store_true',
'help' : u'Process stats for the current day (for testing)'}),
]
'help': u'Process stats for the current day (for testing)'}),
]
## command / initial setup API ############################################
# command / initial setup API ############################################
def run(self, args):
# args = (appid, start[, stop])
appid = args.pop(0)
......@@ -217,17 +223,19 @@ class UpdateWebstatsCommand(Command):
if self.config.today:
chosendate = datetime.now()
else:
chosendate = datetime.now()-timedelta(1)
start = datetime(chosendate.year, chosendate.month, chosendate.day)
chosendate = datetime.now() - timedelta(1)
start = datetime(
chosendate.year, chosendate.month, chosendate.day)
if len(args) > 1:
stop = parse_input_date(args[1], periodicity)
if stop is None:
stop = start
if start is None or stop is None:
sys.exit(1) # parse_input_date failed to parse date
stop += ONEDAY # date_range() excludes stop boundary
sys.exit(1) # parse_input_date failed to parse date
stop += ONEDAY # date_range() excludes stop boundary
stats_updater = StatsUpdater(cnx, start, stop)
stats_report = stats_updater.update_stats(self.config.skip_compress)
stats_report = stats_updater.update_stats(
self.config.skip_compress)
print '''=== Update Report ===
Number of periods imported : %(periods)s
Number of stat objects created : %(created)s
......
......@@ -18,6 +18,7 @@
from cubicweb.entities import AnyEntity
class StatPeriod(AnyEntity):
__regid__ = 'StatPeriod'
......
......@@ -21,5 +21,4 @@ You could setup site properties or a workflow here for example.
"""
# Example of site property change
#set_property('ui.site-title', "<sitename>")
# set_property('ui.site-title', "<sitename>")
......@@ -16,27 +16,32 @@
"""cubicweb-awstats schema"""
from yams.buildobjs import (EntityType, String, Int, BigInt, Date, Datetime, Boolean,
from yams.buildobjs import (
EntityType, String, Int, BigInt, Date, Datetime, Boolean,
SubjectRelation, RelationDefinition, RelationType)
_ = unicode
# Permission table used as ``__permissions__`` by the schema entity types:
# only managers may read or add; managers and owners may update or delete.
MANAGER_PERMISSIONS = {
    'read': ('managers',),
    'update': ('managers', 'owners'),
    'delete': ('managers', 'owners'),
    'add': ('managers',),
}
# Schema entity type describing a statistics period bounded by two indexed
# dates, `start` and `stop`.
# NOTE(review): documented with comments rather than a docstring on purpose;
# presumably yams uses the class docstring as the schema description, so
# adding one would change the schema -- confirm before converting.
class StatPeriod(EntityType):
    __permissions__ = MANAGER_PERMISSIONS
    # XXX periodicity for hour cannot work with Date, when it is implemented
    # switch to DateTime
    start = Date(indexed=True)
    stop = Date(indexed=True)
# Schema entity type holding one hit counter: an indexed `hit_type` string
# (at most 128 characters) and an integer `count`.
# NOTE(review): documented with comments rather than a docstring on purpose;
# presumably yams uses the class docstring as the schema description.
class Hits(EntityType):
    __permissions__ = MANAGER_PERMISSIONS
    hit_type = String(maxsize=128, indexed=True)
    count = Int()  # BigInt()
class period(RelationType):
subject = 'Hits'
......@@ -45,9 +50,7 @@ class period(RelationType):
composite = 'object'
inlined = True
class stats_about(RelationType):
inlined = True
cardinality = '?*'
options = (
('awstats-dir',
{'type' : 'string',
{'type': 'string',
'default': '/var/lib/awstats',
'help': 'directory where the files produced by awstats are stored on the filesystem. ',
'group': 'awstats', 'level': 0,
}),
('awstats-domain',
{'type' : 'string',
{'type': 'string',
'default': '',
'help': 'domain of the website (eg. example.org). ',
'group': 'awstats', 'level': 0,
}),
('awstats-periodicity',
{'type' : 'choice',
'choices' : ('hour', 'day', 'month'),
{'type': 'choice',
'choices': ('hour', 'day', 'month'),
'default': 'day',
'help': 'stats periodicity',
'group': 'awstats', 'level': 0,
......
......@@ -32,9 +32,9 @@ from cubicweb.devtools.fake import FakeRequest
from psycopg2 import DataError
SECTIONSPEC = {
# commented sections are not useful to view
# 'MAP' : ['section', 'offset'],
# 'GENERAL': ['key', None],
# commented sections are not useful to view
# 'MAP' : ['section', 'offset'],
# 'GENERAL': ['key', None],
'TIME': ['hour', 'pages', 'hits', 'bandwidth', 'not viewed pages', 'not viewed hits', 'not viewed bandwidth'],
'VISITOR': ['host', 'pages', 'hits', 'bandwidth', 'last visit date', 'start date of last visit', 'last page of last visit'],
'DAY': ['date', 'pages', 'hits', 'bandwidth', 'visits'],
......@@ -94,12 +94,12 @@ SECTIONLABELS = {
}
# Human-readable labels for the ``From0`` .. ``From4`` origin keys.
ORIGIN_LABELS = {
    'From0': 'Direct address / Bookmark / Link in email...',
    'From1': 'Unknown Origin',
    'From2': 'Links from an Internet Search Engine',
    'From3': 'Links from an external page (other web sites except search engines)',
    'From4': 'Internal Link',
}
def extract_stats_dict(filepath):
......@@ -129,13 +129,15 @@ def extract_stats_dict(filepath):
section_name, nb_of_lines = line.split('_', 1)[1].split()
if section_name in SECTIONSPEC:
stats_dict.setdefault(section_name, {})
parsed_countdown = int(nb_of_lines)-1 if int(nb_of_lines) else 0
parsed_countdown = int(
nb_of_lines) - 1 if int(nb_of_lines) else 0
elif section_name and parsed_countdown:
for index, value in enumerate(line.split()):
key = line.split()[0]
stats_dict[section_name].setdefault(key, {})
try:
stats_dict[section_name][key][SECTIONSPEC[section_name][index]] = value
stats_dict[section_name][key][
SECTIONSPEC[section_name][index]] = value
except IndexError:
pass
parsed_countdown -= 1
......@@ -161,11 +163,12 @@ def eid_from_url(cnx, value):
except (NotFound, DataError, Redirect):
pass
def get_or_create_statperiod(cnx, start, stop, stats_report={}):
rql = 'Any P WHERE P is StatPeriod, P start "%(start_date)s", P stop "%(end_date)s"'
rset = cnx.execute(rql %
{'start_date':start,
'end_date':stop})
{'start_date': start,
'end_date': stop})
if rset:
return rset.get_entity(0, 0)
else:
......@@ -173,9 +176,11 @@ def get_or_create_statperiod(cnx, start, stop, stats_report={}):
stats_report['periods'] += 1
return cnx.create_entity('StatPeriod', start=start, stop=stop)
def time_params(cnx):
params = []
rset = cnx.execute('Any START ORDERBY START LIMIT 1 WHERE P is StatPeriod, P start START, P stop STOP HAVING STOP-START <= 2')
rset = cnx.execute(
'Any START ORDERBY START LIMIT 1 WHERE P is StatPeriod, P start START, P stop STOP HAVING STOP-START <= 2')
for (item,) in rset:
for first_day in date_range(previous_month(item), previous_month(datetime.now(), 5), incmonth=True):
delta = 2
......@@ -200,21 +205,23 @@ def compress_old_hits(cnx, update_stats={}, progressbar=True):
results[hit_type] = cnx.execute(rql,
{'start': start,
'stop': stop,
'hit_type':hit_type,
'hit_type': hit_type,
'timedelta': delta})
if not any(results.values()):
continue
# deleting statperiod deletes all associated hits
drset = cnx.execute('DELETE StatPeriod P WHERE P start >= %(start)s, P stop <= %(stop)s',
drset = cnx.execute(
'DELETE StatPeriod P WHERE P start >= %(start)s, P stop <= %(stop)s',
{'start': start,
'stop': stop,})
'stop': stop, })
update_stats['compressed'] += len(drset)
stp = get_or_create_statperiod(cnx, start, stop)
for hit_type, rset in results.items():
for eid, count in rset:
content_entity = cnx.entity_from_eid(eid)
# FIXME if Hits for period and content exist, update it ?
cnx.create_entity('Hits', hit_type=hit_type, period=stp, count=count,
cnx.create_entity(
'Hits', hit_type=hit_type, period=stp, count=count,
stats_about=content_entity)
if progressbar:
pb.finish()
......@@ -21,6 +21,7 @@ except ImportError:
from cubicweb.web import action
from cubicweb.predicates import match_user_groups
class AwstatsAccessAction(action.Action):
__regid__ = 'awstats-action'
__select__ = match_user_groups('managers')
......@@ -41,4 +42,3 @@ class WebStatsAccessAction(action.Action):
def url(self):
    """Return the URL built for the 'view' path with ``vid='webstats'``."""
    return self._cw.build_url('view', vid='webstats')
......@@ -23,7 +23,9 @@ from cubicweb.view import EntityView
from cubicweb.web.views import primary, navigation
from cubicweb.predicates import is_instance
class StatPeriodPrimaryView(primary.PrimaryView):
"""
`column_types_aggr` enables you to combine results by type
......@@ -33,7 +35,7 @@ class StatPeriodPrimaryView(primary.PrimaryView):
('Card'))
"""
__select__ = is_instance('StatPeriod')
column_types_aggr = None #tuple of tuples of types
column_types_aggr = None # tuple of tuples of types
def cell_call(self, row, col):
_ = self._cw._
......@@ -41,10 +43,13 @@ class StatPeriodPrimaryView(primary.PrimaryView):
self.w(u'<div id="primarystatperiod">')
entity = self.cw_rset.get_entity(row, col)
self.w(u'<h1>%s %s - %s (%s %s)</h1>' % (_('Statistics for period :'),
entity.printable_value('start'),
entity.printable_value('stop'),
(entity.stop - entity.start).days,
_('days')) )
entity.printable_value(
'start'),
entity.printable_value(
'stop'),
(entity.stop -
entity.start).days,
_('days')))
rset = self._cw.execute('DISTINCT Any C WHERE X is Hits, X hit_type C')
self.w(u'<a href="%s">%s</a>' % (entity.absolute_url(showall=1),
_('show all results')))
......@@ -52,28 +57,30 @@ class StatPeriodPrimaryView(primary.PrimaryView):
self.w(u'<h3>%s</h3>' % hit_type)
rql = 'Any X, C ORDERBY C DESC %(limit)s WHERE H stats_about X, H hit_type "%(type)s",'\
'H count C, H period P, P eid %%(e)s' % {'type': hit_type,
'limit': req.form.get('showall') and ' ' or 'LIMIT 20' }
'limit': req.form.get('showall') and ' ' or 'LIMIT 20'}
if self.column_types_aggr:
self.w(u'<table><tr>')
for types in self.column_types_aggr:
self.w(u'<td>')
typedrql = rql + ', X is in (%s)' % ','.join(types)
rset = self._cw.execute(typedrql, {'e':entity.eid})
rset = self._cw.execute(typedrql, {'e': entity.eid})
self.wview('table', rset, 'null')
# cf rql/editextensions.py unset_limit
nolimit_rql = typedrql.replace('LIMIT 20', '')
self.w(u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e':entity.eid},
vid='csvexport')))
#FIXME TODO not working right now
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
# FIXME TODO not working right now
self.wview('piechart', rset, 'null')
self.w(u'</td>')
self.w(u'</tr></table>')
else:
rset = self._cw.execute(rql, {'e':entity.eid})
rset = self._cw.execute(rql, {'e': entity.eid})
self.wview('table', rset, 'null')
nolimit_rql = rql.replace('LIMIT 20', '')
self.w(u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e':entity.eid},
vid='csvexport')))
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
self.w(u'</div>')
......@@ -83,16 +90,18 @@ class StatPeriodIPrevNextAdapter(navigation.IPrevNextAdapter):
def previous_entity(self):
    """Return the StatPeriod whose start is the latest one before ours.

    The query orders periods by descending start date and keeps the first
    one starting strictly before the adapted entity's own start.
    Returns None when the query yields no row.
    """
    # the query is executed exactly once
    rset = self._cw.execute(
        "StatPeriod P ORDERBY S DESC LIMIT 1 WHERE P start S, "
        "P2 start S2, P2 eid %(e)s HAVING S < S2",
        {'e': self.entity.eid})
    if rset:
        return rset.get_entity(0, 0)
    return None
def next_entity(self):
    """Return the StatPeriod whose start is the earliest one after ours.

    The query orders periods by ascending start date and keeps the first
    one starting strictly after the adapted entity's own start.
    Returns None when the query yields no row.
    """
    # the query is executed exactly once
    rset = self._cw.execute(
        "StatPeriod P ORDERBY S LIMIT 1 WHERE P start S, "
        "P2 start S2, P2 eid %(e)s HAVING S > S2",
        {'e': self.entity.eid})
    if rset:
        return rset.get_entity(0, 0)
    return None
......@@ -108,13 +117,14 @@ class StatGraph(EntityView):
entity.dc_title()))
self.w(u'<div id="webstats">')
rql = ('DISTINCT Any S, HITS ORDERBY S WITH S, HITS BEING ('
'(Any START, HITS WHERE H count HITS, H hit_type "normal", H period P, P start START, P stop STOP, H stats_about E, E eid %%(e)s %s)'
' UNION '
'(Any START, 0 WHERE P is StatPeriod, P start START, P stop STOP, NOT EXISTS(H period P, H stats_about E, E eid %%(e)s) %s))')
'(Any START, HITS WHERE H count HITS, H hit_type "normal", H period P, P start START, P stop STOP, H stats_about E, E eid %%(e)s %s)'
' UNION '
'(Any START, 0 WHERE P is StatPeriod, P start START, P stop STOP, NOT EXISTS(H period P, H stats_about E, E eid %%(e)s) %s))')
plot_displayed = False
for constraint, label in ((' HAVING STOP-START <= 20', _('Daily')),
(' HAVING STOP-START >= 20', _('Monthly'))):
rset = self._cw.execute(rql % (constraint, constraint), {'e':entity.eid})
rset = self._cw.execute(
rql % (constraint, constraint), {'e': entity.eid})
if rset:
self.w(u'<h2>%s</h2>' % label)
self.w(self._cw.view('plot', rset, 'null'))
......
......@@ -37,27 +37,31 @@ from cubicweb.web.formfields import StringField, DateField
from cubicweb.web import formwidgets as fwdgs, httpcache
from cubicweb_awstats.utils import SECTIONSPEC, SECTIONLABELS, \
extract_stats_dict, ORIGIN_LABELS
extract_stats_dict, ORIGIN_LABELS
def extract_available_time_periods(form, **attrs):
    """Extract available time periods from the list of awstats files.

    Lists the directory configured as ``awstats-dir`` and keeps the file
    names matching ``awstats<digits>[.]<domain>.txt``, where the number of
    digits depends on the configured ``awstats-periodicity`` and the domain
    is the ``domain`` request form parameter (defaulting to the
    ``awstats-domain`` option).

    Returns a sorted list of ``(formatted period, raw period)`` tuples.
    Raises KeyError if the configured periodicity is not one of 'hour',
    'day' or 'month'.
    """
    config = form._cw.vreg.config
    selected_domain = form._cw.form.get('domain', config['awstats-domain'])
    # number of digits of the period part of the file name
    size = {
        'hour': 10,
        'day': 8,
        'month': 6,
    }
    # The domain may come from the request form: escape it so that its dots
    # (and any other regex metacharacter) only match themselves.  The dot
    # of the ".txt" extension is escaped for the same reason.  The pattern
    # does not depend on the file name, so it is compiled once.
    pattern = re.compile(r'awstats(\d{%s})\.?%s\.txt' % (
        size[config['awstats-periodicity']], re.escape(selected_domain)))
    periods = []
    for filename in os.listdir(config['awstats-dir']):
        match = pattern.search(filename)
        if match:
            periods.append((specific_format('time_period', match.group(1)),
                            match.group(1)))
    return sorted(periods)
def extract_available_domains(form, **attrs):
""" extract available domains from list of awstats files """
domains = []
......@@ -68,6 +72,7 @@ def extract_available_domains(form, **attrs):
domains.append(match.group(3))
return sorted(domains)
def use_as_sort_key(value):
""" use value as sort value, try it as an int, else just use value """
try:
......@@ -82,12 +87,12 @@ def specific_format(header, value):
return
elif header == 'bandwidth':
return convert_to_bytes(int(value))
elif header == 'time_period' and len(value) in (6,8,10):
if len(value) == 8: # day
elif header == 'time_period' and len(value) in (6, 8, 10):
if len(value) == 8: # day
return datetime.strptime(value, '%m%Y%d').strftime('%Y/%m/%d')
elif len(value) == 6: # month
elif len(value) == 6: # month
return datetime.strptime(value, '%m%Y').strftime('%Y/%m')
elif len(value) == 10: # hour
elif len(value) == 10: # hour
return datetime.strptime(value, '%m%Y%d%H').strftime('%Y/%m/%d %H:00')
elif value and value.startswith('http://'):
return '<a href="%s">%s</a>' % (value, value)
......@@ -100,6 +105,7 @@ def specific_format(header, value):
pass
return xml_escape(urllib.unquote(value).decode('utf8'))
def convert_to_bytes(value):
""" display bandwidth data using a human readable notation """
ordered = [(size, label) for label, size in BYTE_UNITS.items()]
......@@ -108,29 +114,37 @@ def convert_to_bytes(value):
if value / size != 0:
return '%s %s' % (value / size, label)
class AwstatsRefreshForm(forms.FieldsForm):
    """Form to filter and select what stats are being displayed"""
    __regid__ = 'select-awstats'

    # Each select box gets its own widget instance; all of them submit the
    # form as soon as their value changes.
    domain = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Domain:'),
        choices=extract_available_domains)
    # TODO - use calendar widget
    time_period = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Period:'),
        choices=extract_available_time_periods)
    limit = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Number of results :'),
        choices=[u'%s' % i for i in (10, 25, 50, 100)])
    # first choice is the empty one, then one choice per SECTIONLABELS item
    section = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Show section :'),
        choices=[('', ''), ] + [(label, value)
                                for value, label in SECTIONLABELS.items()])
    form_buttons = [fwdgs.SubmitButton(label=_('Apply'))]

    @property
    def action(self):
        """URL the form is submitted to, built with ``vid='awstats'``."""
        return self._cw.build_url('', vid='awstats')
class AwstatsView(StartupView):
""" Simple HTML export of the stats in awstats files """
__regid__ = 'awstats'
......@@ -143,7 +157,8 @@ class AwstatsView(StartupView):
form.render(w=self.w)
domain = req.form.get('domain', '')
time_period = req.form.get('time_period', extract_available_time_periods(form)[0][1])
time_period = req.form.get('time_period', extract_available_time_periods(
form)[0][1])
limit = int(req.form.get('limit', 10))
filename = 'awstats%s%s.txt' % (time_period, domain and '.%s' % domain)
......@@ -175,7 +190,8 @@ class AwstatsView(StartupView):
self.w(u'<table id="navigation">')
for key in SECTIONSPEC.keys():
if key in stats_dict.keys() and stats_dict[key].values():
self.w(u'<tr><td><a href="#%s">%s</a></td></tr>' % (key, _(SECTIONLABELS[key])))
self.w(u'<tr><td><a href="#%s">%s</a></td></tr>' %
(key, _(SECTIONLABELS[key])))
self.w(u'</table>')
self.w(u'</div>')
......@@ -196,12 +212,14 @@ class AwstatsView(StartupView):
self.w(u'<tr>')
for tdindex, header in enumerate(SECTIONSPEC[section_name]):
if tdindex:
self.w(u'<td class="data">%s</td>' % specific_format(header, item.get(header)))
self.w(u'<td class="data">%s</td>' %
specific_format(header, item.get(header)))
elif header == 'origin':
self.w(u'<td scope="row">%s</td>' % specific_format(heade