Commit 17a02793 authored by Laurent Peuch's avatar Laurent Peuch

fix: please the flake8 god and fix two errors on the way

parent f8e4967d7639
Pipeline #15823 passed with stage
in 1 minute and 7 seconds
......@@ -7,14 +7,14 @@ This command will generate webstats objects for all linked document types.
"""
from __future__ import print_function
import sys
import os.path as osp
from datetime import datetime, timedelta
from logilab.common.date import first_day, last_day, date_range, ONEDAY
from logilab.common.shellutils import ProgressBar
from cubicweb import cwconfig, UnknownEid
from cubicweb import AuthenticationError
from cubicweb import UnknownEid
from cubicweb.utils import admincnx
from cubicweb.toolsutils import Command
......@@ -98,7 +98,6 @@ class StatsUpdater(object):
date_formats = {'month': '%m%Y',
'day': '%m%Y%d',
'hour': '%m%Y%d%H'}
domain = config['awstats-domain']
if config['awstats-domain']:
domain_ext = '.' + config['awstats-domain']
else:
......@@ -157,7 +156,6 @@ class StatsUpdater(object):
def _update_hits_for_eid(self, eid, values, stats_period, hit_type):
self.cnx.commit()
visit_count = visit_count_rdf = 0
total_hits = sum([item[0] for item in values])
try:
entity = self.cnx.entity_from_eid(eid)
......@@ -171,7 +169,8 @@ class StatsUpdater(object):
status = 'created'
hit = self.cnx.create_entity(
'Hits', count=total_hits, hit_type=hit_type,
period=stats_period, stats_about=entity)
period=stats_period, stats_about=entity
)
# append it to the cache
self.all_hits[(eid, stats_period.eid, hit_type)] = hit
else:
......@@ -247,4 +246,5 @@ Number of stat objects ignored : %(ignored)s
Number of stat objects compressed : %(compressed)s
''' % stats_report)
CWCTL.register(UpdateWebstatsCommand)
......@@ -16,17 +16,14 @@
"""cubicweb-awstats schema"""
from yams.buildobjs import (
EntityType, String, Int, BigInt, Date, Datetime, Boolean,
SubjectRelation, RelationDefinition, RelationType)
from cubicweb import _
from yams.buildobjs import EntityType, String, Int, Date, RelationType
MANAGER_PERMISSIONS = {
'read': ('managers', ),
'update': ('managers', 'owners',),
'delete': ('managers', 'owners'),
'add': ('managers',)
'update': ('managers', 'owners',),
'delete': ('managers', 'owners'),
'add': ('managers',)
}
......
......@@ -14,18 +14,16 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import os.path as osp
from datetime import datetime, date
from datetime import datetime
from logilab.common.date import previous_month, first_day, date_range, last_day
from logilab.common.date import previous_month, date_range, last_day
from logilab.common.shellutils import ProgressBar
try:
from cubicweb import _
except ImportError:
_ = unicode
from cubicweb.req import FindEntityError
from cubicweb.web import NotFound, Redirect
from cubicweb.devtools.fake import FakeRequest
......@@ -35,8 +33,10 @@ SECTIONSPEC = {
# commented sections are not usefull to view
# 'MAP' : ['section', 'offset'],
# 'GENERAL': ['key', None],
'TIME': ['hour', 'pages', 'hits', 'bandwidth', 'not viewed pages', 'not viewed hits', 'not viewed bandwidth'],
'VISITOR': ['host', 'pages', 'hits', 'bandwidth', 'last visit date', 'start date of last visit', 'last page of last visit'],
'TIME': ['hour', 'pages', 'hits', 'bandwidth', 'not viewed pages', 'not viewed hits',
'not viewed bandwidth'],
'VISITOR': ['host', 'pages', 'hits', 'bandwidth', 'last visit date', 'start date of last visit',
'last page of last visit'],
'DAY': ['date', 'pages', 'hits', 'bandwidth', 'visits'],
'DOMAIN': ['domain', 'pages', 'hits', 'bandwidth'],
'LOGIN': ['cluster id', 'pages', 'hits', 'bandwidth', 'last visit date'],
......@@ -46,7 +46,8 @@ SECTIONSPEC = {
'EMAILRECEIVER': ['email', 'hits', 'bandwidth', 'last visit'],
'SESSION': ['session range', 'hits'],
'SIDER': ['most visited URLs', 'hits', 'bandwidth', 'entry', 'exit'],
'FILETYPES': ['served files type', 'hits', 'bandwidth', 'bandwidth without compression', 'bandwidth after compression'],
'FILETYPES': ['served files type', 'hits', 'bandwidth', 'bandwidth without compression',
'bandwidth after compression'],
'OS': ['operating systems', 'hits'],
'BROWSER': ['browser id', 'hits'],
'SCREENSIZE': ['screen size', 'hits'],
......@@ -57,7 +58,7 @@ SECTIONSPEC = {
'PAGEREFS': ['external page referers', 'pages', 'hits'],
'SEARCHWORDS': ['main search keyphrases', 'hits'],
'KEYWORDS': ['main search keyword', 'hits'],
#'MISC': ['misc id', 'pages', 'hits', 'bandwidth'],
# 'MISC': ['misc id', 'pages', 'hits', 'bandwidth'],
'ERRORS': ['errors', 'hits', 'bandwidth'],
'CLUSTER': ['cluster id', 'pages', 'hits', 'bandwidth'],
'SIDER_404': ['urls with 404 errors', 'hits', 'last url referer'],
......@@ -87,7 +88,7 @@ SECTIONLABELS = {
'PAGEREFS': _('Main external page referers'),
'SEARCHWORDS': _('Hits from search keyphrases'),
'KEYWORDS': _('Hits from search keywords'),
#'MISC': ['misc id'), 'pages'), 'hits'), 'bandwidth'],
# 'MISC': ['misc id'), 'pages'), 'hits'), 'bandwidth'],
'ERRORS': _('HTTP Status codes'),
'CLUSTER': _('Visits by cluster id'),
'SIDER_404': _('Hits with 404 errors'),
......@@ -124,7 +125,7 @@ def extract_stats_dict(filepath):
section_name = None
parsed_countdown = 0
stats_dict = {}
for line in file(filepath):
for line in open(filepath):
if line.startswith('BEGIN_'):
section_name, nb_of_lines = line.split('_', 1)[1].split()
if section_name in SECTIONSPEC:
......@@ -180,9 +181,11 @@ def get_or_create_statperiod(cnx, start, stop, stats_report={}):
def time_params(cnx):
params = []
rset = cnx.execute(
'Any START ORDERBY START LIMIT 1 WHERE P is StatPeriod, P start START, P stop STOP HAVING STOP-START <= 2')
'Any START ORDERBY START LIMIT 1 WHERE P is StatPeriod, P start START, P stop STOP HAVING '
'STOP-START <= 2')
for (item,) in rset:
for first_day in date_range(previous_month(item), previous_month(datetime.now(), 5), incmonth=True):
for first_day in date_range(previous_month(item), previous_month(datetime.now(), 5),
incmonth=True):
delta = 2
params.append((first_day, last_day(first_day), delta))
# TODO - roll complete 12 months into a year
......@@ -196,9 +199,11 @@ def compress_old_hits(cnx, update_stats={}, progressbar=True):
for start, stop, delta in tp:
if progressbar:
pb.update()
rql = 'DISTINCT Any E,SUM(C) GROUPBY E WHERE H is Hits, H count C, H hit_type %(hit_type)s,'\
'H period P, P start >= %(start)s, P stop <= %(stop)s, H stats_about E,'\
'P start START, P stop STOP HAVING STOP-START <= %(timedelta)s'
rql = (
'DISTINCT Any E,SUM(C) GROUPBY E WHERE H is Hits, H count C, H hit_type %(hit_type)s,'
'H period P, P start >= %(start)s, P stop <= %(stop)s, H stats_about E,'
'P start START, P stop STOP HAVING STOP-START <= %(timedelta)s'
)
results = {}
type_rset = cnx.execute('DISTINCT Any C WHERE X is Hits, X hit_type C')
for (hit_type,) in type_rset:
......@@ -212,8 +217,8 @@ def compress_old_hits(cnx, update_stats={}, progressbar=True):
# deleting statperiod deletes all associated hits
drset = cnx.execute(
'DELETE StatPeriod P WHERE P start >= %(start)s, P stop <= %(stop)s',
{'start': start,
'stop': stop, })
{'start': start,
'stop': stop, })
update_stats['compressed'] += len(drset)
stp = get_or_create_statperiod(cnx, start, stop)
for hit_type, rset in results.items():
......@@ -222,6 +227,6 @@ def compress_old_hits(cnx, update_stats={}, progressbar=True):
# FIXME if Hits for period and content exist, update it ?
cnx.create_entity(
'Hits', hit_type=hit_type, period=stp, count=count,
stats_about=content_entity)
stats_about=content_entity)
if progressbar:
pb.finish()
......@@ -18,7 +18,6 @@
from logilab.mtconverter import xml_escape
from cubicweb.utils import RepeatList
from cubicweb.view import EntityView
from cubicweb.web.views import primary, navigation
from cubicweb.predicates import is_instance
......@@ -47,8 +46,7 @@ class StatPeriodPrimaryView(primary.PrimaryView):
'start'),
entity.printable_value(
'stop'),
(entity.stop -
entity.start).days,
(entity.stop - entity.start).days,
_('days')))
rset = self._cw.execute('DISTINCT Any C WHERE X is Hits, X hit_type C')
self.w(u'<a href="%s">%s</a>' % (entity.absolute_url(showall=1),
......@@ -57,7 +55,8 @@ class StatPeriodPrimaryView(primary.PrimaryView):
self.w(u'<h3>%s</h3>' % hit_type)
rql = 'Any X, C ORDERBY C DESC %(limit)s WHERE H stats_about X, H hit_type "%(type)s",'\
'H count C, H period P, P eid %%(e)s' % {'type': hit_type,
'limit': req.form.get('showall') and ' ' or 'LIMIT 20'}
'limit': req.form.get('showall') and ' '
or 'LIMIT 20'}
if self.column_types_aggr:
self.w(u'<table><tr>')
for types in self.column_types_aggr:
......@@ -67,9 +66,9 @@ class StatPeriodPrimaryView(primary.PrimaryView):
self.wview('table', rset, 'null')
# cf rql/editextensions.py unset_limit
nolimit_rql = typedrql.replace('LIMIT 20', '')
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
self.w(u'<a href="%s">Export CSV</a>' % xml_escape(
self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
# FIXME TODO not working right now
self.wview('piechart', rset, 'null')
self.w(u'</td>')
......@@ -78,9 +77,9 @@ class StatPeriodPrimaryView(primary.PrimaryView):
rset = self._cw.execute(rql, {'e': entity.eid})
self.wview('table', rset, 'null')
nolimit_rql = rql.replace('LIMIT 20', '')
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
self.w(u'<a href="%s">Export CSV</a>' % xml_escape(
self._cw.build_url('', rql=nolimit_rql % {'e': entity.eid},
vid='csvexport')))
self.w(u'</div>')
......@@ -91,8 +90,8 @@ class StatPeriodIPrevNextAdapter(navigation.IPrevNextAdapter):
entity = self.entity
execute = self._cw.execute
rset = execute(
"StatPeriod P ORDERBY S DESC LIMIT 1 WHERE P start S, P2 start S2, P2 eid %(e)s HAVING S < S2",
{'e': entity.eid})
"StatPeriod P ORDERBY S DESC LIMIT 1 WHERE P start S, P2 start S2, P2 eid %(e)s "
"HAVING S < S2", {'e': entity.eid})
if rset:
return rset.get_entity(0, 0)
......@@ -100,8 +99,8 @@ class StatPeriodIPrevNextAdapter(navigation.IPrevNextAdapter):
entity = self.entity
execute = self._cw.execute
rset = execute(
"StatPeriod P ORDERBY S LIMIT 1 WHERE P start S, P2 start S2, P2 eid %(e)s HAVING S > S2",
{'e': entity.eid})
"StatPeriod P ORDERBY S LIMIT 1 WHERE P start S, P2 start S2, P2 eid %(e)s HAVING "
"S > S2", {'e': entity.eid})
if rset:
return rset.get_entity(0, 0)
......@@ -117,9 +116,11 @@ class StatGraph(EntityView):
entity.dc_title()))
self.w(u'<div id="webstats">')
rql = ('DISTINCT Any S, HITS ORDERBY S WITH S, HITS BEING ('
'(Any START, HITS WHERE H count HITS, H hit_type "normal", H period P, P start START, P stop STOP, H stats_about E, E eid %%(e)s %s)'
'(Any START, HITS WHERE H count HITS, H hit_type "normal", H period P, P start '
'START, P stop STOP, H stats_about E, E eid %%(e)s %s)'
' UNION '
'(Any START, 0 WHERE P is StatPeriod, P start START, P stop STOP, NOT EXISTS(H period P, H stats_about E, E eid %%(e)s) %s))')
'(Any START, 0 WHERE P is StatPeriod, P start START, P stop STOP, NOT EXISTS(H '
'period P, H stats_about E, E eid %%(e)s) %s))')
plot_displayed = False
for constraint, label in ((' HAVING STOP-START <= 20', _('Daily')),
(' HAVING STOP-START >= 20', _('Monthly'))):
......
......@@ -54,8 +54,7 @@ def extract_available_time_periods(form, **attrs):
}
for filename in os.listdir(awstats_dir):
match = re.search(
'awstats(\d{%s})\.?%s.txt' % (size[periodicity], selected_domain),
filename)
r'awstats(\d{%s})\.?%s.txt' % (size[periodicity], selected_domain), filename)
if match:
periods.append((specific_format('time_period', match.group(1)),
match.group(1)))
......@@ -67,7 +66,7 @@ def extract_available_domains(form, **attrs):
domains = []
awstats_dir = form._cw.vreg.config['awstats-dir']
for filename in os.listdir(awstats_dir):
match = re.search('awstats(\d{2})(\d{4})\.?(.*).txt', filename)
match = re.search(r'awstats(\d{2})(\d{4})\.?(.*).txt', filename)
if match and match.group(3) not in domains:
domains.append(match.group(3))
return sorted(domains)
......@@ -96,9 +95,9 @@ def specific_format(header, value):
return datetime.strptime(value, '%m%Y%d%H').strftime('%Y/%m/%d %H:00')
elif value and value.startswith('http://'):
return '<a href="%s">%s</a>' % (value, value)
elif re.search('^\d{14}$', value):
elif re.search(r'^\d{14}$', value):
return datetime.strptime(value, '%Y%m%d%H%M%S%f').strftime('%d/%m/%Y %H:%M')
elif re.search('^\d{8}$', value):
elif re.search(r'^\d{8}$', value):
try:
return datetime.strptime(value, '%Y%m%d').strftime('%d/%m/%Y')
except ValueError:
......@@ -121,21 +120,21 @@ class AwstatsRefreshForm(forms.FieldsForm):
__regid__ = 'select-awstats'
domain = StringField(
widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
label=_('Domain:'),
choices=extract_available_domains)
label=_('Domain:'),
choices=extract_available_domains)
# TODO - use calendar widget
time_period = StringField(
widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
label=_('Period:'),
choices=extract_available_time_periods)
label=_('Period:'),
choices=extract_available_time_periods)
limit = StringField(
widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
label=_('Number of results :'),
choices=[u'%s' % i for i in (10, 25, 50, 100)])
label=_('Number of results :'),
choices=[u'%s' % i for i in (10, 25, 50, 100)])
section = StringField(
widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
label=_('Show section :'),
choices=[('', ''), ] + [(label, value) for value, label in SECTIONLABELS.items()])
label=_('Show section :'),
choices=[('', ''), ] + [(label, value) for value, label in SECTIONLABELS.items()])
form_buttons = [fwdgs.SubmitButton(label=_('Apply'))]
@property
......@@ -215,8 +214,8 @@ class AwstatsView(StartupView):
self.w(u'<td class="data">%s</td>' %
specific_format(header, item.get(header)))
elif header == 'origin':
self.w(u'<td scope="row">%s</td>' % specific_format(header,
ORIGIN_LABELS[item.get(header)]))
self.w(u'<td scope="row">%s</td>' %
specific_format(header, ORIGIN_LABELS[item.get(header)]))
else:
self.w(u'<td scope="row">%s</td>' %
specific_format(header, item.get(header)))
......@@ -236,7 +235,8 @@ class AwstatsView(StartupView):
else:
order_key = SECTIONSPEC[section_name][1]
reverse = True
return sorted(stats_dict[section_name].values(), reverse=reverse, key=lambda i: int(i[order_key]))
return sorted(stats_dict[section_name].values(), reverse=reverse,
key=lambda i: int(i[order_key]))
class WebStatsRefreshForm(forms.FieldsForm):
......@@ -286,24 +286,31 @@ class StatPeriodsView(StartupView):
self.w(u'<h1>%s</h1>' % _('Web stats'))
duration = datetime.strptime(
stop, '%Y/%m/%d') - datetime.strptime(start, '%Y/%m/%d')
self.w(
u'<h2>%s</h2>' % _('from %(start)s to %(stop)s (%(duration)s days)' % {'start': start,
'stop': stop,
'duration': duration.days}))
self.w(u'<h2>%s</h2>' % _('from %(start)s to %(stop)s (%(duration)s days)' % {
'start': start,
'stop': stop,
'duration': duration.days
}))
self.description()
self.w(
u'<h3><a href="%s">%s</a></h3>' % (self._cw.build_url(rql='Any X ORDERBY S WHERE X is StatPeriod, X start S, X stop E HAVING E-S >= 20'),
_('Navigate previous statistics by month')))
u'<h3><a href="%s">%s</a></h3>' %
(self._cw.build_url(rql='Any X ORDERBY S WHERE X is StatPeriod, X start S, X stop E '
'HAVING E-S >= 20'),
_('Navigate previous statistics by month')))
rset = self._cw.execute('DISTINCT Any T WHERE X is Hits, X hit_type T')
for index, hit_type in enumerate(rset):
self.w(u'<h3>%s</h3>' % hit_type[0])
rql = 'Any X, SUM(C) GROUPBY X ORDERBY 2 DESC %(limit)s WHERE H stats_about X, ' \
'H hit_type "%(type)s", H count C, H period P, P start >= "%(start)s", P stop <= "%(stop)s" '\
'' % {'type': hit_type[0],
'limit': 'LIMIT %s' % limit,
'start': start,
'stop': stop,
}
rql = (
'Any X, SUM(C) GROUPBY X ORDERBY 2 DESC %(limit)s WHERE H stats_about X, '
'H hit_type "%(type)s", H count C, H period P, P start >= "%(start)s", '
'P stop <= "%(stop)s" '
'' % {
'type': hit_type[0],
'limit': 'LIMIT %s' % limit,
'start': start,
'stop': stop,
}
)
if self.column_types_aggr:
self.w(u'<table class="webstats"><tr>')
for etypes in self.column_types_aggr:
......@@ -312,18 +319,18 @@ class StatPeriodsView(StartupView):
rset = self._cw.execute(typedrql)
self.generate_table_form(rset, etypes)
nolimit_rql = typedrql.replace('LIMIT %s' % limit, '')
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url(rql=nolimit_rql,
vid='csvexport')))
self.w(u'<a href="%s">Export CSV</a>' %
xml_escape(self._cw.build_url(rql=nolimit_rql,
vid='csvexport')))
self.w(u'</td>')
self.w(u'</tr></table>')
else:
rset = self._cw.execute(rql)
self.generate_table_form(rset)
nolimit_rql = rql.replace('LIMIT %s' % limit, '')
self.w(
u'<a href="%s">Export CSV</a>' % xml_escape(self._cw.build_url(rql=nolimit_rql,
vid='csvexport')))
self.w(u'<a href="%s">Export CSV</a>' %
xml_escape(self._cw.build_url(rql=nolimit_rql,
vid='csvexport')))
self.w(u'</div>')
def generate_table_form(self, rset, etypes=None):
......
......@@ -17,12 +17,12 @@ class CompressTest(CubicWebTC):
for day in date.date_range(datetime.now() - timedelta(200), datetime.now()):
stp = cnx.create_entity(
'StatPeriod', start=day, stop=day + timedelta(1))
hit = cnx.create_entity('Hits', hit_type=u'normal',
count=random.choice(range(2000)),
period=stp, stats_about=content)
cnx.create_entity('Hits', hit_type=u'normal',
count=random.choice(range(2000)),
period=stp, stats_about=content)
self.assertEqual(len(cnx.execute('Any X WHERE X is Hits')), 200)
self.assertEqual(
len(cnx.execute('Any P WHERE P is StatPeriod, P start S, P stop E HAVING E-S >= 27')), 0)
self.assertEqual(len(cnx.execute('Any P WHERE P is StatPeriod, P start S, P stop E '
'HAVING E-S >= 27')), 0)
compress_old_hits(cnx, update_stats)
# XXX SQLite bug ?
# self.assertEqual(len(req.execute('Any P WHERE P is StatPeriod, P
......
......@@ -9,6 +9,7 @@ commands =
{envpython} -m pytest {posargs:test}
[testenv:flake8]
basepython = python2
skip_install = true
deps =
flake8
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment