Commit 88de6622 authored by Arthur Lutz
Browse files

compress old hits into monthly periods

parent f4e17f7eb392
......@@ -16,6 +16,9 @@
import re
import os.path as osp
from datetime import datetime
from logilab.common.date import previous_month, first_day
from logilab.common.shellutils import ProgressBar
SECTIONSPEC = {
# commented sections are not useful to view
......@@ -142,3 +145,52 @@ def eid_from_url(value):
return int(match.group(1))
except:
pass
def get_or_create_statperiod(session, start, stop):
    """Return the StatPeriod whose bounds are ``start`` and ``stop``.

    The period is looked up first; when the lookup result is empty a new
    StatPeriod is created with those bounds.

    :param session: object providing ``execute(rql, args)`` and
      ``create_entity(etype, **attributes)``.
    :param start: value matched against, or stored in, the ``start`` attribute.
    :param stop: value matched against, or stored in, the ``stop`` attribute.
    :return: the entity at row 0, column 0 of the lookup result when it is
      not empty, otherwise the freshly created StatPeriod entity.
    """
    # The bounds are handed to execute() as query arguments rather than being
    # formatted into the RQL text: the lookup no longer depends on how str()
    # renders the values, and there is no quoting to get wrong.
    rql = ('Any P WHERE P is StatPeriod, '
           'P start %(start_date)s, P stop %(end_date)s')
    rset = session.execute(rql, {'start_date': start, 'end_date': stop})
    if rset:
        return rset.get_entity(0, 0)
    return session.create_entity('StatPeriod', start=start, stop=stop)
def compress_old_hits(req, update_stats=None):
    """Aggregate old Hits entities into one Hits per month, type and target.

    For every ``monthsbefore`` in ``range(4, 20)`` a window is computed as
    ``stop = previous_month(now, monthsbefore)`` and
    ``start = first_day(stop)``.  Within that window the ``count`` of the
    selected Hits is summed per ``hit_type`` and per ``stats_about`` target.
    When at least one sum exists, the StatPeriod entities inside the window
    are deleted, a StatPeriod covering the window is fetched or created, one
    aggregated Hits is created per (hit_type, target), and every other Hits
    selected for the window is deleted.

    :param req: object providing ``execute``, ``create_entity`` and
      ``entity_from_eid`` (presumably a CubicWeb request or session --
      confirm against callers).
    :param update_stats: optional dict; its ``'compressed'`` entry is
      incremented once per deleted Hits entity (a missing entry counts as 0).
      When omitted, a private dict is used and the count is discarded.
    """
    # TODO roll complete 12 months into a year
    if update_stats is None:
        # A fresh dict per call: a ``{}`` default would be shared across calls.
        update_stats = {}
    months = range(4, 20)
    # Size the bar from the range so the two can never drift apart.
    pb = ProgressBar(len(months), 55, title='Compressing old stats')
    # NOTE(review): in both queries below ``S`` is not tied to ``X`` or ``P``;
    # it looks like ``P start START, P stop STOP`` was intended.  The queries
    # are left as they were -- confirm against the schema before changing.
    sum_rql = ('Any E, SUM(C) GROUPBY E WHERE X is Hits, X count C, X hit_type %(hit_type)s,'
               'X period P, P start >= %(start)s, P stop <= %(stop)s, X stats_about E,'
               'S start START, S stop STOP HAVING STOP-START <= 20')
    delete_rql = ('DISTINCT Hits X WHERE X period P, P start >= %(start)s, P stop <= %(stop)s, '
                  'S start START, S stop STOP HAVING STOP-START <= 20')
    # The set of hit types does not depend on the month: fetch it once.  No
    # new type can appear during the run because every Hits created below
    # reuses a type from this list.
    hit_types = [row[0] for row in
                 req.execute('Any C GROUPBY C WHERE X is Hits, X hit_type C')]
    for monthsbefore in months:
        pb.update()
        stop = previous_month(datetime.now(), monthsbefore)
        start = first_day(stop)
        results = {}
        for hit_type in hit_types:
            results[hit_type] = req.execute(sum_rql, {'start': start,
                                                      'stop': stop,
                                                      'hit_type': hit_type})
        if not any(results.values()):
            # nothing to aggregate for this month
            continue
        # NOTE(review): the periods are removed before the Hits attached to
        # them are selected by delete_rql further down -- verify that this
        # ordering is intended.
        req.execute('DELETE StatPeriod P WHERE P start >= %(start)s, P stop <= %(stop)s',
                    {'start': start,
                     'stop': stop})
        stp = get_or_create_statperiod(req, start, stop)
        created_entities = []
        for hit_type, rset in results.items():
            for eid, count in rset:
                # Single-argument print(): same output on Python 2 and 3.
                print('creating %s %s %s %s %s' % (hit_type, start, stop, count, eid))
                content_entity = req.entity_from_eid(eid)
                created_entities.append(
                    req.create_entity('Hits', hit_type=hit_type, period=stp,
                                      count=count, stats_about=content_entity))
        rset = req.execute(delete_rql, {'start': start,
                                        'stop': stop})
        # A set gives constant-time membership tests in the loop below.
        created_eids = set(entity.eid for entity in created_entities)
        for e in rset.entities():
            # keep the aggregated Hits that were just created
            if e.eid not in created_eids:
                e.cw_delete()
                # .get() so that a dict lacking 'compressed' no longer
                # raises KeyError on the first deletion
                update_stats['compressed'] = update_stats.get('compressed', 0) + 1
    pb.finish()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment