# -*- coding: utf-8 -*-
"""update-webstats cubicweb plugin

Usage: cubicweb-ctl update-webstats [options] <instance-name>

This command will generate webstats objects for all linked document types.
"""

from datetime import datetime
from logilab.common.date import first_day, last_day

12
from cubicweb import cwconfig, UnknownEid
13
14
15
16
17
18
19
20
21
from cubicweb import AuthenticationError
from cubicweb.dbapi import in_memory_repo_cnx

from cubicweb.toolsutils import Command
from cubicweb.cwctl import CWCTL

from utils import SECTIONSPEC, extract_stats_dict, eid_from_url

def url_count_from_stats(stats_dict):
    """Group the most visited urls of an awstats dump by entity eid.

    `stats_dict` is the dictionary generated from an awstats txt file; only
    its ``'SIDER'`` section is read.  For each item of that section the url
    and hit count are picked using the first two entries of
    ``SECTIONSPEC['SIDER']``.  Urls for which `eid_from_url` returns a false
    value are ignored.

    Return two dictionaries, each mapping an eid to a list of
    ``(hits, url)`` tuples: the first one for normal navigation, the second
    one for rdf navigation (urls containing ``'rdf'``).
    """
    visit_count_dict = {}
    visit_count_rdf_dict = {}
    for item in stats_dict['SIDER'].values():
        sider_spec = SECTIONSPEC['SIDER']
        url = item[sider_spec[0]]
        hits = int(item[sider_spec[1]])
        eid = eid_from_url(url)
        if not eid:
            continue
        # crude detection: any url mentioning 'rdf' counts as rdf navigation
        target = visit_count_rdf_dict if 'rdf' in url else visit_count_dict
        target.setdefault(eid, []).append((hits, url))
    return visit_count_dict, visit_count_rdf_dict


class UpdateWebstatsCommand(Command):
    """Update web stats from awstats text files.

    An optional date argument may follow the instance name; its expected
    format depends on the periodicity setting of the instance:

      * month 05/2011
      * day   15/05/2011
      * hour  15/05/2011-13h

    Without a date argument the current date and time is used.
    """

    arguments = '<instance>'
    name = 'update-webstats'
    min_args = 1

    def get_current_stats_period(self, session, chosendate, periodicity):
        """Return the StatPeriod entity containing `chosendate`.

        The period boundaries are computed by `choose_period`. When no
        StatPeriod with exactly these boundaries exists, one is created.
        """
        start, end = self.choose_period(chosendate, periodicity)
        # values are given as query arguments instead of being interpolated
        # into the RQL string
        rset = session.execute(
            'Any P WHERE P is StatPeriod, P start %(start_date)s, '
            'P stop %(end_date)s',
            {'start_date': start, 'end_date': end})
        if rset:
            return rset.get_entity(0, 0)
        return session.create_entity('StatPeriod', start=start, stop=end)

    def choose_period(self, chosendate, periodicity):
        """Return the ``(start, end)`` bounds of the period holding `chosendate`.

        * ``'month'``: first and last day of the month, as returned by
          `first_day` / `last_day`
        * ``'day'``: from 00:00:00 to 23:59:59 of that day
        * ``'hour'``: from hh:00:00 to hh:59:59 of that hour

        Raise ValueError for any other `periodicity`.
        """
        if periodicity == 'month':
            start = first_day(chosendate)
            return start, last_day(start)
        if periodicity == 'day':
            start = datetime(chosendate.year, chosendate.month, chosendate.day)
            return start, start.replace(hour=23, minute=59, second=59)
        if periodicity == 'hour':
            start = datetime(chosendate.year, chosendate.month, chosendate.day,
                             chosendate.hour)
            return start, start.replace(minute=59, second=59)
        raise ValueError('unknown periodicity %r' % (periodicity,))

    def choose_dateformat(self, periodicity):
        """Return the strftime format of the date part of awstats file names.

        Raise KeyError when `periodicity` is not 'hour', 'day' or 'month'.
        """
        return {'hour': '%m%Y%d%H',
                'day': '%m%Y%d',
                'month': '%m%Y'}[periodicity]

    def update_stats(self, session, args):
        """Parse an awstats file and create or update the corresponding
        data in the cubicweb instance.

        `args` holds the remaining command line arguments: when not empty,
        its first item is the date to import (see the class docstring for
        the expected format), otherwise the current date is used.

        Entities which no longer exist, or whose type is not a possible
        target of the `stats_about` relation, are skipped.  A short report
        is printed at the end.

        Raise ValueError when the configured periodicity is not supported
        or when the date argument doesn't match the expected format.
        """
        config = session.vreg.config
        awstatsdir = config.get('awstats-dir', '/var/lib/awstats')
        domain = config.get('awstats-domain', '')
        periodicity = config.get('awstats-periodicity', 'day') #FIXME s/day/month/
        # explicit check rather than an assert, which is stripped under -O
        if periodicity not in ('hour', 'day', 'month'):
            raise ValueError('unsupported awstats-periodicity %r'
                             % (periodicity,))
        if args:
            input_format = {'month': '%m/%Y',
                            'day': '%d/%m/%Y',
                            'hour': '%d/%m/%Y-%Hh'}[periodicity]
            chosendate = datetime.strptime(args[0], input_format)
            # TODO - probably need a command to update stats from day X to day Y...
        else:
            chosendate = datetime.now()

        stats_period = self.get_current_stats_period(session, chosendate,
                                                     periodicity)
        dateformat_in_file = self.choose_dateformat(periodicity)
        # the domain, when configured, is inserted as '.<domain>'
        filename = 'awstats%s%s.txt' % (chosendate.strftime(dateformat_in_file),
                                        domain and '.%s' % domain)
        stats_dict = extract_stats_dict(awstatsdir, filename)
        normal_dict, rdf_dict = url_count_from_stats(stats_dict)

        # names of the entity types reachable through `stats_about`
        rset = session.execute('Any N WHERE X relation_type R, '
                               'R name "stats_about", X to_entity Y, Y name N')
        allowed_types = set(row[0] for row in rset)
        report = {'updated': 0,
                  'created': 0,
                  'exists no change': 0,
                  'skipped': 0,
                  }
        for count_dict, hit_type in ((normal_dict, u'normal'),
                                     (rdf_dict, u'rdf')):
            for eid, values in count_dict.items():
                try:
                    entity = session.entity_from_eid(eid)
                except UnknownEid:
                    report['skipped'] += 1
                    continue
                if entity.__regid__ not in allowed_types:
                    report['skipped'] += 1
                    continue
                total_hits = sum(hits for hits, _url in values)
                outcome = self._store_hits(session, entity, stats_period,
                                           hit_type, total_hits)
                report[outcome] += 1
        print('''=== Update Report ===
Number of stat objects created :         %(created)s
Number of stat objects updated :         %(updated)s
Number of stat objects already existed : %(exists no change)s
Number of stat objects skipped :         %(skipped)s
        ''' % report)

    def _store_hits(self, session, entity, stats_period, hit_type, total_hits):
        """Create or update the Hits entity of `entity` for `stats_period`.

        Return the name of the report counter to increment: 'created',
        'updated' or 'exists no change'.
        """
        rset = session.execute(
            'Any X,V WHERE X is Hits, X count V, X hit_type %(hit_type)s, '
            'X stats_about E, E eid %(e)s, X period P, P eid %(sp)s',
            {'e': entity.eid, 'sp': stats_period.eid, 'hit_type': hit_type})
        if not rset:
            session.create_entity('Hits', count=total_hits,
                                  period=stats_period,
                                  stats_about=entity,
                                  hit_type=hit_type)
            return 'created'
        hits_eid, stored_hits = rset[0][0], rset[0][1]
        if stored_hits == total_hits:
            return 'exists no change'
        session.execute('SET X count %(hits)s WHERE X eid %(e)s',
                        {'e': hits_eid, 'hits': total_hits})
        return 'updated'

    def _init_cw_connection(self, appid):
        """Open an in-memory repository connection on instance `appid`.

        The admin login and password are read from the instance sources
        configuration; when they are missing, or rejected by the
        repository, they are asked to the user until authentication
        succeeds.

        Return a ``(cnx, session)`` tuple.
        """
        self.config = config = cwconfig.instance_configuration(appid)
        sourcescfg = config.sources()
        config.set_sources_mode(('system',))
        try:
            login = sourcescfg['admin']['login']
            pwd = sourcescfg['admin']['password']
        except KeyError:
            login = pwd = None
        while True:
            if login is None:
                # imported here since it is only needed for interactive use
                from cubicweb.server.utils import manager_userpasswd
                login, pwd = manager_userpasswd()
            try:
                repo, cnx = in_memory_repo_cnx(config, login=login, password=pwd)
            except AuthenticationError:
                print('wrong user/password')
                # never retry with the credentials that were just rejected,
                # else wrong stored credentials would loop forever
                login = pwd = None
            else:
                break
        session = repo._get_session(cnx.sessionid)
        # XXX keep reference on cnx otherwise cnx.__del__ will cause trouble
        #     (file a ticket)
        return cnx, session

    def main_run(self, args, rcfile=None):
        """Run the command and return status 0 if everything went fine.

        If :exc:`CommandError` is raised by the underlying command, simply log
        the error and return status 2.

        Any other exceptions, including :exc:`BadCommandUsage` will be
        propagated.

        When `rcfile` is not given, it is taken from the value following
        ``-c`` or ``--config`` in `args`, if any.
        """
        # XXX (adim): rcfile handling is spectacularly messy but I can't
        #             get it right without refactoring pivotdoc for now
        if rcfile is None:
            for flag in ('-c', '--config'):
                if flag in args:
                    rcfile = args[args.index(flag) + 1]
                    break
        return Command.main_run(self, args, rcfile)

    def run(self, args):
        """Update the web stats of the instance named by the first argument.

        The remaining arguments are handed to `update_stats`, then the
        session is committed.
        """
        appid = args.pop(0)
        # cw_cnx is unused but must stay referenced while the session is used
        # (see _init_cw_connection)
        cw_cnx, session = self._init_cw_connection(appid)
        session.set_cnxset()
        self.update_stats(session, args)
        session.commit()


# register the command class on CWCTL (usage given in the module docstring:
# ``cubicweb-ctl update-webstats [options] <instance-name>``)
CWCTL.register(UpdateWebstatsCommand)