# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

"""cubicweb-awstats startup views """


import os
import os.path as osp
import re
import urllib
from datetime import datetime, timedelta

from logilab.mtconverter import xml_escape
from logilab.common.textutils import BYTE_UNITS

try:
    from cubicweb import _
except ImportError:
    _ = unicode
from cubicweb.view import StartupView
from cubicweb.web.views import forms
from cubicweb.web.formfields import StringField, DateField
from cubicweb.web import formwidgets as fwdgs, httpcache

from cubes.awstats.utils import SECTIONSPEC, SECTIONLABELS, \
     extract_stats_dict, ORIGIN_LABELS
40

41
42
43
def extract_available_time_periods(form, **attrs):
    """Extract the available time periods from the list of awstats files.

    The directory given by the ``awstats-dir`` option is scanned for files
    named ``awstats<period>[.]<domain>.txt`` where ``<period>`` is a run of
    digits whose length depends on the ``awstats-periodicity`` option (10 for
    ``hour``, 8 for ``day``, 6 for ``month``) and ``<domain>`` is the
    ``domain`` request parameter, defaulting to the ``awstats-domain`` option.

    :param form: form whose ``_cw`` request gives access to the submitted
      values (``_cw.form``) and to the configuration (``_cw.vreg.config``)
    :param attrs: extra keyword arguments, unused here (presumably supplied by
      the form machinery when used as a ``choices`` callback -- to confirm)
    :return: sorted list of ``(displayable period, raw period)`` tuples
    :raise KeyError: if the configured periodicity is not one of
      ``hour``, ``day`` or ``month``
    """
    req = form._cw
    config = req.vreg.config
    selected_domain = req.form.get('domain', config['awstats-domain'])
    digits = {'hour': 10, 'day': 8, 'month': 6}[config['awstats-periodicity']]
    # The domain may come straight from the request: escape it so that it is
    # matched literally instead of being interpreted as a regular expression.
    pattern = re.compile(r'awstats(\d{%d})\.?%s\.txt$'
                         % (digits, re.escape(selected_domain or '')))
    periods = []
    for filename in os.listdir(config['awstats-dir']):
        match = pattern.search(filename)
        if match:
            period = match.group(1)
            periods.append((specific_format('time_period', period), period))
    return sorted(periods)
59
60

def extract_available_domains(form, **attrs):
    """Extract the available domains from the list of awstats files.

    The directory given by the ``awstats-dir`` option is scanned for files
    named ``awstats<period>[.]<domain>.txt``. The number of digits of
    ``<period>`` follows the ``awstats-periodicity`` option (10 for ``hour``,
    8 for ``day``, 6 for ``month`` and for any other value) so that the
    trailing digits of daily / hourly periods are not taken for the beginning
    of the domain.

    :param form: form whose ``_cw.vreg.config`` holds the configuration
    :param attrs: extra keyword arguments, unused here (presumably supplied by
      the form machinery when used as a ``choices`` callback -- to confirm)
    :return: sorted list of distinct domains; the empty string stands for
      files without any domain in their name
    """
    config = form._cw.vreg.config
    digits = {'hour': 10, 'day': 8, 'month': 6}.get(
        config['awstats-periodicity'], 6)
    pattern = re.compile(r'awstats\d{%d}\.?(.*)\.txt$' % digits)
    # a set gives de-duplication without rescanning a list for each file
    domains = set()
    for filename in os.listdir(config['awstats-dir']):
        match = pattern.search(filename)
        if match:
            domains.add(match.group(1))
    return sorted(domains)
69
70

def use_as_sort_key(value):
    """Return `value` as an int when possible, else `value` itself.

    Values that `int()` rejects because of their content (``ValueError``) or
    of their type (``TypeError``, e.g. None) are returned unchanged.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        return value


def specific_format(header, value):
    """Guess from a header and a value how to display the value.

    * None is returned unchanged
    * ``bandwidth`` values are converted with `convert_to_bytes`
    * ``time_period`` values of 6, 8 or 10 characters are read as
      ``MMYYYY``, ``MMYYYYDD`` or ``MMYYYYDDHH`` and displayed as
      ``YYYY/MM``, ``YYYY/MM/DD`` or ``YYYY/MM/DD HH:00``
    * values starting with ``http://`` or ``https://`` become a link
    * 14 digits are read as ``YYYYMMDDHHMMSS`` and 8 digits as ``YYYYMMDD``
    * anything else, including values which look like one of the above date
      layouts but are not valid dates, is url-unquoted, decoded from utf8
      and xml-escaped

    :param header: name of the column the value belongs to
    :param value: raw string, or None
    :return: a displayable string, or None if `value` is None
    """
    if value is None:
        return None
    if header == 'bandwidth':
        return convert_to_bytes(int(value))
    # (strptime layout, strftime layout) of awstats periods, by value length
    period_layouts = {
        6: ('%m%Y', '%Y/%m'),                   # month
        8: ('%m%Y%d', '%Y/%m/%d'),              # day
        10: ('%m%Y%d%H', '%Y/%m/%d %H:00'),     # hour
    }
    layouts = None
    if header == 'time_period' and len(value) in period_layouts:
        layouts = period_layouts[len(value)]
    elif value.startswith(('http://', 'https://')):
        # the url comes from the log files: never emit it unescaped
        escaped = xml_escape(value)
        return '<a href="%s">%s</a>' % (escaped, escaped)
    elif re.search(r'^\d{14}$', value):
        layouts = ('%Y%m%d%H%M%S', '%d/%m/%Y %H:%M')
    elif re.search(r'^\d{8}$', value):
        layouts = ('%Y%m%d', '%d/%m/%Y')
    if layouts is not None:
        try:
            return datetime.strptime(value, layouts[0]).strftime(layouts[1])
        except ValueError:
            # right number of digits but not a date: display it as plain text
            pass
    # 'replace' keeps malformed bytes found in logs from raising
    return xml_escape(urllib.unquote(value).decode('utf8', 'replace'))
101
102

def convert_to_bytes(value, units=None):
    """Display bandwidth data using a human readable notation.

    The largest unit holding `value` at least once is used and the quantity
    is truncated to an integer, e.g. ``2048`` gives ``'2 KB'`` with 1024-based
    units.

    :param value: integer number of bytes
    :param units: mapping of unit label to unit size, defaults to `BYTE_UNITS`
    :return: ``'<quantity> <label>'``; values smaller than every unit
      (typically 0) are reported as ``0`` of the smallest unit, and
      ``'<value>'`` alone is returned when `units` is empty
    """
    if units is None:
        units = BYTE_UNITS
    by_size = sorted(((size, label) for label, size in units.items()),
                     reverse=True)
    for size, label in by_size:
        # explicit floor division: do not depend on python 2 integer `/`
        quantity = value // size
        if quantity != 0:
            return '%s %s' % (quantity, label)
    if by_size:
        return '0 %s' % by_size[-1][1]
    return '%s' % value

class AwstatsRefreshForm(forms.FieldsForm):
    """Form used to choose which awstats statistics are displayed.

    Each select box submits the form as soon as its value changes; the form
    is posted to the ``awstats`` view.
    """
    __regid__ = 'select-awstats'

    domain = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Domain:'),
        choices=extract_available_domains)
    # TODO - use calendar widget
    time_period = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Period:'),
        choices=extract_available_time_periods)
    limit = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Number of results :'),
        choices=[u'10', u'25', u'50', u'100'])
    # first choice is empty: no section selected means every section
    section = StringField(
        widget=fwdgs.Select(attrs={'onchange': 'this.form.submit()'}),
        label=_('Show section :'),
        choices=[('', '')] + [(SECTIONLABELS[name], name)
                              for name in SECTIONLABELS])
    form_buttons = [fwdgs.SubmitButton(label=_('Apply'))]

    @property
    def action(self):
        """url the form is submitted to: the ``awstats`` view"""
        return self._cw.build_url('', vid='awstats')
131
132

class AwstatsView(StartupView):
    """Simple HTML export of the stats found in awstats files.

    The ``domain``, ``time_period``, ``limit`` and ``section`` request
    parameters (see the ``select-awstats`` form) select what is displayed.
    """
    __regid__ = 'awstats'

    def call(self):
        """Render the selection form followed by the selected statistics."""
        _ = self._cw._
        req = self._cw

        form = req.vreg['forms'].select('select-awstats', req)
        form.render(w=self.w)

        domain = req.form.get('domain', '')
        try:
            limit = int(req.form.get('limit', 10))
        except (TypeError, ValueError):
            limit = 10
        time_period, stats_dict = self._select_stats(form, domain)

        self.w(u'<div id="awstats">')
        # the domain comes from the request: escape it before display
        self.w(u'<h1>%s : %s</h1>' % (_('Domain'),
                                      xml_escape(domain or 'default')))
        if stats_dict is None:
            self.w(u'<p>%s</p>' % _('No statistics available'))
            self.w(u'</div>')
            return
        try:
            period_label = specific_format('time_period', time_period)
        except ValueError:
            period_label = xml_escape(time_period)
        self.w(u'<h2>%s : %s</h2>' % (_('Time period'), period_label))
        section = req.form.get('section')
        if section:
            self.generic_table(section, stats_dict, limit)
        else:
            self.render_navigation(stats_dict)
            for key in SECTIONSPEC:
                self.generic_table(key, stats_dict, limit)
        self.w(u'</div>')

    def _select_stats(self, form, domain):
        """Return ``(time_period, stats)`` for the period to display.

        The period asked in the request is tried first; when there is none or
        when its file cannot be read, the first available period is used
        instead. ``(None, None)`` is returned when nothing could be read.
        """
        requested = self._cw.form.get('time_period')
        if requested:
            stats_dict = self._read_stats(requested, domain)
            if stats_dict is not None:
                return requested, stats_dict
        periods = extract_available_time_periods(form)
        if periods:
            fallback = periods[0][1]
            stats_dict = self._read_stats(fallback, domain)
            if stats_dict is not None:
                return fallback, stats_dict
        return None, None

    def _read_stats(self, time_period, domain):
        """Return the stats of the awstats file of `time_period` and `domain`,
        or None if there is no such readable file in the awstats directory.
        """
        filename = 'awstats%s%s.txt' % (time_period, domain and '.%s' % domain)
        # both values may come from the request: refuse anything designating
        # a file outside of the awstats directory
        if osp.basename(filename) != filename:
            return None
        awstats_dir = self._cw.vreg.config['awstats-dir']
        try:
            return extract_stats_dict(osp.join(awstats_dir, filename))
        except IOError:
            return None

    def render_navigation(self, stats_dict):
        """Render links to the sections having data in `stats_dict`."""
        _ = self._cw._
        self.w(u'<div>')
        self.w(u'<table id="navigation">')
        for key in SECTIONSPEC:
            if key in stats_dict and stats_dict[key].values():
                self.w(u'<tr><td><a href="#%s">%s</a></td></tr>'
                       % (key, _(SECTIONLABELS[key])))
        self.w(u'</table>')
        self.w(u'</div>')

    def generic_table(self, section_name, stats_dict, limit):
        """Render the table of an awstats section.

        Nothing is written when the section is unknown or has no data.

        :param section_name: key of the section in `SECTIONSPEC`
        :param stats_dict: stats as returned by `extract_stats_dict`
        :param limit: maximum number of rows to display
        """
        _ = self._cw._
        if (section_name not in SECTIONSPEC
                or section_name not in stats_dict
                or not stats_dict[section_name].values()):
            return
        headers = SECTIONSPEC[section_name]
        self.w(u'<a name="%s"/>' % section_name)
        self.w(u'<h3>%s</h3>' % _(SECTIONLABELS[section_name]))
        self.w(u'<div><table class="listing">')
        self.w(u'<tr class="header">')
        for header in headers:
            self.w(u'<th scope="col">%s</th>' % xml_escape(header))
        self.w(u'</tr><tbody>')
        for index, item in enumerate(self.order_values(section_name,
                                                       stats_dict)):
            # check before writing so that exactly `limit` rows are displayed
            if index >= limit:
                break
            self.w(u'<tr>')
            for tdindex, header in enumerate(headers):
                value = item.get(header)
                if tdindex:
                    cell = u'<td class="data">%s</td>'
                else:
                    cell = u'<td scope="row">%s</td>'
                    if header == 'origin':
                        # unknown origins are displayed as is
                        value = ORIGIN_LABELS.get(value, value)
                content = specific_format(header, value)
                # missing values give an empty cell rather than "None"
                self.w(cell % (u'' if content is None else content))
            self.w(u'</tr>')
        self.w(u'</tbody></table></div><br/>')

    def order_values(self, section_name, stats_dict):
        """Return the items of a section, sorted on its most relevant column.

        Sections with an ``hour`` column are sorted by ascending hour, those
        with a ``hits`` column by descending hits and the others by
        descending value of their second column. The sort column is compared
        as an integer.
        """
        headers = SECTIONSPEC[section_name]
        if 'hour' in headers:
            order_key, reverse = 'hour', False
        elif 'hits' in headers:
            order_key, reverse = 'hits', True
        else:
            order_key, reverse = headers[1], True
        return sorted(stats_dict[section_name].values(), reverse=reverse,
                      key=lambda item: int(item[order_key]))
221
222
223
224
225
226
227
228


class WebStatsRefreshForm(forms.FieldsForm):
    """Form used to choose the dates and the number of results of the
    ``webstats`` view, to which it is submitted.
    """
    __regid__ = 'select-webstats'

    start = DateField(label=_('Start:'))
    stop = DateField(label=_('Stop:'))
    limit = StringField(
        label=_('Number of results :'),
        choices=[u'10', u'25', u'50', u'100', u'200', u'500'])
    form_buttons = [fwdgs.SubmitButton(label=_('Apply'))]

    @property
    def action(self):
        """url the form is submitted to: the ``webstats`` view"""
        return self._cw.build_url('', vid='webstats')
235
236

class StatPeriodsView(StartupView):
    """ Web stats view - built from StatPeriod and Hits entities in cubicweb

    `column_types_aggr` enables you to combine results by type.
    For example BlogEntry and MicroBlogEntry in one table and Card in a
    separate table (mind the trailing comma of one-element tuples)::

      column_types_aggr = (('MicroBlogEntry', 'BlogEntry'),
                           ('Card',))
    """
    __regid__ = 'webstats'
    column_types_aggr = None
    http_cache_manager = httpcache.NoHTTPCacheManager

    def call(self):
        """Render the selection form, then one table per type of hit, for
        the ``start`` / ``stop`` / ``limit`` given in the request.
        """
        _ = self._cw._
        req = self._cw
        self.w(u'<div id="statperiod">')

        form = req.vreg['forms'].select('select-webstats', req)
        form.render(w=self.w)
        now = datetime.now()
        start = self._request_date('start', now - timedelta(days=30))
        stop = self._request_date('stop', now)
        try:
            limit = int(req.form.get('limit', 10))
        except (TypeError, ValueError):
            limit = 10

        self.w(u'<h1>%s</h1>' % _('Web stats'))
        duration = (datetime.strptime(stop, '%Y/%m/%d')
                    - datetime.strptime(start, '%Y/%m/%d'))
        # translate the message first, then fill it in: the interpolated
        # string can not be found in the translation catalog
        self.w(u'<h2>%s</h2>'
               % (_('from %(start)s to %(stop)s (%(duration)s days)')
                  % {'start': start, 'stop': stop,
                     'duration': duration.days}))
        self.description()
        periods_url = self._cw.build_url(
            rql='Any X ORDERBY S WHERE X is StatPeriod, X start S, X stop E HAVING E-S >= 20')
        self.w(u'<h3><a href="%s">%s</a></h3>'
               % (xml_escape(periods_url),
                  _('Navigate previous statistics by month')))
        # the same query is needed with a limit (display) and without any
        # (csv export): `limit` is filled in below
        rql_template = (
            'Any X, SUM(C) GROUPBY X ORDERBY 2 DESC %(limit)s WHERE H stats_about X, '
            'H hit_type "%(type)s", H count C, H period P, P start >= "%(start)s", P stop <= "%(stop)s" ')
        hit_types = self._cw.execute('DISTINCT Any T WHERE X is Hits, X hit_type T')
        for row in hit_types:
            hit_type = row[0]
            self.w(u'<h3>%s</h3>' % xml_escape(u'%s' % hit_type))
            # start and stop were normalized by `_request_date` and limit is
            # an int, so only well-formed values are inlined in the query
            args = {'type': hit_type, 'start': start, 'stop': stop}
            rql = rql_template % dict(args, limit='LIMIT %s' % limit)
            nolimit_rql = rql_template % dict(args, limit='')
            if self.column_types_aggr:
                self.w(u'<table class="webstats"><tr>')
                for etypes in self.column_types_aggr:
                    if isinstance(etypes, (str, type(u''))):
                        # a lone type name written without trailing comma
                        etypes = (etypes,)
                    restriction = ', X is in (%s)' % ','.join(etypes)
                    self.w(u'<td class="webstats">')
                    rset = self._cw.execute(rql + restriction)
                    self.generate_table_form(rset, etypes)
                    self._export_link(nolimit_rql + restriction)
                    self.w(u'</td>')
                self.w(u'</tr></table>')
            else:
                rset = self._cw.execute(rql)
                self.generate_table_form(rset)
                self._export_link(nolimit_rql)
        self.w(u'</div>')

    def _request_date(self, name, default):
        """Return the `name` request parameter as a ``YYYY/MM/DD`` string.

        The formatted `default` datetime is returned when the parameter is
        missing, empty or not a valid ``YYYY/MM/DD`` date.
        """
        value = self._cw.form.get(name, '')
        if value:
            try:
                return datetime.strptime(value, '%Y/%m/%d').strftime('%Y/%m/%d')
            except (TypeError, ValueError):
                pass
        return default.strftime('%Y/%m/%d')

    def _export_link(self, rql):
        """Write a link to the csv export of the result of `rql`."""
        export_url = self._cw.build_url(rql=rql, vid='csvexport')
        self.w(u'<a href="%s">Export CSV</a>' % xml_escape(export_url))

    def generate_table_form(self, rset, etypes=None):
        """Write `rset` using the ``table`` view. `etypes`, the entity types
        the result set is restricted to, is not used by this implementation.
        """
        self.w(self._cw.view('table', rset, 'null'))

    def description(self):
        """Called by `call` right after the headings; does nothing here."""
        pass