Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cubicweb
cubes
awstats
Commits
2495e993f1cf
Commit
06ad4d0e
authored
May 10, 2011
by
Arthur Lutz
Browse files
refactoring and documentation
parent
e0da87dae080
Changes
5
Hide whitespace changes
Inline
Side-by-side
README
View file @
2495e993
Summary
-------
cubicweb integrated awstats frontend
ccplugin.py
View file @
2495e993
...
...
@@ -19,6 +19,12 @@ from cubicweb.cwctl import CWCTL
from
utils
import
SECTIONSPEC
,
extract_stats_dict
,
eid_from_url
def
url_count_from_stats
(
stats_dict
):
'''
parse most visited urls in stats_dict generated from awstats txt file
returns two dictionnaries with eid as key and sequence of values as value
one for normal navigation, the other for rdf navigation
'''
visit_count_dict
=
{}
visit_count_rdf_dict
=
{}
for
item
in
stats_dict
[
'SIDER'
].
values
():
...
...
@@ -44,28 +50,34 @@ class UpdateWebstatsCommand(Command):
min_args
=
1
def get_current_stats_period(self, session, chosendate):
    """Return the StatPeriod entity for the month containing `chosendate`.

    The period bounds are computed with `first_day(chosendate)` and
    `last_day(start)`.  If a StatPeriod with exactly these `start` and
    `stop` values already exists, its entity is returned; otherwise a new
    StatPeriod is created through `session.create_entity` and returned.
    """
    start = first_day(chosendate)
    end = last_day(start)
    # pass the dates as query arguments instead of formatting them into
    # the RQL string, and run the lookup only once
    rql = ('Any P WHERE P is StatPeriod, '
           'P start %(start_date)s, P stop %(end_date)s')
    rset = session.execute(rql, {'start_date': start, 'end_date': end})
    if rset:
        return rset.get_entity(0, 0)
    return session.create_entity('StatPeriod', start=start, stop=end)
def
update_stats
(
self
,
session
,
args
):
''' parses awstats and creates or updates the corresponding
data in the cubicweb instance'''
if
args
:
chosendate
=
datetime
.
strptime
(
args
[
0
],
'%m/%Y'
)
else
:
chosendate
=
datetime
.
now
()
stats_period
=
self
.
get_current_stats_period
(
session
,
chosendate
)
awstatsdir
=
session
.
vreg
.
config
.
get
(
'awstats-dir'
,
'/var/lib/awstats'
)
domain
=
session
.
vreg
.
config
.
get
(
'awstats-domain'
,
''
)
filename
=
'awstats%s%s.txt'
%
(
chosendate
.
strftime
(
'%m%Y'
),
domain
and
'.%s'
%
domain
)
stats_dict
=
extract_stats_dict
(
awstatsdir
,
filename
)
normal_dict
,
rdf_dict
=
url_count_from_stats
(
stats_dict
)
is_rdf
=
False
rset
=
session
.
execute
(
'Any N WHERE X relation_type R, R name "stats_about", X to_entity Y, Y name N'
)
rql
=
'Any N WHERE X relation_type R, R name "stats_about", X to_entity Y, Y name N'
rset
=
session
.
execute
(
rql
)
allowed_types
=
[
item
[
0
]
for
item
in
rset
]
update_stats
=
{
'updated'
:
0
,
'created'
:
0
,
...
...
@@ -77,12 +89,12 @@ class UpdateWebstatsCommand(Command):
for
eid
,
values
in
count_dict
.
items
():
visit_count
=
visit_count_rdf
=
0
total_hits
=
sum
([
item
[
0
]
for
item
in
values
])
stats_period
=
self
.
get_current_stats_period
(
session
,
chosendate
)
entity
=
session
.
entity_from_eid
(
eid
)
if
not
entity
.
__regid__
in
allowed_types
:
update_stats
[
'skipped'
]
+=
1
continue
rql
=
'Any X,V WHERE X is Hits, X count V, X hit_type "%(hit_type)s", X stats_about E, E eid %(e)s, X period P, P eid %(sp)s'
rql
=
'Any X,V WHERE X is Hits, X count V, X hit_type "%(hit_type)s",'
\
'X stats_about E, E eid %(e)s, X period P, P eid %(sp)s'
rset
=
session
.
execute
(
rql
%
{
'e'
:
eid
,
'sp'
:
stats_period
.
eid
,
'hit_type'
:
is_rdf
and
'rdf'
or
'normal'
})
...
...
@@ -99,8 +111,10 @@ class UpdateWebstatsCommand(Command):
else
:
print
'create'
,
entity
update_stats
[
'created'
]
+=
1
session
.
create_entity
(
'Hits'
,
count
=
total_hits
,
period
=
stats_period
,
stats_about
=
entity
,
hit_type
=
is_rdf
and
u
'rdf'
or
u
'normal'
)
session
.
create_entity
(
'Hits'
,
count
=
total_hits
,
period
=
stats_period
,
stats_about
=
entity
,
hit_type
=
is_rdf
and
u
'rdf'
or
u
'normal'
)
print
update_stats
...
...
utils.py
View file @
2495e993
# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import
re
import
os.path
as
osp
...
...
@@ -33,6 +49,45 @@ SECTIONSPEC = {
'SIDER_404'
:
[
'urls with 404 errors'
,
'hits'
,
'last url referer'
],
}
# Human readable title of each awstats section, keyed by section identifier.
# The views use these titles for the navigation links, the table headings
# and the choices of the section selection field.
SECTIONLABELS = {
    'TIME': "Visits by hour",
    'VISITOR': 'Top visitors (by host)',
    'DAY': 'Visits by days of the month',
    'DOMAIN': 'Visitors domains/countries',
    'LOGIN': 'logged in users',
    'ROBOT': 'Robots/Spiders visitors',
    'WORMS': 'Worm visits',
    'EMAILSENDER': 'email sender',
    'EMAILRECEIVER': 'email receiver',
    'SESSION': 'Visits duration',
    'SIDER': 'Most visited URLs',
    'FILETYPES': 'Visited file types',
    'OS': 'Visiting operating systems',
    'BROWSER': 'Visiting browsers',
    'SCREENSIZE': 'Hits by Screen size',
    'UNKNOWNREFERER': 'Unknown referer os',
    'UNKNOWNREFERERBROWSER': 'Unknown referer browser',
    'ORIGIN': 'Origin of hits',
    'SEREFERRALS': 'Search engine referers hits',
    'PAGEREFS': 'Main external page referers',
    'SEARCHWORDS': 'Hits from search keyphrases',
    'KEYWORDS': 'Hits from search keywords',
    #'MISC': ['misc id', 'pages', 'hits', 'bandwidth'],
    'ERRORS': 'HTTP Status codes',
    'CLUSTER': 'Visits by cluster id',
    'SIDER_404': 'Hits with 404 errors',
}

# Human readable description of the origin codes ('From0' ... 'From4');
# the table view looks values of the 'origin' column up in this mapping.
ORIGIN_LABELS = {
    'From0': 'Direct address / Bookmark / Link in email...',
    'From1': 'Unknown Origin',
    'From2': 'Links from an Internet Search Engine',
    'From3': 'Links from an external page (other web sites except search engines)',
    'From4': 'Internal Link',
}
def
extract_stats_dict
(
awstats_dir
,
filename
):
''' from an awstats file extract structured data into a dict
...
...
views/actions.py
0 → 100644
View file @
2495e993
# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from
cubicweb.web
import
action
from
cubicweb.selectors
import
match_user_groups
class AwstatsAccessAction(action.Action):
    """Action linking to the awstats statistics view.

    Selectable only for users matching the 'managers' group.
    """
    __regid__ = 'awstats-action'
    # restrict the action to users belonging to the 'managers' group
    __select__ = match_user_groups('managers')

    title = _('awstats statistics')
    category = 'manage'
    order = 11

    def url(self):
        """Return the URL the request builds for '?vid=awstats'."""
        request = self._cw
        return request.build_url('?vid=awstats')
def registration_callback(vreg):
    """Register the application objects defined in this module on `vreg`."""
    for appobject in (AwstatsAccessAction,):
        vreg.register(appobject)
views/startup.py
View file @
2495e993
...
...
@@ -18,9 +18,9 @@
import
os
import
os.path
as
osp
import
re
from
datetime
import
datetime
import
urllib
from
logilab.mtconverter
import
xml_escape
from
logilab.common.textutils
import
BYTE_UNITS
...
...
@@ -29,21 +29,21 @@ from cubicweb.view import StartupView
from
cubicweb.web.views
import
forms
from
cubicweb.web.formfields
import
StringField
from
cubicweb.web
import
formwidgets
as
fwdgs
from
cubicweb
import
tags
,
Unauthorized
from
cubes.awstats.utils
import
SECTIONSPEC
,
extract_stats_dict
from
cubes.awstats.utils
import
SECTIONSPEC
,
SECTIONLABELS
,
\
extract_stats_dict
,
ORIGIN_LABELS
# FIXME - find a clean way to attach or pass this to form so it is available in form choices
# FIXME - find a clean way to attach or pass this to form so
# it is available in form choices
AWSTATS_DIR
=
'/var/lib/awstats'
#AWSTATS_DIR = '/home/arthur/local/awstats'
#AWSTATS_DIR = self._cw.vreg.config['awstats-dir']
def
extract_available_months
(
form
,
**
attrs
):
""" extract available months from list of awstats files """
months
=
[]
selected_domain
=
form
.
req
.
form
.
get
(
'domain'
,
''
)
#rint 'XXX', form.domain.choices[0]
for
filename
in
os
.
listdir
(
AWSTATS_DIR
):
match
=
re
.
search
(
'awstats(\d{6})\.?%s.txt'
%
selected_domain
,
filename
)
if
match
:
...
...
@@ -52,6 +52,7 @@ def extract_available_months(form, **attrs):
return
months
def
extract_available_domains
(
form
,
**
attrs
):
""" extract available domains from list of awstats files """
domains
=
[]
for
filename
in
os
.
listdir
(
AWSTATS_DIR
):
match
=
re
.
search
(
'awstats(\d{2})(\d{4})\.?(.*).txt'
,
filename
)
...
...
@@ -61,6 +62,7 @@ def extract_available_domains(form, **attrs):
return
domains
def
use_as_sort_key
(
value
):
""" use value as sort value, try it as an int, else just use value """
try
:
return
int
(
value
)
except
ValueError
:
...
...
@@ -68,6 +70,7 @@ def use_as_sort_key(value):
def
specific_format
(
header
,
value
):
""" guess from a header and value how to display it"""
if
value
is
None
:
return
elif
header
==
'bandwidth'
:
...
...
@@ -81,16 +84,18 @@ def specific_format(header, value):
return
datetime
.
strptime
(
value
,
'%Y%m%d'
).
strftime
(
'%d/%m/%Y'
)
except
ValueError
:
pass
return
xml_escape
(
value
)
return
xml_escape
(
urllib
.
unquote
(
value
).
decode
(
'utf8'
)
)
def convert_to_bytes(value):
    """Display bandwidth data using a human readable notation.

    `value` is divided by each size found in BYTE_UNITS (a mapping of unit
    label to unit size), largest size first.  The first unit giving a
    non-zero whole quotient is used and the result is returned as
    '<quotient> <label>'.

    When no unit gives a non-zero quotient (for instance when `value` is
    0), the value is reported in the smallest unit rather than falling
    through and returning None.

    NOTE(review): assumes `value` is an integer number of bytes -- confirm
    against the caller.
    """
    # largest unit first, so the first non-zero quotient is the best fit
    ordered = sorted(((size, label) for label, size in BYTE_UNITS.items()),
                     reverse=True)
    for size, label in ordered:
        # floor division keeps whole-unit results on any Python version
        quotient = value // size
        if quotient != 0:
            return '%s %s' % (quotient, label)
    if ordered:
        return '%s %s' % (value, ordered[-1][1])
    return None
class
AwstatsRefreshForm
(
forms
.
FieldsForm
):
"""Form to filter and select what stats are being displayed"""
__regid__
=
'select-awstats'
action
=
'/?vid=awstats'
domain
=
StringField
(
widget
=
fwdgs
.
Select
(
attrs
=
{
'onchange'
:
'this.form.submit()'
}),
...
...
@@ -104,14 +109,16 @@ class AwstatsRefreshForm(forms.FieldsForm):
choices
=
[
10
,
25
,
50
,
100
])
section
=
StringField
(
widget
=
fwdgs
.
Select
(
attrs
=
{
'onchange'
:
'this.form.submit()'
}),
label
=
_
(
'Show section :'
),
choices
=
[
''
,
]
+
SECTIONSPEC
.
key
s
())
choices
=
[
(
''
,
''
),]
+
[(
label
,
value
)
for
value
,
label
in
SECTIONLABELS
.
item
s
()
]
)
form_buttons
=
[
fwdgs
.
SubmitButton
(
label
=
_
(
'Apply'
))]
class
AwstatsView
(
StartupView
):
""" Simple HTML export of the stats in awstats files """
__regid__
=
'awstats'
def
call
(
self
):
""" main call """
req
=
self
.
_cw
form
=
self
.
_cw
.
vreg
[
'forms'
].
select
(
'select-awstats'
,
self
.
_cw
)
...
...
@@ -125,67 +132,75 @@ class AwstatsView(StartupView):
try
:
stats_dict
=
extract_stats_dict
(
AWSTATS_DIR
,
filename
)
except
IOError
:
filename
=
'awstats%s%s.txt'
%
(
extract_available_months
(
form
)[
0
],
domain
and
'.%s'
%
domain
)
filename
=
'awstats%s%s.txt'
%
(
extract_available_months
(
form
)[
0
],
domain
and
'.%s'
%
domain
)
stats_dict
=
extract_stats_dict
(
AWSTATS_DIR
,
filename
)
self
.
w
(
u
'<div id="awstats">'
)
self
.
w
(
u
'<h1>%s : %s</h1>'
%
(
_
(
'Domain'
),
domain
or
'default'
))
self
.
w
(
u
'<h2>%s : %s</h2>'
%
(
_
(
'Time period'
),
'%s/%s'
%
(
month
[:
2
],
month
[
2
:])
))
self
.
w
(
u
'<h2>%s : %s</h2>'
%
(
_
(
'Time period'
),
'%s/%s'
%
(
month
[:
2
],
month
[
2
:])
))
if
req
.
form
.
get
(
'section'
):
self
.
generic_table
(
req
.
form
.
get
(
'section'
),
stats_dict
,
limit
)
else
:
self
.
render_navigation
(
stats_dict
)
for
key
,
value
in
SECTIONSPEC
.
items
():
self
.
generic_table
(
key
,
stats_dict
,
limit
)
#if value[1] == 'hits':
# self.simple_hits_display(key, stats_dict, limit)
self
.
w
(
u
'</div>'
)
def render_navigation(self, stats_dict):
    """Render navigation according to which sections are present.

    Writes a list with one link per section of SECTIONSPEC whose entry in
    `stats_dict` holds at least one value.  Each link targets the
    '#<section key>' anchor and is labelled with the section title from
    SECTIONLABELS, falling back to the raw key when no title is defined.
    """
    self.w(u'<div>')
    # FIXME - have inline list using css (better : in cubicweb)
    self.w(u'<ul>')
    for key in SECTIONSPEC:
        if stats_dict[key].values():
            # a section without a title must not break the whole page
            label = SECTIONLABELS.get(key, key)
            self.w(u'<li><a href="#%s">%s</a></li>' % (key, label))
    self.w(u'</ul>')
    self.w(u'</div>')
def generic_table(self, section_name, stats_dict, limit):
    """Render one awstats section as an HTML table.

    Nothing is written when `stats_dict` has no entry for `section_name`
    or when that entry holds no values.  Otherwise the method writes a
    named anchor, the section title, a header row built from
    SECTIONSPEC[section_name] and then the rows returned by
    `self.order_values`, stopping once `limit` rows have been written.

    The first column of each row is written as a row header cell; when
    that column is 'origin' its value is translated through ORIGIN_LABELS
    (unknown codes are displayed as is).  All other columns are written as
    data cells.  Cell values are formatted with `specific_format`.
    """
    section = stats_dict.get(section_name)
    if section is None or not section.values():
        return
    columns = SECTIONSPEC[section_name]
    self.w(u'<a name="%s"/>' % section_name)
    self.w(u'<h3>%s</h3>' % SECTIONLABELS.get(section_name, section_name))
    self.w(u'<div><table class="listing">')
    self.w(u'<tr class="header">')
    for header in columns:
        self.w(u'<th scope="col">%s</th>' % xml_escape(header))
    self.w(u'</tr><tbody>')
    ordered_values = self.order_values(section_name, stats_dict)
    for index, (_sort_value, item) in enumerate(ordered_values):
        # test before writing so that exactly `limit` rows are emitted
        if index >= limit:
            break
        self.w(u'<tr>')
        for tdindex, header in enumerate(columns):
            value = item.get(header)
            if tdindex:
                self.w(u'<td class="data">%s</td>'
                       % specific_format(header, value))
            else:
                if header == 'origin':
                    # show the readable origin, keep unknown codes as is
                    value = ORIGIN_LABELS.get(value, value)
                self.w(u'<td scope="row">%s</td>'
                       % specific_format(header, value))
        self.w(u'</tr>')
    self.w(u'</tbody></table></div><br/>')
# Write a section as a nested HTML list of "<label> (<hits>)" entries.
# The label column is the first column declared for the section in
# SECTIONSPEC. Only items having a 'hits' key are considered.
def
simple_hits_display
(
self
,
section_name
,
stats_dict
,
limit
):
# name of the first column of the section, used as label and as heading
key
=
SECTIONSPEC
[
section_name
][
0
]
# (sort key, label) pairs; the sort key is derived from the 'hits' value
value_list
=
[(
use_as_sort_key
(
item
[
'hits'
]),
item
[
key
])
for
item
in
stats_dict
[
section_name
].
values
()
if
'hits'
in
item
]
if
value_list
:
# highest sort key first
value_list
.
sort
(
reverse
=
True
)
# one escaped list entry per pair, at most `limit` entries
itemlist
=
''
.
join
([
'<li>%s (%s)</li>'
%
(
xml_escape
(
item
[
1
]),
item
[
0
])
for
item
in
value_list
[:
limit
]])
# the heading shows the column name and the total number of pairs
self
.
w
(
u
'<ul><li><h2>%s (%s)</h2></li><ul> %s</ul></ul>'
%
(
key
,
len
(
value_list
),
itemlist
))
def order_values(self, section_name, stats_dict):
    """Extract the data of a section in ordered fashion.

    The sort column is chosen from SECTIONSPEC[section_name]: 'hour'
    (ascending) when the section has such a column, else 'hits'
    (descending), else the second declared column (descending).

    Returns a list of `(sort value, item)` tuples, where the sort value is
    the integer value of the sort column.  Items whose sort column is
    missing or is not an integer get a sort value of 0 instead of aborting
    the whole section.
    """
    columns = SECTIONSPEC[section_name]
    if "hour" in columns:
        order_key, reverse = "hour", False
    elif "hits" in columns:
        order_key, reverse = "hits", True
    else:
        order_key, reverse = columns[1], True

    def sort_value(item):
        """Return the integer sort value of `item`, 0 if unavailable."""
        try:
            return int(item[order_key])
        except (KeyError, TypeError, ValueError):
            return 0

    ordered_values = [(sort_value(item), item)
                      for item in stats_dict[section_name].values()]
    # sort on the integer only: comparing the item dicts on ties is
    # meaningless and not supported by every Python version
    ordered_values.sort(key=lambda pair: pair[0], reverse=reverse)
    return ordered_values
def
registration_callback
(
vreg
):
vreg
.
register
(
AwstatsView
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment