Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cubicweb
cubes
classifiedad
Commits
ce57a6af8e88
Commit
40c03a3f
authored
Jul 24, 2010
by
Nicolas Chauvat
Browse files
[similar] improve information extraction
parent
04de47d5d435
Changes
1
Hide whitespace changes
Inline
Side-by-side
entities.py
View file @
ce57a6af
...
...
@@ -17,16 +17,27 @@ from cubes.similarity import register_similarity, reset_similarity, vsm
def prefix_slice(value, start, end, *args):
    """Label a slice of the positional arguments with ``value``.

    The arguments in ``args[start:end]`` are joined with single spaces and
    returned as ``u'<value>: <joined>'``.  An empty slice yields
    ``u'<value>: '``.
    """
    selected = args[start:end]
    joined = u' '.join(selected)
    return u'%s: %s' % (value, joined)
def rgx_replace(value, *args):
    r"""Expand ``\N`` placeholders in ``value`` with the N-th positional argument.

    Placeholders are 1-based: ``\1`` is replaced by ``args[0]``, ``\2`` by
    ``args[1]`` and so on, e.g.
    ``rgx_replace(u'year-built: \\2', u'annee', u'1975')`` returns
    ``u'year-built: 1975'``.

    The template is expanded in a single pass, so:

    * text coming from an argument is never re-scanned (an argument that
      itself contains ``\2`` is inserted verbatim);
    * multi-digit placeholders are read as a whole (``\10`` means the tenth
      argument, not ``\1`` followed by ``0``).

    Placeholders whose index is larger than the number of arguments are left
    untouched, as are ``\0`` and placeholders with a leading zero.
    """
    def _expand(match):
        position = int(match.group(1))
        if position <= len(args):
            return args[position - 1]
        # No matching argument: keep the placeholder text as written.
        return match.group(0)

    # A callback (rather than a replacement string) keeps backslashes in the
    # arguments from being interpreted as escapes by re.sub.
    return re.sub(r'\\([1-9][0-9]*)', _expand, value)
# Anchored patterns (``^...$``) used as keys in TRANSLATE_TABLE.
# The backslashes are written as raw literals (or explicitly escaped where the
# ``u`` prefix is needed for non-ASCII characters) so that ``\d`` is not an
# invalid string escape; the compiled patterns are unchanged.
number2digit_rgx = re.compile(r'^\d\d$')        # exactly two digits
number4digit_rgx = re.compile(r'^\d\d\d\d$')    # exactly four digits
number5digit_rgx = re.compile(r'^\d\d\d\d\d$')  # exactly five digits
number_rgx = re.compile(r'^\d+$')               # one or more digits
# Digits immediately followed by "m²" or "m2".
surface_rgx = re.compile(u'^\\d+m[²2]$')
# "année" or "annee".
annee_rgx = re.compile(u'^ann[ée]e$')
TRANSLATE_TABLE
=
[
((
'vlce'
,),
ft
.
partial
(
vsm
.
const
,
'valence'
)),
((
's-sol'
,),
ft
.
partial
(
vsm
.
const
,
'sous-sol'
)),
((
'dt'
,),
ft
.
partial
(
vsm
.
const
,
'dont'
)),
((
'ch'
,),
ft
.
partial
(
vsm
.
const
,
'chambre'
)),
((
'chbs'
,),
ft
.
partial
(
vsm
.
const
,
'chambre'
)),
((
'chbrs'
,),
ft
.
partial
(
vsm
.
const
,
'chambre'
)),
((
'chambres'
,),
ft
.
partial
(
vsm
.
const
,
'chambre'
)),
((
'grd'
,),
ft
.
partial
(
vsm
.
const
,
'grand'
)),
((
'gde'
,),
ft
.
partial
(
vsm
.
const
,
'grande'
)),
((
'ttes'
,),
ft
.
partial
(
vsm
.
const
,
'toutes'
)),
((
'niv'
,),
ft
.
partial
(
vsm
.
const
,
'niveau'
)),
((
'st'
,),
ft
.
partial
(
vsm
.
const
,
'saint'
)),
...
...
@@ -38,12 +49,16 @@ TRANSLATE_TABLE = [
((
'salle'
,
'à'
,
'manger'
),
ft
.
partial
(
vsm
.
const
,
'salle à manger'
)),
((
'salle'
,
'bains'
),
ft
.
partial
(
vsm
.
const
,
'salle de bain'
)),
((
'salle'
,
'eau'
),
ft
.
partial
(
vsm
.
const
,
'salle d
\'
eau'
)),
((
'jacuzi'
),
ft
.
partial
(
vsm
.
const
,
'jacuzzi'
)),
((
'jacuzi'
,),
ft
.
partial
(
vsm
.
const
,
'jacuzzi'
)),
((
number5digit_rgx
,),
ft
.
partial
(
vsm
.
prefix_join
,
'postcode'
)),
((
number_rgx
,
u
'm²'
),
ft
.
partial
(
vsm
.
prefix_join
,
'surface'
)),
((
number_rgx
,
'm2'
),
lambda
x
,
y
:
u
'surface: %s m²'
%
x
),
((
surface_rgx
,),
lambda
x
:
u
'surface: %s m²'
%
x
[:
-
2
]),
((
number_rgx
,
u
'pièces'
),
ft
.
partial
(
prefix_slice
,
u
'rooms'
,
0
,
1
)),
((
'drome'
,),
ft
.
partial
(
vsm
.
const
,
u
'drôme'
)),
((
annee_rgx
,
number4digit_rgx
),
ft
.
partial
(
rgx_replace
,
u
'year-built:
\\
2'
)),
((
annee_rgx
,
number2digit_rgx
),
ft
.
partial
(
rgx_replace
,
u
'year-built: 19
\\
2'
)),
((
'drome'
,),
ft
.
partial
(
vsm
.
const
,
u
'location: http://fr.wikipedia.org/wiki/Dr%C3%B4me_%28d%C3%A9partement%29'
)),
((
'drôme'
,),
ft
.
partial
(
vsm
.
const
,
u
'location: http://fr.wikipedia.org/wiki/Dr%C3%B4me_%28d%C3%A9partement%29'
)),
((
'gare'
,
'tgv'
),
ft
.
partial
(
vsm
.
prefix_join
,
'location'
)),
((
'danton'
,),
ft
.
partial
(
vsm
.
prefix_join
,
'location'
)),
((
'grand'
,
'charran'
),
ft
.
partial
(
vsm
.
prefix_join
,
'location'
)),
...
...
@@ -51,6 +66,7 @@ TRANSLATE_TABLE = [
((
'appartement'
,),
ft
.
partial
(
vsm
.
prefix_join
,
'property-type'
)),
((
'maison'
,),
ft
.
partial
(
vsm
.
prefix_join
,
'property-type'
)),
((
'villa'
,),
ft
.
partial
(
vsm
.
const
,
'property-type: maison'
)),
((
'vlce'
,),
ft
.
partial
(
vsm
.
const
,
'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me'
)),
((
'valence'
,),
ft
.
partial
(
vsm
.
const
,
'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me'
)),
((
'26000'
,
'valence'
,),
ft
.
partial
(
vsm
.
const
,
'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me'
)),
((
'st'
,
'marcel'
),
ft
.
partial
(
vsm
.
const
,
'location: http://dbpedia.org/resource/Saint-Marcel-l%C3%A8s-Valence'
)),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment