Commit 0fba6d6e authored by Nicolas Chauvat
Browse files

[entities] move common filters to cube similarity

parent b89483b7b717
......@@ -14,54 +14,43 @@ from cubicweb.entities import AnyEntity
from cubes.similarity import register_similarity, reset_similarity, vsm
def replace_filter(table, text):
    """Return *text* after applying every substitution listed in *table*.

    :param table: iterable of ``(source, replacement)`` string pairs; the
        pairs are applied in order, each one on the result of the previous.
    :param text: the string to rewrite.
    :return: the rewritten string (``text`` unchanged if *table* is empty).
    """
    for src, dst in table:
        # str.replace substitutes every occurrence of src, not just the first
        text = text.replace(src, dst)
    return text
def const(value, *args):
    """Return *value*, ignoring any further positional arguments.

    Meant to be wrapped with ``functools.partial`` so that the resulting
    callable yields the same value whatever it is called with.
    """
    return value
def prefix_join(value, *args):
    """Return ``'<value>: <args joined by single spaces>'``.

    :param value: the prefix placed before the colon.
    :param args: strings joined with one space after the prefix; with no
        *args* the result is ``'<value>: '``.
    """
    return u'%s: %s' % (value, u' '.join(args))
# Patterns matching a token made only of decimal digits.  Raw strings are used
# so that ``\d`` reaches the regex engine instead of being read as an (invalid)
# string escape sequence.
# Both names are kept because TRANSLATE_TABLE refers to each of them.
number = re.compile(r'^\d+$')
number_rgx = re.compile(r'^\d+$')
# Token rewrite rules; this table is bound to vsm.transform_filter in the
# `filters` list below.
# Each rule is a pair: a tuple of matchers (literal strings and/or the compiled
# digit-only regexes) and a functools.partial of const / prefix_join with the
# output value or prefix already bound as first argument.
# NOTE(review): how matchers are compared with tokens is decided inside
# vsm.transform_filter, which is not visible here -- presumably one matcher per
# consecutive token; confirm.
# NOTE(review): every rule is listed twice, first with the local
# const / prefix_join / number names and then with vsm.* / number_rgx. This
# looks like both sides of a diff kept together -- confirm which set should stay.
TRANSLATE_TABLE = [
(('vlce',), ft.partial(const, 'valence')),
(('chbs',), ft.partial(const, 'chambre')),
(('chbrs',), ft.partial(const, 'chambre')),
(('chambres',), ft.partial(const, 'chambre')),
(('grd',), ft.partial(const, 'grand')),
(('ttes',), ft.partial(const, 'toutes')),
(('niv',), ft.partial(const, 'niveau')),
(('st',), ft.partial(const, 'saint')),
(('min',), ft.partial(const, 'minute')),
(('mn',), ft.partial(const, 'minute')),
(('hab',), ft.partial(const, 'habitable')),
(('sde',), ft.partial(const, 'salle d\'eau')),
(('sdb',), ft.partial(const, 'salle de bain')),
(('salle','bains'), ft.partial(const, 'salle de bain')),
(('salle','eau'), ft.partial(const, 'salle d\'eau')),
((number, u'm²'), ft.partial(prefix_join, 'surface')),
((number, 'm2'), ft.partial(prefix_join, 'surface')),
(('drome',), ft.partial(const, u'drôme')),
(('gare', 'tgv'), ft.partial(prefix_join, 'location')),
(('danton',), ft.partial(prefix_join, 'location')),
(('appartement',), ft.partial(prefix_join, 'property-type')),
(('maison',), ft.partial(prefix_join, 'property-type')),
(('villa',), ft.partial(const, 'property-type: maison')),
(('valence',), ft.partial(const, 'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me')),
(('26000', 'valence',), ft.partial(const, 'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me')),
(('st', 'marcel'), ft.partial(const, 'location: http://dbpedia.org/resource/Saint-Marcel-l%C3%A8s-Valence')),
(('saint', 'marcel'), ft.partial(const, 'location: http://dbpedia.org/resource/Saint-Marcel-l%C3%A8s-Valence')),
((number, number, '€'), ft.partial(prefix_join, 'price')),
# Same rules again, written with the vsm helpers and number_rgx.
(('vlce',), ft.partial(vsm.const, 'valence')),
(('chbs',), ft.partial(vsm.const, 'chambre')),
(('chbrs',), ft.partial(vsm.const, 'chambre')),
(('chambres',), ft.partial(vsm.const, 'chambre')),
(('grd',), ft.partial(vsm.const, 'grand')),
(('ttes',), ft.partial(vsm.const, 'toutes')),
(('niv',), ft.partial(vsm.const, 'niveau')),
(('st',), ft.partial(vsm.const, 'saint')),
(('min',), ft.partial(vsm.const, 'minute')),
(('mn',), ft.partial(vsm.const, 'minute')),
(('hab',), ft.partial(vsm.const, 'habitable')),
(('sde',), ft.partial(vsm.const, 'salle d\'eau')),
(('sdb',), ft.partial(vsm.const, 'salle de bain')),
(('salle','bains'), ft.partial(vsm.const, 'salle de bain')),
(('salle','eau'), ft.partial(vsm.const, 'salle d\'eau')),
((number_rgx, u'm²'), ft.partial(vsm.prefix_join, 'surface')),
((number_rgx, 'm2'), ft.partial(vsm.prefix_join, 'surface')),
(('drome',), ft.partial(vsm.const, u'drôme')),
(('gare', 'tgv'), ft.partial(vsm.prefix_join, 'location')),
(('danton',), ft.partial(vsm.prefix_join, 'location')),
(('appartement',), ft.partial(vsm.prefix_join, 'property-type')),
(('maison',), ft.partial(vsm.prefix_join, 'property-type')),
(('villa',), ft.partial(vsm.const, 'property-type: maison')),
(('valence',), ft.partial(vsm.const, 'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me')),
(('26000', 'valence',), ft.partial(vsm.const, 'location: http://dbpedia.org/resource/Valence%2C_Dr%C3%B4me')),
(('st', 'marcel'), ft.partial(vsm.const, 'location: http://dbpedia.org/resource/Saint-Marcel-l%C3%A8s-Valence')),
(('saint', 'marcel'), ft.partial(vsm.const, 'location: http://dbpedia.org/resource/Saint-Marcel-l%C3%A8s-Valence')),
((number_rgx, number_rgx, '€'), ft.partial(vsm.prefix_join, 'price')),
]
# (source, replacement) string pairs; this table is bound to replace_filter in
# the `filters` list below.
# NOTE(review): as displayed, both strings look like a single space, which would
# make the substitution a no-op. The source string may originally have been a
# double space or a non-breaking space -- confirm against the repository.
REPLACE_TABLE = [
(' ', ' '),
]
filters = [ft.partial(replace_filter, REPLACE_TABLE),
filters = [ft.partial(vsm.replace_filter, REPLACE_TABLE),
ft.partial(vsm.tokenize_filter, vsm.TOKENIZE_PATTERN),
ft.partial(vsm.exclude_filter, vsm.STOP_WORDS['fr']),
ft.partial(vsm.transform_filter, TRANSLATE_TABLE),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment