unittest_transforms.py 5.36 KB
Newer Older
1
# -*- coding: iso-8859-1 -*-
2
# copyright 2006-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of logilab-mtconverter.
#
# logilab-mtconverter is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 2.1 of the License, or (at your
# option) any later version.
#
# logilab-mtconverter is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
# for more details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
"Sylvain ext:(%22)'s avatar
"Sylvain ext:(%22) committed
19
20
from logilab.common.testlib import TestCase, unittest_main

21
22
from logilab.mtconverter.engine import TransformEngine
from logilab.mtconverter import TransformData, TransformError, \
23
24
     register_base_transforms, register_pil_transforms, \
     register_pygments_transforms
"Sylvain ext:(%22)'s avatar
"Sylvain ext:(%22) committed
25

26
27
28
# Single engine instance used by the tests in this module; the base and PIL
# transforms are registered on it at import time.
ENGINE = TransformEngine()
register_base_transforms(ENGINE)
register_pil_transforms(ENGINE)
29
# Return value of the pygments registration; test_python_to_html skips itself
# when this is falsy.
_pygments_available = register_pygments_transforms(ENGINE)
30

Aurelien Campeas's avatar
Aurelien Campeas committed
31
import logilab.mtconverter as mtc
32
import os
Aurelien Campeas's avatar
Aurelien Campeas committed
33
import os.path as osp
34
import errno
35
import subprocess
36
# Directory holding this test module; the hello.* fixture files used by the
# tests are looked up relative to it.
DATAPATH = osp.dirname(__file__)
37

38
class MiscTransformsTC(TestCase):
    """Conversion tests run against the module-level ``ENGINE``.

    Each test wraps some input in a ``TransformData`` and checks what
    ``ENGINE.convert`` returns for the requested target MIME type.
    """

    def test_html_to_text(self):
        # Bold markup and a paragraph with a line break, both to plain text.
        data = TransformData(u'<b>yo (zou  ;)</b>', 'text/html', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'**yo (zou  ;)**')

        data = TransformData(u'<p>yo <br/>zogzog </p>', 'text/html', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'yo  \nzogzog')

    def test_html_to_text_noenc(self):
        # Unconditionally skipped: everything after skipTest() is unreachable
        # and is kept only for when the skip is lifted.
        self.skipTest('Encoding detection with chardet does not work')
        # will trigger guess_encoding, check non-utf8 encoding
        data = TransformData(u"<b>yo (l'tat  l'oeuf)</b>".encode('latin1'), 'text/html')
        self.assertIn(data.encoding, ('latin1', 'windows-1252'))
        data.check_encoding()

        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        # NOTE(review): this expected value does not match the input above;
        # it looks copied from test_html_to_text -- confirm before re-enabling.
        self.assertEqual(converted, u'**yo (zou  ;)**')

    def test_xml_to_text(self):
        data = TransformData(u'<root><b>yo (zou  ;)</b>a<tag/>b<root>', 'application/xml', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'yo (zou  ;) a b')

    def test_pgpsignature_to_text(self):
        # The signature text is expected to come back unchanged.
        _data = u"""-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)

iEYEARECAAYFAkxX5p8ACgkQkjcInxztrI64QQCggKA+PmbLYnGNtBB3Lb3pO3P8
r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR
=GDDk
-----END PGP SIGNATURE-----
"""
        data = TransformData(_data, 'application/pgp-signature')
        converted = ENGINE.convert(data, 'text/plain').decode()
        self.assertMultiLineEqual(converted, _data)

    def test_odt_to_text(self):
        # Same document fed twice: once as an open file object, once as the
        # raw bytes read from it. Both must give the same text.
        odt_path = osp.join(DATAPATH, 'hello.odt')
        odt_mimetype = 'application/vnd.oasis.opendocument.text'
        expected = u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S'

        # The file object is handed to TransformData, so it has to stay open
        # until the conversion has finished; the `with` closes it afterwards.
        with open(odt_path, 'rb') as fobj:
            data = TransformData(fobj, odt_mimetype, 'utf8')
            converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, expected)

        # ZipFile will complain that
        # TypeError: file() argument 1 must be (encoded string without NULL bytes), not str
        # if given a plain str ... we shield ourselves from that.
        with open(odt_path, 'rb') as fobj:
            data = TransformData(fobj.read(), odt_mimetype, 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, expected)

    def test_pdf_to_text(self):
        # Build hello.pdf with pdflatex first; skip when the tool is missing.
        try:
            # Discard pdflatex's stdout, and close the devnull handle once the
            # command has returned.
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(['pdflatex', 'hello'],
                                      cwd=osp.abspath(DATAPATH),
                                      stdout=devnull)
        except OSError as exc:
            if exc.errno == errno.ENOENT:
                self.skipTest('pdflatex not installed')
            else:
                raise
        with open(osp.join(DATAPATH, 'hello.pdf'), 'rb') as fobj:
            data = TransformData(fobj.read(), 'application/pdf', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'hello')

    def test_python_to_html(self):
        if not _pygments_available:
            self.skipTest('pygments is not installed')
        # This very file is used as the Python input.
        with open(__file__, 'rb') as fobj:
            data = TransformData(fobj.read(), 'text/x-python3', 'latin1')
        converted = ENGINE.convert(data, 'text/html').decode()
        self.assertTrue(converted.startswith('<div class="highlight">'))

    def tearDown(self):
        # Best-effort removal of the files test_pdf_to_text may have produced;
        # OSError is ignored on purpose because tearDown runs after every
        # test, whether or not those files were generated.
        for ext in ('pdf', 'aux', 'log'):
            try:
                os.unlink(osp.join(DATAPATH, 'hello.' + ext))
            except OSError:
                pass

"Sylvain ext:(%22)'s avatar
"Sylvain ext:(%22) committed
121
122
# Run the tests through unittest_main when this module is executed as a script.
if __name__ == '__main__':
    unittest_main()
123
124