test_utils.py 6.03 KB
Newer Older
1
# -*- coding: utf-8 -*-
2
# copyright 2006-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of logilab-mtconverter.
#
# logilab-mtconverter is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 2.1 of the License, or (at your
# option) any later version.
#
# logilab-mtconverter is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
# for more details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
Sylvain's avatar
Sylvain committed
19
20
from logilab.common.testlib import TestCase, unittest_main

21
import locale
22
from io import BytesIO
23
24
from logilab.mtconverter import *

25
# Expected ``xml_escape`` output for selected control-character inputs.
# The escape tests expect every other character below 0x20 that is missing
# from this table to come back as a single space.
SPECIAL_CHARS = {
    "\f": "\n",
    "\b": " ",
    "\n": "\n",
    "\r": "\r",
    "\r\n": "\r\n",
    "\t": "\t",
    "\v": "\n",
}
Sylvain's avatar
Sylvain committed
34

35

Laurent Peuch's avatar
Laurent Peuch committed
36
class HtmlEscapeTC(TestCase):
    """Check ``xml_escape`` and ``html_unescape`` against fixed samples.

    Each sample runs inside ``subTest`` rather than being yielded from a
    generator test: runners that do not iterate generator tests (plain
    ``unittest``, recent pytest) would otherwise never execute the assertions.
    """

    def test_escape(self):
        """Markup-significant characters are replaced by entities."""
        samples = [
            ("toto", "toto"),
            ("r&d", "r&amp;d"),
            ("23<12 && 3>2", "23&lt;12 &amp;&amp; 3&gt;2"),
            ('d"h"', "d&quot;h&quot;"),
            ("h'", "h&#39;"),
        ]
        for data, expected in samples:
            with self.subTest(data=data):
                self.assertEqual(xml_escape(data), expected)

    def _check_control_chars(self):
        """Assert the expected ``xml_escape`` output for characters below 0x20."""
        for car, trcar in SPECIAL_CHARS.items():
            with self.subTest(car=car):
                self.assertEqual(xml_escape(car), trcar)
        for carnum in range(32):
            car = chr(carnum)
            if car in SPECIAL_CHARS:
                # already checked against its dedicated translation above
                continue
            with self.subTest(carnum=carnum):
                self.assertEqual(xml_escape(car), " ")

    def test_escape_special_chars(self):
        """Control characters are translated; a non-ASCII letter is kept."""
        self._check_control_chars()
        self.assertEqual(xml_escape(u"é"), u"é")

    def test_escape_special_chars_unicode(self):
        """Same control-character checks, kept under its historical name."""
        self._check_control_chars()

    def test_html_unescape(self):
        """Entities, including named ones such as ``&equiv;``, are decoded."""
        samples = [
            ("toto", "toto"),
            ("r&amp;d", "r&d"),
            ("23&lt;12 &amp;&amp; 3&gt;2", "23<12 && 3>2"),
            ("d&quot;h&quot;", 'd"h"'),
            ("h&#39;", "h'"),
            ("x &equiv; y", u"x \u2261 y"),
        ]
        for data, expected in samples:
            with self.subTest(data=data):
                self.assertEqual(html_unescape(data), expected)
76

77
78
79

class GuessEncodingTC(TestCase):
    """Check ``guess_encoding`` on declared and undeclared byte content."""

    def test_emacs_style_declaration(self):
        """An Emacs-style ``coding`` cookie gives the encoding."""
        data = b"""# -*- coding: latin1 -*-"""
        self.assertEqual(guess_encoding(data), "latin1")

    def test_emacs_style_declaration_stringIO(self):
        """The same cookie is found when the data is wrapped in a ``BytesIO``."""
        data = b"""# -*- coding: latin1 -*-"""
        self.assertEqual(guess_encoding(BytesIO(data)), "latin1")

    def test_xml_style_declaration(self):
        """The ``encoding`` attribute of an XML declaration is used."""
        data = b"""<?xml version="1.0" encoding="latin1"?>
        <root/>"""
        self.assertEqual(guess_encoding(data), "latin1")

    def test_html_style_declaration(self):
        """The charset of an HTML ``content-type`` meta tag is used."""
        data = b"""<html xmlns="http://www.w3.org/1999/xhtml" xmlns:erudi="http://www.logilab.fr/" xml:lang="fr" lang="fr">
<head>
<base href="http://intranet.logilab.fr/jpl/" /><meta http-equiv="content-type" content="text/html; charset=latin1"/>
</head>
<body><p>hello world</p>
</body>
</html>"""
        self.assertEqual(guess_encoding(data), "latin1")

    def test_bad_detection(self):
        """Undeclared data gives "ascii" with chardet, else ``DEFAULT_ENCODING``."""
        data = b'''class SchemaViewer(object):
    """return an ureport layout for some part of a schema"""
    def __init__(self, req=None, encoding=None):
'''
        # Only the import probe lives in the ``try``: an ImportError raised
        # from inside guess_encoding() must not be mistaken for "no chardet".
        try:
            import chardet  # noqa: F401 -- availability probe only
        except ImportError:
            self.assertEqual(guess_encoding(data), DEFAULT_ENCODING)
        else:
            # ascii detected by chardet
            self.assertEqual(guess_encoding(data), "ascii")
114

Laurent Peuch's avatar
Laurent Peuch committed
115

116
117
class GuessMimetymeAndEncodingTC(TestCase):
    """Check ``guess_mimetype_and_encoding`` for various file names."""

    def test_base(self):
        """A ``.txt`` name gives text/plain and the locale's preferred encoding."""
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.txt", data=b"xxx"
        )
        self.assertEqual(mimetype, u"text/plain")
        self.assertEqual(encoding, locale.getpreferredencoding())

    def test_set_mime_and_encoding_gz_file(self):
        """``.txt.gz`` is gzip-encoded text/plain; bare ``.gz`` is application/gzip."""
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.txt.gz", data=b"xxx"
        )
        self.assertEqual(mimetype, u"text/plain")
        self.assertEqual(encoding, u"gzip")
        # an explicit container format does not change the outcome
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.txt.gz", data=b"xxx", format="application/gzip"
        )
        self.assertEqual(mimetype, u"text/plain")
        self.assertEqual(encoding, u"gzip")
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.gz", data=b"xxx"
        )
        self.assertEqual(mimetype, u"application/gzip")
        self.assertIsNone(encoding)

    def test_set_mime_and_encoding_bz2_file(self):
        """``.txt.bz2`` is bzip2-encoded text/plain; bare ``.bz2`` is application/bzip2."""
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.txt.bz2", data=b"xxx"
        )
        self.assertEqual(mimetype, u"text/plain")
        self.assertEqual(encoding, u"bzip2")
        # an explicit container format does not change the outcome
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.txt.bz2", data=b"xxx", format="application/bzip2"
        )
        self.assertEqual(mimetype, u"text/plain")
        self.assertEqual(encoding, u"bzip2")
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.bz2", data=b"xxx"
        )
        self.assertEqual(mimetype, u"application/bzip2")
        self.assertIsNone(encoding)

    def test_set_mime_and_encoding_unknwon_ext(self):
        """An unknown extension gives application/octet-stream and no encoding."""
        mimetype, encoding = guess_mimetype_and_encoding(
            filename=u"foo.789", data=b"xxx"
        )
        self.assertEqual(mimetype, u"application/octet-stream")
        self.assertIsNone(encoding)
156

157

158
159
class TransformDataTC(TestCase):
    """Check ``TransformData`` construction."""

    def test_autodetect_encoding_if_necessary(self):
        """With no encoding given, ``encoding`` matches the XML declaration."""
        xml_source = b"""<?xml version="1.0" encoding="latin1"?>
        <root/>"""
        transformed = TransformData(xml_source, "text/xml")
        self.assertEqual(transformed.encoding, "latin1")
166

Sylvain's avatar
Sylvain committed
167

Laurent Peuch's avatar
Laurent Peuch committed
168
if __name__ == "__main__":
    # Run directly: hand over to the runner imported from logilab.common.testlib.
    unittest_main()