Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
open-source
logilab-mtconverter
Commits
1d5b04e4567c
Commit
921e768f
authored
Sep 02, 2009
by
Sylvain Thénault
Browse files
when an encoding is found by the regexp, check that it is known to Python to avoid bad matches
parent
296b57e55475
Changes
3
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
1d5b04e4
Change log for mtconverter
==========================
2009-07-21 -- 0.7.0
2009-07-21 -- 0.7.0
* new need_guess function
* new fallbackmimetype argument to guess_mimetype_and_encoding
...
...
__init__.py
View file @
1d5b04e4
...
...
@@ -20,8 +20,9 @@ import locale
import
mimetypes
import
re
import
string
from
StringIO
import
StringIO
import
htmlentitydefs
import
codecs
from
StringIO
import
StringIO
try
:
import
chardet
...
...
@@ -66,7 +67,13 @@ def guess_encoding(buffer, fallbackencoding=None):
# try to get a character set declaration
m
=
CHARSET_DECL_RGX
.
search
(
buffer
[:
CHARSET_DECL_SEARCH_SIZE
])
if
m
is
not
None
:
return
m
.
group
(
1
)
guessed
=
m
.
group
(
1
)
try
:
# ensure encoding is known by python
codecs
.
lookup
(
guessed
)
return
guessed
except
LookupError
:
pass
if
buffer
.
lstrip
().
startswith
(
'<?xml'
):
# xml files with no encoding declaration default to UTF-8
return
'UTF-8'
...
...
test/unittest_utils.py
View file @
1d5b04e4
...
...
@@ -81,6 +81,17 @@ class GuessEncodingTC(TestCase):
</html>'''
self
.
assertEquals
(
guess_encoding
(
data
),
'latin1'
)
def
test_bad_detection
(
self
):
data
=
'''class SchemaViewer(object):
"""return an ureport layout for some part of a schema"""
def __init__(self, req=None, encoding=None):
'''
# ascii detected by chardet
try
:
import
chardet
self
.
assertEquals
(
guess_encoding
(
data
),
'ascii'
)
except
ImportError
:
self
.
assertEquals
(
guess_encoding
(
data
),
DEFAULT_ENCODING
)
class
GuessMimetymeAndEncodingTC
(
TestCase
):
def
test_base
(
self
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment