Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
open-source
logilab-mtconverter
Commits
8efa4841f000
Commit
86175bcd
authored
Aug 06, 2008
by
Sylvain Thenault
Browse files
* new fallbackencoding argument to guess_mimetype_and_encoding, given to
guess_encoding when necessary
parent
0df6bbe187a7
Changes
2
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
8efa4841
Change log for mtconverter
==========================
--
* application/xml -> text/plain transformation
* new fallbackencoding argument to guess_mimetype_and_encoding, given to
guess_encoding when necessary
2008-06-30 -- 0.4.0
* use a new pure python transformation to transform html into
formatted text. The code is based on http://www.aaronsw.com/2002/html2text/
...
...
__init__.py
View file @
8efa4841
...
...
@@ -47,8 +47,7 @@ def is_text_mimetype(mimetype):
return
(
mimetype
.
startswith
(
'text/'
)
or
mimetype
in
TEXT_MIMETYPES
)
def
guess_encoding
(
buffer
,
fallbackencoding
=
None
):
"""try to guess encoding from a buffer
"""
"""try to guess encoding from a buffer"""
if
hasattr
(
buffer
,
'getvalue'
):
# may be a StringIO
buffer
=
buffer
.
getvalue
()
# try to get a character set declaration
...
...
@@ -65,7 +64,8 @@ def guess_encoding(buffer, fallbackencoding=None):
return
detected
[
'encoding'
]
return
fallbackencoding
or
DEFAULT_ENCODING
def
guess_mimetype_and_encoding
(
format
=
None
,
encoding
=
None
,
data
=
None
,
filename
=
None
):
def
guess_mimetype_and_encoding
(
format
=
None
,
encoding
=
None
,
data
=
None
,
filename
=
None
,
fallbackencoding
=
None
):
if
format
and
format
.
split
(
'/'
)[
-
1
]
in
BINARY_ENCODINGS
:
format
=
None
# try to do better
if
filename
and
not
format
:
...
...
@@ -77,7 +77,7 @@ def guess_mimetype_and_encoding(format=None, encoding=None, data=None, filename=
else
:
format
=
u
'application/octet-stream'
if
not
encoding
and
data
and
format
and
is_text_mimetype
(
format
):
encoding
=
guess_encoding
(
data
)
encoding
=
guess_encoding
(
data
,
fallbackencoding
)
return
format
,
encoding
def
html_escape
(
data
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment