Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
open-source
logilab-mtconverter
Commits
849f99b123f1
Commit
36710be3
authored
Jan 08, 2009
by
Adrien Di Mascio
Browse files
provide a more robust implementation of html_unescape, considering any HTML entities
parent
bca8695c467f
Changes
2
Hide whitespace changes
Inline
Side-by-side
__init__.py
View file @
849f99b1
...
...
@@ -20,6 +20,7 @@ import locale
import
mimetypes
import
re
from
StringIO
import
StringIO
import
htmlentitydefs
try
:
import
chardet
...
...
@@ -86,10 +87,10 @@ def html_escape(data):
.
replace
(
'"'
,
'"'
).
replace
(
"'"
,
'''
))
def
html_unescape
(
data
):
"""escapes XML/HTML
forbidden characters in attributes and PCDATA
"""
return
(
data
.
replace
(
'&'
,
'&'
).
replace
(
'<'
,
'<'
).
replace
(
'>'
,
'>'
)
.
replace
(
'"'
,
'"'
).
replace
(
'''
,
"'"
).
replace
(
'''
,
"'"
))
"""
un
escapes XML/HTML
entities
"""
for
entityname
,
codepoint
in
htmlentitydefs
.
name2codepoint
.
iteritems
():
data
=
data
.
replace
(
'&%s;'
%
entityname
,
unichr
(
codepoint
))
return
data
.
replace
(
'''
,
"'"
)
class
TransformData
(
object
):
"""wrapper arround transformed data to add extra infos such as MIME
...
...
test/unittest_utils.py
View file @
849f99b1
# -*- coding: utf-8 -*-
from
logilab.common.testlib
import
TestCase
,
unittest_main
import
locale
...
...
@@ -16,6 +17,16 @@ class HtmlEscapeTC(TestCase):
]:
self
.
assertEquals
(
html_escape
(
data
),
expected
)
def
test_html_unescape
(
self
):
for
data
,
expected
in
[(
'toto'
,
'toto'
),
(
'r&d'
,
'r&d'
),
(
'23<12 && 3>2'
,
'23<12 && 3>2'
),
(
'd"h"'
,
'd"h"'
),
(
'h''
,
"h'"
),
(
'x ≡ y'
,
u
"x
\u2261
y"
),
]:
self
.
assertEquals
(
html_unescape
(
data
),
expected
)
class
GuessEncodingTC
(
TestCase
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment