Commit c9cf8981 authored by Rémi Cardona
Browse files

[htmltransform] Make the transform work with both unicode and bytes input

Needed for py3k support. Found by running CubicWeb tests in py3k.
parent a2a917c08159
......@@ -16,6 +16,8 @@
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
from six import binary_type
from html2text import html2text
from logilab.mtconverter.transform import Transform
......@@ -30,4 +32,8 @@ class html_to_formatted_text(Transform):
def _convert(self, trdata):
    """Convert the HTML carried by ``trdata`` to formatted plain text.

    Both bytes and unicode input are accepted (needed for py3k support):
    bytes are decoded to text before being handed to ``html2text``. The
    converted text is returned encoded with ``trdata.encoding``.
    """
    # NOTE(review): the ``data`` attribute name and the decode step using
    # ``trdata.encoding`` were reconstructed from a garbled diff extraction;
    # confirm against the upstream commit.
    data = trdata.data
    if isinstance(data, binary_type):
        # html2text works on text, so bytes input must be decoded first.
        data = data.decode(trdata.encoding)
    return html2text(data).encode(trdata.encoding)
......@@ -45,6 +45,15 @@ class MiscTransformsTC(TestCase):
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'yo \nzogzog')
def test_binary_html_to_text(self):
    """Check that UTF-8 encoded (bytes) HTML input converts to plain text."""
    # (html markup, expected stripped plain-text rendering)
    cases = (
        (u'<b>yo (zou ;)</b>', u'**yo (zou ;)**'),
        (u'<p>yo <br/>zogzog </p>', u'yo \nzogzog'),
    )
    for markup, expected in cases:
        payload = TransformData(markup.encode('utf-8'), 'text/html', 'utf8')
        rendered = ENGINE.convert(payload, 'text/plain').decode().strip()
        self.assertEqual(rendered, expected)
def test_html_to_text_noenc(self):
self.skipTest('Encoding detection with chardet does not work')
# will trigger guess_encoding, check non-utf8 encoding
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment