Commit c9cf8981 authored by Rémi Cardona
Browse files

[htmltransform] Make the transform work with both unicode and bytes input

Needed for py3k support. Found by running CubicWeb tests in py3k.
parent a2a917c08159
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
# You should have received a copy of the GNU Lesser General Public License along # You should have received a copy of the GNU Lesser General Public License along
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>. # with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
from six import binary_type
from html2text import html2text from html2text import html2text
from logilab.mtconverter.transform import Transform from logilab.mtconverter.transform import Transform
...@@ -30,4 +32,8 @@ class html_to_formatted_text(Transform): ...@@ -30,4 +32,8 @@ class html_to_formatted_text(Transform):
     def _convert(self, trdata):
-        return html2text(trdata.data).encode(trdata.encoding)
+        if isinstance(trdata.data, binary_type):
+            data = trdata.data.decode(trdata.encoding)
+        else:
+            data = trdata.data
+        return html2text(data).encode(trdata.encoding)
...@@ -45,6 +45,15 @@ class MiscTransformsTC(TestCase): ...@@ -45,6 +45,15 @@ class MiscTransformsTC(TestCase):
converted = ENGINE.convert(data, 'text/plain').decode().strip() converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'yo \nzogzog') self.assertEqual(converted, u'yo \nzogzog')
def test_binary_html_to_text(self):
data = TransformData(u'<b>yo (zou ;)</b>'.encode('utf-8'), 'text/html', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'**yo (zou ;)**')
data = TransformData(u'<p>yo <br/>zogzog </p>'.encode('utf-8'), 'text/html', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'yo \nzogzog')
def test_html_to_text_noenc(self): def test_html_to_text_noenc(self):
self.skipTest('Encoding detection with chardet does not work') self.skipTest('Encoding detection with chardet does not work')
# will trigger guess_encoding, check non-utf8 encoding # will trigger guess_encoding, check non-utf8 encoding
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment