[htmltransform] Make the transform work with both unicode and bytes input

Needed for py3k support. Found by running CubicWeb tests in py3k.
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
from six import binary_type
from html2text import html2text
from logilab.mtconverter.transform import Transform
def _convert(self, trdata):
    """Convert the HTML payload of ``trdata`` to formatted plain text.

    ``trdata.data`` may be either bytes or unicode text. Bytes are decoded
    with ``trdata.encoding`` before being handed to ``html2text``; text is
    passed through unchanged.

    :param trdata: object exposing ``data`` (the HTML input) and
      ``encoding`` (codec used both to decode bytes input and to encode
      the result)
    :return: the ``html2text`` rendering of the input, encoded with
      ``trdata.encoding``
    """
    data = trdata.data
    if isinstance(data, binary_type):
        # Bytes input must be decoded before html2text sees it.
        data = data.decode(trdata.encoding)
    return html2text(data).encode(trdata.encoding)
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'yo \nzogzog')
def test_binary_html_to_text(self):
    """Check that UTF-8 encoded (bytes) HTML input converts to plain text."""
    cases = (
        (u'<b>yo (zou ;)</b>', u'**yo (zou ;)**'),
        (u'<p>yo <br/>zogzog </p>', u'yo \nzogzog'),
    )
    for markup, expected in cases:
        # Feed the engine bytes, not unicode, to exercise the binary path.
        payload = TransformData(markup.encode('utf-8'), 'text/html', 'utf8')
        text = ENGINE.convert(payload, 'text/plain').decode().strip()
        self.assertEqual(text, expected)
def test_html_to_text_noenc(self):
self.skipTest('Encoding detection with chardet does not work')
# will trigger guess_encoding, check non-utf8 encoding
