Commit 9a6f8a18 authored by Rémi Cardona
Browse files

[py3k] Fix odt2text to work properly with binary data

Related to #268148.
parent fda99736b65f
...@@ -74,14 +74,14 @@ r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR ...@@ -74,14 +74,14 @@ r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR
def test_odt_to_text(self): def test_odt_to_text(self):
data = TransformData(open(osp.join(DATAPATH, 'hello.odt')), data = TransformData(open(osp.join(DATAPATH, 'hello.odt'), 'rb'),
'application/vnd.oasis.opendocument.text', 'utf8') 'application/vnd.oasis.opendocument.text', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip() converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S') self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S')
# ZipFile will complain that # ZipFile will complain that
# TypeError: file() argument 1 must be (encoded string without NULL bytes), not str # TypeError: file() argument 1 must be (encoded string without NULL bytes), not str
# if given a plain str ... we shielded us from that. # if given a plain str ... we shielded us from that.
data = TransformData(open(osp.join(DATAPATH, 'hello.odt')).read(), data = TransformData(open(osp.join(DATAPATH, 'hello.odt'), 'rb').read(),
'application/vnd.oasis.opendocument.text', 'utf8') 'application/vnd.oasis.opendocument.text', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip() converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S') self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S')
......
...@@ -18,12 +18,15 @@ ...@@ -18,12 +18,15 @@
"""odt2text: Turn odt file into equivalent plain text file. """odt2text: Turn odt file into equivalent plain text file.
Copyright (C) 2009 Logilab S.A. Copyright (C) 2009 Logilab S.A.
""" """
from io import BytesIO
from zipfile import ZipFile from zipfile import ZipFile
from six import binary_type
from lxml import etree from lxml import etree
from tempfile import TemporaryFile as tmpfile
from logilab.mtconverter.transform import Transform from logilab.mtconverter.transform import Transform
class odt_to_unformatted_text(Transform): class odt_to_unformatted_text(Transform):
"""transforms odt content to unformatted plain text""" """transforms odt content to unformatted plain text"""
...@@ -33,15 +36,9 @@ class odt_to_unformatted_text(Transform): ...@@ -33,15 +36,9 @@ class odt_to_unformatted_text(Transform):
def _convert(self, trdata): def _convert(self, trdata):
data = trdata.data data = trdata.data
# XXX ZipFile should also accept a string if isinstance(data, binary_type):
# however, there is some bug within # ZipFile only works with binary file-like objects
# so we feed it a file data = BytesIO(data)
if isinstance(data, str):
tmp = tmpfile(mode='w+b')
tmp.write(data)
tmp.seek(0)
data = tmp
# /XXX
zip = ZipFile(data, 'r') zip = ZipFile(data, 'r')
alltext = [] alltext = []
for subelt in ('content.xml', 'meta.xml'): for subelt in ('content.xml', 'meta.xml'):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment