Commit 9a6f8a18 authored by Rémi Cardona
Browse files

[py3k] Fix odt2text to work properly with binary data

Related to #268148.
parent fda99736b65f
......@@ -74,14 +74,14 @@ r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR
def test_odt_to_text(self):
data = TransformData(open(osp.join(DATAPATH, 'hello.odt')),
data = TransformData(open(osp.join(DATAPATH, 'hello.odt'), 'rb'),
'application/vnd.oasis.opendocument.text', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S')
# ZipFile will complain with
# TypeError: file() argument 1 must be (encoded string without NULL bytes), not str
# if given a plain str ... we shield ourselves from that.
data = TransformData(open(osp.join(DATAPATH, 'hello.odt')).read(),
data = TransformData(open(osp.join(DATAPATH, 'hello.odt'), 'rb').read(),
'application/vnd.oasis.opendocument.text', 'utf8')
converted = ENGINE.convert(data, 'text/plain').decode().strip()
self.assertEqual(converted, u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S')
......
......@@ -18,12 +18,15 @@
"""odt2text: Turn odt file into equivalent plain text file.
Copyright (C) 2009 Logilab S.A.
"""
from io import BytesIO
from zipfile import ZipFile
from six import binary_type
from lxml import etree
from tempfile import TemporaryFile as tmpfile
from logilab.mtconverter.transform import Transform
class odt_to_unformatted_text(Transform):
"""transforms odt content to unformatted plain text"""
......@@ -33,15 +36,9 @@ class odt_to_unformatted_text(Transform):
def _convert(self, trdata):
data = trdata.data
# XXX ZipFile should also accept a string
# however, there is some bug within
# so we feed it a file
if isinstance(data, str):
tmp = tmpfile(mode='w+b')
tmp.write(data)
tmp.seek(0)
data = tmp
# /XXX
if isinstance(data, binary_type):
# ZipFile only works with binary file-like objects
data = BytesIO(data)
zip = ZipFile(data, 'r')
alltext = []
for subelt in ('content.xml', 'meta.xml'):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment