Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
open-source
logilab-mtconverter
Commits
7d28ad6a4aff
Commit
9a6f8a18
authored
Sep 30, 2014
by
Rémi Cardona
Browse files
[py3k] Fix odt2text to work properly with binary data
Related to #268148.
parent
fda99736b65f
Changes
2
Hide whitespace changes
Inline
Side-by-side
test/unittest_transforms.py
View file @
7d28ad6a
...
...
@@ -74,14 +74,14 @@ r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR
def
test_odt_to_text
(
self
):
data
=
TransformData
(
open
(
osp
.
join
(
DATAPATH
,
'hello.odt'
)),
data
=
TransformData
(
open
(
osp
.
join
(
DATAPATH
,
'hello.odt'
)
,
'rb'
),
'application/vnd.oasis.opendocument.text'
,
'utf8'
)
converted
=
ENGINE
.
convert
(
data
,
'text/plain'
).
decode
().
strip
()
self
.
assertEqual
(
converted
,
u
'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S'
)
# ZipFile will complain that
# TypeError: file() argument 1 must be (encoded string without NULL bytes), not str
# if given a plain str ... we shielded us from that.
data
=
TransformData
(
open
(
osp
.
join
(
DATAPATH
,
'hello.odt'
)).
read
(),
data
=
TransformData
(
open
(
osp
.
join
(
DATAPATH
,
'hello.odt'
)
,
'rb'
).
read
(),
'application/vnd.oasis.opendocument.text'
,
'utf8'
)
converted
=
ENGINE
.
convert
(
data
,
'text/plain'
).
decode
().
strip
()
self
.
assertEqual
(
converted
,
u
'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S'
)
...
...
transforms/odt2text.py
View file @
7d28ad6a
...
...
@@ -18,12 +18,15 @@
"""odt2text: Turn odt file into equivalent plain text file.
Copyright (C) 2009 Logilab S.A.
"""
from
io
import
BytesIO
from
zipfile
import
ZipFile
from
six
import
binary_type
from
lxml
import
etree
from
tempfile
import
TemporaryFile
as
tmpfile
from
logilab.mtconverter.transform
import
Transform
class
odt_to_unformatted_text
(
Transform
):
"""transforms odt content to unformatted plain text"""
...
...
@@ -33,15 +36,9 @@ class odt_to_unformatted_text(Transform):
def
_convert
(
self
,
trdata
):
data
=
trdata
.
data
# XXX ZipFile should also accept a string
# however, there is some bug within
# so we feed it a file
if
isinstance
(
data
,
str
):
tmp
=
tmpfile
(
mode
=
'w+b'
)
tmp
.
write
(
data
)
tmp
.
seek
(
0
)
data
=
tmp
# /XXX
if
isinstance
(
data
,
binary_type
):
# ZipFile only works with binary file-like objects
data
=
BytesIO
(
data
)
zip
=
ZipFile
(
data
,
'r'
)
alltext
=
[]
for
subelt
in
(
'content.xml'
,
'meta.xml'
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment