Commit 11f11276 authored by Rémi Cardona's avatar Rémi Cardona
Browse files

[server] Port BFSS to py3k

The BFSS API changes in python 3:

* 'defaultdir' MUST be a unicode object
* 'fsencoding' MUST NOT be set

In python 2, fsencoding handles both the encoding of file paths on the
file system (utf-8 by default, but the system may actually be using
something else) and the encoding of file paths that will be stored in
the database.

So in python 3, we wipe the slate clean:

* rely on sys.getfilesystemencoding() to convert unicode objects to
  bytes
* always encode paths to utf-8 for storage in the database

Caveat emptor / here be dragons:

* sys.getfilesystemencoding() depends on the current locale, which
  therefore MUST be set properly
* when migrating an existing instance from py2 to py3, one MAY need
  to reencode file paths stored in the database
parent e7eafadbeb15
......@@ -23,6 +23,10 @@ from os import unlink, path as osp
from contextlib import contextmanager
import tempfile
from six import PY2, PY3, text_type, binary_type
from logilab.common import nullobject
from yams.schema import role_name
from cubicweb import Binary, ValidationError
......@@ -103,13 +107,22 @@ def fsimport(cnx):
del cnx.transaction_data['fs_importing']
_marker = nullobject()
class BytesFileSystemStorage(Storage):
"""store Bytes attribute value on the file system"""
def __init__(self, defaultdir, fsencoding='utf-8', wmode=0o444):
if type(defaultdir) is unicode:
defaultdir = defaultdir.encode(fsencoding)
def __init__(self, defaultdir, fsencoding=_marker, wmode=0o444):
if PY3:
if not isinstance(defaultdir, text_type):
raise TypeError('defaultdir must be a unicode object in python 3')
if fsencoding is not _marker:
raise ValueError('fsencoding is no longer supported in python 3')
else:
self.fsencoding = fsencoding or 'utf-8'
if isinstance(defaultdir, text_type):
defaultdir = defaultdir.encode(fsencoding)
self.default_directory = defaultdir
self.fsencoding = fsencoding
# extra umask to use when creating file
# 0444 as in "only allow read bit in permission"
self._wmode = wmode
......@@ -145,7 +158,8 @@ class BytesFileSystemStorage(Storage):
binary = entity.cw_edited.pop(attr)
fd, fpath = self.new_fs_path(entity, attr)
# bytes storage used to store file's path
entity.cw_edited.edited_attribute(attr, Binary(fpath))
binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8'))
entity.cw_edited.edited_attribute(attr, binary_obj)
self._writecontent(fd, binary)
AddFileOp.get_instance(entity._cw).add_data(fpath)
return binary
......@@ -187,7 +201,8 @@ class BytesFileSystemStorage(Storage):
entity.cw_edited.edited_attribute(attr, None)
else:
# register the new location for the file.
entity.cw_edited.edited_attribute(attr, Binary(fpath))
binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8'))
entity.cw_edited.edited_attribute(attr, binary_obj)
if oldpath is not None and oldpath != fpath:
# Mark the old file as useless so the file will be removed at
# commit.
......@@ -206,16 +221,19 @@ class BytesFileSystemStorage(Storage):
# available. Keeping the extension is useful for example in the case of
# PIL processing that use filename extension to detect content-type, as
# well as providing more understandable file names on the fs.
if PY2:
attr = attr.encode('ascii')
basename = [str(entity.eid), attr]
name = entity.cw_attr_metadata(attr, 'name')
if name is not None:
basename.append(name.encode(self.fsencoding))
basename.append(name.encode(self.fsencoding) if PY2 else name)
fd, fspath = uniquify_path(self.default_directory,
'_'.join(basename))
if fspath is None:
msg = entity._cw._('failed to uniquify path (%s, %s)') % (
self.default_directory, '_'.join(basename))
raise ValidationError(entity.eid, {role_name(attr, 'subject'): msg})
assert isinstance(fspath, str) # bytes on py2, unicode on py3
return fd, fspath
def current_fs_path(self, entity, attr):
......@@ -229,8 +247,12 @@ class BytesFileSystemStorage(Storage):
rawvalue = cu.fetchone()[0]
if rawvalue is None: # no previous value
return None
return sysource._process_value(rawvalue, cu.description[0],
binarywrap=str)
fspath = sysource._process_value(rawvalue, cu.description[0],
binarywrap=binary_type)
if PY3:
fspath = fspath.decode('utf-8')
assert isinstance(fspath, str) # bytes on py2, unicode on py3
return fspath
def migrate_entity(self, entity, attribute):
"""migrate an entity attribute to the storage"""
......@@ -248,15 +270,17 @@ class BytesFileSystemStorage(Storage):
class AddFileOp(hook.DataOperationMixIn, hook.Operation):
def rollback_event(self):
for filepath in self.get_data():
assert isinstance(filepath, str) # bytes on py2, unicode on py3
try:
unlink(filepath)
except Exception as ex:
self.error('cant remove %s: %s' % (filepath, ex))
self.error("can't remove %s: %s" % (filepath, ex))
class DeleteFileOp(hook.DataOperationMixIn, hook.Operation):
def postcommit_event(self):
for filepath in self.get_data():
assert isinstance(filepath, str) # bytes on py2, unicode on py3
try:
unlink(filepath)
except Exception as ex:
self.error('cant remove %s: %s' % (filepath, ex))
self.error("can't remove %s: %s" % (filepath, ex))
......@@ -17,12 +17,15 @@
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""unit tests for module cubicweb.server.sources.storages"""
from six import PY2
from logilab.common.testlib import unittest_main, tag, Tags
from cubicweb.devtools.testlib import CubicWebTC
from glob import glob
import os
import os.path as osp
import sys
import shutil
import tempfile
......@@ -57,12 +60,14 @@ class StorageTC(CubicWebTC):
def setup_database(self):
self.tempdir = tempfile.mkdtemp()
bfs_storage = storages.BytesFileSystemStorage(self.tempdir)
self.bfs_storage = bfs_storage
storages.set_attribute_storage(self.repo, 'File', 'data', bfs_storage)
storages.set_attribute_storage(self.repo, 'BFSSTestable', 'opt_attr', bfs_storage)
def tearDown(self):
super(StorageTC, self).tearDown()
storages.unset_attribute_storage(self.repo, 'File', 'data')
del self.bfs_storage
shutil.rmtree(self.tempdir)
......@@ -73,8 +78,8 @@ class StorageTC(CubicWebTC):
def fspath(self, cnx, entity):
fspath = cnx.execute('Any fspath(D) WHERE F eid %(f)s, F data D',
{'f': entity.eid})[0][0]
return fspath.getvalue()
{'f': entity.eid})[0][0].getvalue()
return fspath if PY2 else fspath.decode('utf-8')
def test_bfss_wrong_fspath_usage(self):
with self.admin_access.repo_cnx() as cnx:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment