Skip to content
Snippets Groups Projects
Commit 7b94c60170ee authored by David Douard's avatar David Douard
Browse files

initial implementation of the S3Storage

This implementation provides an S3Storage that can be used to store entity
attributes in an Amazon S3 bucket.

It comes with an STKEY rql function to be able to retrieve the S3 object key
for the entity attribute (similar to BFSS's FSPATH rql function).

S3 connection configurations are attached to the S3Storage instance, like:

  s3_storage = S3Storage(s3_bucket)
  storages.set_attribute_storage(repo, 'File', 'data', s3_storage)
parent 171278ab8a0a
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,11 @@
description = 'A Cubicweb Storage that stores the data on S3'
web = 'http://www.cubicweb.org/project/%s' % distname
__depends__ = {'cubicweb': '>= 3.24.9', 'six': '>= 1.4.0'}
__depends__ = {
'cubicweb': '>= 3.24.9',
'six': '>= 1.4.0',
'boto3': None,
}
__recommends__ = {}
classifiers = [
......
from rql.utils import register_function
from logilab.database import FunctionDescr
from cubicweb import Binary
class STKEY(FunctionDescr):
    """RQL function giving the S3 key of a bytes attribute stored using the
    S3 Storage (s3s).
    """

    rtype = 'Bytes'

    def update_cb_stack(self, stack):
        """Replace the only callback found on `stack` by `source_execute`."""
        assert len(stack) == 1
        stack[0] = self.source_execute

    def as_sql(self, backend, args):
        """Always raise NotImplementedError: STKEY() has no SQL translation."""
        message = ('This callback is only available for S3Storage '
                   'managed attribute. Is STKEY() argument S3S managed?')
        raise NotImplementedError(message)

    def source_execute(self, source, session, value):
        """Return the stored S3 key wrapped in a Binary.

        `value` is converted with `source.binary_to_str()`; when wrapping the
        result raises OSError, the error is logged as critical on `source`
        and None is returned.
        """
        s3key = source.binary_to_str(value)
        try:
            wrapped = Binary(s3key)
        except OSError as ex:
            source.critical("can't read %s: %s", s3key, ex)
            return None
        return wrapped


register_function(STKEY)
# copyright 2018 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""custom storages for S3"""
import uuid
from logging import getLogger
from six import PY3
import boto3
from cubicweb import Binary, set_log_methods
from cubicweb.server.sources.storages import Storage
class S3Storage(Storage):
    """Attribute storage keeping the attribute content in an S3 bucket.

    The value kept in `entity.cw_edited` for the attribute is the key of
    the S3 object (as bytes), not the content itself.
    """
    is_source_callback = True

    def __init__(self, bucket):
        """Create the boto3 S3 client and remember the target `bucket` name."""
        self.s3cnx = boto3.client('s3')
        self.bucket = bucket

    def callback(self, source, cnx, value):
        """Return a Binary holding the content of the S3 object referenced by
        `value` (the stored S3 key), or None if it could not be downloaded.

        See the base class docstring for the prototype, which varies
        according to is_source_callback.
        """
        key = source.binary_to_str(value).decode()
        data = Binary()
        try:
            self.s3cnx.download_fileobj(self.bucket, key, data)
        except Exception as ex:
            # deliberately broad: any download failure is logged and reported
            # to the caller as a missing value
            source.critical("can't retrieve S3 object %s: %s", key, ex)
            return None
        # the download wrote into `data`, so its position is no longer at the
        # start; rewind so that callers using read() get the whole content
        data.seek(0)
        return data

    def entity_added(self, entity, attr):
        """An entity using this storage for attr has been added.

        The edited value is replaced by the S3 key and the original content
        is uploaded to the bucket. Return the original Binary (or None).
        """
        binary = entity.cw_edited.pop(attr)
        if binary is not None:
            key = self.get_s3_key(entity, attr)
            # bytes storage used to store S3's object key
            binary_obj = Binary(key.encode())
            entity.cw_edited.edited_attribute(attr, binary_obj)
            self.debug('Upload object to S3')
            self.s3cnx.upload_fileobj(binary, self.bucket, key)
            self.info('Uploaded object %s.%s to S3', entity.eid, attr)
        return binary

    def entity_updated(self, entity, attr):
        """An entity using this storage for attr has been updated.

        Handled exactly like an addition: get_s3_key() returns the existing
        key when there is one, so the upload targets that same key.
        """
        return self.entity_added(entity, attr)

    def entity_deleted(self, entity, attr):
        """An entity using this storage for attr has been deleted.

        Not supported yet: always raises NotImplementedError.
        """
        raise NotImplementedError()

    def migrate_entity(self, entity, attribute):
        """Migrate an entity attribute to the storage.

        Not supported yet: always raises NotImplementedError.
        """
        raise NotImplementedError()

    def get_s3_key(self, entity, attr):
        """Return the S3 key of the S3 object storing the content of attribute
        attr of the entity.

        If the given entity has no key yet (eg. at entity creation time), a
        new key is generated.
        """
        rset = entity._cw.execute(
            'Any stkey(D) WHERE X eid %s, X %s D' %
            (entity.eid, attr))
        if rset and rset.rows[0][0]:
            key = rset.rows[0][0].getvalue()
            if PY3:
                # getvalue() returns bytes; callers expect a native string
                key = key.decode()
            return key
        return self.new_s3_key(entity, attr)

    def new_s3_key(self, entity, attr):
        """Generate a new key for given entity attr.

        This implementation ignores its arguments and returns a fresh
        uuid.uuid1() value as a string. Note that uuid1 is not random.
        """
        return str(uuid.uuid1())


set_log_methods(S3Storage,
                getLogger('cube.s3storage.storages.s3storage'))
s3storage
s3storage, file
# copyright 2018 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import io
import boto3
from moto import mock_s3
from six import PY3
......@@ -16,4 +5,8 @@
"""cubicweb-s3storage automatic tests
from cubicweb.server.sources import storages
from cubicweb.devtools.testlib import CubicWebTC
from cubicweb import Binary
from cubicweb_s3storage.storages import S3Storage
......@@ -18,4 +11,5 @@
uncomment code below if you want to activate automatic test for your cube:
class S3StorageTC(CubicWebTC):
bucket = 'test-bucket'
......@@ -21,3 +15,14 @@
.. sourcecode:: python
def setup_database(self):
    """Start the moto S3 mock, create the test bucket and install an
    S3Storage for the `data` attribute of `File` entities.

    The mock, the boto3 client and the storage are kept on the test case.
    """
    s3_mock = mock_s3()
    s3_mock.start()
    client = boto3.client('s3')
    location = {'LocationConstraint': 'eu-west-1'}
    client.create_bucket(Bucket=self.bucket,
                         CreateBucketConfiguration=location)
    storage = S3Storage(self.bucket)
    storages.set_attribute_storage(self.repo, 'File', 'data', storage)
    self.s3_storage = storage
    self.s3_cnx = client
    self.s3_mock = s3_mock
......@@ -23,3 +28,12 @@
from cubicweb.devtools.testlib import AutomaticWebTest
def tearDown(self):
    """Run the base teardown, then unplug the storage from `File.data`,
    drop the storage reference and stop the S3 mock.
    """
    super(S3StorageTC, self).tearDown()
    etype, attr = 'File', 'data'
    storages.unset_attribute_storage(self.repo, etype, attr)
    del self.s3_storage
    self.s3_mock.stop()
def create_file(self, cnx, content=b'the-data'):
    """Create and return a `File` entity whose `data` is `content`."""
    attributes = {
        'data': Binary(content),
        'data_format': u'text/plain',
        'data_name': u'foo.pdf',
    }
    return cnx.create_entity('File', **attributes)
......@@ -25,6 +39,12 @@
class AutomaticWebTest(AutomaticWebTest):
'''provides `to_test_etypes` and/or `list_startup_views` implementation
to limit test scope
'''
def test_s3key_gen(self):
    """get_s3_key() gives the same key for a committed file when asked
    from two different connections.
    """
    storage = self.s3_storage
    with self.admin_access.client_cnx() as cnx:
        created = self.create_file(cnx, b'some content')
        cnx.commit()
        eid = created.eid
        first_key = storage.get_s3_key(created, 'data')
    with self.admin_access.client_cnx() as cnx:
        fetched = cnx.find('File', eid=eid).one()
        second_key = storage.get_s3_key(fetched, 'data')
    self.assertEqual(first_key, second_key)
......@@ -30,5 +50,19 @@
def to_test_etypes(self):
'''only test views for entities of the returned types'''
return set(('My', 'Cube', 'Entity', 'Types'))
def test_file_create(self):
    """Creating a File uploads its content to the bucket under the key
    returned by STKEY().
    """
    rql = ('Any STKEY(D) WHERE F is File, F data D, '
           'F eid %(eid)s')
    with self.admin_access.client_cnx() as cnx:
        eid = self.create_file(cnx, b'some content').eid
        cnx.commit()
        stored_key = cnx.execute(rql, {'eid': eid}).rows[0][0]
        key = stored_key.getvalue().decode()
    # fetch the object straight from the (mocked) bucket
    downloaded = io.BytesIO()
    self.s3_cnx.download_fileobj(self.bucket, key, downloaded)
    self.assertEqual(downloaded.getvalue(), b'some content')
def test_file_modify(self):
with self.admin_access.client_cnx() as cnx:
eid = self.create_file(cnx, b'some content').eid
cnx.commit()
......@@ -34,6 +68,13 @@
def list_startup_views(self):
'''only test startup views of the returned identifiers'''
return ('some', 'startup', 'views')
"""
key = cnx.execute('Any STKEY(D) WHERE F is File, F data D, '
'F eid %(eid)s', {'eid': eid}).rows[0][0]
key = key.getvalue().decode()
with self.admin_access.client_cnx() as cnx:
fobj = cnx.find('File', eid=eid).one()
fobj.cw_set(data=Binary(b'something else'))
cnx.commit()
data = io.BytesIO()
self.s3_cnx.download_fileobj(self.bucket, key, data)
self.assertEqual(data.getvalue(), b'something else')
......@@ -39,4 +80,9 @@
from cubicweb.devtools import testlib
def test_file_retrieve(self):
binstuff = ''.join(chr(x) for x in range(256))
if PY3:
binstuff = binstuff.encode()
with self.admin_access.client_cnx() as cnx:
eid = self.create_file(cnx, binstuff).eid
cnx.commit()
......@@ -42,7 +88,10 @@
class DefaultTC(testlib.CubicWebTC):
def test_something(self):
self.skipTest('this cube has no test')
with self.admin_access.client_cnx() as cnx:
rset = cnx.execute('Any D WHERE F eid %(eid)s, F data D',
{'eid': eid})
self.assertTrue(rset)
data = rset.rows[0][0]
self.assertEqual(data.getvalue(), binstuff)
if __name__ == '__main__':
......
[tox]
envlist = py27,py34,flake8
envlist = py27,py3,flake8
[testenv]
......@@ -3,5 +3,4 @@
[testenv]
sitepackages = true
deps =
pytest
......@@ -6,5 +5,7 @@
deps =
pytest
moto
cubicweb-file
commands =
{envpython} -m pytest {posargs:test}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment