# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""custom storages for the system source"""
19

20
import os
21
import sys
22
from os import unlink, path as osp
23
from contextlib import contextmanager
24
import tempfile
25

Rémi Cardona's avatar
Rémi Cardona committed
26
27
28
29
from six import PY2, PY3, text_type, binary_type

from logilab.common import nullobject

30
31
from yams.schema import role_name

32
from cubicweb import Binary, ValidationError
33
from cubicweb.server import hook
34
from cubicweb.server.edition import EditedEntity
35

36
37

def set_attribute_storage(repo, etype, attr, storage):
    """register `storage` on the repository's system source as the storage
    of attribute `attr` of entity type `etype`
    """
    system_source = repo.system_source
    system_source.set_storage(etype, attr, storage)
39

40
def unset_attribute_storage(repo, etype, attr):
    """unregister, on the repository's system source, the storage of
    attribute `attr` of entity type `etype`
    """
    system_source = repo.system_source
    system_source.unset_storage(etype, attr)
42

43

44
class Storage(object):
    """abstract storage

    The prototype expected for `callback` depends on the `is_source_callback`
    class attribute:

    * when `is_source_callback` is true (the default), the callback is run
      while processing query results, on the fetched attribute's value, and
      should have the following prototype::

        callback(self, source, cnx, value)

      where `value` is the value actually stored in the backend. None values
      are skipped (i.e. the callback won't be called for them).

    * when `is_source_callback` is false, the callback is run during sql
      generation, when some attribute with a custom storage is accessed, and
      should have the following prototype::

        callback(self, generator, relation, linkedvar)

      where `generator` is the sql generator, `relation` the current rql
      syntax tree relation and `linkedvar` the principal syntax tree variable
      holding the attribute.

    Every method below raises NotImplementedError and has to be provided by
    concrete storages.
    """
    is_source_callback = True

    def callback(self, *args):
        """see the class docstring for the prototype, which varies according
        to `is_source_callback`
        """
        raise NotImplementedError()

    def entity_added(self, entity, attr):
        """hook: an entity using this storage for `attr` has been added"""
        raise NotImplementedError()

    def entity_updated(self, entity, attr):
        """hook: an entity using this storage for `attr` has been updated"""
        raise NotImplementedError()

    def entity_deleted(self, entity, attr):
        """hook: an entity using this storage for `attr` has been deleted"""
        raise NotImplementedError()

    def migrate_entity(self, entity, attribute):
        """move the value of `attribute` of `entity` to this storage"""
        raise NotImplementedError()
85
86
87
88

# TODO
# * make it configurable without code
# * better file path attribution
# * handle backup/restore
90

91
def uniquify_path(dirpath, basename):
    """create a new, uniquely named file in `dirpath` and return the
    `(file descriptor, file name)` pair given by `tempfile.mkstemp`

    The file name starts with `basename` stripped of its extension (path
    separators being replaced by '-') and ends with that extension.
    """
    flattened = basename.replace(osp.sep, '-')
    prefix, suffix = osp.splitext(flattened)
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dirpath)
97

98
@contextmanager
def fsimport(cnx):
    """context manager setting the 'fs_importing' flag in `cnx`'s transaction
    data for the duration of the managed block

    While the flag is set, `BytesFileSystemStorage` considers edited values as
    paths of already existing files instead of content to write on disk.

    The previous state of the flag (its value, or its absence) is restored on
    exit, including when the managed block raises.
    """
    present = 'fs_importing' in cnx.transaction_data
    old_value = cnx.transaction_data.get('fs_importing')
    cnx.transaction_data['fs_importing'] = True
    try:
        yield
    finally:
        # `cnx.transaction_data` is deliberately looked up again rather than
        # cached, so the flag is restored on whatever mapping the connection
        # exposes at exit time
        if present:
            cnx.transaction_data['fs_importing'] = old_value
        else:
            # the key may already be gone, don't fail in that case
            cnx.transaction_data.pop('fs_importing', None)
108

Sylvain Thénault's avatar
Sylvain Thénault committed
109

Rémi Cardona's avatar
Rémi Cardona committed
110
111
112
# sentinel used as default value of BytesFileSystemStorage's `fsencoding`
# argument, so that "argument not given" can be detected with an identity test
_marker = nullobject()


113
114
class BytesFileSystemStorage(Storage):
    """store Bytes attribute value on the file system

    The content of the attribute is written into a file created in
    `default_directory`, and the edited value is replaced by a `Binary`
    holding the path of that file.
    """

    def __init__(self, defaultdir, fsencoding=_marker, wmode=0o444):
        """
        :param defaultdir: directory in which files are created; must be a
          unicode string on python 3
        :param fsencoding: python 2 only, encoding used to turn unicode names
          into byte strings ('utf-8' is used when the given value is false);
          must not be given on python 3
        :param wmode: permission bits applied to created files

        :raises TypeError: on python 3, if `defaultdir` isn't a unicode string
        :raises ValueError: on python 3, if `fsencoding` is given
        """
        if PY3:
            if not isinstance(defaultdir, text_type):
                raise TypeError('defaultdir must be a unicode object in python 3')
            if fsencoding is not _marker:
                raise ValueError('fsencoding is no longer supported in python 3')
        else:
            # NOTE(review): relies on `_marker` evaluating to false so that an
            # unspecified encoding falls back to utf-8 -- confirm
            self.fsencoding = fsencoding or 'utf-8'
            if isinstance(defaultdir, text_type):
                # encode with the resolved encoding, not with the raw argument
                # which is the sentinel when unspecified
                defaultdir = defaultdir.encode(self.fsencoding)
        self.default_directory = defaultdir
        # permission bits set (using fchmod) on created files
        # 0444 as in "only allow read bit in permission"
        self._wmode = wmode

    def _writecontent(self, fd, binary):
        """write the content of a binary in readonly file

        As the bfss never alters an existing file it does not prevent it from
        working as intended. This is a better safe than sorry approach.

        The file descriptor `fd` is closed on return, even if writing fails.
        """
        # wrap the descriptor first so it gets closed whatever happens next
        with os.fdopen(fd, 'wb') as fileobj:
            os.fchmod(fd, self._wmode)
            binary.to_file(fileobj)

    @staticmethod
    def _path_as_binary(fpath):
        """return a `Binary` holding `fpath`, the value stored in place of
        the attribute's content
        """
        return Binary(fpath if PY2 else fpath.encode('utf-8'))

    def _store_content(self, entity, attr, binary):
        """write `binary` into a newly created file and return its path"""
        fd, fpath = self.new_fs_path(entity, attr)
        # Mark the new file as added during the transaction before writing
        # into it, so it is removed on rollback even if the write fails.
        AddFileOp.get_instance(entity._cw).add_data(fpath)
        self._writecontent(fd, binary)
        return fpath

    def callback(self, source, cnx, value):
        """source callback run on fetched values: `value` holds the path of
        the file, return its content as a `Binary`, or None if the file can't
        be read
        """
        fpath = source.binary_to_str(value)
        try:
            return Binary.from_file(fpath)
        except EnvironmentError as ex:
            source.critical("can't open %s: %s", fpath, ex)
            return None

    def entity_added(self, entity, attr):
        """an entity using this storage for attr has been added

        Return the attribute's content (None if there is no value).
        """
        if entity._cw.transaction_data.get('fs_importing'):
            # the edited value is the path of an already existing file
            binary = Binary.from_file(entity.cw_edited[attr].getvalue())
            entity._cw_dont_cache_attribute(attr)
        else:
            binary = entity.cw_edited.pop(attr)
            if binary is None:
                # nothing to write on disk, reinstall the popped value as
                # `entity_updated` does
                entity.cw_edited.edited_attribute(attr, None)
            else:
                fpath = self._store_content(entity, attr, binary)
                # bytes storage used to store file's path
                entity.cw_edited.edited_attribute(
                    attr, self._path_as_binary(fpath))
        return binary

    def entity_updated(self, entity, attr):
        """an entity using this storage for attr has been updated

        Return the attribute's new content (None if there is no value).
        """
        # get the name of the previous file containing the value
        oldpath = self.current_fs_path(entity, attr)
        if entity._cw.transaction_data.get('fs_importing'):
            # If we are importing from the filesystem, the file already exists.
            # We do not need to create it but we need to fetch the content of
            # the file as the actual content of the attribute
            fpath = entity.cw_edited[attr].getvalue()
            entity._cw_dont_cache_attribute(attr)
            assert fpath is not None
            binary = Binary.from_file(fpath)
            if PY3 and isinstance(fpath, binary_type):
                # `oldpath` is a unicode string on python 3: decode the path
                # so the comparison below can detect an unchanged location
                # instead of scheduling the current file for removal
                fpath = fpath.decode('utf-8')
        else:
            # We must store the content of the attributes
            # into a file to stay consistent with the behaviour of entity_add.
            # Moreover, the BytesFileSystemStorage expects to be able to
            # retrieve the current value of the attribute at anytime by reading
            # the file on disk. To be able to rollback things, use a new file
            # and keep the old one that will be removed on commit if everything
            # went ok.
            #
            # fetch the current attribute value in memory
            binary = entity.cw_edited.pop(attr)
            if binary is None:
                fpath = None
                # reinstall popped value
                entity.cw_edited.edited_attribute(attr, None)
            else:
                # write attribute value on disk, in a new file
                fpath = self._store_content(entity, attr, binary)
                # register the new location for the file.
                entity.cw_edited.edited_attribute(
                    attr, self._path_as_binary(fpath))
        if oldpath is not None and oldpath != fpath:
            # Mark the old file as useless so the file will be removed at
            # commit.
            DeleteFileOp.get_instance(entity._cw).add_data(oldpath)
        return binary

    def entity_deleted(self, entity, attr):
        """an entity using this storage for attr has been deleted"""
        fpath = self.current_fs_path(entity, attr)
        if fpath is not None:
            # the file will be removed once the transaction is committed
            DeleteFileOp.get_instance(entity._cw).add_data(fpath)

    def new_fs_path(self, entity, attr):
        """create a new file for `attr` of `entity` in the default directory
        and return a `(file descriptor, file path)` pair
        """
        # We try to get some hint about how to name the file using attribute's
        # name metadata, so we use the real file name and extension when
        # available. Keeping the extension is useful for example in the case of
        # PIL processing that use filename extension to detect content-type, as
        # well as providing more understandable file names on the fs.
        if PY2:
            attr = attr.encode('ascii')
        basename = [str(entity.eid), attr]
        name = entity.cw_attr_metadata(attr, 'name')
        if name is not None:
            basename.append(name.encode(self.fsencoding) if PY2 else name)
        fd, fspath = uniquify_path(self.default_directory,
                                   '_'.join(basename))
        # NOTE(review): tempfile.mkstemp raises instead of returning None, so
        # this branch looks unreachable with this module's `uniquify_path`;
        # kept as a safety net
        if fspath is None:
            msg = entity._cw._('failed to uniquify path (%s, %s)') % (
                self.default_directory, '_'.join(basename))
            raise ValidationError(entity.eid, {role_name(attr, 'subject'): msg})
        assert isinstance(fspath, str)  # bytes on py2, unicode on py3
        return fd, fspath

    def current_fs_path(self, entity, attr):
        """return the current fs_path of the attribute, or None if the attr is
        not stored yet.
        """
        sysource = entity._cw.repo.system_source
        cu = sysource.doexec(entity._cw,
                             'SELECT cw_%s FROM cw_%s WHERE cw_eid=%s' % (
                             attr, entity.cw_etype, entity.eid))
        row = cu.fetchone()
        if row is None or row[0] is None:  # no row or no previous value
            return None
        fspath = sysource._process_value(row[0], cu.description[0],
                                         binarywrap=binary_type)
        if PY3:
            fspath = fspath.decode('utf-8')
        assert isinstance(fspath, str)  # bytes on py2, unicode on py3
        return fspath

    def migrate_entity(self, entity, attribute):
        """migrate an entity attribute to the storage"""
        entity.cw_edited = EditedEntity(entity, **entity.cw_attr_cache)
        try:
            self.entity_added(entity, attribute)
            cnx = entity._cw
            source = cnx.repo.system_source
            attrs = source.preprocess_entity(entity)
            sql = source.sqlgen.update('cw_' + entity.cw_etype, attrs,
                                       ['cw_eid'])
            source.doexec(cnx, sql, attrs)
        finally:
            # don't leave the temporary edition on the entity, even on error
            entity.cw_edited = None
270

271

272
class AddFileOp(hook.DataOperationMixIn, hook.Operation):
    """operation holding paths of files added during the transaction, which
    have to be removed if the transaction is rolled back
    """

    def rollback_event(self):
        """try to unlink each registered file, logging an error for those
        which can't be removed
        """
        for added_path in self.get_data():
            assert isinstance(added_path, str)  # bytes on py2, unicode on py3
            try:
                unlink(added_path)
            except Exception as exc:
                message = "can't remove %s: %s" % (added_path, exc)
                self.error(message)
280

281
class DeleteFileOp(hook.DataOperationMixIn, hook.Operation):
    """operation holding paths of files which became useless during the
    transaction, and are removed once it has been committed
    """

    def postcommit_event(self):
        """try to unlink each registered file, logging an error for those
        which can't be removed
        """
        for stale_path in self.get_data():
            assert isinstance(stale_path, str)  # bytes on py2, unicode on py3
            try:
                unlink(stale_path)
            except Exception as exc:
                message = "can't remove %s: %s" % (stale_path, exc)
                self.error(message)