querier.py 32.1 KB
Newer Older
Sylvain Thénault's avatar
Sylvain Thénault committed
1
# copyright 2003-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
Adrien Di Mascio's avatar
Adrien Di Mascio committed
18
19
20
"""Helper classes to execute RQL queries on a set of sources, performing
security checking and data aggregation.
"""
21
import uuid
22
23
import time
import traceback
Adrien Di Mascio's avatar
Adrien Di Mascio committed
24
25
from itertools import repeat

26
from rql import RQLSyntaxError, CoercionError
27
from rql.stmts import Union
28
29
from rql.nodes import ETYPE_PYOBJ_MAP, etype_from_pyobj, Relation, Exists, Not,\
    VariableRef, Constant
30
from yams import BASE_TYPES
Adrien Di Mascio's avatar
Adrien Di Mascio committed
31

32
from cubicweb import ValidationError, Unauthorized, UnknownEid, QueryError
33
from cubicweb.rqlrewrite import RQLRelationRewriter
34
from cubicweb import Binary, server
Adrien Di Mascio's avatar
Adrien Di Mascio committed
35
from cubicweb.rset import ResultSet
36
from cubicweb.debug import emit_to_debug_channel
Adrien Di Mascio's avatar
Adrien Di Mascio committed
37

38
from cubicweb.utils import QueryCache, RepeatList
39
from cubicweb.misc.source_highlight import highlight_terminal
40
from cubicweb.server.rqlannotation import RQLAnnotator, set_qdata
41
from cubicweb.server.ssplanner import (READ_ONLY_RTYPES, add_types_restriction,
42
                                       prepare_plan)
43
from cubicweb.server.edition import EditedEntity
44
from cubicweb.statsd_logger import statsd_timeit, statsd_c
45
46
47
48

ETYPE_PYOBJ_MAP[Binary] = 'Bytes'


Denis Laxalde's avatar
Denis Laxalde committed
49
def empty_rset(rql, args):
    """Build an empty result set object.

    :param rql: the RQL query string the result set is associated with
    :param args: the arguments dictionary given along with the query
    :return: a :class:`ResultSet` built from an empty list of rows
    """
    return ResultSet([], rql, args)
Adrien Di Mascio's avatar
Adrien Di Mascio committed
52
53
54
55
56
57
58


# permission utilities ########################################################

def check_no_password_selected(rqlst):
    """Check that Password entities are not selected.

    :param rqlst: syntax tree node exposing a ``solutions`` list, each
      solution being a dictionary mapping variable names to type names
    :raise Unauthorized: as soon as one solution binds a variable to the
      ``Password`` type
    """
    for solution in rqlst.solutions:
        for var, etype in solution.items():
            if etype == 'Password':
                raise Unauthorized('Password selection is not allowed (%s)' % var)
Adrien Di Mascio's avatar
Adrien Di Mascio committed
62

63
def term_etype(cnx, term, solution, args):
    """Return the entity type for the given term (a VariableRef or a Constant
    node).

    :param cnx: connection, used to resolve the type of an eid
    :param term: syntax tree node; when it has a ``name`` attribute its type
      is looked up in `solution`
    :param solution: dictionary mapping variable names to entity type names
    :param args: query arguments used to evaluate the term when it has no name
    :return: the entity type name
    """
    try:
        return solution[term.name]
    except AttributeError:
        # the term has no name: evaluate it to an eid and ask the connection
        # for the type of the matching entity
        return cnx.entity_type(term.eval(args))
71

72
73
def check_relations_read_access(cnx, select, args):
    """Raise :exc:`Unauthorized` if the connection's user doesn't have
    credentials to read relations used in the given syntax tree.

    Every relation found in the restriction of `select`, except those whose
    type is in `READ_ONLY_RTYPES`, is checked against each solution of the
    query: the user must belong to one of the groups having the 'read'
    permission on the matching relation definition.

    :param cnx: connection executing the query
    :param select: select node whose restriction must be checked
    :param args: query arguments, used to evaluate terms which are not
      variables
    :raise Unauthorized: when the user matches none of the groups allowed to
      read a relation definition
    """
    if select.where is None:
        # no restriction, hence no relation to check
        return
    # use `term_etype` since we've to deal with rewritten constants here,
    # when used as an external source by another repository.
    # XXX what about local read security w/ those rewritten constants...
    # XXX constants can also happen in some queries generated by req.find()
    DBG = (server.DEBUG & server.DBG_SEC) and 'read' in server._SECURITY_CAPS
    schema = cnx.repo.schema
    user = cnx.user
    for rel in select.where.iget_nodes(Relation):
        # XXX has_text may have specific perm ?
        if rel.r_type in READ_ONLY_RTYPES:
            continue
        rschema = schema.rschema(rel.r_type)
        for solution in select.solutions:
            subjtype = term_etype(cnx, rel.children[0], solution, args)
            if rschema.final:
                # attribute: the definition is reached through the subject type
                rdef = schema.eschema(subjtype).rdef(rschema)
            else:
                objtype = term_etype(cnx, rel.children[1].children[0],
                                     solution, args)
                rdef = rschema.rdef(subjtype, objtype)
            if not user.matching_groups(rdef.get_groups('read')):
                if DBG:
                    print('check_read_access: %s %s does not match %s' %
                          (rdef, user.groups, rdef.get_groups('read')))
                # XXX rqlexpr not allowed
                raise Unauthorized('read', rel.r_type)
            if DBG:
                print('check_read_access: %s %s matches %s' %
                      (rdef, user.groups, rdef.get_groups('read')))
108
109

def get_local_checks(cnx, rqlst, solution):
    """Check that the connection's user has credentials to access data read
    by the query and return a dict defining necessary "local checks" (i.e. rql
    expression in read permission defined in the schema) where no group grants
    him the permission.

    Returned dictionary's keys are variable names and values the rql expressions
    for this variable (with the given solution).

    Raise :exc:`Unauthorized` if access is known to be denied, i.e. if there is
    no matching group and no local permissions. The raised exception gets a
    `var` attribute holding the name of the offending variable.

    :param cnx: connection executing the query
    :param rqlst: syntax tree node whose `defined_vars` are checked
    :param solution: dictionary mapping variable names to entity type names
    """
    schema = cnx.repo.schema
    user = cnx.user
    localchecks = {}
    # iterate on defined_vars and not on solutions to ignore column aliases
    for varname in rqlst.defined_vars:
        eschema = schema.eschema(solution[varname])
        if eschema.final:
            continue
        if user.matching_groups(eschema.get_groups('read')):
            # a group grants the permission, nothing to insert
            continue
        erqlexprs = eschema.get_rqlexprs('read')
        if not erqlexprs:
            ex = Unauthorized('read', solution[varname])
            ex.var = varname
            # debug flag is only needed on this branch
            if (server.DEBUG & server.DBG_SEC) and 'read' in server._SECURITY_CAPS:
                print('check_read_access: %s %s %s %s' %
                      (varname, eschema, user.groups, eschema.get_groups('read')))
            raise ex
        varinfo = rqlst.defined_vars[varname].stinfo
        if varinfo['selected']:
            localchecks[varname] = erqlexprs
            continue
        # don't insert security on variable only referenced by
        # 'NOT X relation Y' or 'NOT EXISTS(X relation Y)'
        negated = [r for r in varinfo['relations']
                   if (not schema.rschema(r.r_type).final
                       and ((isinstance(r.parent, Exists) and r.parent.neged(strict=True))
                            or isinstance(r.parent, Not)))]
        if len(negated) != len(varinfo['relations']):
            localchecks[varname] = erqlexprs
    return localchecks
151
152


Adrien Di Mascio's avatar
Adrien Di Mascio committed
153
154
155
156
# Plans #######################################################################

class ExecutionPlan(object):
    """the execution model of a rql query, composed of querier steps"""

    def __init__(self, schema, rqlst, args, cnx):
        self.schema = schema
        # original rql syntax tree
        self.rqlst = rqlst
        self.args = args or {}
        # cnx executing the query
        self.cnx = cnx
        # execution steps
        self.steps = []
        # tracing token for debugging
        self.rql_query_tracing_token = None
        # cache key of the query, read by `preprocess`; None means the plan
        # has no cache key (the querier overrides it after instantiation)
        self.cache_key = None

    def add_step(self, step):
        """add a step to the plan"""
        self.steps.append(step)

    def sqlexec(self, sql, args=None):
        """execute the given sql through the repository's system source and
        return what its `sqlexec` method returns
        """
        return self.cnx.repo.system_source.sqlexec(self.cnx, sql, args)

    def execute(self):
        """execute a plan and return resulting rows

        Steps are executed in order after having received the plan's tracing
        token. The plan is expected to hold at least one step.
        """
        for step in self.steps:
            step.rql_query_tracing_token = self.rql_query_tracing_token
            result = step.execute()
        # the latest executed step contains the full query result
        return result

    def preprocess(self, union, security=True):
        """insert security when necessary then annotate rql syntax tree
        to prepare sql generation

        When the 'security-rqlst-cache' transaction data is set and the plan
        has a cache key, the union with security inserted is stored in (and
        restored from) the connection's transaction data under that key.

        :param union: the union node to process, modified in place
        :param security: false to skip security insertion whatever the
          connection's read security state
        """
        cached = None
        if security and self.cnx.read_security:
            # ensure security is turned off when security is inserted,
            # else we may loop for ever...
            if self.cnx.transaction_data.get('security-rqlst-cache'):
                key = self.cache_key
            else:
                key = None
            if key is not None and key in self.cnx.transaction_data:
                # reuse the tree computed earlier in this transaction: its
                # selects replace the children of the given union
                cachedunion, args = self.cnx.transaction_data[key]
                union.children[:] = []
                for select in cachedunion.children:
                    union.append(select)
                union.has_text_query = cachedunion.has_text_query
                args.update(self.args)
                self.args = args
                cached = True
            else:
                with self.cnx.security_enabled(read=False):
                    noinvariant = self._insert_security(union)
                if key is not None:
                    self.cnx.transaction_data[key] = (union, self.args)
        else:
            noinvariant = ()
        if cached is None:
            self.cnx.vreg.rqlhelper.simplify(union)
            RQLAnnotator(self.schema).annotate(union)
            set_qdata(self.schema.rschema, union, noinvariant)
        if union.has_text_query:
            self.cache_key = None

    def _insert_security(self, union):
        """insert local checks in each select of the union (sub-queries
        first) and return the `noinvariant` set filled by the rql rewriter
        """
        noinvariant = set()
        for select in union.children[:]:
            for subquery in select.with_:
                self._insert_security(subquery.query)
            localchecks, restricted = self._check_permissions(select)
            # keys are tuples of local checks, the empty tuple standing for
            # solutions without any: only rewrite when some check is needed
            if any(localchecks):
                self.cnx.rql_rewriter.insert_local_checks(
                    select, self.args, localchecks, restricted, noinvariant)
        return noinvariant

    def _check_permissions(self, rqlst):
        """Return a dict defining "local checks", i.e. RQLExpression defined in
        the schema that should be inserted in the original query, together with
        a set of variable names which requires some security to be inserted.

        Solutions where a variable has a type which the user definitely can't
        read are removed, else if the user *may* read it (i.e. if an rql
        expression is defined for the "read" permission of the related type),
        the local checks dict is updated.

        The local checks dict has entries for each different local check
        necessary, with associated solutions as value, a local check being
        defined by a list of 2-uple (variable name, rql expressions) for each
        variable which has to be checked. Solutions which don't require local
        checks will be associated to the empty tuple key.

        Note rqlst should not have been simplified at this point.

        :raise Unauthorized: if no solution is left, or if an entity given
          by its eid doesn't satisfy any of the expressions of its variable
        """
        cnx = self.cnx
        msgs = []
        # dict(varname: eid), allowing to check rql expression for variables
        # which have a known eid
        varkwargs = {}
        if not cnx.transaction_data.get('security-rqlst-cache'):
            for var in rqlst.defined_vars.values():
                if var.stinfo['constnode'] is not None:
                    eid = var.stinfo['constnode'].eval(self.args)
                    varkwargs[var.name] = int(eid)
        # dictionary of variables restricted for security reason
        localchecks = {}
        restricted_vars = set()
        newsolutions = []
        for solution in rqlst.solutions:
            try:
                localcheck = get_local_checks(cnx, rqlst, solution)
            except Unauthorized as ex:
                msg = 'remove %s from solutions since %s has no %s access to %s'
                msg %= (solution, cnx.user.login, ex.args[0], ex.args[1])
                msgs.append(msg)
                LOGGER.info(msg)
            else:
                newsolutions.append(solution)
                # try to benefit of rqlexpr.check cache for entities which
                # are specified by eid in query'args
                for varname, eid in varkwargs.items():
                    try:
                        rqlexprs = localcheck.pop(varname)
                    except KeyError:
                        continue
                    # if entity has been added in the current transaction, the
                    # user can read it whatever rql expressions are associated
                    # to its type
                    if cnx.added_in_transaction(eid):
                        continue
                    for rqlexpr in rqlexprs:
                        if rqlexpr.check(cnx, eid):
                            break
                    else:
                        # report the variable being checked, not the leftover
                        # of the loop which filled `varkwargs`
                        raise Unauthorized('No read access on %r with eid %i.'
                                           % (varname, eid))
                # mark variables protected by an rql expression
                restricted_vars.update(localcheck)
                # turn local check into a dict key
                localcheck = tuple(sorted(localcheck.items()))
                localchecks.setdefault(localcheck, []).append(solution)
        # raise Unauthorized exception if the user can't access to any solution
        if not newsolutions:
            raise Unauthorized('\n'.join(msgs))
        # if there is some message, solutions have been modified and must be
        # reconsidered by the syntax tree
        if msgs:
            rqlst.set_possible_types(newsolutions)
        return localchecks, restricted_vars

    def finalize(self, select, solutions, insertedvars):
        """wrap `select` into a new union ready for sql generation and return
        that union

        :param select: the select node to finalize
        :param solutions: list of solutions for the select, completed in place
          with the type of inserted variables
        :param insertedvars: iterable of 3-uple (main variable name, relation
          schema, new variable name)
        """
        rqlst = Union()
        rqlst.append(select)
        for mainvarname, rschema, newvarname in insertedvars:
            # type of the new variable is taken from the first solution
            nvartype = str(rschema.objects(solutions[0][mainvarname])[0])
            for sol in solutions:
                sol[newvarname] = nvartype
        select.clean_solutions(solutions)
        add_types_restriction(self.schema, select)
        self.cnx.vreg.rqlhelper.annotate(rqlst)
        self.preprocess(rqlst, security=False)
        return rqlst
316

Sylvain Thénault's avatar
Sylvain Thénault committed
317

Adrien Di Mascio's avatar
Adrien Di Mascio committed
318
319
320
class InsertPlan(ExecutionPlan):
    """an execution model specific to the INSERT rql query
    """

    def __init__(self, schema, rqlst, args, cnx):
        ExecutionPlan.__init__(self, schema, rqlst, args, cnx)
        # keep a reference to the originally selected terms
        self.selected = rqlst.selection
        # list of rows of entities definition (ssplanner.EditedEntity)
        self.e_defs = [[]]
        # set of new relation definition (3-uple (from_eid, r_type, to_eid))
        self.r_defs = set()
        # indexes to track entity definitions bound to relation definitions
        self._r_subj_index = {}
        self._r_obj_index = {}
        self._expanded_r_defs = {}

    def add_entity_def(self, edef):
        """add an entity definition to build"""
        self.e_defs[-1].append(edef)

    def add_relation_def(self, rdef):
        """add an relation definition to build

        :param rdef: 3-uple (subject, relation type, object), subject and
          object being either an int or an entity definition
        :raise QueryError: if the relation type is a computed relation
        """
        edef, rtype, value = rdef
        if self.schema[rtype].rule:
            raise QueryError("'%s' is a computed relation" % rtype)
        self.r_defs.add(rdef)
        # only entity definitions are indexed, not already known eids
        if not isinstance(edef, int):
            self._r_subj_index.setdefault(edef, []).append(rdef)
        if not isinstance(value, int):
            self._r_obj_index.setdefault(value, []).append(rdef)

    def substitute_entity_def(self, edef, edefs):
        """substitute an incomplete entity definition by a list of complete
        equivalents

        e.g. on queries such as ::
          INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
          WHERE U login 'admin', U login N

        X will be inserted as many times as U exists, and so the X travaille Y
        relations as to be added as many time as X is inserted
        """
        if not edefs or not self.e_defs:
            # no result, no entity will be created
            self.e_defs = ()
            return
        # first remove the incomplete entity definition
        colidx = self.e_defs[0].index(edef)
        # iterate on a copy since new rows are appended while looping
        for row in list(self.e_defs):
            row[colidx] = edefs[0]
            for edef_ in edefs[1:]:
                # other columns are cloned for each additional equivalent
                newrow = [ed.clone() for idx, ed in enumerate(row)
                          if idx != colidx]
                newrow.insert(colidx, edef_)
                self.e_defs.append(newrow)
        # now, see if this entity def is referenced as subject in some relation
        # definition
        if edef in self._r_subj_index:
            for rdef in self._r_subj_index[edef]:
                self._expanded_r_defs[rdef] = [
                    (edef_, exp_rdef[1], exp_rdef[2])
                    for exp_rdef in self._expanded(rdef)
                    for edef_ in edefs]
        # and finally, see if this entity def is referenced as object in some
        # relation definition
        if edef in self._r_obj_index:
            for rdef in self._r_obj_index[edef]:
                self._expanded_r_defs[rdef] = [
                    (exp_rdef[0], exp_rdef[1], edef_)
                    for exp_rdef in self._expanded(rdef)
                    for edef_ in edefs]

    def _expanded(self, rdef):
        """return expanded value for the given relation definition"""
        try:
            return self._expanded_r_defs[rdef]
        except KeyError:
            # not expanded yet: from now on the definition is tracked through
            # its expansion, no more as a plain relation definition
            self.r_defs.remove(rdef)
            return [rdef]

    def relation_defs(self):
        """return the list for relation definitions to insert"""
        for rdefs in self._expanded_r_defs.values():
            yield from rdefs
        yield from self.r_defs

    def insert_entity_defs(self):
        """return eids of inserted entities in a suitable form for the resulting
        result set, e.g.:

        e.g. on queries such as ::
          INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
          WHERE U login 'admin', U login N

        if there is two entities matching U, the result set will look like
        [(eidX1, eidY1), (eidX2, eidY2)]
        """
        cnx = self.cnx
        repo = cnx.repo
        return [[repo.glob_add_entity(cnx, edef) for edef in row]
                for row in self.e_defs]

    def insert_relation_defs(self):
        """insert the relations returned by `relation_defs`

        Relations whose type is inlined are set through an `EditedEntity` of
        their subject, others are given grouped by type to the repository.
        """
        cnx = self.cnx
        repo = cnx.repo
        edited_entities = {}
        relations = {}
        for subj, rtype, obj in self.relation_defs():
            # if a string is given into args instead of an int, we get it here
            if isinstance(subj, str):
                subj = int(subj)
            elif not isinstance(subj, int):
                subj = subj.entity.eid
            if isinstance(obj, str):
                obj = int(obj)
            elif not isinstance(obj, int):
                obj = obj.entity.eid
            if repo.schema.rschema(rtype).inlined:
                if subj not in edited_entities:
                    entity = cnx.entity_from_eid(subj)
                    edited = EditedEntity(entity)
                    edited_entities[subj] = edited
                else:
                    edited = edited_entities[subj]
                edited.edited_attribute(rtype, obj)
            else:
                relations.setdefault(rtype, []).append((subj, obj))
        repo.glob_add_relations(cnx, relations)
        for edited in edited_entities.values():
            repo.glob_update_entity(cnx, edited)
Adrien Di Mascio's avatar
Adrien Di Mascio committed
463
464
465
466


class QuerierHelper(object):
    """helper class to execute rql queries, putting all things together"""

    def __init__(self, repo, schema):
        # system info helper
        self._repo = repo
        # instance schema
        self.set_schema(schema)

    def set_schema(self, schema):
        """set the schema used by the querier and reset its caches"""
        self.schema = schema
        self.clear_caches()

    def clear_caches(self, eids=None, etypes=None):
        """clear the rql cache

        Without `eids` the whole cache is replaced by a fresh one, else only
        entries of the 'Any X WHERE X eid <eid>' and '<etype> X WHERE X eid
        <eid>' queries are dropped.

        :param eids: eids whose entries should be dropped, or None
        :param etypes: entity types matching `eids` (a type may be None when
          unknown); when omitted every type is considered unknown
        """
        if eids is None:
            self.rql_cache = RQLCache(self._repo, self.schema)
            return
        if etypes is None:
            # don't crash in zip() when only eids are given
            etypes = repeat(None)
        cache = self.rql_cache
        for eid, etype in zip(eids, etypes):
            cache.pop(('Any X WHERE X eid %s' % eid,), None)
            if etype is not None:
                cache.pop(('%s X WHERE X eid %s' % (etype, eid),), None)

    def plan_factory(self, rqlst, args, cnx):
        """create an execution plan for a RQL query: an `InsertPlan` for
        INSERT queries, an `ExecutionPlan` otherwise
        """
        if rqlst.TYPE == 'insert':
            return InsertPlan(self.schema, rqlst, args, cnx)
        return ExecutionPlan(self.schema, rqlst, args, cnx)

    @statsd_timeit
    def execute(self, cnx, rql, args=None, build_descr=True):
        """execute a rql query, return resulting rows and their description in
        a `ResultSet` object

        * `rql` should be a Unicode string or a plain ASCII string
        * `args` the optional parameters dictionary associated to the query
        * `build_descr` is a boolean flag indicating if the description should
          be built on select queries (if false, the description will be an empty
          list)

        on INSERT queries, there will be one row with the eid of each inserted
        entity

        result for DELETE and SET queries is undefined yet

        to maximize the rql parsing/analyzing cache performance, you should
        always use substitute arguments in queries (i.e. avoid query such as
        'Any X WHERE X eid 123'!)
        """
        if server.DEBUG & (server.DBG_RQL | server.DBG_SQL):
            if server.DEBUG & (server.DBG_MORE | server.DBG_SQL):
                print('*'*80)
            print("querier input", highlight_terminal(repr(rql)[1:-1], 'RQL'), repr(args))
        try:
            rqlst, cachekey = self.rql_cache.get(cnx, rql, args)
        except UnknownEid:
            # we want queries such as "Any X WHERE X eid 9999"
            # return an empty result instead of raising UnknownEid
            return empty_rset(rql, args)
        if rqlst.TYPE != 'select':
            if cnx.read_security:
                check_no_password_selected(rqlst)
            cachekey = None
        else:
            if cnx.read_security:
                for select in rqlst.children:
                    check_no_password_selected(select)
                    check_relations_read_access(cnx, select, args)
            # on select query, always copy the cached rqlst so we don't have to
            # bother modifying it. This is not necessary on write queries since
            # a new syntax tree is built from them.
            rqlst = rqlst.copy()
            # Rewrite computed relations
            rewriter = RQLRelationRewriter(cnx)
            rewriter.rewrite(rqlst, args)
            self._repo.vreg.rqlhelper.annotate(rqlst)
            if args:
                # different SQL generated when some argument is None or not (IS
                # NULL). This should be considered when computing sql cache key
                cachekey += tuple(sorted([k for k, v in args.items()
                                          if v is None]))
        # make an execution plan
        plan = self.plan_factory(rqlst, args, cnx)
        plan.cache_key = cachekey
        plan.rql_query_tracing_token = str(uuid.uuid4())
        prepare_plan(plan, self.schema, self._repo.vreg.rqlhelper)

        query_debug_informations = {
            "rql": rql,
            "rql_query_tracing_token": plan.rql_query_tracing_token,
            "args": args,
            # remove the last part of the stack which is: this line
            "callstack": "".join(traceback.format_stack()[:-1]),
            "description": "",
        }

        start = time.time()
        # execute the plan
        try:
            results = plan.execute()
        except (Unauthorized, ValidationError):
            # getting an Unauthorized/ValidationError exception means the
            # transaction must be rolled back
            #
            # notes:
            # * we should not reset the connections set here, since we don't want the
            #   connection to loose it during processing
            # * don't rollback if we're in the commit process, will be handled
            #   by the connection
            if cnx.commit_state is None:
                cnx.commit_state = 'uncommitable'
            raise

        # elapsed time is recorded in milliseconds
        query_debug_informations["time"] = ((time.time() - start) * 1000)
        query_debug_informations["result"] = results

        # build a description for the results if necessary
        descr = ()
        variables = None
        if build_descr:
            if rqlst.TYPE == 'select':
                # sample selection
                if len(rqlst.children) == 1 and len(rqlst.children[0].solutions) == 1:
                    # easy, all lines are identical
                    selected = rqlst.children[0].selection
                    solution = rqlst.children[0].solutions[0]
                    description = _make_description(selected, args, solution)
                    descr = RepeatList(len(results), tuple(description))
                    variables = [self._get_projected_name(projected, rqlst.children[0].stinfo)
                                 for projected in selected]
                else:
                    # hard, delegate the work :o)
                    descr = manual_build_descr(cnx, rqlst, args, results)
            elif rqlst.TYPE == 'insert':
                # on insert plan, some entities may have been auto-casted,
                # so compute description manually even if there is only
                # one solution
                basedescr = [None] * len(plan.selected)
                todetermine = list(zip(range(len(plan.selected)), repeat(False)))
                descr = _build_descr(cnx, results, basedescr, todetermine)
            # FIXME: get number of affected entities / relations on non
            # selection queries ?
            query_debug_informations["description"] = descr

        emit_to_debug_channel("rql", query_debug_informations)

        # return a result set object
        return ResultSet(results, rql, args, descr, variables)

    # these are overridden by set_log_methods below
    # only defining here to prevent pylint from complaining
    info = warning = error = critical = exception = debug = lambda msg, *a, **kw: None

    @staticmethod
    def _get_projected_name(projected, stinfo):
        """return the name to use for a selected term: the variable name for
        a variable reference, the name under which a constant is found in
        `stinfo['rewritten']` if any, else the string form of the term
        """
        if isinstance(projected, VariableRef):
            return projected.name
        elif isinstance(projected, Constant):
            if stinfo['rewritten'] is None:
                return str(projected)
            for name, value in stinfo['rewritten'].items():
                if [projected] == value:
                    return name
        return str(projected)

630

631
632
633
634
635
636
637
638
639
640
641
642
class RQLCache(object):
    """Cache of parsed RQL syntax trees.

    Trees are stored in a `QueryCache` under a tuple key starting with the
    RQL string. When some named arguments of the query are eids, the key is
    extended with the entity type of each of them (see `get`).
    """

    def __init__(self, repo, schema):
        """Set up the caches and the parsing helpers from `repo`.

        `schema` is accepted for interface compatibility but is not used
        here.
        """
        # rql st and solution cache.
        self._cache = QueryCache(repo.config['rql-cache-size'])
        # rql cache key cache. Don't bother using a Cache instance: we should
        # have a limited number of queries in there, since there are no entries
        # in this cache for user queries (which have no args)
        self._ck_cache = {}
        # some cache usage stats
        self.cache_hit, self.cache_miss = 0, 0
        # rql parsing / analysing helper
        self.compute_var_types = repo.vreg.compute_var_types
        rqlhelper = repo.vreg.rqlhelper
        # set backend on the rql helper, will be used for function checking
        rqlhelper.backend = repo.config.system_source_config['db-driver']

        def parse(rql, annotate=False, parse=rqlhelper.parse):
            """Return a freshly parsed syntax tree for the given RQL."""
            try:
                return parse(rql, annotate=annotate)
            except UnicodeError:
                # report undecodable input as a regular syntax error
                raise RQLSyntaxError(rql)
        self._parse = parse

    def __len__(self):
        """Return the number of entries in the syntax tree cache."""
        return len(self._cache)

    def get(self, cnx, rql, args):
        """Return syntax tree and cache key for the given RQL.

        Returned syntax tree is cached and must not be modified
        """
        # parse the query and binds variables
        cachekey = (rql,)
        try:
            if args:
                # search for named args in query which are eids (hence
                # influencing query's solutions)
                eidkeys = self._ck_cache[rql]
                if eidkeys:
                    # if there are some, we need a better cache key, eg (rql +
                    # entity type of each eid)
                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
            rqlst = self._cache[cachekey]
            self.cache_hit += 1
            statsd_c('cache_hit')
        except KeyError:
            # either the eid keys of this query are not known yet, or the
            # tree is not cached under the computed key
            self.cache_miss += 1
            statsd_c('cache_miss')
            rqlst = self._parse(rql)
            # compute solutions for rqlst and return named args in query
            # which are eids. Notice that even if we may not need `eidkeys`,
            # we have to compute solutions anyway (kept as annotation on the
            # tree)
            eidkeys = self.compute_var_types(cnx, rqlst, args)
            if args and rql not in self._ck_cache:
                self._ck_cache[rql] = eidkeys
                if eidkeys:
                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
            self._cache[cachekey] = rqlst
        return rqlst, cachekey

    def pop(self, key, *args):
        """Pop a key from the cache and return the removed value.

        Extra positional arguments (typically a default value) are forwarded
        unchanged to the underlying cache's ``pop``, whose result is returned.
        """
        return self._cache.pop(key, *args)


699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
def _rql_cache_key(cnx, rql, args, eidkeys):
    cachekey = [rql]
    type_from_eid = cnx.repo.type_from_eid
    for key in sorted(eidkeys):
        try:
            etype = type_from_eid(args[key], cnx)
        except KeyError:
            raise QueryError('bad cache key %s (no value)' % key)
        except TypeError:
            raise QueryError('bad cache key %s (value: %r)' % (
                key, args[key]))
        cachekey.append(etype)
        # ensure eid is correctly typed in args
        args[key] = int(args[key])
    return tuple(cachekey)


Adrien Di Mascio's avatar
Adrien Di Mascio committed
716
717
718
719
from logging import getLogger
from cubicweb import set_log_methods
# Install the logging methods on QuerierHelper: per the comment in the class
# body, this overrides the no-op placeholders (info, warning, error, critical,
# exception, debug) defined there.
LOGGER = getLogger('cubicweb.querier')
set_log_methods(QuerierHelper, LOGGER)
720
721


722
def manual_build_descr(cnx, rqlst, args, result):
    """Build the description of `result` by analysing each row.

    For each selected column, the type is taken from the syntax tree when it
    is known and the column is not reported by
    `rqlst.get_variable_indices()`. Other columns are left to be determined
    row by row by `_build_descr`.

    When no column needs a per-row lookup, a `RepeatList` of the single
    shared row description is returned.

    XXX could probably be done more efficiently during execution of query
    """
    # columns whose type may change from one solution to another
    unstable_indices = rqlst.get_variable_indices()
    base = []
    pending = []
    for idx in range(len(rqlst.children[0].selection)):
        term_type = _selection_idx_type(idx, rqlst, args)
        if term_type == 'Any':
            # 'Any' carries no more information than an unknown type
            term_type = None
        is_final = term_type is None or term_type in BASE_TYPES
        if term_type is None or idx in unstable_indices:
            base.append(None)
            pending.append((idx, is_final))
        else:
            base.append(term_type)
    if pending:
        return _build_descr(cnx, result, base, pending)
    return RepeatList(len(result), tuple(base))
747

748
def _build_descr(cnx, result, basedescription, todetermine):
749
    description = []
750
    entity_type = cnx.entity_type
751
752
753
754
755
756
757
758
759
760
761
762
763
    todel = []
    for i, row in enumerate(result):
        row_descr = basedescription[:]
        for index, isfinal in todetermine:
            value = row[index]
            if value is None:
                # None value inserted by an outer join, no type
                row_descr[index] = None
                continue
            if isfinal:
                row_descr[index] = etype_from_pyobj(value)
            else:
                try:
764
                    row_descr[index] = entity_type(value)
765
                except UnknownEid:
766
                    cnx.error('wrong eid %s in repository, you should '
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
                             'db-check the database' % value)
                    todel.append(i)
                    break
        else:
            description.append(tuple(row_descr))
    for i in reversed(todel):
        del result[i]
    return description

def _make_description(selected, args, solution):
    """return a description for a result set"""
    description = []
    for term in selected:
        description.append(term.get_type(solution, args))
    return description

def _selection_idx_type(i, rqlst, args):
    """try to return type of term at index `i` of the rqlst's selection"""
    for select in rqlst.children:
        term = select.selection[i]
        for solution in select.solutions:
            try:
                ttype = term.get_type(solution, args)
                if ttype is not None:
                    return ttype
            except CoercionError:
                return None