Line data Source code
1 : /**
2 : * Copyright (C) 2022-present MongoDB, Inc.
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the Server Side Public License, version 1,
6 : * as published by MongoDB, Inc.
7 : *
8 : * This program is distributed in the hope that it will be useful,
9 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 : * Server Side Public License for more details.
12 : *
13 : * You should have received a copy of the Server Side Public License
14 : * along with this program. If not, see
15 : * <http://www.mongodb.com/licensing/server-side-public-license>.
16 : *
17 : * As a special exception, the copyright holders give permission to link the
18 : * code of portions of this program with the OpenSSL library under certain
19 : * conditions as described in each individual source file and distribute
20 : * linked combinations including the program with the OpenSSL library. You
21 : * must comply with the Server Side Public License in all respects for
22 : * all of the code used other than as permitted herein. If you modify file(s)
23 : * with this exception, you may extend this exception to your version of the
24 : * file(s), but you are not obligated to do so. If you do not wish to do so,
25 : * delete this exception statement from your version. If you delete this
26 : * exception statement from all source files in the program, then also delete
27 : * it in the license file.
28 : */
29 :
30 : #include "mongo/db/query/optimizer/rewrites/path.h"
31 :
32 : #include <set>
33 : #include <utility>
34 :
35 : #include <absl/container/node_hash_map.h>
36 : #include <absl/meta/type_traits.h>
37 : #include <boost/move/utility_core.hpp>
38 : #include <boost/none.hpp>
39 : #include <boost/optional/optional.hpp>
40 :
41 : #include "mongo/db/exec/sbe/values/value.h"
42 : #include "mongo/db/query/optimizer/algebra/operator.h"
43 : #include "mongo/db/query/optimizer/algebra/polyvalue.h"
44 : #include "mongo/db/query/optimizer/comparison_op.h"
45 : #include "mongo/db/query/optimizer/defs.h"
46 : #include "mongo/db/query/optimizer/utils/path_utils.h"
47 : #include "mongo/db/query/optimizer/utils/strong_alias.h"
48 : #include "mongo/util/assert_util.h"
49 :
50 :
51 : namespace mongo::optimizer {
52 53735 : ABT::reference_type PathFusion::follow(ABT::reference_type n) {
53 53735 : if (auto var = n.cast<Variable>(); var) {
54 52940 : auto def = _env.getDefinition(*var);
55 52940 : if (!def.definition.empty() && !def.definition.is<Source>()) {
56 616 : return follow(def.definition);
57 : }
58 : }
59 :
60 53119 : return n;
61 : }
62 :
63 1174 : bool PathFusion::fuse(ABT& lhs, const ABT& rhs) {
64 1174 : if (auto rhsComposeM = rhs.cast<PathComposeM>(); rhsComposeM != nullptr) {
65 1094 : for (const auto& branch : collectComposed(rhs)) {
66 936 : if (_info[branch.cast<PathSyntaxSort>()]._isConst && fuse(lhs, branch)) {
67 34 : return true;
68 : }
69 192 : }
70 : }
71 :
72 1140 : if (auto lhsGet = lhs.cast<PathGet>(); lhsGet != nullptr) {
73 555 : if (auto rhsField = rhs.cast<PathField>();
74 555 : rhsField != nullptr && lhsGet->name() == rhsField->name()) {
75 95 : return fuse(lhsGet->getPath(), rhsField->getPath());
76 : }
77 :
78 460 : if (auto rhsKeep = rhs.cast<PathKeep>(); rhsKeep != nullptr) {
79 28 : if (rhsKeep->getNames().count(lhsGet->name()) > 0) {
80 24 : return true;
81 : }
82 : }
83 : }
84 :
85 1021 : if (auto lhsTraverse = lhs.cast<PathTraverse>(); lhsTraverse != nullptr) {
86 248 : if (auto rhsTraverse = rhs.cast<PathTraverse>();
87 248 : rhsTraverse != nullptr && lhsTraverse->getMaxDepth() == rhsTraverse->getMaxDepth()) {
88 0 : return fuse(lhsTraverse->getPath(), rhsTraverse->getPath());
89 : }
90 :
91 248 : auto rhsType = _info[rhs.cast<PathSyntaxSort>()]._type;
92 248 : if (rhsType != Type::unknown && rhsType != Type::array) {
93 0 : auto result = std::exchange(lhsTraverse->getPath(), make<Blackhole>());
94 0 : std::swap(lhs, result);
95 0 : return fuse(lhs, rhs);
96 0 : }
97 : }
98 :
99 1021 : if (lhs.is<PathIdentity>()) {
100 62 : lhs = rhs;
101 62 : return true;
102 : }
103 :
104 959 : if (rhs.is<PathLambda>() && _kindCtx.back() == Kind::project) {
105 : // PathLambda should be the left child.
106 7 : lhs = make<PathComposeM>(rhs, std::move(lhs));
107 7 : return true;
108 : }
109 :
110 952 : if (auto rhsConst = rhs.cast<PathConstant>(); rhsConst != nullptr) {
111 38 : if (auto lhsCmp = lhs.cast<PathCompare>(); lhsCmp != nullptr) {
112 0 : auto result = make<PathConstant>(make<BinaryOp>(
113 0 : lhsCmp->op(),
114 0 : make<BinaryOp>(Operations::Cmp3w, rhsConst->getConstant(), lhsCmp->getVal()),
115 0 : Constant::int64(0)));
116 :
117 0 : std::swap(lhs, result);
118 0 : return true;
119 0 : }
120 :
121 38 : switch (_kindCtx.back()) {
122 12 : case Kind::filter:
123 12 : break;
124 :
125 26 : case Kind::project:
126 26 : lhs = make<PathComposeM>(rhs, std::move(lhs));
127 26 : return true;
128 : }
129 : }
130 :
131 926 : return false;
132 : }
133 :
134 68599 : bool PathFusion::optimize(ABT& root) {
135 : for (;;) {
136 68599 : _changed = false;
137 68599 : algebra::transport<true>(root, *this);
138 :
139 : // If we have nodes in _redundant then continue iterating even if _changed is not set.
140 68593 : if (!_changed && _redundant.empty()) {
141 68182 : break;
142 : }
143 :
144 417 : _env.rebuild(root);
145 415 : _info.clear();
146 : }
147 :
148 68182 : return false;
149 : }
150 :
151 3188 : void PathFusion::transport(ABT& n, const PathConstant& path, ABT& c) {
152 3188 : CollectedInfo ci;
153 3188 : if (auto exprC = path.getConstant().cast<Constant>(); exprC) {
154 : // Let's see what we can determine from the constant expression
155 238 : auto [tag, val] = exprC->get();
156 238 : if (sbe::value::isObject(tag)) {
157 21 : ci._type = Type::object;
158 217 : } else if (sbe::value::isArray(tag)) {
159 0 : ci._type = Type::array;
160 217 : } else if (tag == sbe::value::TypeTags::Boolean) {
161 0 : ci._type = Type::boolean;
162 217 : } else if (tag == sbe::value::TypeTags::Nothing) {
163 0 : ci._type = Type::nothing;
164 : } else {
165 217 : ci._type = Type::any;
166 : }
167 : }
168 :
169 3188 : ci._isConst = true;
170 3188 : _info[&path] = ci;
171 3188 : }
172 :
173 288549 : void PathFusion::transport(ABT& n, const PathCompare& path, ABT& c) {
174 288549 : CollectedInfo ci;
175 :
176 : // TODO - follow up on Nothing and 3 value logic. Assume plain boolean for now.
177 288549 : ci._type = Type::boolean;
178 288549 : ci._isConst = _info[path.getVal().cast<PathSyntaxSort>()]._isConst;
179 288549 : _info[&path] = ci;
180 288549 : }
181 :
182 274427 : void PathFusion::transport(ABT& n, const PathGet& get, ABT& path) {
183 274427 : if (_changed) {
184 609 : return;
185 : }
186 :
187 : // Get "a" Const <c> -> Const <c>
188 273818 : if (auto constPath = path.cast<PathConstant>(); constPath) {
189 : // Pull out the constant path
190 90 : auto result = std::exchange(path, make<Blackhole>());
191 :
192 : // And swap it for the current node
193 90 : std::swap(n, result);
194 :
195 90 : _changed = true;
196 90 : } else {
197 273728 : auto it = _info.find(path.cast<PathSyntaxSort>());
198 273728 : uassert(6624129, "expected to find path", it != _info.end());
199 :
200 : // Simply move the info from the child.
201 273728 : _info[&get] = it->second;
202 : }
203 : }
204 :
205 2268 : void PathFusion::transport(ABT& n, const PathField& field, ABT& path) {
206 2268 : auto it = _info.find(path.cast<PathSyntaxSort>());
207 2268 : uassert(6624130, "expected to find path", it != _info.end());
208 :
209 2268 : CollectedInfo ci;
210 2268 : if (it->second._type == Type::unknown) {
211 : // We don't know anything about the child.
212 2191 : ci._type = Type::unknown;
213 77 : } else if (it->second._type == Type::nothing) {
214 : // We are setting a field to nothing (aka drop) hence we do not know what the result could
215 : // be (i.e. it all depends on the input).
216 0 : ci._type = Type::unknown;
217 : } else {
218 : // The child produces bona fide value hence this will become an object.
219 77 : ci._type = Type::object;
220 : }
221 :
222 2268 : ci._isConst = it->second._isConst;
223 2268 : _info[&field] = ci;
224 2268 : }
225 :
226 265175 : void PathFusion::transport(ABT& n, const PathTraverse& path, ABT& inner) {
227 : // Traverse is completely dependent on its input and we cannot say anything about it.
228 265175 : CollectedInfo ci;
229 265175 : ci._type = Type::unknown;
230 265175 : ci._isConst = false;
231 265175 : _info[&path] = ci;
232 265175 : }
233 :
234 247106 : void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) {
235 247106 : if (_changed) {
236 474 : return;
237 : }
238 :
239 246672 : if (auto p1Const = p1.cast<PathConstant>(); p1Const != nullptr) {
240 1312 : switch (_kindCtx.back()) {
241 1286 : case Kind::filter:
242 : // PathComposeM in the EvalFilter context cannot be modeled as function composition
243 : // the same way that it can in EvalPath because the output of the inner EvalFilter
244 : // would be a stream of booleans rather than a stream of documents.
245 1286 : break;
246 :
247 26 : case Kind::project:
248 26 : n = make<PathConstant>(make<EvalPath>(p2, p1Const->getConstant()));
249 26 : _changed = true;
250 26 : return;
251 : }
252 : }
253 :
254 246646 : if (auto p1Get = p1.cast<PathGet>(); p1Get != nullptr && p1Get->getPath().is<PathIdentity>()) {
255 : // TODO: handle chain of Gets.
256 7 : n = make<PathGet>(p1Get->name(), std::move(p2));
257 7 : _changed = true;
258 7 : return;
259 : }
260 :
261 246639 : if (p1.is<PathIdentity>()) {
262 : // Id * p2 -> p2
263 0 : auto result = std::exchange(p2, make<Blackhole>());
264 0 : std::swap(n, result);
265 0 : _changed = true;
266 0 : return;
267 246639 : } else if (p2.is<PathIdentity>()) {
268 : // p1 * Id -> p1
269 0 : auto result = std::exchange(p1, make<Blackhole>());
270 0 : std::swap(n, result);
271 0 : _changed = true;
272 0 : return;
273 246639 : } else if (_redundant.erase(p1.cast<PathSyntaxSort>())) {
274 7 : auto result = std::exchange(p2, make<Blackhole>());
275 7 : std::swap(n, result);
276 7 : _changed = true;
277 7 : return;
278 246639 : } else if (_redundant.erase(p2.cast<PathSyntaxSort>())) {
279 0 : auto result = std::exchange(p1, make<Blackhole>());
280 0 : std::swap(n, result);
281 0 : _changed = true;
282 0 : return;
283 0 : }
284 :
285 246632 : auto p1InfoIt = _info.find(p1.cast<PathSyntaxSort>());
286 246632 : auto p2InfoIt = _info.find(p2.cast<PathSyntaxSort>());
287 :
288 246632 : uassert(6624131, "info must be defined", p1InfoIt != _info.end() && p2InfoIt != _info.end());
289 :
290 246639 : if (p1.is<PathDefault>() && p2InfoIt->second.isNotNothing() &&
291 7 : _kindCtx.back() == Kind::project) {
292 : // Default * Const e -> e (provided we can prove e is not Nothing and we can do that
293 : // only when e is Constant expression)
294 0 : auto result = std::exchange(p2, make<Blackhole>());
295 0 : std::swap(n, result);
296 0 : _changed = true;
297 246639 : } else if (p2.is<PathDefault>() && p1InfoIt->second.isNotNothing() &&
298 7 : _kindCtx.back() == Kind::project) {
299 : // Const e * Default -> e (provided we can prove e is not Nothing and we can do that
300 : // only when e is Constant expression)
301 0 : auto result = std::exchange(p1, make<Blackhole>());
302 0 : std::swap(n, result);
303 0 : _changed = true;
304 246632 : } else if (p2InfoIt->second._type == Type::object) {
305 14 : auto left = collectComposed(p1);
306 35 : for (auto l : left) {
307 21 : if (l.is<PathObj>()) {
308 7 : _redundant.emplace(l.cast<PathSyntaxSort>());
309 :
310 : // Specifically not setting _changed here. Since we are trying to erase a child we
311 : // need to traverse the tree again on the next iteration of the optimize() loop (see
312 : // conditions above which erase from _redundant).
313 : }
314 : }
315 14 : _info[&path] = p2InfoIt->second;
316 14 : } else {
317 246618 : _info[&path] = p2InfoIt->second;
318 : }
319 : }
320 :
321 52350 : void PathFusion::tryFuseComposition(ABT& n, ABT& input) {
322 : // Check to see if our flattened composition consists of constant branches containing only
323 : // Field and Keep elements. If we have duplicate Field branches then retain only the
324 : // latest one. For example:
325 : // (Field "a" ConstPath1) * (Field "b" ConstPath2) * Keep "a" -> Field "a" ConstPath1
326 : // (Field "a" ConstPath1) * (Field "a" ConstPath2) -> Field "a" ConstPath2
327 : // TODO: handle Drop elements.
328 :
329 : // Latest value per field.
330 52350 : opt::unordered_map<FieldNameType, ABT, FieldNameType::Hasher> fieldMap;
331 : // Used to preserve the relative order in which fields are set on the result.
332 52350 : FieldPathType orderedFieldNames;
333 52350 : boost::optional<FieldNameOrderedSet> toKeep;
334 :
335 52350 : Type inputType = Type::any;
336 52350 : if (auto constPtr = input.cast<Constant>(); constPtr != nullptr && constPtr->isObject()) {
337 47 : inputType = Type::object;
338 : }
339 :
340 52350 : bool updated = false;
341 52350 : bool hasDefault = false;
342 54931 : for (const auto& branch : collectComposed(n)) {
343 53304 : auto info = _info.find(branch.cast<PathSyntaxSort>());
344 53304 : if (info == _info.cend()) {
345 50723 : return;
346 : }
347 :
348 53304 : if (auto fieldPtr = branch.cast<PathField>()) {
349 1324 : if (!info->second._isConst) {
350 : // Rewrite is valid only with constant paths.
351 577 : return;
352 : }
353 :
354 : // Overwrite field with the latest value.
355 747 : if (fieldMap.insert_or_assign(fieldPtr->name(), branch).second) {
356 741 : orderedFieldNames.push_back(fieldPtr->name());
357 : } else {
358 6 : updated = true;
359 : }
360 51980 : } else if (auto keepPtr = branch.cast<PathKeep>()) {
361 1739 : for (auto it = fieldMap.begin(); it != fieldMap.cend();) {
362 15 : if (keepPtr->getNames().count(it->first) == 0) {
363 : // Field is not kept, erase.
364 15 : fieldMap.erase(it++);
365 15 : updated = true;
366 : } else {
367 0 : it++;
368 : }
369 : }
370 :
371 1724 : auto newKeepSet = keepPtr->getNames();
372 1724 : if (toKeep) {
373 0 : for (auto it = newKeepSet.begin(); it != newKeepSet.end();) {
374 0 : if (toKeep->count(*it) == 0) {
375 : // Field was not previously kept.
376 0 : newKeepSet.erase(it++);
377 0 : updated = true;
378 : } else {
379 0 : it++;
380 : }
381 : }
382 : }
383 1724 : toKeep = std::move(newKeepSet);
384 51980 : } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr) {
385 110 : if (auto constPtr = defaultPtr->getDefault().cast<Constant>();
386 110 : constPtr != nullptr && constPtr->isObject() && inputType == Type::object) {
387 : // Skip over PathDefault with an empty object since our input is already an object.
388 0 : updated = true;
389 : } else {
390 : // Skip over other PathDefaults but remember we had one.
391 110 : hasDefault = true;
392 : }
393 : } else {
394 50146 : return;
395 : }
396 52350 : }
397 :
398 3254 : if (toKeep && input != Constant::emptyObject()) {
399 : // Check if we assign to every field we keep. If so, drop dependence on input.
400 1218 : bool assignToAllKeptFields = true;
401 1235 : for (const auto& fieldName : *toKeep) {
402 1225 : if (fieldMap.count(fieldName) == 0) {
403 1208 : assignToAllKeptFields = false;
404 1208 : break;
405 : }
406 : }
407 1218 : if (assignToAllKeptFields) {
408 : // Do not need the input, do not keep fields, and ignore defaults.
409 10 : input = Constant::emptyObject();
410 10 : toKeep = boost::none;
411 10 : updated = true;
412 10 : hasDefault = false;
413 : }
414 : }
415 1627 : if (!updated) {
416 1611 : return;
417 : }
418 16 : if (hasDefault) {
419 0 : return;
420 : }
421 :
422 16 : ABT result = make<PathIdentity>();
423 16 : if (toKeep) {
424 0 : maybeComposePath(result, make<PathKeep>(std::move(*toKeep)));
425 : }
426 47 : for (const auto& fieldName : orderedFieldNames) {
427 31 : auto it = fieldMap.find(fieldName);
428 31 : if (it != fieldMap.cend()) {
429 : // We may have removed the field name by virtue of not keeping.
430 16 : maybeComposePath(result, it->second);
431 : }
432 : }
433 :
434 16 : std::swap(n, result);
435 16 : _changed = true;
436 157018 : }
437 :
438 5478 : void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input) {
439 5478 : if (_changed) {
440 898 : return;
441 : }
442 :
443 4580 : auto realInput = follow(input);
444 : // If we are evaluating const path then we can simply replace the whole expression with the
445 : // result.
446 4580 : if (auto constPath = path.cast<PathConstant>(); constPath) {
447 : // Pull out the constant out of the path
448 97 : auto result = std::exchange(constPath->getConstant(), make<Blackhole>());
449 :
450 : // And swap it for the current node
451 97 : std::swap(n, result);
452 :
453 97 : _changed = true;
454 4580 : } else if (auto evalInput = realInput.cast<EvalPath>(); evalInput) {
455 : // An input to 'this' EvalPath expression is another EvalPath so we may try to fuse the
456 : // paths.
457 472 : if (fuse(n.cast<EvalPath>()->getPath(), evalInput->getPath())) {
458 : // We have fused paths so replace the input (by making a copy).
459 110 : input = evalInput->getInput();
460 :
461 110 : _changed = true;
462 362 : } else if (auto evalImmediateInput = input.cast<EvalPath>();
463 : evalImmediateInput != nullptr) {
464 : // Compose immediate EvalPath input.
465 78 : n = make<EvalPath>(
466 78 : make<PathComposeM>(std::move(evalImmediateInput->getPath()), std::move(path)),
467 78 : std::move(evalImmediateInput->getInput()));
468 :
469 39 : _changed = true;
470 : }
471 : } else {
472 4011 : tryFuseComposition(path, input);
473 : }
474 :
475 4580 : _kindCtx.pop_back();
476 : }
477 :
478 48540 : void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input) {
479 48540 : if (_changed) {
480 1 : return;
481 : }
482 :
483 48539 : auto realInput = follow(input);
484 : // If we are evaluating const path then we can simply replace the whole expression with the
485 : // result.
486 48539 : if (auto constPath = path.cast<PathConstant>(); constPath) {
487 : // Pull out the constant out of the path
488 7 : auto result = std::exchange(constPath->getConstant(), make<Blackhole>());
489 :
490 : // And swap it for the current node
491 7 : std::swap(n, result);
492 :
493 7 : _changed = true;
494 48539 : } else if (auto evalInput = realInput.cast<EvalPath>(); evalInput) {
495 : // An input to 'this' EvalFilter expression is another EvalPath so we may try to fuse the
496 : // paths.
497 193 : if (fuse(n.cast<EvalFilter>()->getPath(), evalInput->getPath())) {
498 : // We have fused paths so replace the input (by making a copy).
499 9 : input = evalInput->getInput();
500 :
501 9 : _changed = true;
502 : }
503 : } else {
504 48339 : tryFuseComposition(path, input);
505 : }
506 :
507 48539 : _kindCtx.pop_back();
508 : }
509 : } // namespace mongo::optimizer
|