LCOV - code coverage report
Current view: top level - home/ubuntu/mongo/src/mongo/db/query/optimizer/rewrites - path.cpp (source / functions) Hit Total Coverage
Test: Code Coverage Lines: 221 265 83.4 %
Date: 2023-08-04 18:30:11 Functions: 12 15 80.0 %

          Line data    Source code
       1             : /**
       2             :  *    Copyright (C) 2022-present MongoDB, Inc.
       3             :  *
       4             :  *    This program is free software: you can redistribute it and/or modify
       5             :  *    it under the terms of the Server Side Public License, version 1,
       6             :  *    as published by MongoDB, Inc.
       7             :  *
       8             :  *    This program is distributed in the hope that it will be useful,
       9             :  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      11             :  *    Server Side Public License for more details.
      12             :  *
      13             :  *    You should have received a copy of the Server Side Public License
      14             :  *    along with this program. If not, see
      15             :  *    <http://www.mongodb.com/licensing/server-side-public-license>.
      16             :  *
      17             :  *    As a special exception, the copyright holders give permission to link the
      18             :  *    code of portions of this program with the OpenSSL library under certain
      19             :  *    conditions as described in each individual source file and distribute
      20             :  *    linked combinations including the program with the OpenSSL library. You
      21             :  *    must comply with the Server Side Public License in all respects for
      22             :  *    all of the code used other than as permitted herein. If you modify file(s)
      23             :  *    with this exception, you may extend this exception to your version of the
      24             :  *    file(s), but you are not obligated to do so. If you do not wish to do so,
      25             :  *    delete this exception statement from your version. If you delete this
      26             :  *    exception statement from all source files in the program, then also delete
      27             :  *    it in the license file.
      28             :  */
      29             : 
      30             : #include "mongo/db/query/optimizer/rewrites/path.h"
      31             : 
      32             : #include <set>
      33             : #include <utility>
      34             : 
      35             : #include <absl/container/node_hash_map.h>
      36             : #include <absl/meta/type_traits.h>
      37             : #include <boost/move/utility_core.hpp>
      38             : #include <boost/none.hpp>
      39             : #include <boost/optional/optional.hpp>
      40             : 
      41             : #include "mongo/db/exec/sbe/values/value.h"
      42             : #include "mongo/db/query/optimizer/algebra/operator.h"
      43             : #include "mongo/db/query/optimizer/algebra/polyvalue.h"
      44             : #include "mongo/db/query/optimizer/comparison_op.h"
      45             : #include "mongo/db/query/optimizer/defs.h"
      46             : #include "mongo/db/query/optimizer/utils/path_utils.h"
      47             : #include "mongo/db/query/optimizer/utils/strong_alias.h"
      48             : #include "mongo/util/assert_util.h"
      49             : 
      50             : 
      51             : namespace mongo::optimizer {
      52       53735 : ABT::reference_type PathFusion::follow(ABT::reference_type n) {
      53       53735 :     if (auto var = n.cast<Variable>(); var) {
      54       52940 :         auto def = _env.getDefinition(*var);
      55       52940 :         if (!def.definition.empty() && !def.definition.is<Source>()) {
      56         616 :             return follow(def.definition);
      57             :         }
      58             :     }
      59             : 
      60       53119 :     return n;
      61             : }
      62             : 
      63        1174 : bool PathFusion::fuse(ABT& lhs, const ABT& rhs) {
      64        1174 :     if (auto rhsComposeM = rhs.cast<PathComposeM>(); rhsComposeM != nullptr) {
      65        1094 :         for (const auto& branch : collectComposed(rhs)) {
      66         936 :             if (_info[branch.cast<PathSyntaxSort>()]._isConst && fuse(lhs, branch)) {
      67          34 :                 return true;
      68             :             }
      69         192 :         }
      70             :     }
      71             : 
      72        1140 :     if (auto lhsGet = lhs.cast<PathGet>(); lhsGet != nullptr) {
      73         555 :         if (auto rhsField = rhs.cast<PathField>();
      74         555 :             rhsField != nullptr && lhsGet->name() == rhsField->name()) {
      75          95 :             return fuse(lhsGet->getPath(), rhsField->getPath());
      76             :         }
      77             : 
      78         460 :         if (auto rhsKeep = rhs.cast<PathKeep>(); rhsKeep != nullptr) {
      79          28 :             if (rhsKeep->getNames().count(lhsGet->name()) > 0) {
      80          24 :                 return true;
      81             :             }
      82             :         }
      83             :     }
      84             : 
      85        1021 :     if (auto lhsTraverse = lhs.cast<PathTraverse>(); lhsTraverse != nullptr) {
      86         248 :         if (auto rhsTraverse = rhs.cast<PathTraverse>();
      87         248 :             rhsTraverse != nullptr && lhsTraverse->getMaxDepth() == rhsTraverse->getMaxDepth()) {
      88           0 :             return fuse(lhsTraverse->getPath(), rhsTraverse->getPath());
      89             :         }
      90             : 
      91         248 :         auto rhsType = _info[rhs.cast<PathSyntaxSort>()]._type;
      92         248 :         if (rhsType != Type::unknown && rhsType != Type::array) {
      93           0 :             auto result = std::exchange(lhsTraverse->getPath(), make<Blackhole>());
      94           0 :             std::swap(lhs, result);
      95           0 :             return fuse(lhs, rhs);
      96           0 :         }
      97             :     }
      98             : 
      99        1021 :     if (lhs.is<PathIdentity>()) {
     100          62 :         lhs = rhs;
     101          62 :         return true;
     102             :     }
     103             : 
     104         959 :     if (rhs.is<PathLambda>() && _kindCtx.back() == Kind::project) {
     105             :         // PathLambda should be the left child.
     106           7 :         lhs = make<PathComposeM>(rhs, std::move(lhs));
     107           7 :         return true;
     108             :     }
     109             : 
     110         952 :     if (auto rhsConst = rhs.cast<PathConstant>(); rhsConst != nullptr) {
     111          38 :         if (auto lhsCmp = lhs.cast<PathCompare>(); lhsCmp != nullptr) {
     112           0 :             auto result = make<PathConstant>(make<BinaryOp>(
     113           0 :                 lhsCmp->op(),
     114           0 :                 make<BinaryOp>(Operations::Cmp3w, rhsConst->getConstant(), lhsCmp->getVal()),
     115           0 :                 Constant::int64(0)));
     116             : 
     117           0 :             std::swap(lhs, result);
     118           0 :             return true;
     119           0 :         }
     120             : 
     121          38 :         switch (_kindCtx.back()) {
     122          12 :             case Kind::filter:
     123          12 :                 break;
     124             : 
     125          26 :             case Kind::project:
     126          26 :                 lhs = make<PathComposeM>(rhs, std::move(lhs));
     127          26 :                 return true;
     128             :         }
     129             :     }
     130             : 
     131         926 :     return false;
     132             : }
     133             : 
     134       68599 : bool PathFusion::optimize(ABT& root) {
     135             :     for (;;) {
     136       68599 :         _changed = false;
     137       68599 :         algebra::transport<true>(root, *this);
     138             : 
     139             :         // If we have nodes in _redundant then continue iterating even if _changed is not set.
     140       68593 :         if (!_changed && _redundant.empty()) {
     141       68182 :             break;
     142             :         }
     143             : 
     144         417 :         _env.rebuild(root);
     145         415 :         _info.clear();
     146             :     }
     147             : 
     148       68182 :     return false;
     149             : }
     150             : 
     151        3188 : void PathFusion::transport(ABT& n, const PathConstant& path, ABT& c) {
     152        3188 :     CollectedInfo ci;
     153        3188 :     if (auto exprC = path.getConstant().cast<Constant>(); exprC) {
     154             :         // Let's see what we can determine from the constant expression
     155         238 :         auto [tag, val] = exprC->get();
     156         238 :         if (sbe::value::isObject(tag)) {
     157          21 :             ci._type = Type::object;
     158         217 :         } else if (sbe::value::isArray(tag)) {
     159           0 :             ci._type = Type::array;
     160         217 :         } else if (tag == sbe::value::TypeTags::Boolean) {
     161           0 :             ci._type = Type::boolean;
     162         217 :         } else if (tag == sbe::value::TypeTags::Nothing) {
     163           0 :             ci._type = Type::nothing;
     164             :         } else {
     165         217 :             ci._type = Type::any;
     166             :         }
     167             :     }
     168             : 
     169        3188 :     ci._isConst = true;
     170        3188 :     _info[&path] = ci;
     171        3188 : }
     172             : 
     173      288549 : void PathFusion::transport(ABT& n, const PathCompare& path, ABT& c) {
     174      288549 :     CollectedInfo ci;
     175             : 
     176             :     // TODO - follow up on Nothing and 3 value logic. Assume plain boolean for now.
     177      288549 :     ci._type = Type::boolean;
     178      288549 :     ci._isConst = _info[path.getVal().cast<PathSyntaxSort>()]._isConst;
     179      288549 :     _info[&path] = ci;
     180      288549 : }
     181             : 
     182      274427 : void PathFusion::transport(ABT& n, const PathGet& get, ABT& path) {
     183      274427 :     if (_changed) {
     184         609 :         return;
     185             :     }
     186             : 
     187             :     // Get "a" Const <c> -> Const <c>
     188      273818 :     if (auto constPath = path.cast<PathConstant>(); constPath) {
     189             :         // Pull out the constant path
     190          90 :         auto result = std::exchange(path, make<Blackhole>());
     191             : 
     192             :         // And swap it for the current node
     193          90 :         std::swap(n, result);
     194             : 
     195          90 :         _changed = true;
     196          90 :     } else {
     197      273728 :         auto it = _info.find(path.cast<PathSyntaxSort>());
     198      273728 :         uassert(6624129, "expected to find path", it != _info.end());
     199             : 
     200             :         // Simply move the info from the child.
     201      273728 :         _info[&get] = it->second;
     202             :     }
     203             : }
     204             : 
     205        2268 : void PathFusion::transport(ABT& n, const PathField& field, ABT& path) {
     206        2268 :     auto it = _info.find(path.cast<PathSyntaxSort>());
     207        2268 :     uassert(6624130, "expected to find path", it != _info.end());
     208             : 
     209        2268 :     CollectedInfo ci;
     210        2268 :     if (it->second._type == Type::unknown) {
     211             :         // We don't know anything about the child.
     212        2191 :         ci._type = Type::unknown;
     213          77 :     } else if (it->second._type == Type::nothing) {
     214             :         // We are setting a field to nothing (aka drop) hence we do not know what the result could
     215             :         // be (i.e. it all depends on the input).
     216           0 :         ci._type = Type::unknown;
     217             :     } else {
     218             :         // The child produces bona fide value hence this will become an object.
     219          77 :         ci._type = Type::object;
     220             :     }
     221             : 
     222        2268 :     ci._isConst = it->second._isConst;
     223        2268 :     _info[&field] = ci;
     224        2268 : }
     225             : 
     226      265175 : void PathFusion::transport(ABT& n, const PathTraverse& path, ABT& inner) {
     227             :     // Traverse is completely dependent on its input and we cannot say anything about it.
     228      265175 :     CollectedInfo ci;
     229      265175 :     ci._type = Type::unknown;
     230      265175 :     ci._isConst = false;
     231      265175 :     _info[&path] = ci;
     232      265175 : }
     233             : 
     234      247106 : void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) {
     235      247106 :     if (_changed) {
     236         474 :         return;
     237             :     }
     238             : 
     239      246672 :     if (auto p1Const = p1.cast<PathConstant>(); p1Const != nullptr) {
     240        1312 :         switch (_kindCtx.back()) {
     241        1286 :             case Kind::filter:
     242             :                 // PathComposeM in the EvalFilter context cannot be modeled as function composition
     243             :                 // the same way that it can in EvalPath because the output of the inner EvalFilter
     244             :                 // would be a stream of booleans rather than a stream of documents.
     245        1286 :                 break;
     246             : 
     247          26 :             case Kind::project:
     248          26 :                 n = make<PathConstant>(make<EvalPath>(p2, p1Const->getConstant()));
     249          26 :                 _changed = true;
     250          26 :                 return;
     251             :         }
     252             :     }
     253             : 
     254      246646 :     if (auto p1Get = p1.cast<PathGet>(); p1Get != nullptr && p1Get->getPath().is<PathIdentity>()) {
     255             :         // TODO: handle chain of Gets.
     256           7 :         n = make<PathGet>(p1Get->name(), std::move(p2));
     257           7 :         _changed = true;
     258           7 :         return;
     259             :     }
     260             : 
     261      246639 :     if (p1.is<PathIdentity>()) {
     262             :         // Id * p2 -> p2
     263           0 :         auto result = std::exchange(p2, make<Blackhole>());
     264           0 :         std::swap(n, result);
     265           0 :         _changed = true;
     266           0 :         return;
     267      246639 :     } else if (p2.is<PathIdentity>()) {
     268             :         // p1 * Id -> p1
     269           0 :         auto result = std::exchange(p1, make<Blackhole>());
     270           0 :         std::swap(n, result);
     271           0 :         _changed = true;
     272           0 :         return;
     273      246639 :     } else if (_redundant.erase(p1.cast<PathSyntaxSort>())) {
     274           7 :         auto result = std::exchange(p2, make<Blackhole>());
     275           7 :         std::swap(n, result);
     276           7 :         _changed = true;
     277           7 :         return;
     278      246639 :     } else if (_redundant.erase(p2.cast<PathSyntaxSort>())) {
     279           0 :         auto result = std::exchange(p1, make<Blackhole>());
     280           0 :         std::swap(n, result);
     281           0 :         _changed = true;
     282           0 :         return;
     283           0 :     }
     284             : 
     285      246632 :     auto p1InfoIt = _info.find(p1.cast<PathSyntaxSort>());
     286      246632 :     auto p2InfoIt = _info.find(p2.cast<PathSyntaxSort>());
     287             : 
     288      246632 :     uassert(6624131, "info must be defined", p1InfoIt != _info.end() && p2InfoIt != _info.end());
     289             : 
     290      246639 :     if (p1.is<PathDefault>() && p2InfoIt->second.isNotNothing() &&
     291           7 :         _kindCtx.back() == Kind::project) {
     292             :         // Default * Const e -> e (provided we can prove e is not Nothing and we can do that
     293             :         // only when e is Constant expression)
     294           0 :         auto result = std::exchange(p2, make<Blackhole>());
     295           0 :         std::swap(n, result);
     296           0 :         _changed = true;
     297      246639 :     } else if (p2.is<PathDefault>() && p1InfoIt->second.isNotNothing() &&
     298           7 :                _kindCtx.back() == Kind::project) {
     299             :         // Const e * Default -> e (provided we can prove e is not Nothing and we can do that
     300             :         // only when e is Constant expression)
     301           0 :         auto result = std::exchange(p1, make<Blackhole>());
     302           0 :         std::swap(n, result);
     303           0 :         _changed = true;
     304      246632 :     } else if (p2InfoIt->second._type == Type::object) {
     305          14 :         auto left = collectComposed(p1);
     306          35 :         for (auto l : left) {
     307          21 :             if (l.is<PathObj>()) {
     308           7 :                 _redundant.emplace(l.cast<PathSyntaxSort>());
     309             : 
     310             :                 // Specifically not setting _changed here. Since we are trying to erase a child we
     311             :                 // need to traverse the tree again on the next iteration of the optimize() loop (see
     312             :                 // conditions above which erase from _redundant).
     313             :             }
     314             :         }
     315          14 :         _info[&path] = p2InfoIt->second;
     316          14 :     } else {
     317      246618 :         _info[&path] = p2InfoIt->second;
     318             :     }
     319             : }
     320             : 
     321       52350 : void PathFusion::tryFuseComposition(ABT& n, ABT& input) {
     322             :     // Check to see if our flattened composition consists of constant branches containing only
     323             :     // Field and Keep elements. If we have duplicate Field branches then retain only the
     324             :     // latest one. For example:
     325             :     //      (Field "a" ConstPath1) * (Field "b" ConstPath2) * Keep "a" -> Field "a" ConstPath1
     326             :     //      (Field "a" ConstPath1) * (Field "a" ConstPath2) -> Field "a" ConstPath2
     327             :     // TODO: handle Drop elements.
     328             : 
     329             :     // Latest value per field.
     330       52350 :     opt::unordered_map<FieldNameType, ABT, FieldNameType::Hasher> fieldMap;
     331             :     // Used to preserve the relative order in which fields are set on the result.
     332       52350 :     FieldPathType orderedFieldNames;
     333       52350 :     boost::optional<FieldNameOrderedSet> toKeep;
     334             : 
     335       52350 :     Type inputType = Type::any;
     336       52350 :     if (auto constPtr = input.cast<Constant>(); constPtr != nullptr && constPtr->isObject()) {
     337          47 :         inputType = Type::object;
     338             :     }
     339             : 
     340       52350 :     bool updated = false;
     341       52350 :     bool hasDefault = false;
     342       54931 :     for (const auto& branch : collectComposed(n)) {
     343       53304 :         auto info = _info.find(branch.cast<PathSyntaxSort>());
     344       53304 :         if (info == _info.cend()) {
     345       50723 :             return;
     346             :         }
     347             : 
     348       53304 :         if (auto fieldPtr = branch.cast<PathField>()) {
     349        1324 :             if (!info->second._isConst) {
     350             :                 // Rewrite is valid only with constant paths.
     351         577 :                 return;
     352             :             }
     353             : 
     354             :             // Overwrite field with the latest value.
     355         747 :             if (fieldMap.insert_or_assign(fieldPtr->name(), branch).second) {
     356         741 :                 orderedFieldNames.push_back(fieldPtr->name());
     357             :             } else {
     358           6 :                 updated = true;
     359             :             }
     360       51980 :         } else if (auto keepPtr = branch.cast<PathKeep>()) {
     361        1739 :             for (auto it = fieldMap.begin(); it != fieldMap.cend();) {
     362          15 :                 if (keepPtr->getNames().count(it->first) == 0) {
     363             :                     // Field is not kept, erase.
     364          15 :                     fieldMap.erase(it++);
     365          15 :                     updated = true;
     366             :                 } else {
     367           0 :                     it++;
     368             :                 }
     369             :             }
     370             : 
     371        1724 :             auto newKeepSet = keepPtr->getNames();
     372        1724 :             if (toKeep) {
     373           0 :                 for (auto it = newKeepSet.begin(); it != newKeepSet.end();) {
     374           0 :                     if (toKeep->count(*it) == 0) {
     375             :                         // Field was not previously kept.
     376           0 :                         newKeepSet.erase(it++);
     377           0 :                         updated = true;
     378             :                     } else {
     379           0 :                         it++;
     380             :                     }
     381             :                 }
     382             :             }
     383        1724 :             toKeep = std::move(newKeepSet);
     384       51980 :         } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr) {
     385         110 :             if (auto constPtr = defaultPtr->getDefault().cast<Constant>();
     386         110 :                 constPtr != nullptr && constPtr->isObject() && inputType == Type::object) {
     387             :                 // Skip over PathDefault with an empty object since our input is already an object.
     388           0 :                 updated = true;
     389             :             } else {
     390             :                 // Skip over other PathDefaults but remember we had one.
     391         110 :                 hasDefault = true;
     392             :             }
     393             :         } else {
     394       50146 :             return;
     395             :         }
     396       52350 :     }
     397             : 
     398        3254 :     if (toKeep && input != Constant::emptyObject()) {
     399             :         // Check if we assign to every field we keep. If so, drop dependence on input.
     400        1218 :         bool assignToAllKeptFields = true;
     401        1235 :         for (const auto& fieldName : *toKeep) {
     402        1225 :             if (fieldMap.count(fieldName) == 0) {
     403        1208 :                 assignToAllKeptFields = false;
     404        1208 :                 break;
     405             :             }
     406             :         }
     407        1218 :         if (assignToAllKeptFields) {
     408             :             // Do not need the input, do not keep fields, and ignore defaults.
     409          10 :             input = Constant::emptyObject();
     410          10 :             toKeep = boost::none;
     411          10 :             updated = true;
     412          10 :             hasDefault = false;
     413             :         }
     414             :     }
     415        1627 :     if (!updated) {
     416        1611 :         return;
     417             :     }
     418          16 :     if (hasDefault) {
     419           0 :         return;
     420             :     }
     421             : 
     422          16 :     ABT result = make<PathIdentity>();
     423          16 :     if (toKeep) {
     424           0 :         maybeComposePath(result, make<PathKeep>(std::move(*toKeep)));
     425             :     }
     426          47 :     for (const auto& fieldName : orderedFieldNames) {
     427          31 :         auto it = fieldMap.find(fieldName);
     428          31 :         if (it != fieldMap.cend()) {
     429             :             // We may have removed the field name by virtue of not keeping.
     430          16 :             maybeComposePath(result, it->second);
     431             :         }
     432             :     }
     433             : 
     434          16 :     std::swap(n, result);
     435          16 :     _changed = true;
     436      157018 : }
     437             : 
     438        5478 : void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input) {
     439        5478 :     if (_changed) {
     440         898 :         return;
     441             :     }
     442             : 
     443        4580 :     auto realInput = follow(input);
     444             :     // If we are evaluating const path then we can simply replace the whole expression with the
     445             :     // result.
     446        4580 :     if (auto constPath = path.cast<PathConstant>(); constPath) {
     447             :         // Pull out the constant out of the path
     448          97 :         auto result = std::exchange(constPath->getConstant(), make<Blackhole>());
     449             : 
     450             :         // And swap it for the current node
     451          97 :         std::swap(n, result);
     452             : 
     453          97 :         _changed = true;
     454        4580 :     } else if (auto evalInput = realInput.cast<EvalPath>(); evalInput) {
     455             :         // An input to 'this' EvalPath expression is another EvalPath so we may try to fuse the
     456             :         // paths.
     457         472 :         if (fuse(n.cast<EvalPath>()->getPath(), evalInput->getPath())) {
     458             :             // We have fused paths so replace the input (by making a copy).
     459         110 :             input = evalInput->getInput();
     460             : 
     461         110 :             _changed = true;
     462         362 :         } else if (auto evalImmediateInput = input.cast<EvalPath>();
     463             :                    evalImmediateInput != nullptr) {
     464             :             // Compose immediate EvalPath input.
     465          78 :             n = make<EvalPath>(
     466          78 :                 make<PathComposeM>(std::move(evalImmediateInput->getPath()), std::move(path)),
     467          78 :                 std::move(evalImmediateInput->getInput()));
     468             : 
     469          39 :             _changed = true;
     470             :         }
     471             :     } else {
     472        4011 :         tryFuseComposition(path, input);
     473             :     }
     474             : 
     475        4580 :     _kindCtx.pop_back();
     476             : }
     477             : 
     478       48540 : void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input) {
     479       48540 :     if (_changed) {
     480           1 :         return;
     481             :     }
     482             : 
     483       48539 :     auto realInput = follow(input);
     484             :     // If we are evaluating const path then we can simply replace the whole expression with the
     485             :     // result.
     486       48539 :     if (auto constPath = path.cast<PathConstant>(); constPath) {
     487             :         // Pull out the constant out of the path
     488           7 :         auto result = std::exchange(constPath->getConstant(), make<Blackhole>());
     489             : 
     490             :         // And swap it for the current node
     491           7 :         std::swap(n, result);
     492             : 
     493           7 :         _changed = true;
     494       48539 :     } else if (auto evalInput = realInput.cast<EvalPath>(); evalInput) {
     495             :         // An input to 'this' EvalFilter expression is another EvalPath so we may try to fuse the
     496             :         // paths.
     497         193 :         if (fuse(n.cast<EvalFilter>()->getPath(), evalInput->getPath())) {
     498             :             // We have fused paths so replace the input (by making a copy).
     499           9 :             input = evalInput->getInput();
     500             : 
     501           9 :             _changed = true;
     502             :         }
     503             :     } else {
     504       48339 :         tryFuseComposition(path, input);
     505             :     }
     506             : 
     507       48539 :     _kindCtx.pop_back();
     508             : }
     509             : }  // namespace mongo::optimizer

Generated by: LCOV version 1.14