Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-37530

Provide a way to cause a well-defined order of evaluation for predicates

    • Query Optimization
    • ALL
    • Hide
      (function(){
      	"use strict";
      
      	const docs = [
      		{num: 0},
      		{num: 1},
      	]
      
      	const agg = [
      	    {$match: {num: {$ne: 0}}},
      	    {$match: {$or: [
      	    	{$expr: {$divide: [5, '$num']}},
      			{b: {$exists: true}},
      	    ]}},
      	   	{$project: {_id: 0}},
      	];
      
      	const s1 = MongoRunner.runMongod({binVersion: 'latest'});
      	const s2 = MongoRunner.runMongod({binVersion: 'latest'});
      
      	let colls = [];
      	colls[0] = s1.getDB('test').c;
      	colls[1] = s2.getDB('test').c;
      
      	for (let i = 0; i < colls.length; ++i) {
      		colls[i].drop();
      	}
      
      	function doActionForAllElementsOnAllCollection(colls, elements, action){
      		for (let i = 0; i < colls.length; ++i) {
      			for (let j = 0; j < elements.length; ++j) {
      				action(colls[i], elements[j]);
      			}
      		}
      	}
      
      	assert.commandWorked(s1.getDB('test').adminCommand({setParameter: 1, internalQueryAllowAllPathsIndexes: true}));
      
      	// Insert documents.
      	doActionForAllElementsOnAllCollection(colls, docs, (coll, element) => 
      		assert.commandWorked(coll.insert(element)));
      
      	// Create indexes.
      	assert.commandWorked(colls[0].createIndex({"num":1}));
      
      	try {
      		let err1, err2;
      		let result1, result2;
      
      		try {
      			result1 = colls[0].aggregate(agg).toArray();
      		} catch (err) {
      			err1 = err.message;
      		}
      
      		try {
      			result2 = colls[1].aggregate(agg).toArray();
      		} catch (err) {
      			err2 = err.message;
      		}
      
      		print("Fuzzer Explain");
      		printjson(colls[0].explain().aggregate(agg));
      		printjson(colls[1].explain().aggregate(agg));
      		print("Fuzzer Test Errors");
      		print(err1);
      		print(err2);
      		print("Fuzzer Test Results");
      		printjson(result1);
      		printjson(result2);
      		assert(err1 === err2);
      		assert((result1 == undefined && result2 == undefined) || bsonBinaryEqual(result1, result2));
      
      
      	} finally {
      		MongoRunner.stopMongod(s1);
      		MongoRunner.stopMongod(s2);
      	}
      })();
      
      
      
      Show
      ( function (){ "use strict" ; const docs = [ {num: 0}, {num: 1}, ] const agg = [ {$match: {num: {$ne: 0}}}, {$match: {$or: [ {$expr: {$divide: [5, '$num' ]}}, {b: {$exists: true }}, ]}}, {$project: {_id: 0}}, ]; const s1 = MongoRunner.runMongod({binVersion: 'latest' }); const s2 = MongoRunner.runMongod({binVersion: 'latest' }); let colls = []; colls[0] = s1.getDB( 'test' ).c; colls[1] = s2.getDB( 'test' ).c; for (let i = 0; i < colls.length; ++i) { colls[i].drop(); } function doActionForAllElementsOnAllCollection(colls, elements, action){ for (let i = 0; i < colls.length; ++i) { for (let j = 0; j < elements.length; ++j) { action(colls[i], elements[j]); } } } assert.commandWorked(s1.getDB( 'test' ).adminCommand({setParameter: 1, internalQueryAllowAllPathsIndexes: true })); // Insert documents. doActionForAllElementsOnAllCollection(colls, docs, (coll, element) => assert.commandWorked(coll.insert(element))); // Create indexes. assert.commandWorked(colls[0].createIndex({ "num" :1})); try { let err1, err2; let result1, result2; try { result1 = colls[0].aggregate(agg).toArray(); } catch (err) { err1 = err.message; } try { result2 = colls[1].aggregate(agg).toArray(); } catch (err) { err2 = err.message; } print( "Fuzzer Explain" ); printjson(colls[0].explain().aggregate(agg)); printjson(colls[1].explain().aggregate(agg)); print( "Fuzzer Test Errors" ); print(err1); print(err2); print( "Fuzzer Test Results" ); printjson(result1); printjson(result2); assert(err1 === err2); assert((result1 == undefined && result2 == undefined) || bsonBinaryEqual(result1, result2)); } finally { MongoRunner.stopMongod(s1); MongoRunner.stopMongod(s2); } })();
    • 17

      Currently we freely reorder predicates for rewrite and optimization for the sake of performance. We could provide a way to define the order certain predicates are run in so any side effects from them (such as errors being produced) are predictable

      Here is a description of a situation that shows our current behavior:

      There is an inconsistency in how we are handling an error for a specific query involving $expr for when we use an index vs when we do a collection scan. The inconsistency stems from two behaviors of our query system:

      1. A match stage can be pushed down to hide an error
      Suppose there are two $match stages, (a) one with an expression that can throw an error for a specific input and (b) the other $match stage filters out those specific error-causing inputs. When (a) is before (b) but there is an index that matches the query for (b), the query system will push that stage to the IXSCAN and run (b) before (a) which will suppress that error.

      Example:

      const docs = [
      	{num: 0},
      	{num: 1},
      ];
      const agg = [
          {$match: {$expr: {$divide: [5, '$num']}}}, // (a) throws when $num is 0.
          {$match: {num: {$ne: 0}}}, // (b) filters out 0.
      ];
      

      The aggregation above will return

      {num: 0}

      instead of throwing the divide by zero error, because the $ne stage is pushed down to index scan:

      "winningPlan" : {
              "stage" : "FETCH",
              "filter" : {
                      "$expr" : {
                              "$divide" : [
                                      {
                                              "$const" : 5
                                      },
                                      "$num"
                              ]
                      }
              },
              "inputStage" : {
                      "stage" : "IXSCAN",
                      "keyPattern" : {
                              "num" : 1
                      },
                      "indexName" : "num_1",
                      "isMultiKey" : false,
                      "multiKeyPaths" : {
                              "num" : [ ]
                      },
                      "isUnique" : false,
                      "isSparse" : false,
                      "isPartial" : false,
                      "indexVersion" : 2,
                      "direction" : "forward",
                      "indexBounds" : {
                              "num" : [
                                      "[MinKey, 0.0)",
                                      "(0.0, MaxKey]"
                              ]
                      }
              }
      }
      
      

      2. Collection scans can form canonicalize queries to throw error
      For collection scan, the opposite of the case above can happen where when (b) is placed before (a), but (a) is an $or containing a $expr that throws. In this scenario, the query system will absorb the two match stages into one and put the two predicates in an $and with a specific order. When this happens, (a) will be reordered to be placed before (b) and thus the query doing a collection scan will throw an error.

      Given these two behaviors, the results for the following queries are inconsistent between an index scan and collection scan. Index scan will succeed while a collection scan will throw a divide by zero error.

      const docs = [
      	{num: 0},
      	{num: 1},
      ];
      
      const agg1 = [
          {$match: {num: {$ne: 0}}}, // collection scan moves this match down.
          {$match: {$or: [
          	{$expr: {$divide: [5, '$num']}}, // (a) throws when $num is 0.
      	{a: {$exists: true}},
          ]}},
      ];
      
      const agg2 = [
          {$match: {$or: [
          	{$expr: {$divide: [5, '$num']}}, // (a) throws when $num is 0.
      	{a: {$exists: true}},
          ]}},
          {$match: {num: {$ne: 0}}},  // index scan moves this match up.
      ];
      

      The explain outputs for both of two queries will be the following:

      // Collection scan
      "winningPlan" : {
              "stage" : "COLLSCAN",
              "filter" : {
                      "$and" : [
                              {
                                      "$or" : [
                                              {
                                                      "b" : {
                                                              "$exists" : true
                                                      }
                                              },
                                              {
                                                      "$expr" : {
                                                              "$divide" : [
                                                                      {
                                                                              "$const" : 5
                                                                      },
                                                                      "$num"
                                                              ]
                                                      }
                                              }
                                      ]
                              },
                              {
                                      "$nor" : [
                                              {
                                                      "num" : {
                                                              "$eq" : 0
                                                      }
                                              }
                                      ]
                              }
                      ]
              },
              "direction" : "forward"
      }
      
      // Index scan
      "winningPlan" : {
              "stage" : "FETCH",
              "filter" : {
                      "$or" : [
                              {
                                      "b" : {
                                              "$exists" : true
                                      }
                              },
                              {
                                      "$expr" : {
                                              "$divide" : [
                                                      {
                                                              "$const" : 5
                                                      },
                                                      "$num"
                                              ]
                                      }
                              }
                      ]
              },
              "inputStage" : {
                      "stage" : "IXSCAN",
                      "keyPattern" : {
                              "num" : 1
                      },
                      "indexName" : "num_1",
                      "isMultiKey" : false,
                      "multiKeyPaths" : {
                              "num" : [ ]
                      },
                      "isUnique" : false,
                      "isSparse" : false,
                      "isPartial" : false,
                      "indexVersion" : 2,
                      "direction" : "forward",
                      "indexBounds" : {
                              "num" : [
                                      "[MinKey, 0.0)",
                                      "(0.0, MaxKey]"
                              ]
                      }
              }
      }
      

      This inconsistency also occurs for an $and expression in the same conditions.

            Assignee:
            backlog-query-optimization [DO NOT USE] Backlog - Query Optimization
            Reporter:
            yuta.arai@mongodb.com Yuta Arai
            Votes:
            2 Vote for this issue
            Watchers:
            24 Start watching this issue

              Created:
              Updated: