-
Type:
Improvement
-
Resolution: Unresolved
-
Priority:
Major - P3
-
None
-
Affects Version/s: None
-
Component/s: None
-
Query Optimization
In classic a class of queries could be simplified into a single interval scan. Instead the plan enumerator generates redundant index union plans. Some of the plans union index scans over the same field, while others also union index scans that produce no result.
Test cases:
const dataset = [ { "a" : 1, "b" : 1, "c" : 3 }, { "a" : 5, "b" : 2, "c" : 4 }, { "a" : 1, "b" : 1, "c" : 3 }, { "a" : 2, "b" : 3, "c" : 5 }, { "a" : 2, "b" : 2, "c" : 5 }, { "a" : 4, "b" : 3, "c" : 2 }, { "a" : 3, "b" : 2, "c" : 5 }, { "a" : 2, "b" : 2, "c" : 1 }, { "a" : 1, "b" : 5, "c" : 3 }, { "a" : 1, "b" : 3, "c" : 4 }, { "a" : 3, "b" : 3, "c" : 5 }, { "a" : 3, "b" : 1, "c" : 1 }, { "a" : 2, "b" : 2, "c" : 4 }, { "a" : 1, "b" : 2, "c" : 1 }, { "a" : 1, "b" : 5, "c" : 3 }, { "a" : 5, "b" : 2, "c" : 3 }, { "a" : 1, "b" : 1, "c" : 5 }, { "a" : 5, "b" : 5, "c" : 2 }, { "a" : 1, "b" : 3, "c" : 3 }, { "a" : 4, "b" : 2, "c" : 5 }, { "a" : 2, "b" : 4, "c" : 5 }, { "a" : 4, "b" : 2, "c" : 3 }, { "a" : 4, "b" : 5, "c" : 4 }, { "a" : 2, "b" : 3, "c" : 2 }, { "a" : 2, "b" : 1, "c" : 2 }, { "a" : 5, "b" : 4, "c" : 5 }, { "a" : 1, "b" : 5, "c" : 3 }, { "a" : 5, "b" : 2, "c" : 2 }, { "a" : 1, "b" : 1, "c" : 4 }, { "a" : 1, "b" : 3, "c" : 1 }, { "a" : 3, "b" : 3, "c" : 1 }, { "a" : 3, "b" : 4, "c" : 1 }, { "a" : 1, "b" : 4, "c" : 4 }]; db.foo.drop(); db.foo.insert(dataset); db.foo.createIndex({ "a" : 1}); const p1 = {$and : [{$or : [{a : {$gte : 5}}, {a : {$gte : 2}}]}, {$or : [{$and : [ {a : {$eq : 3}}]}, {$and : [{a : {$gte : 2}}, {a : {$eq : 4}}]}]}]}; const p1_cnf = { $and: [ { $or: [{ a: { $gte: 5 } }, { a: { $gte: 2 } }] }, { $or: [{ a: { $eq: 3 } }] }, { $or: [{ a: { $gte: 2 }, a: { $eq: 4 } }] } ] } const p1_dnf = { $or: [ { $and: [{ a: { $gte: 5 } }, { a: { $eq: 3 } }] }, { $and: [{ a: { $gte: 5 } }, { a: { $gte: 2 }, a: { $eq: 4 } }] }, { $and: [{ a: { $gte: 2 } }, { a: { $eq: 3 } }] }, { $and: [{ a: { $gte: 2 } }, { a: { $gte: 2 }, a: { $eq: 4 } }] } ] }
The first query is equivalent to {a: {$in: [3,4]}}, however the enumerator produces these plans:
winningPlan: { isCached: false, stage: 'FETCH', filter: { '$or': [ { a: { '$gte': 5 } }, { a: { '$gte': 2 } } ] }, inputStage: { stage: 'OR', inputStages: [ { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [ '[4, 4]' ] } }, { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [ '[3, 3]' ] } } ] } }, rejectedPlans: [ { isCached: false, stage: 'FETCH', filter: { '$or': [ { '$and': [ { a: { '$eq': 4 } }, { a: { '$gte': 2 } } ] }, { a: { '$eq': 3 } } ] }, inputStage: { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [ '[2, inf.0]' ] } } } ] },
For the second query the enumerator does even worse job:
db.foo.find(p1_cnf).explain() { winningPlan: { isCached: false, stage: 'FETCH', filter: { '$or': [ { a: { '$gte': 5 } }, { a: { '$gte': 2 } } ] }, inputStage: { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [] } } }, rejectedPlans: [ { isCached: false, stage: 'FETCH', inputStage: { stage: 'OR', inputStages: [ { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [] } }, { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [] } } ] } } ] },
The third DNF form of the original query is has no better plan either:
winningPlan: { isCached: false, stage: 'SUBPLAN', inputStage: { stage: 'FETCH', inputStage: { stage: 'OR', inputStages: [ { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [] } }, { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [] } }, { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [ '[3, 3]' ] } }, { stage: 'IXSCAN', keyPattern: { a: 1 }, indexName: 'a_1', isMultiKey: false, multiKeyPaths: { a: [] }, isUnique: false, isSparse: false, isPartial: false, indexVersion: 2, direction: 'forward', indexBounds: { a: [ '[4, 4]' ] } } ] } } },