The $or query with 2d geo children (noted below) is missing documents from the result set:
> db.foo.insert([{loc:[10,10]},{loc:[15,15]},{loc:[20,20]}]) BulkWriteResult({ "writeErrors" : [ ], "writeConcernErrors" : [ ], "nInserted" : 3, "nUpserted" : 0, "nMatched" : 0, "nModified" : 0, "nRemoved" : 0, "upserted" : [ ] }) > db.foo.ensureIndex({loc:"2d"}) { "createdCollectionAutomatically" : false, "numIndexesBefore" : 1, "numIndexesAfter" : 2, "ok" : 1 } > var circle10 = {loc:{$geoWithin:{$centerSphere:[[10,10],0.2]}}} > var circle20 = {loc:{$geoWithin:{$centerSphere:[[20,20],0.2]}}} > db.foo.find(circle10) { "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] } { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } > db.foo.find(circle20) { "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] } { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } > db.foo.find({$and:[circle10,circle20]}) { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } > db.foo.find({$or:[circle10,circle20]}) // wrong! should return all 3 documents! { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } > db.foo.dropIndexes() { "nIndexesWas" : 2, "msg" : "non-_id indexes dropped for collection", "ok" : 1 } > db.foo.find({$or:[circle10,circle20]}) // that's better { "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] } { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] } { "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
The cause seems to be an issue with merging 2d leaves under an OR.
Full explain for the $or query:
> db.foo.find({$or:[circle10,circle20]}).explain(true) { "cursor" : "GeoBrowse-circle", "isMultiKey" : false, "n" : 1, "nscannedObjects" : 2, "nscanned" : 2, "nscannedObjectsAllPlans" : 2, "nscannedAllPlans" : 2, "scanAndOrder" : false, "indexOnly" : false, "nYields" : 0, "nChunkSkips" : 0, "millis" : 1, "indexBounds" : { "loc" : [ [ [ 0, 0 ], [ 22.5, 22.5 ] ], [ [ -5.625, -5.625 ], [ 0, 0 ] ], [ [ -5.625, 0 ], [ 0, 5.625 ] ], [ [ -5.625, 5.625 ], [ 0, 11.25 ] ], [ [ -5.625, 11.25 ], [ 0, 16.875 ] ], [ [ -5.625, 16.875 ], [ 0, 22.5 ] ], [ [ 16.875, -5.625 ], [ 22.5, 0 ] ], [ [ 11.25, -5.625 ], [ 16.875, 0 ] ], [ [ 5.625, -5.625 ], [ 11.25, 0 ] ], [ [ 0, -5.625 ], [ 5.625, 0 ] ] ] }, "allPlans" : [ { "cursor" : "GeoBrowse-circle", "isMultiKey" : false, "n" : 1, "nscannedObjects" : 2, "nscanned" : 2, "scanAndOrder" : false, "indexOnly" : false, "nChunkSkips" : 0, "indexBounds" : { "loc" : [ [ [ 0, 0 ], [ 22.5, 22.5 ] ], [ [ -5.625, -5.625 ], [ 0, 0 ] ], [ [ -5.625, 0 ], [ 0, 5.625 ] ], [ [ -5.625, 5.625 ], [ 0, 11.25 ] ], [ [ -5.625, 11.25 ], [ 0, 16.875 ] ], [ [ -5.625, 16.875 ], [ 0, 22.5 ] ], [ [ 16.875, -5.625 ], [ 22.5, 0 ] ], [ [ 11.25, -5.625 ], [ 16.875, 0 ] ], [ [ 5.625, -5.625 ], [ 11.25, 0 ] ], [ [ 0, -5.625 ], [ 5.625, 0 ] ] ] } } ], "server" : "Rassi-MacBook-Pro.local:27017", "filterSet" : false, "stats" : { "type" : "KEEP_MUTATIONS", "works" : 4, "yields" : 0, "unyields" : 0, "invalidates" : 0, "advanced" : 1, "needTime" : 2, "needFetch" : 0, "isEOF" : 1, "children" : [ { "type" : "FETCH", "works" : 4, "yields" : 0, "unyields" : 0, "invalidates" : 0, "advanced" : 1, "needTime" : 2, "needFetch" : 0, "isEOF" : 1, "alreadyHasObj" : 2, "forcedFetches" : 0, "matchTested" : 1, "children" : [ { "type" : "GEO_2D", "works" : 2, "yields" : 0, "unyields" : 0, "invalidates" : 0, "advanced" : 2, "needTime" : 0, "needFetch" : 0, "isEOF" : 1, "geometryType" : "circle", "field" : "loc", "boundsVerbose" : [ "(0,0) -->> (22.5,22.5)", "(-5.625,-5.625) -->> (0,0)", "(-5.625,0) -->> (0,5.625)", "(-5.625,5.625) -->> (0,11.25)", "(-5.625,11.25) -->> (0,16.875)", "(-5.625,16.875) -->> (0,22.5)", "(16.875,-5.625) -->> (22.5,0)", "(11.25,-5.625) -->> (16.875,0)", "(5.625,-5.625) -->> (11.25,0)", "(0,-5.625) -->> (5.625,0)" ], "children" : [ ] } ] } ] } } >
Verbose query log output for the $or query:
2014-03-26T21:26:03.490-0400 [conn1] [QLOG] Running query: ns=test.foo limit=0 skip=0 Tree: $or GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } Sort: {} Proj: {} 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning... ============================= Options = INDEX_INTERSECTION KEEP_MUTATIONS Canonical query: ns=test.foo limit=0 skip=0 Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } Sort: {} Proj: {} ============================= 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc' 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving: [Node #0]: predicate first indices: [0] pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } indexToAssign: 0 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution: FETCH ---fetched = 1 ---sortedByDiskLoc = 0 ---getSort = [] ---Child: ------KEEP_MUTATIONS ---------filter: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } ---------fetched = 0 ---------sortedByDiskLoc = 0 ---------getSort = [] ---------Child: ------------GEO_2D ---------------keyPattern = { loc: "2d" } ---------------fetched = 0 ---------------sortedByDiskLoc = 0 ---------------getSort = [] 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions. 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] No cache data for subchild ns=test.foo limit=0 skip=0 Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } Sort: {} Proj: {} 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning... ============================= Options = INDEX_INTERSECTION KEEP_MUTATIONS Canonical query: ns=test.foo limit=0 skip=0 Tree: $or GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } Sort: {} Proj: {} ============================= 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc' 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" } 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree: $or GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || First: 0 notFirst: full path: loc 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 2 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 1 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving: [Node #0]: predicate first indices: [0] pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } indexToAssign: 0 [Node #1]: predicate first indices: [0] pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } indexToAssign: 0 [Node #2]: ALL OF: [ 0 1 ] 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree: $or GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } || Selected Index #0 pos 0 GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution: KEEP_MUTATIONS ---filter: $or GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } } GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } ---fetched = 1 ---sortedByDiskLoc = 0 ---getSort = [] ---Child: ------FETCH ---------filter: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } } || Selected Index #0 pos 0 ---------fetched = 1 ---------sortedByDiskLoc = 0 ---------getSort = [] ---------Child: ------------GEO_2D ---------------keyPattern = { loc: "2d" } ---------------fetched = 0 ---------------sortedByDiskLoc = 0 ---------------getSort = [] 2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions. 2014-03-26T21:26:03.492-0400 [conn1] [QLOG] Not caching runner but returning 1 results.
Note that the solution from the verbose query log specifies only the index bounds for the [10,10] circle, and then adds a filter on top for the [20,20] circle. Instead, it should be applying a union of the bounds, and then adding the entire $or filter on top (needed since centerSphere bounds are inexact). I infer that the access planner is incorrectly applying AND merging logic when it should be applying OR merging logic.
Original report:
Summary: Wrong cursor used for geo $or
Description: When there is a 2d index present on a collection and an $or query is performed, the wrong cursor is sometimes selected. The result is that not all the relevant documents are actually returned. This was encountered when trying to $or a $geoWithin $centerSphere, and a $geoWithin $box. The database attempted to use the GeoBrowse-box cursor, giving incorrect results.
Steps To Reproduce: Make a collection with a 2d index and some datapoints, then run:
db.datapoints.find({ "$or": [{ "location": { "$geoWithin": { "$centerSphere": [ [11.77734375, 47.27922900257082], 0.27644030243339723 ] } } }, { "location": { "$geoWithin": { "$box": [ [-90.87890625, 32.627420045483085], [-66.26953125, 45.56450582712792] ] } } }] }).explain()