Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-13205

Query access planner merges 2d leaves incorrectly inside OR

    • Type: Icon: Bug Bug
    • Resolution: Done
    • Priority: Icon: Major - P3 Major - P3
    • 2.6.0-rc3
    • Affects Version/s: 2.6.0-rc1
    • Component/s: Geo, Querying
    • None
    • ALL

      The $or query with 2d geo children (noted below) is missing documents from the result set:

      > db.foo.insert([{loc:[10,10]},{loc:[15,15]},{loc:[20,20]}])
      BulkWriteResult({
      	"writeErrors" : [ ],
      	"writeConcernErrors" : [ ],
      	"nInserted" : 3,
      	"nUpserted" : 0,
      	"nMatched" : 0,
      	"nModified" : 0,
      	"nRemoved" : 0,
      	"upserted" : [ ]
      })
      > db.foo.ensureIndex({loc:"2d"})
      {
      	"createdCollectionAutomatically" : false,
      	"numIndexesBefore" : 1,
      	"numIndexesAfter" : 2,
      	"ok" : 1
      }
      > var circle10 = {loc:{$geoWithin:{$centerSphere:[[10,10],0.2]}}}
      > var circle20 = {loc:{$geoWithin:{$centerSphere:[[20,20],0.2]}}}
      > db.foo.find(circle10)
      { "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
      { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
      > db.foo.find(circle20)
      { "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
      { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
      > db.foo.find({$and:[circle10,circle20]})
      { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
      > db.foo.find({$or:[circle10,circle20]}) // wrong! should return all 3 documents!
      { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
      > db.foo.dropIndexes()
      {
      	"nIndexesWas" : 2,
      	"msg" : "non-_id indexes dropped for collection",
      	"ok" : 1
      }
      > db.foo.find({$or:[circle10,circle20]}) // that's better
      { "_id" : ObjectId("53337bc1859e4c4b04f28f55"), "loc" : [ 10, 10 ] }
      { "_id" : ObjectId("53337bc1859e4c4b04f28f56"), "loc" : [ 15, 15 ] }
      { "_id" : ObjectId("53337bc1859e4c4b04f28f57"), "loc" : [ 20, 20 ] }
      

      The cause seems to be an issue with merging 2d leaves under an OR.

      Full explain for the $or query:

      > db.foo.find({$or:[circle10,circle20]}).explain(true)
      {
      	"cursor" : "GeoBrowse-circle",
      	"isMultiKey" : false,
      	"n" : 1,
      	"nscannedObjects" : 2,
      	"nscanned" : 2,
      	"nscannedObjectsAllPlans" : 2,
      	"nscannedAllPlans" : 2,
      	"scanAndOrder" : false,
      	"indexOnly" : false,
      	"nYields" : 0,
      	"nChunkSkips" : 0,
      	"millis" : 1,
      	"indexBounds" : {
      		"loc" : [
      			[
      				[
      					0,
      					0
      				],
      				[
      					22.5,
      					22.5
      				]
      			],
      			[
      				[
      					-5.625,
      					-5.625
      				],
      				[
      					0,
      					0
      				]
      			],
      			[
      				[
      					-5.625,
      					0
      				],
      				[
      					0,
      					5.625
      				]
      			],
      			[
      				[
      					-5.625,
      					5.625
      				],
      				[
      					0,
      					11.25
      				]
      			],
      			[
      				[
      					-5.625,
      					11.25
      				],
      				[
      					0,
      					16.875
      				]
      			],
      			[
      				[
      					-5.625,
      					16.875
      				],
      				[
      					0,
      					22.5
      				]
      			],
      			[
      				[
      					16.875,
      					-5.625
      				],
      				[
      					22.5,
      					0
      				]
      			],
      			[
      				[
      					11.25,
      					-5.625
      				],
      				[
      					16.875,
      					0
      				]
      			],
      			[
      				[
      					5.625,
      					-5.625
      				],
      				[
      					11.25,
      					0
      				]
      			],
      			[
      				[
      					0,
      					-5.625
      				],
      				[
      					5.625,
      					0
      				]
      			]
      		]
      	},
      	"allPlans" : [
      		{
      			"cursor" : "GeoBrowse-circle",
      			"isMultiKey" : false,
      			"n" : 1,
      			"nscannedObjects" : 2,
      			"nscanned" : 2,
      			"scanAndOrder" : false,
      			"indexOnly" : false,
      			"nChunkSkips" : 0,
      			"indexBounds" : {
      				"loc" : [
      					[
      						[
      							0,
      							0
      						],
      						[
      							22.5,
      							22.5
      						]
      					],
      					[
      						[
      							-5.625,
      							-5.625
      						],
      						[
      							0,
      							0
      						]
      					],
      					[
      						[
      							-5.625,
      							0
      						],
      						[
      							0,
      							5.625
      						]
      					],
      					[
      						[
      							-5.625,
      							5.625
      						],
      						[
      							0,
      							11.25
      						]
      					],
      					[
      						[
      							-5.625,
      							11.25
      						],
      						[
      							0,
      							16.875
      						]
      					],
      					[
      						[
      							-5.625,
      							16.875
      						],
      						[
      							0,
      							22.5
      						]
      					],
      					[
      						[
      							16.875,
      							-5.625
      						],
      						[
      							22.5,
      							0
      						]
      					],
      					[
      						[
      							11.25,
      							-5.625
      						],
      						[
      							16.875,
      							0
      						]
      					],
      					[
      						[
      							5.625,
      							-5.625
      						],
      						[
      							11.25,
      							0
      						]
      					],
      					[
      						[
      							0,
      							-5.625
      						],
      						[
      							5.625,
      							0
      						]
      					]
      				]
      			}
      		}
      	],
      	"server" : "Rassi-MacBook-Pro.local:27017",
      	"filterSet" : false,
      	"stats" : {
      		"type" : "KEEP_MUTATIONS",
      		"works" : 4,
      		"yields" : 0,
      		"unyields" : 0,
      		"invalidates" : 0,
      		"advanced" : 1,
      		"needTime" : 2,
      		"needFetch" : 0,
      		"isEOF" : 1,
      		"children" : [
      			{
      				"type" : "FETCH",
      				"works" : 4,
      				"yields" : 0,
      				"unyields" : 0,
      				"invalidates" : 0,
      				"advanced" : 1,
      				"needTime" : 2,
      				"needFetch" : 0,
      				"isEOF" : 1,
      				"alreadyHasObj" : 2,
      				"forcedFetches" : 0,
      				"matchTested" : 1,
      				"children" : [
      					{
      						"type" : "GEO_2D",
      						"works" : 2,
      						"yields" : 0,
      						"unyields" : 0,
      						"invalidates" : 0,
      						"advanced" : 2,
      						"needTime" : 0,
      						"needFetch" : 0,
      						"isEOF" : 1,
      						"geometryType" : "circle",
      						"field" : "loc",
      						"boundsVerbose" : [
      							"(0,0) -->> (22.5,22.5)",
      							"(-5.625,-5.625) -->> (0,0)",
      							"(-5.625,0) -->> (0,5.625)",
      							"(-5.625,5.625) -->> (0,11.25)",
      							"(-5.625,11.25) -->> (0,16.875)",
      							"(-5.625,16.875) -->> (0,22.5)",
      							"(16.875,-5.625) -->> (22.5,0)",
      							"(11.25,-5.625) -->> (16.875,0)",
      							"(5.625,-5.625) -->> (11.25,0)",
      							"(0,-5.625) -->> (5.625,0)"
      						],
      						"children" : [ ]
      					}
      				]
      			}
      		]
      	}
      }
      >
      

      Verbose query log output for the $or query:

      2014-03-26T21:26:03.490-0400 [conn1] [QLOG] Running query:
      ns=test.foo limit=0 skip=0
      Tree: $or
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
      Sort: {}
      Proj: {}
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
      =============================
      Options = INDEX_INTERSECTION KEEP_MUTATIONS
      Canonical query:
      ns=test.foo limit=0 skip=0
      Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
      Sort: {}
      Proj: {}
      =============================
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
      GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
      [Node #0]: predicate
      	first indices: [0]
      	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
      	indexToAssign: 0
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
      GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
      FETCH
      ---fetched = 1
      ---sortedByDiskLoc = 0
      ---getSort = []
      ---Child:
      ------KEEP_MUTATIONS
      ---------filter:
                      GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
      ---------fetched = 0
      ---------sortedByDiskLoc = 0
      ---------getSort = []
      ---------Child:
      ------------GEO_2D
      ---------------keyPattern = { loc: "2d" }
      ---------------fetched = 0
      ---------------sortedByDiskLoc = 0
      ---------------getSort = []
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] No cache data for subchild ns=test.foo limit=0 skip=0
      Tree: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
      Sort: {}
      Proj: {}
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Beginning planning...
      =============================
      Options = INDEX_INTERSECTION KEEP_MUTATIONS
      Canonical query:
      ns=test.foo limit=0 skip=0
      Tree: $or
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
      Sort: {}
      Proj: {}
      =============================
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 0 is kp: { _id: 1 } io: { v: 1, key: { _id: 1 }, name: "_id_", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Index 1 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Predicate over field 'loc'
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Relevant index 0 is kp: { loc: "2d" } io: { v: 1, key: { loc: "2d" }, name: "loc_2d", ns: "test.foo" }
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Rated tree:
      $or
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || First: 0 notFirst: full path: loc
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 2
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 0
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Tagging memoID 1
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Enumerator: memo just before moving:
      [Node #0]: predicate
      	first indices: [0]
      	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
      	indexToAssign: 0
      [Node #1]: predicate
      	first indices: [0]
      	pred: GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
      	indexToAssign: 0
      [Node #2]: ALL OF: [ 0 1 ]
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] About to build solntree from tagged tree:
      $or
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
          GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Query is not cachable: can't cache '2d' index
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: adding solution:
      KEEP_MUTATIONS
      ---filter:
              $or
                  GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 10.0, 10.0 ], 0.2 ] } } }
                  GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }
      ---fetched = 1
      ---sortedByDiskLoc = 0
      ---getSort = []
      ---Child:
      ------FETCH
      ---------filter:
                      GEO raw = { loc: { $geoWithin: { $centerSphere: [ [ 20.0, 20.0 ], 0.2 ] } } }  || Selected Index #0 pos 0
      ---------fetched = 1
      ---------sortedByDiskLoc = 0
      ---------getSort = []
      ---------Child:
      ------------GEO_2D
      ---------------keyPattern = { loc: "2d" }
      ---------------fetched = 0
      ---------------sortedByDiskLoc = 0
      ---------------getSort = []
      2014-03-26T21:26:03.491-0400 [conn1] [QLOG] Planner: outputted 1 indexed solutions.
      2014-03-26T21:26:03.492-0400 [conn1] [QLOG] Not caching runner but returning 1 results.
      

      Note that the solution from the verbose query log specifies only the index bounds for the [10,10] circle, and then adds a filter on top for the [20,20] circle. Instead, it should be applying a union of the bounds, and then adding the entire $or filter on top (needed since centerSphere bounds are inexact). I infer that the access planner is incorrectly applying AND merging logic when it should be applying OR merging logic.

      Original report:

      Summary: Wrong cursor used for geo $or

      Description: When there is a 2d index present on a collection and an $or query is performed, the wrong cursor is sometimes selected. The result is that not all the relevant documents are actually returned. This was encountered when trying to $or a $geoWithin $centerSphere, and a $geoWithin $box. The database attempted to use the GeoBrowse-box cursor, giving incorrect results.

      Steps To Reproduce: Make a collection with a 2d index and some datapoints, then run:

      db.datapoints.find({
          "$or": [{
              "location": {
                  "$geoWithin": {
                      "$centerSphere": [
                          [11.77734375, 47.27922900257082], 0.27644030243339723
                      ]
                  }
              }
          }, {
              "location": {
                  "$geoWithin": {
                      "$box": [
                          [-90.87890625, 32.627420045483085],
                          [-66.26953125, 45.56450582712792]
                      ]
                  }
              }
          }]
      }).explain()
      

            Assignee:
            david.storch@mongodb.com David Storch
            Reporter:
            kristian.picon Kristian Picon
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: