Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-55200

DISTINCT_SCAN not used for $sort+$match+$group+$first on sharded collection

    • Type: Icon: Improvement Improvement
    • Resolution: Unresolved
    • Priority: Icon: Minor - P4 Minor - P4
    • None
    • Affects Version/s: 4.4.3
    • None
    • Query Execution

      When performing aggregation with $sort+$match+$group+$first pipeline, some unexpected plan is being chosen. I would expect to see DISTINCT_SCAN and small totalDocsExamined amount in explain, but having SORT_KEY_GENERATOR and large totalDocsExamined amount.

      Given:

       

      mongos> sh.status()
       — Sharding Status —
       ...
       shards:
       { "_id" : "rs0", "host" : "rs0/127.0.0.1:28100", "state" : 1 }
       { "_id" : "rs1", "host" : "rs1/127.0.0.1:28101", "state" : 1 }
      mongos> sh.shardCollection("test.addresses", { zipcode: "hashed" })
      
      mongos> db.addresses.createIndex({zipcode: 1, house: 1})
      
      mongos> db.addresses.insert([
       {zipcode: "111", house: 1}, 
       {zipcode: "111", house: 2},
       {zipcode: "111", house: 3}])
      mongos> db.addresses.insert([
       {zipcode: "444", house: 1}, 
       {zipcode: "444", house: 2},
       {zipcode: "444", house: 3}])
      mongos> db.addresses.getShardDistribution()
      Shard rs0 at rs0/127.0.0.1:28100
       data : 162B docs : 3 chunks : 2
      Shard rs1 at rs1/127.0.0.1:28101
       data : 162B docs : 3 chunks : 2
      

      So we have 3 documents on each shard with 2 distinct shard keys. Given that we could perform such aggregation:

      mongos> db.addresses.explain("executionStats").aggregate([{$sort: {zipcode: 1, house: 1}}, {$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}])
      {
      	"serverInfo" : {
      		"host" : "5ae1a7bb5de9",
      		"port" : 27017,
      		"version" : "4.4.3",
      		"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
      	},
      	"mergeType" : "mongos",
      	"splitPipeline" : {
      		"shardsPart" : [
      			{
      				"$match" : {
      					"zipcode" : {
      						"$in" : [
      							"111",
      							"444"
      						]
      					}
      				}
      			},
      			{
      				"$sort" : {
      					"sortKey" : {
      						"zipcode" : 1,
      						"house" : 1
      					}
      				}
      			}
      		],
      		"mergerPart" : [
      			{
      				"$group" : {
      					"_id" : "$zipcode",
      					"first" : {
      						"$first" : "$$ROOT"
      					}
      				}
      			}
      		]
      	},
      	"shards" : {
      		"rs1" : {
      			"host" : "127.0.0.1:28101",
      			"queryPlanner" : {
      				"plannerVersion" : 1,
      				"namespace" : "test.addresses",
      				"indexFilterSet" : false,
      				"parsedQuery" : {
      					"zipcode" : {
      						"$in" : [
      							"111",
      							"444"
      						]
      					}
      				},
      				"optimizedPipeline" : true,
      				"winningPlan" : {
      					"stage" : "FETCH",
      					"inputStage" : {
      						"stage" : "SORT_KEY_GENERATOR",
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"inputStage" : {
      								"stage" : "IXSCAN",
      								"keyPattern" : {
      									"zipcode" : 1,
      									"house" : 1
      								},
      								"indexName" : "zipcode_1_house_1",
      								"isMultiKey" : false,
      								"multiKeyPaths" : {
      									"zipcode" : [ ],
      									"house" : [ ]
      								},
      								"isUnique" : false,
      								"isSparse" : false,
      								"isPartial" : false,
      								"indexVersion" : 2,
      								"direction" : "forward",
      								"indexBounds" : {
      									"zipcode" : [
      										"[\"111\", \"111\"]",
      										"[\"444\", \"444\"]"
      									],
      									"house" : [
      										"[MinKey, MaxKey]"
      									]
      								}
      							}
      						}
      					}
      				},
      				"rejectedPlans" : [
      					{
      						"stage" : "SORT",
      						"sortPattern" : {
      							"zipcode" : 1,
      							"house" : 1
      						},
      						"memLimit" : 104857600,
      						"type" : "simple",
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"inputStage" : {
      								"stage" : "FETCH",
      								"filter" : {
      									"zipcode" : {
      										"$in" : [
      											"111",
      											"444"
      										]
      									}
      								},
      								"inputStage" : {
      									"stage" : "IXSCAN",
      									"keyPattern" : {
      										"zipcode" : "hashed"
      									},
      									"indexName" : "zipcode_hashed",
      									"isMultiKey" : false,
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[-1348867638842708814, -1348867638842708814]",
      											"[4824476026935005375, 4824476026935005375]"
      										]
      									}
      								}
      							}
      						}
      					}
      				]
      			},
      			"executionStats" : {
      				"executionSuccess" : true,
      				"nReturned" : 3,
      				"executionTimeMillis" : 0,
      				"totalKeysExamined" : 3,
      				"totalDocsExamined" : 3,
      				"executionStages" : {
      					"stage" : "FETCH",
      					"nReturned" : 3,
      					"executionTimeMillisEstimate" : 0,
      					"works" : 5,
      					"advanced" : 3,
      					"needTime" : 0,
      					"needYield" : 0,
      					"saveState" : 0,
      					"restoreState" : 0,
      					"isEOF" : 1,
      					"docsExamined" : 3,
      					"alreadyHasObj" : 0,
      					"inputStage" : {
      						"stage" : "SORT_KEY_GENERATOR",
      						"nReturned" : 3,
      						"executionTimeMillisEstimate" : 0,
      						"works" : 4,
      						"advanced" : 3,
      						"needTime" : 0,
      						"needYield" : 0,
      						"saveState" : 0,
      						"restoreState" : 0,
      						"isEOF" : 1,
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"nReturned" : 3,
      							"executionTimeMillisEstimate" : 0,
      							"works" : 4,
      							"advanced" : 3,
      							"needTime" : 0,
      							"needYield" : 0,
      							"saveState" : 0,
      							"restoreState" : 0,
      							"isEOF" : 1,
      							"chunkSkips" : 0,
      							"inputStage" : {
      								"stage" : "IXSCAN",
      								"nReturned" : 3,
      								"executionTimeMillisEstimate" : 0,
      								"works" : 4,
      								"advanced" : 3,
      								"needTime" : 0,
      								"needYield" : 0,
      								"saveState" : 0,
      								"restoreState" : 0,
      								"isEOF" : 1,
      								"keyPattern" : {
      									"zipcode" : 1,
      									"house" : 1
      								},
      								"indexName" : "zipcode_1_house_1",
      								"isMultiKey" : false,
      								"multiKeyPaths" : {
      									"zipcode" : [ ],
      									"house" : [ ]
      								},
      								"isUnique" : false,
      								"isSparse" : false,
      								"isPartial" : false,
      								"indexVersion" : 2,
      								"direction" : "forward",
      								"indexBounds" : {
      									"zipcode" : [
      										"[\"111\", \"111\"]",
      										"[\"444\", \"444\"]"
      									],
      									"house" : [
      										"[MinKey, MaxKey]"
      									]
      								},
      								"keysExamined" : 3,
      								"seeks" : 1,
      								"dupsTested" : 0,
      								"dupsDropped" : 0
      							}
      						}
      					}
      				}
      			}
      		},
      		"rs0" : {
      			"host" : "127.0.0.1:28100",
      			"queryPlanner" : {
      				"plannerVersion" : 1,
      				"namespace" : "test.addresses",
      				"indexFilterSet" : false,
      				"parsedQuery" : {
      					"zipcode" : {
      						"$in" : [
      							"111",
      							"444"
      						]
      					}
      				},
      				"optimizedPipeline" : true,
      				"winningPlan" : {
      					"stage" : "FETCH",
      					"inputStage" : {
      						"stage" : "SORT_KEY_GENERATOR",
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"inputStage" : {
      								"stage" : "IXSCAN",
      								"keyPattern" : {
      									"zipcode" : 1,
      									"house" : 1
      								},
      								"indexName" : "zipcode_1_house_1",
      								"isMultiKey" : false,
      								"multiKeyPaths" : {
      									"zipcode" : [ ],
      									"house" : [ ]
      								},
      								"isUnique" : false,
      								"isSparse" : false,
      								"isPartial" : false,
      								"indexVersion" : 2,
      								"direction" : "forward",
      								"indexBounds" : {
      									"zipcode" : [
      										"[\"111\", \"111\"]",
      										"[\"444\", \"444\"]"
      									],
      									"house" : [
      										"[MinKey, MaxKey]"
      									]
      								}
      							}
      						}
      					}
      				},
      				"rejectedPlans" : [
      					{
      						"stage" : "SORT",
      						"sortPattern" : {
      							"zipcode" : 1,
      							"house" : 1
      						},
      						"memLimit" : 104857600,
      						"type" : "simple",
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"inputStage" : {
      								"stage" : "FETCH",
      								"filter" : {
      									"zipcode" : {
      										"$in" : [
      											"111",
      											"444"
      										]
      									}
      								},
      								"inputStage" : {
      									"stage" : "IXSCAN",
      									"keyPattern" : {
      										"zipcode" : "hashed"
      									},
      									"indexName" : "zipcode_hashed",
      									"isMultiKey" : false,
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[-1348867638842708814, -1348867638842708814]",
      											"[4824476026935005375, 4824476026935005375]"
      										]
      									}
      								}
      							}
      						}
      					}
      				]
      			},
      			"executionStats" : {
      				"executionSuccess" : true,
      				"nReturned" : 3,
      				"executionTimeMillis" : 0,
      				"totalKeysExamined" : 3,
      				"totalDocsExamined" : 3,
      				"executionStages" : {
      					"stage" : "FETCH",
      					"nReturned" : 3,
      					"executionTimeMillisEstimate" : 0,
      					"works" : 5,
      					"advanced" : 3,
      					"needTime" : 0,
      					"needYield" : 0,
      					"saveState" : 0,
      					"restoreState" : 0,
      					"isEOF" : 1,
      					"docsExamined" : 3,
      					"alreadyHasObj" : 0,
      					"inputStage" : {
      						"stage" : "SORT_KEY_GENERATOR",
      						"nReturned" : 3,
      						"executionTimeMillisEstimate" : 0,
      						"works" : 4,
      						"advanced" : 3,
      						"needTime" : 0,
      						"needYield" : 0,
      						"saveState" : 0,
      						"restoreState" : 0,
      						"isEOF" : 1,
      						"inputStage" : {
      							"stage" : "SHARDING_FILTER",
      							"nReturned" : 3,
      							"executionTimeMillisEstimate" : 0,
      							"works" : 4,
      							"advanced" : 3,
      							"needTime" : 0,
      							"needYield" : 0,
      							"saveState" : 0,
      							"restoreState" : 0,
      							"isEOF" : 1,
      							"chunkSkips" : 0,
      							"inputStage" : {
      								"stage" : "IXSCAN",
      								"nReturned" : 3,
      								"executionTimeMillisEstimate" : 0,
      								"works" : 4,
      								"advanced" : 3,
      								"needTime" : 0,
      								"needYield" : 0,
      								"saveState" : 0,
      								"restoreState" : 0,
      								"isEOF" : 1,
      								"keyPattern" : {
      									"zipcode" : 1,
      									"house" : 1
      								},
      								"indexName" : "zipcode_1_house_1",
      								"isMultiKey" : false,
      								"multiKeyPaths" : {
      									"zipcode" : [ ],
      									"house" : [ ]
      								},
      								"isUnique" : false,
      								"isSparse" : false,
      								"isPartial" : false,
      								"indexVersion" : 2,
      								"direction" : "forward",
      								"indexBounds" : {
      									"zipcode" : [
      										"[\"111\", \"111\"]",
      										"[\"444\", \"444\"]"
      									],
      									"house" : [
      										"[MinKey, MaxKey]"
      									]
      								},
      								"keysExamined" : 3,
      								"seeks" : 1,
      								"dupsTested" : 0,
      								"dupsDropped" : 0
      							}
      						}
      					}
      				}
      			}
      		}
      	},
      	"ok" : 1,
      	"operationTime" : Timestamp(1615826160, 1),
      	"$clusterTime" : {
      		"clusterTime" : Timestamp(1615826160, 1),
      		"signature" : {
      			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      			"keyId" : NumberLong(0)
      		}
      	}
      }
      

      As we can see in totalDocsExamined all 3 documents is being fetched on each shard, and no DISTINCT_SCAN stage.

      But if we remove $sort stage, we can see expected behavior:

       

      mongos> db.addresses.explain("executionStats").aggregate([{$match: {zipcode: {$in: ["111", "444"]}}}, {$group: {_id: "$zipcode", first: {$first: "$$ROOT"}}}])
      {
      	"serverInfo" : {
      		"host" : "5ae1a7bb5de9",
      		"port" : 27017,
      		"version" : "4.4.3",
      		"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
      	},
      	"mergeType" : "mongos",
      	"splitPipeline" : {
      		"shardsPart" : [
      			{
      				"$match" : {
      					"zipcode" : {
      						"$in" : [
      							"111",
      							"444"
      						]
      					}
      				}
      			},
      			{
      				"$group" : {
      					"_id" : "$zipcode",
      					"first" : {
      						"$first" : "$$ROOT"
      					}
      				}
      			}
      		],
      		"mergerPart" : [
      			{
      				"$group" : {
      					"_id" : "$$ROOT._id",
      					"first" : {
      						"$first" : "$$ROOT.first"
      					},
      					"$doingMerge" : true
      				}
      			}
      		]
      	},
      	"shards" : {
      		"rs0" : {
      			"host" : "127.0.0.1:28100",
      			"stages" : [
      				{
      					"$cursor" : {
      						"queryPlanner" : {
      							"plannerVersion" : 1,
      							"namespace" : "test.addresses",
      							"indexFilterSet" : false,
      							"parsedQuery" : {
      								"zipcode" : {
      									"$in" : [
      										"111",
      										"444"
      									]
      								}
      							},
      							"queryHash" : "A9C86C49",
      							"planCacheKey" : "E7443539",
      							"winningPlan" : {
      								"stage" : "FETCH",
      								"inputStage" : {
      									"stage" : "DISTINCT_SCAN",
      									"keyPattern" : {
      										"zipcode" : 1,
      										"house" : 1
      									},
      									"indexName" : "zipcode_1_house_1",
      									"isMultiKey" : false,
      									"multiKeyPaths" : {
      										"zipcode" : [ ],
      										"house" : [ ]
      									},
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[\"111\", \"111\"]",
      											"[\"444\", \"444\"]"
      										],
      										"house" : [
      											"[MinKey, MaxKey]"
      										]
      									}
      								}
      							},
      							"rejectedPlans" : [ ]
      						},
      						"executionStats" : {
      							"executionSuccess" : true,
      							"nReturned" : 1,
      							"executionTimeMillis" : 0,
      							"totalKeysExamined" : 1,
      							"totalDocsExamined" : 1,
      							"executionStages" : {
      								"stage" : "FETCH",
      								"nReturned" : 1,
      								"executionTimeMillisEstimate" : 0,
      								"works" : 2,
      								"advanced" : 1,
      								"needTime" : 0,
      								"needYield" : 0,
      								"saveState" : 1,
      								"restoreState" : 1,
      								"isEOF" : 1,
      								"docsExamined" : 1,
      								"alreadyHasObj" : 0,
      								"inputStage" : {
      									"stage" : "DISTINCT_SCAN",
      									"nReturned" : 1,
      									"executionTimeMillisEstimate" : 0,
      									"works" : 2,
      									"advanced" : 1,
      									"needTime" : 0,
      									"needYield" : 0,
      									"saveState" : 1,
      									"restoreState" : 1,
      									"isEOF" : 1,
      									"keyPattern" : {
      										"zipcode" : 1,
      										"house" : 1
      									},
      									"indexName" : "zipcode_1_house_1",
      									"isMultiKey" : false,
      									"multiKeyPaths" : {
      										"zipcode" : [ ],
      										"house" : [ ]
      									},
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[\"111\", \"111\"]",
      											"[\"444\", \"444\"]"
      										],
      										"house" : [
      											"[MinKey, MaxKey]"
      										]
      									},
      									"keysExamined" : 1
      								}
      							}
      						}
      					},
      					"nReturned" : NumberLong(1),
      					"executionTimeMillisEstimate" : NumberLong(0)
      				},
      				{
      					"$groupByDistinctScan" : {
      						"newRoot" : {
      							"_id" : "$zipcode",
      							"first" : "$$ROOT"
      						}
      					},
      					"nReturned" : NumberLong(1),
      					"executionTimeMillisEstimate" : NumberLong(0)
      				}
      			]
      		},
      		"rs1" : {
      			"host" : "127.0.0.1:28101",
      			"stages" : [
      				{
      					"$cursor" : {
      						"queryPlanner" : {
      							"plannerVersion" : 1,
      							"namespace" : "test.addresses",
      							"indexFilterSet" : false,
      							"parsedQuery" : {
      								"zipcode" : {
      									"$in" : [
      										"111",
      										"444"
      									]
      								}
      							},
      							"queryHash" : "A9C86C49",
      							"planCacheKey" : "E7443539",
      							"winningPlan" : {
      								"stage" : "FETCH",
      								"inputStage" : {
      									"stage" : "DISTINCT_SCAN",
      									"keyPattern" : {
      										"zipcode" : 1,
      										"house" : 1
      									},
      									"indexName" : "zipcode_1_house_1",
      									"isMultiKey" : false,
      									"multiKeyPaths" : {
      										"zipcode" : [ ],
      										"house" : [ ]
      									},
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[\"111\", \"111\"]",
      											"[\"444\", \"444\"]"
      										],
      										"house" : [
      											"[MinKey, MaxKey]"
      										]
      									}
      								}
      							},
      							"rejectedPlans" : [ ]
      						},
      						"executionStats" : {
      							"executionSuccess" : true,
      							"nReturned" : 1,
      							"executionTimeMillis" : 0,
      							"totalKeysExamined" : 1,
      							"totalDocsExamined" : 1,
      							"executionStages" : {
      								"stage" : "FETCH",
      								"nReturned" : 1,
      								"executionTimeMillisEstimate" : 0,
      								"works" : 2,
      								"advanced" : 1,
      								"needTime" : 0,
      								"needYield" : 0,
      								"saveState" : 1,
      								"restoreState" : 1,
      								"isEOF" : 1,
      								"docsExamined" : 1,
      								"alreadyHasObj" : 0,
      								"inputStage" : {
      									"stage" : "DISTINCT_SCAN",
      									"nReturned" : 1,
      									"executionTimeMillisEstimate" : 0,
      									"works" : 2,
      									"advanced" : 1,
      									"needTime" : 0,
      									"needYield" : 0,
      									"saveState" : 1,
      									"restoreState" : 1,
      									"isEOF" : 1,
      									"keyPattern" : {
      										"zipcode" : 1,
      										"house" : 1
      									},
      									"indexName" : "zipcode_1_house_1",
      									"isMultiKey" : false,
      									"multiKeyPaths" : {
      										"zipcode" : [ ],
      										"house" : [ ]
      									},
      									"isUnique" : false,
      									"isSparse" : false,
      									"isPartial" : false,
      									"indexVersion" : 2,
      									"direction" : "forward",
      									"indexBounds" : {
      										"zipcode" : [
      											"[\"111\", \"111\"]",
      											"[\"444\", \"444\"]"
      										],
      										"house" : [
      											"[MinKey, MaxKey]"
      										]
      									},
      									"keysExamined" : 1
      								}
      							}
      						}
      					},
      					"nReturned" : NumberLong(1),
      					"executionTimeMillisEstimate" : NumberLong(0)
      				},
      				{
      					"$groupByDistinctScan" : {
      						"newRoot" : {
      							"_id" : "$zipcode",
      							"first" : "$$ROOT"
      						}
      					},
      					"nReturned" : NumberLong(1),
      					"executionTimeMillisEstimate" : NumberLong(0)
      				}
      			]
      		}
      	},
      	"ok" : 1,
      	"operationTime" : Timestamp(1615826288, 1),
      	"$clusterTime" : {
      		"clusterTime" : Timestamp(1615826288, 1),
      		"signature" : {
      			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
      			"keyId" : NumberLong(0)
      		}
      	}
      }
      

      Without sorting this aggregation works well, but I need some defined order here.

       

            Assignee:
            backlog-query-execution [DO NOT USE] Backlog - Query Execution
            Reporter:
            glowfall Pavel Myasnov
            Votes:
            0 Vote for this issue
            Watchers:
            9 Start watching this issue

              Created:
              Updated: