-
Type: Improvement
-
Resolution: Unresolved
-
Priority: Major - P3
-
None
-
Affects Version/s: 4.4.0
-
Component/s: Aggregation Framework, MapReduce
-
None
-
Query Execution
-
Query Execution 2021-03-08, Query Execution 2021-03-22, Query Execution 2021-04-05, Query Execution 2021-04-19, Query Execution 2021-05-03, Query Execution 2021-05-17
-
(copied to CRM)
Running explain with "executionStats" will execute the chosen plan, but turns any write operations into no-ops. This makes it hard to gauge the impact of those writes on performance. Addition of an explain mode that behaves like "executionStats" but which also performs write operations would allow users to measure and explicitly opt-in to writes.
Previous Description:
With MongoDB 4.4, the verbose option was removed (ignored if specified) which previously included timing details for the map and reduce phases of the command. The alternative provided by the docs is:
You can view timing information by running explain with the mapReduce command in the "executionStats" or "allPlansExecution" verbosity modes
however when using the out option with explain the results won't actually be written to the target collection, which results in explain not providing the type of timing parity the verbose option gave users.
For example:
db.foo.drop(); db.bar.drop(); db.foo.insertMany([ { _id: 1, cust_id: "Ant O. Knee", ord_date: new Date("2020-03-01"), price: 25, items: [ { sku: "oranges", qty: 5, price: 2.5 }, { sku: "apples", qty: 5, price: 2.5 } ], status: "A" }, { _id: 2, cust_id: "Ant O. Knee", ord_date: new Date("2020-03-08"), price: 70, items: [ { sku: "oranges", qty: 8, price: 2.5 }, { sku: "chocolates", qty: 5, price: 10 } ], status: "A" }, { _id: 3, cust_id: "Busby Bee", ord_date: new Date("2020-03-08"), price: 50, items: [ { sku: "oranges", qty: 10, price: 2.5 }, { sku: "pears", qty: 10, price: 2.5 } ], status: "A" }, { _id: 4, cust_id: "Busby Bee", ord_date: new Date("2020-03-18"), price: 25, items: [ { sku: "oranges", qty: 10, price: 2.5 } ], status: "A" }, { _id: 5, cust_id: "Busby Bee", ord_date: new Date("2020-03-19"), price: 50, items: [ { sku: "chocolates", qty: 5, price: 10 } ], status: "A"}, { _id: 6, cust_id: "Cam Elot", ord_date: new Date("2020-03-19"), price: 35, items: [ { sku: "carrots", qty: 10, price: 1.0 }, { sku: "apples", qty: 10, price: 2.5 } ], status: "A" }, { _id: 7, cust_id: "Cam Elot", ord_date: new Date("2020-03-20"), price: 25, items: [ { sku: "oranges", qty: 10, price: 2.5 } ], status: "A" }, { _id: 8, cust_id: "Don Quis", ord_date: new Date("2020-03-20"), price: 75, items: [ { sku: "chocolates", qty: 5, price: 10 }, { sku: "apples", qty: 10, price: 2.5 } ], status: "A" }, { _id: 9, cust_id: "Don Quis", ord_date: new Date("2020-03-20"), price: 55, items: [ { sku: "carrots", qty: 5, price: 1.0 }, { sku: "apples", qty: 10, price: 2.5 }, { sku: "oranges", qty: 10, price: 2.5 } ], status: "A" }, { _id: 10, cust_id: "Don Quis", ord_date: new Date("2020-03-23"), price: 25, items: [ { sku: "oranges", qty: 10, price: 2.5 } ], status: "A" } ]) var mapFunction1 = function() { emit(this.cust_id, this.price); }; var reduceFunction1 = function(keyCustId, valuesPrices) { return Array.sum(valuesPrices); }; db.runCommand( { mapReduce: "foo", map: mapFunction1, reduce: reduceFunction1, verbose: true, out: "bar" } )
The output of the above command in MongoDB 4.2 and 4.4 is as follows:
MongoDB 4.2
{ "result" : "bar", "timeMillis" : 112, "timing" : { "mapTime" : 0, "emitLoop" : 108, "reduceTime" : 1, "mode" : "mixed", "total" : 112 }, "counts" : { "input" : 10, "emit" : 10, "reduce" : 4, "output" : 4 }, "ok" : 1 }
MongoDB 4.4
{ "result" : "bar", "ok" : 1 }
By wrapping the mapReduce command in an explain, planner timing is produced:
db.runCommand( { explain: { mapReduce: "foo", map: mapFunction1, reduce: reduceFunction1, verbose: true, out: "bar", }, verbosity: "executionStats" } ) /* { "stages" : [ { "$cursor" : { "queryPlanner" : { "plannerVersion" : 1, "namespace" : "test.foo", "indexFilterSet" : false, "parsedQuery" : { }, "queryHash" : "8B3D4AB8", "planCacheKey" : "8B3D4AB8", "winningPlan" : { "stage" : "PROJECTION_DEFAULT", "transformBy" : { "emits" : { "$_internalJsEmit" : { "eval" : "function() {\n emit(this.cust_id, this.price);\n}", "this" : "$$ROOT" } }, "_id" : false }, "inputStage" : { "stage" : "COLLSCAN", "direction" : "forward" } }, "rejectedPlans" : [ ] }, "executionStats" : { "executionSuccess" : true, "nReturned" : 10, "executionTimeMillis" : 19, "totalKeysExamined" : 0, "totalDocsExamined" : 10, "executionStages" : { "stage" : "PROJECTION_DEFAULT", "nReturned" : 10, "executionTimeMillisEstimate" : 18, "works" : 12, "advanced" : 10, "needTime" : 1, "needYield" : 0, "saveState" : 2, "restoreState" : 2, "isEOF" : 1, "transformBy" : { "emits" : { "$_internalJsEmit" : { "eval" : "function() {\n emit(this.cust_id, this.price);\n}", "this" : "$$ROOT" } }, "_id" : false }, "inputStage" : { "stage" : "COLLSCAN", "nReturned" : 10, "executionTimeMillisEstimate" : 0, "works" : 12, "advanced" : 10, "needTime" : 1, "needYield" : 0, "saveState" : 2, "restoreState" : 2, "isEOF" : 1, "direction" : "forward", "docsExamined" : 10 } } } }, "nReturned" : NumberLong(10), "executionTimeMillisEstimate" : NumberLong(19) }, { "$unwind" : { "path" : "$emits" }, "nReturned" : NumberLong(10), "executionTimeMillisEstimate" : NumberLong(19) }, { "$group" : { "_id" : "$emits.k", "value" : { "$_internalJsReduce" : { "data" : "$emits", "eval" : "function(keyCustId, valuesPrices) {\n return Array.sum(valuesPrices);\n}" } } }, "nReturned" : NumberLong(4), "executionTimeMillisEstimate" : NumberLong(19) }, { "$out" : { "db" : "test", "coll" : "bar" }, "nReturned" : NumberLong(0), "executionTimeMillisEstimate" : NumberLong(19) } ], "serverInfo" : { "host" : "MEDIA-DESKTOP", "port" : 27017, "version" : "4.4.0", "gitVersion" : "563487e100c4215e2dce98d0af2a6a5a2d67c5cf" }, "ok" : 1 } */
The executionTimeMillisEstimate produced by the query planner does not provide the same map stage, nor reduce stage timing that was present prior to the MapReduce in Aggregation project being completed and introduced in MongoDB 4.3.3.
With SERVER-44434, the legacy mapReduce code was fully removed, but when mr::runMapReduce was replaced by map_reduce_agg::runAggregationMapReduce there was no equivalent option for tracking timing implemented (though SERVER-42685 called for it).
Some of the legacy code is summarized below in case this facilitates reintroduction of this functionality:
bool runMapReduce(OperationContext* opCtx, const string& dbname, const BSONObj& cmd, string& errmsg, BSONObjBuilder& result) { Timer t; ... BSONObjBuilder timingBuilder; ... long long mapTime = 0; long long reduceTime = 0; ... Timer mt; // <-- Map timer ... PlanExecutor::ExecState execState; while (PlanExecutor::ADVANCED == (execState = exec->getNext(&o, nullptr))) { ... if (config.verbose) mt.reset(); ... if (config.verbose) mapTime += mt.micros(); } ... timingBuilder.appendNumber("mapTime", mapTime / 1000); timingBuilder.append("emitLoop", t.millis()); ... Timer rt; <-- Reduce timer // do reduce in memory // this will be the last reduce needed for inline mode state.reduceInMemory(); // if not inline: dump the in memory map to inc collection, all data is on disk state.dumpToInc(); // final reduce state.finalReduce(opCtx, curOp); reduceTime += rt.micros(); ... timingBuilder.appendNumber("reduceTime", reduceTime / 1000); timingBuilder.append("mode", state.jsMode() ? "js" : "mixed"); ... timingBuilder.appendNumber("total", t.millis()); ... if (config.verbose) result.append("timing", timingBuilder.obj()); ... }