Details copied from: https://www.mongodb.com/community/forums/t/linq-v3-selectmany-groupby-results-with-redundant-push-within-group/205648
After switching to LinqV3 some reports starts too fail because of Mongo exception:
Command aggregate failed: Exceeded memory limit for $group, but didn’t allow external sort. Pass allowDiskUse:true to opt in
Seems like the reason is the query generated with v3 which contains unnecessary $push within $group phase, below is sample code to reproduce the issue:
using System; using System.Linq; using System.Security.Authentication; using MongoDB.Driver; using MongoDB.Driver.Linq; var settings = MongoClientSettings.FromUrl(new MongoUrl("mongodb://localhost:27017/test")); settings.SslSettings = new SslSettings {EnabledSslProtocols = SslProtocols.Tls12}; settings.LinqProvider = LinqProvider.V3; var mongoClient = new MongoClient(settings); var mongoDatabase = mongoClient.GetDatabase("test"); var collection = mongoDatabase.GetCollection<OrderDao>("test"); var query1 = collection .AsQueryable() .SelectMany(i => i.Lines) .GroupBy(l => l.ItemId) .Select(g => new ItemSummary { Id = g.Key, TotalAmount = g.Sum(l => l.TotalAmount) }); var query1txt = query1.ToString(); Console.WriteLine(query1txt); Console.WriteLine(query1txt.Contains("$push") ? "Uses $push :(" : "No $push here, hurray!"); var query2 = collection .AsQueryable() .GroupBy(l => l.Id) .Select(g => new ItemSummary { Id = g.Key, TotalAmount = g.Sum(l => l.TotalAmount) }); var query2txt = query2.ToString(); Console.WriteLine(query2txt); Console.WriteLine(query2txt.Contains("$push") ? "Uses $push :(" : "No $push here, hurray!"); public class OrderDao { public OrderLineDao[] Lines { get; set; } public decimal TotalAmount { get; set; } public Guid Id { get; set; } } public class OrderLineDao { public decimal TotalAmount { get; set; } public Guid ItemId { get; set; } } public class ItemSummary { public Guid Id { get; set; } public decimal TotalAmount { get; set; } }
Output when executed with LinqV2:
aggregate([{ "$unwind" : "$Lines" }, { "$project" : { "Lines" : "$Lines", "_id" : 0 } }, { "$group" : { "_id" : "$Lines.ItemId", "__agg0" : { "$sum" : "$Lines.TotalAmount" } } }, { "$project" : { "Id" : "$_id", "TotalAmount" : "$__agg0", "_id" : 0 } }]) No $push here, hurray! aggregate([{ "$group" : { "_id" : "$_id", "__agg0" : { "$sum" : "$TotalAmount" } } }, { "$project" : { "Id" : "$_id", "TotalAmount" : "$__agg0", "_id" : 0 } }]) No $push here, hurray!
Output when executed with LinqV3:
test.test.Aggregate([{ "$project" : { "_v" : "$Lines", "_id" : 0 } }, { "$unwind" : "$_v" }, { "$group" : { "_id" : "$_v.ItemId", "_elements" : { "$push" : "$_v" } } }, { "$project" : { "_id" : "$_id", "TotalAmount" : { "$sum" : "$_elements.TotalAmount" } } }]) Uses $push :( test.test.Aggregate([{ "$group" : { "_id" : "$_id", "__agg0" : { "$sum" : "$TotalAmount" } } }, { "$project" : { "_id" : "$_id", "TotalAmount" : "$__agg0" } }]) No $push here, hurray!