Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-81390

HashAggStage fails to respect the collation when spilling to disk

    • Query Execution
    • Fully Compatible
    • ALL
    • v7.1, v7.0, v6.0
    • QE 2023-10-16, QE 2023-10-30
    • 20

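      The HashAgg spill algorithm converts each group key to a KeyString but does not provide the conversion operation with a function to normalize the key according to the collator. As a result, keys that would be considered equal in the in-memory hash table are considered distinct in the spilled data. For example, under a case-insensitive collation the keys 'b' and 'B' belong to the same group in memory but end up as separate groups once they have been spilled.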

      The updated jstest in the diff below exercises the problem. The test fails when run as-is but succeeds if the pipeline is forced to run in the Classic engine.

      diff --git a/jstests/noPassthrough/group_spill_with_collation.js b/jstests/noPassthrough/group_spill_with_collation.js
      index f612e8b59bb..42d56ca005a 100644
      --- a/jstests/noPassthrough/group_spill_with_collation.js
      +++ b/jstests/noPassthrough/group_spill_with_collation.js
      @@ -2,6 +2,7 @@
        * Tests $group execution with increased spilling and a non-simple collation.
        */
      
      +import {assertArrayEq} from "jstests/aggregation/extras/utils.js";
       import {checkSBEEnabled} from "jstests/libs/sbe_util.js";
      
       const conn = MongoRunner.runMongod();
      @@ -24,12 +25,35 @@ for (let i = 0; i < 1000; i++) {
      
       assert.commandWorked(db.adminCommand(
           {setParameter: 1, internalQuerySlotBasedExecutionHashAggForceIncreasedSpilling: true}));
      +
      +// Test that accumulators respect the collation when the group operation spills to disk.
       const caseInsensitive = {
           collation: {locale: "en_US", strength: 2}
       };
      -const results =
      +let results =
           coll.aggregate([{$group: {_id: null, result: {$addToSet: "$x"}}}], caseInsensitive).toArray();
       assert.eq(1, results.length, results);
       assert.eq({_id: null, result: ["a"]}, results[0]);
      
      +// Test that comparisons of the group key respect the collation when the group operation spills to
      +// disk.
      +for (let i = 0; i < 1000; i++) {
      +    if (i % 3 === 0) {
      +        assert.commandWorked(coll.insert({x: 'b'}));
      +    } else {
      +        assert.commandWorked(coll.insert({x: 'B'}));
      +    }
      +}
      +
      +results =
      +    coll.aggregate(
      +            [{$group: {_id: "$x", normalizedX: {$first: {$toLower: "$x"}}, count: {$count: {}}}}],
      +            caseInsensitive)
      +        .toArray();
      +assertArrayEq({
      +    actual: results,
      +    expected: [{normalizedX: "a", count: 1000}, {normalizedX: "b", count: 1000}],
      +    fieldsToSkip: ["_id"]
      +});
      +
       MongoRunner.stopMongod(conn);
      

            Assignee:
            foteini.alvanaki@mongodb.com Foteini Alvanaki
            Reporter:
            justin.seyster@mongodb.com Justin Seyster
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

              Created:
              Updated:
              Resolved: