Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-92964

Ensure collation is followed when using hash index in SBE IndexedLoopJoin

    • Type: Icon: Bug Bug
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • 8.1.0-rc0, 8.0.5
    • Affects Version/s: 6.0.16, 7.0.12, 8.0.0-rc15
    • Component/s: None
    • None
    • Query Execution
    • Fully Compatible
    • ALL
    • v8.0, v7.0, v6.0
    • Hide

      resmoke's script to reproduce the issue

      import {configureFailPoint} from "jstests/libs/fail_point_util.js";
      
      // Uncomment the line below to run the same reproduction against the Classic engine:
      // db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "forceClassicEngine"});
      
      // The collection is joined with itself: it is both the aggregation source and the
      // $lookup `from` collection below.
      const collectionName = 'test_bf_34317';
      const coll = db[collectionName];
      coll.drop();
      
      // A single document; its `str` value should match itself in the self-join.
      const docs = [
          {
              _id: 108,
              "str": "foreground deliverables",
          },
      ];
      
      // The same options object (carrying the collation) is passed to createIndex below and
      // to every aggregate/explain call made through run().
      const options = {
          collation: {
              locale: 'ru',
              strength: 1,
          },
      };
      
      coll.insertMany(docs);
      // Hashed index on the join field, created with the collation above.
      coll.createIndex({"str": "hashed"}, options);
      
      // The leading $limit stage keeps the non-optimized query from using the index, so the
      // results can be compared between the run that uses the index and the run that does not.
      const pipeline = [
          {$limit: 20},
          {$lookup: {from: collectionName, localField: "str", foreignField: "str", as: "array"}}
      ];
      
      /**
       * Logs the explain output of an aggregation and then the aggregation's results.
       *
       * Both values are serialized with `tojson` and written with `jsTest.log`, explain first.
       *
       * @param coll - collection to aggregate on.
       * @param pipeline - aggregation pipeline passed to both calls.
       * @param options - aggregation options passed to both calls.
       */
      function run(coll, pipeline, options) {
          // Each producer is evaluated lazily, in order, right before its value is logged.
          const producers = [
              () => coll.explain(true).aggregate(pipeline, options),
              () => coll.aggregate(pipeline, options).toArray(),
          ];

          for (const produce of producers) {
              jsTest.log(tojson(produce()));
          }
      }
      
      // First pass: default behaviour, with optimizations enabled.
      jsTest.log('Run with enabled optimization');
      
      run(coll, pipeline, options);
      
      // Second pass: same pipeline with match-expression and pipeline optimization disabled.
      jsTest.log('Run with disabled optimization');
      
      // Keep the fail point handles so they can be switched off afterwards; otherwise both
      // fail points stay enabled on the server after the script finishes or throws.
      const activeFailPoints = [];
      try {
          activeFailPoints.push(configureFailPoint(db, 'disableMatchExpressionOptimization'));
          activeFailPoints.push(configureFailPoint(db, 'disablePipelineOptimization'));
      
          run(coll, pipeline, options);
      } finally {
          for (const failPoint of activeFailPoints) {
              failPoint.off();
          }
      }
      

      The result of the plan that uses a collated hash index:

      [
          {
              "_id": 108,
              "str": "foreground deliverables",
              "array": []
          }
      ]
      

      The result of the query that uses collscan:

      [
          {
              "_id": 108,
              "str": "foreground deliverables",
              "array": [
                  {
                      "_id": 108,
                      "str": "foreground deliverables"
                  }
              ]
          }
      ]
      
      Show
      resmoke's script to reproduce the issue import {configureFailPoint} from "jstests/libs/fail_point_util.js" ; // Uncomment the line below if you like to test the Classic engine behaviour: // db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "forceClassicEngine" }); const collectionName = 'test_bf_34317' ; const coll = db[collectionName]; coll.drop(); const docs = [ { _id: 108, "str" : "foreground deliverables" , }, ]; const options = { collation: { locale: 'ru' , strength: 1, }, }; coll.insertMany(docs); coll.createIndex({ "str" : "hashed" }, options); // $limit stage does not allow non-optimized query use the index, so we can see the different results when the index is used and not used. const pipeline = [ {$limit: 20}, {$lookup: {from: collectionName, localField: "str" , foreignField: "str" , as: "array" }} ]; function run(coll, pipeline, options) { const explain = coll.explain( true ).aggregate(pipeline, options); jsTest.log(tojson(explain)); const result = coll.aggregate(pipeline, options).toArray(); jsTest.log(tojson(result)); } jsTest.log( 'Run with enabled optimization' ); run(coll, pipeline, options); jsTest.log( 'Run with disabled optimization' ); configureFailPoint(db, 'disableMatchExpressionOptimization' ); configureFailPoint(db, 'disablePipelineOptimization' ); run(coll, pipeline, options); The result of the plan that uses a collated hash index: [ { "_id" : 108, "str" : "foreground deliverables" , "array" : [] } ] The result of the query that uses collscan: [ { "_id" : 108, "str" : "foreground deliverables" , "array" : [ { "_id" : 108, "str" : "foreground deliverables" } ] } ]
    • QE 2024-08-05, QE 2024-08-19
    • 0

      SBE fails to find matching documents in the foreign collection when it uses a collated hashed index. Please see the Steps To Reproduce section for details.

            Assignee:
            adi.agrawal@mongodb.com Adi Agrawal
            Reporter:
            alexander.ignatyev@mongodb.com Alexander Ignatyev
            Votes:
            0 Vote for this issue
            Watchers:
            16 Start watching this issue

              Created:
              Updated:
              Resolved: