Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-84910

Investigate why remaining SSB queries don't use column index

    • Type: Icon: Task Task
    • Resolution: Fixed
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • None
    • QE 2022-10-17, QE 2022-10-31

      This task is to determine why the remaining SSB benchmarks don't use the column index. It may be that some expressions are not supported, or a stage cannot be pushed down, etc. A short comment on the ticket describing what's preventing the column index from being used is all that's necessary here.

      Currently, ssb.q2.2 and ssb.q4.1 cannot use column-store-index predicate push-down.

      Here are the steps to reproduce the issue:

      1- You can compile the code like this:

      activate && ./buildscripts/scons.py --ssl -j50 --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --cache=nolinked --modules="" ICECC=icecc CCACHE=ccache --dbg=off --ninja generate-ninja --link-model=static && ninja -j500 install-devcore
      

      2- Run the server:

      mongod --wiredTigerCacheSizeGB 16 --oplogSize 100000 --syncdelay 3600 --setParameter ttlMonitorEnabled=false --setParameter diagnosticDataCollectionEnabled=false --setParameter maxIndexBuildMemoryUsageMegabytes=16000 --setParameter featureFlagSbeFull=true --setParameter featureFlagColumnstoreIndexes=true
      

      3- Download dump.zip
      4- Unzip contents to reveal dump folder
      5- Load the data like this:

      cd ~/ssb
      mongorestore --gzip dump
      cd ~/mongo
      mongo ssb --eval 'db.order_lineitem.createIndex({"$**": "columnstore"})'
      

      6- Create a JS script named ssb_test.js:

      (function() {
      "use strict";
      
      load("./measure-time.js");
      
      load("jstests/libs/sbe_util.js");  // For "checkSBEEnabled.""
      
      const REPEAT = 5;
      const MEASURE_PERF = false;
      
      const columnstoreEnabled =
          checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]);
      if (!columnstoreEnabled) {
          jsTestLog("Columnstore index is NOT active.");
          return;
      } else {
          jsTestLog("Columnstore index is active.");
      }
      
      let currIndexes = db.order_lineitem.getIndexes()
      assert.eq(currIndexes.length, 2);
      assert(currIndexes[0].name == "_id_" || currIndexes[1].name == "_id_");
      assert(currIndexes[0].name == "$**_columnstore" || currIndexes[1].name == "$**_columnstore");
      
      const queries = {
          "ssb.Q2.2": {
              "pipeline": [
                  {
                      $match: {
                          'part.p_brand':
                              {$in: ['Brand#21', 'Brand#22', 'Brand#15', 'Brand#41', 'Brand#41']}
                      }
                  },
                  {
                      $lookup: {
                          from: 'supplier',
                          localField: 'l_suppkey',
                          foreignField: 's_suppkey',
                          'as': 'supplier'
                      }
                  },
                  {$match: {'supplier.nation.region.r_name': 'ASIA'}},
                  {$addFields: {supplier: {$first: '$supplier'}}},
                  {
                      $group: {
                          _id: {order_year: {$year: '$order.o_orderdate'}, brand: '$part.p_brand'},
                          revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}}
                      }
                  }
              ]
          },
          "ssb.Q4.1": {
              "pipeline": [
                  {$match: {'part.p_mfgr': {$in: ['Manufacturer#1', 'Manufacturer#2']}}},
                  {
                      $lookup: {
                          from: 'supplier',
                          localField: 'l_suppkey',
                          foreignField: 's_suppkey',
                          'as': 'supplier'
                      }
                  },
                  {$addFields: {supplier: {$first: '$supplier'}}},
                  {$match: {'supplier.nation.region.r_name': 'AMERICA'}},
                  {
                      $lookup: {
                          from: 'customer',
                          localField: 'order.o_custkey',
                          foreignField: 'c_custkey',
                          'as': 'customer'
                      }
                  },
                  {$addFields: {customer: {$first: '$customer'}}},
                  {$match: {'customer.nation.region.r_name': 'AMERICA'}},
                  {
                      $group: {
                          _id: {
                              order_year: {$year: '$order.o_orderdate'},
                              c_nation: '$customer.nation.n_name'
                          },
                          revenue: {
                              $sum: {
                                  $subtract: [
                                      '$l_extendedprice',
                                      {$multiply: ['$l_extendedprice', '$l_discount']}
                                  ]
                              }
                          }
                      }
                  },
                  {$addFields: {order_year: '$_id.order_year', customer_nation: '$_id.c_nation'}}
              ]
          },
      };
      
      for (const [qName, qAgg] of Object.entries(queries)) {
          print("--------------------------------------------------------------------------------------");
          jsTestLog(`Handling ${qName}:`);
          print(`${qName}.pipeline:`)
          printjson(qAgg.pipeline)
          if (qAgg.options) {
              print(`${qName}.options:`)
              printjson(qAgg.options)
          }
          let explain = qAgg.options ? db.order_lineitem.explain().aggregate(qAgg.pipeline, qAgg.options)
                                     : db.order_lineitem.explain().aggregate(qAgg.pipeline);
          print(`${qName} uses COLUMN_SCAN? ` + JSON.stringify(explain).includes('COLUMN_SCAN'));
          printjson(explain);
          if (MEASURE_PERF) {
              let durations = [];
              for (let i = 0; i < REPEAT; ++i) {
                  jsTestLog(`${qName} iteration ${i}`);
                  const dur = qAgg.options
                      ? measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline, qAgg.options))
                      : measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline));
                  jsTestLog(`${qName} took ${dur}ms`);
                  durations.push(dur);
              }
              jsTestLog(`experiment,${qName},${durations}`);
          }
      }
      })();
      

      7- Run this script like this:

      mongo ssb ./ssb_test.js
      

      The output of this program shows the SBE plans for each SSB query.

            Assignee:
            mohammad.dashti@mongodb.com Mohammad Dashti (Inactive)
            Reporter:
            ian.boros@mongodb.com Ian Boros
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated:
              Resolved: