-
Type: Task
-
Resolution: Fixed
-
Priority: Major - P3
-
None
-
Affects Version/s: None
-
Component/s: None
-
None
-
QE 2022-10-17, QE 2022-10-31
This task is to determine why the remaining SSB benchmarks don't use the column index. It may be that some expressions are not supported, or a stage cannot be pushed down, etc. A short comment on the ticket describing what's preventing the column index from being used is all that's necessary here.
Currently, ssb.q2.2 and ssb.q4.1 cannot use column-store-index predicate push-down.
Here are the steps to reproduce the issue:
1- You can compile the code like this:
activate && ./buildscripts/scons.py --ssl -j50 --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --cache=nolinked --modules="" ICECC=icecc CCACHE=ccache --dbg=off --ninja generate-ninja --link-model=static && ninja -j500 install-devcore
2- Run the server:
mongod --wiredTigerCacheSizeGB 16 --oplogSize 100000 --syncdelay 3600 --setParameter ttlMonitorEnabled=false --setParameter diagnosticDataCollectionEnabled=false --setParameter maxIndexBuildMemoryUsageMegabytes=16000 --setParameter featureFlagSbeFull=true --setParameter featureFlagColumnstoreIndexes=true
3- Download dump.zip
4- Unzip contents to reveal dump folder
5- Load the data like this:
cd ~/ssb
mongorestore --gzip dump
cd ~/mongo
mongo ssb --eval 'db.order_lineitem.createIndex({"$**": "columnstore"})'
6- Create a JS script named ssb_test.js:
(function() { "use strict"; load("./measure-time.js"); load("jstests/libs/sbe_util.js"); // For "checkSBEEnabled."" const REPEAT = 5; const MEASURE_PERF = false; const columnstoreEnabled = checkSBEEnabled(db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"]); if (!columnstoreEnabled) { jsTestLog("Columnstore index is NOT active."); return; } else { jsTestLog("Columnstore index is active."); } let currIndexes = db.order_lineitem.getIndexes() assert.eq(currIndexes.length, 2); assert(currIndexes[0].name == "_id_" || currIndexes[1].name == "_id_"); assert(currIndexes[0].name == "$**_columnstore" || currIndexes[1].name == "$**_columnstore"); const queries = { "ssb.Q2.2": { "pipeline": [ { $match: { 'part.p_brand': {$in: ['Brand#21', 'Brand#22', 'Brand#15', 'Brand#41', 'Brand#41']} } }, { $lookup: { from: 'supplier', localField: 'l_suppkey', foreignField: 's_suppkey', 'as': 'supplier' } }, {$match: {'supplier.nation.region.r_name': 'ASIA'}}, {$addFields: {supplier: {$first: '$supplier'}}}, { $group: { _id: {order_year: {$year: '$order.o_orderdate'}, brand: '$part.p_brand'}, revenue: {$sum: {$multiply: ['$l_extendedprice', '$l_discount']}} } } ] }, "ssb.Q4.1": { "pipeline": [ {$match: {'part.p_mfgr': {$in: ['Manufacturer#1', 'Manufacturer#2']}}}, { $lookup: { from: 'supplier', localField: 'l_suppkey', foreignField: 's_suppkey', 'as': 'supplier' } }, {$addFields: {supplier: {$first: '$supplier'}}}, {$match: {'supplier.nation.region.r_name': 'AMERICA'}}, { $lookup: { from: 'customer', localField: 'order.o_custkey', foreignField: 'c_custkey', 'as': 'customer' } }, {$addFields: {customer: {$first: '$customer'}}}, {$match: {'customer.nation.region.r_name': 'AMERICA'}}, { $group: { _id: { order_year: {$year: '$order.o_orderdate'}, c_nation: '$customer.nation.n_name' }, revenue: { $sum: { $subtract: [ '$l_extendedprice', {$multiply: ['$l_extendedprice', '$l_discount']} ] } } } }, {$addFields: {order_year: '$_id.order_year', customer_nation: '$_id.c_nation'}} ] }, }; for (const [qName, qAgg] of Object.entries(queries)) { print("--------------------------------------------------------------------------------------"); jsTestLog(`Handling ${qName}:`); print(`${qName}.pipeline:`) printjson(qAgg.pipeline) if (qAgg.options) { print(`${qName}.options:`) printjson(qAgg.options) } let explain = qAgg.options ? db.order_lineitem.explain().aggregate(qAgg.pipeline, qAgg.options) : db.order_lineitem.explain().aggregate(qAgg.pipeline); print(`${qName} uses COLUMN_SCAN? ` + JSON.stringify(explain).includes('COLUMN_SCAN')); printjson(explain); if (MEASURE_PERF) { let durations = []; for (let i = 0; i < REPEAT; ++i) { jsTestLog(`${qName} iteration ${i}`); const dur = qAgg.options ? measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline, qAgg.options)) : measureTime(() => db.order_lineitem.aggregate(qAgg.pipeline)); jsTestLog(`${qName} took ${dur}ms`); durations.push(dur); } jsTestLog(`experiment,${qName},${durations}`); } } })();
7- Run this script like this:
mongo ssb ./ssb_test.js
The output of this program shows the SBE plans for each SSB query.