Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-85423

CollectionScan::work() returns NEED_TIME for clustered collections which causes early yielding

    • Type: Icon: Bug Bug
    • Resolution: Won't Fix
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • None
    • ALL
    • Hide

      Below is a modified version of the test case, stripped down to the bare minimum to reliably reproduce the issue. In the failure case, the test will hang indefinitely.

      // Test that the read concern level 'snapshot' exhibits the correct yielding behavior. That is,
      // operations performed at read concern level snapshot check for interrupt but do not yield locks or
      // storage engine resources.
      // @tags: [
      //   uses_transactions,
      // ]
      import {waitForCurOpByFailPoint} from "jstests/libs/curop_helpers.js";
      
      const dbName = "test";
      // NOTE(review): 'collName' appears unused in this stripped-down repro; the
      // code below refers to 'db.coll' / "coll" directly.
      const collName = "coll";
      
      // Single-node replica set: transactions require a replica set, and one node
      // is enough to reproduce the yielding behavior under test.
      const rst = new ReplSetTest({nodes: 1});
      rst.startSet();
      rst.initiate();
      const db = rst.getPrimary().getDB(dbName);
      const adminDB = db.getSiblingDB("admin");
      const coll = db.coll;
      // Number of documents populateCollection() seeds; also used as the _id of
      // the document inserted mid-execution in testCommand().
      TestData.numDocs = 4;
      
      // Set 'internalQueryExecYieldIterations' to 2 to ensure that commands yield on the second try
      // (i.e. after they have established a snapshot but before they have returned any documents).
      assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 2}));
      
      // Set 'internalQueryExecYieldPeriodMS' to 24 hours to significantly reduce the probability of a
      // situation occurring where the execution threads do not receive enough CPU time and commands
      // yield on timeout (i.e. yield period expiration) instead of on the second try as expected by
      // setting parameter 'internalQueryExecYieldIterations' to 2.
      assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryExecYieldPeriodMS: 86400000}));
      
      // Asserts that exactly one in-progress operation on the test collection has
      // the given opid, and that a kill has been requested for it (killPending).
      function assertKillPending(opId) {
          const ns = coll.getFullName();
          const matching =
              adminDB.aggregate([{$currentOp: {}}, {$match: {ns: ns, opid: opId}}]).toArray();
          // Capture every op on the namespace up front so a failed assertion
          // reports useful diagnostics.
          const allOpsOnNs =
              tojson(adminDB.aggregate([{$currentOp: {}}, {$match: {ns: ns}}]).toArray());
          assert.eq(matching.length, 1, allOpsOnNs);
          const [op] = matching;
          assert(op.hasOwnProperty("killPending"), tojson(matching));
          assert.eq(true, op.killPending, tojson(matching));
      }
      
      // Drops and re-seeds the test collection with TestData.numDocs documents
      // ({_id: 0..numDocs-1, x: 1, location: [0, 0]}) plus a 2d geo index, using
      // w:"majority" throughout so the data is durably committed before the test
      // opens its snapshot.
      function populateCollection() {
          const majority = {writeConcern: {w: "majority"}};
          db.coll.drop(majority);
          let i = 0;
          while (i < TestData.numDocs) {
              assert.commandWorked(db.coll.insert({_id: i, x: 1, location: [0, 0]}, majority));
              i++;
          }
          assert.commandWorked(db.runCommand({
              createIndexes: "coll",
              indexes: [{key: {location: "2d"}, name: "geo_2d"}],
              writeConcern: {w: "majority"}
          }));
      }
      
      // Core test driver. Runs 'awaitCommandFn' in a parallel shell twice to check
      // two properties of snapshot read concern operations:
      //   1. they hold their locks while paused at the interrupt-check failpoint
      //      (a concurrent drop must block until the command finishes), and
      //   2. they do not observe a document inserted after the snapshot was taken.
      // 'curOpFilter' narrows the $currentOp match used to detect that the command
      // has reached the failpoint. NOTE(review): 'testWriteConflict' and
      // 'setYieldIterations' are never referenced in this stripped-down version;
      // presumably the full test uses them — confirm against the original file.
      function testCommand(awaitCommandFn, curOpFilter, testWriteConflict, setYieldIterations) {
          //
          // Test that the command does not yield locks.
          //
      
          TestData.txnNumber++;
          populateCollection();
      
          // Start a command that hangs before checking for interrupt.
          assert.commandWorked(db.adminCommand(
              {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang", mode: "alwaysOn"}));
          let awaitCommand = startParallelShell(awaitCommandFn, rst.ports[0]);
          // Block until $currentOp shows the command parked on the failpoint; only
          // then is it safe to start the competing drop.
          waitForCurOpByFailPoint(
              db, coll.getFullName(), "setInterruptOnlyPlansCheckForInterruptHang", curOpFilter);
      
          // Start a drop. This should block behind the command, since the command does not yield
          // locks.
          let awaitDrop = startParallelShell(function() {
              db.getSiblingDB("test").coll.drop({writeConcern: {w: "majority"}});
          }, rst.ports[0]);
      
          // Remove the hang. The command should complete successfully.
          assert.commandWorked(db.adminCommand(
              {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang", mode: "off"}));
          awaitCommand();
      
          // Now the drop can complete.
          awaitDrop();
      
          //
          // Test that the command does not read data that is inserted during its execution.
          // 'awaitCommandFn' should fail if it reads the following document:
          //     {_id: <numDocs>, x: 1, new: 1, location: [0, 0]}
          //
      
          TestData.txnNumber++;
          populateCollection();
      
          // Start a command that hangs before checking for interrupt.
          assert.commandWorked(db.adminCommand(
              {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang", mode: "alwaysOn"}));
          awaitCommand = startParallelShell(awaitCommandFn, rst.ports[0]);
          waitForCurOpByFailPoint(
              db, coll.getFullName(), "setInterruptOnlyPlansCheckForInterruptHang", curOpFilter);
      
          // Insert data that should not be read by the command. The insert lands
          // while the command is paused at the failpoint, i.e. after its snapshot
          // was established.
          assert.commandWorked(db.coll.insert({_id: TestData.numDocs, x: 1, new: 1, location: [0, 0]},
                                              {writeConcern: {w: "majority"}}));
      
          // Remove the hang. The command should complete successfully.
          assert.commandWorked(db.adminCommand(
              {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang", mode: "off"}));
          awaitCommand();
      }
      
      // Test delete. Runs a two-statement delete inside a snapshot transaction:
      // the first delete (q: {}) removes one pre-existing document; the second
      // (q: {new: 1}) targets the document testCommand() inserts mid-execution,
      // which must be invisible to the snapshot — hence res.n must stay 1.
      // NOTE(review): only three arguments are passed, so 'setYieldIterations'
      // is undefined inside testCommand() (it is unused there anyway).
      testCommand(function() {
          const session = db.getMongo().startSession({causalConsistency: false});
          const sessionDb = session.getDatabase("test");
          session.startTransaction({readConcern: {level: "snapshot"}});
          const res = assert.commandWorked(sessionDb.runCommand(
              {delete: "coll", deletes: [{q: {}, limit: 1}, {q: {new: 1}, limit: 1}]}));
          assert.commandWorked(session.commitTransaction_forTesting());
          // Only remove one existing doc committed before the transaction.
          assert.eq(res.n, 1, tojson(res));
      }, {op: "remove"}, true);
      
      rst.stopSet();
      
      Show
      Below is a modified version of the test case, stripped down to the bare minimum to reliably reproduce the issue. In the failure case, the test will hang indefinitely. // Test that the read concern level 'snapshot' exhibits the correct yielding behavior. That is, // operations performed at read concern level snapshot check for interrupt but do not yield locks or // storage engine resources. // @tags: [ // uses_transactions, // ] import {waitForCurOpByFailPoint} from "jstests/libs/curop_helpers.js" ; const dbName = "test" ; const collName = "coll" ; const rst = new ReplSetTest({nodes: 1}); rst.startSet(); rst.initiate(); const db = rst.getPrimary().getDB(dbName); const adminDB = db.getSiblingDB( "admin" ); const coll = db.coll; TestData.numDocs = 4; // Set 'internalQueryExecYieldIterations' to 2 to ensure that commands yield on the second try // (i.e. after they have established a snapshot but before they have returned any documents). assert .commandWorked(db.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 2})); // Set 'internalQueryExecYieldPeriodMS' to 24 hours to significantly reduce a probability of a // situation occuring where the execution threads do not receive enough CPU time and commands yield // on timeout (i.e. yield period expiration) instead of on the second try as expected by setting // parameter 'internalQueryExecYieldIterations' to 2. 
assert .commandWorked(db.adminCommand({setParameter: 1, internalQueryExecYieldPeriodMS: 86400000})); function assertKillPending(opId) { const res = adminDB.aggregate([{$currentOp: {}}, {$match: {ns: coll.getFullName(), opid: opId}}]) .toArray(); assert .eq( res.length, 1, tojson( adminDB.aggregate([{$currentOp: {}}, {$match: {ns: coll.getFullName()}}]).toArray())); assert (res[0].hasOwnProperty( "killPending" ), tojson(res)); assert .eq( true , res[0].killPending, tojson(res)); } function populateCollection() { db.coll.drop({writeConcern: {w: "majority" }}); for (let i = 0; i < TestData.numDocs; i++) { assert .commandWorked( db.coll.insert({_id: i, x: 1, location: [0, 0]}, {writeConcern: {w: "majority" }})); } assert .commandWorked(db.runCommand({ createIndexes: "coll" , indexes: [{key: {location: "2d" }, name: "geo_2d" }], writeConcern: {w: "majority" } })); } function testCommand(awaitCommandFn, curOpFilter, testWriteConflict, setYieldIterations) { // // Test that the command does not yield locks. // TestData.txnNumber++; populateCollection(); // Start a command that hangs before checking for interrupt. assert .commandWorked(db.adminCommand( {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang" , mode: "alwaysOn" })); let awaitCommand = startParallelShell(awaitCommandFn, rst.ports[0]); waitForCurOpByFailPoint( db, coll.getFullName(), "setInterruptOnlyPlansCheckForInterruptHang" , curOpFilter); // Start a drop. This should block behind the command, since the command does not yield // locks. let awaitDrop = startParallelShell(function() { db.getSiblingDB( "test" ).coll.drop({writeConcern: {w: "majority" }}); }, rst.ports[0]); // Remove the hang. The command should complete successfully. assert .commandWorked(db.adminCommand( {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang" , mode: "off" })); awaitCommand(); // Now the drop can complete. awaitDrop(); // // Test that the command does not read data that is inserted during its execution. 
// 'awaitCommandFn' should fail if it reads the following document: // {_id: <numDocs>, x: 1, new : 1, location: [0, 0]} // TestData.txnNumber++; populateCollection(); // Start a command that hangs before checking for interrupt. assert .commandWorked(db.adminCommand( {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang" , mode: "alwaysOn" })); awaitCommand = startParallelShell(awaitCommandFn, rst.ports[0]); waitForCurOpByFailPoint( db, coll.getFullName(), "setInterruptOnlyPlansCheckForInterruptHang" , curOpFilter); // Insert data that should not be read by the command. assert .commandWorked(db.coll.insert({_id: TestData.numDocs, x: 1, new : 1, location: [0, 0]}, {writeConcern: {w: "majority" }})); // Remove the hang. The command should complete successfully. assert .commandWorked(db.adminCommand( {configureFailPoint: "setInterruptOnlyPlansCheckForInterruptHang" , mode: "off" })); awaitCommand(); } // Test delete. testCommand(function() { const session = db.getMongo().startSession({causalConsistency: false }); const sessionDb = session.getDatabase( "test" ); session.startTransaction({readConcern: {level: "snapshot" }}); const res = assert .commandWorked(sessionDb.runCommand( {delete: "coll" , deletes: [{q: {}, limit: 1}, {q: { new : 1}, limit: 1}]})); assert .commandWorked(session.commitTransaction_forTesting()); // Only remove one existing doc committed before the transaction. assert .eq(res.n, 1, tojson(res)); }, {op: "remove" }, true ); rst.stopSet();

      The CollectionScan::work() method returns NEED_TIME when it is scanning a clustered collection and it is unable to seekNear() close enough to the target record. As a result, PlanExecutorImpl::_getNextImpl() will continue to the next iteration in the loop, resulting in another check for yielding.

      In the case of jstests/noPassthrough/read_concern_snapshot_yielding.js, the test sets internalQueryExecYieldIterations to 2 and internalQueryExecYieldPeriodMS to 24 hours. The effect of this is that the setInterruptOnlyPlansCheckForInterruptHang failpoint will be entered whenever the loop in PlanExecutorImpl::_getNextImpl() reaches its second iteration. So, if the collection being scanned is clustered — in this case config.transactions — and the scan is unable to reach the interior of the scan boundary, the failpoint will be entered before the first document is made available.

      My changes to make config.transactions clustered in SERVER-80862 result in this test nondeterministically failing, because the failpoint is reached too early in this way. The patch below provides a solution that is likely unfit for production but demonstrates the problem:

      diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
      index 4e88a1cc4fb..69362d2cc9f 100644
      --- a/src/mongo/db/exec/collection_scan.cpp
      +++ b/src/mongo/db/exec/collection_scan.cpp
      @@ -156,6 +156,7 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
       }
      
       PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) {
      +    _needTimeDueToClusteredCollection = false;
           if (_commonStats.isEOF) {
               _priority.reset();
               return PlanStage::IS_EOF;
      @@ -474,6 +475,8 @@ PlanStage::StageState CollectionScan::returnIfMatches(WorkingSetMember* member,
           // mark their position for resuming the tailable scan later on.
           if (beforeStartOfRange(_params, *member)) {
               _workingSet->free(memberID);
      +        LOGV2(8086240, "setting PlanStage::NEED_TIME bc clustered collection didn't seekNear close enough");
      +        _needTimeDueToClusteredCollection = true;
               return PlanStage::NEED_TIME;
           }
      
      diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h
      index bdf48637c72..4a2ae06c4a0 100644
      --- a/src/mongo/db/exec/plan_stage.h
      +++ b/src/mongo/db/exec/plan_stage.h
      @@ -363,6 +363,11 @@ public:
               _commonStats.executionTime.emplace(0);
           }
      
      +    bool needTimeDueToClusteredCollection() const {
      +        return _needTimeDueToClusteredCollection;
      +    }
      +    bool _needTimeDueToClusteredCollection{false};
      +
       protected:
           /**
            * Performs one unit of work.  See comment at work() above.
      diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
      index 0828f77067c..c68d29388c2 100644
      --- a/src/mongo/db/query/plan_executor_impl.cpp
      +++ b/src/mongo/db/query/plan_executor_impl.cpp
      @@ -415,7 +415,7 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
                   }
               };
      
      -        if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
      +        if (!_root->needTimeDueToClusteredCollection() && _yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
                   uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx, whileYieldingFn));
               }
      

      With this patch, the failure is no longer reproducible. I suspect that this is a bona fide bug in the query code, since it seems wrong to me that yielding would occur more often because a collection is clustered (note that the issue never occurs when the collection is nonclustered).

            Assignee:
            james.bronsted@mongodb.com James Bronsted
            Reporter:
            james.bronsted@mongodb.com James Bronsted
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated:
              Resolved: