From 1b3a2f6495e4304232fd16e4d38a822eb647c72f Mon Sep 17 00:00:00 2001 From: Jordi Serra Torrens Date: Tue, 11 Oct 2022 09:51:23 +0000 Subject: [PATCH] Repro SERVER-70437 --- jstests/sharding/foo.js | 73 +++++++++++++++++++ .../db/query/yield_policy_callbacks_impl.cpp | 5 +- src/mongo/db/s/move_primary_coordinator.cpp | 5 ++ 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 jstests/sharding/foo.js diff --git a/jstests/sharding/foo.js b/jstests/sharding/foo.js new file mode 100644 index 00000000000..4943cce26f5 --- /dev/null +++ b/jstests/sharding/foo.js @@ -0,0 +1,73 @@ +(function() { +"use strict"; + +load('jstests/libs/parallel_shell_helpers.js'); +load("jstests/libs/fail_point_util.js"); + +var st = new ShardingTest({shards: 2, rs: {setParameter: {internalQueryExecYieldIterations: 10}}}); + +const dbName = "test"; +const collName = "foo"; +const ns = dbName + "." + collName; +const numDocs = 100; + +let coll = st.s.getCollection(ns); + +assert.commandWorked( + st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName})); + +// Insert initial data +jsTest.log("Inserting initial data."); +const bulkOp = coll.initializeOrderedBulkOp(); +for (let i = 0; i < numDocs; ++i) { + bulkOp.insert({c: 0}); +} +assert.commandWorked(bulkOp.execute()); +jsTest.log("Inserted initial data."); + +let fp1 = configureFailPoint( + st.rs0.getPrimary(), 'setYieldAllLocksHang', {namespace: coll.getFullName()}); + +jsTest.log("Starting updateMany."); +const awaitWriteResult = startParallelShell( + funWithArgs(function(dbName, collName) { + const result = db.getSiblingDB(dbName)[collName].updateMany({}, {$set: {c: 1}}); + assert.eq(100, result.modifiedCount); + jsTest.log("updateMany result: " + tojson(result)); + }, coll.getDB().getName(), coll.getName()), st.s.port); + +// Wait for the write op to yield. +fp1.wait(); +jsTest.log("updateMany yielded."); + +// Run movePrimary +let movePrimaryHangBeforeCleanStaleData = configureFailPoint( + st.rs0.getPrimary(), 'hangMovePrimaryBeforeCleanStaleData'); +jsTest.log("Starting movePrimary."); +const awaitMovePrimaryResult = startParallelShell( + funWithArgs(function(dbName, toShard) { + db.adminCommand({movePrimary: dbName, to: toShard}); + }, coll.getDB().getName(), st.shard1.shardName), st.s.port); + + +movePrimaryHangBeforeCleanStaleData.wait(); +jsTest.log("Committed movePrimary."); + +// Let the multi-write resume from the yield. +jsTest.log("Resuming yielded multi-write"); +fp1.off(); + +// Wait for the multiUpdate to finish +awaitWriteResult(); + +// Let movePrimary drop the collections from the former primary. Delayed to happen after multiUpdate +// completes because otherwise the write would fail. +movePrimaryHangBeforeCleanStaleData.off(); +awaitMovePrimaryResult(); + +// Check collection (which should be on the new primary now). +assert.eq(0, coll.count({c: 0})); +assert.eq(100, coll.count({c: 1})); + +st.stop(); +})(); diff --git a/src/mongo/db/query/yield_policy_callbacks_impl.cpp b/src/mongo/db/query/yield_policy_callbacks_impl.cpp index ac1634086e2..5a610317868 100644 --- a/src/mongo/db/query/yield_policy_callbacks_impl.cpp +++ b/src/mongo/db/query/yield_policy_callbacks_impl.cpp @@ -60,9 +60,10 @@ void YieldPolicyCallbacksImpl::duringYield(OperationContext* opCtx) const { opCtx->checkForInterrupt(); } }, - [nss](const BSONObj& config) { + [opCtx, nss](const BSONObj& config) { StringData ns = config.getStringField("namespace"); - return ns.empty() || ns == nss.ns(); + return (ns.empty() || ns == nss.ns()) && + CurOp::get(opCtx)->getCommand()->getName() == "update"; }); }; failPointHang(&setYieldAllLocksHang); diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp index a19a5ebb286..d0636037a96 100644 --- a/src/mongo/db/s/move_primary_coordinator.cpp +++ b/src/mongo/db/s/move_primary_coordinator.cpp @@ -40,12 +40,15 @@ #include "mongo/logv2/log.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" +#include "mongo/util/fail_point.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding namespace mongo { +MONGO_FAIL_POINT_DEFINE(hangMovePrimaryBeforeCleanStaleData); + void MovePrimaryCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const { stdx::lock_guard lk{_docMutex}; cmdInfoBuilder->append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId())); @@ -109,6 +112,8 @@ ExecutorFuture MovePrimaryCoordinator::_runImpl( uassertStatusOK(movePrimarySourceManager.clone(opCtx)); uassertStatusOK(movePrimarySourceManager.enterCriticalSection(opCtx)); uassertStatusOK(movePrimarySourceManager.commitOnConfig(opCtx)); + + hangMovePrimaryBeforeCleanStaleData.pauseWhileSet(); uassertStatusOK(movePrimarySourceManager.cleanStaleData(opCtx)); }) .onError([this, anchor = shared_from_this()](const Status& status) { -- 2.17.1