From ae703673f00b275089cb756de5ab30a45ab4caa7 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Fri, 3 May 2024 11:06:12 +0000
Subject: [PATCH] Repro BF-33079

---
 jstests/repro.js                          | 43 +++++++++++++++++++
 .../db/s/drop_collection_coordinator.cpp  |  5 +++
 2 files changed, 48 insertions(+)
 create mode 100644 jstests/repro.js

diff --git a/jstests/repro.js b/jstests/repro.js
new file mode 100644
index 00000000000..c2c920f784b
--- /dev/null
+++ b/jstests/repro.js
@@ -0,0 +1,43 @@
+import {configureFailPoint} from "jstests/libs/fail_point_util.js";
+import {moveOutSessionChunks, removeShard} from "jstests/sharding/libs/remove_shard_util.js";
+
+let st = new ShardingTest({shards: 2, configShard: true, enableBalancer: true});
+
+let coll = st.s.getDB('test')['foo'];
+
+// Create a sharded collection with chunks on both shards.
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: 'test', primaryShard: st.shard1.shardName}));
+assert.commandWorked(st.s.adminCommand({shardCollection: coll.getFullName(), key: {x: 'hashed'}}));
+
+// Start the collection drop. Make it hang after committing the drop on the configsvr but
+// before contacting the shards to perform the local drop.
+let fp = configureFailPoint(st.rs1.getPrimary(),
+                            "dropCollectionCoordinatorHangBeforeSendDropCollParticipant");
+let awaitDrop = startParallelShell(() => {
+    assert(db.getSiblingDB('test')['foo'].drop());
+}, st.s.port);
+
+fp.wait();
+
+// Transition to dedicated config server.
+jsTest.log("--DEBUG-- Transitioning to dedicated config server");
+removeShard(st, st.shard0.shardName);
+jsTest.log("--DEBUG-- Transitioned to dedicated config server");
+
+// Let the drop finish.
+fp.off();
+awaitDrop();
+
+// Shard the collection again.
+assert.commandWorked(st.s.adminCommand({shardCollection: coll.getFullName(), key: {x: 'hashed'}}));
+
+// Transition back to catalog shard (TODO: Why is this needed? The metadata check always
+// runs on the configsvr, doesn't it?).
+assert.commandWorked(st.s.adminCommand({transitionFromDedicatedConfigServer: 1}));
+
+// Check metadata consistency.
+let inconsistencies = st.s.getDB('test').checkMetadataConsistency().toArray();
+assert.eq(0, inconsistencies.length, tojson(inconsistencies));
+
+st.stop();
diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp
index 69e42d0a99c..cbbb9480102 100644
--- a/src/mongo/db/s/drop_collection_coordinator.cpp
+++ b/src/mongo/db/s/drop_collection_coordinator.cpp
@@ -79,6 +79,7 @@
 #include "mongo/s/grid.h"
 #include "mongo/s/sharding_state.h"
 #include "mongo/util/decorable.h"
+#include "mongo/util/fail_point.h"
 #include "mongo/util/future_impl.h"
 #include "mongo/util/out_of_line_executor.h"
 #include "mongo/util/uuid.h"
@@ -87,6 +88,8 @@
 
 namespace mongo {
 
+MONGO_FAIL_POINT_DEFINE(dropCollectionCoordinatorHangBeforeSendDropCollParticipant);
+
 void DropCollectionCoordinator::dropCollectionLocally(OperationContext* opCtx,
                                                       const NamespaceString& nss,
                                                       bool fromMigrate,
@@ -337,6 +340,8 @@ void DropCollectionCoordinator::_commitDropCollection(
 
     const auto primaryShardId = ShardingState::get(opCtx)->shardId();
 
+    dropCollectionCoordinatorHangBeforeSendDropCollParticipant.pauseWhileSet();
+
     // We need to send the drop to all the shards because both movePrimary and
     // moveChunk leave garbage behind for sharded collections.
    auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
-- 
2.34.1
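
A note on running the repro (the resmoke suite name below is an assumption,
not something the patch specifies; any suite whose fixture provides the
ShardingTest globals should work):

    python3 buildscripts/resmoke.py run --suites=sharding jstests/repro.js

The failpoint added in drop_collection_coordinator.cpp pauses the drop
coordinator after the drop has been committed on the config server but before
the participant shards are contacted to perform the local drop; the test uses
that window to remove shard0 by transitioning to a dedicated config server.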