diff --git a/jstests/sharding/remove_shard_and_move_primary.js b/jstests/sharding/remove_shard_and_move_primary.js
new file mode 100644
index 00000000000..0aa03966bb9
--- /dev/null
+++ b/jstests/sharding/remove_shard_and_move_primary.js
@@ -0,0 +1,107 @@
+/**
+ * Tests the interleaving of 'removeShard' and 'movePrimary' targeting the same shard.
+ *
+ * The 'blockAfterCountingUnshardedDatabasesOnRemovedShard' failpoint pauses 'removeShard' on the
+ * config server between the draining check and the actual removal of the shard. While the command
+ * is paused, the primary shard of the database is moved to the shard being removed. Once
+ * 'removeShard' has completed, the test checks that the shard is gone, that it is not the primary
+ * shard of the database and that the collection still holds all of its documents.
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+const countDoc = 120;
+
+// Inserts 'countDoc' documents, each with a 1MB string, into 'coll'; every write must succeed.
+function insertDocuments(coll) {
+    const bigString = 'X'.repeat(1024 * 1024);  // 1MB
+    for (let i = 0; i < countDoc; i++) {
+        assert.commandWorked(coll.insert({_id: i, bigString: bigString}));
+    }
+}
+
+// Returns true if 'config.shards' has a document whose '_id' is 'shardName'.
+function shardExists(shardName) {
+    return (st.s.getCollection("config.shards").find({_id: shardName}).itcount() > 0);
+}
+
+// Returns true if the database of 'coll' lists a collection with the same name as 'coll'.
+function collectionExists(coll) {
+    return Array.contains(coll.getDB().getCollectionNames(), coll.getName());
+}
+
+// Logs every document of 'config.databases', tagging the message with 'phase'.
+function printDatabases(phase) {
+    const databases = st.s.getCollection("config.databases").find({}).toArray();
+    jsTestLog("[" + phase + "] Databases: " + tojson(databases));
+}
+
+// Logs every document of 'config.shards', tagging the message with 'phase'.
+function printShards(phase) {
+    const shards = st.s.getCollection("config.shards").find({}).toArray();
+    jsTestLog("[" + phase + "] Shards: " + tojson(shards));
+}
+
+/**
+ * TEST STARTS HERE
+ */
+const st = new ShardingTest({shards: 3, other: {chunkSize: 1, enableAutoSplit: false}});
+
+const dbName = "test";
+const collA = st.s.getCollection(dbName + ".collA");
+
+// Setting up initial scenario
+// - database 'test' -> 'shard0' primary shard
+// - coll 'test.collA' -> unsharded + 120 documents inserted
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, st.shard0.shardName);
+insertDocuments(collA);
+
+// Initial checks
+assert(shardExists(st.shard1.shardName));
+assert(collectionExists(collA));
+assert.eq(collA.count(), countDoc);
+
+printDatabases("BeforeRemoveShard");
+printShards("BeforeRemoveShard");
+
+// Make 'removeShard' pause on the config server primary when it reaches the failpoint.
+const failpoint = configureFailPoint(st.configRS.getPrimary(),
+                                     'blockAfterCountingUnshardedDatabasesOnRemovedShard');
+
+// Keeps issuing 'removeShard' for 'shardName' until the command reports the 'completed' state.
+// Runs in a parallel shell, so it only relies on shell globals such as 'db' and 'assert'.
+function removeShard(shardName) {
+    let i = 0;
+    assert.soon(() => {
+        const res = assert.commandWorked(db.adminCommand({removeShard: shardName}));
+        jsTestLog("removeShard result (" + i.toString() + "): " + tojson(res));
+        i++;
+        return res.state === 'completed';
+    });
+}
+
+const awaitShell = startParallelShell(funWithArgs(removeShard, st.shard1.shardName), st.s.port);
+
+// While 'removeShard' is paused, make the shard being removed the primary shard of the database.
+failpoint.wait();
+jsTestLog("Failpoint reached, going to movePrimary for '" + dbName + "' to " + st.shard1.shardName);
+assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard1.shardName}));
+failpoint.off();
+
+awaitShell();
+
+printDatabases("AfterRemoveShard");
+printShards("AfterRemoveShard");
+
+// Final checks
+assert(!shardExists(st.shard1.shardName));
+assert.neq(st.getPrimaryShardIdForDatabase(dbName), st.shard1.shardName);
+assert(collectionExists(collA));
+assert.eq(collA.count(), countDoc);
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 34fdefaecd5..1f2cb43df23 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -97,6 +97,8 @@
 namespace mongo {
 namespace {
 
+MONGO_FAIL_POINT_DEFINE(blockAfterCountingUnshardedDatabasesOnRemovedShard);
+
 using CallbackHandle = executor::TaskExecutor::CallbackHandle;
 using CallbackArgs = executor::TaskExecutor::CallbackArgs;
 using RemoteCommandCallbackArgs = executor::TaskExecutor::RemoteCommandCallbackArgs;
@@ -914,6 +916,11 @@ RemoveShardProgress ShardingCatalogManager::removeShard(OperationContext* opCtx,
                     {chunkCount, databaseCount, jumboCount})};
     }
 
+    // Test-only hook: lets a test run other commands between the draining check and the removal.
+    if (MONGO_unlikely(blockAfterCountingUnshardedDatabasesOnRemovedShard.shouldFail())) {
+        blockAfterCountingUnshardedDatabasesOnRemovedShard.pauseWhileSet();
+    }
+
     // Draining is done, now finish removing the shard.
     LOGV2(
         21949, "Going to remove shard: {shardId}", "Going to remove shard", "shardId"_attr = name);