I have a sharded cluster with one shard (a replica set of three nodes), and I am restoring data using percona-backup-mongodb.
The restore process was stuck on index creation for two days. I tried to restore a GridFS collection of 400 GB in size, and index creation ran for 3 days and didn't finish. I watched the currentOp output via adminCommand and thought that it was because of the collection size. There was no information about locks:
rs0:PRIMARY> db.adminCommand( ... { ... currentOp: true, ... $or: [ ... \{ op: "command", "command.createIndexes": { $exists: true } }, ... \{ op: "none", "msg" : /^Index Build/ } ... ] ... } ... ) { "inprog" : [ { "type" : "op", "host" : "kpak-mongo-gridfs-2:27017", "desc" : "conn295769", "connectionId" : 295769, "client" : "127.0.0.1:59226", "appName" : "mongorestore", "clientMetadata" : { "driver" : { "name" : "mongo-go-driver", "version" : "v1.7.0" }, "os" : { "type" : "linux", "architecture" : "amd64" }, "platform" : "go1.16.9", "application" : { "name" : "mongorestore" } }, "active" : true, "currentOpTime" : "2022-07-04T14:44:56.860+03:00", "effectiveUsers" : [ { "user" : "pbmuser", "db" : "admin" } ], "opid" : 362857456, "lsid" : { "id" : UUID("4aeb2829-b2dd-4bd2-8e4a-ea28c77ed31e"), "uid" : BinData(0,"8L/kOoqHkvDRIRJTrmrrO3wwOr+ToO8WLvmn15Ql7G0=") }, "secs_running" : NumberLong(236403), "microsecs_running" : NumberLong("236403076334"), "op" : "command", "ns" : "file-store.fs.chunks", "command" : { "createIndexes" : "fs.chunks", "indexes" : [ { "key" : { "files_id" : 1, "n" : 1 }, "name" : "chunkFileIds", "ns" : "file-store.fs.chunks" } ], "ignoreUnknownIndexOptions" : true, "lsid" : { "id" : UUID("4aeb2829-b2dd-4bd2-8e4a-ea28c77ed31e") }, "$clusterTime" : { "clusterTime" : Timestamp(1656698693, 534), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } }, "$db" : "file-store", "$readPreference" : { "mode" : "primaryPreferred" } }, "numYields" : 0, "waitingForLatch" : { "timestamp" : ISODate("2022-07-01T18:04:54.326Z"), "captureName" : "FutureResolution" }, "locks" : { }, "waitingForLock" : false, "lockStats" : { "ParallelBatchWriterMode" : { "acquireCount" : { "r" : NumberLong(3) } }, "ReplicationStateTransition" : { "acquireCount" : { "w" : NumberLong(4) } }, "Global" : { "acquireCount" : { "w" : NumberLong(4) } }, "Database" : { "acquireCount" : { "w" : NumberLong(3) } }, "Collection" : { "acquireCount" : { "r" : 
NumberLong(1), "w" : NumberLong(1), "W" : NumberLong(1) } }, "Mutex" : { "acquireCount" : { "r" : NumberLong(3) } } }, "waitingForFlowControl" : false, "flowControlStats" : { "acquireCount" : NumberLong(3), "timeAcquiringMicros" : NumberLong(4) } }, { "type" : "op", "host" : "kpak-mongo-gridfs-2:27017", "desc" : "conn295766", "connectionId" : 295766, "client" : "127.0.0.1:59214", "appName" : "mongorestore", "clientMetadata" : { "driver" : { "name" : "mongo-go-driver", "version" : "v1.7.0" }, "os" : { "type" : "linux", "architecture" : "amd64" }, "platform" : "go1.16.9", "application" : { "name" : "mongorestore" } }, "active" : true, "currentOpTime" : "2022-07-04T14:44:56.860+03:00", "effectiveUsers" : [ { "user" : "pbmuser", "db" : "admin" } ], "opid" : 362857455, "lsid" : { "id" : UUID("559b3155-422e-453a-9df1-dc158688d89c"), "uid" : BinData(0,"8L/kOoqHkvDRIRJTrmrrO3wwOr+ToO8WLvmn15Ql7G0=") }, "secs_running" : NumberLong(236403), "microsecs_running" : NumberLong("236403076648"), "op" : "command", "ns" : "file-store.fs.files", "command" : { "createIndexes" : "fs.files", "indexes" : [ { "key" : { "filename" : 1, "uploadDate" : 1 }, "name" : "fileNames", "ns" : "file-store.fs.files" } ], "ignoreUnknownIndexOptions" : true, "lsid" : { "id" : UUID("559b3155-422e-453a-9df1-dc158688d89c") }, "$clusterTime" : { "clusterTime" : Timestamp(1656698693, 534), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } }, "$db" : "file-store", "$readPreference" : { "mode" : "primaryPreferred" } }, "numYields" : 0, "waitingForLatch" : { "timestamp" : ISODate("2022-07-01T18:04:54.206Z"), "captureName" : "FutureResolution" }, "locks" : { }, "waitingForLock" : false, "lockStats" : { "ParallelBatchWriterMode" : { "acquireCount" : { "r" : NumberLong(3) } }, "ReplicationStateTransition" : { "acquireCount" : { "w" : NumberLong(4) } }, "Global" : { "acquireCount" : { "w" : NumberLong(4) } }, "Database" : { "acquireCount" : { "w" : NumberLong(3) } }, 
"Collection" : { "acquireCount" : { "r" : NumberLong(1), "w" : NumberLong(1), "W" : NumberLong(1) } }, "Mutex" : { "acquireCount" : { "r" : NumberLong(3) } } }, "waitingForFlowControl" : false, "flowControlStats" : { "acquireCount" : NumberLong(3), "timeAcquiringMicros" : NumberLong(2) } } ], "ok" : 1, "$gleStats" : { "lastOpTime" : Timestamp(0, 0), "electionId" : ObjectId("7fffffff0000000000000003") }, "lastCommittedOpTime" : Timestamp(1656935091, 1), "$configServerState" : { "opTime" : { "ts" : Timestamp(1656935096, 1), "t" : NumberLong(1) } }, "$clusterTime" : { "clusterTime" : Timestamp(1656935096, 1), "signature" : { "hash" : BinData(0,"9aQA7SE2bdYHsIVCKODI8WjxuCg="), "keyId" : NumberLong("7073745711275180033") } }, "operationTime" : Timestamp(1656935091, 1) }
Later I found in the mongod logs that the index creation hung after the phase "Index build: waiting for next action before completing final phase".
All nodes of the replica set were alive. Could you explain what the problem was? I use MongoDB version 4.4.