Show
Testing:
Create a significant amount of data so that you can run the datasize command and you have some time to attempt to kill the operation.
Run the datasize command and quickly find the op to kill (next step)
MongoDB Enterprise > db.runCommand({ dataSize: "database.collection" })
{ "estimate" : false , "size" : 9393026191, "numObjects" : 3293296, "millis" : 14353, "ok" : 1 }
Find and kill the op (as you see the op does not get killed)
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.killOp(1133128)
{ "info" : "attempting to kill op" , "ok" : 1 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
{ "opid" : 1133128 }
MongoDB Enterprise > db.getSiblingDB( "admin" ).aggregate( [{ $currentOp : { allUsers: true , idleSessions: true } },{$match: { "command.dataSize" : "database.collection" }},{$project:{opid:1}}] )
MongoDB Enterprise >
From the logs:
{ "t" :{ "$date" : "2021-07-07T11:25:07.476+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :20482, "ctx" : "conn12" , "msg" : "Going to kill op" , "attr" :{ "opId" :1133128}}
{ "t" :{ "$date" : "2021-07-07T11:25:07.477+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :20884, "ctx" : "conn12" , "msg" : "Killed operation" , "attr" :{ "opId" :1133128}} >> This says killed, although the operation kept going
{ "t" :{ "$date" : "2021-07-07T11:25:16.670+10:00" }, "s" : "W" , "c" : "COMMAND" , "id" :20525, "ctx" : "conn17" , "msg" : "Failed to gather storage statistics for slow operation" , "attr" :{ "opId" :1133128, "error" : "lock acquire timeout" }}
{ "t" :{ "$date" : "2021-07-07T11:25:16.671+10:00" }, "s" : "I" , "c" : "COMMAND" , "id" :51803, "ctx" : "conn17" , "msg" : "Slow query" , "attr" :{ "type" : "command" , "ns" : "database.collection" , "appName" : "MongoDB Shell" , "command" :{ "dataSize" : "database.collection" , "lsid" :{ "id" :{ "$uuid" : "12825971-559f-4a68-9022-86bc601ca7ac" }}, "$db" : "test" }, "numYields" :0, "reslen" :91, "locks" :{ "ReplicationStateTransition" :{ "acquireCount" :{ "w" :1}}, "Global" :{ "acquireCount" :{ "r" :1}}, "Database" :{ "acquireCount" :{ "r" :1}}, "Collection" :{ "acquireCount" :{ "r" :1}}, "Mutex" :{ "acquireCount" :{ "r" :1}}}, "protocol" : "op_msg" , "durationMillis" :14353}}. >> THIS indicate that the operation started around "2021-07-07T11:25:02 more or less"
{ "t" :{ "$date" : "2021-07-07T11:25:25.269+10:00" }, "s" : "I" , "c" : "NETWORK" , "id" :22944, "ctx" : "conn17" , "msg" : "Connection ended" , "attr" :{ "remote" : "127.0.0.1:58530" , "connectionId" :17, "connectionCount" :2}}
This is a small test so the difference in time between the operation was attempted to be killed and the operation completed is small. But for customers with a larger data set the only solution is to restart the node.