Core Server / SERVER-54127

Retryable update by _id may execute more than once if intervening write changes document's owning shard

    • Type: Bug
    • Resolution: Unresolved
    • Priority: Major - P3
    • Affects Version/s: None
    • Component/s: Sharding
    • Assigned Teams: Cluster Scalability
    • Operating System: ALL
      python buildscripts/resmoke.py run --suite=sharding repro_retryable_update_by_id_multiple_execution.js
      
      repro_retryable_update_by_id_multiple_execution.js
      (function() {
      "use strict";
      
      load("jstests/sharding/libs/create_sharded_collection_util.js");
      
      const st = new ShardingTest({mongos: 1, config: 1, shards: 2, rs: {nodes: 1}});
      
      const db = st.s.getDB("test");
      const collection = db.getCollection("mycoll");
      CreateShardedCollectionUtil.shardCollectionWithChunks(collection, {x: 1}, [
          {min: {x: MinKey}, max: {x: 0}, shard: st.shard0.shardName},
          {min: {x: 0}, max: {x: 10}, shard: st.shard0.shardName},
          {min: {x: 10}, max: {x: 20}, shard: st.shard1.shardName},
          {min: {x: 20}, max: {x: MaxKey}, shard: st.shard1.shardName},
      ]);
      
      assert.commandWorked(collection.insert({_id: 0, x: 5, counter: 0}));
      
      const session1 = st.s.startSession({causalConsistency: false, retryWrites: false});
      const sessionCollection1 = session1.getDatabase(db.getName()).getCollection(collection.getName());
      
      const session2 = st.s.startSession({causalConsistency: false, retryWrites: false});
      const sessionCollection2 = session2.getDatabase(db.getName()).getCollection(collection.getName());
      
      // Updates by _id are broadcast to all shards which own chunks for the collection. Session
      // information from the retryable write which touched the document isn't migrated when the
      // document's shard key value is updated. This allows the new owning shard to execute the statement
      // a second time.
      const updateCmd = {
          updates: [
              {q: {_id: 0}, u: {$inc: {counter: 1}}},
              {q: {_id: 10000}, u: {$inc: {counter: 1}}},
          ],
          txnNumber: NumberLong(0),
      };
      
      const firstRes = sessionCollection1.runCommand("update", updateCmd);
      assert.eq({n: firstRes.n, nModified: firstRes.nModified}, {n: 1, nModified: 1});
      
      session2.startTransaction();
      assert.commandWorked(sessionCollection2.update({x: 5}, {$set: {x: 25}}));
      assert.commandWorked(session2.commitTransaction_forTesting());
      
      const secondRes = sessionCollection1.runCommand("update", updateCmd);
      print(`secondRes: ${tojsononeline(secondRes)}`);
      assert.eq(collection.findOne({_id: 0}), {_id: 0, x: 25, counter: 1});
      
      st.stop();
      })();
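
      As written, the final assert.eq expects counter to remain 1 because the retried statement should be deduplicated. With the bug present, the new owning shard applies the statement a second time, the document ends up with counter: 2, and the assertion fails, as shown in the log output below.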
      

      Updates by _id are broadcast to all shards that own chunks for the collection when the collection is not sharded by _id. Session information from any retryable writes that touched the document isn't migrated when the document's shard key value is updated, so the new owning shard can execute those statements a second time.
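
      The following is a minimal diagnostic sketch (not part of the attached repro) that could be pasted into the script between session2's transaction commit and the retried update command; it assumes the st and session1 variables from the repro above. It illustrates that the config.transactions record a shard would use to deduplicate the retry exists only on the original owning shard, not on the new one.

      const lsid = session1.getSessionId();  // logical session id of the retryable-write session, e.g. {id: UUID(...)}
      // Original owner (shard0): has a config.transactions entry recording txnNumber 0 for this session.
      printjson(st.shard0.getDB("config").transactions.find({"_id.id": lsid.id}).toArray());
      // New owner (shard1): no entry for this session, so a retry of the same {txnNumber: 0}
      // statement looks like a first execution and is applied again.
      printjson(st.shard1.getDB("config").transactions.find({"_id.id": lsid.id}).toArray());  // expected: []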

      [js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 secondRes: {  "nModified" : 2,  "n" : 2,  "ok" : 1,  "$clusterTime" : {  "clusterTime" : Timestamp(1611875411, 85),  "signature" : {  "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),  "keyId" : NumberLong(0) } },  "operationTime" : Timestamp(1611875411, 85) }
      [js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 s20023| {"t":{"$date":"2021-01-28T23:10:11.440+00:00"},"s":"I",  "c":"COMMAND",  "id":51803,   "ctx":"conn6","msg":"Slow query","attr":{"type":"command","ns":"test.mycoll","appName":"MongoDB Shell","command":{"find":"mycoll","filter":{"_id":0.0},"limit":1.0,"singleBatch":true,"lsid":{"id":{"$uuid":"b5e17cb0-ce96-436b-9523-2875959096a1"}},"$clusterTime":{"clusterTime":{"$timestamp":{"t":1611875411,"i":85}},"signature":{"hash":{"$binary":{"base64":"AAAAAAAAAAAAAAAAAAAAAAAAAAA=","subType":"0"}},"keyId":0}},"$db":"test"},"nShards":2,"cursorExhausted":true,"numYields":0,"nreturned":1,"reslen":274,"remote":"127.0.0.1:45432","protocol":"op_msg","durationMillis":0}}
      [js_test:repro_retryable_update_by_id_multiple_execution] 2021-01-28T23:10:11.492+0000 uncaught exception: Error: [{ "_id" : 0, "x" : 25, "counter" : 2 }] != [{ "_id" : 0, "x" : 25, "counter" : 1 }] are not equal :
      

            Assignee:
            Backlog - Cluster Scalability
            Reporter:
            Max Hirschhorn (max.hirschhorn@mongodb.com)
            Votes:
            0
            Watchers:
            11
