Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-31953

Range deletions by documentKey (shardKey + _id) can cause secondaries to crash

    • Type: Bug
    • Resolution: Fixed
    • Priority: Major - P3
    • 3.6.0-rc5, 3.7.1
    • Affects Version/s: 3.6.0-rc0
    • Component/s: Sharding
    • Fully Compatible
    • ALL
    • v3.6
    • Hide
      python buildscripts/resmoke.py --suites=no_server repro_server31953.js
      
      repro_server31953.js
      // Reproduction for SERVER-31953: after a chunk migration, the donor shard's range deletion
      // writes a delete oplog entry keyed by shard key + _id. When the shard key value is
      // `undefined`, the donor's secondary fails while applying that entry.
      (function() {
          "use strict";
      
          // Two shards, each requested as a 2-node replica set, so the donor shard has a secondary
          // that must apply the delete oplog entry.
          const st = new ShardingTest({mongos: 1, config: 1, shard: 2, rs: {nodes: 2}});
          const mongosDB = st.s.getDB("test");
          const mongosColl = mongosDB.mycoll;
      
          // Enable sharding on the test DB and ensure its primary shard is the first shard (st.rs0).
          assert.commandWorked(mongosDB.adminCommand({enableSharding: mongosDB.getName()}));
          st.ensurePrimaryShard(mongosDB.getName(), st.rs0.getURL());
      
          // Shard the test collection on the "x" field.
          assert.commandWorked(mongosDB.adminCommand({
              shardCollection: mongosColl.getFullName(),
              key: {x: 1},
          }));
      
          // Insert a document whose shard key value is a literal undefined. This is the value that
          // later appears in the delete oplog entry as `o: {x: undefined, _id: ...}`.
          assert.writeOK(mongosColl.insert({x: undefined}));
      
          // Move the chunk containing the document to the second shard (st.rs1).
          // NOTE(review): the chunk is located with {x: null}; presumably this resolves to the same
          // chunk as the {x: undefined} document because the collection was never split -- confirm.
          // NOTE(review): _waitForDelete is assumed to make moveChunk block until the donor has
          // deleted the migrated range, so the delete oplog entry exists before we proceed -- confirm.
          assert.commandWorked(mongosDB.adminCommand({
              moveChunk: mongosColl.getFullName(),
              find: {x: null},
              to: st.rs1.getURL(),
              _waitForDelete: true,
          }));
      
          // Wait for the secondary of the donor shard (st.rs0) to process the deletion from the
          // moveChunk operation. On affected versions the secondary is reported to abort here with
          // "Fatal assertion 16359 BadValue: cannot compare to undefined".
          st.rs0.awaitReplication();
      
          st.stop();
      })();
      
      Show
      python buildscripts/resmoke.py --suites=no_server repro_server31953.js repro_server31953.js ( function () { "use strict" ; const st = new ShardingTest({mongos: 1, config: 1, shard: 2, rs: {nodes: 2}}); const mongosDB = st.s.getDB( "test" ); const mongosColl = mongosDB.mycoll; // Enable sharding on the test DB and ensure its primary is shard0000. assert.commandWorked(mongosDB.adminCommand({enableSharding: mongosDB.getName()})); st.ensurePrimaryShard(mongosDB.getName(), st.rs0.getURL()); // Shard the test collection on the "x" field. assert.commandWorked(mongosDB.adminCommand({ shardCollection: mongosColl.getFullName(), key: {x: 1}, })); // Insert a document with a literal undefined value. assert.writeOK(mongosColl.insert({x: undefined})); // Move the chunk containing the document to shard0001. assert.commandWorked(mongosDB.adminCommand({ moveChunk: mongosColl.getFullName(), find: {x: null }, to: st.rs1.getURL(), _waitForDelete: true , })); // Wait for the secondary of shard0000 to process the deletion from the moveChunk operation. st.rs0.awaitReplication(); st.stop(); })();
    • Repl 2017-12-04
    • 0

      The changes from SERVER-29136 made it so that oplog entries with opType='d' in a sharded cluster include the document's shard key. This means that the query planner can no longer use the IDHACK stage and must instead construct an actual match expression (a ComparisonMatchExpression, for example). Any error that occurs while initializing this match expression will cause secondaries to crash, because they fail while attempting to find the document to remove.

      [js_test:server31953] 2017-11-13T21:08:28.784-0500 d20011| 2017-11-13T21:08:28.783-0500 F REPL     [repl writer worker 1] writer worker caught exception: BadValue: cannot compare to undefined on: { ts: Timestamp 1510625308000|12, t: 1, h: 409688109345307025, v: 2, op: "d", ns: "test.mycoll", ui: UUID("cd5b502a-f7e4-4078-8e65-23c718458e0b"), fromMigrate: true, wall: new Date(1510625308778), o: { x: undefined, _id: ObjectId('5a0a501bd5434448a509f0a5') } }
      [js_test:server31953] 2017-11-13T21:08:28.784-0500 d20011| 2017-11-13T21:08:28.784-0500 F -        [repl writer worker 1] Fatal assertion 16359 BadValue: cannot compare to undefined at src/mongo/db/repl/sync_tail.cpp 1181
      [js_test:server31953] 2017-11-13T21:08:28.785-0500 d20011| 2017-11-13T21:08:28.784-0500 F -        [repl writer worker 1]
      [js_test:server31953] 2017-11-13T21:08:28.785-0500 d20011|
      [js_test:server31953] 2017-11-13T21:08:28.785-0500 d20011| ***aborting after fassert() failure
      

            Assignee:
            spencer@mongodb.com Spencer Brody (Inactive)
            Reporter:
            max.hirschhorn@mongodb.com Max Hirschhorn
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

              Created:
              Updated:
              Resolved: