Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-36897

OplogReader.hasNext can return false -> true, confusing `checkOplogs`

    • Fully Compatible
    • ALL
    • v4.0
    • Hide

      replsets/too_stale_secondary.js fails with the following patch:

      diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
      index 162caf4cf3..7dab48aaa6 100644
      --- a/src/mongo/db/exec/collection_scan.cpp
      +++ b/src/mongo/db/exec/collection_scan.cpp
      @@ -48,6 +48,8 @@
       
       namespace mongo {
       
      +MONGO_FAIL_POINT_DEFINE(cappedPositionLost);
      +
       using std::unique_ptr;
       using std::vector;
       using stdx::make_unique;
      @@ -78,6 +80,17 @@ CollectionScan::CollectionScan(OperationContext* opCtx,
       }
       
       PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) {
      +    if (MONGO_FAIL_POINT(cappedPositionLost)) {
      +        _isDead = true;
      +        Status status(ErrorCodes::CappedPositionLost,
      +                      str::stream() << "CollectionScan died due to failure to restore "
      +                                    << "tailable cursor position. "
      +                                    << "Last seen record id: "
      +                                    << _lastSeenId);
      +        *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
      +        return PlanStage::DEAD;
      +    }
      +
           if (_isDead) {
               Status status(
                   ErrorCodes::CappedPositionLost,
      diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
      index 97bacd8cdd..0cafca2ad0 100644
      --- a/src/mongo/shell/replsettest.js
      +++ b/src/mongo/shell/replsettest.js
      @@ -2009,7 +2009,7 @@ var ReplSetTest = function(opts) {
            */
           function checkOplogs(rst, slaves, msgPrefix = 'checkOplogs') {
               slaves = slaves || rst._slaves;
      -        var OplogReader = function(mongo) {
      +        var OplogReader = function(mongo, failOnFirst) {
                   this.next = function() {
                       if (!this.cursor)
                           throw new Error("OplogReader is not open!");
      @@ -2024,6 +2024,13 @@ var ReplSetTest = function(opts) {
                       if (!this.cursor)
                           throw new Error("OplogReader is not open!");
                       try {
      +                    if (this.failOnFirst && this.failed === false) {
      +                        this.failed = true;
      +                        assert.commandWorked(this.mongo.adminCommand(
      +                            {configureFailPoint: "cappedPositionLost", mode: {"times": 1}}));
      +                        printjson("Called cappedPositionLost.");
      +                    }
      +
                           return this.cursor.hasNext();
                       } catch (err) {
                           print("Error: hasNext threw '" + err.message + "' on " + this.mongo.host);
      @@ -2059,6 +2066,8 @@ var ReplSetTest = function(opts) {
                   this.lastDoc = null;
                   this.cursor = null;
                   this.mongo = mongo;
      +            this.failOnFirst = failOnFirst;
      +            this.failed = false;
               };
       
               if (slaves.length >= 1) {
      @@ -2080,7 +2089,8 @@ var ReplSetTest = function(opts) {
                           continue;
                       }
       
      -                readers[i] = new OplogReader(node);
      +                print("Creating OplogReader. I==2? " + (i == 2));
      +                readers[i] = new OplogReader(node, i == 2);
                       var currTS = readers[i].getFirstDoc().ts;
                       // Find the reader which has the smallestTS. This reader should have the most
                       // number of documents in the oplog.
      @@ -2108,7 +2118,7 @@ var ReplSetTest = function(opts) {
                           var otherOplogEntry = readers[i].next();
                           if (!bsonBinaryEqual(oplogEntry, otherOplogEntry)) {
                               var query = prevOplogEntry ? {ts: {$lte: prevOplogEntry.ts}} : {};
      -                        rst.nodes.forEach(node => this.dumpOplog(node, query, 100));
      +                        // rst.nodes.forEach(node => this.dumpOplog(node, query, 100));
                               var log = msgPrefix +
                                   ", non-matching oplog entries for the following nodes: \n" +
                                   firstReader.mongo.host + ": " + tojsononeline(oplogEntry) + "\n" +
      
      Show
      replsets/too_stale_secondary.js fails with the following patch: diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index 162caf4cf3..7dab48aaa6 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -48,6 +48,8 @@ namespace mongo { +MONGO_FAIL_POINT_DEFINE(cappedPositionLost); + using std::unique_ptr; using std::vector; using stdx::make_unique; @@ -78,6 +80,17 @@ CollectionScan::CollectionScan(OperationContext* opCtx, } PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) { + if (MONGO_FAIL_POINT(cappedPositionLost)) { + _isDead = true ; + Status status(ErrorCodes::CappedPositionLost, + str::stream() << "CollectionScan died due to failure to restore " + << "tailable cursor position. " + << "Last seen record id: " + << _lastSeenId); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); + return PlanStage::DEAD; + } + if (_isDead) { Status status( ErrorCodes::CappedPositionLost, diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 97bacd8cdd..0cafca2ad0 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -2009,7 +2009,7 @@ var ReplSetTest = function(opts) { */ function checkOplogs(rst, slaves, msgPrefix = 'checkOplogs' ) { slaves = slaves || rst._slaves; - var OplogReader = function(mongo) { + var OplogReader = function(mongo, failOnFirst) { this .next = function() { if (! this .cursor) throw new Error( "OplogReader is not open!" ); @@ -2024,6 +2024,13 @@ var ReplSetTest = function(opts) { if (! this .cursor) throw new Error( "OplogReader is not open!" ); try { + if ( this .failOnFirst && this .failed === false ) { + this .failed = true ; + assert .commandWorked( this .mongo.adminCommand( + {configureFailPoint: "cappedPositionLost" , mode: { "times" : 1}})); + printjson( "Called cappedPositionLost." ); + } + return this .cursor.hasNext(); } catch (err) { print( "Error: hasNext threw '" + err.message + "' on " + this .mongo.host); @@ -2059,6 +2066,8 @@ var ReplSetTest = function(opts) { this .lastDoc = null ; this .cursor = null ; this .mongo = mongo; + this .failOnFirst = failOnFirst; + this .failed = false ; }; if (slaves.length >= 1) { @@ -2080,7 +2089,8 @@ var ReplSetTest = function(opts) { continue ; } - readers[i] = new OplogReader(node); + print( "Creating OplogReader. I==2? " + (i == 2)); + readers[i] = new OplogReader(node, i == 2); var currTS = readers[i].getFirstDoc().ts; // Find the reader which has the smallestTS. This reader should have the most // number of documents in the oplog. @@ -2108,7 +2118,7 @@ var ReplSetTest = function(opts) { var otherOplogEntry = readers[i].next(); if (!bsonBinaryEqual(oplogEntry, otherOplogEntry)) { var query = prevOplogEntry ? {ts: {$lte: prevOplogEntry.ts}} : {}; - rst.nodes.forEach(node => this .dumpOplog(node, query, 100)); + // rst.nodes.forEach(node => this .dumpOplog(node, query, 100)); var log = msgPrefix + ", non-matching oplog entries for the following nodes: \n" + firstReader.mongo.host + ": " + tojsononeline(oplogEntry) + "\n" +
    • TIG 2018-09-24
    • 18
    • 2

      `ReplSetTest.checkOplog` will establish a reverse table scanning cursor on the oplog from each node, then "BFS" to compare the oplog for consistency. It will first get the latest entry in each oplog and perform a comparison, then advance all of the cursors and compare again. It allows the oplogs to have a different number of entries, so long as they match on the latest entries.

      To do so, checkOplog relies on OplogReader.hasNext to always return false after it has done so for the first time. However, if the call that executes the query receives a CappedPositionLost (thus not instantiating the shell's internal _cursor, a follow-up OplogReader.hasNext can return true, having re-issued the find and receiving a batch from the oplog starting at the latest entry.

            Assignee:
            david.bradford@mongodb.com David Bradford (Inactive)
            Reporter:
            daniel.gottlieb@mongodb.com Daniel Gottlieb (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

              Created:
              Updated:
              Resolved: