Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-37498

Remove the slowBackgroundIndexBuild failpoint, which skips documents while building an index and leads to missing index entries

    • Type: Bug
    • Resolution: Fixed
    • Priority: Major - P3
    • Fix Version/s: 4.1.7
    • Affects Version/s: None
    • Component/s: Storage
    • None
    • Fully Compatible
    • ALL
    • Hide
      python buildscripts/resmoke.py --suites=no_server repro_server37498.js
      
      repro_server37498.js
      (function() {
          "use strict";
      
          const conn = MongoRunner.runMongod();
          assert.neq(null, conn, "mongod was unable to start up");
      
          const db = conn.getDB("test");
      
          assert.commandWorked(
              db.adminCommand({configureFailPoint: "slowBackgroundIndexBuild", mode: "alwaysOn"}));
      
          assert.commandWorked(db.mycoll.insert({a: 0}));
          assert.commandWorked(db.mycoll.createIndex({a: 1}, {background: true}));
      
          assert.commandWorked(
              db.adminCommand({configureFailPoint: "slowBackgroundIndexBuild", mode: "off"}));
      
          MongoRunner.stopMongod(conn);
      })();
      
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 Collection validation failed on host localhost:20020 with response: {
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 	"ns" : "test.mycoll",
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 	"nInvalidDocuments" : NumberLong(0),
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 	"nrecords" : 1,
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 	"nIndexes" : 2,
      [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 	"keysPerIndex" : {
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		"test.mycoll.$_id_" : 1,
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		"test.mycoll.$a_1" : 0
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 	},
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 	"indexDetails" : {
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		"test.mycoll.$_id_" : {
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 			"valid" : false
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		},
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		"test.mycoll.$a_1" : {
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 			"valid" : false
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 		}
      [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 	},
      [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 	"valid" : false,
      [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 	"warnings" : [ ],
      [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 	"errors" : [
      [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 		"one or more indexes contain invalid index entries."
      [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 	],
      [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 	"advice" : "A corrupt namespace has been detected. See http://dochub.mongodb.org/core/data-recovery for recovery steps.",
      [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 	"ok" : 1
      [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 }
      
      Show
      python buildscripts/resmoke.py --suites=no_server repro_server37498.js repro_server37498.js ( function () { "use strict" ; const conn = MongoRunner.runMongod(); assert.neq( null , conn, "mongod was unable to start up" ); const db = conn.getDB( "test" ); assert.commandWorked( db.adminCommand({configureFailPoint: "slowBackgroundIndexBuild" , mode: "alwaysOn" })); assert.commandWorked(db.mycoll.insert({a: 0})); assert.commandWorked(db.mycoll.createIndex({a: 1}, {background: true })); assert.commandWorked( db.adminCommand({configureFailPoint: "slowBackgroundIndexBuild" , mode: "off" })); MongoRunner.stopMongod(conn); })(); [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 Collection validation failed on host localhost:20020 with response: { [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 "ns" : "test.mycoll", [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 "nInvalidDocuments" : NumberLong(0), [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 "nrecords" : 1, [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 "nIndexes" : 2, [js_test:repro_server37498] 2018-10-05T20:32:01.133-0500 "keysPerIndex" : { [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "test.mycoll.$_id_" : 1, [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "test.mycoll.$a_1" : 0 [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 }, [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "indexDetails" : { [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "test.mycoll.$_id_" : { [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "valid" : false [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 }, [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "test.mycoll.$a_1" : { [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 "valid" : false [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 } [js_test:repro_server37498] 2018-10-05T20:32:01.134-0500 }, [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 
"valid" : false, [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 "warnings" : [ ], [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 "errors" : [ [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 "one or more indexes contain invalid index entries." [js_test:repro_server37498] 2018-10-05T20:32:01.135-0500 ], [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 "advice" : "A corrupt namespace has been detected. See http://dochub.mongodb.org/core/data-recovery for recovery steps.", [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 "ok" : 1 [js_test:repro_server37498] 2018-10-05T20:32:01.137-0500 }
    • Storage NYC 2018-12-31, Storage NYC 2019-01-14

      The issue is that, while the slowBackgroundIndexBuild failpoint is set, PlanExecutor::getNextSnapshotted() is called repeatedly without MultiIndexBlockImpl::insert() ever being called. Each continue returns to the loop condition with retries == 0, which advances the executor to the next document, so the document that was just fetched is never inserted into the index. It may be sufficient to set retries = 1 before the continue, but I'd be curious whether the slowBackgroundIndexBuild failpoint is still necessary after the introduction of the hangBeforeIndexBuildOf and hangAfterIndexBuildOf failpoints as part of SERVER-26387.

      src/mongo/db/catalog/index_create_impl.cpp
      auto exec =
          InternalPlanner::collectionScan(_opCtx, _collection->ns().ns(), _collection, yieldPolicy);
      
      Snapshotted<BSONObj> objToIndex;
      RecordId loc;
      PlanExecutor::ExecState state;
      int retries = 0;  // non-zero when retrying our last document.
      while (retries ||
              (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc))) ||
              MONGO_FAIL_POINT(hangAfterStartingIndexBuild)) {
          try {
              if (_allowInterruption)
                  _opCtx->checkForInterrupt();
      
              if (!(retries || PlanExecutor::ADVANCED == state) ||
                  MONGO_FAIL_POINT(slowBackgroundIndexBuild)) {
                  log() << "Hanging index build due to failpoint";
                  invariant(_allowInterruption);
                  sleepmillis(1000);
                  continue;
              }
      

            Assignee:
            gregory.wlodarek@mongodb.com Gregory Wlodarek
            Reporter:
            max.hirschhorn@mongodb.com Max Hirschhorn
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: