Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-17649

Secondary oplog replication inserts stall/slow under heavy insert load using WT

    • Type: Icon: Bug Bug
    • Resolution: Cannot Reproduce
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: 3.0.0
    • Component/s: Replication, WiredTiger
    • None
    • Fully Compatible
    • ALL

      We noticed that one of our 4 shards had a secondary with high replication lag.

      Replica shard log has periodic very slow serverStatus with large values for "after oplog":

      2015-03-18T18:30:44.539+0000 I COMMAND [conn82] serverStatus was very slow:

      { after basic: 0, after asserts: 0, after connections: 0, after cursors: 0, after extra_info: 0, after globalLock: 0, after locks: 0, after network: 0, after opcounters: 0, after opcountersRepl: 0, after oplog: 72189, after repl: 72189, after storageEngine: 72189, after wiredTiger: 72189, at end: 72189 }

      Restarted the replica shard hoping that this was a fluke, but the shard quickly went back to a low insertion rate.

      Running db.getSiblingDB("local").oplog.rs.stats() produces:

      {
      	"ns" : "local.oplog.rs",
      	"count" : 174896077,
      	"size" : 125833992359,
      	"avgObjSize" : 719,
      	"storageSize" : 62140203008,
      	"capped" : true,
      	"max" : -1,
      	"maxSize" : NumberLong("125829120000"),
      	"wiredTiger" : {
      		"metadata" : {
      			"formatVersion" : 1,
      			"oplogKeyExtractionVersion" : 1
      		},
      		"creationString" : "allocation_size=4KB,app_metadata=(formatVersion=1,oplogKeyExtractionVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=0,checkpoint=(WiredTigerCheckpoint.23998=(addr=\"01e30ad2de81e436162abee3ddf13ebde46538e310e3ddf17bb3e43c3b848d808080e50e77d81fc0e50bad737fc0\",order=23998,time=1426703669,size=50154676224,write_gen=20018662)),checkpoint_lsn=(7514,101134976),checksum=uncompressed,collator=,columns=,dictionary=0,format=btree,huffman_key=,huffman_value=,id=9,internal_item_max=0,internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=1MB,memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,prefix_compression_min=4,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,value_format=u,version=(major=1,minor=1)",
      		"type" : "file",
      		"uri" : "statistics:table:local/collection-2--4790446482890547801",
      		"LSM" : {
      			"bloom filters in the LSM tree" : 0,
      			"bloom filter false positives" : 0,
      			"bloom filter hits" : 0,
      			"bloom filter misses" : 0,
      			"bloom filter pages evicted from cache" : 0,
      			"bloom filter pages read into cache" : 0,
      			"total size of bloom filters" : 0,
      			"sleep for LSM checkpoint throttle" : 0,
      			"chunks in the LSM tree" : 0,
      			"highest merge generation in the LSM tree" : 0,
      			"queries that could have benefited from a Bloom filter that did not exist" : 0,
      			"sleep for LSM merge throttle" : 0
      		},
      		"block-manager" : {
      			"file allocation unit size" : 4096,
      			"blocks allocated" : 176633,
      			"checkpoint size" : 50154676224,
      			"allocations requiring file extension" : 0,
      			"blocks freed" : 206417,
      			"file magic number" : 120897,
      			"file major version number" : 1,
      			"minor version number" : 0,
      			"file bytes available for reuse" : 12083351552,
      			"file size in bytes" : 62140203008
      		},
      		"btree" : {
      			"column-store variable-size deleted values" : 0,
      			"column-store fixed-size leaf pages" : 0,
      			"column-store internal pages" : 0,
      			"column-store variable-size leaf pages" : 0,
      			"pages rewritten by compaction" : 0,
      			"number of key/value pairs" : 0,
      			"fixed-record size" : 0,
      			"maximum tree depth" : 32,
      			"maximum internal page key size" : 368,
      			"maximum internal page size" : 4096,
      			"maximum leaf page key size" : 3276,
      			"maximum leaf page size" : 32768,
      			"maximum leaf page value size" : 1048576,
      			"overflow pages" : 0,
      			"row-store internal pages" : 0,
      			"row-store leaf pages" : 0
      		},
      		"cache" : {
      			"bytes read into cache" : 5684148385,
      			"bytes written from cache" : 5626150236,
      			"checkpoint blocked page eviction" : 11413,
      			"unmodified pages evicted" : 175446,
      			"page split during eviction deepened the tree" : 1,
      			"modified pages evicted" : 3911,
      			"data source pages selected for eviction unable to be evicted" : 2675,
      			"hazard pointer blocked page eviction" : 589,
      			"internal pages evicted" : 1882,
      			"pages split during eviction" : 585,
      			"in-memory page splits" : 553,
      			"overflow values cached in memory" : 0,
      			"pages read into cache" : 178344,
      			"overflow pages read into cache" : 0,
      			"pages written from cache" : 176607
      		},
      		"compression" : {
      			"raw compression call failed, no additional data available" : 0,
      			"raw compression call failed, additional data available" : 0,
      			"raw compression call succeeded" : 0,
      			"compressed pages read" : 176000,
      			"compressed pages written" : 173316,
      			"page written failed to compress" : 3,
      			"page written was too small to compress" : 3288
      		},
      		"cursor" : {
      			"create calls" : 128,
      			"insert calls" : 10935859,
      			"bulk-loaded cursor-insert calls" : 0,
      			"cursor-insert key and value bytes inserted" : 5679356593,
      			"next calls" : 7327404,
      			"prev calls" : 35,
      			"remove calls" : 561,
      			"cursor-remove key bytes removed" : 5049,
      			"reset calls" : 10937358,
      			"search calls" : 0,
      			"search near calls" : 1122,
      			"update calls" : 0,
      			"cursor-update value bytes updated" : 0
      		},
      		"reconciliation" : {
      			"dictionary matches" : 0,
      			"internal page multi-block writes" : 25,
      			"leaf page multi-block writes" : 605,
      			"maximum blocks required for a page" : 236,
      			"internal-page overflow keys" : 0,
      			"leaf-page overflow keys" : 0,
      			"overflow values written" : 0,
      			"pages deleted" : 2780,
      			"page checksum matches" : 5294,
      			"page reconciliation calls" : 6475,
      			"page reconciliation calls for eviction" : 1885,
      			"leaf page key bytes discarded using prefix compression" : 0,
      			"internal page key bytes discarded using suffix compression" : 175625
      		},
      		"session" : {
      			"object compaction" : 0,
      			"open cursor count" : 128
      		},
      		"transaction" : {
      			"update conflicts" : 0
      		}
      	},
      	"nindexes" : 0,
      	"totalIndexSize" : 0,
      	"indexSizes" : {
      		
      	},
      	"ok" : 1
      }
      

        1. additional_oplog_stats.json
          5 kB
          Oleg Rekutin
        2. gdbmon.py
          3 kB
          Alexander Gorrod
        3. insert_chart.png
          48 kB
          Oleg Rekutin
        4. node2withproblem-serverStatus.json
          12 kB
          Oleg Rekutin
        5. nodewithoutproblem-oplogstats.json
          5 kB
          Oleg Rekutin
        6. nodewithoutproblem-serverStatus.json
          12 kB
          Oleg Rekutin
        7. nodewithproblem-oplogstats.json
          5 kB
          Oleg Rekutin
        8. nodewithproblem-serverStatus.json
          12 kB
          Oleg Rekutin
        9. repl_insert_chart.png
          71 kB
          Oleg Rekutin
        10. Screen Shot 2015-03-18 at 2.38.22 PM.png
          29 kB
          Oleg Rekutin
        11. Screen Shot 2015-03-18 at 2.42.00 PM.png
          33 kB
          Oleg Rekutin
        12. shard-bad-repl-drain.log
          32 kB
          Oleg Rekutin
        13. ss-bad.log.gz
          447 kB
          Oleg Rekutin
        14. ss-good.log.gz
          573 kB
          Oleg Rekutin

            Assignee:
            alexander.gorrod@mongodb.com Alexander Gorrod
            Reporter:
            oleg@evergage.com Oleg Rekutin
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated:
              Resolved: