Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-11067

Tiered Storage cache stuck with FLCS

    • StorEng - Refinement Pipeline

      The format CONFIG below leads to a cache-stuck failure about 50% of the time.

      ############################################
      #  RUN PARAMETERS: V3
      ############################################
      assert.read_timestamp=0
      backup=0
      backup.incremental=off
      backup.incr_granularity=444
      block_cache=0
      block_cache.cache_on_checkpoint=1
      block_cache.cache_on_writes=1
      block_cache.size=51
      btree.huffman_value=0
      buffer_alignment=0
      cache=176
      cache.evict_max=1
      cache.eviction_dirty_target=0
      cache.eviction_dirty_trigger=0
      cache.minimum=20
      checkpoint=wiredtiger
      checkpoint.log_size=98
      checkpoint.wait=81
      debug.checkpoint_retention=7
      debug.cursor_reposition=0
      debug.eviction=0
      debug.log_retention=6
      debug.realloc_exact=0
      debug.realloc_malloc=0
      debug.slow_checkpoint=0
      debug.table_logging=0
      debug.update_restore_evict=0
      disk.data_extend=0
      disk.direct_io=0
      disk.encryption=rotn-7
      disk.mmap=1
      disk.mmap_all=0
      file_manager.close_handle_minimum=17
      file_manager.close_idle_time=0
      file_manager.close_scan_interval=7
      format.abort=0
      format.independent_thread_rng=1
      format.major_timeout=0
      import=0
      logging=0
      logging.compression=none
      logging.file_max=429351
      logging.prealloc=1
      logging.remove=1
      ops.alter=0
      ops.compaction=0
      ops.hs_cursor=1
      ops.bound_cursor=1
      ops.prepare=0
      ops.random_cursor=0
      ops.salvage=0
      ops.verify=0
      quiet=1
      random.data_seed=14391668
      random.extra_seed=5888921
      runs.in_memory=0
      runs.ops=0
      runs.predictable_replay=0
      runs.rows=5000000
      runs.tables=3
      runs.threads=10
      runs.timer=11
      runs.verify_failure_dump=0
      statistics.mode=fast
      statistics_log.sources=off
      stress.aggressive_sweep=0
      stress.checkpoint=0
      stress.checkpoint_evict_page=0
      stress.checkpoint_prepare=0
      stress.evict_reposition=0
      stress.failpoint_eviction_fail_after_reconciliation=1
      stress.failpoint_hs_delete_key_from_ts=0
      stress.hs_checkpoint_delay=0
      stress.hs_search=0
      stress.hs_sweep=0
      stress.sleep_before_read_overflow_onpage=0
      stress.split_1=0
      stress.split_2=0
      stress.split_3=0
      stress.split_4=0
      stress.split_5=0
      stress.split_6=0
      stress.split_7=0
      stress.split_8=0
      tiered_storage.flush_frequency=18
      tiered_storage.storage_source=dir_store
      transaction.implicit=0
      transaction.timestamps=1
      wiredtiger.config=off
      wiredtiger.rwlock=1
      wiredtiger.leak_memory=0
      ############################################
      #  TABLE PARAMETERS: table 1
      ############################################
      table1.btree.compression=lz4
      table1.btree.dictionary=1
      table1.btree.internal_key_truncation=1
      table1.btree.internal_page_max=17
      table1.btree.key_max=123
      table1.btree.key_min=23
      table1.btree.leaf_page_max=13
      table1.btree.memory_page_max=5
      table1.btree.prefix_len=0
      table1.btree.prefix_compression=1
      table1.btree.prefix_compression_min=3
      table1.btree.reverse=0
      table1.btree.split_pct=90
      table1.btree.value_max=2603
      table1.btree.value_min=8
      table1.disk.checksum=off
      table1.disk.firstfit=0
      table1.ops.pareto=0
      table1.ops.pareto.skew=93
      table1.ops.pct.delete=19
      table1.ops.pct.insert=1
      table1.ops.pct.modify=64
      table1.ops.pct.read=16
      table1.ops.pct.write=0
      table1.ops.truncate=1
      table1.runs.mirror=1
      table1.runs.source=file
      table1.runs.type=row-store
      ############################################
      #  TABLE PARAMETERS: table 2
      ############################################
      table2.btree.compression=snappy
      table2.btree.dictionary=0
      table2.btree.internal_key_truncation=1
      table2.btree.internal_page_max=12
      table2.btree.key_max=93
      table2.btree.key_min=29
      table2.btree.leaf_page_max=11
      table2.btree.memory_page_max=2
      table2.btree.prefix_len=0
      table2.btree.prefix_compression=1
      table2.btree.prefix_compression_min=3
      table2.btree.reverse=0
      table2.btree.split_pct=58
      table2.btree.value_max=3006
      table2.btree.value_min=14
      table2.disk.checksum=on
      table2.disk.firstfit=0
      table2.ops.pareto=0
      table2.ops.pareto.skew=21
      table2.ops.pct.delete=3
      table2.ops.pct.insert=66
      table2.ops.pct.modify=13
      table2.ops.pct.read=2
      table2.ops.pct.write=16
      table2.ops.truncate=1
      table2.runs.mirror=1
      table2.runs.source=table
      table2.runs.type=row-store
      ############################################
      #  TABLE PARAMETERS: table 3
      ############################################
      table3.btree.bitcnt=6
      table3.btree.compression=snappy
      table3.btree.internal_key_truncation=0
      table3.btree.internal_page_max=16
      table3.btree.leaf_page_max=16
      table3.btree.memory_page_max=1
      table3.btree.split_pct=81
      table3.disk.checksum=uncompressed
      table3.disk.firstfit=0
      table3.ops.pareto=1
      table3.ops.pareto.skew=77
      table3.ops.pct.delete=53
      table3.ops.pct.insert=16
      table3.ops.pct.modify=15
      table3.ops.pct.read=10
      table3.ops.pct.write=6
      table3.ops.truncate=1
      table3.runs.mirror=1
      table3.runs.source=table
      table3.runs.type=fixed-length column-store

      In this configuration, table 3 is FLCS (fixed-length column-store). It looks like format just sets up tiered storage on the connection, so all of the tables – including this FLCS table – are tiered. We've made little effort (so far) to make sure non-row-store tables work on tiered storage. But I figured I should flag this in case it turns out to be a symptom of something more significant.

            Assignee:
            backlog-server-storage-engines [DO NOT USE] Backlog - Storage Engines Team
            Reporter:
            keith.smith@mongodb.com Keith Smith
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

              Created:
              Updated: