Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-6512

test/format timed out with prepare-conflict

    • Type: Icon: Bug Bug
    • Resolution: Duplicate
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • None

      Running test/format, I have hit the following error several times:

      t: FAILED: read_op/62: now < start || now - start < 60: WT_CURSOR.prev: timed out with prepare-conflict
      t: process aborting

      I see this in about 1 of 100 runs. Sometimes the operation is WT_CURSOR.next and occasionally it is WT_CURSOR.search_near. These errors occur with the following CONFIG file:

      ############################################
      #  RUN PARAMETERS: V2
      ############################################
      assert.commit_timestamp=0
      assert.read_timestamp=0
      backup=0
      backup.incremental=off
      backup.incr_granularity=6896
      btree.bitcnt=4
      btree.compression=none
      btree.dictionary=0
      btree.huffman_key=0
      btree.huffman_value=0
      btree.internal_key_truncation=1
      btree.internal_page_max=16
      btree.key_gap=15
      btree.key_max=35
      btree.key_min=30
      btree.leaf_page_max=16
      btree.memory_page_max=5
      btree.prefix_compression=1
      btree.prefix_compression_min=4
      btree.repeat_data_pct=50
      btree.reverse=0
      btree.split_pct=82
      btree.value_max=442
      btree.value_min=20
      cache=40
      cache.evict_max=2
      cache.minimum=20
      checkpoint=wiredtiger
      checkpoint.log_size=23
      checkpoint.wait=67
      disk.checksum=uncompressed
      disk.data_extend=0
      disk.direct_io=0
      disk.encryption=none
      disk.firstfit=0
      disk.mmap=1
      disk.mmap_all=0
      format.abort=0
      format.independent_thread_rng=1
      format.major_timeout=0
      logging=0
      logging.archive=0
      logging.compression=none
      logging.file_max=342936
      logging.prealloc=0
      lsm.auto_throttle=1
      lsm.bloom=1
      lsm.bloom_bit_count=58
      lsm.bloom_hash_count=26
      lsm.bloom_oldest=0
      lsm.chunk_size=3
      lsm.merge_max=16
      lsm.worker_threads=4
      ops.alter=0
      ops.compaction=0
      ops.hs_cursor=0
      ops.pct.delete=13
      ops.pct.insert=29
      ops.pct.modify=14
      ops.pct.read=33
      ops.pct.write=11
      ops.prepare=1
      ops.random_cursor=0
      ops.rebalance=0
      ops.salvage=1
      ops.truncate=1
      ops.verify=1
      quiet=1
      runs=1
      runs.in_memory=0
      runs.ops=0
      runs.rows=1000000
      runs.source=table
      runs.threads=4
      runs.timer=6
      runs.type=row-store
      runs.verify_failure_dump=0
      statistics=0
      statistics.server=0
      stress.aggressive_sweep=0
      stress.checkpoint=0
      stress.hs_checkpoint_delay=0
      stress.hs_sweep=0
      stress.split_1=0
      stress.split_2=0
      stress.split_3=0
      stress.split_4=0
      stress.split_5=0
      stress.split_6=0
      stress.split_7=0
      stress.split_8=0
      transaction.frequency=100
      transaction.isolation=snapshot
      transaction.rollback_to_stable=0
      transaction.timestamps=1
      wiredtiger.config=
      wiredtiger.rwlock=1
      wiredtiger.leak_memory=0
      ############################################

      As indicated by the failure message, the failed thread has been waiting for 60 seconds on a WT_PREPARE_CONFLICT.

      Thread 1 (Thread 0x7fbf009c8700 (LWP 4142)):
      #0  0x00007fbf0419ae97 in __nptl_clear_internal_signals (set=<optimized out>)
          at ../sysdeps/unix/sysv/linux/nptl-signals.h:49
      #1  __libc_signal_block_app (set=<optimized out>) at ../sysdeps/unix/sysv/linux/nptl-signals.h:69
      #2  __GI_raise (sig=2) at ../sysdeps/unix/sysv/linux/raise.c:40
      #3  0x00007fbf0419c801 in __GI_abort () at abort.c:61
      #4  0x000000000041dc2e in testutil_die (e=e@entry=0, 
          fmt=fmt@entry=0x5fac18 "%s/%d: %s: %s: timed out with prepare-conflict")
          at ../../../test/utility/misc.c:66
      #5  0x0000000000411ae8 in read_op (exactp=0x0, op=<optimized out>, cursor=0x346e000)
          at ../../../test/format/format.i:61
      #6  nextprev (tinfo=tinfo@entry=0x3bba000, cursor=cursor@entry=0x346e000, next=next@entry=true)
          at ../../../test/format/ops.c:1274
      #7  0x0000000000413108 in ops (arg=0x3bba000) at ../../../test/format/ops.c:1034
      #8  0x00007fbf050806db in start_thread (arg=0x7fbf009c8700) at pthread_create.c:463
      #9  0x00007fbf0427d88f in gnu_dev_makedev (__major=<optimized out>, __minor=<optimized out>)

      The other three format threads have finished committing or rolling back transactions and are blocked in eviction waiting for pages to evict:

      Thread 10 (Thread 0x7fbf001c7700 (LWP 4143)):
      #0  0x00007fbf05086ed9 in futex_reltimed_wait_cancelable (private=<optimized out>, 
          reltime=0x7fbf001c6a00, expected=0, futex_word=0x2732c58)
          at ../sysdeps/unix/sysv/linux/futex-internal.h:142
      #1  __pthread_cond_wait_common (abstime=0x7fbf001c6ac0, mutex=0x2732c08, cond=0x2732c30)
          at pthread_cond_wait.c:533
      #2  __pthread_cond_timedwait (cond=cond@entry=0x2732c30, mutex=mutex@entry=0x2732c08, 
          abstime=abstime@entry=0x7fbf001c6ac0) at pthread_cond_wait.c:667
      #3  0x00000000004a0b62 in __wt_cond_wait_signal (session=session@entry=0x2761330, cond=0x2732c00, 
          usecs=usecs@entry=10000, run_func=run_func@entry=0x0, signalled=signalled@entry=0x7fbf001c6b50)
          at ../src/os_posix/os_mtx_cond.c:115
      #4  0x0000000000469149 in __wt_cond_wait (run_func=0x0, usecs=10000, cond=<optimized out>, 
          session=0x2761330) at ../src/include/misc.i:19
      #5  __wt_cache_eviction_worker (session=session@entry=0x2761330, busy=<optimized out>, 
          readonly=readonly@entry=false, pct_full=<optimized out>) at ../src/evict/evict_lru.c:2350
      #6  0x00000000004f0de8 in __wt_cache_eviction_check (busy=false, readonly=false, didworkp=0x0, 
          didworkp=0x0, readonly=false, busy=false, session=0x2761330) at ../src/include/cache.i:479
      #7  __wt_txn_commit (session=session@entry=0x2761330, cfg=cfg@entry=0x7fbf001c6cb0)
          at ../src/txn/txn.c:1465
      #8  0x00000000004cdc20 in __session_commit_transaction (wt_session=0x2761330, 
          config=<optimized out>) at ../src/session/session_api.c:1649
      #9  0x00000000004113ae in commit_transaction (tinfo=tinfo@entry=0x3bbab00, 
          prepared=prepared@entry=false) at ../../../test/format/ops.c:550
      #10 0x0000000000413b21 in ops (arg=0x3bbab00) at ../../../test/format/ops.c:1086
      #11 0x00007fbf050806db in start_thread (arg=0x7fbf001c7700) at pthread_create.c:463
      #12 0x00007fbf0427d88f in gnu_dev_makedev (__major=<optimized out>, __minor=<optimized out>)
          at makedev.c:30
      #13 0x0000000000000000 in ?? ()
      
      
      Thread 4 (Thread 0x7fbeff1c5700 (LWP 4145)):
      #0  0x00007fbf05086ed9 in futex_reltimed_wait_cancelable (private=<optimized out>, 
          reltime=0x7fbeff1c4b20, expected=0, futex_word=0x2732c58)
          at ../sysdeps/unix/sysv/linux/futex-internal.h:142
      #1  __pthread_cond_wait_common (abstime=0x7fbeff1c4be0, mutex=0x2732c08, cond=0x2732c30)
          at pthread_cond_wait.c:533
      #2  __pthread_cond_timedwait (cond=cond@entry=0x2732c30, mutex=mutex@entry=0x2732c08, 
          abstime=abstime@entry=0x7fbeff1c4be0) at pthread_cond_wait.c:667
      #3  0x00000000004a0b62 in __wt_cond_wait_signal (session=session@entry=0x2760f40, cond=0x2732c00, 
          usecs=usecs@entry=10000, run_func=run_func@entry=0x0, signalled=signalled@entry=0x7fbeff1c4c70)
          at ../src/os_posix/os_mtx_cond.c:115
      #4  0x0000000000469149 in __wt_cond_wait (run_func=0x0, usecs=10000, cond=<optimized out>, 
          session=0x2760f40) at ../src/include/misc.i:19
      #5  __wt_cache_eviction_worker (session=session@entry=0x2760f40, busy=<optimized out>, 
          readonly=readonly@entry=false, pct_full=<optimized out>) at ../src/evict/evict_lru.c:2350
      #6  0x00000000004ee3cf in __wt_cache_eviction_check (busy=false, readonly=false, didworkp=0x0, 
          didworkp=0x0, readonly=false, busy=false, session=0x2760f40) at ../src/include/cache.i:479
      #7  __wt_txn_rollback (session=session@entry=0x2760f40, cfg=cfg@entry=0x7fbeff1c4d30)
          at ../src/txn/txn.c:1725
      #8  0x00000000004cd54f in __session_rollback_transaction (wt_session=0x2760f40, 
          config=<optimized out>) at ../src/session/session_api.c:1735
      #9  0x0000000000413a91 in rollback_transaction (tinfo=0x3bbc100) at ../../../test/format/ops.c:572
      #10 ops (arg=0x3bbc100) at ../../../test/format/ops.c:1091
      #11 0x00007fbf050806db in start_thread (arg=0x7fbeff1c5700) at pthread_create.c:463
      #12 0x00007fbf0427d88f in gnu_dev_makedev (__major=<optimized out>, __minor=<optimized out>)
          at makedev.c:30
      #13 0x0000000000000000 in ?? ()
      
      
      Thread 3 (Thread 0x7fbeff9c6700 (LWP 4144)):
      #0  0x00007fbf05086ed9 in futex_reltimed_wait_cancelable (private=<optimized out>, 
          reltime=0x7fbeff9c5b20, expected=0, futex_word=0x2732c58)
          at ../sysdeps/unix/sysv/linux/futex-internal.h:142
      #1  __pthread_cond_wait_common (abstime=0x7fbeff9c5be0, mutex=0x2732c08, cond=0x2732c30)
          at pthread_cond_wait.c:533
      #2  __pthread_cond_timedwait (cond=cond@entry=0x2732c30, mutex=mutex@entry=0x2732c08, 
          abstime=abstime@entry=0x7fbeff9c5be0) at pthread_cond_wait.c:667
      #3  0x00000000004a0b62 in __wt_cond_wait_signal (session=session@entry=0x2760370, cond=0x2732c00, 
          usecs=usecs@entry=10000, run_func=run_func@entry=0x0, signalled=signalled@entry=0x7fbeff9c5c70)
          at ../src/os_posix/os_mtx_cond.c:115
      #4  0x0000000000469149 in __wt_cond_wait (run_func=0x0, usecs=10000, cond=<optimized out>,
          session=0x2760370) at ../src/include/misc.i:19
      #5  __wt_cache_eviction_worker (session=session@entry=0x2760370, busy=<optimized out>, 
          readonly=readonly@entry=false, pct_full=<optimized out>) at ../src/evict/evict_lru.c:2350
      #6  0x00000000004ee3cf in __wt_cache_eviction_check (busy=false, readonly=false, didworkp=0x0, 
          didworkp=0x0, readonly=false, busy=false, session=0x2760370) at ../src/include/cache.i:479
      #7  __wt_txn_rollback (session=session@entry=0x2760370, cfg=cfg@entry=0x7fbeff9c5d30)
          at ../src/txn/txn.c:1725
      #8  0x00000000004cd54f in __session_rollback_transaction (wt_session=0x2760370, 
          config=<optimized out>) at ../src/session/session_api.c:1735
      #9  0x0000000000413a91 in rollback_transaction (tinfo=0x3bbb600) at ../../../test/format/ops.c:572
      #10 ops (arg=0x3bbb600) at ../../../test/format/ops.c:1091
      #11 0x00007fbf050806db in start_thread (arg=0x7fbeff9c6700) at pthread_create.c:463
      #12 0x00007fbf0427d88f in gnu_dev_makedev (__major=<optimized out>, __minor=<optimized out>)
          at makedev.c:30
      #13 0x0000000000000000 in ?? ()

            Assignee:
            backlog-server-storage-engines [DO NOT USE] Backlog - Storage Engines Team
            Reporter:
            keith.smith@mongodb.com Keith Smith
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: