MongoDB testing hit an assertion failure in __evict_reconcile():
/* We should not be trying to evict using a checkpoint-cursor transaction. */
WT_ASSERT(session, !F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT));
Here's the stack trace that got there:
#0 0x00007f8f5c979e87 in raise () from /lib/x86_64-linux-gnu/libc.so.6 #1 0x00007f8f5c97b7f1 in abort () from /lib/x86_64-linux-gnu/libc.so.6 #2 0x00007f8f509f34f1 in __wt_abort (session=session@entry=0x7f8f46c57e50) at src/third_party/wiredtiger/src/os_common/os_abort.c:30 #3 0x00007f8f509ea7eb in __evict_reconcile (evict_flags=4, ref=0x7f8f078e3d00, session=0x7f8f46c57e50) at src/third_party/wiredtiger/src/evict/evict_page.c:803 #4 __wt_evict (session=0x7f8f46c57e50, ref=0x7f8f078e3d00, previous_state=<optimized out>, flags=<optimized out>) at src/third_party/wiredtiger/src/evict/evict_page.c:182 #5 0x00007f8f50b3390e in __wt_page_release_evict (session=session@entry=0x7f8f46c57e50, ref=ref@entry=0x7f8f078e3d00, flags=flags@entry=0) at src/third_party/wiredtiger/src/evict/evict_page.c:91 #6 0x00007f8f50a663b9 in __wt_page_in_func (session=session@entry=0x7f8f46c57e50, ref=ref@entry=0x7f8f078e3d00, flags=flags@entry=128, func=func@entry=0x7f8f50c440d0 <__PRETTY_FUNCTION__.47959> "__wt_row_search", line=line@entry=438) at src/third_party/wiredtiger/src/btree/bt_read.c:388 #7 0x00007f8f50aa505a in __wt_page_swap_func (func=<synthetic pointer>, line=438, line=438, func=<synthetic pointer>, flags=128, want=0x7f8f078e3d00, held=0x7f8f4b942030, session=0x7f8f46c57e50) at src/third_party/wiredtiger/src/include/btree_inline.h:2150 #8 __wt_row_search (cbt=cbt@entry=0x7f8f4674cf20, srch_key=srch_key@entry=0x7f8f4674d038, insert=insert@entry=false, leaf=leaf@entry=0x0, leaf_safe=leaf_safe@entry=false, leaf_foundp=leaf_foundp@entry=0x0) at src/third_party/wiredtiger/src/btree/row_srch.c:438 #9 0x00007f8f50a38d7e in __cursor_row_search (leaf_foundp=0x0, leaf=0x0, insert=false, cbt=0x7f8f4674cf20) at src/third_party/wiredtiger/src/btree/bt_cursor.c:501 #10 __wt_btcur_search (cbt=cbt@entry=0x7f8f4674cf20) at src/third_party/wiredtiger/src/btree/bt_cursor.c:825 #11 0x00007f8f50ae345a in __curfile_search (cursor=0x7f8f4674cf20) at src/third_party/wiredtiger/src/cursor/cur_file.c:312 #12 0x00007f8f50b662f0 in __wt_metadata_search (session=session@entry=0x7f8f46c57e50, key=<optimized out>, valuep=valuep@entry=0x7f8f2ae10c28) at src/third_party/wiredtiger/src/meta/meta_table.c:322 #13 0x00007f8f50b610f6 in __meta_retrieve_timestamp (session=0x7f8f46c57e50, system_uri=<optimized out>, timestamp_name=0x7f8f50c397c7 "oldest_timestamp", timestampp=0x7f8f2ae10e98, ckpttime=0x7f8f2ae10d68) at src/third_party/wiredtiger/src/meta/meta_ckpt.c:1722 #14 0x00007f8f50b61982 in __meta_retrieve_a_checkpoint_timestamp (session=session@entry=0x7f8f46c57e50, ckpt_name=ckpt_name@entry=0x0, uri=uri@entry=0x7f8f50c3d1ad "system:oldest", key=key@entry=0x7f8f50c397c7 "oldest_timestamp", timestampp=timestampp@entry=0x7f8f2ae10e98, ckpttime=ckpttime@entry=0x7f8f2ae10d68) at src/third_party/wiredtiger/src/meta/meta_ckpt.c:1762 #15 0x00007f8f50b65489 in __wt_meta_read_checkpoint_oldest (session=session@entry=0x7f8f46c57e50, ckpt_name=ckpt_name@entry=0x0, timestampp=timestampp@entry=0x7f8f2ae10e98, ckpttime=ckpttime@entry=0x7f8f2ae10d68) at src/third_party/wiredtiger/src/meta/meta_ckpt.c:1797 #16 0x00007f8f50bd504d in __session_fetch_checkpoint_meta (oldest_time_ret=0x7f8f2ae10d68, stable_time_ret=0x7f8f2ae10d78, snapshot_time_ret=0x7f8f2ae10d70, info_ret=0x7f8f2ae10e90, ckpt_name=0x0, session=0x7f8f46c57e50) at src/third_party/wiredtiger/src/session/session_dhandle.c:299 #17 __wt_session_get_btree_ckpt (session=session@entry=0x7f8f46c57e50, uri=<optimized out>, uri@entry=0x7f8f50c37e2b "file:WiredTigerHS.wt", cfg=cfg@entry=0x7f8f2ae11030, flags=flags@entry=0, hs_dhandlep=0x0, hs_dhandlep@entry=0x7f8f2ae10e40, ckpt_snapshot=ckpt_snapshot@entry=0x7f8f2ae10e90) at src/third_party/wiredtiger/src/session/session_dhandle.c:545 #18 0x00007f8f50ae48fd in __wt_curfile_open (session=session@entry=0x7f8f46c57e50, uri=<optimized out>, uri@entry=0x7f8f50c37e2b "file:WiredTigerHS.wt", owner=owner@entry=0x0, cfg=cfg@entry=0x7f8f2ae11030, cursorp=cursorp@entry=0x7f8f2ae11020) at src/third_party/wiredtiger/src/cursor/cur_file.c:1178 #19 0x00007f8f50bce964 in __session_open_cursor_int (session=0x7f8f46c57e50, uri=<optimized out>, owner=0x0, other=<optimized out>, cfg=<optimized out>, hash_value=12552819710270246934, cursorp=0x7f8f2ae11020) at src/third_party/wiredtiger/src/session/session_api.c:524 #20 0x00007f8f50bce31f in __wt_open_cursor (session=session@entry=0x7f8f46c57e50, uri=<optimized out>, uri@entry=0x7f8f50c37e2b "file:WiredTigerHS.wt", owner=owner@entry=0x0, cfg=cfg@entry=0x7f8f2ae11030, cursorp=cursorp@entry=0x7f8f2ae11020) at src/third_party/wiredtiger/src/session/session_api.c:599 #21 0x00007f8f50ae5da5 in __curhs_file_cursor_open (session=0x7f8f46c57e50, owner=0x0, cursorp=0x7f8f179ad1f0) at src/third_party/wiredtiger/src/cursor/cur_hs.c:45 #22 0x00007f8f50aee49d in __wt_curhs_open (session=session@entry=0x7f8f46c57e50, owner=owner@entry=0x0, cursorp=cursorp@entry=0x7f8f2ae11118) at src/third_party/wiredtiger/src/cursor/cur_hs.c:1272 #23 0x00007f8f50b3584c in __wt_hs_find_upd (session=session@entry=0x7f8f46c57e50, btree_id=20, key=key@entry=0x7f8f16822038, value_format=0x7f8f13621920 "u", recno=recno@entry=0, upd_value=0x7f8f168222d0, base_value_buf=0x7f8f168222d0) at src/third_party/wiredtiger/src/history/hs_cursor.c:110 #24 0x00007f8f50a21cfa in __wt_txn_read (session=0x7f8f46c57e50, cbt=0x7f8f16821f20, key=<optimized out>, recno=<optimized out>, upd=<optimized out>) at src/third_party/wiredtiger/src/include/txn_inline.h:1135 #25 0x00007f8f50a28187 in __cursor_row_next (key_out_of_boundsp=<synthetic pointer>, prefix=0x0, skippedp=<synthetic pointer>, restart=<optimized out>, newpage=true, cbt=0x7f8f16821f20) at src/third_party/wiredtiger/src/btree/bt_curnext.c:540 #26 __wt_btcur_next_prefix (cbt=0x7f8f16821f20, prefix=<optimized out>, truncating=<optimized out>) at src/third_party/wiredtiger/src/btree/bt_curnext.c:870 #27 0x00007f8f50ae2b4a in __curfile_next (cursor=0x7f8f16821f20) at src/third_party/wiredtiger/src/cursor/cur_file.c:187 #28 0x00007f8f54db1ed3 in mongo::WiredTigerRecordStoreCursorBase::<lambda()>::operator() (__closure=<optimized out>, __closure=<optimized out>) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:2172 #29 mongo::WiredTigerRecordStoreCursorBase::<lambda()>::operator() (__closure=<optimized out>, __closure=<optimized out>) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:2172 #30 mongo::wiredTigerPrepareConflictRetry<mongo::WiredTigerRecordStoreCursorBase::next()::<lambda()> > (f=..., opCtx=0x7f8f0548e4a0) at src/mongo/db/storage/wiredtiger/wiredtiger_prepare_conflict.h:74 #31 mongo::WiredTigerRecordStoreCursorBase::next (this=0x7f8f44d12b20) at src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp:2171 #32 0x00007f8f51eb49ee in mongo::SeekableRecordThrottleCursor::next (this=0x7f8f2c9bc1e0, opCtx=0x7f8f0548e4a0) at /opt/mongodbtoolchain/revisions/c6da1cf7f0b4b60d53566305e59857d3d540dcf7/stow/gcc-v3.JnX/include/c++/8.5.0/bits/unique_ptr.h:345 #33 0x00007f8f508f6a2c in mongo::ValidateAdaptor::traverseRecordStore (this=this@entry=0x7f8f2ae13020, opCtx=<optimized out>, results=results@entry=0x7f8f2ae147e0, output=output@entry=0x7f8f2ae14910) at /opt/mongodbtoolchain/revisions/c6da1cf7f0b4b60d53566305e59857d3d540dcf7/stow/gcc-v3.JnX/include/c++/8.5.0/bits/unique_ptr.h:345 #34 0x00007f8f508e7813 in mongo::CollectionValidation::validate (opCtx=<optimized out>, nss=..., mode=mode@entry=mongo::CollectionValidation::ValidateMode::kBackground, repairMode=repairMode@entry=mongo::CollectionValidation::RepairMode::kNone, results=results@entry=0x7f8f2ae147e0, output=0x7f8f2ae14910, turnOnExtraLoggingForTest=false) at src/mongo/db/catalog/collection_validation.cpp:567
The summary is that the server is using a checkpoint cursor during validation. It enters WiredTiger to perform a WT_CURSOR::next() operation on that cursor. WiredTiger needs to open a cursor on the corresponding history store checkpoint, and in the course of opening that cursor reads the checkpoint metadata from the metadata file.
Accessing a page in the metadata file, via __wt_page_in_func(), results in the forced eviction of the page, which is about 100 bytes over the 5MB max page size. And thus we wind up trying to evict a page from within a checkpoint cursor operation, triggering the assertion.
Note that this test has over 1,400 tables, which accounts for the large metadata page.