A couple of Jenkins jobs are hung while running test/format against column-store files.
The configuration file:
############################################ # RUN PARAMETERS ############################################ abort=0 auto_throttle=1 firstfit=0 bitcnt=4 bloom=1 bloom_bit_count=48 bloom_hash_count=12 bloom_oldest=0 cache=40 checkpoints=1 checksum=uncompressed chunk_size=1 compaction=0 compression=none data_extend=0 data_source=file delete_pct=43 dictionary=0 evict_max=4 file_type=variable-length column-store backups=0 huffman_key=0 huffman_value=0 insert_pct=16 internal_key_truncation=1 internal_page_max=14 isolation=read-committed key_gap=18 key_max=27 key_min=27 leak_memory=0 leaf_page_max=9 logging=0 logging_archive=0 logging_prealloc=0 lsm_worker_threads=4 merge_max=9 mmap=0 ops=100000 prefix_compression=1 prefix_compression_min=7 repeat_data_pct=68 reverse=0 rows=100000 runs=100 split_pct=75 statistics=0 statistics_server=0 threads=20 timer=20 value_max=910 value_min=8 wiredtiger_config= write_pct=73 ############################################
The call stacks from the hung process are below. It's not obvious how it's stuck, i.e., I don't think this is a deadlock:
(gdb) thread apply all where Thread 10 (Thread 0x7f333d2c7700 (LWP 17262)): #0 0x0000003467a0b98e in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 WT-1 0x00000000004421d8 in __wt_cond_wait (session=0x2144040, cond=0x215dbc0, usecs=100000) at ../src/os_posix/os_mtx_cond.c:79 WT-2 0x000000000042b09b in __evict_server (arg=0x2144040) at ../src/evict/evict_lru.c:210 WT-3 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-4 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 9 (Thread 0x7f33361fc700 (LWP 17263)): #0 0x0000003467a0b98e in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 WT-1 0x00000000004421d8 in __wt_cond_wait (session=0x2144b40, cond=0x218ee50, usecs=10000000) at ../src/os_posix/os_mtx_cond.c:79 WT-2 0x000000000042151a in __sweep_server (arg=0x2144b40) at ../src/conn/conn_sweep.c:171 WT-3 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-4 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 8 (Thread 0x7f33357fb700 (LWP 17284)): #0 0x00000000004eb9b3 in __col_insert_search_match (inshead=0x7f32e0080a70, recno=64587) at ../src/include/column.i:111 WT-1 0x00000000004eca07 in __cursor_var_prev (cbt=0x7f32c403aaf0, newpage=0) at ../src/btree/bt_curprev.c:341 WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32c403aaf0, truncating=0) at ../src/btree/bt_curprev.c:589 WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32c403aaf0) at ../src/cursor/cur_file.c:156 WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32c403aaf0, next=0, notfoundp=0x7f33357fada4) at ../../../test/format/ops.c:674 WT-5 0x0000000000410337 in ops (arg=0x24f3400) at ../../../test/format/ops.c:475 WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 7 (Thread 0x7f333dcc8700 (LWP 17287)): #0 __col_insert_search_lt (inshead=0x7f32f40851b0, recno=39196) at ../src/include/column.i:82 WT-1 0x00000000004ecb5f in 
__cursor_var_prev (cbt=0x7f32b403aaf0, newpage=0) at ../src/btree/bt_curprev.c:379 WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32b403aaf0, truncating=0) at ../src/btree/bt_curprev.c:589 WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32b403aaf0) at ../src/cursor/cur_file.c:156 WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32b403aaf0, next=0, notfoundp=0x7f333dcc7da4) at ../../../test/format/ops.c:674 WT-5 0x0000000000410337 in ops (arg=0x24f3508) at ../../../test/format/ops.c:475 WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 ---Type <return> to continue, or q <return> to quit--- WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 6 (Thread 0x7f3337fff700 (LWP 17288)): #0 0x00000000004eca73 in __cursor_var_prev (cbt=0x7f32ac03aaf0, newpage=0) at ../src/btree/bt_curprev.c:346 WT-1 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32ac03aaf0, truncating=0) at ../src/btree/bt_curprev.c:589 WT-2 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32ac03aaf0) at ../src/cursor/cur_file.c:156 WT-3 0x0000000000410ab9 in nextprev (cursor=0x7f32ac03aaf0, next=0, notfoundp=0x7f3337ffeda4) at ../../../test/format/ops.c:674 WT-4 0x0000000000410337 in ops (arg=0x24f3560) at ../../../test/format/ops.c:475 WT-5 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-6 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 5 (Thread 0x7f3336bfd700 (LWP 17290)): #0 __col_insert_search_match (inshead=0x7f3300000e60, recno=38371) at ../src/include/column.i:118 WT-1 0x00000000004eca07 in __cursor_var_prev (cbt=0x7f33241896f0, newpage=0) at ../src/btree/bt_curprev.c:341 WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f33241896f0, truncating=0) at ../src/btree/bt_curprev.c:589 WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f33241896f0) at ../src/cursor/cur_file.c:156 WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f33241896f0, next=0, notfoundp=0x7f3336bfcda4) at ../../../test/format/ops.c:674 WT-5 
0x0000000000410337 in ops (arg=0x24f3610) at ../../../test/format/ops.c:475 WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 4 (Thread 0x7f3331bf5700 (LWP 17296)): #0 0x00000000004e9f3e in __wt_cell_unpack_safe (cell=0x238794d, unpack=0x7f3331bf4ad0, end=0x0) at ../src/include/cell.i:622 WT-1 0x00000000004ea1c3 in __wt_cell_unpack (cell=0x238794d, unpack=0x7f3331bf4ad0) at ../src/include/cell.i:710 WT-2 0x00000000004ecb15 in __cursor_var_prev (cbt=0x7f332c086a10, newpage=0) at ../src/btree/bt_curprev.c:365 WT-3 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f332c086a10, truncating=0) at ../src/btree/bt_curprev.c:589 WT-4 0x00000000004b8e8e in __curfile_prev (cursor=0x7f332c086a10) at ../src/cursor/cur_file.c:156 WT-5 0x0000000000410ab9 in nextprev (cursor=0x7f332c086a10, next=0, notfoundp=0x7f3331bf4da4) at ../../../test/format/ops.c:674 WT-6 0x0000000000410337 in ops (arg=0x24f3820) at ../../../test/format/ops.c:475 WT-7 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-8 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 3 (Thread 0x7f33311f4700 (LWP 17297)): #0 __cursor_var_prev (cbt=0x7f3320086610, newpage=1) ---Type <return> to continue, or q <return> to quit--- at ../src/btree/bt_curprev.c:342 WT-1 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f3320086610, truncating=0) at ../src/btree/bt_curprev.c:589 WT-2 0x00000000004b8e8e in __curfile_prev (cursor=0x7f3320086610) at ../src/cursor/cur_file.c:156 WT-3 0x0000000000410ab9 in nextprev (cursor=0x7f3320086610, next=0, notfoundp=0x7f33311f3da4) at ../../../test/format/ops.c:674 WT-4 0x0000000000410337 in ops (arg=0x24f3878) at ../../../test/format/ops.c:475 WT-5 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-6 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 2 (Thread 0x7f332a1fc700 (LWP 17301)): #0 0x00000000004ebddf in __col_var_search (page=0x25332e0, recno=84228, 
start_recnop=0x7f332a1fbac8) at ../src/include/column.i:259 WT-1 0x00000000004ec94a in __cursor_var_prev (cbt=0x7f32ec03aaf0, newpage=0) at ../src/btree/bt_curprev.c:335 WT-2 0x00000000004ed388 in __wt_btcur_prev (cbt=0x7f32ec03aaf0, truncating=0) at ../src/btree/bt_curprev.c:589 WT-3 0x00000000004b8e8e in __curfile_prev (cursor=0x7f32ec03aaf0) at ../src/cursor/cur_file.c:156 WT-4 0x0000000000410ab9 in nextprev (cursor=0x7f32ec03aaf0, next=0, notfoundp=0x7f332a1fbda4) at ../../../test/format/ops.c:674 WT-5 0x0000000000410337 in ops (arg=0x24f39d8) at ../../../test/format/ops.c:475 WT-6 0x0000003467a079d1 in start_thread () from /lib64/libpthread.so.0 WT-7 0x00000034676e8b6d in clone () from /lib64/libc.so.6 Thread 1 (Thread 0x7f333eed2720 (LWP 13068)): #0 0x00000034676accdd in nanosleep () from /lib64/libc.so.6 WT-1 0x00000034676e1e54 in usleep () from /lib64/libc.so.6 WT-2 0x000000000040f8b7 in wts_ops (lastrun=0) at ../../../test/format/ops.c:168 WT-3 0x0000000000412937 in main (argc=0, argv=0x7ffff9709f40) at ../../../test/format/t.c:202 (gdb) thread 2 [Switching to thread 2 (Thread 0x7f332a1fc700 (LWP 17301))]#0 0x00000000004ebddf in __col_var_search (page=0x25332e0, recno=84228, start_recnop=0x7f332a1fbac8) at ../src/include/column.i:259 259 repeat = page->pg_var_repeats + indx; (gdb) p page->pg_var_repeats There is no member named pg_var_repeats. (gdb) p page $1 = (WT_PAGE *) 0x25332e0 (gdb) p page->pg_var_repeats There is no member named pg_var_repeats. (gdb) p page->u.colv_var.repeats There is no member named colv_var. 
(gdb) p page->u.col_var.repeats $2 = (WT_COL_RLE *) 0x2533610 (gdb) p *$2 $3 = {recno = 84216, rle = 4, indx = 4} (gdb) p limit $4 = 1 (gdb) n 260 if (recno >= repeat->recno && (gdb) 261 recno < repeat->recno + repeat->rle) { (gdb) 260 if (recno >= repeat->recno && (gdb) 262 if (start_recnop != NULL) (gdb) 263 *start_recnop = repeat->recno; (gdb) 264 return (page->pg_var_d + repeat->indx); (gdb) 289 } (gdb) __cursor_var_prev (cbt=0x7f32ec03aaf0, newpage=0) at ../src/btree/bt_curprev.c:334 334 if ((cip = (gdb) n 337 cbt->slot = WT_COL_SLOT(page, cip); (gdb) p cip $5 = (WT_COL *) 0x2533350 (gdb) n 340 cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot); (gdb) 341 cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
The failed builds:
http://build.wiredtiger.com:8080/job/wiredtiger-test-format-stress/6524/console
http://build.wiredtiger.com:8080/job/wiredtiger-test-format-recovery/306/
This is most likely due to the recent column-store changes from WT-1833.
- related to
-
WT-1833 Column store name-space gap handling.
- Closed