Uploaded image for project: 'WiredTiger'
  1. WiredTiger
  2. WT-8072

Add check for all_durable moving backwards in timestamp_abort

    • 8
    • Storage - Ra 2022-04-04, Storage - Ra 2022-04-18

      While working on WT-8001, haribabu.kommi and I decided to check whether the global durable_timestamp moves backwards while we update its value during a transaction_commit.

      I made this change on top of the change for WT-8001:

      diff --git a/src/txn/txn.c b/src/txn/txn.c
      index 30fdebef6..11bcfe558 100644
      --- a/src/txn/txn.c
      +++ b/src/txn/txn.c
      @@ -1572,6 +1572,37 @@ __txn_mod_compare(const void *a, const void *b)
           return (aopt->u.op_col.recno < bopt->u.op_col.recno);
       }
       
      +/*
      + * __get_all_durable_ts --
      + *     blah blah
      + */
      +static wt_timestamp_t
      +__get_all_durable_ts(WT_SESSION_IMPL *session)
      +{
      +    WT_CONNECTION_IMPL *conn;
      +    WT_TXN_GLOBAL *txn_global;
      +    WT_TXN_SHARED *s;
      +    wt_timestamp_t ts, tmpts;
      +    uint32_t i, session_cnt;
      +
      +    conn = S2C(session);
      +    txn_global = &conn->txn_global;
      +
      +    ts = txn_global->durable_timestamp;
      +    __wt_readlock(session, &txn_global->rwlock);
      +
      +    /* Walk the array of concurrent transactions. */
      +    WT_ORDERED_READ(session_cnt, conn->session_cnt);
      +    for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
      +        WT_ORDERED_READ(tmpts, s->pinned_durable_timestamp);
      +        if (tmpts != WT_TS_NONE && --tmpts < ts)
      +            ts = tmpts;
      +    }
      +    __wt_readunlock(session, &txn_global->rwlock);
      +
      +    return (ts);
      +}
      +
       /*
        * __wt_txn_commit --
        *     Commit the current transaction.
      @@ -1588,6 +1619,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
           WT_TXN_OP *op;
           WT_UPDATE *upd;
           wt_timestamp_t candidate_durable_timestamp, prev_durable_timestamp;
      +    wt_timestamp_t ts_prev, ts_after;
           uint32_t fileid;
           uint8_t previous_state;
           u_int i, ft_resolution;
      @@ -1847,6 +1879,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
               update_durable_ts = candidate_durable_timestamp > prev_durable_timestamp;
           }
       
      +    ts_prev = __get_all_durable_ts(session);
      +
           /*
            * If it looks like we'll need to move the global durable timestamp, attempt atomic cas and
            * re-check.
      @@ -1861,6 +1895,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
                   prev_durable_timestamp = txn_global->durable_timestamp;
               }
       
      +    ts_after = __get_all_durable_ts(session);
      +    if (ts_after < ts_prev)
      +        WT_ERR_PANIC(session, WT_PANIC,
      +          "All durable timestamp moved backwards from %"PRIu64" to %"PRIu64".", ts_prev, ts_after);
      +
      +
           /*
            * We're between transactions, if we need to block for eviction, it's a good time to do so. Note
            * that we must ignore any error return because the user's data is committed.
      

      I already see that, although the Python tests pass, test/checkpoint/smoke.sh fails:

      [1631171089:109686][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_txn_commit, 1901: All durable timestamp moved backwards from 5 to 4.: WT_PANIC: WiredTiger library panic
      [1631171089:109715][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_txn_commit, 1901: the process must exit and restart: WT_PANIC: WiredTiger library panic
      [1631171089:109729][7063:0x7efcf77fe700], t, WT_SESSION.commit_transaction: __wt_abort, 28: aborting WiredTiger library
      Aborted (core dumped)
      

      It will be worthwhile running the full WT test suite and stress tests to investigate when and why the global durable timestamp moves backwards in our testing.

            Assignee:
            sean.watt@mongodb.com Sean Watt
            Reporter:
            sulabh.mahajan@mongodb.com Sulabh Mahajan
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

              Created:
              Updated:
              Resolved: