Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-29440

InnoDB instant ALTER TABLE recovery wrongly uses READ COMMITTED isolation level instead of READ UNCOMMITTED

    XMLWordPrintable

Details

    Description

      I accidentally found this bug while trying to reproduce MDEV-29438.

      In MDEV-27234, the data dictionary recovery was corrected to use the READ COMMITTED isolation level. However, the recovery of the instant ALTER TABLE metadata (MDEV-11369, MDEV-15562) was not adjusted.

      Technically, also MariaDB Server 10.3, 10.4, 10.5 may be affected by this, but because they do not support crash-safe DDL operations such as MDEV-25180, it does not make sense to fix this bug before MariaDB Server 10.6.

      Consider the following test case:

      --source include/have_innodb.inc
      --source include/not_embedded.inc
      --source include/have_debug.inc
      --source include/have_debug_sync.inc
       
      CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
      INSERT INTO t1 SET a=1;
      CREATE TABLE t2(a INT UNSIGNED PRIMARY KEY) ENGINE=InnoDB;
      INSERT INTO t2 VALUES (1),(2),(3),(4),(5),(6);
       
      connect ddl, localhost, root;
      SET DEBUG_SYNC='innodb_alter_inplace_before_commit SIGNAL ddl WAIT_FOR ever';
      --send
      ALTER TABLE t2 ADD COLUMN b TINYINT UNSIGNED NOT NULL DEFAULT 42 FIRST;
       
      connection default;
      SET DEBUG_SYNC='now WAIT_FOR ddl';
      SET GLOBAL innodb_flush_log_at_trx_commit=1;
      DELETE FROM t1;
       
      --source include/kill_mysqld.inc
      disconnect ddl;
      --source include/start_mysqld.inc
       
      CHECK TABLE t2;
      DROP TABLE t1,t2;
      

      Thanks to MDEV-27234, the data dictionary information for the ALTER TABLE operation during which the server was killed was recovered correctly: the uncommitted newest version of the dictionary tables was ignored and the table recovered as if no ALTER TABLE operation had completed.

      However, the recovery would crash a little bit later:

      10.6 40aa94df356cfa000fc43c92ff0061212d1b161d

      2022-09-01 13:19:14 0 [Note] InnoDB: Starting crash recovery from checkpoint LSN=54849,54849
      mariadbd: /mariadb/10.6/storage/innobase/rem/rem0rec.cc:304: void rec_init_offsets_comp_ordinary(const rec_t*, const dict_index_t*, rec_offs*, ulint, const dict_col_t::def_t*, rec_leaf_format) [with bool mblob = true; bool redundant_temp = false; rec_t = unsigned char; rec_offs = short unsigned int; ulint = long unsigned int]: Assertion `n_fields <= ulint(index->n_fields) + 1' failed.
      

      The reason seems to be that we are reading the metadata record using the READ UNCOMMITTED isolation level. Here is an incomplete attempt to fix that:

      diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
      index e9b4e2937b9..71d11e22597 100644
      --- a/storage/innobase/btr/btr0cur.cc
      +++ b/storage/innobase/btr/btr0cur.cc
      @@ -384,6 +384,7 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
       	ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES);
       
       	if (fil_page_get_type(root->page.frame) == FIL_PAGE_INDEX) {
      +not_instant:
       		ut_ad(!index->is_instant());
       		return DB_SUCCESS;
       	}
      @@ -445,9 +446,6 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
       	concurrent operations on the table, including table eviction
       	from the cache. */
       
      -	if (info_bits & REC_INFO_DELETED_FLAG) {
      -		/* This metadata record includes a BLOB that identifies
      -		any dropped or reordered columns. */
       	ulint trx_id_offset = index->trx_id_offset;
       	/* If !index->trx_id_offset, the PRIMARY KEY contains
       	variable-length columns. For the metadata record,
      @@ -507,6 +505,9 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
       	const byte* ptr = rec + trx_id_offset
       		+ (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
       
      +	if (info_bits & REC_INFO_DELETED_FLAG) {
      +		/* This metadata record includes a BLOB that identifies
      +		any dropped or reordered columns. */
       		if (mach_read_from_4(ptr + BTR_EXTERN_LEN)) {
       			goto incompatible;
       		}
      @@ -519,7 +520,22 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
       		    != space->id) {
       			goto incompatible;
       		}
      +	}
      +
      +	if (const trx_id_t trx_id = trx_read_trx_id(rec + trx_id_offset)) {
      +		if (!trx_sys.find(nullptr, trx_id, false)) {
      +			/* The ALTER TABLE operation was committed. */
      +		} else if (rec[trx_id_offset + DATA_TRX_ID_LEN] & 0x80) {
      +			/* The first instant ALTER TABLE operation
      +			was not completed. */
      +			goto not_instant;
      +		} else {
      +			/* FIXME: retrieve an older version */
      +		}
      +	}
       
      +	if (info_bits & REC_INFO_DELETED_FLAG) {
      +		const uint len = mach_read_from_4(ptr + BTR_EXTERN_LEN + 4);
       		buf_block_t* block = buf_page_get(
       			page_id_t(space->id,
       				  mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)),
      

      With that incomplete fix (we should fetch the last committed version of the metadata record it is not the first but some later instant ALTER TABLE that was aborted), we would crash in rollback a little later:

      mariadbd: /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938: rec_offs* rec_get_offsets_func(const rec_t*, const dict_index_t*, rec_offs*, ulint, ulint, const char*, unsigned int, mem_heap_t**): Assertion `index->table->instant' failed.
      ...
      Thread 1 hit Breakpoint 1, rec_get_offsets_func (rec=rec@entry=0x7f16f1bf8102 "", index=index@entry=0x556ab210e418, offsets=offsets@entry=0x7ffd7392d190, n_core=3, n_fields=n_fields@entry=18446744073709551615, file=file@entry=0x556ab04ac448 "/mariadb/10.6/storage/innobase/row/row0undo.cc", line=190, heap=0x7ffd7392d188) at /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938
      938			ut_ad(index->table->instant);
      (rr) bt
      #0  rec_get_offsets_func (rec=rec@entry=0x7f16f1bf8102 "", index=index@entry=0x556ab210e418, offsets=offsets@entry=0x7ffd7392d190, n_core=3, n_fields=n_fields@entry=18446744073709551615, 
          file=file@entry=0x556ab04ac448 "/mariadb/10.6/storage/innobase/row/row0undo.cc", line=190, heap=0x7ffd7392d188) at /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938
      #1  0x0000556aaffe0186 in row_undo_search_clust_to_pcur (node=node@entry=0x556ab210f9e8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:190
      #2  0x0000556ab0117457 in row_undo_ins_parse_undo_rec (node=node@entry=0x556ab210f9e8, dict_locked=dict_locked@entry=true) at /mariadb/10.6/storage/innobase/row/row0uins.cc:468
      #3  0x0000556ab011876d in row_undo_ins (node=node@entry=0x556ab210f9e8, thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0uins.cc:573
      #4  0x0000556aaffe0c04 in row_undo (node=node@entry=0x556ab210f9e8, thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:401
      #5  0x0000556aaffe0c97 in row_undo_step (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:452
      #6  0x0000556aaff6d9e1 in que_thr_step (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:651
      #7  0x0000556aaff6dbd2 in que_run_threads_low (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:709
      #8  0x0000556aaff6dc8f in que_run_threads (thr=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:729
      #9  0x0000556ab001299d in trx_rollback_active (trx=trx@entry=0x7f16f215fb80) at /mariadb/10.6/storage/innobase/trx/trx0roll.cc:609
      

      Attachments

        Issue Links

          Activity

            People

              marko Marko Mäkelä
              marko Marko Mäkelä
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.