Details
-
Bug
-
Status: Closed (View Workflow)
-
Critical
-
Resolution: Fixed
-
10.6.8, 10.7.4, 10.8.3, 10.9.1, 10.10.1
Description
I accidentally found this bug while trying to reproduce MDEV-29438.
In MDEV-27234, the data dictionary recovery was corrected to use the READ COMMITTED isolation level. However, the recovery of the instant ALTER TABLE metadata (MDEV-11369, MDEV-15562) was not adjusted.
Technically, also MariaDB Server 10.3, 10.4, 10.5 may be affected by this, but because they do not support crash-safe DDL operations such as MDEV-25180, it does not make sense to fix this bug before MariaDB Server 10.6.
Consider the following test case:
--source include/have_innodb.inc
|
--source include/not_embedded.inc
|
--source include/have_debug.inc
|
--source include/have_debug_sync.inc
|
|
CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; |
INSERT INTO t1 SET a=1; |
CREATE TABLE t2(a INT UNSIGNED PRIMARY KEY) ENGINE=InnoDB; |
INSERT INTO t2 VALUES (1),(2),(3),(4),(5),(6); |
|
connect ddl, localhost, root; |
SET DEBUG_SYNC='innodb_alter_inplace_before_commit SIGNAL ddl WAIT_FOR ever'; |
--send
|
ALTER TABLE t2 ADD COLUMN b TINYINT UNSIGNED NOT NULL DEFAULT 42 FIRST; |
|
connection default; |
SET DEBUG_SYNC='now WAIT_FOR ddl'; |
SET GLOBAL innodb_flush_log_at_trx_commit=1; |
DELETE FROM t1; |
|
--source include/kill_mysqld.inc
|
disconnect ddl;
|
--source include/start_mysqld.inc
|
|
CHECK TABLE t2; |
DROP TABLE t1,t2; |
Thanks to MDEV-27234, the data dictionary information for the ALTER TABLE operation during which the server was killed was recovered correctly: the uncommitted newest version of the dictionary tables was ignored and the table recovered as if no ALTER TABLE operation had completed.
However, the recovery would crash a little bit later:
10.6 40aa94df356cfa000fc43c92ff0061212d1b161d |
2022-09-01 13:19:14 0 [Note] InnoDB: Starting crash recovery from checkpoint LSN=54849,54849
|
mariadbd: /mariadb/10.6/storage/innobase/rem/rem0rec.cc:304: void rec_init_offsets_comp_ordinary(const rec_t*, const dict_index_t*, rec_offs*, ulint, const dict_col_t::def_t*, rec_leaf_format) [with bool mblob = true; bool redundant_temp = false; rec_t = unsigned char; rec_offs = short unsigned int; ulint = long unsigned int]: Assertion `n_fields <= ulint(index->n_fields) + 1' failed.
|
The reason seems to be that we are reading the metadata record using the READ UNCOMMITTED isolation level. Here is an incomplete attempt to fix that:
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
|
index e9b4e2937b9..71d11e22597 100644
|
--- a/storage/innobase/btr/btr0cur.cc
|
+++ b/storage/innobase/btr/btr0cur.cc
|
@@ -384,6 +384,7 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
|
ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES);
|
|
if (fil_page_get_type(root->page.frame) == FIL_PAGE_INDEX) {
|
+not_instant:
|
ut_ad(!index->is_instant());
|
return DB_SUCCESS;
|
}
|
@@ -445,9 +446,6 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
|
concurrent operations on the table, including table eviction
|
from the cache. */
|
|
- if (info_bits & REC_INFO_DELETED_FLAG) {
|
- /* This metadata record includes a BLOB that identifies
|
- any dropped or reordered columns. */
|
ulint trx_id_offset = index->trx_id_offset;
|
/* If !index->trx_id_offset, the PRIMARY KEY contains
|
variable-length columns. For the metadata record,
|
@@ -507,6 +505,9 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
|
const byte* ptr = rec + trx_id_offset
|
+ (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
|
|
+ if (info_bits & REC_INFO_DELETED_FLAG) {
|
+ /* This metadata record includes a BLOB that identifies
|
+ any dropped or reordered columns. */
|
if (mach_read_from_4(ptr + BTR_EXTERN_LEN)) {
|
goto incompatible;
|
}
|
@@ -519,7 +520,22 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
|
!= space->id) {
|
goto incompatible;
|
}
|
+ }
|
+
|
+ if (const trx_id_t trx_id = trx_read_trx_id(rec + trx_id_offset)) {
|
+ if (!trx_sys.find(nullptr, trx_id, false)) {
|
+ /* The ALTER TABLE operation was committed. */
|
+ } else if (rec[trx_id_offset + DATA_TRX_ID_LEN] & 0x80) {
|
+ /* The first instant ALTER TABLE operation
|
+ was not completed. */
|
+ goto not_instant;
|
+ } else {
|
+ /* FIXME: retrieve an older version */
|
+ }
|
+ }
|
|
+ if (info_bits & REC_INFO_DELETED_FLAG) {
|
+ const uint len = mach_read_from_4(ptr + BTR_EXTERN_LEN + 4);
|
buf_block_t* block = buf_page_get(
|
page_id_t(space->id,
|
mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), |
With that incomplete fix (we should fetch the last committed version of the metadata record it is not the first but some later instant ALTER TABLE that was aborted), we would crash in rollback a little later:
mariadbd: /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938: rec_offs* rec_get_offsets_func(const rec_t*, const dict_index_t*, rec_offs*, ulint, ulint, const char*, unsigned int, mem_heap_t**): Assertion `index->table->instant' failed.
|
...
|
Thread 1 hit Breakpoint 1, rec_get_offsets_func (rec=rec@entry=0x7f16f1bf8102 "", index=index@entry=0x556ab210e418, offsets=offsets@entry=0x7ffd7392d190, n_core=3, n_fields=n_fields@entry=18446744073709551615, file=file@entry=0x556ab04ac448 "/mariadb/10.6/storage/innobase/row/row0undo.cc", line=190, heap=0x7ffd7392d188) at /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938
|
938 ut_ad(index->table->instant);
|
(rr) bt
|
#0 rec_get_offsets_func (rec=rec@entry=0x7f16f1bf8102 "", index=index@entry=0x556ab210e418, offsets=offsets@entry=0x7ffd7392d190, n_core=3, n_fields=n_fields@entry=18446744073709551615,
|
file=file@entry=0x556ab04ac448 "/mariadb/10.6/storage/innobase/row/row0undo.cc", line=190, heap=0x7ffd7392d188) at /mariadb/10.6/storage/innobase/rem/rem0rec.cc:938
|
#1 0x0000556aaffe0186 in row_undo_search_clust_to_pcur (node=node@entry=0x556ab210f9e8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:190
|
#2 0x0000556ab0117457 in row_undo_ins_parse_undo_rec (node=node@entry=0x556ab210f9e8, dict_locked=dict_locked@entry=true) at /mariadb/10.6/storage/innobase/row/row0uins.cc:468
|
#3 0x0000556ab011876d in row_undo_ins (node=node@entry=0x556ab210f9e8, thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0uins.cc:573
|
#4 0x0000556aaffe0c04 in row_undo (node=node@entry=0x556ab210f9e8, thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:401
|
#5 0x0000556aaffe0c97 in row_undo_step (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/row/row0undo.cc:452
|
#6 0x0000556aaff6d9e1 in que_thr_step (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:651
|
#7 0x0000556aaff6dbd2 in que_run_threads_low (thr=thr@entry=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:709
|
#8 0x0000556aaff6dc8f in que_run_threads (thr=0x556ab210f7f8) at /mariadb/10.6/storage/innobase/que/que0que.cc:729
|
#9 0x0000556ab001299d in trx_rollback_active (trx=trx@entry=0x7f16f215fb80) at /mariadb/10.6/storage/innobase/trx/trx0roll.cc:609
|
Attachments
Issue Links
- is caused by
-
MDEV-27234 InnoDB dictionary recovery wrongly uses READ UNCOMMITTED isolation level instead of READ COMMITTED
- Closed
- is duplicated by
-
MDEV-29422 InnoDB crashes when dict_load_table_one() notices a corrupted table
- Closed
- relates to
-
MDEV-11369 Instant add column for InnoDB
- Closed
-
MDEV-15562 Instant DROP COLUMN or changing the order of columns
- Closed
-
MDEV-29438 Recovery or backup of instant ALTER TABLE is incorrect
- Closed