Details
-
Bug
-
Status: Closed (View Workflow)
-
Blocker
-
Resolution: Fixed
-
10.4.0
Description
Here is a simplified version of a test case that elenst posted in MDEV-17721, with some purge control added to make it more deterministic:
--source include/have_innodb.inc
|
|
SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency; |
SET GLOBAL innodb_purge_rseg_truncate_frequency=1; |
|
connect (prevent_purge,localhost,root); |
START TRANSACTION WITH CONSISTENT SNAPSHOT; |
|
connection default; |
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB; |
INSERT INTO t1 () VALUES (); |
ALTER TABLE t1 DROP f2, ADD COLUMN f2 INT; |
ALTER TABLE t1 DROP f1; |
DELETE FROM t1; |
|
disconnect prevent_purge;
|
|
ALTER TABLE t1 ADD COLUMN extra TINYINT UNSIGNED NOT NULL DEFAULT 42; |
ALTER TABLE t1 DROP extra; |
--source include/wait_all_purged.inc
|
DROP TABLE t1; |
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; |
This will lead to a crash:
10.4 9fdb8c59eb297c6acb15345ae732678d0d8ee838 |
2018-11-22 8:54:07 1 [ERROR] InnoDB: Trying to access update undo rec field 7 in index `GEN_CLUST_INDEX` of table `test`.`t1` but index has only 4 fields Submit a detailed bug report to https://jira.mariadb.org/. Run also CHECK TABLE `test`.`t1`. n_fields = 2, i = 1
|
mysqld: /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661: byte* trx_undo_update_rec_get_update(const byte*, dict_index_t*, ulint, trx_id_t, roll_ptr_t, ulint, mem_heap_t*, upd_t**): Assertion `0' failed.
|
…
|
#8 0x000055f3803ad2de in trx_undo_update_rec_get_update (ptr=0x55f382989c2d "\360\377\377\377\377\001l\363U", index=0x7ff434080008, type=12, trx_id=70, roll_ptr=7599824392815061, info_bits=48, heap=0x55f382989b60, upd=0x55f382989090) at /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661
|
#9 0x000055f380339699 in row_purge_parse_undo_rec (node=0x55f382989028, undo_rec=0x55f382989bf8 "", updated_extern=0x7ff46e708cae, thr=0x55f382988b80) at /mariadb/10.4/storage/innobase/row/row0purge.cc:1143
|
At the time of the assertion failure, the table is empty, and dict_index_t::clear_instant_alter() has been called. The following instrumentation tells us that the caller is the DROP extra:
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
|
index 7e6fe455b72..9035cb60e51 100644
|
--- a/storage/innobase/include/dict0mem.h
|
+++ b/storage/innobase/include/dict0mem.h
|
@@ -2131,11 +2131,17 @@ inline void dict_index_t::clear_instant_add()
|
n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable));
|
}
|
|
+#include <my_stacktrace.h>
|
+
|
inline void dict_index_t::clear_instant_alter()
|
{
|
DBUG_ASSERT(is_primary());
|
DBUG_ASSERT(n_fields == n_def);
|
|
+ extern ulonglong my_thread_stack_size;
|
+ fprintf(stderr, "clear %u\n", table->n_dropped());
|
+ my_print_stacktrace(NULL, my_thread_stack_size, 0);
|
+
|
if (!table->instant) {
|
if (is_instant()) {
|
clear_instant_add(); |
clear 3
|
stack_bottom = 0x0 thread_stack 0x49000
|
/dev/shm/10.4/sql/mysqld(my_print_stacktrace+0x40)[0x56001405179f]
|
mysys/stacktrace.c:269(my_print_stacktrace)[0x560013b71dc8]
|
include/dict0mem.h:2145(dict_index_t::clear_instant_alter())[0x560013b60bc5]
|
handler/handler0alter.cc:5500(innobase_instant_try(Alter_inplace_info const*, ha_innobase_inplace_ctx*, TABLE const*, TABLE const*, trx_t*))[0x560013b7acad]
|
handler/handler0alter.cc:9888(commit_try_norebuild(Alter_inplace_info*, ha_innobase_inplace_ctx*, TABLE*, TABLE const*, trx_t*, char const*))[0x560013b6e1a5]
|
handler/handler0alter.cc:10515(ha_innobase::commit_inplace_alter_table(TABLE*, Alter_inplace_info*, bool))[0x56001392502b]
|
The message clear 3 indicates that 3 columns had been instantly dropped until that point. The test only executes 3 DROP, so it must be the last one. Because of this, the table cannot have been empty when the ADD COLUMN extra executed, and thus the ADD COLUMN extra must have updated a pre-existing metadata record.
The undo record that purge is parsing is for the update of a hidden metadata record, for the ADD COLUMN extra, because at that time the index contained 7 fields, and because the default value is present in the undo log:
(gdb) frame 8
|
#8 0x000055f3803ad2de in trx_undo_update_rec_get_update (ptr=0x55f382989c2d "\360\377\377\377\377\001l\363U", index=0x7ff434080008, type=12, trx_id=70, roll_ptr=7599824392815061, info_bits=48, heap=0x55f382989b60, upd=0x55f382989090) at /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661
|
(gdb) p/u *ptr@2
|
$1 = {1, 42}
|
Purge has to run some time between the ADD COLUMN extra and the DROP extra.
Here is a more deterministic version of the test case, with full purge control. Unfortunately, it still is not completely deterministic:
--source include/have_innodb.inc
|
|
SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency; |
SET GLOBAL innodb_purge_rseg_truncate_frequency=1; |
|
connect (prevent_purge,localhost,root); |
START TRANSACTION WITH CONSISTENT SNAPSHOT; |
|
connection default; |
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB; |
INSERT INTO t1 () VALUES (); |
ALTER TABLE t1 DROP f2, ADD COLUMN f2 INT; |
ALTER TABLE t1 DROP f1; |
DELETE FROM t1; |
|
connection prevent_purge; |
COMMIT; |
START TRANSACTION WITH CONSISTENT SNAPSHOT; |
connection default; |
|
ALTER TABLE t1 ADD COLUMN extra TINYINT UNSIGNED NOT NULL DEFAULT 42; |
let $wait_all_purged= 1;
|
--source include/wait_all_purged.inc
|
ALTER TABLE t1 DROP extra; |
disconnect prevent_purge;
|
let $wait_all_purged= 0;
|
--source include/wait_all_purged.inc
|
DROP TABLE t1; |
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; |
I think that a fix should be added to ALTER TABLE:
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
|
index 8b8ccd2fc11..f6fb498b4a1 100644
|
--- a/storage/innobase/handler/handler0alter.cc
|
+++ b/storage/innobase/handler/handler0alter.cc
|
@@ -5496,6 +5496,11 @@ static bool innobase_instant_try(
|
/* The table is empty. */
|
ut_ad(page_is_root(block->frame));
|
btr_page_empty(block, NULL, index, 0, &mtr);
|
+ if (index->table->instant) {
|
+ /* FIXME: Assign a new index->table->id,
|
+ SYS_TABLES.TABLE_ID, SYS_INDEXES.TABLE_ID
|
+ to invalidate old undo log records in purge. */
|
+ }
|
index->clear_instant_alter();
|
err = DB_SUCCESS;
|
goto func_exit; |
Attachments
Issue Links
- causes
-
MDEV-17899 Instant ADD/DROP multiple failure assertions
-
- Closed
-
- is caused by
-
MDEV-15562 Instant DROP COLUMN or changing the order of columns
-
- Closed
-
- relates to
-
MDEV-17721 Various assertion failures due to MDEV-15562 instant ALTER TABLE
-
- Closed
-
Activity
Field | Original Value | New Value |
---|---|---|
Link |
This issue is caused by |
Link |
This issue relates to |
issue.field.resolutiondate | 2018-11-22 08:57:48.0 | 2018-11-22 08:57:48.802 |
Resolution | Fixed [ 1 ] | |
Status | Open [ 1 ] | Closed [ 6 ] |
Link |
This issue causes |
Workflow | MariaDB v3 [ 90816 ] | MariaDB v4 [ 155246 ] |
There are 2 other callers of index->clear_instant_alter(), apart from the ALTER TABLE.
In the ROLLBACK of an instant ALTER TABLE, btr_cur_optimistic_delete() and btr_cur_pessimistic_delete() are invoking index->clear_instant_alter() when deleting the metadata record and the table becomes empty. The delete on rollback should only happen when the
MDEV-15562metadata record was inserted (not updated) by the transaction, that is, there was no prior instant ADD or DROP or permutation of columns since the table was emptied or rebuilt. Because this first instant ALTER TABLE did not complete, there cannot be any undo log records from other transactions that would refer to the altered table definition. The only undo log record type that can initiate such a rollback is TRX_UNDO_INSERT_METADATA. That record does not have any further parameters; it implicitly identifies the metadata record. After the rollback, purge would run, and it could attempt to reset the transaction ID (MDEV-12288) by searching for trx_undo_metadata. This should not cause any harm.