Details
-
Bug
-
Status: Closed (View Workflow)
-
Critical
-
Resolution: Fixed
-
10.6, 10.8(EOL), 10.9(EOL), 10.10(EOL), 10.11, 11.0(EOL)
Description
Notes:
The test case is non-deterministic, run with --repeat=N. It usually fails for me on the first attempt, but it can vary on different machines and builds. Could not however reproduce with rr even in 1000 attempts.
I don't know what is the role of the failing EXCHANGE PARTITION, but I couldn't remove it. Possibly it can be replaced with something more sensible.
--source include/have_innodb.inc
|
--source include/have_partition.inc
|
|
CREATE TABLE t1 (a INT) ENGINE=InnoDB PARTITION BY HASH (a) PARTITIONS 9; |
INSERT INTO t1 VALUES (6),(6),(1),(6),(1),(3),(2),(3); |
CREATE TABLE t2 (b INT) ENGINE=InnoDB; |
CREATE INDEX ind1 ON t1(a); |
--error ER_NO_SUCH_TABLE
|
ALTER TABLE t1 EXCHANGE PARTITION p6 WITH TABLE non_existing; |
|
--connect (con1,localhost,root,,)
|
--send
|
CREATE UNIQUE INDEX ind2 ON t1(a); |
|
--connection default
|
CREATE UNIQUE INDEX ind3 ON t2(b); |
DELETE FROM t1; |
|
--connection con1
|
--error 0,ER_DUP_ENTRY
|
--reap
|
|
sleep 2;
|
|
# Cleanup
|
DROP TABLE t1, t2; |
10.6 c2710572 |
mariadbd: /data/src/10.6/storage/innobase/buf/buf0buf.cc:2549: buf_block_t* buf_page_get_low(page_id_t, ulint, ulint, buf_block_t*, ulint, mtr_t*, dberr_t*, bool): Assertion `mode == 16 || mode == 12' failed.
|
230514 1:14:45 [ERROR] mysqld got signal 6 ;
|
|
#9 0x00007f80bf253df2 in __GI___assert_fail (assertion=0x564143a009e0 "mode == 16 || mode == 12", file=0x5641439fd220 "/data/src/10.6/storage/innobase/buf/buf0buf.cc", line=2549, function=0x564143a005e0 "buf_block_t* buf_page_get_low(page_id_t, ulint, ulint, buf_block_t*, ulint, mtr_t*, dberr_t*, bool)") at ./assert/assert.c:101
|
#10 0x00005641425aa99b in buf_page_get_low (page_id=..., zip_size=0, rw_latch=1, guess=0x0, mode=10, mtr=0x7f80b5b41e80, err=0x7f80b5b41210, allow_ibuf_merge=false) at /data/src/10.6/storage/innobase/buf/buf0buf.cc:2549
|
#11 0x00005641425aca80 in buf_page_get_gen (page_id=..., zip_size=0, rw_latch=1, guess=0x0, mode=10, mtr=0x7f80b5b41e80, err=0x7f80b5b41210, allow_ibuf_merge=false) at /data/src/10.6/storage/innobase/buf/buf0buf.cc:2927
|
#12 0x0000564142548331 in btr_cur_t::search_leaf (this=0x7f80b5b419e0, tuple=0x61a000012120, mode=PAGE_CUR_LE, latch_mode=BTR_MODIFY_LEAF, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/btr/btr0cur.cc:1107
|
#13 0x000056414200e08f in btr_pcur_open (tuple=0x61a000012120, mode=PAGE_CUR_LE, latch_mode=BTR_PURGE_LEAF, cursor=0x7f80b5b419e0, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/include/btr0pcur.h:431
|
#14 0x00005641423c37ed in row_search_index_entry (entry=0x61a000012120, mode=BTR_PURGE_LEAF, pcur=0x7f80b5b419e0, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/row/row0row.cc:1281
|
#15 0x00005641423ad393 in row_purge_remove_sec_if_poss_leaf (node=0x61b000002b20, index=0x616000c6f720, entry=0x61a000012120) at /data/src/10.6/storage/innobase/row/row0purge.cc:475
|
#16 0x00005641423add6a in row_purge_remove_sec_if_poss (node=0x61b000002b20, index=0x616000c6f720, entry=0x61a000012120) at /data/src/10.6/storage/innobase/row/row0purge.cc:576
|
#17 0x00005641423ae06e in row_purge_del_mark (node=0x61b000002b20) at /data/src/10.6/storage/innobase/row/row0purge.cc:624
|
#18 0x00005641423b361f in row_purge_record_func (node=0x61b000002b20, undo_rec=0x625000054808 "", thr=0x617000001358, updated_extern=false) at /data/src/10.6/storage/innobase/row/row0purge.cc:1194
|
#19 0x00005641423b3bf3 in row_purge (node=0x61b000002b20, undo_rec=0x625000054808 "", thr=0x617000001358) at /data/src/10.6/storage/innobase/row/row0purge.cc:1255
|
#20 0x00005641423b3ec9 in row_purge_step (thr=0x617000001358) at /data/src/10.6/storage/innobase/row/row0purge.cc:1318
|
#21 0x000056414229a1bf in que_thr_step (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:588
|
#22 0x000056414229a592 in que_run_threads_low (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:644
|
#23 0x000056414229a74e in que_run_threads (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:664
|
#24 0x00005641424392fe in srv_task_execute () at /data/src/10.6/storage/innobase/srv/srv0srv.cc:1595
|
#25 0x0000564142439ec4 in purge_worker_callback () at /data/src/10.6/storage/innobase/srv/srv0srv.cc:1843
|
#26 0x0000564142809439 in tpool::task_group::execute (this=0x5641458de700 <purge_task_group>, t=0x5641458de7c0 <purge_worker_task>) at /data/src/10.6/tpool/task_group.cc:55
|
#27 0x0000564142809c3f in tpool::task::execute (this=0x5641458de7c0 <purge_worker_task>) at /data/src/10.6/tpool/task.cc:32
|
#28 0x00005641427fa9cf in tpool::thread_pool_generic::worker_main (this=0x61900001b380, thread_var=0x630000020400) at /data/src/10.6/tpool/tpool_generic.cc:580
|
#29 0x0000564142808f25 in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__f=@0x604000000ca8: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x5641427fa874 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>, __t=@0x604000000ca0: 0x61900001b380) at /usr/include/c++/12/bits/invoke.h:74
|
#30 0x0000564142808d58 in std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=@0x604000000ca8: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x5641427fa874 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>) at /usr/include/c++/12/bits/invoke.h:96
|
#31 0x0000564142808c8b in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=0x604000000c98) at /usr/include/c++/12/bits/std_thread.h:252
|
#32 0x0000564142808c28 in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=0x604000000c98) at /usr/include/c++/12/bits/std_thread.h:259
|
#33 0x0000564142808c0c in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=0x604000000c90) at /usr/include/c++/12/bits/std_thread.h:210
|
#34 0x00007f80bf4d44a3 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
|
#35 0x00007f80bf2a7fd4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
|
#36 0x00007f80bf3285bc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
|
The failure apparently started happening (or, if it existed before, it became much more probable) after this commit in 10.6.9 / 10.7.5 / 10.8.4 / 10.9.2:
commit 0b47c126e31cddda1e94588799599e138400bcf8
|
Author: Marko Mäkelä
|
Date: Mon Jun 6 14:03:22 2022 +0300
|
|
MDEV-13542: Crashing on corrupted page is unhelpful
|
Attachments
Issue Links
- relates to
-
MDEV-31385 Change buffer entries are left behind when freeing a page, causing secondary index corruption when the page is later reused
-
- Closed
-
For me, purge is failing when trying to access a page of ind2 of a partition of table t1:
10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6
#15 row_search_index_entry (entry=entry@entry=0x7f85b4009a28,
mode=BTR_PURGE_LEAF, pcur=pcur@entry=0x7f85beffc040,
mtr=mtr@entry=0x7f85beffc2c0)
at /mariadb/10.6/storage/innobase/row/row0row.cc:1281
1281 if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
(gdb) p pcur.btr_cur.page_cur.index.name
$1 = {m_name = 0x7f85840768f0 "ind2"}
(gdb) p pcur.btr_cur.page_cur.index.table.name
$2 = {m_name = 0x7f8590183ca8 "test/t1#P#p2"}
During the time of the crash, the statement DROP TABLE t1, t2 is waiting for a metadata lock (MDL) on test.t1.
With a few attempts of
I got lucky. The index tree is indeed being dropped after a DDL operation:
10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6
#8 0x0000558f79802f0b in btr_free_if_exists (space=space@entry=0x2f1f1819b000, page=page@entry=5, index_id=<optimized out>, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/btr/btr0btr.cc:1244
#9 0x0000558f7988a1c8 in dict_drop_index_tree (pcur=pcur@entry=0x558f7bacd790, trx=trx@entry=0x0, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/dict/dict0crea.cc:905
#10 0x0000558f79997976 in row_purge_remove_clust_if_poss_low (node=node@entry=0x558f7bacd6f0, mode=mode@entry=BTR_MODIFY_LEAF) at /mariadb/10.6/storage/innobase/row/row0purge.cc:165
#11 0x0000558f799983d5 in row_purge_remove_clust_if_poss (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:258
#12 0x0000558f7999855c in row_purge_del_mark (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:632
The committed DDL operation was a rollback of the CREATE UNIQUE INDEX:
10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6
#0 row_merge_drop_index_dict (trx=trx@entry=0x45c901cb8780, index_id=45) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3707
#1 0x0000558f799786f7 in row_merge_drop_indexes (trx=0x45c901cb8780, table=table@entry=0x2f1f1819bd70, locked=<optimized out>, alter_trx=<optimized out>) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3942
#2 0x0000558f798a8e10 in innobase_rollback_sec_index (user_table=0x2f1f1819bd70, table=table@entry=0x2f1f1800e8c8, locked=<optimized out>, trx=<optimized out>, alter_trx=<optimized out>)
at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:8890
#3 0x0000558f798af79b in rollback_inplace_alter_table (ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, table=0x2f1f1800e8c8, prebuilt=0x2f1f1819ff50)
at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:9028
#4 0x0000558f798a65b7 in ha_innobase::commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=false)
at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:11015
#5 0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50,
commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272
#6 0x0000558f7973c3ab in ha_partition::commit_inplace_alter_table (this=0x2f1f18194410, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=<optimized out>)
at /mariadb/10.6/sql/ha_partition.cc:10628
#7 0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18194410, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50,
commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272
Now, why does the assertion fail when a DML undo log record is being purged? Those records are always associated with a table, not directly with secondary indexes.
During the rollback of CREATE UNIQUE INDEX, there will be no attempt to upgrade the metadata lock to MDL_EXCLUSIVE. Because the table is being referenced from another connection, the ‘stub’ for the index ind2 was not removed from the InnoDB data dictionary cache:
10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6
#0 dict_table_t::acquire (this=0x2f1f1819bd70) at /mariadb/10.6/storage/innobase/include/dict0dict.inl:1132
#1 0x0000558f797b88eb in dict_table_open_on_name (table_name=table_name@entry=0x7d064bc66a60 "test/t1#P#p2", dict_locked=dict_locked@entry=false, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY)
at /mariadb/10.6/storage/innobase/dict/dict0dict.cc:1087
#2 0x0000558f79780ec0 in ha_innobase::open_dict_table (norm_name=norm_name@entry=0x7d064bc66a60 "test/t1#P#p2", is_partition=is_partition@entry=true, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY)
at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:6236
#3 0x0000558f79790cc3 in ha_innobase::open (this=0x2f1f180c7210, name=0x7d064bc66d70 "./test/t1#P#p2") at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:5955
#4 0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c7210, table_arg=<optimized out>, name=name@entry=0x7d064bc66d70 "./test/t1#P#p2", mode=2, test_if_locked=1042, mem_root=mem_root@entry=0x0,
partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366
#5 0x0000558f79744cb8 in ha_partition::open_read_partitions (this=this@entry=0x2f1f180c6990, name_buff=name_buff@entry=0x7d064bc66d70 "./test/t1#P#p2", name_buff_size=name_buff_size@entry=513)
at /mariadb/10.6/sql/ha_partition.cc:8832
#6 0x0000558f797453a7 in ha_partition::open (this=0x2f1f180c6990, name=0x2f1f1808ed50 "./test/t1", mode=<optimized out>, test_if_locked=18) at /mariadb/10.6/sql/ha_partition.cc:3812
#7 0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c6990, table_arg=table_arg@entry=0x2f1f1818bda8, name=0x2f1f1808ed50 "./test/t1", mode=2, test_if_locked=test_if_locked@entry=18,
mem_root=mem_root@entry=0x0, partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366
#8 0x0000558f795c35b1 in open_table_from_share (thd=thd@entry=0x2f1f18000d58, share=share@entry=0x2f1f1808e7a0, alias=alias@entry=0x2f1f18014358, db_stat=db_stat@entry=33, prgflag=prgflag@entry=8,
ha_open_flags=18, outparam=0x2f1f1818bda8, is_create_table=false, partitions_to_open=0x0) at /mariadb/10.6/sql/table.cc:4405
#9 0x0000558f7941bb19 in open_table (thd=thd@entry=0x2f1f18000d58, table_list=table_list@entry=0x2f1f18014310, ot_ctx=ot_ctx@entry=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:2036
#10 0x0000558f7941d048 in open_and_process_table (thd=thd@entry=0x2f1f18000d58, tables=tables@entry=0x2f1f18014310, counter=counter@entry=0x7d064bc674cc, flags=flags@entry=0,
prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518, has_prelocking_list=has_prelocking_list@entry=false, ot_ctx=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:3847
#11 0x0000558f7941e82f in open_tables (thd=thd@entry=0x2f1f18000d58, options=@0x2f1f18006420: {m_options = DDL_options_st::OPT_NONE}, start=start@entry=0x7d064bc674b8, counter=counter@entry=0x7d064bc674cc,
flags=flags@entry=0, prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:4330
#12 0x0000558f7941ef27 in open_and_lock_tables (thd=0x2f1f18000d58, options=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=0,
prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:5303
#13 0x0000558f7931c38f in open_and_lock_tables (thd=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=<optimized out>) at /mariadb/10.6/sql/sql_base.h:509
#14 0x0000558f7949abdb in mysql_delete (thd=thd@entry=0x2f1f18000d58, table_list=0x2f1f18014310, conds=<optimized out>, order_list=order_list@entry=0x2f1f180059e8, limit=18446744073709551615, options=0,
result=0x0) at /mariadb/10.6/sql/sql_delete.cc:341
#15 0x0000558f794dc86b in mysql_execute_command (thd=thd@entry=0x2f1f18000d58, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /mariadb/10.6/sql/sql_parse.cc:4818
#16 0x0000558f794e0cb7 in mysql_parse (thd=thd@entry=0x2f1f18000d58, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x7d064bc68320) at /mariadb/10.6/sql/sql_parse.cc:8036
#17 0x0000558f794e22c8 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x2f1f18000d58, packet=packet@entry=0x2f1f180918c9 "DELETE FROM t1", packet_length=packet_length@entry=14,
blocking=blocking@entry=true) at /mariadb/10.6/sql/sql_parse.cc:1896
As far as I can tell, this bug is possible to hit even without using any partitioned tables. At the same time I wonder why EXCHANGE PARTITION is not using MDL_EXCLUSIVE. When creating the rr replay trace, I was actually using the following patch because I was suspecting that the ALTER TABLE t1 EXCHANGE PARTITION… can be executed concurrently with CREATE UNIQUE INDEX…ON t1:
diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc
index 6ca96300b7a..56e2e735658 100644
--- a/sql/sql_partition_admin.cc
+++ b/sql/sql_partition_admin.cc
@@ -555,7 +555,7 @@ bool Sql_cmd_alter_table_exchange_partition::
we need some info from the engine, which we can only access after open,
to be able to verify the structure/metadata.
*/
- table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE);
+ table_list->mdl_request.set_type(MDL_EXCLUSIVE);
if (unlikely(open_tables(thd, &table_list, &table_counter, 0,
&alter_prelocking_strategy)))
{
@@ -754,7 +754,7 @@ bool Sql_cmd_alter_table_exchange_partition::
thd->variables.option_bits= save_option_bits;
err:
- if (thd->locked_tables_mode)
+ if (false && thd->locked_tables_mode)
{
if (swap_table_mdl_ticket)
thiru, can you please try to create a DEBUG_SYNC test case for this (with a single non-partitioned table), and a fix to the purge code? Possibly it could be a fairly simple extension to the test innodb.innodb-index-online. A possible fix could be to check if the index actually is in committed state, before trying to apply any records. Starting with the fix of
MDEV-15250, this should be safe. If this is repeatable in older versions than 10.6, I do not think that we will be able to fix it easily. The way how DDL works with the purge of history was radically changed in 10.6 as part ofMDEV-25506and other tickets.