Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-31264

Purge trying to access freed secondary index page

Details

    Description

      Notes:
      The test case is non-deterministic, run with --repeat=N. It usually fails for me on the first attempt, but it can vary on different machines and builds. Could not however reproduce with rr even in 1000 attempts.
      I don't know what is the role of the failing EXCHANGE PARTITION, but I couldn't remove it. Possibly it can be replaced with something more sensible.

      --source include/have_innodb.inc
      --source include/have_partition.inc
       
      CREATE TABLE t1 (a INT) ENGINE=InnoDB PARTITION BY HASH (a) PARTITIONS 9;
      INSERT INTO t1 VALUES (6),(6),(1),(6),(1),(3),(2),(3);
      CREATE TABLE t2 (b INT) ENGINE=InnoDB;
      CREATE INDEX ind1 ON t1(a);
      --error ER_NO_SUCH_TABLE
      ALTER TABLE t1 EXCHANGE PARTITION p6 WITH TABLE non_existing;
       
      --connect (con1,localhost,root,,)
      --send
        CREATE UNIQUE INDEX ind2 ON t1(a);
       
      --connection default
      CREATE UNIQUE INDEX ind3 ON t2(b);
      DELETE FROM t1;
       
      --connection con1
      --error 0,ER_DUP_ENTRY
      --reap
       
      sleep 2;
       
      # Cleanup
      DROP TABLE t1, t2;
      

      10.6 c2710572

      mariadbd: /data/src/10.6/storage/innobase/buf/buf0buf.cc:2549: buf_block_t* buf_page_get_low(page_id_t, ulint, ulint, buf_block_t*, ulint, mtr_t*, dberr_t*, bool): Assertion `mode == 16 || mode == 12' failed.
      230514  1:14:45 [ERROR] mysqld got signal 6 ;
       
      #9  0x00007f80bf253df2 in __GI___assert_fail (assertion=0x564143a009e0 "mode == 16 || mode == 12", file=0x5641439fd220 "/data/src/10.6/storage/innobase/buf/buf0buf.cc", line=2549, function=0x564143a005e0 "buf_block_t* buf_page_get_low(page_id_t, ulint, ulint, buf_block_t*, ulint, mtr_t*, dberr_t*, bool)") at ./assert/assert.c:101
      #10 0x00005641425aa99b in buf_page_get_low (page_id=..., zip_size=0, rw_latch=1, guess=0x0, mode=10, mtr=0x7f80b5b41e80, err=0x7f80b5b41210, allow_ibuf_merge=false) at /data/src/10.6/storage/innobase/buf/buf0buf.cc:2549
      #11 0x00005641425aca80 in buf_page_get_gen (page_id=..., zip_size=0, rw_latch=1, guess=0x0, mode=10, mtr=0x7f80b5b41e80, err=0x7f80b5b41210, allow_ibuf_merge=false) at /data/src/10.6/storage/innobase/buf/buf0buf.cc:2927
      #12 0x0000564142548331 in btr_cur_t::search_leaf (this=0x7f80b5b419e0, tuple=0x61a000012120, mode=PAGE_CUR_LE, latch_mode=BTR_MODIFY_LEAF, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/btr/btr0cur.cc:1107
      #13 0x000056414200e08f in btr_pcur_open (tuple=0x61a000012120, mode=PAGE_CUR_LE, latch_mode=BTR_PURGE_LEAF, cursor=0x7f80b5b419e0, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/include/btr0pcur.h:431
      #14 0x00005641423c37ed in row_search_index_entry (entry=0x61a000012120, mode=BTR_PURGE_LEAF, pcur=0x7f80b5b419e0, mtr=0x7f80b5b41e80) at /data/src/10.6/storage/innobase/row/row0row.cc:1281
      #15 0x00005641423ad393 in row_purge_remove_sec_if_poss_leaf (node=0x61b000002b20, index=0x616000c6f720, entry=0x61a000012120) at /data/src/10.6/storage/innobase/row/row0purge.cc:475
      #16 0x00005641423add6a in row_purge_remove_sec_if_poss (node=0x61b000002b20, index=0x616000c6f720, entry=0x61a000012120) at /data/src/10.6/storage/innobase/row/row0purge.cc:576
      #17 0x00005641423ae06e in row_purge_del_mark (node=0x61b000002b20) at /data/src/10.6/storage/innobase/row/row0purge.cc:624
      #18 0x00005641423b361f in row_purge_record_func (node=0x61b000002b20, undo_rec=0x625000054808 "", thr=0x617000001358, updated_extern=false) at /data/src/10.6/storage/innobase/row/row0purge.cc:1194
      #19 0x00005641423b3bf3 in row_purge (node=0x61b000002b20, undo_rec=0x625000054808 "", thr=0x617000001358) at /data/src/10.6/storage/innobase/row/row0purge.cc:1255
      #20 0x00005641423b3ec9 in row_purge_step (thr=0x617000001358) at /data/src/10.6/storage/innobase/row/row0purge.cc:1318
      #21 0x000056414229a1bf in que_thr_step (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:588
      #22 0x000056414229a592 in que_run_threads_low (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:644
      #23 0x000056414229a74e in que_run_threads (thr=0x617000001358) at /data/src/10.6/storage/innobase/que/que0que.cc:664
      #24 0x00005641424392fe in srv_task_execute () at /data/src/10.6/storage/innobase/srv/srv0srv.cc:1595
      #25 0x0000564142439ec4 in purge_worker_callback () at /data/src/10.6/storage/innobase/srv/srv0srv.cc:1843
      #26 0x0000564142809439 in tpool::task_group::execute (this=0x5641458de700 <purge_task_group>, t=0x5641458de7c0 <purge_worker_task>) at /data/src/10.6/tpool/task_group.cc:55
      #27 0x0000564142809c3f in tpool::task::execute (this=0x5641458de7c0 <purge_worker_task>) at /data/src/10.6/tpool/task.cc:32
      #28 0x00005641427fa9cf in tpool::thread_pool_generic::worker_main (this=0x61900001b380, thread_var=0x630000020400) at /data/src/10.6/tpool/tpool_generic.cc:580
      #29 0x0000564142808f25 in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__f=@0x604000000ca8: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x5641427fa874 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>, __t=@0x604000000ca0: 0x61900001b380) at /usr/include/c++/12/bits/invoke.h:74
      #30 0x0000564142808d58 in std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=@0x604000000ca8: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x5641427fa874 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>) at /usr/include/c++/12/bits/invoke.h:96
      #31 0x0000564142808c8b in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=0x604000000c98) at /usr/include/c++/12/bits/std_thread.h:252
      #32 0x0000564142808c28 in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=0x604000000c98) at /usr/include/c++/12/bits/std_thread.h:259
      #33 0x0000564142808c0c in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=0x604000000c90) at /usr/include/c++/12/bits/std_thread.h:210
      #34 0x00007f80bf4d44a3 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
      #35 0x00007f80bf2a7fd4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
      #36 0x00007f80bf3285bc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
      

      The failure apparently started happening (or, if it existed before, it became much more probable) after this commit in 10.6.9 / 10.7.5 / 10.8.4 / 10.9.2:

      commit 0b47c126e31cddda1e94588799599e138400bcf8
      Author: Marko Mäkelä
      Date:   Mon Jun 6 14:03:22 2022 +0300
       
          MDEV-13542: Crashing on corrupted page is unhelpful
      

      Attachments

        1. TBR-1510.cfg
          44 kB
          Matthias Leich
        2. TBR-1510.yy
          0.3 kB
          Matthias Leich
        3. TBR-1510.zz
          1 kB
          Matthias Leich

        Issue Links

          Activity

            For me, purge is failing when trying to access a page of ind2 of a partition of table t1:

            10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6

            #15 row_search_index_entry (entry=entry@entry=0x7f85b4009a28, 
                mode=BTR_PURGE_LEAF, pcur=pcur@entry=0x7f85beffc040, 
                mtr=mtr@entry=0x7f85beffc2c0)
                at /mariadb/10.6/storage/innobase/row/row0row.cc:1281
            1281		if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
            (gdb) p pcur.btr_cur.page_cur.index.name
            $1 = {m_name = 0x7f85840768f0 "ind2"}
            (gdb) p pcur.btr_cur.page_cur.index.table.name
            $2 = {m_name = 0x7f8590183ca8 "test/t1#P#p2"}
            

            During the time of the crash, the statement DROP TABLE t1, t2 is waiting for a metadata lock (MDL) on test.t1.

            With a few attempts of

            while ./mtr --rr=-h --parallel=auto main.m-MDEV-31264{,,,,,,,,,,,,,,,,,,,,,,}; do :; done
            

            I got lucky. The index tree is indeed being dropped after a DDL operation:

            10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6

            #8  0x0000558f79802f0b in btr_free_if_exists (space=space@entry=0x2f1f1819b000, page=page@entry=5, index_id=<optimized out>, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/btr/btr0btr.cc:1244
            #9  0x0000558f7988a1c8 in dict_drop_index_tree (pcur=pcur@entry=0x558f7bacd790, trx=trx@entry=0x0, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/dict/dict0crea.cc:905
            #10 0x0000558f79997976 in row_purge_remove_clust_if_poss_low (node=node@entry=0x558f7bacd6f0, mode=mode@entry=BTR_MODIFY_LEAF) at /mariadb/10.6/storage/innobase/row/row0purge.cc:165
            #11 0x0000558f799983d5 in row_purge_remove_clust_if_poss (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:258
            #12 0x0000558f7999855c in row_purge_del_mark (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:632
            

            The committed DDL operation was a rollback of the CREATE UNIQUE INDEX:

            10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6

            #0  row_merge_drop_index_dict (trx=trx@entry=0x45c901cb8780, index_id=45) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3707
            #1  0x0000558f799786f7 in row_merge_drop_indexes (trx=0x45c901cb8780, table=table@entry=0x2f1f1819bd70, locked=<optimized out>, alter_trx=<optimized out>) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3942
            #2  0x0000558f798a8e10 in innobase_rollback_sec_index (user_table=0x2f1f1819bd70, table=table@entry=0x2f1f1800e8c8, locked=<optimized out>, trx=<optimized out>, alter_trx=<optimized out>)
                at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:8890
            #3  0x0000558f798af79b in rollback_inplace_alter_table (ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, table=0x2f1f1800e8c8, prebuilt=0x2f1f1819ff50)
                at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:9028
            #4  0x0000558f798a65b7 in ha_innobase::commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=false)
                at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:11015
            #5  0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, 
                commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272
            #6  0x0000558f7973c3ab in ha_partition::commit_inplace_alter_table (this=0x2f1f18194410, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=<optimized out>)
                at /mariadb/10.6/sql/ha_partition.cc:10628
            #7  0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18194410, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, 
                commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272
            

            Now, why does the assertion fail when a DML undo log record is being purged? Those records are always associated with a table, not directly with secondary indexes.

            During the rollback of CREATE UNIQUE INDEX, there will be no attempt to upgrade the metadata lock to MDL_EXCLUSIVE. Because the table is being referenced from another connection, the ‘stub’ for the index ind2 was not removed from the InnoDB data dictionary cache:

            10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6

            #0  dict_table_t::acquire (this=0x2f1f1819bd70) at /mariadb/10.6/storage/innobase/include/dict0dict.inl:1132
            #1  0x0000558f797b88eb in dict_table_open_on_name (table_name=table_name@entry=0x7d064bc66a60 "test/t1#P#p2", dict_locked=dict_locked@entry=false, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY)
                at /mariadb/10.6/storage/innobase/dict/dict0dict.cc:1087
            #2  0x0000558f79780ec0 in ha_innobase::open_dict_table (norm_name=norm_name@entry=0x7d064bc66a60 "test/t1#P#p2", is_partition=is_partition@entry=true, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY)
                at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:6236
            #3  0x0000558f79790cc3 in ha_innobase::open (this=0x2f1f180c7210, name=0x7d064bc66d70 "./test/t1#P#p2") at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:5955
            #4  0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c7210, table_arg=<optimized out>, name=name@entry=0x7d064bc66d70 "./test/t1#P#p2", mode=2, test_if_locked=1042, mem_root=mem_root@entry=0x0, 
                partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366
            #5  0x0000558f79744cb8 in ha_partition::open_read_partitions (this=this@entry=0x2f1f180c6990, name_buff=name_buff@entry=0x7d064bc66d70 "./test/t1#P#p2", name_buff_size=name_buff_size@entry=513)
                at /mariadb/10.6/sql/ha_partition.cc:8832
            #6  0x0000558f797453a7 in ha_partition::open (this=0x2f1f180c6990, name=0x2f1f1808ed50 "./test/t1", mode=<optimized out>, test_if_locked=18) at /mariadb/10.6/sql/ha_partition.cc:3812
            #7  0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c6990, table_arg=table_arg@entry=0x2f1f1818bda8, name=0x2f1f1808ed50 "./test/t1", mode=2, test_if_locked=test_if_locked@entry=18, 
                mem_root=mem_root@entry=0x0, partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366
            #8  0x0000558f795c35b1 in open_table_from_share (thd=thd@entry=0x2f1f18000d58, share=share@entry=0x2f1f1808e7a0, alias=alias@entry=0x2f1f18014358, db_stat=db_stat@entry=33, prgflag=prgflag@entry=8, 
                ha_open_flags=18, outparam=0x2f1f1818bda8, is_create_table=false, partitions_to_open=0x0) at /mariadb/10.6/sql/table.cc:4405
            #9  0x0000558f7941bb19 in open_table (thd=thd@entry=0x2f1f18000d58, table_list=table_list@entry=0x2f1f18014310, ot_ctx=ot_ctx@entry=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:2036
            #10 0x0000558f7941d048 in open_and_process_table (thd=thd@entry=0x2f1f18000d58, tables=tables@entry=0x2f1f18014310, counter=counter@entry=0x7d064bc674cc, flags=flags@entry=0, 
                prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518, has_prelocking_list=has_prelocking_list@entry=false, ot_ctx=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:3847
            #11 0x0000558f7941e82f in open_tables (thd=thd@entry=0x2f1f18000d58, options=@0x2f1f18006420: {m_options = DDL_options_st::OPT_NONE}, start=start@entry=0x7d064bc674b8, counter=counter@entry=0x7d064bc674cc, 
                flags=flags@entry=0, prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:4330
            #12 0x0000558f7941ef27 in open_and_lock_tables (thd=0x2f1f18000d58, options=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=0, 
                prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:5303
            #13 0x0000558f7931c38f in open_and_lock_tables (thd=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=<optimized out>) at /mariadb/10.6/sql/sql_base.h:509
            #14 0x0000558f7949abdb in mysql_delete (thd=thd@entry=0x2f1f18000d58, table_list=0x2f1f18014310, conds=<optimized out>, order_list=order_list@entry=0x2f1f180059e8, limit=18446744073709551615, options=0, 
                result=0x0) at /mariadb/10.6/sql/sql_delete.cc:341
            #15 0x0000558f794dc86b in mysql_execute_command (thd=thd@entry=0x2f1f18000d58, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /mariadb/10.6/sql/sql_parse.cc:4818
            #16 0x0000558f794e0cb7 in mysql_parse (thd=thd@entry=0x2f1f18000d58, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x7d064bc68320) at /mariadb/10.6/sql/sql_parse.cc:8036
            #17 0x0000558f794e22c8 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x2f1f18000d58, packet=packet@entry=0x2f1f180918c9 "DELETE FROM t1", packet_length=packet_length@entry=14, 
                blocking=blocking@entry=true) at /mariadb/10.6/sql/sql_parse.cc:1896
            

            As far as I can tell, this bug is possible to hit even without using any partitioned tables. At the same time I wonder why EXCHANGE PARTITION is not using MDL_EXCLUSIVE. When creating the rr replay trace, I was actually using the following patch because I was suspecting that the ALTER TABLE t1 EXCHANGE PARTITION… can be executed concurrently with CREATE UNIQUE INDEX…ON t1:

            diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc
            index 6ca96300b7a..56e2e735658 100644
            --- a/sql/sql_partition_admin.cc
            +++ b/sql/sql_partition_admin.cc
            @@ -555,7 +555,7 @@ bool Sql_cmd_alter_table_exchange_partition::
                 we need some info from the engine, which we can only access after open,
                 to be able to verify the structure/metadata.
               */
            -  table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE);
            +  table_list->mdl_request.set_type(MDL_EXCLUSIVE);
               if (unlikely(open_tables(thd, &table_list, &table_counter, 0,
                                        &alter_prelocking_strategy)))
               {
            @@ -754,7 +754,7 @@ bool Sql_cmd_alter_table_exchange_partition::
               thd->variables.option_bits= save_option_bits;
             
             err:
            -  if (thd->locked_tables_mode)
            +  if (false && thd->locked_tables_mode)
               {
                 if (swap_table_mdl_ticket)
                   swap_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE);
            

            thiru, can you please try to create a DEBUG_SYNC test case for this (with a single non-partitioned table), and a fix to the purge code? Possibly it could be a fairly simple extension to the test innodb.innodb-index-online. A possible fix could be to check if the index actually is in committed state, before trying to apply any records. Starting with the fix of MDEV-15250, this should be safe. If this is repeatable in older versions than 10.6, I do not think that we will be able to fix it easily. The way how DDL works with the purge of history was radically changed in 10.6 as part of MDEV-25506 and other tickets.

            marko Marko Mäkelä added a comment - For me, purge is failing when trying to access a page of ind2 of a partition of table t1 : 10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6 #15 row_search_index_entry (entry=entry@entry=0x7f85b4009a28, mode=BTR_PURGE_LEAF, pcur=pcur@entry=0x7f85beffc040, mtr=mtr@entry=0x7f85beffc2c0) at /mariadb/10.6/storage/innobase/row/row0row.cc:1281 1281 if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { (gdb) p pcur.btr_cur.page_cur.index.name $1 = {m_name = 0x7f85840768f0 "ind2"} (gdb) p pcur.btr_cur.page_cur.index.table.name $2 = {m_name = 0x7f8590183ca8 "test/t1#P#p2"} During the time of the crash, the statement DROP TABLE t1, t2 is waiting for a metadata lock (MDL) on test.t1 . With a few attempts of while . /mtr --rr=-h --parallel=auto main.m-MDEV-31264{,,,,,,,,,,,,,,,,,,,,,,}; do :; done I got lucky. The index tree is indeed being dropped after a DDL operation: 10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6 #8 0x0000558f79802f0b in btr_free_if_exists (space=space@entry=0x2f1f1819b000, page=page@entry=5, index_id=<optimized out>, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/btr/btr0btr.cc:1244 #9 0x0000558f7988a1c8 in dict_drop_index_tree (pcur=pcur@entry=0x558f7bacd790, trx=trx@entry=0x0, mtr=mtr@entry=0x2510586c52c0) at /mariadb/10.6/storage/innobase/dict/dict0crea.cc:905 #10 0x0000558f79997976 in row_purge_remove_clust_if_poss_low (node=node@entry=0x558f7bacd6f0, mode=mode@entry=BTR_MODIFY_LEAF) at /mariadb/10.6/storage/innobase/row/row0purge.cc:165 #11 0x0000558f799983d5 in row_purge_remove_clust_if_poss (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:258 #12 0x0000558f7999855c in row_purge_del_mark (node=node@entry=0x558f7bacd6f0) at /mariadb/10.6/storage/innobase/row/row0purge.cc:632 The committed DDL operation was a rollback of the CREATE UNIQUE INDEX : 10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6 #0 row_merge_drop_index_dict (trx=trx@entry=0x45c901cb8780, index_id=45) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3707 #1 0x0000558f799786f7 in row_merge_drop_indexes (trx=0x45c901cb8780, table=table@entry=0x2f1f1819bd70, locked=<optimized out>, alter_trx=<optimized out>) at /mariadb/10.6/storage/innobase/row/row0merge.cc:3942 #2 0x0000558f798a8e10 in innobase_rollback_sec_index (user_table=0x2f1f1819bd70, table=table@entry=0x2f1f1800e8c8, locked=<optimized out>, trx=<optimized out>, alter_trx=<optimized out>) at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:8890 #3 0x0000558f798af79b in rollback_inplace_alter_table (ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, table=0x2f1f1800e8c8, prebuilt=0x2f1f1819ff50) at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:9028 #4 0x0000558f798a65b7 in ha_innobase::commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=false) at /mariadb/10.6/storage/innobase/handler/handler0alter.cc:11015 #5 0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18195c70, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272 #6 0x0000558f7973c3ab in ha_partition::commit_inplace_alter_table (this=0x2f1f18194410, altered_table=0x7f56ed902e10, ha_alter_info=0x7f56ed902d50, commit=<optimized out>) at /mariadb/10.6/sql/ha_partition.cc:10628 #7 0x0000558f792e73ba in handler::ha_commit_inplace_alter_table (this=0x2f1f18194410, altered_table=altered_table@entry=0x7f56ed902e10, ha_alter_info=ha_alter_info@entry=0x7f56ed902d50, commit=commit@entry=false) at /mariadb/10.6/sql/handler.cc:5272 Now, why does the assertion fail when a DML undo log record is being purged? Those records are always associated with a table, not directly with secondary indexes. During the rollback of CREATE UNIQUE INDEX , there will be no attempt to upgrade the metadata lock to MDL_EXCLUSIVE . Because the table is being referenced from another connection, the ‘stub’ for the index ind2 was not removed from the InnoDB data dictionary cache: 10.6 a3e5b5c4db47cd29dc090d27db1d7f915cf774f6 #0 dict_table_t::acquire (this=0x2f1f1819bd70) at /mariadb/10.6/storage/innobase/include/dict0dict.inl:1132 #1 0x0000558f797b88eb in dict_table_open_on_name (table_name=table_name@entry=0x7d064bc66a60 "test/t1#P#p2", dict_locked=dict_locked@entry=false, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY) at /mariadb/10.6/storage/innobase/dict/dict0dict.cc:1087 #2 0x0000558f79780ec0 in ha_innobase::open_dict_table (norm_name=norm_name@entry=0x7d064bc66a60 "test/t1#P#p2", is_partition=is_partition@entry=true, ignore_err=ignore_err@entry=DICT_ERR_IGNORE_FK_NOKEY) at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:6236 #3 0x0000558f79790cc3 in ha_innobase::open (this=0x2f1f180c7210, name=0x7d064bc66d70 "./test/t1#P#p2") at /mariadb/10.6/storage/innobase/handler/ha_innodb.cc:5955 #4 0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c7210, table_arg=<optimized out>, name=name@entry=0x7d064bc66d70 "./test/t1#P#p2", mode=2, test_if_locked=1042, mem_root=mem_root@entry=0x0, partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366 #5 0x0000558f79744cb8 in ha_partition::open_read_partitions (this=this@entry=0x2f1f180c6990, name_buff=name_buff@entry=0x7d064bc66d70 "./test/t1#P#p2", name_buff_size=name_buff_size@entry=513) at /mariadb/10.6/sql/ha_partition.cc:8832 #6 0x0000558f797453a7 in ha_partition::open (this=0x2f1f180c6990, name=0x2f1f1808ed50 "./test/t1", mode=<optimized out>, test_if_locked=18) at /mariadb/10.6/sql/ha_partition.cc:3812 #7 0x0000558f792e31ae in handler::ha_open (this=0x2f1f180c6990, table_arg=table_arg@entry=0x2f1f1818bda8, name=0x2f1f1808ed50 "./test/t1", mode=2, test_if_locked=test_if_locked@entry=18, mem_root=mem_root@entry=0x0, partitions_to_open=0x0) at /mariadb/10.6/sql/handler.cc:3366 #8 0x0000558f795c35b1 in open_table_from_share (thd=thd@entry=0x2f1f18000d58, share=share@entry=0x2f1f1808e7a0, alias=alias@entry=0x2f1f18014358, db_stat=db_stat@entry=33, prgflag=prgflag@entry=8, ha_open_flags=18, outparam=0x2f1f1818bda8, is_create_table=false, partitions_to_open=0x0) at /mariadb/10.6/sql/table.cc:4405 #9 0x0000558f7941bb19 in open_table (thd=thd@entry=0x2f1f18000d58, table_list=table_list@entry=0x2f1f18014310, ot_ctx=ot_ctx@entry=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:2036 #10 0x0000558f7941d048 in open_and_process_table (thd=thd@entry=0x2f1f18000d58, tables=tables@entry=0x2f1f18014310, counter=counter@entry=0x7d064bc674cc, flags=flags@entry=0, prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518, has_prelocking_list=has_prelocking_list@entry=false, ot_ctx=0x7d064bc67430) at /mariadb/10.6/sql/sql_base.cc:3847 #11 0x0000558f7941e82f in open_tables (thd=thd@entry=0x2f1f18000d58, options=@0x2f1f18006420: {m_options = DDL_options_st::OPT_NONE}, start=start@entry=0x7d064bc674b8, counter=counter@entry=0x7d064bc674cc, flags=flags@entry=0, prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:4330 #12 0x0000558f7941ef27 in open_and_lock_tables (thd=0x2f1f18000d58, options=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=0, prelocking_strategy=prelocking_strategy@entry=0x7d064bc67518) at /mariadb/10.6/sql/sql_base.cc:5303 #13 0x0000558f7931c38f in open_and_lock_tables (thd=<optimized out>, tables=<optimized out>, derived=<optimized out>, flags=<optimized out>) at /mariadb/10.6/sql/sql_base.h:509 #14 0x0000558f7949abdb in mysql_delete (thd=thd@entry=0x2f1f18000d58, table_list=0x2f1f18014310, conds=<optimized out>, order_list=order_list@entry=0x2f1f180059e8, limit=18446744073709551615, options=0, result=0x0) at /mariadb/10.6/sql/sql_delete.cc:341 #15 0x0000558f794dc86b in mysql_execute_command (thd=thd@entry=0x2f1f18000d58, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /mariadb/10.6/sql/sql_parse.cc:4818 #16 0x0000558f794e0cb7 in mysql_parse (thd=thd@entry=0x2f1f18000d58, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x7d064bc68320) at /mariadb/10.6/sql/sql_parse.cc:8036 #17 0x0000558f794e22c8 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x2f1f18000d58, packet=packet@entry=0x2f1f180918c9 "DELETE FROM t1", packet_length=packet_length@entry=14, blocking=blocking@entry=true) at /mariadb/10.6/sql/sql_parse.cc:1896 As far as I can tell, this bug is possible to hit even without using any partitioned tables. At the same time I wonder why EXCHANGE PARTITION is not using MDL_EXCLUSIVE . When creating the rr replay trace, I was actually using the following patch because I was suspecting that the ALTER TABLE t1 EXCHANGE PARTITION… can be executed concurrently with CREATE UNIQUE INDEX…ON t1 : diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 6ca96300b7a..56e2e735658 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -555,7 +555,7 @@ bool Sql_cmd_alter_table_exchange_partition:: we need some info from the engine, which we can only access after open, to be able to verify the structure/metadata. */ - table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE); + table_list->mdl_request.set_type(MDL_EXCLUSIVE); if (unlikely(open_tables(thd, &table_list, &table_counter, 0, &alter_prelocking_strategy))) { @@ -754,7 +754,7 @@ bool Sql_cmd_alter_table_exchange_partition:: thd->variables.option_bits= save_option_bits; err: - if (thd->locked_tables_mode) + if (false && thd->locked_tables_mode) { if (swap_table_mdl_ticket) swap_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); thiru , can you please try to create a DEBUG_SYNC test case for this (with a single non-partitioned table), and a fix to the purge code? Possibly it could be a fairly simple extension to the test innodb.innodb-index-online . A possible fix could be to check if the index actually is in committed state, before trying to apply any records. Starting with the fix of MDEV-15250 , this should be safe. If this is repeatable in older versions than 10.6, I do not think that we will be able to fix it easily. The way how DDL works with the purge of history was radically changed in 10.6 as part of MDEV-25506 and other tickets.

            Replying to myself: Even if ALTER TABLE…EXCHANGE PARTITION is not using MDL_EXCLUSIVE, it is protected by a metadata lock that will conflict with MDL_EXCLUSIVE, which protects CREATE INDEX, DROP INDEX, DROP TABLE and similar operations. The relaxed locking should be completely fine from the InnoDB point of view.

            marko Marko Mäkelä added a comment - Replying to myself: Even if ALTER TABLE…EXCHANGE PARTITION is not using MDL_EXCLUSIVE , it is protected by a metadata lock that will conflict with MDL_EXCLUSIVE , which protects CREATE INDEX , DROP INDEX , DROP TABLE and similar operations. The relaxed locking should be completely fine from the InnoDB point of view.

            diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
            index f4db252b069..aa315098419 100644
            --- a/storage/innobase/row/row0purge.cc
            +++ b/storage/innobase/row/row0purge.cc
            @@ -615,6 +615,8 @@ row_purge_del_mark(
                   const auto type= node->index->type;
                   if (type & (DICT_FTS | DICT_CORRUPT))
                     continue;
            +      if (dict_index_get_online_status(node->index) > ONLINE_INDEX_CREATION)
            +        continue;
                   if (UNIV_UNLIKELY(DICT_VIRTUAL & type) && !node->index->is_committed() &&
                       node->index->has_new_v_col())
                     continue;
            

            Above patch fixes the issue. To be frank, this issue can happen only when we're dealing with partition table.
            Because within single table, added index are marked as corrupted if it fails during inplace_alter_table() or
            during mdl timeout.

            thiru Thirunarayanan Balathandayuthapani added a comment - diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index f4db252b069..aa315098419 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -615,6 +615,8 @@ row_purge_del_mark( const auto type= node->index->type; if (type & (DICT_FTS | DICT_CORRUPT)) continue; + if (dict_index_get_online_status(node->index) > ONLINE_INDEX_CREATION) + continue; if (UNIV_UNLIKELY(DICT_VIRTUAL & type) && !node->index->is_committed() && node->index->has_new_v_col()) continue; Above patch fixes the issue. To be frank, this issue can happen only when we're dealing with partition table. Because within single table, added index are marked as corrupted if it fails during inplace_alter_table() or during mdl timeout.

            It looks like you forgot to add have_debug.inc and have_debug_sync.inc as well as forgot to add a .result file. This should be OK to push after some RQG testing.

            thiru, do you have any idea why this would not affect 10.4 or 10.5? Are we missing some debug assertion there, or is the code just so much different there?

            marko Marko Mäkelä added a comment - It looks like you forgot to add have_debug.inc and have_debug_sync.inc as well as forgot to add a .result file. This should be OK to push after some RQG testing. thiru , do you have any idea why this would not affect 10.4 or 10.5? Are we missing some debug assertion there, or is the code just so much different there?

            In 10.4 and 10.5, row_purge_del_mark() has the following code to skip the corrupted indexes and aborted indexes:

               while (node->index != NULL) {
                            /* skip corrupted secondary index */
                            dict_table_skip_corrupt_index(node->index);
             
                            row_purge_skip_uncommitted_virtual_index(node->index);
             
                            if (!node->index) {
                                    break;
                            }
             
            inline bool dict_index_t::is_corrupted() const
            {
                    return UNIV_UNLIKELY(online_status >= ONLINE_INDEX_ABORTED
                                         || (type & DICT_CORRUPT)
                                         || (table && table->corrupted));
            }
            

            From 10.6 onwards, code is totally different.

            thiru Thirunarayanan Balathandayuthapani added a comment - In 10.4 and 10.5, row_purge_del_mark() has the following code to skip the corrupted indexes and aborted indexes: while (node->index != NULL) { /* skip corrupted secondary index */ dict_table_skip_corrupt_index(node->index);   row_purge_skip_uncommitted_virtual_index(node->index);   if (!node->index) { break; }   inline bool dict_index_t::is_corrupted() const { return UNIV_UNLIKELY(online_status >= ONLINE_INDEX_ABORTED || (type & DICT_CORRUPT) || (table && table->corrupted)); } From 10.6 onwards, code is totally different.
            mleich Matthias Leich added a comment - - edited

            origin/bb-10.6-MDEV-31264 86d912c570774d2c1b313a7e5e88efede9451062 2023-05-22T15:18:37+05:30
            performed quite good during RQG testing except

            [rr 684056 158393]2023-05-25  3:12:42 15 [ERROR] InnoDB: Summed data size 221, returned by func 3502
            [rr 684056 158397]2023-05-25  3:12:42 15 [ERROR] InnoDB: Apparent corruption in space 0 page 2002 of index `IBUF_DUMMY` of table `IBUF_DUMMY`
            [rr 684056 158401]2023-05-25  3:12:42 15 [ERROR] InnoDB: Summed data size 221, returned by func 3502
            [rr 684056 158405]2023-05-25  3:12:42 15 [ERROR] InnoDB: Apparent corruption in space 0 page 2002 of index `IBUF_DUMMY` of table `IBUF_DUMMY`
            [rr 684056 158410]mysqld: /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:4319: dberr_t ibuf_merge_or_delete_for_page(buf_block_t*, page_id_t, ulint): Assertion `page_validate(block->page.frame, dummy_index)' failed.
            (rr) bt
            #0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=77335817782848) at ./nptl/pthread_kill.c:44
            #1  __pthread_kill_internal (signo=6, threadid=77335817782848) at ./nptl/pthread_kill.c:78
            #2  __GI___pthread_kill (threadid=77335817782848, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
            #3  0x000039e24514d476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
            #4  0x000039e2451337f3 in __GI_abort () at ./stdlib/abort.c:79
            #5  0x000039e24513371b in __assert_fail_base (fmt=0x39e2452e8150 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x55be517327e0 "page_validate(block->page.frame, dummy_index)", 
                file=0x55be51725f20 "/data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc", line=4319, function=<optimized out>) at ./assert/assert.c:92
            #6  0x000039e245144e96 in __GI___assert_fail (assertion=0x55be517327e0 "page_validate(block->page.frame, dummy_index)", file=0x55be51725f20 "/data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc", line=4319, 
                function=0x55be51732340 "dberr_t ibuf_merge_or_delete_for_page(buf_block_t*, page_id_t, ulint)") at ./assert/assert.c:101
            #7  0x000055be505fe4f0 in ibuf_merge_or_delete_for_page (block=block@entry=0x4b461be028f0, page_id=..., zip_size=zip_size@entry=0) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:4319
            #8  0x000055be50af170a in buf_page_get_low (page_id=..., zip_size=zip_size@entry=0, rw_latch=rw_latch@entry=2, guess=<optimized out>, guess@entry=0x0, mode=mode@entry=16, mtr=mtr@entry=0x465625f224d0, err=<optimized out>, 
                allow_ibuf_merge=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/buf/buf0buf.cc:2827
            #9  0x000055be50af2d9c in buf_page_get_gen (page_id=..., zip_size=zip_size@entry=0, rw_latch=rw_latch@entry=2, guess=guess@entry=0x0, mode=mode@entry=16, mtr=mtr@entry=0x465625f224d0, err=0x465625f22480, 
                allow_ibuf_merge=true) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/buf/buf0buf.cc:2927
            #10 0x000055be505f4657 in ibuf_read_merge_pages (space_ids=space_ids@entry=0x465625f22c60, page_nos=page_nos@entry=0x465625f22ca0, n_stored=<optimized out>)
                at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:2373
            #11 0x000055be505f7ced in ibuf_insert_low (mode=mode@entry=BTR_MODIFY_PREV, op=op@entry=IBUF_OP_INSERT, no_counter=no_counter@entry=0, entry=entry@entry=0x616007250bd8, entry_size=entry_size@entry=13, 
                index=index@entry=0x6160029d6a08, page_id=..., zip_size=<optimized out>, thr=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:3269
            #12 0x000055be505fab99 in ibuf_insert (op=op@entry=IBUF_OP_INSERT, entry=entry@entry=0x616007250bd8, index=0x6160029d6a08, page_id=..., zip_size=zip_size@entry=0, thr=<optimized out>)
                at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:3599
            #13 0x000055be50a92241 in btr_cur_t::search_leaf (this=this@entry=0x465625f23f00, tuple=tuple@entry=0x616007250bd8, mode=mode@entry=PAGE_CUR_LE, latch_mode=latch_mode@entry=2114, mtr=mtr@entry=0x465625f24410)
                at /data/Server/bb-10.6-MDEV-31264/storage/innobase/btr/btr0cur.cc:1136
            #14 0x000055be507fb9c1 in row_ins_sec_index_entry_low (flags=flags@entry=0, mode=mode@entry=BTR_MODIFY_LEAF, index=index@entry=0x6160029d6a08, offsets_heap=offsets_heap@entry=0x619000ae2480, 
                heap=heap@entry=0x619000ae2980, entry=entry@entry=0x616007250bd8, trx_id=<optimized out>, thr=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3012
            #15 0x000055be507fe0d6 in row_ins_sec_index_entry (index=index@entry=0x6160029d6a08, entry=entry@entry=0x616007250bd8, thr=thr@entry=0x62100413a250, check_foreign=check_foreign@entry=true)
                at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3319
            #16 0x000055be50803896 in row_ins_index_entry (index=0x6160029d6a08, entry=0x616007250bd8, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3367
            #17 0x000055be50803b93 in row_ins_index_entry_step (node=node@entry=0x621004139f18, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3533
            #18 0x000055be508054b9 in row_ins (node=node@entry=0x621004139f18, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3658
            #19 0x000055be50805eb2 in row_ins_step (thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3787
            #20 0x000055be50857928 in row_insert_for_mysql (mysql_rec=mysql_rec@entry=0x619000ae10c8 "\377\366\004", prebuilt=0x621004139988, ins_mode=<optimized out>)
                at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0mysql.cc:1314
            #21 0x000055be504d4d61 in ha_innobase::write_row (this=0x61d000ce2cb8, record=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/handler/ha_innodb.cc:7913
            #22 0x000055be4fa4df57 in handler::ha_write_row (this=0x61d000ce2cb8, buf=0x619000ae10c8 "\377\366\004") at /data/Server/bb-10.6-MDEV-31264/sql/handler.cc:7624
            #23 0x000055be4f06ca60 in write_record (thd=thd@entry=0x62b00013b218, table=table@entry=0x619000adfc98, info=info@entry=0x465625f25770, sink=sink@entry=0x0) at /data/Server/bb-10.6-MDEV-31264/sql/sql_insert.cc:2157
            #24 0x000055be4f0860ae in mysql_insert (thd=thd@entry=0x62b00013b218, table_list=<optimized out>, fields=..., values_list=..., update_fields=..., update_values=..., duplic=<optimized out>, ignore=<optimized out>, 
                result=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/sql/sql_insert.cc:1129
            #25 0x000055be4f156e73 in mysql_execute_command (thd=thd@entry=0x62b00013b218, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:4570
            #26 0x000055be4f16441a in mysql_parse (thd=thd@entry=0x62b00013b218, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x465625f26620) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:8036
            #27 0x000055be4f16ac55 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x62b00013b218, 
                packet=packet@entry=0x629001130219 "INSERT IGNORE INTO t1 ( id, k) VALUES ( NULL, 269418496 ) /* E_R Thread1 QNO 1566 CON_ID 15 */ ", packet_length=packet_length@entry=95, blocking=blocking@entry=true)
                at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:1896
            #28 0x000055be4f16fb5c in do_command (thd=0x62b00013b218, blocking=blocking@entry=true) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:1409
            #29 0x000055be4f59b2d6 in do_handle_one_connection (connect=<optimized out>, connect@entry=0x608000002d38, put_in_cache=put_in_cache@entry=true) at /data/Server/bb-10.6-MDEV-31264/sql/sql_connect.cc:1416
            #30 0x000055be4f59b93c in handle_one_connection (arg=0x608000002d38) at /data/Server/bb-10.6-MDEV-31264/sql/sql_connect.cc:1318
            #31 0x000039e24519fb43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
            #32 0x000039e245230bb4 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:100
            (rr) 
             
            sdp:/data1/results/1685009505/TBR-1510$ _RR_TRACE_DIR=./1/rr rr replay --mark-stdio
             
            This failure was observed on
                origin/bb-10.9-MDEV-29911 e9a6dc7ae59dec89bdbb1d60bcbdc76bc3d583c8 2023-05-11T14:29:56+03:00
                origin/10.6 270eeeb52358db8b76745dfb5fc0e78118a6a2bf 2023-05-23T12:25:39+03:00 with some patch
            too but there it was very rare.
             
            RQG
            ====
            # git clone https://github.com/mleich1/rqg --branch <pick the right branch> RQG
            #
            # GIT_SHOW: HEAD -> master, origin/master, origin/HEAD 9b710460ff8c54260b0e0649f029efa932aee22c 2023-05-23T14:33:37+02:00
            # rqg.pl  : Version 4.2.1 (2022-12)
            #
            # $RQG_HOME/rqg.pl \
            # --duration=300 \
            # --queries=10000000 \
            # --no_mask \
            # --seed=random \
            # --gendata=TBR-1510.zz \
            # --max_gd_duration=1200 \
            # --engine=InnoDB \
            # --rpl_mode=none \
            # --mysqld=--innodb_page_size=4K \
            # --mysqld=--innodb-buffer-pool-size=5M \
            # --mysqld=--innodb-lock-wait-timeout=50 \
            # --mysqld=--loose-idle_write_transaction_timeout=0 \
            # --mysqld=--innodb_file_per_table=0 \           # <== important
            # --mysqld=--loose_innodb_lock_schedule_algorithm=fcfs \
            # --mysqld=--loose_innodb_change_buffering=changes \   # <== important
            # --mysqld=--interactive_timeout=28800 \
            # --mysqld=--loose-table_lock_wait_timeout=50 \
            # --mysqld=--net_write_timeout=60 \
            # --mysqld=--wait_timeout=28800 \
            # --mysqld=--loose-innodb_fatal_semaphore_wait_threshold=300 \
            # --mysqld=--lock-wait-timeout=86400 \
            # --mysqld=--log-output=none \
            # --mysqld=--log_bin_trust_function_creators=1 \
            # --mysqld=--loose-idle_transaction_timeout=0 \
            # --mysqld=--loose-plugin-load-add=provider_lz4.so \
            # --mysqld=--loose-idle_readonly_transaction_timeout=0 \
            # --mysqld=--plugin-load-add=file_key_management.so \
            # --mysqld=--file-key-management-filename=$RQG_HOME/conf/mariadb/encryption_keys.txt \
            # --mysqld=--net_read_timeout=30 \
            # --mysqld=--slave_net_timeout=60 \
            # --mysqld=--log-bin \
            # --mysqld=--connect_timeout=60 \
            # --mysqld=--loose-innodb_read_only_compressed=OFF \
            # --mysqld=--loose-max-statement-time=30 \
            # --mysqld=--sync-binlog=1 \
            # --mysqld=--loose-debug_assert_on_not_freed_memory=0 \
            # --reporters=Backtrace,Deadlock,None \
            # --validators=None \
            # --grammar=TBR-1510.yy \
            # --threads=1 \
            # <local settings>
            

            mleich Matthias Leich added a comment - - edited origin/bb-10.6- MDEV-31264 86d912c570774d2c1b313a7e5e88efede9451062 2023-05-22T15:18:37+05:30 performed quite good during RQG testing except [rr 684056 158393]2023-05-25 3:12:42 15 [ERROR] InnoDB: Summed data size 221, returned by func 3502 [rr 684056 158397]2023-05-25 3:12:42 15 [ERROR] InnoDB: Apparent corruption in space 0 page 2002 of index `IBUF_DUMMY` of table `IBUF_DUMMY` [rr 684056 158401]2023-05-25 3:12:42 15 [ERROR] InnoDB: Summed data size 221, returned by func 3502 [rr 684056 158405]2023-05-25 3:12:42 15 [ERROR] InnoDB: Apparent corruption in space 0 page 2002 of index `IBUF_DUMMY` of table `IBUF_DUMMY` [rr 684056 158410]mysqld: /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:4319: dberr_t ibuf_merge_or_delete_for_page(buf_block_t*, page_id_t, ulint): Assertion `page_validate(block->page.frame, dummy_index)' failed. (rr) bt #0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=77335817782848) at ./nptl/pthread_kill.c:44 #1 __pthread_kill_internal (signo=6, threadid=77335817782848) at ./nptl/pthread_kill.c:78 #2 __GI___pthread_kill (threadid=77335817782848, signo=signo@entry=6) at ./nptl/pthread_kill.c:89 #3 0x000039e24514d476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 #4 0x000039e2451337f3 in __GI_abort () at ./stdlib/abort.c:79 #5 0x000039e24513371b in __assert_fail_base (fmt=0x39e2452e8150 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x55be517327e0 "page_validate(block->page.frame, dummy_index)", file=0x55be51725f20 "/data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc", line=4319, function=<optimized out>) at ./assert/assert.c:92 #6 0x000039e245144e96 in __GI___assert_fail (assertion=0x55be517327e0 "page_validate(block->page.frame, dummy_index)", file=0x55be51725f20 "/data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc", line=4319, function=0x55be51732340 "dberr_t ibuf_merge_or_delete_for_page(buf_block_t*, page_id_t, ulint)") at ./assert/assert.c:101 #7 0x000055be505fe4f0 in ibuf_merge_or_delete_for_page (block=block@entry=0x4b461be028f0, page_id=..., zip_size=zip_size@entry=0) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:4319 #8 0x000055be50af170a in buf_page_get_low (page_id=..., zip_size=zip_size@entry=0, rw_latch=rw_latch@entry=2, guess=<optimized out>, guess@entry=0x0, mode=mode@entry=16, mtr=mtr@entry=0x465625f224d0, err=<optimized out>, allow_ibuf_merge=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/buf/buf0buf.cc:2827 #9 0x000055be50af2d9c in buf_page_get_gen (page_id=..., zip_size=zip_size@entry=0, rw_latch=rw_latch@entry=2, guess=guess@entry=0x0, mode=mode@entry=16, mtr=mtr@entry=0x465625f224d0, err=0x465625f22480, allow_ibuf_merge=true) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/buf/buf0buf.cc:2927 #10 0x000055be505f4657 in ibuf_read_merge_pages (space_ids=space_ids@entry=0x465625f22c60, page_nos=page_nos@entry=0x465625f22ca0, n_stored=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:2373 #11 0x000055be505f7ced in ibuf_insert_low (mode=mode@entry=BTR_MODIFY_PREV, op=op@entry=IBUF_OP_INSERT, no_counter=no_counter@entry=0, entry=entry@entry=0x616007250bd8, entry_size=entry_size@entry=13, index=index@entry=0x6160029d6a08, page_id=..., zip_size=<optimized out>, thr=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:3269 #12 0x000055be505fab99 in ibuf_insert (op=op@entry=IBUF_OP_INSERT, entry=entry@entry=0x616007250bd8, index=0x6160029d6a08, page_id=..., zip_size=zip_size@entry=0, thr=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/ibuf/ibuf0ibuf.cc:3599 #13 0x000055be50a92241 in btr_cur_t::search_leaf (this=this@entry=0x465625f23f00, tuple=tuple@entry=0x616007250bd8, mode=mode@entry=PAGE_CUR_LE, latch_mode=latch_mode@entry=2114, mtr=mtr@entry=0x465625f24410) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/btr/btr0cur.cc:1136 #14 0x000055be507fb9c1 in row_ins_sec_index_entry_low (flags=flags@entry=0, mode=mode@entry=BTR_MODIFY_LEAF, index=index@entry=0x6160029d6a08, offsets_heap=offsets_heap@entry=0x619000ae2480, heap=heap@entry=0x619000ae2980, entry=entry@entry=0x616007250bd8, trx_id=<optimized out>, thr=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3012 #15 0x000055be507fe0d6 in row_ins_sec_index_entry (index=index@entry=0x6160029d6a08, entry=entry@entry=0x616007250bd8, thr=thr@entry=0x62100413a250, check_foreign=check_foreign@entry=true) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3319 #16 0x000055be50803896 in row_ins_index_entry (index=0x6160029d6a08, entry=0x616007250bd8, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3367 #17 0x000055be50803b93 in row_ins_index_entry_step (node=node@entry=0x621004139f18, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3533 #18 0x000055be508054b9 in row_ins (node=node@entry=0x621004139f18, thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3658 #19 0x000055be50805eb2 in row_ins_step (thr=thr@entry=0x62100413a250) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0ins.cc:3787 #20 0x000055be50857928 in row_insert_for_mysql (mysql_rec=mysql_rec@entry=0x619000ae10c8 "\377\366\004", prebuilt=0x621004139988, ins_mode=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/row/row0mysql.cc:1314 #21 0x000055be504d4d61 in ha_innobase::write_row (this=0x61d000ce2cb8, record=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/storage/innobase/handler/ha_innodb.cc:7913 #22 0x000055be4fa4df57 in handler::ha_write_row (this=0x61d000ce2cb8, buf=0x619000ae10c8 "\377\366\004") at /data/Server/bb-10.6-MDEV-31264/sql/handler.cc:7624 #23 0x000055be4f06ca60 in write_record (thd=thd@entry=0x62b00013b218, table=table@entry=0x619000adfc98, info=info@entry=0x465625f25770, sink=sink@entry=0x0) at /data/Server/bb-10.6-MDEV-31264/sql/sql_insert.cc:2157 #24 0x000055be4f0860ae in mysql_insert (thd=thd@entry=0x62b00013b218, table_list=<optimized out>, fields=..., values_list=..., update_fields=..., update_values=..., duplic=<optimized out>, ignore=<optimized out>, result=<optimized out>) at /data/Server/bb-10.6-MDEV-31264/sql/sql_insert.cc:1129 #25 0x000055be4f156e73 in mysql_execute_command (thd=thd@entry=0x62b00013b218, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:4570 #26 0x000055be4f16441a in mysql_parse (thd=thd@entry=0x62b00013b218, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x465625f26620) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:8036 #27 0x000055be4f16ac55 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x62b00013b218, packet=packet@entry=0x629001130219 "INSERT IGNORE INTO t1 ( id, k) VALUES ( NULL, 269418496 ) /* E_R Thread1 QNO 1566 CON_ID 15 */ ", packet_length=packet_length@entry=95, blocking=blocking@entry=true) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:1896 #28 0x000055be4f16fb5c in do_command (thd=0x62b00013b218, blocking=blocking@entry=true) at /data/Server/bb-10.6-MDEV-31264/sql/sql_parse.cc:1409 #29 0x000055be4f59b2d6 in do_handle_one_connection (connect=<optimized out>, connect@entry=0x608000002d38, put_in_cache=put_in_cache@entry=true) at /data/Server/bb-10.6-MDEV-31264/sql/sql_connect.cc:1416 #30 0x000055be4f59b93c in handle_one_connection (arg=0x608000002d38) at /data/Server/bb-10.6-MDEV-31264/sql/sql_connect.cc:1318 #31 0x000039e24519fb43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #32 0x000039e245230bb4 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:100 (rr)   sdp:/data1/results/1685009505/TBR-1510$ _RR_TRACE_DIR=./1/rr rr replay --mark-stdio   This failure was observed on origin/bb-10.9-MDEV-29911 e9a6dc7ae59dec89bdbb1d60bcbdc76bc3d583c8 2023-05-11T14:29:56+03:00 origin/10.6 270eeeb52358db8b76745dfb5fc0e78118a6a2bf 2023-05-23T12:25:39+03:00 with some patch too but there it was very rare.   RQG ==== # git clone https://github.com/mleich1/rqg --branch <pick the right branch> RQG # # GIT_SHOW: HEAD -> master, origin/master, origin/HEAD 9b710460ff8c54260b0e0649f029efa932aee22c 2023-05-23T14:33:37+02:00 # rqg.pl : Version 4.2.1 (2022-12) # # $RQG_HOME/rqg.pl \ # --duration=300 \ # --queries=10000000 \ # --no_mask \ # --seed=random \ # --gendata=TBR-1510.zz \ # --max_gd_duration=1200 \ # --engine=InnoDB \ # --rpl_mode=none \ # --mysqld=--innodb_page_size=4K \ # --mysqld=--innodb-buffer-pool-size=5M \ # --mysqld=--innodb-lock-wait-timeout=50 \ # --mysqld=--loose-idle_write_transaction_timeout=0 \ # --mysqld=--innodb_file_per_table=0 \ # <== important # --mysqld=--loose_innodb_lock_schedule_algorithm=fcfs \ # --mysqld=--loose_innodb_change_buffering=changes \ # <== important # --mysqld=--interactive_timeout=28800 \ # --mysqld=--loose-table_lock_wait_timeout=50 \ # --mysqld=--net_write_timeout=60 \ # --mysqld=--wait_timeout=28800 \ # --mysqld=--loose-innodb_fatal_semaphore_wait_threshold=300 \ # --mysqld=--lock-wait-timeout=86400 \ # --mysqld=--log-output=none \ # --mysqld=--log_bin_trust_function_creators=1 \ # --mysqld=--loose-idle_transaction_timeout=0 \ # --mysqld=--loose-plugin-load-add=provider_lz4.so \ # --mysqld=--loose-idle_readonly_transaction_timeout=0 \ # --mysqld=--plugin-load-add=file_key_management.so \ # --mysqld=--file-key-management-filename=$RQG_HOME/conf/mariadb/encryption_keys.txt \ # --mysqld=--net_read_timeout=30 \ # --mysqld=--slave_net_timeout=60 \ # --mysqld=--log-bin \ # --mysqld=--connect_timeout=60 \ # --mysqld=--loose-innodb_read_only_compressed=OFF \ # --mysqld=--loose-max-statement-time=30 \ # --mysqld=--sync-binlog=1 \ # --mysqld=--loose-debug_assert_on_not_freed_memory=0 \ # --reporters=Backtrace,Deadlock,None \ # --validators=None \ # --grammar=TBR-1510.yy \ # --threads=1 \ # <local settings>

            The observed failure is something that I would try to fix related to MDEV-31088. It currently seems to me that the fix of MDEV-24569 may have been incomplete.

            marko Marko Mäkelä added a comment - The observed failure is something that I would try to fix related to MDEV-31088 . It currently seems to me that the fix of MDEV-24569 may have been incomplete.

            MDEV-31385 was filed for the corruption.

            marko Marko Mäkelä added a comment - MDEV-31385 was filed for the corruption.

            People

              thiru Thirunarayanan Balathandayuthapani
              elenst Elena Stepanova
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.