Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-24547

Update fails when online alter does rollback due to MDL time out

Details

    Description

      When online alter rollbacks due to MDL time out, it doesn't mark the index online status as ONLINE_INDEX_ABORTED. Concurrent update fails to update the secondary index because the index is being dropped.

      The following test case demonstrates the problem

      diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
      index 10156c7d3ab..39c110c2df2 100644
      --- a/storage/innobase/row/row0upd.cc
      +++ b/storage/innobase/row/row0upd.cc
      @@ -2517,6 +2517,8 @@ row_upd_sec_index_entry(
       
              mem_heap_empty(heap);
       
      +       DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
      +                           "before_row_upd_sec_new_index_entry");
              /* Build a new index entry */
              entry = row_build_index_entry(node->upd_row, node->upd_ext,
                                            index, heap);
      

      Test case:

      --source include/have_innodb.inc
      create table t1(f1 char(10), f2 char(10) not null, f3 int not null,
                      f4 int not null, primary key(f3))engine=innodb;
      insert into t1 values('a','a', 1, 1), ('b','b', 2, 2), ('c', 'c', 3, 3), ('d', 'd', 4, 4);
      SET DEBUG_SYNC="row_merge_after_scan SIGNAL con1_start WAIT_FOR con1_insert";
      SET DEBUG_SYNC="innodb_commit_inplace_alter_table_wait SIGNAL con1_wait WAIT_FOR con1_update";
      send ALTER TABLE t1 ADD UNIQUE INDEX(f1(3), f4), ADD UNIQUE INDEX(f2), ALGORITHM=INPLACE;
       
      connect(con1,localhost,root,,,);
      SET DEBUG_SYNC="now WAIT_FOR con1_start";
      begin;
      INSERT INTO t1 VALUES('e','e',5, 5);
      SET DEBUG_SYNC="now SIGNAL con1_insert";
      SET DEBUG_SYNC="now WAIT_FOR con1_wait";
      SET DEBUG_SYNC="before_row_upd_sec_new_index_entry SIGNAL con1_update WAIT_FOR alter_rollback";
      SEND UPDATE t1 set f4 = 10 order by f1 desc limit 2;
       
      connection default;
      --error 1205
      reap;
      SET DEBUG_SYNC="now SIGNAL alter_rollback";
       
      connection con1;
      reap;
      commit;
       
      connection default;
      disconnect con1;
      DROP TABLE t1;
      SET DEBUG_SYNC="RESET";
      

      .opt file:

      --lock_wait_timeout=2
      

      The assert failure is

      Thread 1 (Thread 0x7fce7c4a4700 (LWP 29920)):
      #0  __pthread_kill (threadid=<optimized out>, signo=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:57
      #1  0x0000563735d38299 in my_write_core (sig=6) at /home/thiru/mariarepo/10.2/10.2-test/mysys/stacktrace.c:382
      #2  0x00005637355b12fc in handle_fatal_signal (sig=6) at /home/thiru/mariarepo/10.2/10.2-test/sql/signal_handler.cc:343
      #3  <signal handler called>
      #4  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
      #5  0x00007fce81eb28b1 in __GI_abort () at abort.c:79
      #6  0x00007fce81ea242a in __assert_fail_base (fmt=0x7fce82029a38 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x563735f8b7ca "col->ord_part", file=file@entry=0x563735f8b620 "/home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0row.cc", line=line@entry=294, function=function@entry=0x563735f8bd20 <row_build_index_entry_low(dtuple_t const*, row_ext_t const*, dict_index_t*, mem_block_info_t*, unsigned long)::__PRETTY_FUNCTION__> "dtuple_t* row_build_index_entry_low(const dtuple_t*, const row_ext_t*, dict_index_t*, mem_heap_t*, ulint)") at assert.c:92
      #7  0x00007fce81ea24a2 in __GI___assert_fail (assertion=0x563735f8b7ca "col->ord_part", file=0x563735f8b620 "/home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0row.cc", line=294, function=0x563735f8bd20 <row_build_index_entry_low(dtuple_t const*, row_ext_t const*, dict_index_t*, mem_block_info_t*, unsigned long)::__PRETTY_FUNCTION__> "dtuple_t* row_build_index_entry_low(const dtuple_t*, const row_ext_t*, dict_index_t*, mem_heap_t*, ulint)") at assert.c:101
      #8  0x000056373590ef10 in row_build_index_entry_low (row=0x7fce2801d4d0, ext=0x0, index=0x7fce240a18e0, heap=0x7fce2801de60, flag=0) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0row.cc:294
      #9  0x000056373593a995 in row_build_index_entry (row=0x7fce2801d4d0, ext=0x0, index=0x7fce240a18e0, heap=0x7fce2801de60) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/include/row0row.ic:129
      #10 0x0000563735940316 in row_upd_sec_index_entry (node=0x7fce2801c280, thr=0x7fce2801c5e8) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0upd.cc:2523
      #11 0x0000563735940524 in row_upd_sec_step (node=0x7fce2801c280, thr=0x7fce2801c5e8) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0upd.cc:2555
      #12 0x00005637359427a9 in row_upd (node=0x7fce2801c280, thr=0x7fce2801c5e8) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0upd.cc:3317
      #13 0x0000563735942afd in row_upd_step (thr=0x7fce2801c5e8) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0upd.cc:3434
      #14 0x00005637358eccf5 in row_update_for_mysql (prebuilt=0x7fce2801b710) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/row/row0mysql.cc:1825
      #15 0x00005637357b2b07 in ha_innobase::update_row (this=0x7fce2801ab28, old_row=0x7fce2801a6f0 "\375e         e         \005", new_row=0x7fce2801a6d0 "\375e         e         \005") at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/handler/ha_innodb.cc:8960
      #16 0x00005637355c0cec in handler::ha_update_row (this=0x7fce2801ab28, old_data=0x7fce2801a6f0 "\375e         e         \005", new_data=0x7fce2801a6d0 "\375e         e         \005") at /home/thiru/mariarepo/10.2/10.2-test/sql/handler.cc:6148
      #17 0x000056373542b2ff in mysql_update (thd=0x7fce28000d50, table_list=0x7fce280113f8, fields=..., values=..., conds=0x0, order_num=1, order=0x7fce28011cf0, limit=2, handle_duplicates=DUP_ERROR, ignore=false, found_return=0x7fce7c4a2c50, updated_return=0x7fce7c4a2d00) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_update.cc:819
      #18 0x000056373533b530 in mysql_execute_command (thd=0x7fce28000d50) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_parse.cc:4032
      #19 0x0000563735347215 in mysql_parse (thd=0x7fce28000d50, rawbuf=0x7fce280112f8 "UPDATE t1 set f4 = 10 order by f1 desc limit 2", length=46, parser_state=0x7fce7c4a35f0, is_com_multi=false, is_next_command=false) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_parse.cc:7762
      #20 0x0000563735335619 in dispatch_command (command=COM_QUERY, thd=0x7fce28000d50, packet=0x7fce28008b11 "UPDATE t1 set f4 = 10 order by f1 desc limit 2", packet_length=46, is_com_multi=false, is_next_command=false) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_parse.cc:1827
      #21 0x000056373533409e in do_command (thd=0x7fce28000d50) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_parse.cc:1381
      #22 0x00005637354897db in do_handle_one_connection (connect=0x563737e0ad30) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_connect.cc:1336
      #23 0x0000563735489546 in handle_one_connection (arg=0x563737e0ad30) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_connect.cc:1241
      #24 0x0000563735c8b976 in pfs_spawn_thread (arg=0x563737e4b420) at /home/thiru/mariarepo/10.2/10.2-test/storage/perfschema/pfs.cc:1869
      #25 0x00007fce82bf46db in start_thread (arg=0x7fce7c4a4700) at pthread_create.c:463
      #26 0x00007fce81f93a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      

      I think InnoDB should fix this by taking InnoDB table lock during rollback of alter table. In that case, we can avoid concurrent DML. Maybe having some extra() to indicate MDL time out and take X-lock?

      Open to suggestion for the solution though.

      Attachments

        Activity

          Thank you for creating the deterministic test case. I think that this happens by design; this was discussed when online ALTER TABLE was developed in MySQL 5.6. If the upgrade to MDL_EXCLUSIVE for committing the online ALTER TABLE operation times out, then some ADD INDEX stubs will be left behind. This garbage would be cleared on a subsequent ALTER TABLE, or by dict_table_close() when it is invoked with try_drop=true. Until then, concurrent DML connections will have to ignore the garbage indexes. I would expect us to handle this correctly in the INSERT code path, because I remember that I had to solve some problems there during the development of WL#5526 in MySQL 5.6.

          I do not think that we should add more locking to fix this. We must simply ensure that we properly skip the garbage indexes.

          marko Marko Mäkelä added a comment - Thank you for creating the deterministic test case. I think that this happens by design; this was discussed when online ALTER TABLE was developed in MySQL 5.6. If the upgrade to MDL_EXCLUSIVE for committing the online ALTER TABLE operation times out, then some ADD INDEX stubs will be left behind. This garbage would be cleared on a subsequent ALTER TABLE , or by dict_table_close() when it is invoked with try_drop=true . Until then, concurrent DML connections will have to ignore the garbage indexes. I would expect us to handle this correctly in the INSERT code path, because I remember that I had to solve some problems there during the development of WL#5526 in MySQL 5.6. I do not think that we should add more locking to fix this. We must simply ensure that we properly skip the garbage indexes.

          Patch is in bb-10.2-MDEV-24547

          thiru Thirunarayanan Balathandayuthapani added a comment - Patch is in bb-10.2- MDEV-24547

          Thank you, the proposed fix definitely is a step to the right direction.

          However, I think that when row_upd_sec_index_entry() is operating on an uncommitted index (which may be dropped by the concurrent rollback of an online ALTER TABLE…ADD INDEX), we must hold index->lock for the entire duration of the operation.

          In your proposed fix, it would seem to me that a similar race condition is possible inside row_ins_sec_index_entry() and later steps of the function. That is, I suspect that you only reduced the window of opportunity of this race condition, but did not eliminate it altogether.

          I believe that we should pass the mini-transaction and the flag BTR_ALREADY_S_LATCHED to the lower-level operations.

          marko Marko Mäkelä added a comment - Thank you, the proposed fix definitely is a step to the right direction. However, I think that when row_upd_sec_index_entry() is operating on an uncommitted index (which may be dropped by the concurrent rollback of an online ALTER TABLE…ADD INDEX ), we must hold index->lock for the entire duration of the operation. In your proposed fix, it would seem to me that a similar race condition is possible inside row_ins_sec_index_entry() and later steps of the function. That is, I suspect that you only reduced the window of opportunity of this race condition, but did not eliminate it altogether. I believe that we should pass the mini-transaction and the flag BTR_ALREADY_S_LATCHED to the lower-level operations.

          In row_ins_sec_index_entry(), we do check for online status of the index again. Patch is trying to avoid the race condition while building the secondary index tuple.

                  /* Ensure that we acquire index->lock when inserting into an
                  index with index->online_status == ONLINE_INDEX_COMPLETE, but
                  could still be subject to rollback_inplace_alter_table().
                  This prevents a concurrent change of index->online_status.
                  The memory object cannot be freed as long as we have an open
                  reference to the table, or index->table->n_ref_count > 0. */
                  const bool      check = !index->is_committed();
                  if (check) {
                          DEBUG_SYNC_C("row_ins_sec_index_enter");
                          if (mode == BTR_MODIFY_LEAF) {
                                  search_mode |= BTR_ALREADY_S_LATCHED;
                                  mtr_s_lock(dict_index_get_lock(index), &mtr);
                          } else {
                                  mtr_sx_lock(dict_index_get_lock(index), &mtr);
                          }
           
                          if (row_log_online_op_try(
                                      index, entry, thr_get_trx(thr)->id)) {
                                  goto func_exit;
                          }
                  }
          

          thiru Thirunarayanan Balathandayuthapani added a comment - In row_ins_sec_index_entry() , we do check for online status of the index again. Patch is trying to avoid the race condition while building the secondary index tuple. /* Ensure that we acquire index->lock when inserting into an index with index->online_status == ONLINE_INDEX_COMPLETE, but could still be subject to rollback_inplace_alter_table(). This prevents a concurrent change of index->online_status. The memory object cannot be freed as long as we have an open reference to the table, or index->table->n_ref_count > 0. */ const bool check = !index->is_committed(); if (check) { DEBUG_SYNC_C("row_ins_sec_index_enter"); if (mode == BTR_MODIFY_LEAF) { search_mode |= BTR_ALREADY_S_LATCHED; mtr_s_lock(dict_index_get_lock(index), &mtr); } else { mtr_sx_lock(dict_index_get_lock(index), &mtr); }   if (row_log_online_op_try( index, entry, thr_get_trx(thr)->id)) { goto func_exit; } }

          People

            thiru Thirunarayanan Balathandayuthapani
            thiru Thirunarayanan Balathandayuthapani
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.