Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-13201

Assertion `srv_undo_sources || ...` failed on shutdown during DDL operation

Details

    Description

      diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
      index e22bbd6..8b272ab 100644
      --- a/storage/innobase/trx/trx0purge.cc
      +++ b/storage/innobase/trx/trx0purge.cc
      @@ -281,6 +281,8 @@ trx_purge_add_update_undo_to_history(
                              hist_size + undo->size, MLOG_4BYTES, mtr);
              }
       
      +  DBUG_EXECUTE_IF("mdev13201", my_sleep(1000000););
      +
              /* Before any transaction-generating background threads or the
              purge have been started, recv_recovery_rollback_active() can
              start transactions in row_merge_drop_temp_indexes() and
      

      10.2 08c493c62afae0e81f with the patch above

      2017-10-05 21:08:39 140315038668544 [Note] /data/src/10.2-bug/sql/mysqld (root[root] @ localhost []): Normal shutdown
       
      2017-10-05 21:08:39 140315038668544 [Note] Event Scheduler: Purging the queue. 0 events
      2017-10-05 21:08:39 140314702509824 [Note] InnoDB: FTS optimize thread exiting.
      mysqld: /data/src/10.2-bug/storage/innobase/trx/trx0purge.cc:307: void trx_purge_add_update_undo_to_history(trx_t*, ib_page_t*, mtr_t*): Assertion `srv_undo_sources || ((srv_startup_is_before_trx_rollback_phase || trx_rollback_or_clean_is_active) && purge_sys->state == PURGE_STATE_INIT) || (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND && purge_sys->state == PURGE_STATE_DISABLED) || ((trx->undo_no == 0 || trx->in_mysql_trx_list) && srv_fast_shutdown)' failed.
      171005 21:08:41 [ERROR] mysqld got signal 6 ;
       
      #7  0x00007f9da9850ee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
      #8  0x000055e643b839f6 in trx_purge_add_update_undo_to_history (trx=0x7f9d9f3feca8, undo_page=0x7f9d9f0d4000 "", mtr=0x7f9da41263d0) at /data/src/10.2-bug/storage/innobase/trx/trx0purge.cc:300
      #9  0x000055e643bba351 in trx_undo_update_cleanup (trx=0x7f9d9f3feca8, undo_page=0x7f9d9f0d4000 "", mtr=0x7f9da41263d0) at /data/src/10.2-bug/storage/innobase/trx/trx0undo.cc:1838
      #10 0x000055e643ba903b in trx_write_serialisation_history (trx=0x7f9d9f3feca8, mtr=0x7f9da41263d0) at /data/src/10.2-bug/storage/innobase/trx/trx0trx.cc:1458
      #11 0x000055e643baa907 in trx_commit_low (trx=0x7f9d9f3feca8, mtr=0x7f9da41263d0) at /data/src/10.2-bug/storage/innobase/trx/trx0trx.cc:1939
      #12 0x000055e643baaa9a in trx_commit (trx=0x7f9d9f3feca8) at /data/src/10.2-bug/storage/innobase/trx/trx0trx.cc:2009
      #13 0x000055e643bab38f in trx_commit_for_mysql (trx=0x7f9d9f3feca8) at /data/src/10.2-bug/storage/innobase/trx/trx0trx.cc:2229
      #14 0x000055e643c8cd56 in dict_stats_exec_sql (pinfo=0x7f9d5813f958, sql=0x55e6440eb830 "PROCEDURE TABLE_STATS_SAVE () IS\nBEGIN\nDELETE FROM \"mysql/innodb_table_stats\"\nWHERE\ndatabase_name = :database_name AND\ntable_name = :table_name;\nINSERT INTO \"mysql/innodb_table_stats\"\nVALUES\n(\n:databa"..., trx=0x7f9d9f3feca8) at /data/src/10.2-bug/storage/innobase/dict/dict0stats.cc:325
      #15 0x000055e643c912b6 in dict_stats_save (table_orig=0x7f9d58143908, only_for_index=0x0) at /data/src/10.2-bug/storage/innobase/dict/dict0stats.cc:2514
      #16 0x000055e643c92fcd in dict_stats_update (table=0x7f9d58143908, stats_upd_option=DICT_STATS_EMPTY_TABLE) at /data/src/10.2-bug/storage/innobase/dict/dict0stats.cc:3269
      #17 0x000055e643b2a4fb in row_truncate_complete (table=0x7f9d58143908, trx=0x7f9d9f3fe878, fsp_flags=33, logger=@0x7f9da4127398: 0x0, err=DB_SUCCESS) at /data/src/10.2-bug/storage/innobase/row/row0trunc.cc:1275
      #18 0x000055e643b2c006 in row_truncate_table_for_mysql (table=0x7f9d58143908, trx=0x7f9d9f3fe878) at /data/src/10.2-bug/storage/innobase/row/row0trunc.cc:2114
      #19 0x000055e6439b828f in ha_innobase::truncate (this=0x7f9d5812ff28) at /data/src/10.2-bug/storage/innobase/handler/ha_innodb.cc:13567
      #20 0x000055e6436a1ef5 in handler::ha_truncate (this=0x7f9d5812ff28) at /data/src/10.2-bug/sql/handler.cc:4078
      #21 0x000055e643868712 in Sql_cmd_truncate_table::handler_truncate (this=0x7f9d58012b20, thd=0x7f9d58000b00, table_ref=0x7f9d58012508, is_tmp_table=false) at /data/src/10.2-bug/sql/sql_truncate.cc:245
      #22 0x000055e643868d4c in Sql_cmd_truncate_table::truncate_table (this=0x7f9d58012b20, thd=0x7f9d58000b00, table_ref=0x7f9d58012508) at /data/src/10.2-bug/sql/sql_truncate.cc:441
      #23 0x000055e643868ebe in Sql_cmd_truncate_table::execute (this=0x7f9d58012b20, thd=0x7f9d58000b00) at /data/src/10.2-bug/sql/sql_truncate.cc:498
      #24 0x000055e64343286c in mysql_execute_command (thd=0x7f9d58000b00) at /data/src/10.2-bug/sql/sql_parse.cc:6203
      #25 0x000055e64343711c in mysql_parse (thd=0x7f9d58000b00, rawbuf=0x7f9d58012438 "TRUNCATE TABLE t1", length=17, parser_state=0x7f9da4128200, is_com_multi=false, is_next_command=false) at /data/src/10.2-bug/sql/sql_parse.cc:7875
      #26 0x000055e643424dc0 in dispatch_command (command=COM_QUERY, thd=0x7f9d58000b00, packet=0x7f9d58008891 "TRUNCATE TABLE t1", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.2-bug/sql/sql_parse.cc:1812
      #27 0x000055e643423736 in do_command (thd=0x7f9d58000b00) at /data/src/10.2-bug/sql/sql_parse.cc:1360
      #28 0x000055e643570917 in do_handle_one_connection (connect=0x55e646fd3150) at /data/src/10.2-bug/sql/sql_connect.cc:1354
      #29 0x000055e6435706a4 in handle_one_connection (arg=0x55e646fd3150) at /data/src/10.2-bug/sql/sql_connect.cc:1260
      #30 0x000055e64398d08c in pfs_spawn_thread (arg=0x55e646fdd6c0) at /data/src/10.2-bug/storage/perfschema/pfs.cc:1862
      #31 0x00007f9dab795494 in start_thread (arg=0x7f9da4129700) at pthread_create.c:333
      #32 0x00007f9da990d93f in clone () from /lib/x86_64-linux-gnu/libc.so.6
      

      --source include/have_innodb.inc
       
      --let $stats= `SELECT @@innodb_stats_persistent`
      SET GLOBAL innodb_stats_persistent= ON;
       
      CREATE TABLE t1 (t TEXT) ENGINE=InnoDB;
      SET debug_dbug='+d,mdev13201';
      --enable_reconnect
       
      --send
        TRUNCATE TABLE t1;
       
      --connect (con1,localhost,root,,test)
      --source include/restart_mysqld.inc
      --disconnect con1
       
      --connection default
      --disable_abort_on_error
      --reap
      SELECT COUNT(*) FROM t1;
      DROP TABLE t1;
       
      eval SET GLOBAL innodb_stats_persistent= $stats;
      

      Attachments

        Issue Links

          Activity

            A little shorter test:

            --source include/have_innodb.inc
            SET GLOBAL innodb_stats_persistent= ON;
            CREATE TABLE t1 (t TEXT) ENGINE=InnoDB;
            --connect (con1,localhost,root,,test)
            SET debug_dbug='+d,mdev13201';
            --send
            TRUNCATE TABLE t1;
            --connection default
            --source include/restart_mysqld.inc
            --disconnect con1
            DROP TABLE t1;
            

            marko Marko Mäkelä added a comment - A little shorter test: --source include/have_innodb.inc SET GLOBAL innodb_stats_persistent= ON ; CREATE TABLE t1 (t TEXT) ENGINE=InnoDB; --connect (con1,localhost,root,,test) SET debug_dbug= '+d,mdev13201' ; --send TRUNCATE TABLE t1; --connection default --source include/restart_mysqld.inc --disconnect con1 DROP TABLE t1;

            This patch might not be perfect, but I think that it is good enough for now:

            diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
            index 2ea482095ce..7584effee9e 100644
            --- a/storage/innobase/dict/dict0stats.cc
            +++ b/storage/innobase/dict/dict0stats.cc
            @@ -293,7 +293,10 @@ dict_stats_exec_sql(
             	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
             	ut_ad(mutex_own(&dict_sys->mutex));
             
            -	if (!dict_stats_persistent_storage_check(true)) {
            +	extern bool dict_stats_start_shutdown;
            +
            +	if (dict_stats_start_shutdown
            +	    || !dict_stats_persistent_storage_check(true)) {
             		pars_info_free(pinfo);
             		return(DB_STATS_DO_NOT_EXIST);
             	}
            

            My concern is that there could still be a small race condition window between the shutdown and the update of persistent statistics. A proper fix would of course be to update the persistent statistics atomically as part of the DDL transaction (such as the TRUNCATE).
            The assertion failure is only a debug assertion; non-debug servers were never affected.

            marko Marko Mäkelä added a comment - This patch might not be perfect, but I think that it is good enough for now: diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 2ea482095ce..7584effee9e 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -293,7 +293,10 @@ dict_stats_exec_sql( ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); - if (!dict_stats_persistent_storage_check(true)) { + extern bool dict_stats_start_shutdown; + + if (dict_stats_start_shutdown + || !dict_stats_persistent_storage_check(true)) { pars_info_free(pinfo); return(DB_STATS_DO_NOT_EXIST); } My concern is that there could still be a small race condition window between the shutdown and the update of persistent statistics. A proper fix would of course be to update the persistent statistics atomically as part of the DDL transaction (such as the TRUNCATE). The assertion failure is only a debug assertion; non-debug servers were never affected.

            The race condition will occasionally cause test failure. I think that the simplest work-around is to introduce a debug-only flag trx_t::persistent_stats.
            The proper fix would be to update the persistent statistics within the DDL transaction.

            With this fix in place, it turns out that the test would occasionally fail due to an older problem, which I filed as MDEV-14511.

            marko Marko Mäkelä added a comment - The race condition will occasionally cause test failure. I think that the simplest work-around is to introduce a debug-only flag trx_t::persistent_stats. The proper fix would be to update the persistent statistics within the DDL transaction. With this fix in place, it turns out that the test would occasionally fail due to an older problem, which I filed as MDEV-14511 .

            People

              marko Marko Mäkelä
              elenst Elena Stepanova
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.