Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-24348

InnoDB shutdown hang with innodb_flush_sync=0

    XMLWordPrintable

    Details

      Description

      If buf_flush_ahead() happens to be executed immediately before a server shutdown is initiated, and if innodb_flush_sync=0 (nondefault setting), then the shutdown may hang because the page cleaner thread fails to finish its job. The following patch demonstrates and fixes this:

      diff --git a/mysql-test/suite/innodb/r/autoinc_debug.result b/mysql-test/suite/innodb/r/autoinc_debug.result
      index 5856c75f784..8ff8f41289a 100644
      --- a/mysql-test/suite/innodb/r/autoinc_debug.result
      +++ b/mysql-test/suite/innodb/r/autoinc_debug.result
      @@ -1,6 +1,9 @@
       CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY)ENGINE=INNODB;
       # SETTING auto_increment_increment IN CONNECTION DEFAULT
       SET AUTO_INCREMENT_INCREMENT = 1;
      +SET GLOBAL innodb_flush_sync=OFF;
      +CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
      +SET debug_dbug= '+d,ib_log_flush_ahead';
       INSERT INTO t1 VALUES(NULL);
       SELECT * FROM t1;
       id
      diff --git a/mysql-test/suite/innodb/t/autoinc_debug.test b/mysql-test/suite/innodb/t/autoinc_debug.test
      index 2e662565490..3c78d9ecd01 100644
      --- a/mysql-test/suite/innodb/t/autoinc_debug.test
      +++ b/mysql-test/suite/innodb/t/autoinc_debug.test
      @@ -8,6 +8,9 @@ CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY)ENGINE=INNODB;
       
       --echo # SETTING auto_increment_increment IN CONNECTION DEFAULT
       SET AUTO_INCREMENT_INCREMENT = 1;
      +SET GLOBAL innodb_flush_sync=OFF;
      +CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
      +SET debug_dbug= '+d,ib_log_flush_ahead';
       INSERT INTO t1 VALUES(NULL);
       SELECT * FROM t1;
       SHOW CREATE TABLE t1;
      diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
      index d1a55bae456..bdb1f3404a1 100644
      --- a/storage/innobase/buf/buf0flu.cc
      +++ b/storage/innobase/buf/buf0flu.cc
      @@ -2104,6 +2104,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
       
           if (!dirty_blocks)
           {
      +unemployed2:
             if (UNIV_UNLIKELY(lsn_limit != 0))
             {
               buf_flush_sync_lsn= 0;
      @@ -2126,6 +2127,13 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
       
           const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
       
      +#if 0 /* MDEV-12227 FIXME: enable this */
      +    ut_ad(oldest_lsn); /* dirty_blocks implies this */
      +#else
      +    if (!oldest_lsn)
      +      goto unemployed2;
      +#endif
      +
           if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
             buf_flush_sync_lsn= 0;
       
      diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
      index 4c22f05c4ba..60c91364b15 100644
      --- a/storage/innobase/mtr/mtr0mtr.cc
      +++ b/storage/innobase/mtr/mtr0mtr.cc
      @@ -889,6 +889,7 @@ inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
       	m_log.for_each_block(write_log);
       	m_commit_lsn = log_sys.get_lsn();
       	bool flush = log_close(m_commit_lsn);
      +	DBUG_EXECUTE_IF("ib_log_flush_ahead", flush=true;);
       
       	return std::make_pair(start_lsn, flush);
       }
      

      If I omit the fix to the file buf0flu.cc, the test would hang for me on a box where nproc reports 40:

      ./mtr --parallel=auto --repeat=100 innodb.autoinc_debug{,,,,,,,,,,,,,,,,,,,,,,}
      

      On the first attempt, 22 of the 23 concurrent runs hung on the first pass. The last worker hung on the 22nd pass. With the fix, all 23×100 runs completed successfully.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              marko Marko Mäkelä
              Reporter:
              marko Marko Mäkelä
              Votes:
              0 Vote for this issue
              Watchers:
              1 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: