Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-24913

Assertion !recv_no_log_write in log_write_up_to()

Details

    Description

      #0  __pthread_kill (threadid=<optimized out>, signo=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:57
      #1  0x0000562f0af35581 in my_write_core (sig=6) at /home/thiru/mariarepo/10.2/10.2-test/mysys/stacktrace.c:382
      #2  0x0000562f0a7adcde in handle_fatal_signal (sig=6) at /home/thiru/mariarepo/10.2/10.2-test/sql/signal_handler.cc:343
      #3  <signal handler called>
      #4  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
      #5  0x00007fdaf69008b1 in __GI_abort () at abort.c:79
      #6  0x00007fdaf68f042a in __assert_fail_base (fmt=0x7fdaf6a77a38 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x562f0b1276dc "!recv_no_log_write", file=file@entry=0x562f0b1272e0 "/home/thiru/mariarepo/10.2/10.2-test/storage/innobase/log/log0log.cc", line=line@entry=1088, function=function@entry=0x562f0b128c80 <log_write_up_to(unsigned long, bool)::__PRETTY_FUNCTION__> "void log_write_up_to(lsn_t, bool)") at assert.c:92
      #7  0x00007fdaf68f04a2 in __GI___assert_fail (assertion=0x562f0b1276dc "!recv_no_log_write", file=0x562f0b1272e0 "/home/thiru/mariarepo/10.2/10.2-test/storage/innobase/log/log0log.cc", line=1088, function=0x562f0b128c80 <log_write_up_to(unsigned long, bool)::__PRETTY_FUNCTION__> "void log_write_up_to(lsn_t, bool)") at assert.c:101
      #8  0x0000562f0aa3b791 in log_write_up_to (lsn=1889319, flush_to_disk=true) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/log/log0log.cc:1088
      #9  0x0000562f0ac28dd5 in buf_flush_write_block_low (bpage=0x7fdaf1211960, flush_type=BUF_FLUSH_SINGLE_PAGE, sync=true) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0flu.cc:1033
      #10 0x0000562f0ac295ef in buf_flush_page (buf_pool=0x562f0d2a0620, bpage=0x7fdaf1211960, flush_type=BUF_FLUSH_SINGLE_PAGE, sync=true) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0flu.cc:1233
      #11 0x0000562f0ac2bcd8 in buf_flush_single_page_from_LRU (buf_pool=0x562f0d2a0620) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0flu.cc:2222
      #12 0x0000562f0ac34d56 in buf_LRU_get_free_block (buf_pool=0x562f0d2a0620) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0lru.cc:1040
      #13 0x0000562f0ac156d2 in buf_page_create (page_id=..., page_size=..., mtr=0x7ffef0af0860) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0buf.cc:5501
      #14 0x0000562f0aa47abc in recv_recovery_create_page_low (page_id=..., recv_addr=0x7fdaf174e3c0) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/log/log0recv.cc:2355
      #15 0x0000562f0aa47c7f in recv_recovery_create_page_low (page_id=...) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/log/log0recv.cc:2386
      #16 0x0000562f0ac1b62a in recv_recovery_create_page (page_id=...) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/include/log0recv.h:372
      #17 0x0000562f0ac135da in buf_page_get_gen (page_id=..., page_size=..., rw_latch=2, guess=0x0, mode=10, file=0x562f0b223b50 "/home/thiru/mariarepo/10.2/10.2-test/storage/innobase/fil/fil0fil.cc", line=4514, mtr=0x7ffef0af0fd0, err=0x0) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/buf/buf0buf.cc:4827
      #18 0x0000562f0ac9a9e5 in fsp_flags_try_adjust (space_id=21, flags=33) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/fil/fil0fil.cc:4512
      #19 0x0000562f0ac9acd5 in fil_space_for_table_exists_in_mem (id=21, name=0x562f0d3d2a00 "test/t", table_flags=33) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/fil/fil0fil.cc:4572
      #20 0x0000562f0ac6f333 in dict_load_tablespace (table=0x562f0d3d03b0, ignore_err=DICT_ERR_IGNORE_RECOVER_LOCK) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/dict/dict0load.cc:2838
      #21 0x0000562f0ac6fad4 in dict_load_table_one (name=..., ignore_err=DICT_ERR_IGNORE_RECOVER_LOCK, fk_tables=std::deque with 0 elements) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/dict/dict0load.cc:2987
      #22 0x0000562f0ac6f0ec in dict_load_table (name=0x562f0d3ee530 "test/t", ignore_err=DICT_ERR_IGNORE_RECOVER_LOCK) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/dict/dict0load.cc:2798
      #23 0x0000562f0ac70640 in dict_load_table_on_id (table_id=38, ignore_err=DICT_ERR_IGNORE_RECOVER_LOCK) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/dict/dict0load.cc:3224
      #24 0x0000562f0ac506b4 in dict_table_open_on_id_low (table_id=38, ignore_err=DICT_ERR_IGNORE_RECOVER_LOCK, open_only_if_in_cache=0) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/include/dict0priv.ic:90
      #25 0x0000562f0ac5224e in dict_table_open_on_id (table_id=38, dict_locked=0, table_op=DICT_TABLE_OP_LOAD_TABLESPACE) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/dict/dict0dict.cc:827
      #26 0x0000562f0ab9bb72 in trx_resurrect_table_locks (trx=0x7fdaf1afe120, undo_ptr=0x7fdaf1aff148, undo=0x562f0d3d4cd0) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/trx/trx0trx.cc:780
      #27 0x0000562f0ab9c6c2 in trx_lists_init_at_db_start () at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/trx/trx0trx.cc:1017
      #28 0x0000562f0ab970cb in trx_sys_init_at_db_start () at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/trx/trx0sys.cc:500
      #29 0x0000562f0ab57fb1 in innobase_start_or_create_for_mysql () at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/srv/srv0start.cc:2223
      #30 0x0000562f0a9a5219 in innobase_init (p=0x562f0d1cbf00) at /home/thiru/mariarepo/10.2/10.2-test/storage/innobase/handler/ha_innodb.cc:4297
      #31 0x0000562f0a7afaad in ha_initialize_handlerton (plugin=0x562f0d1bea58) at /home/thiru/mariarepo/10.2/10.2-test/sql/handler.cc:555
      #32 0x0000562f0a54dec3 in plugin_initialize (tmp_root=0x7ffef0af76e0, plugin=0x562f0d1bea58, argc=0x562f0bae95e8 <remaining_argc>, argv=0x562f0d0893e0, options_only=false) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_plugin.cc:1417
      #33 0x0000562f0a54eb01 in plugin_init (argc=0x562f0bae95e8 <remaining_argc>, argv=0x562f0d0893e0, flags=2) at /home/thiru/mariarepo/10.2/10.2-test/sql/sql_plugin.cc:1698
      

      innodb.innodb_bug59641 test case is failing in 10.2.

      Single page flushing LRU causes the mariadb to crash. It is fixed by MDEV-23399 in 10.5+

      In 10.2, InnoDB can get the crash by running the following test cases:

      ./mtr innodb.innodb_bug47167 innodb.innodb_bug47621 innodb.innodb_bug47622 innodb.innodb_bug56947 innodb.innodb_bug57252 innodb.innodb_bug57255 innodb.innodb_bug57904 innodb.innodb_bug59307 innodb.innodb_bug59410 innodb.innodb_bug59641
      

      Failing test case is innodb.innodb_bug59641

      Attachments

        Issue Links

          Activity

            On a second thought, 10.5 could still initiate a LRU flushing batch under similar circumstances. The problem appears to occur when trx_lists_init_at_db_start() is applying recovered log to a page (MDEV-21572), and it needs to evict another recovered page to free up some space. As far as I can tell, the situation should be harmless, and we should adjust the assertion in some way.

            The following code in log_write_up_to() looks a little suspicious:

            #if UNIV_WORD_SIZE > 7
            	/* We can do a dirty read of LSN. */
            	/* NOTE: Currently doesn't do dirty read for
            	(flush_to_disk == true) case, because the log_mutex
            	contention also works as the arbitrator for write-IO
            	(fsync) bandwidth between log files and data files. */
            	if (!flush_to_disk && log_sys->write_lsn >= lsn) {
            		return;
            	}
            #endif
             
            	log_write_mutex_enter();
            	ut_ad(!recv_no_log_write);
             
            	lsn_t	limit_lsn = flush_to_disk
            		? log_sys->flushed_to_disk_lsn
            		: log_sys->write_lsn;
             
            	if (limit_lsn >= lsn) {
            		log_write_mutex_exit();
            		return;
            	}
            

            Could it be that we actually had log_sys->write_lsn >= lsn and limit_lsn >= lsn there, and the assertion is simply too early? Can you try to repeat this? And then move the assertion at the end of the above code block and retry?

            marko Marko Mäkelä added a comment - On a second thought, 10.5 could still initiate a LRU flushing batch under similar circumstances. The problem appears to occur when trx_lists_init_at_db_start() is applying recovered log to a page ( MDEV-21572 ), and it needs to evict another recovered page to free up some space. As far as I can tell, the situation should be harmless, and we should adjust the assertion in some way. The following code in log_write_up_to() looks a little suspicious: #if UNIV_WORD_SIZE > 7 /* We can do a dirty read of LSN. */ /* NOTE: Currently doesn't do dirty read for (flush_to_disk == true) case, because the log_mutex contention also works as the arbitrator for write-IO (fsync) bandwidth between log files and data files. */ if (!flush_to_disk && log_sys->write_lsn >= lsn) { return ; } #endif   log_write_mutex_enter(); ut_ad(!recv_no_log_write);   lsn_t limit_lsn = flush_to_disk ? log_sys->flushed_to_disk_lsn : log_sys->write_lsn;   if (limit_lsn >= lsn) { log_write_mutex_exit(); return ; } Could it be that we actually had log_sys->write_lsn >= lsn and limit_lsn >= lsn there, and the assertion is simply too early? Can you try to repeat this? And then move the assertion at the end of the above code block and retry?

            origin/bb-10.6-MDEV-24913 67151eadc34fae8f68591a6d4610fd0559351507 2021-02-22T17:56:57+05:30
            behaved well during RQG testing.

            mleich Matthias Leich added a comment - origin/bb-10.6- MDEV-24913 67151eadc34fae8f68591a6d4610fd0559351507 2021-02-22T17:56:57+05:30 behaved well during RQG testing.

            The assertion was too strict, and it only happens for debug build. There was no impact on non-debug builds.

            thiru Thirunarayanan Balathandayuthapani added a comment - The assertion was too strict, and it only happens for debug build. There was no impact on non-debug builds.

            People

              thiru Thirunarayanan Balathandayuthapani
              thiru Thirunarayanan Balathandayuthapani
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.