Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-22332

Assertion `mtr_started == mtr.is_active()' failed in row_merge_read_clustered_index on shutdown, InnoDB: Apparent corruption of an index page upon recovery

    XMLWordPrintable

Details

    Description

      The test case below is non-deterministic. It fails almost every time for me, but it clearly depends on timing. Run several times and/or try to change the sleep. Please don't put it into the test suite with sleep, synchronize properly instead.

      --source include/have_innodb.inc
      --source include/have_sequence.inc
       
      CREATE TABLE t1 (a float unsigned NOT NULL) ENGINE=InnoDB;
      INSERT INTO t1 SELECT 1 FROM seq_1_to_65962;
      SELECT COUNT(*) FROM t1;
       
      --connect (con1,localhost,root,,)
      --send OPTIMIZE TABLE t1
       
      --connection default
      --sleep 1
      --source include/restart_mysqld.inc
       
      DROP TABLE t1;
      

      Sometimes it only crashes on shutdown:

      10.5 98003440

      2020-04-21 21:19:39 0 [Note] /data/bld/10.5-debug-nightly/bin/mariadbd (initiated by: root[root] @ localhost []): Normal shutdown
      2020-04-21 21:19:39 0 [Note] Event Scheduler: Purging the queue. 0 events
      mariadbd: /data/src/10.5/storage/innobase/row/row0merge.cc:2723: dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const dict_add_v_col_t*, const ulint*, ulint, ib_sequence_t&, row_merge_block_t*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, row_merge_block_t*, TABLE*, bool): Assertion `mtr_started == mtr.is_active()' failed.
      200421 21:19:39 [ERROR] mysqld got signal 6 ;
       
      #6  0x00007f5cfd0b0e67 in __assert_fail_base (fmt=<optimized out>, assertion=assertion@entry=0x564d2459b498 "mtr_started == mtr.is_active()", file=file@entry=0x564d2459a608 "/data/src/10.5/storage/innobase/row/row0merge.cc", line=line@entry=2723, function=function@entry=0x564d2459e320 <row_merge_read_clustered_index(trx_t*, TABLE*, dict_table_t const*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, unsigned long const*, unsigned long, dtuple_t const*, dict_add_v_col_t const*, unsigned long const*, unsigned long, ib_sequence_t&, unsigned char*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, unsigned char*, TABLE*, bool)::__PRETTY_FUNCTION__> "dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const"...) at assert.c:92
      #7  0x00007f5cfd0b0f12 in __GI___assert_fail (assertion=0x564d2459b498 "mtr_started == mtr.is_active()", file=0x564d2459a608 "/data/src/10.5/storage/innobase/row/row0merge.cc", line=2723, function=0x564d2459e320 <row_merge_read_clustered_index(trx_t*, TABLE*, dict_table_t const*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, unsigned long const*, unsigned long, dtuple_t const*, dict_add_v_col_t const*, unsigned long const*, unsigned long, ib_sequence_t&, unsigned char*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, unsigned char*, TABLE*, bool)::__PRETTY_FUNCTION__> "dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const"...) at assert.c:101
      #8  0x0000564d23dbba40 in row_merge_read_clustered_index (trx=0x7f5cf80e8290, table=0x7f5cf25add70, old_table=0x7f5cbc0408a8, new_table=0x7f5cb001c078, online=true, index=0x7f5cb001b8e0, fts_sort_idx=0x0, psort_info=0x0, files=0x7f5cb001efa8, key_numbers=0x7f5cb001b8e8, n_index=1, defaults=0x0, add_v=0x0, col_map=0x7f5cb001b938, add_autoinc=18446744073709551615, sequence=..., block=0x7f5cd8cfa000 <error: Cannot access memory at address 0x7f5cd8cfa000>, skip_pk_sort=true, tmpfd=0x7f5cf25acd80, stage=0x7f5cb001ef38, pct_cost=50, crypt_block=0x0, eval_table=0x7f5cf25add70, allow_not_null=false) at /data/src/10.5/storage/innobase/row/row0merge.cc:2723
      #9  0x0000564d23dc1aba in row_merge_build_indexes (trx=0x7f5cf80e8290, old_table=0x7f5cbc0408a8, new_table=0x7f5cb001c078, online=true, indexes=0x7f5cb001b8e0, key_numbers=0x7f5cb001b8e8, n_indexes=1, table=0x7f5cf25add70, defaults=0x0, col_map=0x7f5cb001b938, add_autoinc=18446744073709551615, sequence=..., skip_pk_sort=true, stage=0x7f5cb001ef38, add_v=0x0, eval_table=0x7f5cf25add70, allow_not_null=false) at /data/src/10.5/storage/innobase/row/row0merge.cc:4534
      #10 0x0000564d23c68911 in ha_innobase::inplace_alter_table (this=0x7f5cbc0fa050, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0) at /data/src/10.5/storage/innobase/handler/handler0alter.cc:8319
      #11 0x0000564d2357030b in handler::ha_inplace_alter_table (this=0x7f5cbc0fa050, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0) at /data/src/10.5/sql/handler.h:4509
      #12 0x0000564d23563340 in mysql_inplace_alter_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0, table=0x7f5cbc220058, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0, inplace_supported=HA_ALTER_INPLACE_COPY_NO_LOCK, target_mdl_request=0x7f5cf25aeb30, alter_ctx=0x7f5cf25af680) at /data/src/10.5/sql/sql_table.cc:7872
      #13 0x0000564d2356ad44 in mysql_alter_table (thd=0x7f5cb0000b18, new_db=0x564d2420dee0 <null_clex_str>, new_name=0x564d2420dee0 <null_clex_str>, create_info=0x7f5cf25b0270, table_list=0x7f5cb00125b0, alter_info=0x7f5cf25b01a0, order_num=0, order=0x0, ignore=false, if_exists=false) at /data/src/10.5/sql/sql_table.cc:10444
      #14 0x0000564d2356e1b7 in mysql_recreate_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0, table_copy=false) at /data/src/10.5/sql/sql_table.cc:11378
      #15 0x0000564d23619761 in admin_recreate_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0) at /data/src/10.5/sql/sql_admin.cc:58
      #16 0x0000564d2361cb15 in mysql_admin_table(THD *, TABLE_LIST *, HA_CHECK_OPT *, const char *, thr_lock_type, bool, bool, uint, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *), struct {...}, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *)) (thd=0x7f5cb0000b18, tables=0x7f5cb00125b0, check_opt=0x7f5cb0005dc0, operator_name=0x564d2426b1e8 "optimize", lock_type=TL_WRITE, org_open_for_modify=true, repair_table_use_frm=false, extra_open_options=0, prepare_func=0x0, operator_func=(int (handler::*)(handler * const, THD *, HA_CHECK_OPT *)) 0x564d237c9d74 <handler::ha_optimize(THD*, st_ha_check_opt*)>, view_operator_func=0x0) at /data/src/10.5/sql/sql_admin.cc:1030
      #17 0x0000564d2361df65 in Sql_cmd_optimize_table::execute (this=0x7f5cb0012c88, thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_admin.cc:1372
      #18 0x0000564d23470bfc in mysql_execute_command (thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_parse.cc:5912
      #19 0x0000564d2347706c in mysql_parse (thd=0x7f5cb0000b18, rawbuf=0x7f5cb00124f0 "OPTIMIZE TABLE t1", length=17, parser_state=0x7f5cf25b1520, is_com_multi=false, is_next_command=false) at /data/src/10.5/sql/sql_parse.cc:7957
      #20 0x0000564d23462861 in dispatch_command (command=COM_QUERY, thd=0x7f5cb0000b18, packet=0x7f5cb0008939 "", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.5/sql/sql_parse.cc:1840
      #21 0x0000564d23460f97 in do_command (thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_parse.cc:1359
      #22 0x0000564d2360546b in do_handle_one_connection (connect=0x564d26e0ccf8, put_in_cache=true) at /data/src/10.5/sql/sql_connect.cc:1422
      #23 0x0000564d2360519a in handle_one_connection (arg=0x564d26e0ccf8) at /data/src/10.5/sql/sql_connect.cc:1319
      #24 0x0000564d23b3b0c4 in pfs_spawn_thread (arg=0x564d26d9cb58) at /data/src/10.5/storage/perfschema/pfs.cc:2201
      #25 0x00007f5cff0394a4 in start_thread (arg=0x7f5cf25b2700) at pthread_create.c:456
      #26 0x00007f5cfd16dd0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
      

      Sometimes it additionally fails upon recovery after crashing on shutdown, as below. It appears that COUNT in the test case increases the probability of this secondary failure:

      2020-04-21 21:06:14 0 [Note] InnoDB: Starting crash recovery from checkpoint LSN=5135307
      2020-04-21 21:06:14 0 [Note] InnoDB: Starting a batch to recover 74 pages from redo log.
      2020-04-21 21:06:15 0 [ERROR] InnoDB: Record 125 is above rec heap top 120
      2020-04-21 21:06:15 0 [Note] InnoDB: Uncompressed page, stored checksum in field1 0, calculated checksums for field1: crc32 985310376, innodb 3118391444,  page type 17855 == INDEX.none 3735928559, stored checksum in field2 4622906, calculated checksums for field2: crc32 985310376, innodb 3074129174, none 3735928559,  page LSN 0 4622906, low 4 bytes of LSN at page end 2597492507, page number (if stored to page already) 6, space id (if created with >= MySQL-4.1.1 and stored already) 6
      2020-04-21 21:06:15 0 [Note] InnoDB: Page may be an index page where index id is 26
      2020-04-21 21:06:15 0 [ERROR] [FATAL] InnoDB: Apparent corruption of an index page [page id: space=6, page number=6] to be written to data file. We intentionally crash the server to prevent corrupt data from ending up in data files.
      200421 21:06:15 [ERROR] mysqld got signal 6 ;
       
      #5  0x00007f5a6c3f942a in __GI_abort () at abort.c:89
      #6  0x000056476d11305f in ib::fatal::~fatal (this=0x7f5a617de990, __in_chrg=<optimized out>) at /data/src/10.5/storage/innobase/ut/ut0ut.cc:597
      #7  0x000056476d19fb7f in buf_dblwr_assert_on_corrupt_block (block=0x7f5a61beff28) at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:793
      #8  0x000056476d19fcd2 in buf_dblwr_check_block (block=0x7f5a61beff28) at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:852
      #9  0x000056476d1a0249 in buf_dblwr_flush_buffered_writes () at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:976
      #10 0x000056476d1aeb93 in buf_flush_end (flush_type=BUF_FLUSH_LIST) at /data/src/10.5/storage/innobase/buf/buf0flu.cc:1975
      #11 0x000056476d1aed2e in buf_flush_do_batch (type=BUF_FLUSH_LIST, min_n=18446744073709551614, lsn_limit=18446744073709551615, n=0x7f5a617dec60) at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2019
      #12 0x000056476d1b137b in pc_flush_slot () at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2640
      #13 0x000056476d1b1a5f in buf_flush_page_cleaner () at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2805
      #14 0x00007f5a6e3794a4 in start_thread (arg=0x7f5a617df700) at pthread_create.c:456
      #15 0x00007f5a6c4add0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
      

      Not reproducible on 10.4.
      Didn't get any obvious problem after trying several times on a non-debug build.

      rr profile is available. Please note that it stops twice while replaying it, first with SIGUSR1, and after continuing with SIGABRT.

      Attachments

        Activity

          People

            marko Marko Mäkelä
            elenst Elena Stepanova
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.