Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-22332

Assertion `mtr_started == mtr.is_active()' failed in row_merge_read_clustered_index on shutdown, InnoDB: Apparent corruption of an index page upon recovery

    XMLWordPrintable

    Details

      Description

      The test case below is non-deterministic. It fails almost every time for me, but it clearly depends on timing. Run several times and/or try to change the sleep. Please don't put it into the test suite with sleep, synchronize properly instead.

      --source include/have_innodb.inc
      --source include/have_sequence.inc
       
      CREATE TABLE t1 (a float unsigned NOT NULL) ENGINE=InnoDB;
      INSERT INTO t1 SELECT 1 FROM seq_1_to_65962;
      SELECT COUNT(*) FROM t1;
       
      --connect (con1,localhost,root,,)
      --send OPTIMIZE TABLE t1
       
      --connection default
      --sleep 1
      --source include/restart_mysqld.inc
       
      DROP TABLE t1;
      

      Sometimes it only crashes on shutdown:

      10.5 98003440

      2020-04-21 21:19:39 0 [Note] /data/bld/10.5-debug-nightly/bin/mariadbd (initiated by: root[root] @ localhost []): Normal shutdown
      2020-04-21 21:19:39 0 [Note] Event Scheduler: Purging the queue. 0 events
      mariadbd: /data/src/10.5/storage/innobase/row/row0merge.cc:2723: dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const dict_add_v_col_t*, const ulint*, ulint, ib_sequence_t&, row_merge_block_t*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, row_merge_block_t*, TABLE*, bool): Assertion `mtr_started == mtr.is_active()' failed.
      200421 21:19:39 [ERROR] mysqld got signal 6 ;
       
      #6  0x00007f5cfd0b0e67 in __assert_fail_base (fmt=<optimized out>, assertion=assertion@entry=0x564d2459b498 "mtr_started == mtr.is_active()", file=file@entry=0x564d2459a608 "/data/src/10.5/storage/innobase/row/row0merge.cc", line=line@entry=2723, function=function@entry=0x564d2459e320 <row_merge_read_clustered_index(trx_t*, TABLE*, dict_table_t const*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, unsigned long const*, unsigned long, dtuple_t const*, dict_add_v_col_t const*, unsigned long const*, unsigned long, ib_sequence_t&, unsigned char*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, unsigned char*, TABLE*, bool)::__PRETTY_FUNCTION__> "dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const"...) at assert.c:92
      #7  0x00007f5cfd0b0f12 in __GI___assert_fail (assertion=0x564d2459b498 "mtr_started == mtr.is_active()", file=0x564d2459a608 "/data/src/10.5/storage/innobase/row/row0merge.cc", line=2723, function=0x564d2459e320 <row_merge_read_clustered_index(trx_t*, TABLE*, dict_table_t const*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, unsigned long const*, unsigned long, dtuple_t const*, dict_add_v_col_t const*, unsigned long const*, unsigned long, ib_sequence_t&, unsigned char*, bool, pfs_os_file_t*, ut_stage_alter_t*, double, unsigned char*, TABLE*, bool)::__PRETTY_FUNCTION__> "dberr_t row_merge_read_clustered_index(trx_t*, TABLE*, const dict_table_t*, dict_table_t*, bool, dict_index_t**, dict_index_t*, fts_psort_t*, merge_file_t*, const ulint*, ulint, const dtuple_t*, const"...) at assert.c:101
      #8  0x0000564d23dbba40 in row_merge_read_clustered_index (trx=0x7f5cf80e8290, table=0x7f5cf25add70, old_table=0x7f5cbc0408a8, new_table=0x7f5cb001c078, online=true, index=0x7f5cb001b8e0, fts_sort_idx=0x0, psort_info=0x0, files=0x7f5cb001efa8, key_numbers=0x7f5cb001b8e8, n_index=1, defaults=0x0, add_v=0x0, col_map=0x7f5cb001b938, add_autoinc=18446744073709551615, sequence=..., block=0x7f5cd8cfa000 <error: Cannot access memory at address 0x7f5cd8cfa000>, skip_pk_sort=true, tmpfd=0x7f5cf25acd80, stage=0x7f5cb001ef38, pct_cost=50, crypt_block=0x0, eval_table=0x7f5cf25add70, allow_not_null=false) at /data/src/10.5/storage/innobase/row/row0merge.cc:2723
      #9  0x0000564d23dc1aba in row_merge_build_indexes (trx=0x7f5cf80e8290, old_table=0x7f5cbc0408a8, new_table=0x7f5cb001c078, online=true, indexes=0x7f5cb001b8e0, key_numbers=0x7f5cb001b8e8, n_indexes=1, table=0x7f5cf25add70, defaults=0x0, col_map=0x7f5cb001b938, add_autoinc=18446744073709551615, sequence=..., skip_pk_sort=true, stage=0x7f5cb001ef38, add_v=0x0, eval_table=0x7f5cf25add70, allow_not_null=false) at /data/src/10.5/storage/innobase/row/row0merge.cc:4534
      #10 0x0000564d23c68911 in ha_innobase::inplace_alter_table (this=0x7f5cbc0fa050, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0) at /data/src/10.5/storage/innobase/handler/handler0alter.cc:8319
      #11 0x0000564d2357030b in handler::ha_inplace_alter_table (this=0x7f5cbc0fa050, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0) at /data/src/10.5/sql/handler.h:4509
      #12 0x0000564d23563340 in mysql_inplace_alter_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0, table=0x7f5cbc220058, altered_table=0x7f5cf25add70, ha_alter_info=0x7f5cf25adce0, inplace_supported=HA_ALTER_INPLACE_COPY_NO_LOCK, target_mdl_request=0x7f5cf25aeb30, alter_ctx=0x7f5cf25af680) at /data/src/10.5/sql/sql_table.cc:7872
      #13 0x0000564d2356ad44 in mysql_alter_table (thd=0x7f5cb0000b18, new_db=0x564d2420dee0 <null_clex_str>, new_name=0x564d2420dee0 <null_clex_str>, create_info=0x7f5cf25b0270, table_list=0x7f5cb00125b0, alter_info=0x7f5cf25b01a0, order_num=0, order=0x0, ignore=false, if_exists=false) at /data/src/10.5/sql/sql_table.cc:10444
      #14 0x0000564d2356e1b7 in mysql_recreate_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0, table_copy=false) at /data/src/10.5/sql/sql_table.cc:11378
      #15 0x0000564d23619761 in admin_recreate_table (thd=0x7f5cb0000b18, table_list=0x7f5cb00125b0) at /data/src/10.5/sql/sql_admin.cc:58
      #16 0x0000564d2361cb15 in mysql_admin_table(THD *, TABLE_LIST *, HA_CHECK_OPT *, const char *, thr_lock_type, bool, bool, uint, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *), struct {...}, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *)) (thd=0x7f5cb0000b18, tables=0x7f5cb00125b0, check_opt=0x7f5cb0005dc0, operator_name=0x564d2426b1e8 "optimize", lock_type=TL_WRITE, org_open_for_modify=true, repair_table_use_frm=false, extra_open_options=0, prepare_func=0x0, operator_func=(int (handler::*)(handler * const, THD *, HA_CHECK_OPT *)) 0x564d237c9d74 <handler::ha_optimize(THD*, st_ha_check_opt*)>, view_operator_func=0x0) at /data/src/10.5/sql/sql_admin.cc:1030
      #17 0x0000564d2361df65 in Sql_cmd_optimize_table::execute (this=0x7f5cb0012c88, thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_admin.cc:1372
      #18 0x0000564d23470bfc in mysql_execute_command (thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_parse.cc:5912
      #19 0x0000564d2347706c in mysql_parse (thd=0x7f5cb0000b18, rawbuf=0x7f5cb00124f0 "OPTIMIZE TABLE t1", length=17, parser_state=0x7f5cf25b1520, is_com_multi=false, is_next_command=false) at /data/src/10.5/sql/sql_parse.cc:7957
      #20 0x0000564d23462861 in dispatch_command (command=COM_QUERY, thd=0x7f5cb0000b18, packet=0x7f5cb0008939 "", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.5/sql/sql_parse.cc:1840
      #21 0x0000564d23460f97 in do_command (thd=0x7f5cb0000b18) at /data/src/10.5/sql/sql_parse.cc:1359
      #22 0x0000564d2360546b in do_handle_one_connection (connect=0x564d26e0ccf8, put_in_cache=true) at /data/src/10.5/sql/sql_connect.cc:1422
      #23 0x0000564d2360519a in handle_one_connection (arg=0x564d26e0ccf8) at /data/src/10.5/sql/sql_connect.cc:1319
      #24 0x0000564d23b3b0c4 in pfs_spawn_thread (arg=0x564d26d9cb58) at /data/src/10.5/storage/perfschema/pfs.cc:2201
      #25 0x00007f5cff0394a4 in start_thread (arg=0x7f5cf25b2700) at pthread_create.c:456
      #26 0x00007f5cfd16dd0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
      

      Sometimes it additionally fails upon recovery after crashing on shutdown, as below. It appears that COUNT in the test case increases the probability of this secondary failure:

      2020-04-21 21:06:14 0 [Note] InnoDB: Starting crash recovery from checkpoint LSN=5135307
      2020-04-21 21:06:14 0 [Note] InnoDB: Starting a batch to recover 74 pages from redo log.
      2020-04-21 21:06:15 0 [ERROR] InnoDB: Record 125 is above rec heap top 120
      2020-04-21 21:06:15 0 [Note] InnoDB: Uncompressed page, stored checksum in field1 0, calculated checksums for field1: crc32 985310376, innodb 3118391444,  page type 17855 == INDEX.none 3735928559, stored checksum in field2 4622906, calculated checksums for field2: crc32 985310376, innodb 3074129174, none 3735928559,  page LSN 0 4622906, low 4 bytes of LSN at page end 2597492507, page number (if stored to page already) 6, space id (if created with >= MySQL-4.1.1 and stored already) 6
      2020-04-21 21:06:15 0 [Note] InnoDB: Page may be an index page where index id is 26
      2020-04-21 21:06:15 0 [ERROR] [FATAL] InnoDB: Apparent corruption of an index page [page id: space=6, page number=6] to be written to data file. We intentionally crash the server to prevent corrupt data from ending up in data files.
      200421 21:06:15 [ERROR] mysqld got signal 6 ;
       
      #5  0x00007f5a6c3f942a in __GI_abort () at abort.c:89
      #6  0x000056476d11305f in ib::fatal::~fatal (this=0x7f5a617de990, __in_chrg=<optimized out>) at /data/src/10.5/storage/innobase/ut/ut0ut.cc:597
      #7  0x000056476d19fb7f in buf_dblwr_assert_on_corrupt_block (block=0x7f5a61beff28) at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:793
      #8  0x000056476d19fcd2 in buf_dblwr_check_block (block=0x7f5a61beff28) at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:852
      #9  0x000056476d1a0249 in buf_dblwr_flush_buffered_writes () at /data/src/10.5/storage/innobase/buf/buf0dblwr.cc:976
      #10 0x000056476d1aeb93 in buf_flush_end (flush_type=BUF_FLUSH_LIST) at /data/src/10.5/storage/innobase/buf/buf0flu.cc:1975
      #11 0x000056476d1aed2e in buf_flush_do_batch (type=BUF_FLUSH_LIST, min_n=18446744073709551614, lsn_limit=18446744073709551615, n=0x7f5a617dec60) at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2019
      #12 0x000056476d1b137b in pc_flush_slot () at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2640
      #13 0x000056476d1b1a5f in buf_flush_page_cleaner () at /data/src/10.5/storage/innobase/buf/buf0flu.cc:2805
      #14 0x00007f5a6e3794a4 in start_thread (arg=0x7f5a617df700) at pthread_create.c:456
      #15 0x00007f5a6c4add0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
      

      Not reproducible on 10.4.
      Didn't get any obvious problem after trying several times on a non-debug build.

      rr profile is available. Please note that it stops twice while replaying it, first with SIGUSR1, and after continuing with SIGABRT.

        Attachments

          Activity

            People

            Assignee:
            marko Marko Mäkelä
            Reporter:
            elenst Elena Stepanova
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Dates

              Created:
              Updated:
              Resolved: