Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-17793

Crash in purge after instant DROP and emptying the table

    Details

      Description

      Here is a simplified version of a test case that Elena Stepanova posted in MDEV-17721, with some purge control added to make it more deterministic:

      --source include/have_innodb.inc
       
      SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
      SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
       
      connect (prevent_purge,localhost,root);
      START TRANSACTION WITH CONSISTENT SNAPSHOT;
       
      connection default;
      CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB;
      INSERT INTO t1 () VALUES ();
      ALTER TABLE t1 DROP f2, ADD COLUMN f2 INT;
      ALTER TABLE t1 DROP f1;
      DELETE FROM t1;
       
      disconnect prevent_purge;
       
      ALTER TABLE t1 ADD COLUMN extra TINYINT UNSIGNED NOT NULL DEFAULT 42;
      ALTER TABLE t1 DROP extra;
      --source include/wait_all_purged.inc
      DROP TABLE t1;
      SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
      

      This will lead to a crash:

      10.4 9fdb8c59eb297c6acb15345ae732678d0d8ee838

      2018-11-22  8:54:07 1 [ERROR] InnoDB: Trying to access update undo rec field 7 in index `GEN_CLUST_INDEX` of table `test`.`t1` but index has only 4 fields Submit a detailed bug report to https://jira.mariadb.org/. Run also CHECK TABLE `test`.`t1`. n_fields = 2, i = 1
      mysqld: /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661: byte* trx_undo_update_rec_get_update(const byte*, dict_index_t*, ulint, trx_id_t, roll_ptr_t, ulint, mem_heap_t*, upd_t**): Assertion `0' failed.
      #8  0x000055f3803ad2de in trx_undo_update_rec_get_update (ptr=0x55f382989c2d "\360\377\377\377\377\001l\363U", index=0x7ff434080008, type=12, trx_id=70, roll_ptr=7599824392815061, info_bits=48, heap=0x55f382989b60, upd=0x55f382989090) at /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661
      #9  0x000055f380339699 in row_purge_parse_undo_rec (node=0x55f382989028, undo_rec=0x55f382989bf8 "", updated_extern=0x7ff46e708cae, thr=0x55f382988b80) at /mariadb/10.4/storage/innobase/row/row0purge.cc:1143
      

      At the time of the assertion failure, the table is empty, and dict_index_t::clear_instant_alter() has been called. The following instrumentation tells us that the caller is the DROP extra:

      diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
      index 7e6fe455b72..9035cb60e51 100644
      --- a/storage/innobase/include/dict0mem.h
      +++ b/storage/innobase/include/dict0mem.h
      @@ -2131,11 +2131,17 @@ inline void dict_index_t::clear_instant_add()
       	n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable));
       }
       
      +#include <my_stacktrace.h>
      +
       inline void dict_index_t::clear_instant_alter()
       {
       	DBUG_ASSERT(is_primary());
       	DBUG_ASSERT(n_fields == n_def);
       
      +	extern ulonglong my_thread_stack_size;
      +	fprintf(stderr, "clear %u\n", table->n_dropped());
      +	my_print_stacktrace(NULL, my_thread_stack_size, 0);
      +
       	if (!table->instant) {
       		if (is_instant()) {
       			clear_instant_add();
      

      clear 3
      stack_bottom = 0x0 thread_stack 0x49000
      /dev/shm/10.4/sql/mysqld(my_print_stacktrace+0x40)[0x56001405179f]
      mysys/stacktrace.c:269(my_print_stacktrace)[0x560013b71dc8]
      include/dict0mem.h:2145(dict_index_t::clear_instant_alter())[0x560013b60bc5]
      handler/handler0alter.cc:5500(innobase_instant_try(Alter_inplace_info const*, ha_innobase_inplace_ctx*, TABLE const*, TABLE const*, trx_t*))[0x560013b7acad]
      handler/handler0alter.cc:9888(commit_try_norebuild(Alter_inplace_info*, ha_innobase_inplace_ctx*, TABLE*, TABLE const*, trx_t*, char const*))[0x560013b6e1a5]
      handler/handler0alter.cc:10515(ha_innobase::commit_inplace_alter_table(TABLE*, Alter_inplace_info*, bool))[0x56001392502b]
      

      The message clear 3 indicates that 3 columns had been instantly dropped until that point. The test only executes 3 DROP, so it must be the last one. Because of this, the table cannot have been empty when the ADD COLUMN extra executed, and thus the ADD COLUMN extra must have updated a pre-existing metadata record.

      The undo record that purge is parsing is for the update of a hidden metadata record, for the ADD COLUMN extra, because at that time the index contained 7 fields, and because the default value is present in the undo log:

      (gdb) frame 8
      #8  0x000055f3803ad2de in trx_undo_update_rec_get_update (ptr=0x55f382989c2d "\360\377\377\377\377\001l\363U", index=0x7ff434080008, type=12, trx_id=70, roll_ptr=7599824392815061, info_bits=48, heap=0x55f382989b60, upd=0x55f382989090) at /mariadb/10.4/storage/innobase/trx/trx0rec.cc:1661
      (gdb) p/u *ptr@2
      $1 = {1, 42}
      

      Purge has to run some time between the ADD COLUMN extra and the DROP extra.

      Here is a more deterministic version of the test case, with full purge control. Unfortunately, it still is not completely deterministic:

      --source include/have_innodb.inc
       
      SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
      SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
       
      connect (prevent_purge,localhost,root);
      START TRANSACTION WITH CONSISTENT SNAPSHOT;
       
      connection default;
      CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB;
      INSERT INTO t1 () VALUES ();
      ALTER TABLE t1 DROP f2, ADD COLUMN f2 INT;
      ALTER TABLE t1 DROP f1;
      DELETE FROM t1;
       
      connection prevent_purge;
      COMMIT;
      START TRANSACTION WITH CONSISTENT SNAPSHOT;
      connection default;
       
      ALTER TABLE t1 ADD COLUMN extra TINYINT UNSIGNED NOT NULL DEFAULT 42;
      let $wait_all_purged= 1;
      --source include/wait_all_purged.inc
      ALTER TABLE t1 DROP extra;
      disconnect prevent_purge;
      let $wait_all_purged= 0;
      --source include/wait_all_purged.inc
      DROP TABLE t1;
      SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
      

      I think that a fix should be added to ALTER TABLE:

      diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
      index 8b8ccd2fc11..f6fb498b4a1 100644
      --- a/storage/innobase/handler/handler0alter.cc
      +++ b/storage/innobase/handler/handler0alter.cc
      @@ -5496,6 +5496,11 @@ static bool innobase_instant_try(
       		/* The table is empty. */
       		ut_ad(page_is_root(block->frame));
       		btr_page_empty(block, NULL, index, 0, &mtr);
      +		if (index->table->instant) {
      +			/* FIXME: Assign a new index->table->id,
      +			SYS_TABLES.TABLE_ID, SYS_INDEXES.TABLE_ID
      +			to invalidate old undo log records in purge. */
      +		}
       		index->clear_instant_alter();
       		err = DB_SUCCESS;
       		goto func_exit;
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                marko Marko Mäkelä
                Reporter:
                marko Marko Mäkelä
              • Votes:
                0 Vote for this issue
                Watchers:
                1 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: