Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-26554

Table-rebuilding DDL on parent table causes crash for INSERT into child table

Details

    Description

      origin/10.6 ee39757f3c91e04a0ccbb5424fba7dd56167ad93 2021-09-01T07:44:11+03:00
      # 2021-09-07T01:48:07 [2499567] | [rr 2506142 89968]210907  1:46:10 [rr 2506142 89971][ERROR] mysqld got signal 11 ;
      # 2021-09-07T01:48:07 [2499567] | Query (0x62b0000b6238): INSERT INTO `BB` SELECT * FROM `view_E`
      # 2021-09-07T01:48:07 [2499567] | Connection ID (thread ID): 14
      # 2021-09-07T01:48:07 [2499567] | [rr 2506142 105389]Status: KILL_TIMEOUT
       
       
      sdp:/data/Results/1631001251/TBR-1186/dev/shm/vardir/1631001251/200/1/rr(rr) bt
      #0  0x00005623dbab3d0d in id_name_t::operator() (this=0x10) at /data/Server/10.6I/storage/innobase/include/dict0mem.h:513
      #1  0x00005623dbd99673 in row_ins_foreign_report_add_err (trx=0x20d977a41b48, foreign=0x61600406b708, rec=0x51282bee4070 "supremum", entry=0x61a000315ee0) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:834
      #2  0x00005623dbd9e16b in row_ins_check_foreign_constraint (check_ref=1, foreign=0x61600406b708, table=0x6180000afd08, entry=0x61a000315ee0, thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:1802
      #3  0x00005623dbd9ef09 in row_ins_check_foreign_constraints (table=0x6180000afd08, index=0x616004069008, pk=false, entry=0x61a000315ee0, thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:1950
      #4  0x00005623dbda60c4 in row_ins_sec_index_entry (index=0x616004069008, entry=0x61a000315ee0, thr=0x621000311e38, check_foreign=true) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:3274
      #5  0x00005623dbda66e1 in row_ins_index_entry (index=0x616004069008, entry=0x61a000315ee0, thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:3351
      #6  0x00005623dbda7736 in row_ins_index_entry_step (node=0x621000311af0, thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:3517
      #7  0x00005623dbda80f5 in row_ins (node=0x621000311af0, thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:3663
      #8  0x00005623dbda921d in row_ins_step (thr=0x621000311e38) at /data/Server/10.6I/storage/innobase/row/row0ins.cc:3809
      #9  0x00005623dbde8ed2 in row_insert_for_mysql (mysql_rec=0x6190016c88c8 "", prebuilt=0x621000311188, ins_mode=ROW_INS_NORMAL) at /data/Server/10.6I/storage/innobase/row/row0mysql.cc:1318
      #10 0x00005623dba6c4b1 in ha_innobase::write_row (this=0x61d0008d36b8, record=0x6190016c88c8 "") at /data/Server/10.6I/storage/innobase/handler/ha_innodb.cc:7820
      #11 0x00005623db199c7a in handler::ha_write_row (this=0x61d0008d36b8, buf=0x6190016c88c8 "") at /data/Server/10.6I/sql/handler.cc:7514
      #12 0x00005623da8f60d3 in write_record (thd=0x62b0000af218, table=0x6190016c8398, info=0x629000ddbc08, sink=0x0) at /data/Server/10.6I/sql/sql_insert.cc:2135
      #13 0x00005623da9051bf in select_insert::send_data (this=0x629000ddbbb8, values=...) at /data/Server/10.6I/sql/sql_insert.cc:4091
      #14 0x00005623dab2154b in select_result_sink::send_data_with_check (this=0x629000ddbbb8, items=..., u=0x62b0000b3380, sent=0) at /data/Server/10.6I/sql/sql_class.h:5631
      #15 0x00005623daadfcdd in end_send (join=0x629000ddbc80, join_tab=0x629000ddf730, end_of_records=false) at /data/Server/10.6I/sql/sql_select.cc:22287
      #16 0x00005623daad88d4 in evaluate_join_record (join=0x629000ddbc80, join_tab=0x629000ddf380, error=0) at /data/Server/10.6I/sql/sql_select.cc:21280
      #17 0x00005623daad77fd in sub_select (join=0x629000ddbc80, join_tab=0x629000ddf380, end_of_records=false) at /data/Server/10.6I/sql/sql_select.cc:21057
      #18 0x00005623daad62a9 in do_select (join=0x629000ddbc80, procedure=0x0) at /data/Server/10.6I/sql/sql_select.cc:20604
      #19 0x00005623daa67778 in JOIN::exec_inner (this=0x629000ddbc80) at /data/Server/10.6I/sql/sql_select.cc:4737
      #20 0x00005623daa64fa2 in JOIN::exec (this=0x629000ddbc80) at /data/Server/10.6I/sql/sql_select.cc:4515
      #21 0x00005623daa68b90 in mysql_select (thd=0x62b0000af218, tables=0x62b0000b70e0, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2202244745984, result=0x629000ddbbb8, unit=0x62b0000b3380, 
          select_lex=0x62b0000b6ab0) at /data/Server/10.6I/sql/sql_select.cc:4993
      #22 0x00005623daa3dc01 in handle_select (thd=0x62b0000af218, lex=0x62b0000b32b8, result=0x629000ddbbb8, setup_tables_done_option=1073741824) at /data/Server/10.6I/sql/sql_select.cc:545
      #23 0x00005623da9ace84 in mysql_execute_command (thd=0x62b0000af218, is_called_from_prepared_stmt=false) at /data/Server/10.6I/sql/sql_parse.cc:4711
      #24 0x00005623da9c2b5c in mysql_parse (thd=0x62b0000af218, rawbuf=0x62b0000b6238 "INSERT INTO `BB` SELECT * FROM `view_E` /* E_R Thread1 QNO 23 CON_ID 14 */", length=74, parser_state=0x289908ce6b20) at /data/Server/10.6I/sql/sql_parse.cc:8030
      #25 0x00005623da99adc7 in dispatch_command (command=COM_QUERY, thd=0x62b0000af218, packet=0x629000cb2219 "INSERT INTO `BB` SELECT * FROM `view_E` /* E_R Thread1 QNO 23 CON_ID 14 */ ", packet_length=75, blocking=true)
          at /data/Server/10.6I/sql/sql_parse.cc:1896
      #26 0x00005623da99819f in do_command (thd=0x62b0000af218, blocking=true) at /data/Server/10.6I/sql/sql_parse.cc:1404
      #27 0x00005623dad9755d in do_handle_one_connection (connect=0x608000003138, put_in_cache=true) at /data/Server/10.6I/sql/sql_connect.cc:1418
      #28 0x00005623dad96dee in handle_one_connection (arg=0x608000002c38) at /data/Server/10.6I/sql/sql_connect.cc:1312
      #29 0x00004d53061b9609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #30 0x00006d2c2bfb3293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      (rr) 
       
      RQG
      ====
      git clone https://github.com/mleich1/rqg --branch experimental RQG
       
      perl rqg.pl \
      --duration=300 \
      --queries=10000000 \
      --no_mask \
      --seed=random \
      --engine=InnoDB \
      --rpl_mode=none \
      --views \
      --mysqld=--innodb-buffer-pool-size=8M \
      --mysqld=--interactive_timeout=28800 \
      --mysqld=--innodb_stats_persistent=on \
      --mysqld=--net_write_timeout=60 \
      --mysqld=--loose_innodb_use_native_aio=0 \
      --mysqld=--log-bin \
      --mysqld=--file-key-management-filename=$RQG_HOME/conf/mariadb/encryption_keys.txt \
      --mysqld=--innodb-lock-wait-timeout=50 \
      --mysqld=--log-output=none \
      --mysqld=--loose-max-statement-time=30 \
      --mysqld=--loose-table_lock_wait_timeout=50 \
      --mysqld=--innodb_adaptive_hash_index=off \
      --mysqld=--loose-innodb_fatal_semaphore_wait_threshold=300 \
      --mysqld=--loose-idle_transaction_timeout=0 \
      --mysqld=--connect_timeout=60 \
      --mysqld=--log_bin_trust_function_creators=1 \
      --mysqld=--slave_net_timeout=60 \
      --mysqld=--plugin-load-add=file_key_management.so \
      --mysqld=--wait_timeout=28800 \
      --mysqld=--innodb_page_size=8K \
      --mysqld=--net_read_timeout=30 \
      --mysqld=--loose-innodb-sync-debug \
      --mysqld=--lock-wait-timeout=86400 \
      --mysqld=--loose_innodb_lock_schedule_algorithm=fcfs \
      --mysqld=--loose-debug_assert_on_not_freed_memory=0 \
      --mysqld=--loose-idle_readonly_transaction_timeout=0 \
      --mysqld=--loose-idle_write_transaction_timeout=0 \
      --mysqld=--sync-binlog=1 \
      --reporters=Backtrace,Deadlock1,ErrorLog \
      --validators=None \
      --threads=2 \
      --grammar=TBR-1186.yy \
      --workdir=<local settings> \
      --vardir=<local settings> \
      --mtr-build-thread=<local settings> \
      --basedir1=<local settings> \
      --script_debug=_nix_
      
      

      Attachments

        1. MDEV-26554.sh
          1 kB
        2. MDEV-26554.test
          0.7 kB
        3. MDEV-26554trunc.test
          0.8 kB
        4. TBR-1186.yy
          0.5 kB

        Issue Links

          Activity

            When running a loop of TRUNCATE TABLE parent concurrently with INSERT IGNORE INTO child, the server fails in a similar way as it does with ALTER TABLE parent FORCE, ALGORITHM=INPLACE:

            10.6 41c66ef6f76634112a3c84b85097239b40d1efad

            #6  0x00007fa92a3907f2 in __GI___assert_fail (assertion=0x56382b389433 "lock->trx == this", file=0x56382b3891a2 "/mariadb/10.6m/storage/innobase/dict/drop.cc", line=159, function=0x56382b3893b6 "dberr_t trx_t::drop_table(const dict_table_t &)") at assert.c:101
            #7  0x000056382af43565 in trx_t::drop_table (this=0x7fa924707540, table=@0x7fa8c403e028: <incomplete type>) at /mariadb/10.6m/storage/innobase/dict/drop.cc:159
            #8  0x000056382ac507e8 in ha_innobase::truncate (this=0x7fa8c403ae20) at /mariadb/10.6m/storage/innobase/handler/ha_innodb.cc:13843
            

            marko Marko Mäkelä added a comment - When running a loop of TRUNCATE TABLE parent concurrently with INSERT IGNORE INTO child, the server fails in a similar way as it does with ALTER TABLE parent FORCE, ALGORITHM=INPLACE: 10.6 41c66ef6f76634112a3c84b85097239b40d1efad #6 0x00007fa92a3907f2 in __GI___assert_fail (assertion=0x56382b389433 "lock->trx == this", file=0x56382b3891a2 "/mariadb/10.6m/storage/innobase/dict/drop.cc", line=159, function=0x56382b3893b6 "dberr_t trx_t::drop_table(const dict_table_t &)") at assert.c:101 #7 0x000056382af43565 in trx_t::drop_table (this=0x7fa924707540, table=@0x7fa8c403e028: <incomplete type>) at /mariadb/10.6m/storage/innobase/dict/drop.cc:159 #8 0x000056382ac507e8 in ha_innobase::truncate (this=0x7fa8c403ae20) at /mariadb/10.6m/storage/innobase/handler/ha_innodb.cc:13843

            I came up with an idea for how to work around the missing MDL_EXCLUSIVE:

            • In ha_innobase::commit_inplace_alter_table(commit=true), try to acquire LOCK_X on all child tables, and fail if this cannot be done. Note: the lock wait would be blocked by MDL_EXCLUSIVE on the table being altered.
            • In ha_innobase::rename_table() when renaming a table from a non-#sql to a #sql name, try to acquire LOCK_X on all child tables, and return an error if this cannot be done.
            • In ha_innobase::truncate(), try to acquire LOCK_X on all child tables, and return an error if this cannot be done.

            A different solution may be needed when we get around to implementing MDEV-22361.

            marko Marko Mäkelä added a comment - I came up with an idea for how to work around the missing MDL_EXCLUSIVE: In ha_innobase::commit_inplace_alter_table(commit=true), try to acquire LOCK_X on all child tables, and fail if this cannot be done. Note: the lock wait would be blocked by MDL_EXCLUSIVE on the table being altered. In ha_innobase::rename_table() when renaming a table from a non-#sql to a #sql name, try to acquire LOCK_X on all child tables, and return an error if this cannot be done. In ha_innobase::truncate(), try to acquire LOCK_X on all child tables, and return an error if this cannot be done. A different solution may be needed when we get around to implementing MDEV-22361.

            The following seems to fix this race condition for TRUNCATE, eventually causing the TRUNCATE to fail with an acceptable error:

            10.6 9c5835e067e99e1f85477f28d3bdc807537393a8 with patch

            mysqltest: At line 33: query 'reap' failed: ER_LOCK_DEADLOCK (1213): Deadlock found when trying to get lock; try restarting transaction
            

            diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
            index e3e9ed08eb4..cc70423ed47 100644
            --- a/storage/innobase/handler/ha_innodb.cc
            +++ b/storage/innobase/handler/ha_innodb.cc
            @@ -13792,6 +13792,19 @@ int ha_innobase::truncate()
             		error = fts_lock_tables(trx, *ib_table);
             	}
             
            +	if (error == DB_SUCCESS) {
            +		dict_sys.freeze();
            +		for (const dict_foreign_t* f : ib_table->referenced_set) {
            +			if (dict_table_t* child = f->foreign_table) {
            +				error = lock_table_for_trx(child, trx, LOCK_X);
            +				if (error != DB_SUCCESS) {
            +					break;
            +				}
            +			}
            +		}
            +		dict_sys.unfreeze();
            +	}
            +
             	/* Wait for purge threads to stop using the table. */
             	for (uint n = 15; ib_table->get_ref_count() > 1; ) {
             		if (!--n) {
            

            I will attempt a similar fix on ha_innobase::commit_inplace_alter_table() and ha_innobase::rename_table() as well.

            marko Marko Mäkelä added a comment - The following seems to fix this race condition for TRUNCATE, eventually causing the TRUNCATE to fail with an acceptable error: 10.6 9c5835e067e99e1f85477f28d3bdc807537393a8 with patch mysqltest: At line 33: query 'reap' failed: ER_LOCK_DEADLOCK (1213): Deadlock found when trying to get lock; try restarting transaction diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e3e9ed08eb4..cc70423ed47 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -13792,6 +13792,19 @@ int ha_innobase::truncate() error = fts_lock_tables(trx, *ib_table); } + if (error == DB_SUCCESS) { + dict_sys.freeze(); + for (const dict_foreign_t* f : ib_table->referenced_set) { + if (dict_table_t* child = f->foreign_table) { + error = lock_table_for_trx(child, trx, LOCK_X); + if (error != DB_SUCCESS) { + break; + } + } + } + dict_sys.unfreeze(); + } + /* Wait for purge threads to stop using the table. */ for (uint n = 15; ib_table->get_ref_count() > 1; ) { if (!--n) { I will attempt a similar fix on ha_innobase::commit_inplace_alter_table() and ha_innobase::rename_table() as well.

            If we acquire exclusive locks on the child tables before acquiring an exclusive lock on the main table or the statistics tables, no deadlocks will be reported between the concurrent INSERT and TRUNCATE. A similar fix for ha_innobase::rename_table() seems to work for ALTER TABLE or OPTIMIZE TABLE when using old_alter_table=1 a.k.a. alter_algorithm=copy.

            marko Marko Mäkelä added a comment - If we acquire exclusive locks on the child tables before acquiring an exclusive lock on the main table or the statistics tables, no deadlocks will be reported between the concurrent INSERT and TRUNCATE. A similar fix for ha_innobase::rename_table() seems to work for ALTER TABLE or OPTIMIZE TABLE when using old_alter_table=1 a.k.a. alter_algorithm=copy.

            My fix survived MDEV-26554trunc.test:

            innodb.MDEV-26554trunc 'innodb'          [ pass ]  52125
            

            I will try to create a smaller test that demonstrates that the INSERT on the child table would block the DDL operation on the parent table.

            marko Marko Mäkelä added a comment - My fix survived MDEV-26554trunc.test: innodb.MDEV-26554trunc 'innodb' [ pass ] 52125 I will try to create a smaller test that demonstrates that the INSERT on the child table would block the DDL operation on the parent table.

            People

              marko Marko Mäkelä
              mleich Matthias Leich
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.