[MDEV-13898] Corruption during online table-rebuilding ALTER of ROW_FORMAT=REDUNDANT tables (assertion failure or "index is corrupt") Created: 2017-09-23  Updated: 2017-11-17  Resolved: 2017-09-25

Status: Closed
Project: MariaDB Server
Component/s: Storage Engine - InnoDB
Affects Version/s: 10.2.9
Fix Version/s: 10.2.9

Type: Bug Priority: Major
Reporter: Elena Stepanova Assignee: Marko Mäkelä
Resolution: Fixed Votes: 0
Labels: regression


 Description   

The basic test case is this:

--source include/have_innodb.inc
 
CREATE TABLE t1 (i INT) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
--send
  OPTIMIZE TABLE t1;
 
--connect (con1,localhost,root,,test)
 
INSERT INTO t1 VALUES (1);
 
--connection default
--reap
 
# Cleanup
--disconnect con1
--connection default
DROP TABLE t1;

It fails well for me, but still it's non-deterministic, so run it with --repeat=N.
I think this sync point makes it (more) reliable. I'm not sure it's the most precise place to synchronize, but I wanted to use an existing sync point:

--source include/have_innodb.inc
--source include/have_debug_sync.inc
 
CREATE TABLE t1 (i INT) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
 SET debug_sync= 'alter_table_inplace_after_lock_downgrade SIGNAL there_now WAIT_FOR go';
--send
  OPTIMIZE TABLE t1;
 
--connect (con1,localhost,root,,test)
 
SET debug_sync= 'now WAIT_FOR there_now';
INSERT INTO t1 VALUES (1);
SET debug_sync= 'now SIGNAL go';
 
--connection default
--reap
 
# Cleanup
--disconnect con1
--connection default
SET debug_sync= 'RESET';
DROP TABLE t1;

10.2 debug ca3c8d9b58fdc267b7c4df1ce3afde5ec6599344

mysqld: /data/src/10.2/storage/innobase/row/row0log.cc:3118: dberr_t row_log_table_apply_ops(que_thr_t*, row_merge_dup_t*, ut_stage_alter_t*): Assertion `0' failed.
170923 18:49:16 [ERROR] mysqld got signal 6 ;
 
#7  0x00007f76015f5ee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
#8  0x000055afddb128b8 in row_log_table_apply_ops (thr=0x7f75ac0360b0, dup=0x7f75fc0cf080, stage=0x7f75ac160100) at /data/src/10.2/storage/innobase/row/row0log.cc:3118
#9  0x000055afddb12bea in row_log_table_apply (thr=0x7f75ac0360b0, old_table=0x7f75ac067f38, table=0x7f75ac133280, stage=0x7f75ac160100) at /data/src/10.2/storage/innobase/row/row0log.cc:3186
#10 0x000055afdd9f753a in ha_innobase::inplace_alter_table (this=0x7f75ac12fe28, altered_table=0x7f75ac133280, ha_alter_info=0x7f75fc0cf3b0) at /data/src/10.2/storage/innobase/handler/handler0alter.cc:6483
#11 0x000055afdd5109e1 in handler::ha_inplace_alter_table (this=0x7f75ac12fe28, altered_table=0x7f75ac133280, ha_alter_info=0x7f75fc0cf3b0) at /data/src/10.2/sql/handler.h:3768
#12 0x000055afdd507c56 in mysql_inplace_alter_table (thd=0x7f75ac000b00, table_list=0x7f75ac012508, table=0x7f75ac12f220, altered_table=0x7f75ac133280, ha_alter_info=0x7f75fc0cf3b0, inplace_supported=HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE, target_mdl_request=0x7f75fc0cf420, alter_ctx=0x7f75fc0cffe0) at /data/src/10.2/sql/sql_table.cc:7331
#13 0x000055afdd50cf98 in mysql_alter_table (thd=0x7f75ac000b00, new_db=0x0, new_name=0x0, create_info=0x7f75fc0d0be0, table_list=0x7f75ac012508, alter_info=0x7f75fc0d0b30, order_num=0, order=0x0, ignore=false) at /data/src/10.2/sql/sql_table.cc:9323
#14 0x000055afdd50f6f6 in mysql_recreate_table (thd=0x7f75ac000b00, table_list=0x7f75ac012508, table_copy=false) at /data/src/10.2/sql/sql_table.cc:10099
#15 0x000055afdd590c8d in admin_recreate_table (thd=0x7f75ac000b00, table_list=0x7f75ac012508) at /data/src/10.2/sql/sql_admin.cc:58
#16 0x000055afdd593f15 in mysql_admin_table(THD *, TABLE_LIST *, HA_CHECK_OPT *, const char *, thr_lock_type, bool, bool, uint, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *), struct {...}, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *)) (thd=0x7f75ac000b00, tables=0x7f75ac012508, check_opt=0x7f75ac005588, operator_name=0x55afdded4cc8 "optimize", lock_type=TL_WRITE, open_for_modify=true, repair_table_use_frm=false, extra_open_options=0, prepare_func=0x0, operator_func=(int (handler::*)(handler * const, THD *, HA_CHECK_OPT *)) 0x55afdd6b2150 <handler::ha_optimize(THD*, st_ha_check_opt*)>, view_operator_func=0x0) at /data/src/10.2/sql/sql_admin.cc:1022
#17 0x000055afdd595260 in Sql_cmd_optimize_table::execute (this=0x7f75ac012b20, thd=0x7f75ac000b00) at /data/src/10.2/sql/sql_admin.cc:1364
#18 0x000055afdd44299a in mysql_execute_command (thd=0x7f75ac000b00) at /data/src/10.2/sql/sql_parse.cc:6203
#19 0x000055afdd44724a in mysql_parse (thd=0x7f75ac000b00, rawbuf=0x7f75ac012438 "OPTIMIZE TABLE t1", length=17, parser_state=0x7f75fc0d2200, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:7875
#20 0x000055afdd434efb in dispatch_command (command=COM_QUERY, thd=0x7f75ac000b00, packet=0x7f75ac13ea71 "", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:1812
#21 0x000055afdd433875 in do_command (thd=0x7f75ac000b00) at /data/src/10.2/sql/sql_parse.cc:1360
#22 0x000055afdd580ae1 in do_handle_one_connection (connect=0x55afe0590340) at /data/src/10.2/sql/sql_connect.cc:1354
#23 0x000055afdd58086e in handle_one_connection (arg=0x55afe0590340) at /data/src/10.2/sql/sql_connect.cc:1260
#24 0x000055afdd99d616 in pfs_spawn_thread (arg=0x55afe059a8b0) at /data/src/10.2/storage/perfschema/pfs.cc:1862
#25 0x00007f760353a494 in start_thread (arg=0x7f75fc0d3700) at pthread_create.c:333
#26 0x00007f76016b293f in clone () from /lib/x86_64-linux-gnu/libc.so.6

10.2 non-debug

Table	Op	Msg_type	Msg_text
test.t1	optimize	note	Table does not support optimize, doing recreate + analyze instead
test.t1	optimize	error	Index for table 't1' is corrupt; try to repair it
test.t1	optimize	status	Operation failed
Warnings:
Error	1034	Index for table 't1' is corrupt; try to repair it

The problem appeared in 10.2 tree with this commit:

commit f5a833c3e085524585d1ae5b8303c168f075dab3
Author: Marko Mäkelä <marko.makela@mariadb.com>
Date:   Wed Sep 13 15:41:04 2017 +0300
 
    Clean up the logging of virtual column values in table-rebuilding online ALTER



 Comments   
Comment by Elena Stepanova [ 2017-09-24 ]

For the record, this variation of the test case produces a very similar yet a bit different assertion failure (different place in the code):

--source include/have_innodb.inc
--source include/have_debug_sync.inc
 
CREATE TABLE t1 (i INT) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
 SET debug_sync= 'alter_table_inplace_after_lock_downgrade SIGNAL there_now WAIT_FOR go';
--send
  OPTIMIZE TABLE t1;
 
--connect (con1,localhost,root,,test)
 
SET debug_sync= 'now WAIT_FOR there_now';
INSERT INTO t1 VALUES (1),(2);
SET debug_sync= 'now SIGNAL go';
 
--connection default
--reap
 
# Cleanup
--disconnect con1
--connection default
SET debug_sync= 'RESET';
DROP TABLE t1;

The only difference in the test case is that INSERT inserts two values instead of one.

10.2 ca3c8d9b58

mysqld: /data/src/10.2/storage/innobase/row/row0log.cc:2434: const mrec_t* row_log_table_apply_op(que_thr_t*, ulint, ulint, row_merge_dup_t*, dberr_t*, mem_heap_t*, mem_heap_t*, const mrec_t*, const mrec_t*, ulint*): Assertion `0' failed.
170925  2:34:35 [ERROR] mysqld got signal 6 ;
 
#7  0x00007f836e89cee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
#8  0x000055daefac365d in row_log_table_apply_op (thr=0x7f831003ca90, trx_id_col=1, new_trx_id_col=1, dup=0x7f83604f4080, error=0x7f83604f3e24, offsets_heap=0x7f8310089be0, heap=0x7f8310060750, mrec=0x7f836036501b "\002A\001", mrec_end=0x7f8360365038 "", offsets=0x7f831013c940) at /data/src/10.2/storage/innobase/row/row0log.cc:2434
#9  0x000055daefac56df in row_log_table_apply_ops (thr=0x7f831003ca90, dup=0x7f83604f4080, stage=0x7f8310010ef0) at /data/src/10.2/storage/innobase/row/row0log.cc:3087
#10 0x000055daefac5bea in row_log_table_apply (thr=0x7f831003ca90, old_table=0x7f8310088bb8, table=0x7f8310151550, stage=0x7f8310010ef0) at /data/src/10.2/storage/innobase/row/row0log.cc:3186
#11 0x000055daef9aa53a in ha_innobase::inplace_alter_table (this=0x7f831014d8f8, altered_table=0x7f8310151550, ha_alter_info=0x7f83604f43b0) at /data/src/10.2/storage/innobase/handler/handler0alter.cc:6483
#12 0x000055daef4c39e1 in handler::ha_inplace_alter_table (this=0x7f831014d8f8, altered_table=0x7f8310151550, ha_alter_info=0x7f83604f43b0) at /data/src/10.2/sql/handler.h:3768
#13 0x000055daef4bac56 in mysql_inplace_alter_table (thd=0x7f8310000b00, table_list=0x7f8310012508, table=0x7f831014ccf0, altered_table=0x7f8310151550, ha_alter_info=0x7f83604f43b0, inplace_supported=HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE, target_mdl_request=0x7f83604f4420, alter_ctx=0x7f83604f4fe0) at /data/src/10.2/sql/sql_table.cc:7331
#14 0x000055daef4bff98 in mysql_alter_table (thd=0x7f8310000b00, new_db=0x0, new_name=0x0, create_info=0x7f83604f5be0, table_list=0x7f8310012508, alter_info=0x7f83604f5b30, order_num=0, order=0x0, ignore=false) at /data/src/10.2/sql/sql_table.cc:9323
#15 0x000055daef4c26f6 in mysql_recreate_table (thd=0x7f8310000b00, table_list=0x7f8310012508, table_copy=false) at /data/src/10.2/sql/sql_table.cc:10099
#16 0x000055daef543c8d in admin_recreate_table (thd=0x7f8310000b00, table_list=0x7f8310012508) at /data/src/10.2/sql/sql_admin.cc:58
#17 0x000055daef546f15 in mysql_admin_table(THD *, TABLE_LIST *, HA_CHECK_OPT *, const char *, thr_lock_type, bool, bool, uint, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *), struct {...}, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *)) (thd=0x7f8310000b00, tables=0x7f8310012508, check_opt=0x7f8310005588, operator_name=0x55daefe87cc8 "optimize", lock_type=TL_WRITE, open_for_modify=true, repair_table_use_frm=false, extra_open_options=0, prepare_func=0x0, operator_func=(int (handler::*)(handler * const, THD *, HA_CHECK_OPT *)) 0x55daef665150 <handler::ha_optimize(THD*, st_ha_check_opt*)>, view_operator_func=0x0) at /data/src/10.2/sql/sql_admin.cc:1022
#18 0x000055daef548260 in Sql_cmd_optimize_table::execute (this=0x7f8310012b20, thd=0x7f8310000b00) at /data/src/10.2/sql/sql_admin.cc:1364
#19 0x000055daef3f599a in mysql_execute_command (thd=0x7f8310000b00) at /data/src/10.2/sql/sql_parse.cc:6203
#20 0x000055daef3fa24a in mysql_parse (thd=0x7f8310000b00, rawbuf=0x7f8310012438 "OPTIMIZE TABLE t1", length=17, parser_state=0x7f83604f7200, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:7875
#21 0x000055daef3e7efb in dispatch_command (command=COM_QUERY, thd=0x7f8310000b00, packet=0x7f8310084b11 "", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:1812
#22 0x000055daef3e6875 in do_command (thd=0x7f8310000b00) at /data/src/10.2/sql/sql_parse.cc:1360
#23 0x000055daef533ae1 in do_handle_one_connection (connect=0x55daf2c1b340) at /data/src/10.2/sql/sql_connect.cc:1354
#24 0x000055daef53386e in handle_one_connection (arg=0x55daf2c1b340) at /data/src/10.2/sql/sql_connect.cc:1260
#25 0x000055daef950616 in pfs_spawn_thread (arg=0x55daf2c258b0) at /data/src/10.2/storage/perfschema/pfs.cc:1862
#26 0x00007f83707e1494 in start_thread (arg=0x7f83604f8700) at pthread_create.c:333
#27 0x00007f836e95993f in clone () from /lib/x86_64-linux-gnu/libc.so.6

Comment by Marko Mäkelä [ 2017-09-25 ]

The problem is that two extra bytes are being written into the online log, even though there are no virtual columns in the table:

(gdb) p index.online_log.tail
$4 = {block = 0x7fffdfc60000 "A\001", block_pfx = {m_size = 1048576}, 
  buf = '\276' <repeats 3904 times>, '\000' <repeats 61631 times>, 
  blocks = 0, bytes = 28, total = 28}
(gdb) p/x *index.online_log.tail.block@28
$5 = {0x41, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 
  0x7, 0x85, 0x0, 0x0, 0x1, 0x39, 0x1, 0x10, 0x80, 0x0, 0x0, 0x1, 0x0, 0x2}

The 0x0,0x2 look like empty data for virtual columns. The parameter renaming in MySQL 5.7 misled me. Here is the fix:

diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 1385238f560..747959fcde5 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -853,7 +853,7 @@ row_log_table_low_redundant(
 
 	size = rec_get_converted_size_temp(
 		index, tuple->fields, tuple->n_fields, &extra_size);
-	ulint v_size = ventry
+	ulint v_size = num_v
 		? rec_get_converted_size_temp_v(index, ventry) : 0;
 
 	mrec_size = ROW_LOG_HEADER_SIZE + size + v_size + (extra_size >= 0x80);
@@ -909,12 +909,10 @@ row_log_table_low_redundant(
 		rec_convert_dtuple_to_temp(
 			b + extra_size, index, tuple->fields, tuple->n_fields);
 		b += size;
-		if (ventry) {
+		ut_ad(!num_v == !v_size);
+		if (num_v) {
                         rec_convert_dtuple_to_temp_v(b, new_index, ventry);
 			b += v_size;
-		}
-
-		if (num_v) {
 			if (o_ventry) {
 				rec_convert_dtuple_to_temp_v(
 					b, new_index, o_ventry);

Generated at Thu Feb 08 08:09:13 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.