#5 0x00007fb3469bb79b in abort () from /lib/x86_64-linux-gnu/libc.so.6
#6 0x0000000000ee7f4b in ut_dbg_assertion_failed (expr=0x133c6e0 "!other_lock || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)", file=0x133a3f0 "/home/travis/src/storage/innobase/lock/lock0lock.cc", line=6243) at /home/travis/src/storage/innobase/ut/ut0dbg.cc:61
#7 0x0000000000d6132a in lock_rec_queue_validate (locked_lock_trx_sys=0, block=0x7fb2aba06660, rec=0x7fb2ac80841f "", index=0x7fb268071e98, offsets=0x7fb344d26d80) at /home/travis/src/storage/innobase/lock/lock0lock.cc:6241
#8 0x0000000000d6381e in lock_clust_rec_read_check_and_lock (flags=0, block=0x7fb2aba06660, rec=0x7fb2ac80841f "", index=0x7fb268071e98, offsets=0x7fb344d26d80, mode=LOCK_X, gap_mode=0, thr=0x7fb290599dc0) at /home/travis/src/storage/innobase/lock/lock0lock.cc:7061
#9 0x0000000000e4804d in sel_set_rec_lock (pcur=0x7fb290598910, rec=0x7fb2ac80841f "", index=0x7fb268071e98, offsets=0x7fb344d26d80, mode=3, type=0, thr=0x7fb290599dc0, mtr=0x7fb344d27920) at /home/travis/src/storage/innobase/row/row0sel.cc:1250
#10 0x0000000000e4fb82 in row_search_mvcc (buf=0x7fb2901b5878 "\377\377\377\376\377\377\377\200", mode=PAGE_CUR_G, prebuilt=0x7fb290598748, match_mode=0, direction=1) at /home/travis/src/storage/innobase/row/row0sel.cc:4969
#11 0x0000000000ce5aa7 in ha_innobase::general_fetch (this=0x7fb290fa6208, buf=0x7fb2901b5878 "\377\377\377\376\377\377\377\200", direction=1, match_mode=0) at /home/travis/src/storage/innobase/handler/ha_innodb.cc:10024
#12 0x0000000000ce609b in ha_innobase::rnd_next (this=0x7fb290fa6208, buf=0x7fb2901b5878 "\377\377\377\376\377\377\377\200") at /home/travis/src/storage/innobase/handler/ha_innodb.cc:10237
#13 0x00000000009c3fe9 in handler::ha_rnd_next (this=0x7fb290fa6208, buf=0x7fb2901b5878 "\377\377\377\376\377\377\377\200") at /home/travis/src/sql/handler.cc:2576
#14 0x0000000000b2dd83 in rr_sequential (info=0x7fb344d28080) at /home/travis/src/sql/records.cc:485
#15 0x0000000000832440 in mysql_update (thd=0x7fb28c000f00, table_list=0x7fb28c0116f0, fields=..., values=..., conds=0x7fb28c012160, order_num=0, order=0x0, limit=18446744073709551598, handle_duplicates=DUP_ERROR, ignore=false, found_return=0x7fb344d287b0, updated_return=0x7fb344d287b8) at /home/travis/src/sql/sql_update.cc:735
#16 0x000000000074aab6 in mysql_execute_command (thd=0x7fb28c000f00) at /home/travis/src/sql/sql_parse.cc:4273
#17 0x00000000007562c6 in mysql_parse (thd=0x7fb28c000f00, rawbuf=0x7fb28c0114c8 "UPDATE `view_table1_innodb` SET `col_decimal_key_default_null` = 'lightly' WHERE `col_float_unsigned_not_null_key` < 5 /* QNO 14993 CON_ID 20 */", length=145, parser_state=0x7fb344d29080, is_com_multi=false, is_next_command=false) at /home/travis/src/sql/sql_parse.cc:7902
#18 0x0000000000743efe in dispatch_command (command=COM_QUERY, thd=0x7fb28c000f00, packet=0x7fb28c008c51 " UPDATE `view_table1_innodb` SET `col_decimal_key_default_null` = 'lightly' WHERE `col_float_unsigned_not_null_key` < 5 /* QNO 14993 CON_ID 20 */ ", packet_length=147, is_com_multi=false, is_next_command=false) at /home/travis/src/sql/sql_parse.cc:1806
#19 0x0000000000742846 in do_command (thd=0x7fb28c000f00) at /home/travis/src/sql/sql_parse.cc:1360
#20 0x000000000088f93e in do_handle_one_connection (connect=0x3712c90) at /home/travis/src/sql/sql_connect.cc:1335
#21 0x000000000088f6c0 in handle_one_connection (arg=0x3712c90) at /home/travis/src/sql/sql_connect.cc:1241
#22 0x00007fb34755ce9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#23 0x00007fb346a782ed in clone () from /lib/x86_64-linux-gnu/libc.so.6
I think that this is related to the failure that I repeated on 10.4 and 10.2 and mysql-5.7.27 today, using the test main.other_lock_MDEV-20066 that is attached to MDEV-20066. On mysql-5.7.27 the stack trace looks like this:
mysql-5.7.27
Trying to get some variables.
Some pointers may be invalid and cause the dump to abort.
Query (7f9c88005af0): DELETE FROM t3 WHERE col1 = 7
…
#6 0x0000560866645111 in ut_dbg_assertion_failed (expr=0x560866c77bfb "!other_lock", file=0x560866c77318 "/mariadb/mysql-server/storage/innobase/lock/lock0lock.cc", line=1787) at /mariadb/mysql-server/storage/innobase/ut/ut0dbg.cc:67
#7 0x00005608664995a9 in lock_rec_add_to_queue (type_mode=1059, block=0x7f9cdceaba80, heap_no=2, index=0x7f9c90f916e0, trx=0x7f9cde8c9d20, caller_owns_trx_mutex=false) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:1787
#8 0x00005608664a37d7 in lock_rec_convert_impl_to_expl_for_trx (block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, trx=0x7f9cde8c9d20, heap_no=2) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6059
#9 0x00005608664a3ab6 in lock_rec_convert_impl_to_expl (block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6113
#10 0x00005608664a4929 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, mode=LOCK_X, gap_mode=1024, thr=0x7f9c880102f0) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6404
#11 0x000056086659f42a in sel_set_rec_lock (pcur=0x7f9c8800fda0, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, mode=3, type=1024, thr=0x7f9c880102f0, mtr=0x7f9cdcbaaea0) at /mariadb/mysql-server/storage/innobase/row/row0sel.cc:1252
#12 0x00005608665a842c in row_search_mvcc (buf=0x7f9c8800f580 "\377", mode=PAGE_CUR_GE, prebuilt=0x7f9c8800fb80, match_mode=1, direction=0) at /mariadb/mysql-server/storage/innobase/row/row0sel.cc:5521
#13 0x00005608664180a4 in ha_innobase::index_read (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key_ptr=0x7f9c8801c6c0 "\a", key_len=4, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/storage/innobase/handler/ha_innodb.cc:8744
#14 0x0000560865aa4e08 in handler::index_read_map (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key=0x7f9c8801c6c0 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/sql/handler.h:2809
#15 0x0000560865a96ad8 in handler::ha_index_read_map (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key=0x7f9c8801c6c0 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/sql/handler.cc:3039
#16 0x0000560865aa0777 in handler::read_range_first (this=0x7f9c8800f290, start_key=0x7f9c8800f378, end_key=0x7f9c8800f398, eq_range_arg=true, sorted=true) at /mariadb/mysql-server/sql/handler.cc:7404
#17 0x0000560865a9e7a1 in handler::multi_range_read_next (this=0x7f9c8800f290, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/sql/handler.cc:6469
#18 0x0000560865a9f5fa in DsMrr_impl::dsmrr_next (this=0x7f9c8800f4f0, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/sql/handler.cc:6861
#19 0x000056086642a174 in ha_innobase::multi_range_read_next (this=0x7f9c8800f290, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/storage/innobase/handler/ha_innodb.cc:20571
#20 0x0000560866271bd7 in QUICK_RANGE_SELECT::get_next (this=0x7f9c88011c60) at /mariadb/mysql-server/sql/opt_range.cc:11233
#21 0x0000560865fb95a2 in rr_quick (info=0x7f9cdcbac330) at /mariadb/mysql-server/sql/records.cc:398
#22 0x00005608662a53d3 in Sql_cmd_delete::mysql_delete (this=0x7f9c88007028, thd=0x7f9c88000dd0, limit=18446744073709551615) at /mariadb/mysql-server/sql/sql_delete.cc:463
#23 0x00005608662a823a in Sql_cmd_delete::execute (this=0x7f9c88007028, thd=0x7f9c88000dd0) at /mariadb/mysql-server/sql/sql_delete.cc:1393
#24 0x0000560866094b84 in mysql_execute_command (thd=0x7f9c88000dd0, first_level=true) at /mariadb/mysql-server/sql/sql_parse.cc:3606
#25 0x000056086609a3e5 in mysql_parse (thd=0x7f9c88000dd0, parser_state=0x7f9cdcbad660) at /mariadb/mysql-server/sql/sql_parse.cc:5570
#26 0x000056086608fc60 in dispatch_command (thd=0x7f9c88000dd0, com_data=0x7f9cdcbaddf0, command=COM_QUERY) at /mariadb/mysql-server/sql/sql_parse.cc:1484
#27 0x000056086608eb77 in do_command (thd=0x7f9c88000dd0) at /mariadb/mysql-server/sql/sql_parse.cc:1025
#28 0x00005608661bd74d in handle_connection (arg=0x56086a0c9df0) at /mariadb/mysql-server/sql/conn_handler/connection_handler_per_thread.cc:306
#29 0x000056086684a2c5 in pfs_spawn_thread (arg=0x56086a0c9e40) at /mariadb/mysql-server/storage/perfschema/pfs.cc:2190
#30 0x00007f9ce3956fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#31 0x00007f9ce35104cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
current statement: INSERT INTO t3 (col1, col_text ) VALUES ( 7, 77 );
I was unable to repeat the issue on MariaDB Server 10.1.
I think that we must find and fix this bug before analyzing MDEV-20066 further, because that bug could be a consequence of this one.
Marko Mäkelä
added a comment - I think that this is related to the failure that I repeated on 10.4 and 10.2 and mysql-5.7.27 today, using the test main.other_lock_ MDEV-20066 that is attached to MDEV-20066 . On mysql-5.7.27 the stack trace looks like this:
mysql-5.7.27
Trying to get some variables.
Some pointers may be invalid and cause the dump to abort.
Query (7f9c88005af0): DELETE FROM t3 WHERE col1 = 7
…
#6 0x0000560866645111 in ut_dbg_assertion_failed (expr=0x560866c77bfb "!other_lock", file=0x560866c77318 "/mariadb/mysql-server/storage/innobase/lock/lock0lock.cc", line=1787) at /mariadb/mysql-server/storage/innobase/ut/ut0dbg.cc:67
#7 0x00005608664995a9 in lock_rec_add_to_queue (type_mode=1059, block=0x7f9cdceaba80, heap_no=2, index=0x7f9c90f916e0, trx=0x7f9cde8c9d20, caller_owns_trx_mutex=false) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:1787
#8 0x00005608664a37d7 in lock_rec_convert_impl_to_expl_for_trx (block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, trx=0x7f9cde8c9d20, heap_no=2) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6059
#9 0x00005608664a3ab6 in lock_rec_convert_impl_to_expl (block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6113
#10 0x00005608664a4929 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7f9cdceaba80, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, mode=LOCK_X, gap_mode=1024, thr=0x7f9c880102f0) at /mariadb/mysql-server/storage/innobase/lock/lock0lock.cc:6404
#11 0x000056086659f42a in sel_set_rec_lock (pcur=0x7f9c8800fda0, rec=0x7f9cdd3e807d "\200", index=0x7f9c90f916e0, offsets=0x7f9cdcbaab70, mode=3, type=1024, thr=0x7f9c880102f0, mtr=0x7f9cdcbaaea0) at /mariadb/mysql-server/storage/innobase/row/row0sel.cc:1252
#12 0x00005608665a842c in row_search_mvcc (buf=0x7f9c8800f580 "\377", mode=PAGE_CUR_GE, prebuilt=0x7f9c8800fb80, match_mode=1, direction=0) at /mariadb/mysql-server/storage/innobase/row/row0sel.cc:5521
#13 0x00005608664180a4 in ha_innobase::index_read (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key_ptr=0x7f9c8801c6c0 "\a", key_len=4, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/storage/innobase/handler/ha_innodb.cc:8744
#14 0x0000560865aa4e08 in handler::index_read_map (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key=0x7f9c8801c6c0 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/sql/handler.h:2809
#15 0x0000560865a96ad8 in handler::ha_index_read_map (this=0x7f9c8800f290, buf=0x7f9c8800f580 "\377", key=0x7f9c8801c6c0 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/mysql-server/sql/handler.cc:3039
#16 0x0000560865aa0777 in handler::read_range_first (this=0x7f9c8800f290, start_key=0x7f9c8800f378, end_key=0x7f9c8800f398, eq_range_arg=true, sorted=true) at /mariadb/mysql-server/sql/handler.cc:7404
#17 0x0000560865a9e7a1 in handler::multi_range_read_next (this=0x7f9c8800f290, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/sql/handler.cc:6469
#18 0x0000560865a9f5fa in DsMrr_impl::dsmrr_next (this=0x7f9c8800f4f0, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/sql/handler.cc:6861
#19 0x000056086642a174 in ha_innobase::multi_range_read_next (this=0x7f9c8800f290, range_info=0x7f9cdcbabf80) at /mariadb/mysql-server/storage/innobase/handler/ha_innodb.cc:20571
#20 0x0000560866271bd7 in QUICK_RANGE_SELECT::get_next (this=0x7f9c88011c60) at /mariadb/mysql-server/sql/opt_range.cc:11233
#21 0x0000560865fb95a2 in rr_quick (info=0x7f9cdcbac330) at /mariadb/mysql-server/sql/records.cc:398
#22 0x00005608662a53d3 in Sql_cmd_delete::mysql_delete (this=0x7f9c88007028, thd=0x7f9c88000dd0, limit=18446744073709551615) at /mariadb/mysql-server/sql/sql_delete.cc:463
#23 0x00005608662a823a in Sql_cmd_delete::execute (this=0x7f9c88007028, thd=0x7f9c88000dd0) at /mariadb/mysql-server/sql/sql_delete.cc:1393
#24 0x0000560866094b84 in mysql_execute_command (thd=0x7f9c88000dd0, first_level=true) at /mariadb/mysql-server/sql/sql_parse.cc:3606
#25 0x000056086609a3e5 in mysql_parse (thd=0x7f9c88000dd0, parser_state=0x7f9cdcbad660) at /mariadb/mysql-server/sql/sql_parse.cc:5570
#26 0x000056086608fc60 in dispatch_command (thd=0x7f9c88000dd0, com_data=0x7f9cdcbaddf0, command=COM_QUERY) at /mariadb/mysql-server/sql/sql_parse.cc:1484
#27 0x000056086608eb77 in do_command (thd=0x7f9c88000dd0) at /mariadb/mysql-server/sql/sql_parse.cc:1025
#28 0x00005608661bd74d in handle_connection (arg=0x56086a0c9df0) at /mariadb/mysql-server/sql/conn_handler/connection_handler_per_thread.cc:306
#29 0x000056086684a2c5 in pfs_spawn_thread (arg=0x56086a0c9e40) at /mariadb/mysql-server/storage/perfschema/pfs.cc:2190
#30 0x00007f9ce3956fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#31 0x00007f9ce35104cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
current statement: INSERT INTO t3 (col1, col_text ) VALUES ( 7, 77 );
I was unable to repeat the issue on MariaDB Server 10.1.
I think that we must find and fix this bug before analyzing MDEV-20066 further, because that bug could be a consequence of this one.
#10 0x00005633677f83c4 in lock_clust_rec_read_check_and_lock (flags=<optimized out>, block=0x7f510cf9b268, rec=0x7f510d5f007d "\200", index=0x7f50b818acc8, offsets=0x7f510cc1d050, mode=LOCK_X, gap_mode=1024, thr=0x7f50bc022ae8) at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:6130
#11 0x00005633678d6f2e in sel_set_rec_lock (pcur=0x7f50bc022268, rec=0x7f510d5f007d "\200", index=0x7f50b818acc8, offsets=<optimized out>, mode=<optimized out>, type=1024, thr=<optimized out>, mtr=<optimized out>) at /mariadb/10.2o/storage/innobase/row/row0sel.cc:1261
#12 0x00005633678d3ad1 in row_search_mvcc (buf=<optimized out>, mode=<optimized out>, prebuilt=0x7f50bc022098, match_mode=<optimized out>, direction=<optimized out>) at /mariadb/10.2o/storage/innobase/row/row0sel.cc:5133
#13 0x0000563367783291 in ha_innobase::index_read (this=0x7f50bc06fa50, buf=0x7f50bc06e748 "\377", key_ptr=<optimized out>, key_len=<optimized out>, find_flag=<optimized out>) at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9470
#14 0x00005633675c2de4 in handler::ha_index_read_map (this=0x7f50bc06fa50, buf=0x7f50bc06e748 "\377", key=0x7f50b0174070 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/10.2o/sql/handler.cc:2711
#15 0x00005633675c96b6 in handler::read_range_first (this=0x7f50bc06fa50, start_key=0x7f50bc06fb38, end_key=0x7f50bc06fb58, eq_range_arg=true, sorted=<optimized out>) at /mariadb/10.2o/sql/handler.cc:5455
#16 0x00005633675267c2 in handler::multi_range_read_next (this=0x7f50bc06fa50, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:298
#17 0x000056336752691c in Mrr_simple_index_reader::get_next (this=0x7f50bc06ffc0, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:335
#18 0x000056336752860f in DsMrr_impl::dsmrr_next (this=0x7f50bc06fe80, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:1411
#19 0x00005633676ee3f1 in QUICK_RANGE_SELECT::get_next (this=0x7f50b006e510) at /mariadb/10.2o/sql/opt_range.cc:11459
#20 0x00005633676fb05d in rr_quick (info=0x7f510cc1def0) at /mariadb/10.2o/sql/records.cc:365
#21 0x000056336771bb2c in mysql_delete (thd=0x7f50b0000cf8, table_list=<optimized out>, conds=<optimized out>, order_list=<optimized out>, limit=18446744073709551361, options=<optimized out>, result=<optimized out>) at /mariadb/10.2o/sql/sql_delete.cc:559
#22 0x0000563367408802 in mysql_execute_command (thd=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:4371
#23 0x0000563367400365 in mysql_parse (thd=0x7f50b0000cf8, rawbuf=0x7f50b0010640 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=0x7f510cc1f720, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
#24 0x00005633673fd508 in dispatch_command (command=COM_QUERY, thd=0x7f50b0000cf8, packet=0x7f50b0008329 "DELETE FROM t3 WHERE col1 = 7", packet_length=29, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:1831
While we are converting the implicit lock (which conflicts with our pending lock request) to an explicit one, the owning transaction is no longer listed in trx_sys->rw_trx_list, because it is about to be committed in another thread after having inserted exactly 1 record:
#7 0x00005633677f9f8e in PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (
is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
The table contains the following records:
(col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0x6cf5,update,77),(7,0x6d17,insert,77)
(col_text,col1)=(77,4),(77,7), PAGE_MAX_TRX_ID=0x6d17
We have trx_sys->max_trx_id=0x6d1f, and both trx_sys->rw_trx_set and trx_sys->rw_trx_list contain only the two blocked transactions 0x6d1d and 0x6d1c. The committing transaction 0x6d17 is not listed. Because the most significant bit of DB_ROLL_PTR is set in the record, purge must have run after the UPDATE t3 SET col1=4 (necessarily by an earlier-committed transaction 0x6cf5) that had delete-marked the clustered index record, before the being-committed INSERT was executed.
I cannot spot any wrong-doing by transaction 0x6d17.
The problem appears to be a race condition between the commit of that transaction, and an active transaction that is waiting for a lock that is being implicitly held by the committing transaction. In lock_rec_convert_impl_to_expl(), at the time of the call to trx_rw_is_active(), the trx_sys->rw_trx_set must have contained the transaction 0x6d17. Because neither trx_sys->mutex nor lock_sys->mutex are being held, the transaction may be delisted. Later, in lock_rec_convert_impl_to_expl_for_trx(), we acquire lock_sys->mutex (which is blocking the commit of the conflicting transaction), and in lock_rec_add_to_queue() the assertion fails, because lock_rec_other_has_expl_req(LOCK_X, block, wait=false, heap_no, trx) finds an explicit lock.
Could it simply be that the other blocked transaction had converted the implicit lock to an explicit one? The answer appears to be no, because the code is blocked in lock_clust_rec_read_check_and_lock(), which was invoked on the supremum pseudo-record:
Unfortunately, no plausible value of other_lock was available in the core dump that I analyzed. It was other_lock=NULL, the only acceptable value for the assertion expression. In lock_sys->rec_hash->array, at the time of the crash, there is only one record lock bitmap entry, for LOCK_REC|LOCK_REC_NOT_GAP|LOCK_X of the committing transaction, and apparently an empty bitmap (no locks set).
On a rerun with improved instrumentation, we have a plausible value of other_lock, but the crash looks slightly different. We have transactions 0x31d5 and 0x31d6 in trx_sys->rw_trx_list and trx_sys->rw_trx_set. The assertion fails in DELETE (transaction 0x31d6). The lock_sys->rec_hash->array contains exactly one record lock bitmap, associated with transaction 0x31ce, an INSERT that is being committed. The transaction 0x31d5, which is associated with the other_lock that tripped the assertion, got a bit further than last time, and is waiting for fix_block->lock X-latch here:
#21 0x00005585e639b999 in mysql_execute_command (thd=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:4007
#22 0x00005585e6397365 in mysql_parse (thd=0x7fd268000cf8, rawbuf=0x7fd268010640 "UPDATE t3 SET col1 = 7", length=<optimized out>, parser_state=0x7fd2bff68720, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
The other_lock appears to be LOCK_X|LOCK_REC, granted to the UPDATE transaction (which by the way is still about to perform its first modification, blocked by the clustered index page latch that the DELETE is holding). The record lock bitmap is 0xe, meaning that heap numbers 1,2,3 (page supremum and 2 user records) are locked. In the table, we have
(col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0x31cc,insert,77), (7,0x31ce,insert,77)
(col_text,col1)=(77,4), (77,7), PAGE_MAX_TRX_ID=0x31ce
In this case, purge had removed any delete-marked records from the clustered index, both before UPDATE t3 SET col1=4 (by committed transaction 0x31cc) and the INSERT (by committing transaction 0x31ce).
The scenario seems to be as follows:
The INSERT transaction performed its operation, implicitly locking col1=7.
The DELETE transaction, about to enqueue a waiting lock request on col1=7, starts to convert the implicit lock to explicit.
The UPDATE transaction somehow gets in between, and gets the request on col1=7 granted.
The assertion fails in the DELETE transaction.
For the being-committed INSERT, blocked by lock_sys->mutex, there are no bits set in the only entry that is registered in lock_sys->rec_hash. This entry ought to have been created by an earlier transaction, possibly the currently active UPDATE transaction.
Open questions:
Why is the other_lock no longer registered in lock_sys->rec_hash? It belongs to an active transaction that is under an impression that it successfully acquired the lock. UPDATE (until MDEV-16232 is implemented) always relies on explicit locks, never implicit ones.
Were the lock bits for the lock_sys->rec_hash entry for the INSERT transaction ever set? Was that entry originally created by the UPDATE transaction, or was it by some earlier transaction?
When was the lock request granted for the UPDATE transaction, and by which thread?
Marko Mäkelä
added a comment - Another failure, this time cmake -DWITH_WSREP=OFF :
10.2 9bf424bc7b8a6199ef0c22b508835f21019f861a
mysqld: /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:1903: void lock_rec_add_to_queue(ulint, const buf_block_t *, ulint, dict_index_t *, trx_t *, bool): Assertion `!other_lock' failed.
…
#7 0x00005633677e9fc1 in lock_rec_add_to_queue (type_mode=1059, block=0x7f510cf9b268, heap_no=2, index=0x7f50b818acc8, trx=0x7f510d89d268, caller_owns_trx_mutex=<optimized out>) at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:1903
#8 0x00005633677f644b in lock_rec_convert_impl_to_expl_for_trx (block=<optimized out>, rec=<optimized out>, index=<optimized out>, trx=<optimized out>, heap_no=2) at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:5800
#9 lock_rec_convert_impl_to_expl (block=0x7f510cf9b268, rec=<optimized out>, index=<optimized out>, offsets=<optimized out>) at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:5854
#10 0x00005633677f83c4 in lock_clust_rec_read_check_and_lock (flags=<optimized out>, block=0x7f510cf9b268, rec=0x7f510d5f007d "\200", index=0x7f50b818acc8, offsets=0x7f510cc1d050, mode=LOCK_X, gap_mode=1024, thr=0x7f50bc022ae8) at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:6130
#11 0x00005633678d6f2e in sel_set_rec_lock (pcur=0x7f50bc022268, rec=0x7f510d5f007d "\200", index=0x7f50b818acc8, offsets=<optimized out>, mode=<optimized out>, type=1024, thr=<optimized out>, mtr=<optimized out>) at /mariadb/10.2o/storage/innobase/row/row0sel.cc:1261
#12 0x00005633678d3ad1 in row_search_mvcc (buf=<optimized out>, mode=<optimized out>, prebuilt=0x7f50bc022098, match_mode=<optimized out>, direction=<optimized out>) at /mariadb/10.2o/storage/innobase/row/row0sel.cc:5133
#13 0x0000563367783291 in ha_innobase::index_read (this=0x7f50bc06fa50, buf=0x7f50bc06e748 "\377", key_ptr=<optimized out>, key_len=<optimized out>, find_flag=<optimized out>) at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9470
#14 0x00005633675c2de4 in handler::ha_index_read_map (this=0x7f50bc06fa50, buf=0x7f50bc06e748 "\377", key=0x7f50b0174070 "\a", keypart_map=1, find_flag=HA_READ_KEY_EXACT) at /mariadb/10.2o/sql/handler.cc:2711
#15 0x00005633675c96b6 in handler::read_range_first (this=0x7f50bc06fa50, start_key=0x7f50bc06fb38, end_key=0x7f50bc06fb58, eq_range_arg=true, sorted=<optimized out>) at /mariadb/10.2o/sql/handler.cc:5455
#16 0x00005633675267c2 in handler::multi_range_read_next (this=0x7f50bc06fa50, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:298
#17 0x000056336752691c in Mrr_simple_index_reader::get_next (this=0x7f50bc06ffc0, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:335
#18 0x000056336752860f in DsMrr_impl::dsmrr_next (this=0x7f50bc06fe80, range_info=0x7f510cc1ddf8) at /mariadb/10.2o/sql/multi_range_read.cc:1411
#19 0x00005633676ee3f1 in QUICK_RANGE_SELECT::get_next (this=0x7f50b006e510) at /mariadb/10.2o/sql/opt_range.cc:11459
#20 0x00005633676fb05d in rr_quick (info=0x7f510cc1def0) at /mariadb/10.2o/sql/records.cc:365
#21 0x000056336771bb2c in mysql_delete (thd=0x7f50b0000cf8, table_list=<optimized out>, conds=<optimized out>, order_list=<optimized out>, limit=18446744073709551361, options=<optimized out>, result=<optimized out>) at /mariadb/10.2o/sql/sql_delete.cc:559
#22 0x0000563367408802 in mysql_execute_command (thd=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:4371
#23 0x0000563367400365 in mysql_parse (thd=0x7f50b0000cf8, rawbuf=0x7f50b0010640 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=0x7f510cc1f720, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
#24 0x00005633673fd508 in dispatch_command (command=COM_QUERY, thd=0x7f50b0000cf8, packet=0x7f50b0008329 "DELETE FROM t3 WHERE col1 = 7", packet_length=29, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:1831
While we are converting the implicit lock (which conflicts with our pending lock request) to an explicit one, the owning transaction is no longer listed in trx_sys->rw_trx_list , because it is about to be committed in another thread after having inserted exactly 1 record:
#7 0x00005633677f9f8e in PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (
this=<optimized out>, n_spins=30, n_delay=<optimized out>,
name=<optimized out>, line=6587)
at /mariadb/10.2o/storage/innobase/include/ib0mutex.h:637
#8 lock_trx_release_locks (trx=0x7f510d89d268)
at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:6587
#9 0x00005633679558f0 in trx_commit_in_memory (trx=<optimized out>,
mtr=<optimized out>, serialised=<optimized out>)
at /mariadb/10.2o/storage/innobase/trx/trx0trx.cc:1665
#10 trx_commit_low (trx=<optimized out>, mtr=<optimized out>)
at /mariadb/10.2o/storage/innobase/trx/trx0trx.cc:1883
#11 0x0000563367956bb3 in trx_commit (trx=0x7f510d89d268)
at /mariadb/10.2o/storage/innobase/trx/trx0trx.cc:1907
#12 0x0000563367958204 in trx_commit_for_mysql (trx=0x7f510d89d268)
at /mariadb/10.2o/storage/innobase/trx/trx0trx.cc:2116
#13 0x0000563367794a30 in innobase_commit_low (trx=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:4524
#14 innobase_commit_ordered_2 (trx=0x7f510d89d268, thd=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:4641
#15 0x000056336778cd76 in innobase_commit (hton=<optimized out>,
thd=0x7f50bc000cf8, commit_trx=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:4757
#16 0x00005633675c0545 in commit_one_phase_2 (thd=0x7f50bc000cf8,
all=<optimized out>, trans=0x7f50bc003fc8, is_real_trans=<optimized out>)
at /mariadb/10.2o/sql/handler.cc:1604
#17 0x00005633675c0470 in ha_commit_one_phase (thd=0x7f50bc000cf8,
all=<optimized out>) at /mariadb/10.2o/sql/handler.cc:1585
#18 0x00005633675c0081 in ha_commit_trans (thd=0x7f50bc000cf8,
all=<optimized out>) at /mariadb/10.2o/sql/handler.cc:1452
#19 0x0000563367508c5a in trans_commit_stmt (thd=0x7f50bc000cf8)
at /mariadb/10.2o/sql/transaction.cc:510
#20 0x0000563367406ffa in mysql_execute_command (thd=<optimized out>)
at /mariadb/10.2o/sql/sql_parse.cc:6043
#21 0x0000563367400365 in mysql_parse (thd=0x7f50bc000cf8,
rawbuf=0x7f50bc010640 "INSERT INTO t3 (col1, col_text ) VALUES ( 7, 77 )", length=<optimized out>, parser_state=0x7f510cc6a720,
is_com_multi=<optimized out>, is_next_command=<optimized out>)
at /mariadb/10.2o/sql/sql_parse.cc:7760
There is another transaction that is likewise being blocked, before having modified anything:
#7 0x00005633677f845e in PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (
this=<optimized out>, n_spins=30, n_delay=6, name=<optimized out>,
line=6133) at /mariadb/10.2o/storage/innobase/include/ib0mutex.h:637
#8 lock_clust_rec_read_check_and_lock (flags=<optimized out>,
block=0x7f510cf9b268, rec=0x7f510d5f0070 "supremum",
index=0x7f50b818acc8, offsets=0x7f510cbd2570, mode=LOCK_X, gap_mode=0,
thr=0x7f50b017d678)
at /mariadb/10.2o/storage/innobase/lock/lock0lock.cc:6133
#9 0x00005633678d6f2e in sel_set_rec_lock (pcur=0x7f50b017d128,
rec=0x7f510d5f0070 "supremum", index=0x7f50b818acc8,
offsets=<optimized out>, mode=<optimized out>, type=0,
thr=<optimized out>, mtr=<optimized out>)
at /mariadb/10.2o/storage/innobase/row/row0sel.cc:1261
#10 0x00005633678d2b52 in row_search_mvcc (buf=<optimized out>,
mode=<optimized out>, prebuilt=0x7f50b017cf58,
match_mode=<optimized out>, direction=<optimized out>)
at /mariadb/10.2o/storage/innobase/row/row0sel.cc:4835
#11 0x00005633677836f0 in ha_innobase::general_fetch (this=0x7f50b017b880,
buf=0x7f50b0178258 "\377\a", direction=1, match_mode=0)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9737
#12 0x00005633675c3431 in handler::ha_index_next (this=0x7f50b017b880,
buf=0x7f50b0178258 "\377\a") at /mariadb/10.2o/sql/handler.cc:2761
#13 0x00005633676fb2e6 in rr_index (info=0x7f510cbd3318)
at /mariadb/10.2o/sql/records.cc:449
#14 0x00005633674b6762 in mysql_update (thd=0x7f50b4000cf8,
table_list=<optimized out>, fields=..., values=...,
conds=<optimized out>, order_num=<optimized out>, order=<optimized out>,
limit=18446744073709551615, handle_duplicates=<optimized out>,
ignore=<optimized out>, found_return=<optimized out>,
updated_return=<optimized out>) at /mariadb/10.2o/sql/sql_update.cc:629
#15 0x0000563367404999 in mysql_execute_command (thd=<optimized out>)
at /mariadb/10.2o/sql/sql_parse.cc:4007
#16 0x0000563367400365 in mysql_parse (thd=0x7f50b4000cf8,
rawbuf=0x7f50b4010640 "UPDATE t3 SET col1 = 7", length=<optimized out>,
parser_state=0x7f510cbd4720, is_com_multi=<optimized out>,
is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
The table contains the following records:
(col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0x6cf5,update,77),(7,0x6d17,insert,77)
(col_text,col1)=(77,4),(77,7), PAGE_MAX_TRX_ID=0x6d17
We have trx_sys->max_trx_id=0x6d1f , and both trx_sys->rw_trx_set and trx_sys->rw_trx_list contain only the two blocked transactions 0x6d1d and 0x6d1c. The committing transaction 0x6d17 is not listed. Because the most significant bit of DB_ROLL_PTR is set in the record, purge must have run after the UPDATE t3 SET col1=4 (necessarily by an earlier-committed transaction 0x6cf5) that had delete-marked the clustered index record, before the being-committed INSERT was executed.
I cannot spot any wrong-doing by transaction 0x6d17.
The problem appears to be a race condition between the commit of that transaction, and an active transaction that is waiting for a lock that is being implicitly held by the committing transaction. In lock_rec_convert_impl_to_expl() , at the time of the call to trx_rw_is_active() , the trx_sys->rw_trx_set must have contained the transaction 0x6d17. Because neither trx_sys->mutex nor lock_sys->mutex are being held, the transaction may be delisted. Later, in lock_rec_convert_impl_to_expl_for_trx() , we acquire lock_sys->mutex (which is blocking the commit of the conflicting transaction), and in lock_rec_add_to_queue() the assertion fails, because lock_rec_other_has_expl_req(LOCK_X, block, wait=false, heap_no, trx) finds an explicit lock.
Could it simply be that the other blocked transaction had converted the implicit lock to an explicit one? The answer appears to be no, because the code is blocked in lock_clust_rec_read_check_and_lock() , which was invoked on the supremum pseudo-record:
if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
lock_mutex_enter();
Unfortunately, no plausible value of other_lock was available in the core dump that I analyzed. It was other_lock=NULL , the only acceptable value for the assertion expression. In lock_sys->rec_hash->array , at the time of the crash, there is only one record lock bitmap entry, for LOCK_REC|LOCK_REC_NOT_GAP|LOCK_X of the committing transaction, and apparently an empty bitmap (no locks set).
On a rerun with improved instrumentation, we have a plausible value of other_lock , but the crash looks slightly different. We have transactions 0x31d5 and 0x31d6 in trx_sys->rw_trx_list and trx_sys->rw_trx_set . The assertion fails in DELETE (transaction 0x31d6). The lock_sys->rec_hash->array contains exactly one record lock bitmap, associated with transaction 0x31ce, an INSERT that is being committed. The transaction 0x31d5, which is associated with the other_lock that tripped the assertion, got a bit further than last time, and is waiting for fix_block->lock X-latch here:
#10 buf_page_get_gen (page_id=..., page_size=..., rw_latch=2, guess=0x0, mode=10, file=0x5585e6d67a50 "/mariadb/10.2o/storage/innobase/row/row0upd.cc", line=3097, mtr=0x7fd2bff666b8, err=0x7fd2bff6509c) at /mariadb/10.2o/storage/innobase/buf/buf0buf.cc:4918
#11 0x00005585e6928c4f in btr_cur_search_to_nth_level (index=<optimized out>, level=<optimized out>, tuple=<optimized out>, mode=<optimized out>, latch_mode=<optimized out>, cursor=<optimized out>, has_search_latch=0, file=0x5585e6d67a50 "/mariadb/10.2o/storage/innobase/row/row0upd.cc", line=3097, mtr=0x7fd2bff666b8, autoinc=0) at /mariadb/10.2o/storage/innobase/btr/btr0cur.cc:1242
#12 0x00005585e6948fd7 in btr_pcur_open_with_no_init_func (index=<optimized out>, tuple=0x7fd26801e1f8, mode=3398803468, latch_mode=0, cursor=<optimized out>, has_search_latch=0, file=<optimized out>, line=<optimized out>, mtr=<optimized out>) at /mariadb/10.2o/storage/innobase/include/btr0pcur.ic:525
#13 btr_pcur_restore_position_func (latch_mode=0, cursor=0x7fd268062ab0, file=0x5585e6d67a50 "/mariadb/10.2o/storage/innobase/row/row0upd.cc", line=<optimized out>, mtr=0x7fd2bff666b8) at /mariadb/10.2o/storage/innobase/btr/btr0pcur.cc:351
#14 0x00005585e688b2d2 in row_upd_clust_step (node=<optimized out>, thr=<optimized out>) at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3097
#15 row_upd (node=0x7fd25c080b90, thr=0x7fd25c080e98) at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3281
#16 0x00005585e688acf3 in row_upd_step (thr=0x7fd25c080e98) at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3427
#17 0x00005585e68392bb in row_update_for_mysql (prebuilt=0x7fd25c080088) at /mariadb/10.2o/storage/innobase/row/row0mysql.cc:1828
#18 0x00005585e6718f95 in ha_innobase::update_row (this=0x7fd25c07f900, old_row=0x7fd25c0296b8 "\377\004", new_row=0x7fd25c0296a8 "\377\a") at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9016
#19 0x00005585e6562183 in handler::ha_update_row (this=0x7fd25c07f900, old_data=0x7fd25c0296b8 "\377\004", new_data=0x7fd25c0296a8 "\377\a") at /mariadb/10.2o/sql/handler.cc:6121
#20 0x00005585e644ded9 in mysql_update (thd=0x7fd268000cf8, table_list=<optimized out>, fields=..., values=..., conds=<optimized out>, order_num=<optimized out>, order=<optimized out>, limit=18446744073709551615, handle_duplicates=<optimized out>, ignore=<optimized out>, found_return=<optimized out>, updated_return=<optimized out>) at /mariadb/10.2o/sql/sql_update.cc:818
#21 0x00005585e639b999 in mysql_execute_command (thd=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:4007
#22 0x00005585e6397365 in mysql_parse (thd=0x7fd268000cf8, rawbuf=0x7fd268010640 "UPDATE t3 SET col1 = 7", length=<optimized out>, parser_state=0x7fd2bff68720, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.2o/sql/sql_parse.cc:7760
The other_lock appears to be LOCK_X|LOCK_REC , granted to the UPDATE transaction (which by the way is still about to perform its first modification, blocked by the clustered index page latch that the DELETE is holding). The record lock bitmap is 0xe, meaning that heap numbers 1,2,3 (page supremum and 2 user records) are locked. In the table, we have
(col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0x31cc,insert,77), (7,0x31ce,insert,77)
(col_text,col1)=(77,4), (77,7), PAGE_MAX_TRX_ID=0x31ce
In this case, purge had removed any delete-marked records from the clustered index, both before UPDATE t3 SET col1=4 (by committed transaction 0x31cc) and the INSERT (by committing transaction 0x31ce).
The scenario seems to be as follows:
The INSERT transaction performed its operation, implicitly locking col1=7 .
The DELETE transaction, about to enqueue a waiting lock request on col1=7 , starts to convert the implicit lock to explicit.
The UPDATE transaction somehow gets in between, and gets the request on col1=7 granted.
The assertion fails in the DELETE transaction.
For the being-committed INSERT , blocked by lock_sys->mutex , there are no bits set in the only entry that is registered in lock_sys->rec_hash . This entry ought to have been created by an earlier transaction, possibly the currently active UPDATE transaction.
Open questions:
Why is the other_lock no longer registered in lock_sys->rec_hash ? It belongs to an active transaction that is under an impression that it successfully acquired the lock. UPDATE (until MDEV-16232 is implemented) always relies on explicit locks, never implicit ones.
Were the lock bits for the lock_sys->rec_hash entry for the INSERT transaction ever set? Was that entry originally created by the UPDATE transaction, or was it by some earlier transaction?
When was the lock request granted for the UPDATE transaction, and by which thread?
In my previous analysis, I forgot to dereference the singly-linked list of ib_lock_t::hash. Actually the other_lock appears to be linked from the entry in lock_sys->rec_hash, as the very last element, in the following case.
The following variation of the test appears to crash slightly less frequently:
CREATETABLE t3 (col1 INTPRIMARYKEY, col_text INT UNSIGNED NOTNULLDEFAULT 0, INDEX uidx3(col_text)) ENGINE=InnoDB;
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3281
#16 0x000055c7f48c0d23 in row_upd_step (thr=0x7f5ae404bba8)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3427
#17 0x000055c7f486f2eb in row_update_for_mysql (prebuilt=0x7f5ae404b0c8)
at /mariadb/10.2o/storage/innobase/row/row0mysql.cc:1828
#18 0x000055c7f474f524 in ha_innobase::delete_row (this=0x7f5ae407dd60,
record=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9136
4 first items in the only occupied hash bucket lock_sys->rec_hash belong to the being-committed INSERT transaction. The 5th and last item is the other_lock by the other DELETE. The first items are (all for the clustered index):
LOCK_REC|LOCK_S|LOCK_REC_NOT_GAP({})
LOCK_REC|LOCK_X({supremum})
LOCK_REC|LOCK_X|LOCK_INSERT_INTENTION({supremum})
LOCK_REC|LOCK_S|LOCK_GAP({col1=7})
The last item is other_lock, also for the clustered index:
LOCK_REC|LOCK_X({col1=7})
This is clearly conflicting with the lock that the being-committed INSERT transaction is still holding! So, the assertion seems to be pretty much valid.
Also in this case, purge had run into completion before the INSERT.
The failure can be repeated even without involving a secondary index:
CREATETABLE t3 (col1 INTPRIMARYKEY) ENGINE=InnoDB;
thread1: INSERTINTO t3 SET col1=7;
thread2: DELETEFROM t3;
thread3: DELETEFROM t3;
For the above test, I got a simpler failure where lock_sys->rec_hash only contains other_lock, which belongs to the other DELETE transaction that is blocked by the clustered index page latch that our DELETE is holding:
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3281
#16 0x000055ca28f55d23 in row_upd_step (thr=0x7fa520078ff0)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3427
#17 0x000055ca28f042eb in row_update_for_mysql (prebuilt=0x7fa520078268)
at /mariadb/10.2o/storage/innobase/row/row0mysql.cc:1828
#18 0x000055ca28de4524 in ha_innobase::delete_row (this=0x7fa52006ef20,
record=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9136
Again, trx_sys->rw_trx_list only contains the two DELETE transactions (0x34bf, 0x34c0). The INSERT transaction 0x34be is being committed, waiting for the lock_sys->mutex. The table contents is (col1,DB_TRX_ID,DB_ROLL_PTR)=(7,0x34be,insert).
In this case, the scenario seems simpler, and looks like a possible subset of the more complex scenario presented earlier:
The INSERT transaction performed its operation, implicitly locking col1=7.
The current DELETE transaction started converting the implicit lock into explicit one.
The INSERT transaction was removed from trx_sys->rw_trx_list and trx_sys->rw_trx_set in trx_erase_lists(), invoked by trx_commit_in_memory().
The other DELETE transaction noticed that there is no implicit record lock, and registered a conflicting explicit record lock.
The current DELETE transaction resumes execution and hits the assertion failure due to the conflicting other_lock.
I believe that it should also be possible to replace one of the DELETE with SELECT * FROM t3 FOR UPDATE, but I did not try that yet.
Marko Mäkelä
added a comment - In my previous analysis, I forgot to dereference the singly-linked list of ib_lock_t::hash . Actually the other_lock appears to be linked from the entry in lock_sys->rec_hash , as the very last element, in the following case.
The following variation of the test appears to crash slightly less frequently:
CREATE TABLE t3 (col1 INT PRIMARY KEY , col_text INT UNSIGNED NOT NULL DEFAULT 0, INDEX uidx3(col_text)) ENGINE=InnoDB;
thread1: INSERT INTO t3 (col1, col_text ) VALUES ( 7, 77 );
thread2: DELETE FROM t3;
thread3: DELETE FROM t3;
Unsurprisingly, the crash occurs on DELETE FROM t3 . The other_lock owner is waiting for the clustered index page latch here:
#13 btr_pcur_restore_position_func (latch_mode=0, cursor=0x7f5ae8179420,
file=0x55c7f4d9da70 "/mariadb/10.2o/storage/innobase/row/row0upd.cc",
line=<optimized out>, mtr=0x7f5b3fed0578)
at /mariadb/10.2o/storage/innobase/btr/btr0pcur.cc:351
#14 0x000055c7f48c1302 in row_upd_clust_step (node=<optimized out>,
thr=<optimized out>)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3097
#15 row_upd (node=0x7f5ae404b8a0, thr=0x7f5ae404bba8)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3281
#16 0x000055c7f48c0d23 in row_upd_step (thr=0x7f5ae404bba8)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3427
#17 0x000055c7f486f2eb in row_update_for_mysql (prebuilt=0x7f5ae404b0c8)
at /mariadb/10.2o/storage/innobase/row/row0mysql.cc:1828
#18 0x000055c7f474f524 in ha_innobase::delete_row (this=0x7f5ae407dd60,
record=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9136
4 first items in the only occupied hash bucket lock_sys->rec_hash belong to the being-committed INSERT transaction. The 5th and last item is the other_lock by the other DELETE . The first items are (all for the clustered index):
LOCK_REC|LOCK_S|LOCK_REC_NOT_GAP ({})
LOCK_REC|LOCK_X ({supremum})
LOCK_REC|LOCK_X|LOCK_INSERT_INTENTION ({supremum})
LOCK_REC|LOCK_S|LOCK_GAP ({col1=7})
The last item is other_lock , also for the clustered index:
LOCK_REC|LOCK_X ({col1=7})
This is clearly conflicting with the lock that the being-committed INSERT transaction is still holding! So, the assertion seems to be pretty much valid.
Also in this case, purge had run into completion before the INSERT .
The failure can be repeated even without involving a secondary index:
CREATE TABLE t3 (col1 INT PRIMARY KEY ) ENGINE=InnoDB;
thread1: INSERT INTO t3 SET col1=7;
thread2: DELETE FROM t3;
thread3: DELETE FROM t3;
For the above test, I got a simpler failure where lock_sys->rec_hash only contains other_lock , which belongs to the other DELETE transaction that is blocked by the clustered index page latch that our DELETE is holding:
#10 buf_page_get_gen (page_id=..., page_size=..., rw_latch=2, guess=0x0,
mode=10,
file=0x55ca29432a70 "/mariadb/10.2o/storage/innobase/row/row0upd.cc",
line=3097, mtr=0x7fa572d59578, err=0x7fa572d57f5c)
at /mariadb/10.2o/storage/innobase/buf/buf0buf.cc:4918
#11 0x000055ca28ff3c7f in btr_cur_search_to_nth_level (index=<optimized out>,
level=<optimized out>, tuple=<optimized out>, mode=<optimized out>,
latch_mode=<optimized out>, cursor=<optimized out>, has_search_latch=0,
file=0x55ca29432a70 "/mariadb/10.2o/storage/innobase/row/row0upd.cc",
line=3097, mtr=0x7fa572d59578, autoinc=0)
at /mariadb/10.2o/storage/innobase/btr/btr0cur.cc:1242
#12 0x000055ca29014007 in btr_pcur_open_with_no_init_func (
index=<optimized out>, tuple=0x7fa514075b38, mode=2037727244,
latch_mode=0, cursor=<optimized out>, has_search_latch=0,
file=<optimized out>, line=<optimized out>, mtr=<optimized out>)
at /mariadb/10.2o/storage/innobase/include/btr0pcur.ic:525
#13 btr_pcur_restore_position_func (latch_mode=0, cursor=0x7fa514017830,
file=0x55ca29432a70 "/mariadb/10.2o/storage/innobase/row/row0upd.cc",
line=<optimized out>, mtr=0x7fa572d59578)
at /mariadb/10.2o/storage/innobase/btr/btr0pcur.cc:351
#14 0x000055ca28f56302 in row_upd_clust_step (node=<optimized out>,
thr=<optimized out>)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3097
#15 row_upd (node=0x7fa520078d18, thr=0x7fa520078ff0)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3281
#16 0x000055ca28f55d23 in row_upd_step (thr=0x7fa520078ff0)
at /mariadb/10.2o/storage/innobase/row/row0upd.cc:3427
#17 0x000055ca28f042eb in row_update_for_mysql (prebuilt=0x7fa520078268)
at /mariadb/10.2o/storage/innobase/row/row0mysql.cc:1828
#18 0x000055ca28de4524 in ha_innobase::delete_row (this=0x7fa52006ef20,
record=<optimized out>)
at /mariadb/10.2o/storage/innobase/handler/ha_innodb.cc:9136
Again, trx_sys->rw_trx_list only contains the two DELETE transactions (0x34bf, 0x34c0). The INSERT transaction 0x34be is being committed, waiting for the lock_sys->mutex . The table contents is (col1,DB_TRX_ID,DB_ROLL_PTR)=(7,0x34be,insert).
In this case, the scenario seems simpler, and looks like a possible subset of the more complex scenario presented earlier:
The INSERT transaction performed its operation, implicitly locking col1=7 .
The current DELETE transaction started converting the implicit lock into explicit one.
The INSERT transaction was removed from trx_sys->rw_trx_list and trx_sys->rw_trx_set in trx_erase_lists() , invoked by trx_commit_in_memory() .
The other DELETE transaction noticed that there is no implicit record lock, and registered a conflicting explicit record lock.
The current DELETE transaction resumes execution and hits the assertion failure due to the conflicting other_lock .
I believe that it should also be possible to replace one of the DELETE with SELECT * FROM t3 FOR UPDATE , but I did not try that yet.
I found some room for cleanup. If the statements trx = NULL; were ever executed in the function trx_reference(), we would dereference a NULL pointer and crash in trx_mutex_exit(trx);. Hence, those statements must have been unreachable, and they can be replaced with debug assertions, like above. This implies that we do not have to acquire trx->mutex if id==0. Actually, the entire function can be omitted if id==0. It is better to merge the relevant part of the function to its only caller:
transactions now that it no longer holds any user locks. */
@@ -1682,6 +1680,8 @@ trx_commit_in_memory(
} else {
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
}
+
+ trx->id = 0;
}
ut_ad(!trx->rsegs.m_redo.update_undo);
The above clean-up is not yet fixing anything. We must ensure that the ‘holds locks’ status of a being-committed transaction is being changed atomically. It looks like it should be protected by lock_sys->mutex, but for implicit locks it is not the case at least since the Bug#21433768 fix in MySQL 5.7.9. That commit says that it is reverting part of an earlier Bug#17320977 fix in MySQL 5.6.17,5.7.4. One of the Bug#17420977 commits said:
The active transaction was removed from trx_sys->rw_trx_set prematurely.
That is exactly matching our case.
In MySQL 5.6 (and MariaDB 10.0 and 10.1), the transaction is removed from the lists after the locks have been released, and hence the race condition will not occur. A notable change between 5.7 and 5.6 is that in 5.7, we rely more on the trx_t::n_ref, and commit will wait for the reference-count to reach 0. The function trx_rw_is_active() in 5.6 is missing the parameter do_ref_count.
A better fix for the MySQL 5.7.9 bug could be only remove the transaction from the MVCC-related data structures before releasing the locks, and let lock_trx_release_locks() remove the transaction from trx_sys->rw_trx_list and trx_sys->rw_trx_set, while holding lock_sys->mutex and trx_sys->mutex.
Marko Mäkelä
added a comment - I found some room for cleanup. If the statements trx = NULL; were ever executed in the function trx_reference() , we would dereference a NULL pointer and crash in trx_mutex_exit(trx); . Hence, those statements must have been unreachable, and they can be replaced with debug assertions, like above. This implies that we do not have to acquire trx->mutex if id==0 . Actually, the entire function can be omitted if id==0 . It is better to merge the relevant part of the function to its only caller:
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index c552af9ab47..bfd345a27db 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -364,8 +364,13 @@ trx_rw_is_active(
trx_t* trx = trx_rw_is_active_low(trx_id, corrupt);
- if (trx) {
- trx = trx_reference(do_ref_count ? trx_id : 0, trx);
+ if (trx && do_ref_count) {
+ trx_mutex_enter(trx);
+ ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+ ut_ad(trx->id == trx_id);
+ ut_ad(trx->n_ref >= 0);
+ ++trx->n_ref;
+ trx_mutex_exit(trx);
}
trx_sys_mutex_exit();
The above implies that the following cleanup is safe to do:
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 21171bad2bb..a674a38df2d 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -6647,8 +6647,6 @@ lock_trx_release_locks(
the is_recovered flag. */
trx->is_recovered = false;
- /* Ensure that trx_reference() will not find this transaction. */
- trx->id = 0;
trx_mutex_exit(trx);
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 196ae5db36e..86008387385 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1660,10 +1660,8 @@ trx_commit_in_memory(
trx_erase_lists(trx, serialised);
}
- /* trx->id will be cleared in lock_trx_release_locks(trx). */
- ut_ad(trx->read_only || !trx->rsegs.m_redo.rseg || trx->id);
lock_trx_release_locks(trx);
- ut_ad(trx->id == 0);
+ ut_ad(trx->read_only || !trx->rsegs.m_redo.rseg || trx->id);
/* Remove the transaction from the list of active
transactions now that it no longer holds any user locks. */
@@ -1682,6 +1680,8 @@ trx_commit_in_memory(
} else {
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
}
+
+ trx->id = 0;
}
ut_ad(!trx->rsegs.m_redo.update_undo);
The above clean-up is not yet fixing anything. We must ensure that the ‘holds locks’ status of a being-committed transaction is being changed atomically. It looks like it should be protected by lock_sys->mutex , but for implicit locks it is not the case at least since the Bug#21433768 fix in MySQL 5.7.9 . That commit says that it is reverting part of an earlier Bug#17320977 fix in MySQL 5.6.17,5.7.4. One of the Bug#17420977 commits said:
The active transaction was removed from trx_sys->rw_trx_set prematurely.
That is exactly matching our case.
In MySQL 5.6 (and MariaDB 10.0 and 10.1), the transaction is removed from the lists after the locks have been released, and hence the race condition will not occur. A notable change between 5.7 and 5.6 is that in 5.7, we rely more on the trx_t::n_ref , and commit will wait for the reference-count to reach 0. The function trx_rw_is_active() in 5.6 is missing the parameter do_ref_count .
A better fix for the MySQL 5.7.9 bug could be only remove the transaction from the MVCC-related data structures before releasing the locks, and let lock_trx_release_locks() remove the transaction from trx_sys->rw_trx_list and trx_sys->rw_trx_set , while holding lock_sys->mutex and trx_sys->mutex .
We can and will detach the transaction from the MVCC data structures before starting to release any locks.
Implicit locks will released by assigning trx->state=TRX_STATE_COMMITTED_IN_MEMORY.
All callers of trx_rw_is_active() and trx_rw_is_active_low() must recheck trx->state after acquiring lock_sys->mutex or trx->mutex.
Before releasing explicit locks, lock_trx_release_locks() must ensure !trx_is_referenced(trx) after acquiring lock_sys->mutex and trx->mutex. There was a glitch in this check; at the end we would release trx->mutex and acquire lock_sys->mutex and trx->mutex, and fail to recheck. After this check, !trx_is_referenced(trx) must hold, because other threads will check the trx->state before incrementing the reference count.
Explicit locks can be released in batches (LOCK_RELEASE_INTERVAL = 1000) just like we did before.
Only after lock_trx_release_locks() has completed, trx_commit_in_memory() will remove the transaction from trx_sys->rw_trx_list and trx_sys->rw_trx_set. (This turned out to be the easiest place to release it; in this way, lock_trx_release_locks() avoids dealing with trx_sys.)
Marko Mäkelä
added a comment - My proposed fix goes along the following lines:
We can and will detach the transaction from the MVCC data structures before starting to release any locks.
Implicit locks will released by assigning trx->state=TRX_STATE_COMMITTED_IN_MEMORY .
All callers of trx_rw_is_active() and trx_rw_is_active_low() must recheck trx->state after acquiring lock_sys->mutex or trx->mutex .
Before releasing explicit locks, lock_trx_release_locks() must ensure !trx_is_referenced(trx) after acquiring lock_sys->mutex and trx->mutex . There was a glitch in this check; at the end we would release trx->mutex and acquire lock_sys->mutex and trx->mutex , and fail to recheck. After this check, !trx_is_referenced(trx) must hold, because other threads will check the trx->state before incrementing the reference count.
Explicit locks can be released in batches ( LOCK_RELEASE_INTERVAL = 1000 ) just like we did before.
Only after lock_trx_release_locks() has completed, trx_commit_in_memory() will remove the transaction from trx_sys->rw_trx_list and trx_sys->rw_trx_set . (This turned out to be the easiest place to release it; in this way, lock_trx_release_locks() avoids dealing with trx_sys .)
For MariaDB 10.3, the fix will have to be reworked, because trx->n_ref is an atomic variable (not relying on trx->mutex protection), and because trx_sys.rw_trx_hash replaced the previous data structures.
Marko Mäkelä
added a comment - For MariaDB 10.3, the fix will have to be reworked, because trx->n_ref is an atomic variable (not relying on trx->mutex protection), and because trx_sys.rw_trx_hash replaced the previous data structures.
The fix is not directly applicable to 10.3, because the trx_sys.rw_trx_hash is used for both MVCC and locking. In 10.2, trx_sys->serialisation_list and trx_sys->rw_trx_ids are used for MVCC, and trx_sys->rw_trx_set (duplicated by trx_sys->rw_trx_list) are used for checking implicit locks.
The minimal test (INSERT,DELETE,DELETE) does not crash in 10.3 e41eb044f19e3ee9b881344565924dd6b9d20e1a for me, either with or without changes that I applied from the 10.2 fix. The slightly larger test that is attached in MDEV-20066 crashes very easily on that 10.3 revision, but not that easily with the applicable part of the 10.2 fix.
To make that bigger test to work with ./mtr --repeat, I added a cleanup step before write_file like this:
#6 0x000055fa44c5f8f4 in lock_rec_queue_validate (locked_lock_trx_sys=<optimized out>, block=0x7f4fae431da8, rec=<optimized out>, index=<optimized out>, offsets=0x7f4fb4055ea0) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:4961
#7 0x000055fa44c62065 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7f4fae431da8, rec=0x7f4fae93c07d "\200", index=0x7f4f5c09d918, offsets=0x7f4fb4055ea0, mode=LOCK_X, gap_mode=1024, thr=0x7f4f58073890) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5854
#8 0x000055fa44d5880b in sel_set_rec_lock (pcur=0x7f4f58073358, rec=0x7f4fae93c07d "\200", index=0x7f4f5c09d918, offsets=0x7f4fb4055ea0, mode=<optimized out>, type=<optimized out>, thr=<optimized out>, mtr=<optimized out>) at /mariadb/10.3/storage/innobase/row/row0sel.cc:1260
…
#21 0x000055fa447ddd95 in mysql_parse (thd=0x7f4f5c000cf8, rawbuf=0x7f4f5c011cf0 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=0x7f4fb40586a0, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.3/sql/sql_parse.cc:7829
There is a thread executing commit, waiting for lock_sys.mutex at the start of lock_trx_release_locks().
Marko Mäkelä
added a comment - The fix is not directly applicable to 10.3, because the trx_sys.rw_trx_hash is used for both MVCC and locking. In 10.2, trx_sys->serialisation_list and trx_sys->rw_trx_ids are used for MVCC, and trx_sys->rw_trx_set (duplicated by trx_sys->rw_trx_list ) are used for checking implicit locks.
The minimal test (INSERT,DELETE,DELETE) does not crash in 10.3 e41eb044f19e3ee9b881344565924dd6b9d20e1a for me, either with or without changes that I applied from the 10.2 fix. The slightly larger test that is attached in MDEV-20066 crashes very easily on that 10.3 revision, but not that easily with the applicable part of the 10.2 fix.
To make that bigger test to work with ./mtr --repeat , I added a cleanup step before write_file like this:
--error 0,1
--remove_file $MY_TEST_FILE
--write_file $MY_TEST_FILE
On 10.2 with the proposed fix , this larger test successfully completed 30×100 runs.
The larger test crashed on the partially fixed 10.3, on the 9th round (with 30 tests running in parallel):
10.3 e41eb044f19e3ee9b881344565924dd6b9d20e1a with a partial fix
InnoDB: Failing assertion: !other_lock || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)
…
#6 0x000055fa44c5f8f4 in lock_rec_queue_validate (locked_lock_trx_sys=<optimized out>, block=0x7f4fae431da8, rec=<optimized out>, index=<optimized out>, offsets=0x7f4fb4055ea0) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:4961
#7 0x000055fa44c62065 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7f4fae431da8, rec=0x7f4fae93c07d "\200", index=0x7f4f5c09d918, offsets=0x7f4fb4055ea0, mode=LOCK_X, gap_mode=1024, thr=0x7f4f58073890) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5854
#8 0x000055fa44d5880b in sel_set_rec_lock (pcur=0x7f4f58073358, rec=0x7f4fae93c07d "\200", index=0x7f4f5c09d918, offsets=0x7f4fb4055ea0, mode=<optimized out>, type=<optimized out>, thr=<optimized out>, mtr=<optimized out>) at /mariadb/10.3/storage/innobase/row/row0sel.cc:1260
…
#21 0x000055fa447ddd95 in mysql_parse (thd=0x7f4f5c000cf8, rawbuf=0x7f4f5c011cf0 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=0x7f4fb40586a0, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.3/sql/sql_parse.cc:7829
There is a thread executing commit, waiting for lock_sys.mutex at the start of lock_trx_release_locks() .
#8 0x00005647d21f2e00 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7fc32b1cf170, rec=0x7fc32b51807d "\200", index=0x7fc2d00f7608, offsets=0x7fc3280dfec0, mode=LOCK_X, gap_mode=1024, thr=0x7fc2d4056548) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5854
#9 0x00005647d232a1f9 in sel_set_rec_lock (pcur=0x7fc2d4055ce8, rec=<optimized out>, index=0x7fc2d00f7608, offsets=<optimized out>, mode=<optimized out>, type=<optimized out>, thr=0x7fc2d4056548, mtr=0x7fc3280e01e8) at /mariadb/10.3/storage/innobase/row/row0sel.cc:1260
#10 0x00005647d2326abb in row_search_mvcc (buf=<optimized out>, mode=<optimized out>, prebuilt=0x7fc2d4055b18, match_mode=<optimized out>, direction=0) at /mariadb/10.3/storage/innobase/row/row0sel.cc:5062
…
#22 0x00005647d1d3f53d in mysql_parse (thd=0x7fc2b8000cf8, rawbuf=0x7fc2b80109f0 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=<optimized out>, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.3/sql/sql_parse.cc:7829
The current transaction is 0x3d1b, and the other_lock->trx->id is 0x3d1c.
The clustered index root page contains (col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0,1<<55,77),(7,0x3d19,insert,77).
The transaction 0x3d19 is waiting for lock_sys.mutex at the start of lock_trx_release_locks().
Because even with the partial fix, we will already have removed the transaction from trx_sys.rw_trx_hash at this point of commit, it is very possible that the other_lock->trx thread observed that the record is no longer implicitly locked. That thread is waiting for the clustered index leaf page latch, which our crashing thread is holding.
I developed a follow-up fix that allows us to remove the transaction from trx_sys.rw_trx_hash after the locks have been released. The MVCC read view creation still needs some work, because the test innodb.innodb-read-view is returning wrong result.
Marko Mäkelä
added a comment - Here is an analysis of a 10.3 crash with the partial fix:
10.3 12cd8d54e373f5e8de98ed2cb5bc6921049a38d2
#5 0x00005647d21e00d0 in lock_rec_add_to_queue (type_mode=1059,
block=0x7fc32b1cf170, heap_no=2, index=0x7fc2d00f7608,
trx=0x7fc32ba8e288, caller_owns_trx_mutex=<optimized out>)
at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:1853
#6 0x00005647d21f07f3 in lock_rec_convert_impl_to_expl_for_trx (
block=<optimized out>, rec=<optimized out>, index=<optimized out>,
trx=<optimized out>, heap_no=2)
at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5431
#7 lock_rec_convert_impl_to_expl (caller_trx=<optimized out>, block=0x7fc32b1cf170, rec=<optimized out>, index=<optimized out>, offsets=<optimized out>) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5591
#8 0x00005647d21f2e00 in lock_clust_rec_read_check_and_lock (flags=0, block=0x7fc32b1cf170, rec=0x7fc32b51807d "\200", index=0x7fc2d00f7608, offsets=0x7fc3280dfec0, mode=LOCK_X, gap_mode=1024, thr=0x7fc2d4056548) at /mariadb/10.3/storage/innobase/lock/lock0lock.cc:5854
#9 0x00005647d232a1f9 in sel_set_rec_lock (pcur=0x7fc2d4055ce8, rec=<optimized out>, index=0x7fc2d00f7608, offsets=<optimized out>, mode=<optimized out>, type=<optimized out>, thr=0x7fc2d4056548, mtr=0x7fc3280e01e8) at /mariadb/10.3/storage/innobase/row/row0sel.cc:1260
#10 0x00005647d2326abb in row_search_mvcc (buf=<optimized out>, mode=<optimized out>, prebuilt=0x7fc2d4055b18, match_mode=<optimized out>, direction=0) at /mariadb/10.3/storage/innobase/row/row0sel.cc:5062
…
#22 0x00005647d1d3f53d in mysql_parse (thd=0x7fc2b8000cf8, rawbuf=0x7fc2b80109f0 "DELETE FROM t3 WHERE col1 = 7", length=<optimized out>, parser_state=<optimized out>, is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.3/sql/sql_parse.cc:7829
The current transaction is 0x3d1b, and the other_lock->trx->id is 0x3d1c.
The clustered index root page contains (col1,DB_TRX_ID,DB_ROLL_PTR,col_text)=(4,0,1<<55,77),(7,0x3d19,insert,77).
The transaction 0x3d19 is waiting for lock_sys.mutex at the start of lock_trx_release_locks() .
Because even with the partial fix, we will already have removed the transaction from trx_sys.rw_trx_hash at this point of commit, it is very possible that the other_lock->trx thread observed that the record is no longer implicitly locked. That thread is waiting for the clustered index leaf page latch, which our crashing thread is holding.
I developed a follow-up fix that allows us to remove the transaction from trx_sys.rw_trx_hash after the locks have been released. The MVCC read view creation still needs some work, because the test innodb.innodb-read-view is returning wrong result.
svoj had the idea that we could assign trx->state=TRX_STATE_COMMITTED_IN_MEMORY; as the first step, before removing the transaction from trx_sys, and then proceeding to free the explicit locks. That sounds promising to me, and it would allow to keep the trx_erase_lists() in one atomic piece in 10.2.
Marko Mäkelä
added a comment - This patch makes the 10.2 fix equivalent to the problematic 10.3 fix, introducing a failure of the test {{innodb.innodb-read-view}:
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 6c2361207cf..b005baa18f7 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1629,13 +1629,6 @@ trx_commit_in_memory(
DBUG_LOG("trx", "Autocommit in memory: " << trx);
trx->state = TRX_STATE_NOT_STARTED;
} else {
- if (trx->id > 0) {
- /* For consistent snapshot, we need to remove current
- transaction from running transaction id list for mvcc
- before doing commit and releasing locks. */
- trx_erase_mvcc(trx, serialised);
- }
-
lock_trx_release_locks(trx);
ut_ad(trx->read_only || !trx->rsegs.m_redo.rseg || trx->id);
ut_ad(!trx_is_referenced(trx));
@@ -1672,7 +1665,10 @@ trx_commit_in_memory(
trx_sys_mutex_exit();
}
- trx->id = 0;
+ if (trx->id > 0) {
+ trx_erase_mvcc(trx, serialised);
+ trx->id = 0;
+ }
}
ut_ad(!trx->rsegs.m_redo.update_undo);
svoj had the idea that we could assign trx->state=TRX_STATE_COMMITTED_IN_MEMORY; as the first step, before removing the transaction from trx_sys , and then proceeding to free the explicit locks. That sounds promising to me, and it would allow to keep the trx_erase_lists() in one atomic piece in 10.2.
I think that we should try to fix this serious corruption bug before the next releases. My 10.2 fix was promising in RQG tests, but it is causing a performance regression, so the fix will need some more work.
Marko Mäkelä
added a comment - I think that we should try to fix this serious corruption bug before the next releases. My 10.2 fix was promising in RQG tests, but it is causing a performance regression, so the fix will need some more work.
Fresh occurrence on 10.3: https://api.travis-ci.org/v3/job/379841622/log.txt