|
This was reproduced on a second slave.
|
|
Reproducible relatively easily by running set global read_only=0|1 repeatedly on an active slave.
It's not that it applies to threads, it's just that it attempts to acquire a lock (which is expected and documented), and apparently causes a deadlock with other thread(s), which must be a bug.
It's not a complete deadlock, it's timed, limited by lock_wait_timeout. The problem is, this timeout is 1 year by default
As a workaround, if you need one, set lock_wait_timeout to a reasonable value, either globally or in the session which sets read_only.
|
|
MTR test case.
ATTENTION! This test case is ONLY for the purpose of reproducing the problem! Do NOT put it into the regression suite, make a proper one when the root cause of the failure is known, with debug features if necessary.
|
Test case
|
--source include/master-slave.inc
|
--source include/have_binlog_format_row.inc
|
--source include/have_innodb.inc
|
|
--delimiter $$
|
create procedure pr(tname varchar(8))
|
begin
|
declare i int default 0;
|
set @a = concat('CREATE TABLE ',tname,' (pk int auto_increment primary key, f float) engine=InnoDB');
|
prepare stmt from @a;
|
execute stmt;
|
set @a = concat('INSERT INTO ',tname,' values (null, 0)');
|
prepare stmt from @a;
|
start transaction;
|
while (i<100)
|
do
|
execute stmt;
|
set i=i+1;
|
end while;
|
commit;
|
set @a = concat('UPDATE ',tname,' set f = RAND() ORDER BY pk LIMIT 5');
|
prepare stmt from @a;
|
loop
|
execute stmt;
|
end loop;
|
deallocate prepare stmt;
|
end $$
|
--delimiter ;
|
|
--connection slave
|
stop slave;
|
set global slave_parallel_threads=8;
|
start slave;
|
|
--let $cons= 8
|
|
while ($cons)
|
{
|
--connect(con$cons,localhost,root,,)
|
--send_eval call pr('t$cons')
|
--dec $cons
|
}
|
|
--connection slave
|
--let $run=1000
|
while ($run)
|
{
|
set global read_only = 0;
|
show processlist;
|
set global read_only = 1;
|
--sleep 0.5
|
--dec $run
|
}
|
|
--echo # Ran 1000 iterations, it didn't hang.
|
Adjust the number of iterations or the testcase-timeout to make sure you don't get your server killed due to the timeout.
The test either hangs at some point (both on release and debug builds) with something like this as the last output:
show processlist;
|
Id User Host db Command Time State Info Progress
|
4 root localhost:41762 test Sleep 4 NULL 0.000
|
5 root localhost:41763 test Sleep 5 NULL 0.000
|
8 root localhost:41771 test Query 0 init show processlist 0.000
|
9 root localhost:41772 test Sleep 4 NULL 0.000
|
10 system user NULL Connect 4 Waiting for master to send event NULL 0.000
|
11 system user NULL Connect 1 closing tables NULL 0.000
|
12 system user NULL Connect 1 closing tables NULL 0.000
|
13 system user NULL Connect 1 Waiting for prior transaction to start commit before starting next transaction NULL 0.000
|
14 system user NULL Connect 1 closing tables NULL 0.000
|
15 system user NULL Connect 1 closing tables NULL 0.000
|
16 system user NULL Connect 1 closing tables NULL 0.000
|
17 system user NULL Connect 1 Waiting for prior transaction to start commit before starting next transaction NULL 0.000
|
18 system user NULL Connect 1 executing NULL 0.000
|
19 system user NULL Connect 4 Slave has read all relay log; waiting for the slave I/O thread to update it NULL 0.000
|
set global read_only = 1;
|
or, on the debug build it can cause the assertion failure:
mysqld: /data/src/10.0/sql/mdl.cc:2421: bool MDL_context::acquire_lock(MDL_request*, ulong): Assertion `(mdl_request->type != MDL_INTENTION_EXCLUSIVE && mdl_request->type != MDL_EXCLUSIVE) || !(get_thd()->rgi_slave && get_thd()->rgi_slave->is_parallel_exec && lock->check_if_conflicting_replication_locks(this))' failed.
|
|
Stack trace from 10.0 6c74ef8ae
|
#6 0x00007f8710334266 in __assert_fail_base (fmt=0x7f871046d238 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0xebf638 "(mdl_request->type != MDL_INTENTION_EXCLUSIVE && mdl_request->type != MDL_EXCLUSIVE) || !(get_thd()->rgi_slave && get_thd()->rgi_slave->is_parallel_exec && lock->check_if_conflicting_replication_locks"..., file=file@entry=0xebf1a2 "/data/src/10.0/sql/mdl.cc", line=line@entry=2421, function=function@entry=0xebff60 <MDL_context::acquire_lock(MDL_request*, unsigned long)::__PRETTY_FUNCTION__> "bool MDL_context::acquire_lock(MDL_request*, ulong)") at assert.c:92
|
#7 0x00007f8710334312 in __GI___assert_fail (assertion=0xebf638 "(mdl_request->type != MDL_INTENTION_EXCLUSIVE && mdl_request->type != MDL_EXCLUSIVE) || !(get_thd()->rgi_slave && get_thd()->rgi_slave->is_parallel_exec && lock->check_if_conflicting_replication_locks"..., file=0xebf1a2 "/data/src/10.0/sql/mdl.cc", line=2421, function=0xebff60 <MDL_context::acquire_lock(MDL_request*, unsigned long)::__PRETTY_FUNCTION__> "bool MDL_context::acquire_lock(MDL_request*, ulong)") at assert.c:101
|
#8 0x000000000076d0db in MDL_context::acquire_lock (this=0x7f86fb416168, mdl_request=0x7f87102724f0, lock_wait_timeout=31536000) at /data/src/10.0/sql/mdl.cc:2417
|
#9 0x00000000005e6a9a in open_table (thd=0x7f86fb416070, table_list=0x7f86fb440870, mem_root=0x7f8710272930, ot_ctx=0x7f87102728f0) at /data/src/10.0/sql/sql_base.cc:2324
|
#10 0x00000000005e9912 in open_and_process_table (thd=0x7f86fb416070, lex=0x7f86fb419948, tables=0x7f86fb440870, counter=0x7f8710272a34, flags=0, prelocking_strategy=0x7f8710272a70, has_prelocking_list=false, ot_ctx=0x7f87102728f0, new_frm_mem=0x7f8710272930) at /data/src/10.0/sql/sql_base.cc:4034
|
#11 0x00000000005ea9c7 in open_tables (thd=0x7f86fb416070, start=0x7f87102729f0, counter=0x7f8710272a34, flags=0, prelocking_strategy=0x7f8710272a70) at /data/src/10.0/sql/sql_base.cc:4568
|
#12 0x00000000005eb9e7 in open_and_lock_tables (thd=0x7f86fb416070, tables=0x7f86fb440870, derived=false, flags=0, prelocking_strategy=0x7f8710272a70) at /data/src/10.0/sql/sql_base.cc:5220
|
#13 0x00000000005dfe23 in open_and_lock_tables (thd=0x7f86fb416070, tables=0x7f86fb440870, derived=false, flags=0) at /data/src/10.0/sql/sql_base.h:489
|
#14 0x000000000092b1eb in Rows_log_event::do_apply_event (this=0x7f86f9bf87b0, rgi=0x7f86f9038800) at /data/src/10.0/sql/log_event.cc:9730
|
#15 0x00000000005bc520 in Log_event::apply_event (this=0x7f86f9bf87b0, rgi=0x7f86f9038800) at /data/src/10.0/sql/log_event.h:1360
|
#16 0x00000000005b2e68 in apply_event_and_update_pos (ev=0x7f86f9bf87b0, thd=0x7f86fb416070, rgi=0x7f86f9038800, rpt=0x7f86fc0562b8) at /data/src/10.0/sql/slave.cc:3297
|
#17 0x00000000007bcff4 in rpt_handle_event (qev=0x7f86f9a94270, rpt=0x7f86fc0562b8) at /data/src/10.0/sql/rpl_parallel.cc:52
|
#18 0x00000000007bf784 in handle_rpl_parallel_thread (arg=0x7f86fc0562b8) at /data/src/10.0/sql/rpl_parallel.cc:1201
|
#19 0x00000000009faa88 in pfs_spawn_thread (arg=0x7f86fc04e5f0) at /data/src/10.0/storage/perfschema/pfs.cc:1860
|
#20 0x00007f87122360a4 in start_thread (arg=0x7f8710273700) at pthread_create.c:309
|
#21 0x00007f87103ee87d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
|
Note: assigned to svoj because at the first glance it looks more like a locking problem than a replication problem; please reassign to plinux if necessary.
|
|
I don't immediately see there's anything wrong with locking. This assertion was added along with some parallel replication fixes:
https://github.com/MariaDB/server/commit/98e36b299915fe30bc935380d6bc1f6a1986eb52
https://github.com/MariaDB/server/commit/b51109693e6abb0e58256192a648cdd158d47615
See also this thread: https://lists.launchpad.net/maria-developers/msg09859.html
Also I suggest to keep non-MDL caller details out of MDL. That is move this assertion to replication code if possible. E.g. iterate replication threads instead.
Reassigning to plinux per Elena suggestion. ATTN knielsen, monty.
|
|
Actually it can be triggered without read-only
--source include/have_binlog_format_row.inc
|
--source include/have_innodb.inc
|
--source include/master-slave.inc
|
|
--sync_slave_with_master
|
--source include/stop_slave.inc
|
|
SET GLOBAL slave_parallel_threads=7;
|
set global slave_parallel_mode=optimistic;
|
set global slave_transaction_retries=1000;
|
alter table mysql.gtid_slave_pos engine=myisam;
|
|
# Create some conflicting events on master
|
connection master;
|
CREATE TABLE t1(a INT NOT NULL AUTO_INCREMENT PRIMARY KEY) engine=innodb;
|
INSERT INTO t1 VALUES (1),(2),(3),(4);
|
--disable_query_log
|
--let $count=3
|
while ($count)
|
{
|
update t1 set a=a+5 where a=1;
|
update t1 set a=a+5 where a=2;
|
update t1 set a=a+5 where a=3;
|
update t1 set a=a+5 where a=4;
|
update t1 set a=a-5 where a=5;
|
update t1 set a=a-5 where a=6;
|
--dec $count
|
}
|
--enable_query_log
|
select * from t1;
|
drop table t1;
|
|
--connection slave
|
start slave;
|
--connection master
|
--sync_slave_with_master
|
stop slave;
|
SET GLOBAL slave_parallel_threads=0;
|
set global slave_parallel_mode=conservative;
|
set global slave_transaction_retries=10;
|
start slave;
|
--source include/rpl_end.inc
|
|
It fails deterministic-ally but sometime this can trigger 19157 sometimes this bug
|
|
Still reproducible.
Test case, that was provided by Elena Stepanova - fails on 10.2,10.3, but for some reason not on 10.4+
|
10.3 3fd80d08740d3ad2426bf
|
mysqld: /10.3/src/sql/mdl.cc:2123: bool MDL_context::acquire_lock(MDL_request*, double): Assertion `(mdl_request->type != MDL_INTENTION_EXCLUSIVE && mdl_request->type != MDL_EXCLUSIVE) || !(get_thd()->rgi_slave && get_thd()->rgi_slave->is_parallel_exec && lock->check_if_conflicting_replication_locks(this))' failed.
|
211223 17:15:13 [ERROR] mysqld got signal 6 ;
|
|
/lib/x86_64-linux-gnu/libc.so.6(+0x36f36)[0x7efd97ae6f36]
|
sql/mdl.cc:2129(MDL_context::acquire_lock(MDL_request*, double))[0x55dd14aba797]
|
sql/sql_base.cc:1800(open_table(THD*, TABLE_LIST*, Open_table_context*))[0x55dd145613d8]
|
sql/sql_base.cc:3715(open_and_process_table(THD*, TABLE_LIST*, unsigned int*, unsigned int, Prelocking_strategy*, bool, Open_table_context*))[0x55dd1456b84e]
|
sql/sql_base.cc:4190(open_tables(THD*, DDL_options_st const&, TABLE_LIST**, unsigned int*, unsigned int, Prelocking_strategy*))[0x55dd1456defa]
|
sql/sql_base.cc:5129(open_and_lock_tables(THD*, DDL_options_st const&, TABLE_LIST*, bool, unsigned int, Prelocking_strategy*))[0x55dd14572ff2]
|
sql/sql_base.h:503(open_and_lock_tables(THD*, TABLE_LIST*, bool, unsigned int))[0x55dd144d7314]
|
sql/rpl_gtid.cc:671(rpl_slave_state::record_gtid(THD*, rpl_gtid const*, unsigned long long, rpl_group_info*, bool, void**))[0x55dd14bad2a7]
|
sql/log_event.cc:8982(Xid_log_event::do_apply_event(rpl_group_info*))[0x55dd1519a4ec]
|
sql/log_event.h:1492(Log_event::apply_event(rpl_group_info*))[0x55dd144a4233]
|
sql/slave.cc:3789(apply_event_and_update_pos_apply(Log_event*, THD*, rpl_group_info*, int))[0x55dd144866b0]
|
sql/slave.cc:3982(apply_event_and_update_pos_for_parallel(Log_event*, THD*, rpl_group_info*))[0x55dd1448756a]
|
sql/rpl_parallel.cc:51(rpt_handle_event(rpl_parallel_thread::queued_event*, rpl_parallel_thread*))[0x55dd14bbb648]
|
sql/rpl_parallel.cc:1335(handle_rpl_parallel_thread)[0x55dd14bc2d95]
|
perfschema/pfs.cc:1871(pfs_spawn_thread)[0x55dd160cb3d9]
|
nptl/pthread_create.c:478(start_thread)[0x7efd97cad609]
|
/lib/x86_64-linux-gnu/libc.so.6(clone+0x43)[0x7efd97bd2293]
|
|
test, that was provided by Sachin Setiya - fails on 10.2-10.7
|
10.7 92a4e76a2c1c15fb44d
|
mariadbd: /10.7/src/sql/mdl.cc:2345: bool MDL_context::acquire_lock(MDL_request*, double): Assertion `(mdl_request->type != MDL_INTENTION_EXCLUSIVE && mdl_request->type != MDL_EXCLUSIVE) || !(get_thd()->rgi_slave && get_thd()->rgi_slave->is_parallel_exec && lock->check_if_conflicting_replication_locks(this))' failed.
|
211223 17:19:59 [ERROR] mysqld got signal 6 ;
|
|
Server version: 10.7.2-MariaDB-debug-log
|
|
??:0(__assert_fail)[0x7f934f14bf36]
|
sql/mdl.cc:2351(MDL_context::acquire_lock(MDL_request*, double))[0x5623d7d15f4b]
|
sql/mdl.cc:2508(MDL_context::acquire_locks(I_P_List<MDL_request, I_P_List_adapter<MDL_request, &MDL_request::next_in_list, &MDL_request::prev_in_list>, I_P_List_counter, I_P_List_no_push_back<MDL_request> >*, double))[0x5623d7d17048]
|
sql/sql_base.cc:4045(lock_table_names(THD*, DDL_options_st const&, TABLE_LIST*, TABLE_LIST*, unsigned long, unsigned int))[0x5623d76b26be]
|
sql/sql_base.h:256(lock_table_names(THD*, TABLE_LIST*, TABLE_LIST*, unsigned long, unsigned int))[0x5623d7afef77]
|
sql/sql_table.cc:1093(mysql_rm_table(THD*, TABLE_LIST*, bool, bool, bool, bool))[0x5623d7b0707a]
|
sql/sql_parse.cc:4947(mysql_execute_command(THD*, bool))[0x5623d7863209]
|
sql/sql_parse.cc:8028(mysql_parse(THD*, char*, unsigned int, Parser_state*))[0x5623d7877f6f]
|
sql/log_event_server.cc:1918(Query_log_event::do_apply_event(rpl_group_info*, char const*, unsigned int))[0x5623d84d2e1d]
|
sql/log_event_server.cc:1592(Query_log_event::do_apply_event(rpl_group_info*))[0x5623d84d0047]
|
sql/log_event.h:1516(Log_event::apply_event(rpl_group_info*))[0x5623d75dbdef]
|
sql/slave.cc:3881(apply_event_and_update_pos_apply(Log_event*, THD*, rpl_group_info*, int))[0x5623d75bd954]
|
sql/slave.cc:4068(apply_event_and_update_pos_for_parallel(Log_event*, THD*, rpl_group_info*))[0x5623d75be6e0]
|
sql/rpl_parallel.cc:62(rpt_handle_event(rpl_parallel_thread::queued_event*, rpl_parallel_thread*))[0x5623d7e64545]
|
sql/rpl_parallel.cc:1376(handle_rpl_parallel_thread)[0x5623d7e6c1bd]
|
perfschema/pfs.cc:2203(pfs_spawn_thread)[0x5623d899211f]
|
nptl/pthread_create.c:478(start_thread)[0x7f934f664609]
|
??:0(clone)[0x7f934f237293]
|
|
Query (0x61200009c0d3): DROP TABLE `t1` /* generated by server */
|
|