Details
-
Bug
-
Status: Open (View Workflow)
-
Major
-
Resolution: Unresolved
-
10.6
Description
A multi-source replica with one semi-sync connection and one async connection will throw a Replication event checksum verification failure when reading events on the semi-sync connection.
[ERROR] Master 'c1': Slave I/O: Replication event checksum verification failed while reading from network, Internal MariaDB error code: 1743
|
The following MTR test highlights this:
# Configure environment as 1->3,2->3 with named channels c1, c2 respectively
|
# c1 is semi-sync enabled, c2 is semi-sync disabled
|
|
# binlog_format independent
# NOTE(review): presumably the include below restricts the run to a single
# binlog format so the test is not repeated per format -- confirm.
|
--source include/have_binlog_format_statement.inc
|
|
# Reset every server: $_rpl_server counts down 3, 2, 1 and the loop ends at 0.
--let $_rpl_server= 3
|
while ($_rpl_server)
|
{
|
# Connect.
# rpl_connect.inc is handed the server number and the connection name
# server_<N> through the two variables set below.
|
--let $rpl_server_number= $_rpl_server
|
--let $rpl_connection_name= server_$_rpl_server
|
--source include/rpl_connect.inc
|
|
# Configure server.
# Switch to connection server_<N>, then clear the binary log, the GTID slave
# position and any existing replica configuration on that server.
|
--let $rpl_connection_name= server_$_rpl_server
|
--source include/rpl_connection.inc
|
USE test;
|
RESET MASTER;
|
SET GLOBAL gtid_slave_pos= "";
|
RESET SLAVE;
|
|
# Move on to the next lower-numbered server.
--dec $_rpl_server
|
}
|
|
# server_1 is the primary for channel c1: semi-sync is enabled on it.
--connection server_1
|
set global rpl_semi_sync_master_enabled= 1;
|
set global rpl_semi_sync_master_timeout= 1000; # 1 second because the replica will err, so we don't want to wait
|
|
# server_2 is the primary for channel c2: semi-sync is disabled on it.
--connection server_2
|
set global rpl_semi_sync_master_enabled= 0;
|
|
# server_3 is the multi-source replica: define both channels, using GTID
# slave_pos positioning, then start only c1 while the replica-side semi-sync
# switch is on.
--connection server_3
|
--eval change master 'c1' to master_host='127.0.0.1',master_port=$SERVER_MYPORT_1,master_use_gtid=slave_pos, master_user='root';
|
--eval change master 'c2' to master_host='127.0.0.1',master_port=$SERVER_MYPORT_2,master_use_gtid=slave_pos, master_user='root';
|
set global rpl_semi_sync_slave_enabled=1;
|
start slave 'c1';
|
|
# Wait on server_1 until the status variable rpl_semi_sync_master_clients
# equals 1 (wait_for_status_var.inc reads the two variables set below).
--connection server_1
|
--echo # Waiting for slave to connect to server 1
|
--let $status_var_value= 1
|
--let $status_var= rpl_semi_sync_master_clients
|
--source include/wait_for_status_var.inc
|
|
--connection server_3
|
|
--echo # Disable semi_sync on the slave mid-way through
|
set global rpl_semi_sync_slave_enabled=0;
|
|
# c2 is started only after the global switch was turned off, while c1 is
# still connected; this ordering is the scenario under test.
start slave 'c2';
|
# Make c2 the default connection so the wait include below targets c2.
set default_master_connection='c2';
|
--source include/wait_for_slave_io_to_start.inc
|
|
|
# Generate one binlog event on server_1 for channel c1 to read.
--connection server_1
|
create table t1 (a int);
|
|
|
# Err 1743 is ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
# The variables below configure assert_grep.inc to require zero occurrences
# of the error text in server 3's error log, considering only log content
# after the CURRENT_TEST marker.
# NOTE(review): nothing visible here waits for server_3 to receive the event
# before the log is searched, so the check may run too early -- confirm
# whether a wait is needed.
|
--let $err_txt= error code: 1743
|
--let $assert_text= Check that there is no '$err_txt' in error log.
|
--let $assert_select= $err_txt
|
--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.3.err
|
--let $assert_count= 0
|
--let $assert_only_after=CURRENT_TEST
|
--source include/assert_grep.inc
|
|
--echo #
|
--echo # Cleanup
|
|
# Stop both channels on the replica and make sure the replica-side semi-sync
# switch is off before touching the primaries.
--connection server_3
|
stop slave 'c1';
|
stop slave 'c2';
|
set global rpl_semi_sync_slave_enabled=0;
|
|
|
# Turn semi-sync off on server_1 and remove the table the test created there.
--connection server_1
|
set global rpl_semi_sync_master_enabled=0;
|
drop table t1;
|
|
--connection server_2
|
set global rpl_semi_sync_master_enabled=0;
|
# No statement in this test creates t2 on server_2, so an unconditional
# DROP TABLE would abort cleanup with an "Unknown table" error.
drop table if exists t2;
|
|
# Restart both channels before handing over to rpl_end.inc.
--connection server_3
|
start slave 'c1';
|
start slave 'c2';
|
|
--source include/rpl_end.inc
|
--echo # End of test
|
This appears to be caused by the later `start slave 'c2'` statement: because it runs with rpl_semi_sync_slave_enabled=0, it re-initializes the rpl_semisync_slave singleton with the disabled status, despite there being an active semi-sync connection on 'c1'. While fixing MDEV-32944 would fix this, I think the bigger issue is that different channels share the same instance of rpl_semisync_slave.
Attachments
Issue Links
- is caused by
-
MDEV-32551 "Read semi-sync reply magic number error" warnings on master
- Closed
- relates to
-
MDEV-32944 Rpl_semi_sync_slave_enabled sysvar Should Not Be Modifiable While IO Thread is Running
- Closed