Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-32947

Semi-sync with Multi-source Checksum Verification Failure

    XMLWordPrintable

Details

    Description

      A multi-source replica with one semi-sync connection and one async connection will throw a Replication event checksum verification failure when reading events on the semi-sync connection.

      [ERROR] Master 'c1': Slave I/O: Replication event checksum verification failed while reading from network, Internal MariaDB error code: 1743
      

      The following MTR test highlights this:

      #
      # MDEV-32947: Semi-sync with multi-source checksum verification failure
      #
      # Topology: 1->3 and 2->3, using named channels c1 and c2 respectively.
      # c1 is semi-sync enabled, c2 is semi-sync disabled.
      #
      # Server 3 starts c1 with rpl_semi_sync_slave_enabled=1, then turns the
      # variable off and starts c2. The test then creates a table on server 1
      # and asserts that server 3's error log does not contain error 1743
      # (ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE).
      #

      # binlog_format independent
      --source include/have_binlog_format_statement.inc

      # Connect to servers 3, 2, 1 (in that order) and reset the binlog and
      # replication state on each of them.
      --let $_rpl_server= 3
      while ($_rpl_server)
      {
        # Connect.
        --let $rpl_server_number= $_rpl_server
        --let $rpl_connection_name= server_$_rpl_server
        --source include/rpl_connect.inc

        # Configure server.
        --let $rpl_connection_name= server_$_rpl_server
        --source include/rpl_connection.inc
        USE test;
        RESET MASTER;
        SET GLOBAL gtid_slave_pos= "";
        RESET SLAVE;

        --dec $_rpl_server
      }

      --connection server_1
      # Remember the configured timeout so that cleanup can restore it.
      --let $sav_timeout_master= `SELECT @@global.rpl_semi_sync_master_timeout`
      set global rpl_semi_sync_master_enabled= 1;
      # 1 second because the replica will err, so we don't want to wait
      set global rpl_semi_sync_master_timeout= 1000;

      --connection server_2
      set global rpl_semi_sync_master_enabled= 0;

      --connection server_3
      --eval change master 'c1' to master_host='127.0.0.1',master_port=$SERVER_MYPORT_1,master_use_gtid=slave_pos, master_user='root';
      --eval change master 'c2' to master_host='127.0.0.1',master_port=$SERVER_MYPORT_2,master_use_gtid=slave_pos, master_user='root';
      # c1 is started while semi-sync is enabled on the slave.
      set global rpl_semi_sync_slave_enabled=1;
      start slave 'c1';

      --connection server_1
      --echo # Waiting for slave to connect to server 1
      --let $status_var_value= 1
      --let $status_var= rpl_semi_sync_master_clients
      --source include/wait_for_status_var.inc

      --connection server_3

      --echo # Disable semi_sync on the slave mid-way through
      set global rpl_semi_sync_slave_enabled=0;

      # c2 is started while semi-sync is disabled on the slave; c1 is still
      # connected at this point.
      start slave 'c2';
      set default_master_connection='c2';
      --source include/wait_for_slave_io_to_start.inc


      # Generate an event on server 1 for server 3 to read over c1.
      --connection server_1
      create table t1 (a int);


      # Err 1743 is ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
      # The assertion expects zero occurrences of the error text in server 3's
      # error log.
      --let $err_txt= error code: 1743
      --let $assert_text= Check that there is no '$err_txt' in error log.
      --let $assert_select= $err_txt
      --let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.3.err
      --let $assert_count= 0
      --let $assert_only_after=CURRENT_TEST
      --source include/assert_grep.inc

      --echo #
      --echo # Cleanup

      --connection server_3
      stop slave 'c1';
      stop slave 'c2';
      set global rpl_semi_sync_slave_enabled=0;


      --connection server_1
      set global rpl_semi_sync_master_enabled=0;
      # Restore the timeout that was changed at the start of the test.
      --eval set global rpl_semi_sync_master_timeout= $sav_timeout_master
      drop table t1;

      --connection server_2
      set global rpl_semi_sync_master_enabled=0;
      # Nothing to drop here: this test never creates a table on server 2.

      --connection server_3
      start slave 'c1';
      start slave 'c2';

      --source include/rpl_end.inc
      --echo # End of test
      

      This appears to be caused by the later call to START SLAVE 'c2', made with rpl_semi_sync_slave_enabled=0, which re-initializes the rpl_semisync_slave singleton with the disabled status even though 'c1' still has an active semi-sync connection. While fixing MDEV-32944 would fix this, I think the bigger issue is that different channels share the same instance of rpl_semisync_slave.

      Attachments

        Issue Links

          Activity

            People

              monty Michael Widenius
              bnestere Brandon Nesterenko
              Votes:
              1 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.