I can get this error quite reliably with this crude injection:
diff --git a/mysql-test/extra/rpl_tests/rpl_checksum.inc b/mysql-test/extra/rpl_tests/rpl_checksum.inc
|
index 14664fd..44d3776 100644
|
--- a/mysql-test/extra/rpl_tests/rpl_checksum.inc
|
+++ b/mysql-test/extra/rpl_tests/rpl_checksum.inc
|
@@ -15,6 +15,9 @@ call mtr.add_suppression('Slave can not handle replication events with the check
|
call mtr.add_suppression('Replication event checksum verification failed');
|
# due to C failure simulation
|
call mtr.add_suppression('Relay log write failure: could not queue event from master');
|
+--disable_query_log
|
+set @@global.debug_dbug='d,mdev11491';
|
+--enable_query_log
|
call mtr.add_suppression('Master is configured to log replication events with checksum, but will not send such events to slaves that cannot process them');
|
|
# A. read/write access to the global vars:
|
@@ -88,6 +91,10 @@ create table t1 (a int);
|
flush logs;
|
flush logs;
|
flush logs;
|
+--sleep 10
|
+--disable_query_log
|
+set @@global.debug_dbug='';
|
+--enable_query_log
|
|
sync_slave_with_master;
|
#connection slave;
|
diff --git a/sql/log.cc b/sql/log.cc
|
index 45ab5c8..6e0a87b 100644
|
--- a/sql/log.cc
|
+++ b/sql/log.cc
|
@@ -9630,6 +9630,7 @@ binlog_background_thread(void *arg __attribute__((unused)))
|
DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
|
/* Grab next pointer first, as mark_xid_done() may free the element. */
|
next= queue->next_in_queue;
|
+ DBUG_EXECUTE_IF("mdev11491", { my_sleep(1000000); };);
|
mysql_bin_log.mark_xid_done(queue->binlog_id, true);
|
queue= next;
|
|
So, it's a race condition that shows up on really slow systems (slow disks?), when the call to mark_xid_done() is considerably delayed.
We need to ensure that mark_xid_done() happens before we rotate to the log whose position is later printed in the error message.
I can get this error quite reliably with this crude injection:
diff --git a/mysql-test/extra/rpl_tests/rpl_checksum.inc b/mysql-test/extra/rpl_tests/rpl_checksum.inc
index 14664fd..44d3776 100644
--- a/mysql-test/extra/rpl_tests/rpl_checksum.inc
+++ b/mysql-test/extra/rpl_tests/rpl_checksum.inc
@@ -15,6 +15,9 @@ call mtr.add_suppression('Slave can not handle replication events with the check
call mtr.add_suppression('Replication event checksum verification failed');
# due to C failure simulation
call mtr.add_suppression('Relay log write failure: could not queue event from master');
+--disable_query_log
+set @@global.debug_dbug='d,mdev11491';
+--enable_query_log
call mtr.add_suppression('Master is configured to log replication events with checksum, but will not send such events to slaves that cannot process them');
# A. read/write access to the global vars:
@@ -88,6 +91,10 @@ create table t1 (a int);
flush logs;
flush logs;
flush logs;
+--sleep 10
+--disable_query_log
+set @@global.debug_dbug='';
+--enable_query_log
sync_slave_with_master;
#connection slave;
diff --git a/sql/log.cc b/sql/log.cc
index 45ab5c8..6e0a87b 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -9630,6 +9630,7 @@ binlog_background_thread(void *arg __attribute__((unused)))
DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
/* Grab next pointer first, as mark_xid_done() may free the element. */
next= queue->next_in_queue;
+ DBUG_EXECUTE_IF("mdev11491", { my_sleep(1000000); };);
mysql_bin_log.mark_xid_done(queue->binlog_id, true);
queue= next;
So, it's a race condition that shows up on really slow systems (slow disks?), when the call to mark_xid_done() is considerably delayed.
We need to ensure that mark_xid_done() happens before we rotate to the log whose position is later printed in the error message.