[MDEV-8105] [Draft] Slave hangs while replicating Created: 2015-05-05  Updated: 2020-10-18  Resolved: 2020-10-18

Status: Closed
Project: MariaDB Server
Component/s: Replication
Affects Version/s: 10.1
Fix Version/s: N/A

Type: Bug Priority: Major
Reporter: Elena Stepanova Assignee: Elena Stepanova
Resolution: Incomplete Votes: 0
Labels: None

Attachments: File binlogs.tar.gz     File mysql.err     File mysql.log.gz     HTML File threads1    

 Description   

+-----+-------------+-----------------+-------+---------+--------+--------------------------------------------------------------------------------+-------------------------------------------------+----------+
| Id  | User        | Host            | db    | Command | Time   | State                                                                          | Info                                            | Progress |
+-----+-------------+-----------------+-------+---------+--------+--------------------------------------------------------------------------------+-------------------------------------------------+----------+
|   2 | root        | localhost:48095 | mysql | Query   | 124028 | Waiting for the slave SQL thread to advance position                           | SELECT MASTER_POS_WAIT('mysql-bin.000027',1019) |    0.000 |
| 157 | system user |                 | NULL  | Connect | 124100 | Waiting for master to send event                                               | NULL                                            |    0.000 |
| 161 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 162 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 163 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 164 | system user |                 | NULL  | Connect | 124464 | Waiting for prior transaction to commit                                        | NULL                                            |    0.000 |
| 165 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 166 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 167 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 168 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 169 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 170 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 171 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 172 | system user |                 | test  | Connect | 124482 | Waiting for table metadata lock                                                | ALTER TABLE `PP_J` DISABLE KEYS                 |    0.000 |
| 173 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 174 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 175 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 176 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 177 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 178 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 179 | system user |                 | NULL  | Connect | 124067 | Waiting for work from SQL thread                                               | NULL                                            |    0.000 |
| 180 | system user |                 | NULL  | Connect | 124067 | Waiting for prior transaction to start commit before starting next transaction | NULL                                            |    0.000 |
| 181 | system user |                 | NULL  | Connect | 124031 | Waiting for room in worker thread event queue                                  | NULL                                            |    0.000 |
| 187 | root        | localhost:48847 | test  | Query   |      0 | init                                                                           | show processlist                                |    0.000 |
+-----+-------------+-----------------+-------+---------+--------+--------------------------------------------------------------------------------+-------------------------------------------------+----------+

MariaDB [test]> show all slaves status \G
*************************** 1. row ***************************
              Connection_name: 
              Slave_SQL_State: Waiting for room in worker thread event queue
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 127.0.0.1
                  Master_User: root
                  Master_Port: 10730
                Connect_Retry: 1
              Master_Log_File: mysql-bin.000027
          Read_Master_Log_Pos: 1019
               Relay_Log_File: mysql-relay-bin.000704
                Relay_Log_Pos: 3455
        Relay_Master_Log_File: mysql-bin.000010
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 3168
              Relay_Log_Space: 4701833
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 124650
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 1
               Master_SSL_Crl: 
           Master_SSL_Crlpath: 
                   Using_Gtid: No
                  Gtid_IO_Pos: 
      Replicate_Do_Domain_Ids: 
  Replicate_Ignore_Domain_Ids: 
                Parallel_Mode: optimistic
         Retried_transactions: 0
           Max_relay_log_size: 4096
         Executed_log_entries: 8072
    Slave_received_heartbeats: 68
       Slave_heartbeat_period: 1800.000
               Gtid_Slave_Pos: 0-1-84
1 row in set (0.00 sec)

Slave command line:

/home/elenst/git/10.1/sql/mysqld --no-defaults --basedir=/home/elenst/git/10.1 --datadir=/home/elenst/test_results/10.1-parallel-replication-9/current1_1_slave/data --lc-messages-dir=/home/elenst/git/10.1/sql/share --character-sets-dir=/home/elenst/git/10.1/sql/share/charsets --tmpdir=/home/elenst/test_results/10.1-parallel-replication-9/current1_1_slave/tmp --core-file --max-allowed-packet=128Mb --port=10732 --socket=/home/elenst/test_results/10.1-parallel-replication-9/current1_1_slave/mysql.sock --pid-file=/home/elenst/test_results/10.1-parallel-replication-9/current1_1_slave/mysql.pid --general-log --general-log-file=/home/elenst/test_results/10.1-parallel-replication-9/current1_1_slave/mysql.log --server_id=2 --report-host=127.0.0.1 --report_port=10732 --log_output=FILE --slow_query_log --log_bin_trust_function_creators=1 --query_cache_size=64M --slave-skip-errors=1049,1305,1539,1505 --slave-parallel-mode=optimistic --binlog_commit_wait_count=20 --binlog_commit_wait_usec=1000000 --slave-parallel-threads=20 --max-binlog-size=4096 --innodb --sql-mode=no_engine_substitution

RQG command line (revno 1016 + diff below)

perl /home/elenst/bzr/randgen-mariadb-patches/runall-new.pl --no-mask --seed=time --threads=16 --duration=400 --queries=100M --reporters=QueryTimeout,Backtrace,ErrorLog,Deadlock,ReplicationThreadRestarter --redefine=conf/mariadb/general-workarounds.yy --redefine=conf/mariadb/10.0-features-redefine.yy --mysqld=--log_output=FILE --mysqld=--slow_query_log --mysqld=--log_bin_trust_function_creators=1 --mysqld=--query_cache_size=64M --engine=InnoDB --views --grammar=conf/partitioning/partition_pruning.yy --gendata=conf/partitioning/partition_pruning.zz --rpl_mode=row --mysqld=--slave-skip-errors=1049,1305,1539,1505 --mysqld=--slave-parallel-mode=optimistic --mysqld=--binlog_commit_wait_count=20 --mysqld=--binlog_commit_wait_usec=1000000 --mysqld=--slave-parallel-threads=20 --use-gtid=no --mysqld=--max-binlog-size=4096 --mtr-build-thread=73 --basedir1=/home/elenst/git/10.1 --vardir1=/home/elenst/test_results/10.1-parallel-replication-9/current1_1

RQG diff

elenst@perro:~/bzr/randgen-mariadb-patches$ bzr diff
=== modified file 'combinations.pl'
--- combinations.pl	2014-12-22 17:22:18 +0000
+++ combinations.pl	2015-04-24 15:06:19 +0000
@@ -77,7 +77,7 @@
 my $max_result = 0;
 my $thread_id = 0;
 my $epochcreadir;
-my $mtrbt = defined $ENV{MTR_BUILD_THREAD}?$ENV{MTR_BUILD_THREAD}:300;
+my $mtrbt = defined $ENV{MTR_BUILD_THREAD}?$ENV{MTR_BUILD_THREAD}:73;
 
 my $opt_result = GetOptions(
 	'config=s' => \$config_file,
 
=== modified file 'conf/mariadb/gtid_stress.zz'
--- conf/mariadb/gtid_stress.zz	2013-06-23 16:17:39 +0000
+++ conf/mariadb/gtid_stress.zz	2015-04-24 20:40:40 +0000
@@ -21,7 +21,8 @@
 
 $tables = {
 	partitions => [ undef , 'HASH(pk) PARTITIONS 2' ],
-	engines => [ 'InnoDB' ]
+	engines => [ 'InnoDB' ],
+	rows => [ 0, 1, 10, 100, 1000, 2000 ]
 };
 
 $fields = {
 
=== modified file 'lib/GenTest/Constants.pm'
--- lib/GenTest/Constants.pm	2013-06-21 23:07:00 +0000
+++ lib/GenTest/Constants.pm	2015-04-24 15:07:30 +0000
@@ -147,7 +147,7 @@
 use constant DB_JAVADB		=> 4;
 use constant DB_DRIZZLE		=> 5;
 
-use constant DEFAULT_MTR_BUILD_THREAD => 930; ## Legacy...
+use constant DEFAULT_MTR_BUILD_THREAD => 72; ## Legacy...
 
 #
 # The part below deals with constant value to constant name conversions
 
=== modified file 'lib/GenTest/Executor/MySQL.pm'
--- lib/GenTest/Executor/MySQL.pm	2015-03-22 18:05:51 +0000
+++ lib/GenTest/Executor/MySQL.pm	2015-04-23 16:28:17 +0000
@@ -1002,7 +1002,7 @@
                     "ELSE data_type END ".
          "FROM information_schema.tables INNER JOIN ".
               "information_schema.columns USING(table_schema, table_name) ".
-          "WHERE table_name <> 'DUMMY'"; 
+          "WHERE table_name <> 'DUMMY' AND table_name <> 'GEOMETRY_COLUMNS'"; 
 
     return $self->dbh()->selectall_arrayref($query);
 }
 
=== modified file 'lib/GenTest/Reporter/ReplicationThreadRestarter.pm'
--- lib/GenTest/Reporter/ReplicationThreadRestarter.pm	2010-02-10 12:47:45 +0000
+++ lib/GenTest/Reporter/ReplicationThreadRestarter.pm	2015-04-30 23:09:29 +0000
@@ -61,8 +61,30 @@
 	}
 }
 
+sub report {
+
+	my $reporter = shift;
+	my $slave_host = $reporter->serverInfo('slave_host');
+	my $slave_port = $reporter->serverInfo('slave_port');
+
+	my $slave_dsn = 'dbi:mysql:host='.$slave_host.':port='.$slave_port.':user=root';
+	my $slave_dbh = DBI->connect($slave_dsn);
+
+	if (defined $slave_dbh) {
+		$slave_dbh->do("START SLAVE");
+		if ($slave_dbh->err()) {
+			say("Query START SLAVE failed: ".$slave_dbh->errstr());
+			return STATUS_REPLICATION_FAILURE;
+		} else {
+			return STATUS_OK;
+		}
+	} else {
+		return STATUS_SERVER_CRASHED;
+	}
+}
+
 sub type {
-	return REPORTER_TYPE_PERIODIC;
+	return REPORTER_TYPE_PERIODIC | REPORTER_TYPE_SUCCESS;
 }
 
 1;

All threads` stack trace from the running slave is attached (threads1).
Master binlogs are attached (binlogs.tar.gz).
Master general log is attached (mysql.log.gz)
Slave error log is attached (mysql.err).


Generated at Thu Feb 08 07:24:40 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.