Details
-
Bug
-
Status: Closed (View Workflow)
-
Blocker
-
Resolution: Fixed
-
5.5.1
-
None
-
None
-
CentOS Linux release 8.2.2004 (Core)
Virtualized, On Premise
-
2021-2, 2021-3
Description
*Binlog replication is falling way behind on a 3 node MariaDB async cluster with Columnstore engine activated.
io_thread is fine. sql_thread is stuck on a columnstore query on slaves/replicas.*
Customer reported that insert into <CS table> select .. from <CStable> breaks the replication, meaning replication got stuck and never moving forward. He tried to stop replication, but also got hung and there wasn't able to stop slave. So, he had to kill mysqld process manually, but then it hit this again. He also tried to skip the event, but was not able to do it because once again stop slave hung. More details are in the ticket.
MariaDB [(none)]> show global variables like 'autocommit'; |
+---------------+-------+
|
| Variable_name | Value |
|
+---------------+-------+
|
| autocommit | ON |
|
+---------------+-------+
|
1 row in set (0.001 sec) |
 |
MariaDB [(none)]> SHOW VARIABLES LIKE 'columnstore%'; |
+-------------------------------------------------+--------+
|
| Variable_name | Value |
|
+-------------------------------------------------+--------+
|
| columnstore_cache_flush_threshold | 500000 | |
| columnstore_cache_inserts | OFF |
|
| columnstore_compression_type | SNAPPY |
|
| columnstore_decimal_scale | 8 | |
| columnstore_derived_handler | ON |
|
| columnstore_diskjoin_bucketsize | 100 | |
| columnstore_diskjoin_largesidelimit | 0 | |
| columnstore_diskjoin_smallsidelimit | 0 | |
| columnstore_double_for_decimal_math | OFF |
|
| columnstore_group_by_handler | ON |
|
| columnstore_import_for_batchinsert_delimiter | 7 | |
| columnstore_import_for_batchinsert_enclosed_by | 17 | |
| columnstore_local_query | 0 | |
| columnstore_orderby_threads | 16 | |
| columnstore_ordered_only | OFF |
|
| columnstore_replication_slave | OFF |
|
| columnstore_select_handler | ON |
|
| columnstore_select_handler_in_stored_procedures | ON |
|
| columnstore_string_scan_threshold | 10 | |
| columnstore_stringtable_threshold | 20 | |
| columnstore_um_mem_limit | 0 | |
| columnstore_use_decimal_scale | OFF |
|
| columnstore_use_import_for_batchinsert | ALWAYS |
|
| columnstore_varbin_always_hex | OFF |
|
+-------------------------------------------------+--------+
|
24 rows in set (0.000 sec) |
 |
MariaDB [dwhs]> show global variables like '%bin%form%'; |
+----------------------------+-------+
|
| Variable_name | Value |
|
+----------------------------+-------+
|
| binlog_format | MIXED |
|
| wsrep_forced_binlog_format | NONE |
|
+----------------------------+-------+
|
- show slave status and binlog output.
MariaDB [dwhs]> show slave status\G
|
*************************** 1. row *************************** |
Slave_IO_State: Waiting for master to send event |
Master_Host: 10.201.64.92 |
Master_User: repl
|
Master_Port: 3306 |
Connect_Retry: 60 |
Master_Log_File: mariadb-bin.000007 |
Read_Master_Log_Pos: 19867074 |
Relay_Log_File: mariadb-relay.000002 |
Relay_Log_Pos: 5766 |
Relay_Master_Log_File: mariadb-bin.000007 |
Slave_IO_Running: Yes
|
Slave_SQL_Running: Yes
|
Replicate_Do_DB:
|
Replicate_Ignore_DB:
|
Replicate_Do_Table:
|
Replicate_Ignore_Table:
|
Replicate_Wild_Do_Table:
|
Replicate_Wild_Ignore_Table:
|
Last_Errno: 0 |
Last_Error:
|
Skip_Counter: 0 |
Exec_Master_Log_Pos: 19858617 |
Relay_Log_Space: 14530 |
Until_Condition: None
|
Until_Log_File:
|
Until_Log_Pos: 0 |
Master_SSL_Allowed: No
|
Master_SSL_CA_File:
|
Master_SSL_CA_Path:
|
Master_SSL_Cert:
|
Master_SSL_Cipher:
|
Master_SSL_Key:
|
Seconds_Behind_Master: 19689 |
Master_SSL_Verify_Server_Cert: No
|
Last_IO_Errno: 0 |
Last_IO_Error:
|
Last_SQL_Errno: 0 |
Last_SQL_Error:
|
Replicate_Ignore_Server_Ids:
|
Master_Server_Id: 20 |
Master_SSL_Crl:
|
Master_SSL_Crlpath:
|
Using_Gtid: Slave_Pos
|
Gtid_IO_Pos: 0-20-8255 |
Replicate_Do_Domain_Ids:
|
Replicate_Ignore_Domain_Ids:
|
Parallel_Mode: optimistic
|
SQL_Delay: 0 |
SQL_Remaining_Delay: NULL
|
Slave_SQL_Running_State: Executing
|
Slave_DDL_Groups: 7 |
Slave_Non_Transactional_Groups: 0 |
Slave_Transactional_Groups: 4 |
 |
 |
MariaDB [dwhs]> show binlog events in 'mariadb-bin.000007' from 19858617 limit 2\G |
*************************** 1. row *************************** |
Log_name: mariadb-bin.000007 |
Pos: 19858617 |
Event_type: Gtid
|
Server_id: 20 |
End_log_pos: 19858659 |
Info: BEGIN GTID 0-20-8255 |
*************************** 2. row *************************** |
Log_name: mariadb-bin.000007 |
Pos: 19858659 |
Event_type: Query
|
Server_id: 20 |
End_log_pos: 19867001 |
Info: use `dwhs`; insert into lead_details_ins_smry1
|
SELECT daydate,
|
hour_of_day,
|
dataheader_type,
|
dest_adv_key,
|
destination_program_key,
|
destination_program_name,
|
vertical,
|
supervertical,
|
csd_salesrep,
|
.
|
.
|
.
|
- node info
$ curl -k https://127.0.0.1:8640/cmapi/0.4.0/cluster/status --header 'Content-Type:application/json' --header 'x-api-key:********' | jq .
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1160 100 1160 0 0 4280 0 --:--:-- --:--:-- --:--:-- 4264
{
"timestamp": "2021-01-12 16:27:25.647409",
"xx.xx.xx.91": {
"timestamp": "2021-01-12 16:27:25.654759",
"uptime": 683831,
"dbrm_mode": "master",
"cluster_mode": "readwrite",
"dbroots": [
"1"
],
"module_id": 1,
"services": [
{
"name": "workernode",
"pid": 635181
},
{
"name": "controllernode",
"pid": 635200
},
{
"name": "PrimProc",
"pid": 635240
},
{
"name": "ExeMgr",
"pid": 635384
},
{
"name": "WriteEngine",
"pid": 635423
},
{
"name": "DMLProc",
"pid": 635444
},
{
"name": "DDLProc",
"pid": 635496
}
]
},
"xx.xx.xx.92": {
"timestamp": "2021-01-12 16:27:25.737338",
"uptime": 683834,
"dbrm_mode": "slave",
"cluster_mode": "readonly",
"dbroots": [
"2"
],
"module_id": 2,
"services": [
{
"name": "workernode",
"pid": 630976
},
{
"name": "PrimProc",
"pid": 631016
},
{
"name": "ExeMgr",
"pid": 631157
},
{
"name": "WriteEngine",
"pid": 631195
}
]
},
"xx.xx.xx.93": {
"timestamp": "2021-01-12 16:27:25.818605",
"uptime": 683838,
"dbrm_mode": "slave",
"cluster_mode": "readonly",
"dbroots": [
"3"
],
"module_id": 3,
"services": [
{
"name": "workernode",
"pid": 619787
},
{
"name": "PrimProc",
"pid": 619828
},
{
"name": "ExeMgr",
"pid": 619976
},
{
"name": "WriteEngine",
"pid": 620012
}
]
},
"num_nodes": 3
}
- table schema
MariaDB [dwhs]> show create table lead_details_ins_smry1\G
|
*************************** 1. row *************************** |
Table: lead_details_ins_smry1
|
Create Table: CREATE TABLE `lead_details_ins_smry1` (
|
`daydate` date DEFAULT NULL,
|
`hour_of_day` int(11) DEFAULT NULL, |
`dataheader_type` varchar(30) DEFAULT NULL, |
`dest_adv_key` int(11) DEFAULT NULL, |
`destination_program_key` int(11) DEFAULT NULL, |
.
|
.
|
.
|
`payable_leads` int(11) DEFAULT NULL, |
`payable_calls` int(11) DEFAULT NULL |
) ENGINE=Columnstore DEFAULT CHARSET=latin1
|
 |
 |
MariaDB [dwhs]> show create table lead_details_ins\G
|
*************************** 1. row *************************** |
Table: lead_details_ins
|
Create Table: CREATE TABLE `lead_details_ins` (
|
`daydate` date DEFAULT NULL,
|
`datacapturekey` bigint(20) DEFAULT NULL, |
`parent_datacapturekey` bigint(20) DEFAULT NULL, |
`hour_of_day` int(11) DEFAULT NULL, |
`dataheader_key` int(11) DEFAULT NULL, |
`dataheader_type` varchar(30) DEFAULT NULL, |
`datacapture_status` int(11) DEFAULT NULL, |
`datacapture_status_name` varchar(100) DEFAULT NULL, |
`dest_adv_key` int(11) DEFAULT NULL, |
.
|
.
|
.
|
) ENGINE=Columnstore DEFAULT CHARSET=latin1
|