Details
-
Bug
-
Status: Open (View Workflow)
-
Major
-
Resolution: Unresolved
-
None
-
None
Description
We installed the galera cluster using the bitnami mariadb-helm charts.
Affected Version 10.7.7 (not able to select it through the select field above)
After a while (few days) we always see deadlocks in the logs:
Caused by: java.sql.SQLException: (conn=371621) Lock wait timeout exceeded; try restarting transaction
|
The values.yaml file:
image:
|
tag: 10.7.7-debian-11-r30
|
|
podAnnotations:
|
vault.security.banzaicloud.io/vault-addr: "http://censored-for-compliance-reasons.com"
|
vault.security.banzaicloud.io/vault-skip-verify: "true"
|
vault.security.banzaicloud.io/vault-role: "cluster-nonprod"
|
vault.security.banzaicloud.io/vault-path: "cluster-nonprod"
|
|
serviceAccount:
|
create: true
|
automountServiceAccountToken: true
|
|
fullnameOverride: dev-mariadb
|
|
rbac:
|
create: true
|
|
podManagementPolicy: Parallel
|
|
replicaCount: 3
|
|
db:
|
user: technical
|
name: ""
|
|
persistence:
|
size: 20Gi
|
|
usePasswordFiles: true
|
customPasswordFiles:
|
root: "/vault-secrets/rootpassword"
|
user: "/vault-secrets/userpassword"
|
mariabackup: "/vault-secrets/mariabackuppassword"
|
|
extraVolumes:
|
- name: vault-secrets-volume
|
emptyDir: {}
|
|
extraVolumeMounts:
|
- mountPath: /vault-secrets
|
name: vault-secrets-volume
|
|
initContainers:
|
- command:
|
- bash
|
- -c
|
- |
|
echo -n "$PASSWORD" > /vault-secrets/userpassword
|
echo -n "$ROOTPASSWORD" > /vault-secrets/rootpassword
|
echo -n "$MARIABACKUPPASSWORD" > /vault-secrets/mariabackuppassword
|
image: ubuntu:jammy
|
name: init-container
|
volumeMounts:
|
- mountPath: /vault-secrets
|
name: vault-secrets-volume
|
env:
|
- name: PASSWORD
|
value: vault:cluster-nonprod/data/dev/common/mariadb-galera/credentials#mariadb-password
|
- name: ROOTPASSWORD
|
value: vault:cluster-nonprod/data/dev/common/mariadb-galera/credentials#mariadb-root-password
|
- name: MARIABACKUPPASSWORD
|
value: vault:cluster-nonprod/data/dev/common/mariadb-galera/credentials#mariadb-galera-mariabackup-password
|
|
resources:
|
requests:
|
cpu: 0.5
|
memory: 4096Mi
|
limits:
|
cpu: 2
|
memory: 4096Mi
|
|
initdbScripts:
|
init_dbs.sql: |
|
CREATE DATABASE IF NOT EXISTS vault;
|
CREATE DATABASE IF NOT EXISTS vault_lock;
|
CREATE DATABASE IF NOT EXISTS keycloak;
|
|
mariadbConfiguration: |-
|
[client]
|
port=3306
|
socket=/opt/bitnami/mariadb/tmp/mysql.sock
|
plugin_dir=/opt/bitnami/mariadb/plugin
|
|
[mysqld]
|
default_storage_engine=InnoDB
|
basedir=/opt/bitnami/mariadb
|
datadir=/bitnami/mariadb/data
|
plugin_dir=/opt/bitnami/mariadb/plugin
|
tmpdir=/opt/bitnami/mariadb/tmp
|
socket=/opt/bitnami/mariadb/tmp/mysql.sock
|
pid_file=/opt/bitnami/mariadb/tmp/mysqld.pid
|
bind_address=0.0.0.0
|
|
## Character set
|
##
|
collation_server=utf8_unicode_ci
|
init_connect='SET NAMES utf8'
|
character_set_server=utf8
|
|
## MyISAM
|
##
|
key_buffer_size=32M
|
myisam_recover_options=FORCE,BACKUP
|
|
## Safety
|
##
|
skip_host_cache
|
skip_name_resolve
|
max_allowed_packet=16M
|
max_connect_errors=1000000
|
sql_mode=IGNORE_SPACE,ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
|
sysdate_is_now=1
|
|
## Binary Logging
|
##
|
log_bin=mysql-bin
|
expire_logs_days=14
|
# Disabling for performance per http://severalnines.com/blog/9-tips-going-production-galera-cluster-mysql
|
sync_binlog=0
|
# Required for Galera
|
binlog_format=row
|
|
## Caches and Limits
|
##
|
tmp_table_size=32M
|
max_heap_table_size=32M
|
# Re-enabling as now works with Maria 10.1.2
|
query_cache_type=1
|
query_cache_limit=4M
|
query_cache_size=256M
|
max_connections=500
|
thread_cache_size=50
|
open_files_limit=65535
|
table_definition_cache=4096
|
table_open_cache=4096
|
|
## InnoDB
|
##
|
innodb=FORCE
|
innodb_strict_mode=1
|
# Mandatory per https://github.com/codership/documentation/issues/25
|
innodb_autoinc_lock_mode=2
|
# Per https://www.percona.com/blog/2006/08/04/innodb-double-write/
|
innodb_doublewrite=1
|
innodb_flush_method=O_DIRECT
|
innodb_log_files_in_group=2
|
innodb_log_file_size=128M
|
innodb_flush_log_at_trx_commit=1
|
innodb_file_per_table=1
|
# 80% Memory is default reco.
|
# Need to re-evaluate when DB size grows
|
innodb_buffer_pool_size=2G
|
innodb_file_format=Barracuda
|
|
## Logging
|
##
|
log_error=/opt/bitnami/mariadb/logs/mysqld.log
|
slow_query_log_file=/opt/bitnami/mariadb/logs/mysqld.log
|
log_queries_not_using_indexes=1
|
slow_query_log=1
|
|
[galera]
|
wsrep_on=ON
|
wsrep_provider=/opt/bitnami/mariadb/lib/libgalera_smm.so
|
wsrep_sst_method=mariabackup
|
wsrep_slave_threads=4
|
wsrep_cluster_address=gcomm://
|
wsrep_cluster_name=galera
|
wsrep_sst_auth="root:"
|
# Enabled for performance per https://mariadb.com/kb/en/innodb-system-variables/#innodb_flush_log_at_trx_commit
|
innodb_flush_log_at_trx_commit=2
|
# MYISAM REPLICATION SUPPORT #
|
wsrep_mode=REPLICATE_MYISAM
|
|
[mariadb]
|
plugin_load_add=auth_pam
|
|
|
diagnosticMode:
|
enabled: false
|
We checked the nodes for there state and we saw that node dev-mariadb-0 has a process in "Waiting for certification" that never ends:
| 370905 | root | localhost | anonym | Query | 140221 | Waiting for certification | INSERT INTO `ACT_RU_VARIABLE` VALUES ('0018a8dc-afc8-11ee-8401-d2c062af09ed',1,'string','temporaryRa | 0.000 |
|
It seems all the other processes waiting for this one to complete.
We have tried to kill this process, but a KILL 370905 just ends up with an additional process waiting for the one to complete.
We are only able to fix this problem by restarting the node again. The other both nodes working as expected and read and write is possible.
Any idea how to fix this problem, or how we are able to ensure that this didn`t happen again?
Additional Information
Full Processlist of node0:
MariaDB [(none)]> show processlist;
|
+--------+-------------+-------------------+---------+------------+---------+---------------------------------+------------------------------------------------------------------------------------------------------+----------+
|
| Id | User | Host | db | Command | Time | State | Info | Progress |
|
+--------+-------------+-------------------+---------+------------+---------+---------------------------------+------------------------------------------------------------------------------------------------------+----------+
|
| 1 | system user | | NULL | Sleep | 1977667 | wsrep aborter idle | NULL | 0.000 |
|
| 2 | system user | | NULL | Sleep | 140221 | Waiting for TOI DDL | update ACT_RU_EXECUTION set
|
REV_ = 2,
|
PROC_DEF_ID_ = 'd5618a2e-9aa3-11ee-84ad-06434dd3b4 | 0.000 |
|
| 7 | system user | | NULL | Sleep | 140674 | After apply log event | NULL | 0.000 |
|
| 8 | system user | | NULL | Sleep | 140665 | After apply log event | NULL | 0.000 |
|
| 9 | system user | | NULL | Sleep | 140681 | After apply log event | NULL | 0.000 |
|
| 370905 | root | localhost | anonym | Query | 140221 | Waiting for certification | INSERT INTO `ACT_RU_VARIABLE` VALUES
|
('0018a8dc-afc8-11ee-8401-d2c062af09ed',1,'string','temporaryRa | 0.000 |
|
| 370942 | root | localhost | NULL | Query | 140044 | Waiting to execute in isolation | DROP DATABASE anonym | 0.000 |
|
| 371448 | root | localhost | NULL | Query | 137554 | Waiting to execute in isolation | DROP DATABASE anonym | 0.000 |
|
| 371473 | root | localhost | NULL | Query | 137424 | Waiting to execute in isolation | DROP DATABASE anonym | 0.000 |
|
| 371585 | root | localhost | NULL | Query | 136880 | Waiting to execute in isolation | KILL QUERY 370942 | 0.000 |
|
| 371619 | root | 10.42.1.163:57406 | anonym | Query | 135847 | starting | COMMIT | 0.000 |
|
| 377012 | root | 10.42.1.163:45958 | anonym | Query | 108463 | starting | COMMIT | 0.000 |
|
| 397698 | root | localhost | NULL | Query | 10473 | Waiting to execute in isolation | KILL QUERY 397677 | 0.000 |
|
| 397701 | root | localhost | NULL | Query | 10429 | Waiting to execute in isolation | CREATE DATABASE test | 0.000 |
|
| 398502 | root | localhost | anonym | Field List | 6647 | Waiting for table flush | NULL | 0.000 |
|
| 398522 | root | localhost | NULL | Sleep | 6570 | | NULL | 0.000 |
|
| 399845 | root | 10.42.0.36:45956 | anonym | Sleep | 285 | | NULL | 0.000 |
|
| 399846 | root | 10.42.0.36:46010 | anonym | Sleep | 285 | | NULL | 0.000 |
|
| 399883 | root | localhost | NULL | Query | 0 | starting | show processlist | 0.000 |
|
+--------+-------------+-------------------+---------+------------+---------+---------------------------------+------------------------------------------------------------------------------------------------------+----------+
|
20 rows in set (0.000 sec)
|
I checked all the nodes for the status
|
Node0:
MariaDB [(none)]> SHOW GLOBAL STATUS LIKE 'wsrep_%';
|
+-------------------------------+------------------------------------
|
| Variable_name | Value
|
| wsrep_local_state_uuid | a11a7c71-9843-11ee-bf01-3e8006b3a5a5
|
| wsrep_protocol_version | 10
|
| wsrep_last_committed | 7458
|
| wsrep_replicated | 1034
|
| wsrep_replicated_bytes | 85241264
|
| wsrep_repl_keys | 160959
|
| wsrep_repl_keys_bytes | 1312816
|
| wsrep_repl_data_bytes | 83857720
|
| wsrep_repl_other_bytes | 0
|
| wsrep_received | 3771
|
| wsrep_received_bytes | 854031936
|
| wsrep_local_commits | 753
|
| wsrep_local_cert_failures | 0
|
| wsrep_local_replays | 0
|
| wsrep_local_send_queue | 0
|
| wsrep_local_send_queue_max | 2
|
| wsrep_local_send_queue_min | 0
|
| wsrep_local_send_queue_avg | 0.000679348
|
| wsrep_local_recv_queue | 9
|
| wsrep_local_recv_queue_max | 9
|
| wsrep_local_recv_queue_min | 0
|
| wsrep_local_recv_queue_avg | 0.0285714
|
| wsrep_local_cached_downto | 7143
|
| wsrep_flow_control_paused_ns | 0
|
| wsrep_flow_control_paused | 0
|
| wsrep_flow_control_sent | 0
|
| wsrep_flow_control_recv | 0
|
| wsrep_flow_control_active | false
|
| wsrep_flow_control_requested | false
|
| wsrep_cert_deps_distance | 20.7702
|
| wsrep_apply_oooe | 0.0141204
|
| wsrep_apply_oool | 0.00347222
|
| wsrep_apply_window | 1.04051
|
| wsrep_apply_waits | 225
|
| wsrep_commit_oooe | 0
|
| wsrep_commit_oool | 0
|
| wsrep_commit_window | 1.00834
|
| wsrep_local_state | 4
|
| wsrep_local_state_comment | Synced
|
| wsrep_cert_index_size | 3781
|
| wsrep_causal_reads | 0
|
| wsrep_cert_interval | 15.1052
|
| wsrep_open_transactions | 3
|
| wsrep_open_connections | 7
|
| wsrep_incoming_addresse | 10.42.1.156:0,10.42.6.39:0,10.42.17.43:0
|
| wsrep_cluster_weight | 3
|
| wsrep_desync_count | 1
|
| wsrep_evs_delayed |
|
| wsrep_evs_evict_list |
|
| wsrep_evs_repl_latency | 0/0/0/0/0
|
| wsrep_evs_state | OPERATIONAL
|
| wsrep_gcomm_uuid | b123cdcc-9f46-11ee-924e-b2b1bdb1b017
|
| wsrep_gmcast_segment | 0
|
| wsrep_applier_thread_count | 4
|
| wsrep_cluster_capabilities |
|
| wsrep_cluster_conf_id | 27
|
| wsrep_cluster_size | 3
|
| wsrep_cluster_state_uui | a11a7c71-9843-11ee-bf01-3e8006b3a5a5
|
| wsrep_cluster_status | Primary
|
| wsrep_connected | ON
|
| wsrep_local_bf_aborts | 0
|
| wsrep_local_index | 1
|
| wsrep_provider_capabilities | :MULTI_MASTER:CERTIFICATION:PARALLEL_APPLYING:TRX_REPLAY:ISOLATION:PAUSE:CAUSAL_READS:INCREMENTAL_WRITESET:UNORDERED:PREORDERED:STREAMING:NBO: |
|
| wsrep_provider_name | Galera
|
| wsrep_provider_vendor | Codership Oy <info@codership.com>
|
| wsrep_provider_version | 4.13(r0f7af31)
|
| wsrep_ready | ON
|
| wsrep_rollbacker_thread_count | 1
|
| wsrep_thread_count | 5
|
69 rows in set (0.001 sec)
|
Node1:
MariaDB [(none)]> SHOW GLOBAL STATUS LIKE 'wsrep_%';
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
| Variable_name | Value |
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
| wsrep_local_state_uuid | a11a7c71-9843-11ee-bf01-3e8006b3a5a5 |
|
| wsrep_protocol_version | 10 |
|
| wsrep_last_committed | 7480 |
|
| wsrep_replicated | 1 |
|
| wsrep_replicated_bytes | 464 |
|
| wsrep_repl_keys | 1 |
|
| wsrep_repl_keys_bytes | 32 |
|
| wsrep_repl_data_bytes | 362 |
|
| wsrep_repl_other_bytes | 0 |
|
| wsrep_received | 19 |
|
| wsrep_received_bytes | 14723 |
|
| wsrep_local_commits | 0 |
|
| wsrep_local_cert_failures | 0 |
|
| wsrep_local_replays | 0 |
|
| wsrep_local_send_queue | 0 |
|
| wsrep_local_send_queue_max | 1 |
|
| wsrep_local_send_queue_min | 0 |
|
| wsrep_local_send_queue_avg | 0 |
|
| wsrep_local_recv_queue | 0 |
|
| wsrep_local_recv_queue_max | 1 |
|
| wsrep_local_recv_queue_min | 0 |
|
| wsrep_local_recv_queue_avg | 0 |
|
| wsrep_local_cached_downto | 7454 |
|
| wsrep_flow_control_paused_ns | 0 |
|
| wsrep_flow_control_paused | 0 |
|
| wsrep_flow_control_sent | 0 |
|
| wsrep_flow_control_recv | 0 |
|
| wsrep_flow_control_active | false |
|
| wsrep_flow_control_requested | false |
|
| wsrep_cert_deps_distance | 1.21053 |
|
| wsrep_apply_oooe | 0 |
|
| wsrep_apply_oool | 0 |
|
| wsrep_apply_window | 1 |
|
| wsrep_apply_waits | 0 |
|
| wsrep_commit_oooe | 0 |
|
| wsrep_commit_oool | 0 |
|
| wsrep_commit_window | 1 |
|
| wsrep_local_state | 4 |
|
| wsrep_local_state_comment | Synced |
|
| wsrep_cert_index_size | 3785 |
|
| wsrep_causal_reads | 0 |
|
| wsrep_cert_interval | 4323.37 |
|
| wsrep_open_transactions | 0 |
|
| wsrep_open_connections | 0 |
|
| wsrep_incoming_addresses | 10.42.4.127:0,10.42.17.60:0,10.42.6.39:0 |
|
| wsrep_cluster_weight | 3 |
|
| wsrep_desync_count | 0 |
|
| wsrep_evs_delayed | |
|
| wsrep_evs_evict_list | |
|
| wsrep_evs_repl_latency | 0/0/0/0/0 |
|
| wsrep_evs_state | OPERATIONAL |
|
| wsrep_gcomm_uuid | 499450f3-affe-11ee-aab6-9f2e869c0a93 |
|
| wsrep_gmcast_segment | 0 |
|
| wsrep_applier_thread_count | 4 |
|
| wsrep_cluster_capabilities | |
|
| wsrep_cluster_conf_id | 33 |
|
| wsrep_cluster_size | 3 |
|
| wsrep_cluster_state_uuid | a11a7c71-9843-11ee-bf01-3e8006b3a5a5 |
|
| wsrep_cluster_status | Primary |
|
| wsrep_connected | ON |
|
| wsrep_local_bf_aborts | 0 |
|
| wsrep_local_index | 1 |
|
| wsrep_provider_capabilities | :MULTI_MASTER:CERTIFICATION:PARALLEL_APPLYING:TRX_REPLAY:ISOLATION:PAUSE:CAUSAL_READS:INCREMENTAL_WRITESET:UNORDERED:PREORDERED:STREAMING:NBO: |
|
| wsrep_provider_name | Galera |
|
| wsrep_provider_vendor | Codership Oy <info@codership.com> |
|
| wsrep_provider_version | 4.13(r0f7af31) |
|
| wsrep_ready | ON |
|
| wsrep_rollbacker_thread_count | 1 |
|
| wsrep_thread_count | 5 |
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
69 rows in set (0.002 sec)
|
Node2:
MariaDB [(none)]> SHOW GLOBAL STATUS LIKE 'wsrep_%';
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
| Variable_name | Value |
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
| wsrep_local_state_uuid | a11a7c71-9843-11ee-bf01-3e8006b3a5a5 |
|
| wsrep_protocol_version | 10 |
|
| wsrep_last_committed | 7480 |
|
| wsrep_replicated | 1 |
|
| wsrep_replicated_bytes | 456 |
|
| wsrep_repl_keys | 1 |
|
| wsrep_repl_keys_bytes | 32 |
|
| wsrep_repl_data_bytes | 360 |
|
| wsrep_repl_other_bytes | 0 |
|
| wsrep_received | 12 |
|
| wsrep_received_bytes | 13784 |
|
| wsrep_local_commits | 0 |
|
| wsrep_local_cert_failures | 0 |
|
| wsrep_local_replays | 0 |
|
| wsrep_local_send_queue | 0 |
|
| wsrep_local_send_queue_max | 1 |
|
| wsrep_local_send_queue_min | 0 |
|
| wsrep_local_send_queue_avg | 0 |
|
| wsrep_local_recv_queue | 0 |
|
| wsrep_local_recv_queue_max | 1 |
|
| wsrep_local_recv_queue_min | 0 |
|
| wsrep_local_recv_queue_avg | 0 |
|
| wsrep_local_cached_downto | 7143 |
|
| wsrep_flow_control_paused_ns | 0 |
|
| wsrep_flow_control_paused | 0 |
|
| wsrep_flow_control_sent | 0 |
|
| wsrep_flow_control_recv | 0 |
|
| wsrep_flow_control_active | false |
|
| wsrep_flow_control_requested | false |
|
| wsrep_cert_deps_distance | 1.21053 |
|
| wsrep_apply_oooe | 0 |
|
| wsrep_apply_oool | 0 |
|
| wsrep_apply_window | 1 |
|
| wsrep_apply_waits | 0 |
|
| wsrep_commit_oooe | 0 |
|
| wsrep_commit_oool | 0 |
|
| wsrep_commit_window | 1 |
|
| wsrep_local_state | 4 |
|
| wsrep_local_state_comment | Synced |
|
| wsrep_cert_index_size | 3785 |
|
| wsrep_causal_reads | 0 |
|
| wsrep_cert_interval | 4323.37 |
|
| wsrep_open_transactions | 0 |
|
| wsrep_open_connections | 0 |
|
| wsrep_incoming_addresses | 10.42.4.127:0,10.42.17.60:0,10.42.6.39:0 |
|
| wsrep_cluster_weight | 3 |
|
| wsrep_desync_count | 0 |
|
| wsrep_evs_delayed | |
|
| wsrep_evs_evict_list | |
|
| wsrep_evs_repl_latency | 0/0/0/0/0 |
|
| wsrep_evs_state | OPERATIONAL |
|
| wsrep_gcomm_uuid | 2993d30e-afff-11ee-89f7-e7f3cd26fa41 |
|
| wsrep_gmcast_segment | 0 |
|
| wsrep_applier_thread_count | 4 |
|
| wsrep_cluster_capabilities | |
|
| wsrep_cluster_conf_id | 33 |
|
| wsrep_cluster_size | 3 |
|
| wsrep_cluster_state_uuid | a11a7c71-9843-11ee-bf01-3e8006b3a5a5 |
|
| wsrep_cluster_status | Primary |
|
| wsrep_connected | ON |
|
| wsrep_local_bf_aborts | 0 |
|
| wsrep_local_index | 0 |
|
| wsrep_provider_capabilities | :MULTI_MASTER:CERTIFICATION:PARALLEL_APPLYING:TRX_REPLAY:ISOLATION:PAUSE:CAUSAL_READS:INCREMENTAL_WRITESET:UNORDERED:PREORDERED:STREAMING:NBO: |
|
| wsrep_provider_name | Galera |
|
| wsrep_provider_vendor | Codership Oy <info@codership.com> |
|
| wsrep_provider_version | 4.13(r0f7af31) |
|
| wsrep_ready | ON |
|
| wsrep_rollbacker_thread_count | 1 |
|
| wsrep_thread_count | 5 |
|
+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+
|
69 rows in set (0.002 sec)
|
Attachments
Issue Links
- relates to
-
MDEV-34784 MariaDB Stuck in "Waiting for certification"
-
- Open
-
experiencing a similar issue but then with
cat /etc/os-release
PRETTY_NAME="Ubuntu 22.04.3 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.3 LTS (Jammy Jellyfish)"
Server version: 11.0.4-MariaDB-1:11.0.4+maria~ubu2204 mariadb.org binary distribution
4 node galera cluster
custom options like
[mysqld]
skip-name-resolve
binlog_format=ROW
default-storage-engine=innodb
innodb_autoinc_lock_mode=2
bind-address=0.0.0.0
wsrep_on=ON
wsrep_provider=/usr/lib/libgalera_smm.so
wsrep_cluster_name="testing_cluster"
wsrep_cluster_address="gcomm://1,2,3,4"
wsrep_sst_method=rsync
wsrep_slave_threads = 1
max_connections = 500
net_read_timeout = 3600
net_write_timeout = 3600
innodb_buffer_pool_size = 12G
innodb_log_file_size = 2G
performance_schema
wsrep_node_name="node1"
wsrep_sst_donor="node4"
wsrep_node_address="1.2.3.1"
wsrep_notify_cmd=/usr/local/bin/wsrep_notify
wsrep_provider_options="pc.weight=2"
wsrep_log_conflicts=ON
with symptoms:
logs on mariadb server side, where first digit is amount of occurances, removed second info to avoid overly excessive log.
(so in one minute, 22228 entries , another minute 18402 entries, of WSREP MDL conflict, solved by abort)
totalling to 129801 entries, which sounds like a rather excessive number.
1 Jan 23 22:05 host mariadbd[2748109]: 2024-01-23 22:05:05 537156 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=2 solved by abort
22228 Jan 23 22:27 host mariadbd[2748109]: 2024-01-23 22:27:02 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 23 22:27 host mariadbd[2748109]: 2024-01-23 22:27:03 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
18402 Jan 23 22:45 host mariadbd[2748109]: 2024-01-23 22:45:01 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 24 04:04 host mariadbd[2748109]: 2024-01-24 4:04:02 555277 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
22499 Jan 24 04:04 host mariadbd[2748109]: 2024-01-24 4:04:02 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 24 04:45 host mariadbd[2748109]: 2024-01-24 4:45:01 557456 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
22498 Jan 24 04:45 host mariadbd[2748109]: 2024-01-24 4:45:01 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 24 22:05 host mariadbd[2748109]: 2024-01-24 22:05:04 609774 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=2 solved by abort
21257 Jan 24 22:45 host mariadbd[2748109]: 2024-01-24 22:45:01 0 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 24 22:45 host mariadbd[2748109]: 2024-01-24 22:45:05 611895 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
[[..snip... this continues for the coming hours...]]
1 Jan 25 07:10 host mariadbd[2748109]: 2024-01-25 7:10:24 637282 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
1 Jan 25 07:10 host mariadbd[2748109]: 2024-01-25 7:10:54 637308 [Note] WSREP: MDL conflict db=db2 table=table1 ticket=8 solved by abort
only active processes are:
{{pt-table-sync --verbose --execute --databases ${DBS} --tables ${TABLES} h=${DB_SRC},u=${SRC_USER},p=${SRC_PASS} h=${DB_DST},u=${DST_USER},p=${DST_PASS} }}
where the rundeck jobs are in a state `Waiting for certification`
and the pt-table-sync is trying to truncate a table with state `Waiting for TOI DDL`
example
-------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------other observations:
wsrep_local_recv_queue was increasing gradually on this specific node (from 0 to 30 in 20 minutes)
mysql_global_status_threads_connected was also gradually increasing (from 1 to 31)