Details
-
Bug
-
Status: Closed (View Workflow)
-
Major
-
Resolution: Incomplete
-
10.3.16
-
None
Description
We've been seeing Galera replication issues for a few weeks but couldn't figure out why they are happening.
We're running a setup of 3 MariaDB Galera nodes, and all reads and writes usually go to galera1, as we're using MaxScale with a preference for galera1.
Two of the nodes are running in the cluster without issues.
As soon as we add the third node back, we encounter the following issue:
2019-06-26 10:55:34 0 [Note] /usr/sbin/mysqld: ready for connections.
|
Version: '10.3.16-MariaDB-1:10.3.16+maria~bionic-log' socket: '/var/run/mysqld/mysqld.sock' port: 3307 mariadb.org binary distribution
|
2019-06-26 10:55:35 2 [ERROR] mysqld: Can't find record in 'cache_bootstrap'
|
2019-06-26 10:55:35 2 [ERROR] Slave SQL: Could not execute Delete_rows_v1 event on table REDACTED_2.cache_bootstrap; Can't find record in 'cache_bootstrap', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log FIRST, end_log_pos 476745, Internal MariaDB error code: 1032
|
2019-06-26 10:55:35 2 [Warning] WSREP: RBR event 3 Delete_rows_v1 apply warning: 120, 1696954986
|
2019-06-26 10:55:35 2 [Warning] WSREP: Failed to apply app buffer: seqno: 1696954986, status: 1
|
at galera/src/trx_handle.cpp:apply():353
|
Retrying 2th time
|
2019-06-26 10:55:35 2 [ERROR] mysqld: Can't find record in 'cache_bootstrap'
|
2019-06-26 10:55:35 2 [ERROR] Slave SQL: Could not execute Delete_rows_v1 event on table REDACTED_2.cache_bootstrap; Can't find record in 'cache_bootstrap', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log FIRST, end_log_pos 476745, Internal MariaDB error code: 1032
|
2019-06-26 10:55:35 2 [Warning] WSREP: RBR event 3 Delete_rows_v1 apply warning: 120, 1696954986
|
2019-06-26 10:55:35 2 [Warning] WSREP: Failed to apply app buffer: seqno: 1696954986, status: 1
|
at galera/src/trx_handle.cpp:apply():353
|
Retrying 3th time
|
2019-06-26 10:55:35 2 [ERROR] mysqld: Can't find record in 'cache_bootstrap'
|
2019-06-26 10:55:35 2 [ERROR] Slave SQL: Could not execute Delete_rows_v1 event on table REDACTED_2.cache_bootstrap; Can't find record in 'cache_bootstrap', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log FIRST, end_log_pos 476745, Internal MariaDB error code: 1032
|
2019-06-26 10:55:35 2 [Warning] WSREP: RBR event 3 Delete_rows_v1 apply warning: 120, 1696954986
|
2019-06-26 10:55:35 2 [Warning] WSREP: Failed to apply app buffer: seqno: 1696954986, status: 1
|
at galera/src/trx_handle.cpp:apply():353
|
Retrying 4th time
|
2019-06-26 10:55:35 2 [ERROR] mysqld: Can't find record in 'cache_bootstrap'
|
2019-06-26 10:55:35 2 [ERROR] Slave SQL: Could not execute Delete_rows_v1 event on table REDACTED_2.cache_bootstrap; Can't find record in 'cache_bootstrap', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log FIRST, end_log_pos 476745, Internal MariaDB error code: 1032
|
2019-06-26 10:55:35 2 [Warning] WSREP: RBR event 3 Delete_rows_v1 apply warning: 120, 1696954986
|
2019-06-26 10:55:35 2 [ERROR] WSREP: Failed to apply trx: source: 1dfa2e4b-978d-11e9-be80-ba6ff42601f9 version: 4 local: 0 state: APPLYING flags: 1 conn_id: 575767 trx_id: 3811606594 seqnos (l: 328, g: 1696954986, s: 1696954984, d: 1696954660, ts: 42374739735530)
|
2019-06-26 10:55:35 2 [ERROR] WSREP: Failed to apply trx 1696954986 4 times
|
2019-06-26 10:55:35 2 [ERROR] WSREP: Node consistency compromised, aborting...
|
This happens after we used one of the remaining two nodes as a donor. We also tried recreating galera2 and galera3 from galera1, as we consider that one the correct source, to rule out any corruption that happened after the issue appeared, but the issue still seems to happen.
In the meantime, we also tried adding galera4 while galera3 was stopped, to have a total of 3 nodes in the cluster, but that led to the same issues.
It looks similar to https://jira.mariadb.org/browse/MDEV-9309 or https://jira.mariadb.org/browse/MDEV-18230
Any help is greatly appreciated.
/b
Additional Info
mariadb.cnf
# MariaDB-specific config file.
|
# Read by /etc/mysql/my.cnf
|
|
|
[client]
|
# Default is Latin1, if you need UTF-8 set this (also in server section)
|
#default-character-set = utf8
|
|
|
[mysqld]
|
#
|
# * Character sets
|
#
|
# Default is Latin1, if you need UTF-8 set all this (also in client section)
|
#
|
#character-set-server = utf8
|
#collation-server = utf8_general_ci
|
#character_set_server = utf8
|
#collation_server = utf8_general_ci
|
# Import all .cnf files from configuration directory
|
!includedir /etc/mysql/mariadb.conf.d/
|
conf.d/cluster.cnf
|
|
[mysqld]
|
bind-address=*
|
binlog_format=ROW
|
default_storage_engine=InnoDB
|
innodb_autoinc_lock_mode=2
|
innodb_flush_log_at_trx_commit=0
|
conf.d/wsrep.cnf
|
|
[mysqld]
|
wsrep_on=ON
|
wsrep_cluster_address="gcomm://172.21.42.110,172.21.42.131,172.21.42.113,172.21.42.143"
|
wsrep_cluster_name=services
|
wsrep_node_address=172.21.42.110
|
wsrep_node_name=galera1
|
wsrep_provider = /usr/lib/galera/libgalera_smm.so
|
wsrep_log_conflicts=On
|
wsrep_provider_options="cert.log_conflicts=On; gcache.size=4G; gcs.fc_limit=4096; gcs.fc_master_slave=Yes"
|
wsrep_slave_threads=16
|
my.cnf
|
|
[client]
|
port = 3306
|
socket = /var/run/mysqld/mysqld.sock
|
|
|
[galera]
|
query_cache_size = 0
|
query_cache_type = 0
|
|
|
[isamchk]
|
key_buffer_size = 16M
|
|
|
[mysqld]
|
basedir = /usr
|
bind_address = *
|
character_set_server = utf8mb4
|
collation_server = utf8mb4_bin
|
datadir = /var/lib/mysql
|
expire_logs_days = 10
|
ignore-db-dir = lost+found
|
ignore-db-dir = mysql-backup
|
innodb_buffer_pool_instances = 4
|
innodb_buffer_pool_size = 4G
|
innodb_log_buffer_size = 32M
|
innodb_log_file_size = 1G
|
innodb_read_io_threads = 8
|
innodb_write_io_threads = 8
|
join_buffer_size = 200M
|
key_buffer_size = 16M
|
log-error = /var/log/mysql/error.log
|
long_query_time = 2
|
max_allowed_packet = 64M
|
max_binlog_size = 100M
|
max_connections = 1000
|
max_heap_table_size = 512M
|
myisam-recover-options = BACKUP
|
optimizer_search_depth = 0
|
pid-file = /var/run/mysqld/mysqld.pid
|
port = 3307
|
query_cache_limit = 24M
|
query_cache_size = 0
|
query_cache_type = 0
|
read_rnd_buffer_size = 4M
|
skip-external-locking
|
skip_name_resolve = 1
|
slow_query_log = 1
|
slow_query_log_file = /var/log/mysql/slow.log
|
socket = /var/run/mysqld/mysqld.sock
|
sort_buffer_size = 4M
|
ssl = false
|
ssl-ca = /etc/mysql/cacert.pem
|
ssl-cert = /etc/mysql/server-cert.pem
|
ssl-key = /etc/mysql/server-key.pem
|
table_open_cache = 200000
|
thread_cache_size = 8
|
thread_stack = 256K
|
tmp_table_size = 512M
|
tmpdir = /tmp
|
transaction-isolation = READ-COMMITTED
|
user = mysql
|
|
|
[mysqld-5.0]
|
myisam-recover = BACKUP
|
|
|
[mysqld-5.1]
|
myisam-recover = BACKUP
|
|
|
[mysqld-5.5]
|
myisam-recover = BACKUP
|
|
|
[mysqld-5.6]
|
myisam-recover-options = BACKUP
|
|
|
[mysqld-5.7]
|
myisam-recover-options = BACKUP
|
|
|
[mysqld_safe]
|
log-error = /var/log/mysql/error.log
|
nice = 0
|
socket = /var/run/mysqld/mysqld.sock
|
|
|
[mysqldump]
|
max_allowed_packet = 512M
|
quick
|
quote-names
|
|
|
|
|
|
|
!includedir /etc/mysql/conf.d
|
|