[MDEV-25455] Assertion `thd->wsrep_trx().active()' failed virtual int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, wsrep::mutable_buffer&) Created: 2021-04-19  Updated: 2023-03-17

Status: Open
Project: MariaDB Server
Component/s: Galera
Affects Version/s: 10.6
Fix Version/s: 10.6

Type: Bug Priority: Major
Reporter: Ramesh Sivaraman Assignee: Daniele Sciascia
Resolution: Unresolved Votes: 0
Labels: not-10.2, not-10.3, not-10.4, not-10.5

Issue Links:
Relates
relates to MDEV-30862 Assertion `mode_ == m_high_priority' ... Closed

 Description   

Testcase

Start 3 node cluster
 
connection node1;
 
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 BLOB) ENGINE=InnoDB;
SET SESSION wsrep_on=OFF;
INSERT INTO t1 VALUES (1, 'X');
SET SESSION wsrep_on=ON;
 
 
connection node_3;
 
SET AUTOCOMMIT=OFF;
SET SESSION wsrep_trx_fragment_size = 131070;
START TRANSACTION;
INSERT INTO t1 VALUES (1, REPEAT('A', 65535));
INSERT INTO t1 VALUES (2, REPEAT('A', 65535));
INSERT INTO t1 VALUES (3, REPEAT('A', 65535));
INSERT INTO t1 VALUES (4, REPEAT('A', 65535));
INSERT INTO t1 VALUES (5, REPEAT('A', 65535));
COMMIT;
 
connection node1;
 
DELETE FROM t1 WHERE f1 = 2;  # Sometimes we need to run DELETE statement multiple times to get the crash.

10.6.0-0 3402184ce3d235788455c30a2d2dae3c32d15af9 (Debug)

mysqld: /test/10.6_dbg/sql/wsrep_high_priority_service.cc:542: virtual int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, wsrep::mutable_buffer&): Assertion `thd->wsrep_trx().active()' failed.

10.6.0-0 3402184ce3d235788455c30a2d2dae3c32d15af9 (Debug)

Core was generated by `/test/GAL_EMD150321-mariadb-10.6.0-0-linux-x86_64-dbg/bin/mysqld --defaults-fil'.
Program terminated with signal SIGABRT, Aborted.
#0  __pthread_kill (threadid=<optimized out>, signo=signo@entry=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:56
#1  0x000055b79cbdd2cd in my_write_core (sig=sig@entry=6) at /test/10.6_dbg/mysys/stacktrace.c:424
#2  0x000055b79c37e124 in handle_fatal_signal (sig=6) at /test/10.6_dbg/sql/signal_handler.cc:331
#3  <signal handler called>
#4  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#5  0x000014c205ac9859 in __GI_abort () at abort.c:79
#6  0x000014c205ac9729 in __assert_fail_base (fmt=0x14c205c5f588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
    assertion=0x55b79cf7d7f0 "thd->wsrep_trx().active()", file=0x55b79cf7ccf8 "/test/10.6_dbg/sql/wsrep_high_priority_service.cc", line=536,
    function=<optimized out>) at assert.c:92
#7  0x000014c205adaf36 in __GI___assert_fail (assertion=assertion@entry=0x55b79cf7d7f0 "thd->wsrep_trx().active()",
    file=file@entry=0x55b79cf7ccf8 "/test/10.6_dbg/sql/wsrep_high_priority_service.cc", line=line@entry=536,
    function=function@entry=0x55b79cf7cf30 "virtual int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, wsrep::mutable_buffer&)") at assert.c:101
#8  0x000055b79c72accd in Wsrep_applier_service::apply_write_set (this=0x14c1e003ad10, ws_meta=..., data=..., err=...)
    at /test/10.6_dbg/wsrep-lib/include/wsrep/transaction_id.hpp:52
#9  0x000055b79ccfbec4 in apply_fragment (server_state=..., high_priority_service=...,
    streaming_applier=streaming_applier@entry=0x14c1e003ad10, ws_handle=..., ws_meta=..., data=...)
    at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:98
#10 0x000055b79cd03eee in apply_write_set (server_state=..., high_priority_service=..., ws_handle=..., ws_meta=..., data=...)
    at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:384
#11 0x000055b79cd04c37 in wsrep::server_state::on_apply (this=<optimized out>, high_priority_service=..., ws_handle=..., ws_meta=...,
    data=...) at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:1136
#12 0x000055b79cd1231a in wsrep::high_priority_service::apply (data=..., ws_meta=..., ws_handle=..., this=0x14c1f4650d10)
    at /test/10.6_dbg/wsrep-lib/include/wsrep/high_priority_service.hpp:47
#13 (anonymous namespace)::apply_cb (ctx=ctx@entry=0x14c1f4650d10, wsh=wsh@entry=0x14c1f464fdb0, flags=flags@entry=0,
    buf=buf@entry=0x14c1f464fdc0, meta=meta@entry=0x14c1f4650070, exit_loop=exit_loop@entry=0x14c1f465002f)
    at /test/10.6_dbg/wsrep-lib/src/wsrep_provider_v26.cpp:502
#14 0x000014c205344b05 in galera::TrxHandleSlave::apply (this=this@entry=0x14c198037e60, recv_ctx=recv_ctx@entry=0x14c1f4650d10,
    apply_cb=0x55b79cd12197 <(anonymous namespace)::apply_cb(void*, wsrep_ws_handle_t const*, uint32_t, wsrep_buf_t const*, wsrep_trx_meta_t const*, wsrep_bool_t*)>, meta=..., exit_loop=exit_loop@entry=@0x14c1f465002f: false) at /test/10.6_galera_dbg/galera/src/trx_handle.cpp:391
#15 0x000014c2053547e2 in galera::ReplicatorSMM::apply_trx (this=0x55b79e407380, recv_ctx=0x14c1f4650d10, ts=...)
    at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:504
#16 0x000014c20535c6b8 in galera::ReplicatorSMM::process_trx (this=0x55b79e407380, recv_ctx=0x14c1f4650d10, ts_ptr=...)
    at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:2127
#17 0x000014c20538cfc3 in galera::GcsActionSource::process_writeset (this=0x55b79e436a50, recv_ctx=0x14c1f4650d10, act=...,
    exit_loop=@0x14c1f465091f: false) at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:62
#18 0x000014c20538d99b in galera::GcsActionSource::dispatch (this=<optimized out>, recv_ctx=<optimized out>, act=...,
    exit_loop=<optimized out>) at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:110
#19 0x000014c20538dba2 in galera::GcsActionSource::process (this=0x55b79e436a50, recv_ctx=0x14c1f4650d10, exit_loop=@0x14c1f465091f: false)
    at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:183
#20 0x000014c205359760 in galera::ReplicatorSMM::async_recv (this=0x55b79e407380, recv_ctx=0x14c1f4650d10)
    at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:390
#21 0x000014c20532ffb1 in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>)
    at /test/10.6_galera_dbg/galera/src/wsrep_provider.cpp:236
#22 0x000055b79cd1277c in wsrep::wsrep_provider_v26::run_applier (this=<optimized out>, applier_ctx=<optimized out>)
    at /test/10.6_dbg/wsrep-lib/src/wsrep_provider_v26.cpp:743
#23 0x000055b79c7486e9 in wsrep_replication_process (thd=0x14c198000ff8, arg=<optimized out>) at /test/10.6_dbg/sql/wsrep_server_state.h:51
#24 0x000055b79c736d22 in start_wsrep_THD (arg=arg@entry=0x55b79e455f60) at /test/10.6_dbg/sql/wsrep_mysqld.h:579
#25 0x000055b79c6c2c01 in pfs_spawn_thread (arg=0x55b79ea17c88) at /test/10.6_dbg/storage/perfschema/pfs.cc:2201
#26 0x000014c205fd7609 in start_thread (arg=<optimized out>) at pthread_create.c:477
--Type <RET> for more, q to quit, c to continue without paging--
#27 0x000014c205bc6293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95



 Comments   
Comment by Daniele Sciascia [ 2021-04-22 ]

I tried but couldn't reproduce. I always get error "1047: WSREP has not yet prepared node for application use" on the DELETE. That's expected because node_1 drops out of the cluster since it fails to apply the transaction from node3

Comment by Jan Lindström (Inactive) [ 2021-05-05 ]

ramesh Can you try to create test case that reproduces more easily?

Comment by Ramesh Sivaraman [ 2021-05-05 ]

jplindst I can reproduce this issue only on galapq server. The crash is not happening on my local machine. I will update the ticket if I get a reduced test case that will reproduce the crash on all environments

Comment by Ramesh Sivaraman [ 2021-08-16 ]

sciascid jplindst Found reproducible test case, node 2 is crashing when executing given test case

SET SESSION wsrep_trx_fragment_size=3;
SET @@sql_mode= '';
SET SESSION wsrep_osu_method=RSU;
SET GLOBAL tx_read_only=ON;
CREATE TABLE t1 (id INT(16) NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1),(2);
SELECT SLEEP (3);

Leads to

10.6.5 9ac1ac006197c8979db1dc73f4e983f623e831e8 (Debug)

Core was generated by `/test/GAL_MD150821-mariadb-10.6.5-linux-x86_64-dbg/bin/mysqld --defaults-file=/'.
Program terminated with signal SIGABRT, Aborted.
#0  __pthread_kill (threadid=<optimized out>, signo=signo@entry=6)
    at ../sysdeps/unix/sysv/linux/pthread_kill.c:56
[Current thread is 1 (Thread 0x148e6100f700 (LWP 315013))]
 
(gdb) bt
#0  __pthread_kill (threadid=<optimized out>, signo=signo@entry=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:56
#1  0x0000560412d37889 in my_write_core (sig=sig@entry=6) at /test/10.6_dbg/mysys/stacktrace.c:424
#2  0x00005604124ec4c6 in handle_fatal_signal (sig=6) at /test/10.6_dbg/sql/signal_handler.cc:344
#3  <signal handler called>
#4  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#5  0x0000148e638a7859 in __GI_abort () at abort.c:79
#6  0x0000148e638a7729 in __assert_fail_base (fmt=0x148e63a3d588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x560413102a90 "thd->wsrep_trx().active()", 
    file=0x560413101f98 "/test/10.6_dbg/sql/wsrep_high_priority_service.cc", line=536, function=<optimized out>) at assert.c:92
#7  0x0000148e638b8f36 in __GI___assert_fail (assertion=assertion@entry=0x560413102a90 "thd->wsrep_trx().active()", file=file@entry=0x560413101f98 "/test/10.6_dbg/sql/wsrep_high_priority_service.cc", 
    line=line@entry=536, function=function@entry=0x5604131023f8 "virtual int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta&, const wsrep::const_buffer&, wsrep::mutable_buffer&)") at assert.c:101
#8  0x0000560412886653 in Wsrep_applier_service::apply_write_set (this=0x148de802d4e0, ws_meta=..., data=..., err=...) at /test/10.6_dbg/wsrep-lib/include/wsrep/transaction_id.hpp:52
#9  0x0000560412e71774 in apply_fragment (server_state=..., high_priority_service=..., streaming_applier=streaming_applier@entry=0x148de802d4e0, ws_handle=..., ws_meta=..., data=...)
    at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:98
#10 0x0000560412e79490 in apply_write_set (server_state=..., high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:385
#11 0x0000560412e7a4e9 in wsrep::server_state::on_apply (this=<optimized out>, high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /test/10.6_dbg/wsrep-lib/src/server_state.cpp:1137
#12 0x0000560412e882c9 in wsrep::high_priority_service::apply (data=..., ws_meta=..., ws_handle=..., this=0x148e6100ed10) at /test/10.6_dbg/wsrep-lib/include/wsrep/high_priority_service.hpp:47
#13 (anonymous namespace)::apply_cb (ctx=ctx@entry=0x148e6100ed10, wsh=wsh@entry=0x148e6100ddb0, flags=flags@entry=0, buf=buf@entry=0x148e6100ddc0, meta=meta@entry=0x148e6100e070, 
    exit_loop=exit_loop@entry=0x148e6100e02f) at /test/10.6_dbg/wsrep-lib/src/wsrep_provider_v26.cpp:504
#14 0x0000148e63369765 in galera::TrxHandleSlave::apply (this=this@entry=0x148e3c008150, recv_ctx=recv_ctx@entry=0x148e6100ed10, 
    apply_cb=0x560412e88146 <(anonymous namespace)::apply_cb(void*, wsrep_ws_handle_t const*, uint32_t, wsrep_buf_t const*, wsrep_trx_meta_t const*, wsrep_bool_t*)>, meta=..., 
    exit_loop=exit_loop@entry=@0x148e6100e02f: false) at /test/10.6_galera_dbg/galera/src/trx_handle.cpp:391
#15 0x0000148e63379442 in galera::ReplicatorSMM::apply_trx (this=0x5604159f39e0, recv_ctx=0x148e6100ed10, ts=...) at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:504
#16 0x0000148e63381378 in galera::ReplicatorSMM::process_trx (this=0x5604159f39e0, recv_ctx=0x148e6100ed10, ts_ptr=...) at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:2123
#17 0x0000148e633b1a43 in galera::GcsActionSource::process_writeset (this=0x5604159ec610, recv_ctx=0x148e6100ed10, act=..., exit_loop=@0x148e6100e91f: false)
    at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:62
#18 0x0000148e633b241b in galera::GcsActionSource::dispatch (this=<optimized out>, recv_ctx=<optimized out>, act=..., exit_loop=<optimized out>) at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:110
#19 0x0000148e633b2622 in galera::GcsActionSource::process (this=0x5604159ec610, recv_ctx=0x148e6100ed10, exit_loop=@0x148e6100e91f: false) at /test/10.6_galera_dbg/galera/src/gcs_action_source.cpp:183
#20 0x0000148e6337e3a0 in galera::ReplicatorSMM::async_recv (this=0x5604159f39e0, recv_ctx=0x148e6100ed10) at /test/10.6_galera_dbg/galera/src/replicator_smm.cpp:390
#21 0x0000148e633558e1 in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at /test/10.6_galera_dbg/galera/src/wsrep_provider.cpp:236
#22 0x0000560412e8872a in wsrep::wsrep_provider_v26::run_applier (this=<optimized out>, applier_ctx=<optimized out>) at /test/10.6_dbg/wsrep-lib/src/wsrep_provider_v26.cpp:796
#23 0x00005604128a4390 in wsrep_replication_process (thd=0x148e3c000ff8, arg=<optimized out>) at /test/10.6_dbg/sql/wsrep_server_state.h:51
#24 0x000056041289205b in start_wsrep_THD (arg=arg@entry=0x560415a3b470) at /test/10.6_dbg/sql/wsrep_mysqld.h:579
#25 0x000056041281d09b in pfs_spawn_thread (arg=0x560415a172c8) at /test/10.6_dbg/storage/perfschema/pfs.cc:2201
#26 0x0000148e63db4609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#27 0x0000148e639a4293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) 

Generated at Thu Feb 08 09:37:49 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.