Details
- 
    
Bug
 - 
    Status: Closed (View Workflow)
 - 
    
Critical
 - 
    Resolution: Fixed
 - 
    10.6
 - 
    None
 
Description
Analysis: Here we have a high-priority BF applier thread that finds a conflicting local transaction. In the reproduced case, it was a thread updating persistent statistics. The BF thread is holding the lock_sys latch, and the thread updating persistent statistics is waiting for it. For the conflicting thread, is_wsrep() is false, and it is holding at least one table lock and record locks. However, as it is not a wsrep transaction, its trx->mysql_thd is NULL, which causes the assertion to fail.
How to reproduce: No simple test case has been found yet, but with a 2-node cluster, use sysbench to insert 20M rows. Then start a sysbench measurement run on node_1.
					(gdb) p v->mysql_thd
			 | 
		
					$1 = (THD *) 0x0
			 | 
		
					(gdb) p v->lock.wait_lock
			 | 
		
					$4 = {m = std::atomic<ib_lock_t *> = { 0x0 }}
			 | 
		
					(gdb) p v->state
			 | 
		
					$5 = {m = std::atomic<trx_state_t> = { TRX_STATE_ACTIVE }}
			 | 
		
					(gdb) p v->id
			 | 
		
					$6 = 39
			 | 
		
					(gdb) p v->wsrep
			 | 
		
					$7 = 0 '\000'
			 | 
		
					(gdb) p v->lock
			 | 
		
					$8 = {wait_lock = {m = std::atomic<ib_lock_t *> = { 0x0 }}, wait_trx = 0x0, cond = {__data = {__wseq = {__value64 = 0, __value32 = {__low = 0, __high = 0}}, __g1_start = {__value64 = 0, __value32 = {__low = 0, __high = 0}}, __g_refs = {0, 0}, __g_size = {0, 0}, __g1_orig_size = 0, __wrefs = 0, __g_signals = {0, 0}}, __size = '\000' <repeats 47 times>, __align = 0}, suspend_time = {m = std::atomic<my_hrtime_t> = { {val = 0} }}, was_chosen_as_deadlock_victim = {m = std::atomic<unsigned char> = { 0 '\000' }}, rec_cached = 1 '\001', table_cached = 1 '\001', wait_thr = 0x0, rec_pool = {{lock = {trx = 0x7f5b00ca7b80, trx_locks = {prev = 0x7f5b00ca8080, next = 0x0}, index = 0x55c1c4cc8de8, hash = 0x7f5b00ca9280, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x900000045, locks = {prev = 0x50, next = 0x0}}, rec_lock = {page_id = {m_id = 38654705733}, n_bits = 80}}, type_mode = 1027}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, 
index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}}, table_pool = {{trx = 0x7f5b00ca7b80, trx_locks = {prev = 0x0, next = 0x7f5b00ca7c80}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x55c1c4cbffa8, locks = {prev = 0x0, next = 0x7f5b00ca9680}}, rec_lock = {page_id = {m_id = 94290718752680}, n_bits = 0}}, type_mode = 9}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 
0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, lock_heap = 0x55c1c4b80098, trx_locks = {count = 2, start = 0x7f5b00ca8080, end = 0x7f5b00ca7c80, node = &ib_lock_t::trx_locks, init = 51966}, table_locks = std::vector of length 1, capacity 1 = {0x7f5b00ca8080}, evicted_tables = {count = 0, start = 0x0, end = 0x0, node = &dict_table_t::table_LRU, init = 51966}, n_rec_locks = 1}
			 | 
		
Stack trace (using additional assertion)
					(gdb) where
			 | 
		
					#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:44
			 | 
		
					#1  __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
			 | 
		
					#2  __GI___pthread_kill (threadid=<optimized out>, signo=6) at ./nptl/pthread_kill.c:89
			 | 
		
					#3  0x000055c1c1fc9781 in my_write_core (sig=6) at /home/jan/work/mariadb/upstream/mysys/stacktrace.c:424
			 | 
		
					#4  0x000055c1c1695719 in handle_fatal_signal (sig=6) at /home/jan/work/mariadb/upstream/sql/signal_handler.cc:357
			 | 
		
					#5  <signal handler called>
			 | 
		
					#6  __pthread_kill_implementation (no_tid=0, signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:44
			 | 
		
					#7  __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
			 | 
		
					#8  __GI___pthread_kill (threadid=<optimized out>, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
			 | 
		
					#9  0x00007f5b13842866 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
			 | 
		
					#10 0x00007f5b138268b7 in __GI_abort () at ./stdlib/abort.c:79
			 | 
		
					#11 0x000055c1c1d7f8a7 in ut_dbg_assertion_failed (expr=0x55c1c24630a2 "v->mysql_thd", file=0x55c1c2462470 "/home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc", line=1040) at /home/jan/work/mariadb/upstream/storage/innobase/ut/ut0dbg.cc:60
			 | 
		
					#12 0x000055c1c1b7c29c in lock_wait_wsrep (trx=0x7f5b00ca9180) at /home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc:1040
			 | 
		
					#13 0x000055c1c1b802a0 in lock_wait (thr=0x7f5aec0b1968) at /home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc:1930
			 | 
		
					#14 0x000055c1c1cb44ca in row_mysql_handle_errors (new_err=0x7f5b1122bd54, trx=0x7f5b00ca9180, thr=0x7f5aec0b1968, savept=0x7f5b1122bd58) at /home/jan/work/mariadb/upstream/storage/innobase/row/row0mysql.cc:688
			 | 
		
					#15 0x000055c1c1cb5fcd in row_insert_for_mysql (mysql_rec=0x7f5aec0aff38 "\377\035\025", prebuilt=0x7f5aec0b10f8, ins_mode=ROW_INS_NORMAL) at /home/jan/work/mariadb/upstream/storage/innobase/row/row0mysql.cc:1326
			 | 
		
					#16 0x000055c1c1ac0e2b in ha_innobase::write_row (this=0x7f5aec0b03c0, record=0x7f5aec0aff38 "\377\035\025") at /home/jan/work/mariadb/upstream/storage/innobase/handler/ha_innodb.cc:7844
			 | 
		
					#17 0x000055c1c16b057e in handler::ha_write_row (this=0x7f5aec0b03c0, buf=0x7f5aec0aff38 "\377\035\025") at /home/jan/work/mariadb/upstream/sql/handler.cc:7661
			 | 
		
					#18 0x000055c1c1859443 in Rows_log_event::write_row (this=0x7f5aec02bb48, rgi=0x7f5aec021c10, overwrite=false) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:7746
			 | 
		
					#19 0x000055c1c1859b05 in Write_rows_log_event::do_exec_row (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:8008
			 | 
		
					#20 0x000055c1c1852e75 in Rows_log_event::do_apply_event (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:6126
			 | 
		
					#21 0x000055c1c1838c0e in Log_event::apply_event (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event.cc:4191
			 | 
		
					#22 0x000055c1c1a81e8a in wsrep_apply_events (thd=0x7f5aec000dc8, rli=0x7f5aec0159a0, events_buf=0x7f5b03f085e0, buf_len=504160) at /home/jan/work/mariadb/upstream/sql/wsrep_applier.cc:213
			 | 
		
					#23 0x000055c1c1a59ce9 in apply_events (thd=0x7f5aec000dc8, rli=0x7f5aec0159a0, data=..., err=...) at /home/jan/work/mariadb/upstream/sql/wsrep_high_priority_service.cc:128
			 | 
		
					#24 0x000055c1c1a5c204 in Wsrep_applier_service::apply_write_set (this=0x7f5b1122da90, ws_meta=..., data=..., err=...) at /home/jan/work/mariadb/upstream/sql/wsrep_high_priority_service.cc:596
			 | 
		
					#25 0x000055c1c20ae066 in apply_write_set (server_state=..., high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/src/server_state.cpp:332
			 | 
		
					#26 0x000055c1c20b243c in wsrep::server_state::on_apply (this=0x55c1c4a26d70, high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/src/server_state.cpp:1128
			 | 
		
					#27 0x000055c1c20cd383 in wsrep::high_priority_service::apply (this=0x7f5b1122da90, ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/include/wsrep/high_priority_service.hpp:47
			 | 
		
					#28 0x000055c1c20c9da3 in (anonymous namespace)::apply_cb (ctx=0x7f5b1122da90, wsh=0x7f5b1122ccc0, flags=65, buf=0x7f5b1122ccd0, meta=0x7f5b1122cf90, exit_loop=0x7f5b1122cf4f) at /home/jan/work/mariadb/upstream/wsrep-lib/src/wsrep_provider_v26.cpp:507
			 | 
		
					#29 0x00007f5b13276932 in galera::TrxHandleSlave::apply (this=this@entry=0x7f5aec06a850, recv_ctx=recv_ctx@entry=0x7f5b1122da90, apply_cb=0x55c1c20c9b67 <(anonymous namespace)::apply_cb(void*, wsrep_ws_handle_t const*, uint32_t, wsrep_buf_t const*, wsrep_trx_meta_t const*, wsrep_bool_t*)>, meta=..., exit_loop=exit_loop@entry=@0x7f5b1122cf4f: false) at /home/jan/work/galera-lib/galera/galera/src/trx_handle.cpp:396
			 | 
		
					#30 0x00007f5b132895b8 in galera::ReplicatorSMM::apply_trx (this=this@entry=0x55c1c4a3cd70, recv_ctx=recv_ctx@entry=0x7f5b1122da90, ts=...) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:518
			 | 
		
					#31 0x00007f5b1328db63 in galera::ReplicatorSMM::process_trx (this=0x55c1c4a3cd70, recv_ctx=0x7f5b1122da90, ts_ptr=...) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:2152
			 | 
		
					#32 0x00007f5b132c4131 in galera::GcsActionSource::process_writeset (this=0x55c1c4a6f4c0, recv_ctx=0x7f5b1122da90, act=..., exit_loop=@0x7f5b1122d67f: false) at /home/jan/work/galera-lib/galera/galera/src/gcs_action_source.cpp:62
			 | 
		
					#33 0x00007f5b132c5286 in galera::GcsActionSource::process (this=0x55c1c4a6f4c0, recv_ctx=0x7f5b1122da90, exit_loop=@0x7f5b1122d67f: false) at /home/jan/work/galera-lib/galera/galera/src/gcs_action_source.cpp:186
			 | 
		
					#34 0x00007f5b1328e2b0 in galera::ReplicatorSMM::async_recv (this=0x55c1c4a3cd70, recv_ctx=0x7f5b1122da90) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:404
			 | 
		
					#35 0x00007f5b13262f8f in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at /home/jan/work/galera-lib/galera/galera/src/wsrep_provider.cpp:264
			 | 
		
					#36 0x000055c1c20cb45e in wsrep::wsrep_provider_v26::run_applier (this=0x55c1c4a27720, applier_ctx=0x7f5b1122da90) at /home/jan/work/mariadb/upstream/wsrep-lib/src/wsrep_provider_v26.cpp:858
			 | 
		
					#37 0x000055c1c1a82d9c in wsrep_replication_process (thd=0x7f5aec000dc8, arg=0x55c1c4a75440) at /home/jan/work/mariadb/upstream/sql/wsrep_thd.cc:57
			 | 
		
					#38 0x000055c1c1a7003b in start_wsrep_THD (arg=0x55c1c4a75440) at /home/jan/work/mariadb/upstream/sql/wsrep_mysqld.cc:3776
			 | 
		
					#39 0x000055c1c19d6518 in pfs_spawn_thread (arg=0x55c1c4a61688) at /home/jan/work/mariadb/upstream/storage/perfschema/pfs.cc:2201
			 | 
		
					#40 0x00007f5b13897ada in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:444
			 | 
		
					#41 0x00007f5b1392847c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
			 | 
		
Attachments
Issue Links
- blocks
 - 
                    
MDEV-33211 Galera SST on maria-backup causes donor node to be unresponsive
-         
 - Closed
 
 -