Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-33278

Assertion failure in thd_get_thread_id at lock_wait_wsrep

    XMLWordPrintable

Details

    Description

      Analysis: Here we have a high-priority BF applier thread that finds a conflicting local transaction. In the reproduced case, the conflicting transaction belonged to the thread updating persistent statistics. The BF thread is holding the lock_sys latch, and the thread updating persistent statistics is waiting for it. For the conflicting transaction, is_wsrep() is false, and it is holding at least one table lock and record locks. However, because it is not a wsrep transaction, its trx->mysql_thd is NULL, which causes the assertion failure.

      How to reproduce: No simple test case has been found yet, but with a 2-node cluster, use sysbench to insert 20 million rows, then start the sysbench measurement on node_1.

      (gdb) p v->mysql_thd
      $1 = (THD *) 0x0
      (gdb) p v->lock.wait_lock
      $4 = {m = std::atomic<ib_lock_t *> = { 0x0 }}
      (gdb) p v->state
      $5 = {m = std::atomic<trx_state_t> = { TRX_STATE_ACTIVE }}
      (gdb) p v->id
      $6 = 39
      (gdb) p v->wsrep
      $7 = 0 '\000'
      (gdb) p v->lock
      $8 = {wait_lock = {m = std::atomic<ib_lock_t *> = { 0x0 }}, wait_trx = 0x0, cond = {__data = {__wseq = {__value64 = 0, __value32 = {__low = 0, __high = 0}}, __g1_start = {__value64 = 0, __value32 = {__low = 0, __high = 0}}, __g_refs = {0, 0}, __g_size = {0, 0}, __g1_orig_size = 0, __wrefs = 0, __g_signals = {0, 0}}, __size = '\000' <repeats 47 times>, __align = 0}, suspend_time = {m = std::atomic<my_hrtime_t> = { {val = 0} }}, was_chosen_as_deadlock_victim = {m = std::atomic<unsigned char> = { 0 '\000' }}, rec_cached = 1 '\001', table_cached = 1 '\001', wait_thr = 0x0, rec_pool = {{lock = {trx = 0x7f5b00ca7b80, trx_locks = {prev = 0x7f5b00ca8080, next = 0x0}, index = 0x55c1c4cc8de8, hash = 0x7f5b00ca9280, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x900000045, locks = {prev = 0x50, next = 0x0}}, rec_lock = {page_id = {m_id = 38654705733}, n_bits = 80}}, type_mode = 1027}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, 
index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, {lock = {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}}, table_pool = {{trx = 0x7f5b00ca7b80, trx_locks = {prev = 0x0, next = 0x7f5b00ca7c80}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x55c1c4cbffa8, locks = {prev = 0x0, next = 0x7f5b00ca9680}}, rec_lock = {page_id = {m_id = 94290718752680}, n_bits = 0}}, type_mode = 9}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 
0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}, {trx = 0x0, trx_locks = {prev = 0x0, next = 0x0}, index = 0x0, hash = 0x0, requested_time = 0, wait_time = 0, un_member = {tab_lock = {table = 0x0, locks = {prev = 0x0, next = 0x0}}, rec_lock = {page_id = {m_id = 0}, n_bits = 0}}, type_mode = 0}}, lock_heap = 0x55c1c4b80098, trx_locks = {count = 2, start = 0x7f5b00ca8080, end = 0x7f5b00ca7c80, node = &ib_lock_t::trx_locks, init = 51966}, table_locks = std::vector of length 1, capacity 1 = {0x7f5b00ca8080}, evicted_tables = {count = 0, start = 0x0, end = 0x0, node = &dict_table_t::table_LRU, init = 51966}, n_rec_locks = 1}
      

      Stack trace (obtained using an additional assertion)

      (gdb) where
      #0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:44
      #1  __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
      #2  __GI___pthread_kill (threadid=<optimized out>, signo=6) at ./nptl/pthread_kill.c:89
      #3  0x000055c1c1fc9781 in my_write_core (sig=6) at /home/jan/work/mariadb/upstream/mysys/stacktrace.c:424
      #4  0x000055c1c1695719 in handle_fatal_signal (sig=6) at /home/jan/work/mariadb/upstream/sql/signal_handler.cc:357
      #5  <signal handler called>
      #6  __pthread_kill_implementation (no_tid=0, signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:44
      #7  __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
      #8  __GI___pthread_kill (threadid=<optimized out>, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
      #9  0x00007f5b13842866 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
      #10 0x00007f5b138268b7 in __GI_abort () at ./stdlib/abort.c:79
      #11 0x000055c1c1d7f8a7 in ut_dbg_assertion_failed (expr=0x55c1c24630a2 "v->mysql_thd", file=0x55c1c2462470 "/home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc", line=1040) at /home/jan/work/mariadb/upstream/storage/innobase/ut/ut0dbg.cc:60
      #12 0x000055c1c1b7c29c in lock_wait_wsrep (trx=0x7f5b00ca9180) at /home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc:1040
      #13 0x000055c1c1b802a0 in lock_wait (thr=0x7f5aec0b1968) at /home/jan/work/mariadb/upstream/storage/innobase/lock/lock0lock.cc:1930
      #14 0x000055c1c1cb44ca in row_mysql_handle_errors (new_err=0x7f5b1122bd54, trx=0x7f5b00ca9180, thr=0x7f5aec0b1968, savept=0x7f5b1122bd58) at /home/jan/work/mariadb/upstream/storage/innobase/row/row0mysql.cc:688
      #15 0x000055c1c1cb5fcd in row_insert_for_mysql (mysql_rec=0x7f5aec0aff38 "\377\035\025", prebuilt=0x7f5aec0b10f8, ins_mode=ROW_INS_NORMAL) at /home/jan/work/mariadb/upstream/storage/innobase/row/row0mysql.cc:1326
      #16 0x000055c1c1ac0e2b in ha_innobase::write_row (this=0x7f5aec0b03c0, record=0x7f5aec0aff38 "\377\035\025") at /home/jan/work/mariadb/upstream/storage/innobase/handler/ha_innodb.cc:7844
      #17 0x000055c1c16b057e in handler::ha_write_row (this=0x7f5aec0b03c0, buf=0x7f5aec0aff38 "\377\035\025") at /home/jan/work/mariadb/upstream/sql/handler.cc:7661
      #18 0x000055c1c1859443 in Rows_log_event::write_row (this=0x7f5aec02bb48, rgi=0x7f5aec021c10, overwrite=false) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:7746
      #19 0x000055c1c1859b05 in Write_rows_log_event::do_exec_row (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:8008
      #20 0x000055c1c1852e75 in Rows_log_event::do_apply_event (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event_server.cc:6126
      #21 0x000055c1c1838c0e in Log_event::apply_event (this=0x7f5aec02bb48, rgi=0x7f5aec021c10) at /home/jan/work/mariadb/upstream/sql/log_event.cc:4191
      #22 0x000055c1c1a81e8a in wsrep_apply_events (thd=0x7f5aec000dc8, rli=0x7f5aec0159a0, events_buf=0x7f5b03f085e0, buf_len=504160) at /home/jan/work/mariadb/upstream/sql/wsrep_applier.cc:213
      #23 0x000055c1c1a59ce9 in apply_events (thd=0x7f5aec000dc8, rli=0x7f5aec0159a0, data=..., err=...) at /home/jan/work/mariadb/upstream/sql/wsrep_high_priority_service.cc:128
      #24 0x000055c1c1a5c204 in Wsrep_applier_service::apply_write_set (this=0x7f5b1122da90, ws_meta=..., data=..., err=...) at /home/jan/work/mariadb/upstream/sql/wsrep_high_priority_service.cc:596
      #25 0x000055c1c20ae066 in apply_write_set (server_state=..., high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/src/server_state.cpp:332
      #26 0x000055c1c20b243c in wsrep::server_state::on_apply (this=0x55c1c4a26d70, high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/src/server_state.cpp:1128
      #27 0x000055c1c20cd383 in wsrep::high_priority_service::apply (this=0x7f5b1122da90, ws_handle=..., ws_meta=..., data=...) at /home/jan/work/mariadb/upstream/wsrep-lib/include/wsrep/high_priority_service.hpp:47
      #28 0x000055c1c20c9da3 in (anonymous namespace)::apply_cb (ctx=0x7f5b1122da90, wsh=0x7f5b1122ccc0, flags=65, buf=0x7f5b1122ccd0, meta=0x7f5b1122cf90, exit_loop=0x7f5b1122cf4f) at /home/jan/work/mariadb/upstream/wsrep-lib/src/wsrep_provider_v26.cpp:507
      #29 0x00007f5b13276932 in galera::TrxHandleSlave::apply (this=this@entry=0x7f5aec06a850, recv_ctx=recv_ctx@entry=0x7f5b1122da90, apply_cb=0x55c1c20c9b67 <(anonymous namespace)::apply_cb(void*, wsrep_ws_handle_t const*, uint32_t, wsrep_buf_t const*, wsrep_trx_meta_t const*, wsrep_bool_t*)>, meta=..., exit_loop=exit_loop@entry=@0x7f5b1122cf4f: false) at /home/jan/work/galera-lib/galera/galera/src/trx_handle.cpp:396
      #30 0x00007f5b132895b8 in galera::ReplicatorSMM::apply_trx (this=this@entry=0x55c1c4a3cd70, recv_ctx=recv_ctx@entry=0x7f5b1122da90, ts=...) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:518
      #31 0x00007f5b1328db63 in galera::ReplicatorSMM::process_trx (this=0x55c1c4a3cd70, recv_ctx=0x7f5b1122da90, ts_ptr=...) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:2152
      #32 0x00007f5b132c4131 in galera::GcsActionSource::process_writeset (this=0x55c1c4a6f4c0, recv_ctx=0x7f5b1122da90, act=..., exit_loop=@0x7f5b1122d67f: false) at /home/jan/work/galera-lib/galera/galera/src/gcs_action_source.cpp:62
      #33 0x00007f5b132c5286 in galera::GcsActionSource::process (this=0x55c1c4a6f4c0, recv_ctx=0x7f5b1122da90, exit_loop=@0x7f5b1122d67f: false) at /home/jan/work/galera-lib/galera/galera/src/gcs_action_source.cpp:186
      #34 0x00007f5b1328e2b0 in galera::ReplicatorSMM::async_recv (this=0x55c1c4a3cd70, recv_ctx=0x7f5b1122da90) at /home/jan/work/galera-lib/galera/galera/src/replicator_smm.cpp:404
      #35 0x00007f5b13262f8f in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at /home/jan/work/galera-lib/galera/galera/src/wsrep_provider.cpp:264
      #36 0x000055c1c20cb45e in wsrep::wsrep_provider_v26::run_applier (this=0x55c1c4a27720, applier_ctx=0x7f5b1122da90) at /home/jan/work/mariadb/upstream/wsrep-lib/src/wsrep_provider_v26.cpp:858
      #37 0x000055c1c1a82d9c in wsrep_replication_process (thd=0x7f5aec000dc8, arg=0x55c1c4a75440) at /home/jan/work/mariadb/upstream/sql/wsrep_thd.cc:57
      #38 0x000055c1c1a7003b in start_wsrep_THD (arg=0x55c1c4a75440) at /home/jan/work/mariadb/upstream/sql/wsrep_mysqld.cc:3776
      #39 0x000055c1c19d6518 in pfs_spawn_thread (arg=0x55c1c4a61688) at /home/jan/work/mariadb/upstream/storage/perfschema/pfs.cc:2201
      #40 0x00007f5b13897ada in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:444
      #41 0x00007f5b1392847c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
      

      Attachments

        Issue Links

          Activity

            People

              sysprg Julius Goryavsky
              janlindstrom Jan Lindström
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.