Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-17063

server locks up and is killed after innodb_fatal_semaphore_wait_threshold is reached

Details

    Description

      Happening at a customer of ours.
      This issue repeats itself every saturday at approx the same time - looks like something in the workload is triggering it.
      The server locks up and does not process any more data (connections are accepted but not handled). After the timeout set in innodb_fatal_semaphore_wait_threshold the InnoDB kernel commits suicide.

      I was able to capture a stack trace right before the fatal wait was reached.

          367 __lll_lock_wait,__GI___pthread_mutex_lock,do_handle_one_connection,handle_one_connection,start_thread,clone
          347 poll,vio_io_wait,vio_socket_io_wait,vio_read,::??,my_net_read_packet_reallen,my_net_read_packet,do_command,do_handle_one_connection,handle_one_connection,start_thr
      ead,clone
          251 pthread_cond_wait,::??,::??,::??,::??,::??,handler::ha_write_row,write_record,mysql_insert,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_ha
      ndle_one_connection,handle_one_connection,start_thread,clone
          157 nanosleep,::??,::??,::??,handler::ha_index_read_map,::??,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_command,mysql_parse,:
      :??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
          123 nanosleep,::??,::??,::??,handler::index_read_idx_map,handler::ha_index_read_idx_map,::??,::??,::??,JOIN::optimize_inner,JOIN::optimize,mysql_select,handle_select,:
      :??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
           67 pthread_cond_wait,::??,::??,::??,::??,::??,handler::ha_index_read_map,handler::read_range_first,handler::multi_range_read_next,Mrr_simple_index_reader::get_next,Ds
      Mrr_impl::dsmrr_next,QUICK_RANGE_SELECT::get_next,::??,mysql_update,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_
      connection,start_thread,clone
           52 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::ha_open,open_table_from_share,open_table,open_tables,open_and_lock_tables,::??,mysql_execute_command,mysql
      _parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
           30 nanosleep,::??,::??,::??,handler::ha_index_read_map,handler::read_range_first,handler::multi_range_read_next,Mrr_simple_index_reader::get_next,DsMrr_impl::dsmrr_ne
      xt,QUICK_RANGE_SELECT::get_next,QUICK_ROR_UNION_SELECT::reset,filesort,create_sort_index,st_join_table::sort_table,join_init_read_record,sub_select,JOIN::exec_inner,JOIN::
      exec,mysql_select,handle_select,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
           18 nanosleep,::??,::??,::??,handler::ha_write_row,write_record,mysql_insert,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connectio
      n,handle_one_connection,start_thread,clone
           17 __lll_lock_wait,__GI___pthread_mutex_lock,unlink_thd,one_thread_per_connection_end,do_handle_one_connection,handle_one_connection,start_thread,clone
           15 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::ha_open,open_table_from_share,open_table,open_tables,open_and_lock_tables,::??,mysql_execute_command,sp_in
      str_stmt::exec_core,sp_lex_keeper::reset_lex_and_exec_core,sp_instr_stmt::execute,sp_head::execute,sp_head::execute_procedure,::??,mysql_execute_command,mysql_parse,::??,d
      ispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
           13 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::ha_open,open_table_from_share,open_table,open_tables,mysql_update,mysql_execute_command,mysql_parse,::??,d
      ispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
           10 libaio::??,::??,::??,::??,::??,::??,start_thread,clone
            9 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::index_read_idx_map,handler::ha_index_read_idx_map,::??,::??,::??,JOIN::optimize_inner,JOIN::optimize,mysql
      _select,handle_select,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            9 nanosleep,::??,::??,::??,::??,handler::ha_rnd_next,rr_sequential,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_command,mysql
      _parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            8 pthread_cond_wait,::??,::??,start_thread,clone
            7 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::ha_open,open_table_from_share,open_table,open_tables,open_and_lock_tables,mysql_insert,mysql_execute_comma
      nd,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            6 pthread_cond_wait,::??,::??,::??,::??,::??,::??,handler::ha_index_read_map,::??,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execut
      e_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            4 pthread_cond_wait,fifo_lock_get,gu_fifo_get_head,gcs_recv,galera::Gcs::recv,galera::GcsActionSource::process,galera::ReplicatorSMM::async_recv,galera_recv,::??,sta
      rt_wsrep_THD,start_thread,clone
            4 nanosleep,::??,::??,::??,::??,handler::ha_rnd_next,filesort,create_sort_index,st_join_table::sort_table,join_init_read_record,sub_select,JOIN::exec_inner,JOIN::exe
      c,mysql_select,handle_select,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            3 pthread_cond_wait,::??,::??,::??,::??,open_tables,open_and_lock_tables,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connec
      tion,handle_one_connection,start_thread,clone
            3 pthread_cond_wait,::??,::??,::??,::??,::??,handler::ha_update_row,mysql_update,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_con
      nection,handle_one_connection,start_thread,clone
            2 pthread_cond_wait,::??,::??,::??,start_thread,clone
            2 pthread_cond_timedwait,::??,::??,::??,start_thread,clone
            2 nanosleep,::??,::??,start_thread,clone
            2 nanosleep,::??,::??,::??,handler::ha_index_read_map,handler::read_range_first,handler::multi_range_read_next,Mrr_simple_index_reader::get_next,DsMrr_impl::dsmrr_ne
      xt,QUICK_RANGE_SELECT::get_next,::??,mysql_update,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_t
      hread,clone
            2 nanosleep,::??,::??,::??,handler::ha_index_read_map,handler::read_range_first,handler::multi_range_read_next,Mrr_simple_index_reader::get_next,DsMrr_impl::dsmrr_ne
      xt,QUICK_RANGE_SELECT::get_next,filesort,create_sort_index,st_join_table::sort_table,join_init_read_record,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_selec
      t,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 pthread_cond_wait,::??,start_wsrep_THD,start_thread,clone
            1 pthread_cond_wait,::??,start_thread,clone
            1 pthread_cond_wait,::??,::??,::??,::??,::??,start_thread,clone
            1 pthread_cond_wait,::??,::??,::??,::??,::??,plugin_foreach_with_mask,ha_kill_query,THD::awake,kill_one_thread,::??,mysql_execute_command,mysql_parse,::??,dispatch_c
      ommand,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 pthread_cond_wait,::??,::??,::??,::??,::??,::??,ha_show_status,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_o
      ne_connection,start_thread,clone
            1 pthread_cond_wait,handle_slave_background,start_thread,clone
            1 pthread_cond_wait,::??,::??,::??,::??,::??,::??,::??,handler::ha_rnd_next,filesort,create_sort_index,st_join_table::sort_table,join_init_read_record,sub_select,JOI
      N::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,
      start_thread,clone
            1 pthread_cond_wait,gu::Lock::wait,RecvBuf::front,gcomm_recv,core_msg_recv,gcs_core_recv,gcs_recv_thread,start_thread,clone
            1 pthread_cond_wait,gu::Lock::wait,galera::ServiceThd::thd_func,start_thread,clone
            1 pthread_cond_wait,::??,::??,::??,::??,::??,fill_status,get_schema_tables_result,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_command,m
      ysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 pthread_cond_wait,binlog_background_thread,start_thread,clone
            1 pthread_cond_timedwait,::??,start_thread,clone
            1 pthread_cond_timedwait,::??,::??,start_thread,clone
            1 pthread_cond_timedwait,::??,::??,::??,::??,start_thread,clone
            1 nanosleep,::??,::??,::??,start_thread,clone
            1 nanosleep,::??,::??,::??,handler::ha_index_next_same,::??,sub_select,::??,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_comm
      and,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 nanosleep,::??,::??,::??,::??,handler::ha_index_first,::??,sub_select,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_command,mysql_parse
      ,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,THD::~THD,THD::~THD,CONNECT::close_with_error,handle_connections_sockets,mysqld_main,__libc_start_main,_start
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,thd_query_safe,::??,::??,::??,::??,get_schema_tables_result,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,:
      :??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,one_thread_per_connection_end,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,mysqld_list_processes,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,ha
      ndle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,fill_status,get_schema_tables_result,JOIN::exec_inner,JOIN::exec,st_select_lex_unit::exec,mysql_union,handle_select,
      ::??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,fill_status,get_schema_tables_result,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,::??,mysql_execute_comma
      nd,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 __lll_lock_wait,__GI___pthread_mutex_lock,::??,calc_sum_of_all_status,fill_status,get_schema_tables_result,JOIN::exec_inner,JOIN::exec,mysql_select,handle_select,:
      :??,mysql_execute_command,mysql_parse,::??,dispatch_command,do_command,do_handle_one_connection,handle_one_connection,start_thread,clone
            1 epoll_wait,asio::detail::epoll_reactor::run(epoll_reactor.ipp:391),asio::detail::task_io_service::do_run_one(task_io_service.ipp:355),asio::detail::task_io_service
      ::run(task_io_service.ipp:355),asio::io_service::run(task_io_service.ipp:355),gcomm::AsioProtonet::event_loop(task_io_service.ipp:355),GCommConn::run,GCommConn::run_fn,sta
      rt_thread,clone
            1 do_sigwait,__sigwait,signal_hand,start_thread,clone
      
      

      Attachments

        1. crash.log
          407 kB
        2. sema.log.gz
          22 kB

        Issue Links

          Activity

            wlad and me made some analysis of the stack traces. I do not see anything obviously wrong in InnoDB, and he did not see anything obviously wrong on the SQL layer.
            Given that the deployment involves Galera, and based on the fact that many ./mysql-test-run --suite=galera tests occasionally fail due to mutex violations (especially in the forced rollback of transactions), I would tend to believe that the hang could be caused by the wsrep patches or the Galera library.

            marko Marko Mäkelä added a comment - wlad and me made some analysis of the stack traces. I do not see anything obviously wrong in InnoDB, and he did not see anything obviously wrong on the SQL layer. Given that the deployment involves Galera, and based on the fact that many ./mysql-test-run --suite=galera tests occasionally fail due to mutex violations (especially in the forced rollback of transactions), I would tend to believe that the hang could be caused by the wsrep patches or the Galera library.

            Here is one regular (but non-deterministic) test crash that leads to me believe that the problem could be in Galera:

            10.4 67e3d1ee93505e320bdb4343a8a8be827183a58e

            CURRENT_TEST: galera.MW-286
            mysqltest: At line 26: query 'ALTER TABLE t1 ADD PRIMARY KEY (f1)' failed with wrong errno 2013: 'Lost connection to MySQL server during query', instead of 0...
             
            The result from queries just before the failure was:
            connection node_1;
            CREATE TABLE ten (f1 INTEGER) Engine=InnoDB;
            INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
            CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
            INSERT INTO t1 (f1) SELECT 000000 + (10000 * a1.f1) + (1000 * a2.f1) + (100 * a3.f1) + (10 * a4.f1) + a5.f1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5;
            INSERT INTO t1 (f1) SELECT 100000 + (10000 * a1.f1) + (1000 * a2.f1) + (100 * a3.f1) + (10 * a4.f1) + a5.f1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5;;
            connection node_2;
            SET GLOBAL wsrep_desync = TRUE;
            SET wsrep_on = FALSE;
            ALTER TABLE t1 ADD PRIMARY KEY (f1);
            …
            Version: '10.4.1-MariaDB-debug-log'  socket: '/dev/shm/10.4c/mysql-test/var/tmp/63/mysqld.2.sock'  port: 17261  Source distribution
            2018-12-14 10:52:58 0 [Note] WSREP: Member 1.0 (jyty) desyncs itself from group
            2018-12-14 10:52:58 0 [Note] WSREP: Shifting SYNCED -> DONOR/DESYNCED (TO: 4)
            mysqld: /mariadb/10.4c/storage/innobase/include/sync0policy.ic:64: void MutexDebug<TTASEventMutex<GenericPolicy> >::enter(const Mutex *, const char *, unsigned int) [Mutex = TTASEventMutex<GenericPolicy>]: Assertion `!is_owned()' failed.
            …
            #10 PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (n_spins=30, n_delay=4, name=0x121cc79 "/mariadb/10.4c/storage/innobase/lock/lock0lock.cc", line=6398, this=<optimized out>) at /mariadb/10.4c/storage/innobase/include/ib0mutex.h:590
            #11 lock_trx_handle_wait (trx=0x7f2316024208) at /mariadb/10.4c/storage/innobase/lock/lock0lock.cc:6398
            #12 0x0000000000a990c2 in innobase_kill_query (thd=0x7f2298000cf8) at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:5133
            #13 0x00000000008c0eb7 in kill_handlerton (thd=0x7f2298000cf8, plugin=<optimized out>, level=0x7f2324103d8c) at /mariadb/10.4c/sql/handler.cc:813
            #14 0x00000000006a62cf in plugin_foreach_with_mask (thd=0x7f2298000cf8, func=0x8c0e80 <kill_handlerton(THD*, st_plugin_int**, void*)>, type=<optimized out>, state_mask=8, arg=0x7f2324103d8c) at /mariadb/10.4c/sql/sql_plugin.cc:2432
            #15 0x00000000008c0e47 in ha_kill_query (thd=0x7f2298000cf8, level=<optimized out>) at /mariadb/10.4c/sql/handler.cc:820
            #16 0x000000000064481e in THD::awake_no_mutex (this=0x7f2298000cf8, state_to_set=<optimized out>) at /mariadb/10.4c/sql/sql_class.cc:1836
            #17 0x000000000081bfda in THD::awake (this=0x7f2298000cf8, state_to_set=KILL_QUERY) at /mariadb/10.4c/sql/sql_class.h:3291
            #18 wsrep_thd_awake (thd=0x7f2298000cf8, signal=<optimized out>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2635
            #19 0x0000000000a95064 in wsrep_innobase_kill_one_trx (bf_thd_ptr=0x7f2308000cf8, bf_trx=<optimized out>, victim_trx=<optimized out>, signal=<optimized out>) at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:18711
            #20 0x0000000000a9b3f2 in wsrep_abort_transaction (bf_thd=<optimized out>, victim_thd=<optimized out>, signal=1 '\001') at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:18787
            #21 0x00000000008cd3fa in ha_abort_transaction (bf_thd=0x7f2308000cf8, victim_thd=0x7f2298000cf8, signal=1 '\001') at /mariadb/10.4c/sql/handler.cc:6564
            #22 0x000000000082527b in wsrep_abort_thd (bf_thd_ptr=0x7f2308000cf8, victim_thd_ptr=0x7f2298000cf8, signal=1 '\001') at /mariadb/10.4c/sql/wsrep_thd.cc:655
            #23 0x000000000081b4b1 in wsrep_grant_mdl_exception (requestor_ctx=<optimized out>, ticket=<optimized out>, key=<optimized out>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2006
            #24 0x000000000079abcb in MDL_lock::can_grant_lock (this=0x7f2298107dd8, type_arg=<optimized out>, requestor_ctx=0x7f2308000e18, ignore_lock_priority=<optimized out>) at /mariadb/10.4c/sql/mdl.cc:1765
            #25 0x000000000079b428 in MDL_context::try_acquire_lock_impl (this=0x7f2308000e18, mdl_request=0x7f23088d5da0, out_ticket=0x7f2324104180) at /mariadb/10.4c/sql/mdl.cc:2116
            #26 0x000000000079b7c7 in MDL_context::acquire_lock (this=0x7f2308000e18, mdl_request=0x7f23088d5da0, lock_wait_timeout=86400) at /mariadb/10.4c/sql/mdl.cc:2263
            #27 0x00000000006274eb in open_table_get_mdl_lock (thd=0x7f2308000cf8, ot_ctx=0x7f2324104ab0, mdl_request=0x7f23088d5da0, flags=0, mdl_ticket=0x7f2324104478) at /mariadb/10.4c/sql/sql_base.cc:1592
            #28 0x0000000000626676 in open_table (thd=0x7f2308000cf8, table_list=0x7f23088d5958, ot_ctx=0x7f2324104ab0) at /mariadb/10.4c/sql/sql_base.cc:1852
            #29 0x00000000006297df in open_and_process_table (thd=<optimized out>, lex=<optimized out>, tables=<optimized out>, counter=<optimized out>, flags=<optimized out>, prelocking_strategy=<optimized out>, ot_ctx=<optimized out>, has_prelocking_list=<optimized out>) at /mariadb/10.4c/sql/sql_base.cc:3724
            #30 open_tables (thd=0x7f2308000cf8, options=..., start=0x7f2324104b48, counter=0x7f2324104b44, flags=0, prelocking_strategy=0x7f2324104c00) at /mariadb/10.4c/sql/sql_base.cc:4255
            #31 0x000000000062be3b in open_and_lock_tables (thd=0x7f2308000cf8, options=..., tables=<optimized out>, derived=false, flags=0, prelocking_strategy=0x7f2324104c00) at /mariadb/10.4c/sql/sql_base.cc:5134
            #32 0x00000000009d7d0f in open_and_lock_tables (thd=0x2, tables=0x0, derived=false, flags=0) at /mariadb/10.4c/sql/sql_base.h:507
            #33 Rows_log_event::do_apply_event (this=0x7f230802c8a8, rgi=<optimized out>) at /mariadb/10.4c/sql/log_event.cc:11286
            #34 0x00000000005eddfa in Log_event::apply_event (this=0x7f230802c8a8, rgi=0x7f23080229e0) at /mariadb/10.4c/sql/log_event.h:1481
            #35 0x00000000008234e1 in wsrep_apply_events (thd=<optimized out>, events_buf=<optimized out>, buf_len=493589) at /mariadb/10.4c/sql/wsrep_applier.cc:166
            #36 wsrep_apply_cb (ctx=<optimized out>, buf=0x7f23180c3128, buf_len=<optimized out>, flags=<optimized out>, meta=<optimized out>) at /mariadb/10.4c/sql/wsrep_applier.cc:269
            #37 0x00007f232688323b in galera::TrxHandle::apply(void*, wsrep_cb_status (*)(void*, void const*, unsigned long, unsigned int, wsrep_trx_meta const*), wsrep_trx_meta const&) const () from /usr/lib/galera/libgalera_smm.so
            #38 0x00007f23268c6203 in ?? () from /usr/lib/galera/libgalera_smm.so
            #39 0x00007f23268c8ba8 in galera::ReplicatorSMM::apply_trx(void*, galera::TrxHandle*) () from /usr/lib/galera/libgalera_smm.so
            #40 0x00007f23268cbede in galera::ReplicatorSMM::process_trx(void*, galera::TrxHandle*) () from /usr/lib/galera/libgalera_smm.so
            #41 0x00007f23268a8288 in galera::GcsActionSource::dispatch(void*, gcs_action const&, bool&) () from /usr/lib/galera/libgalera_smm.so
            #42 0x00007f23268a8ea8 in galera::GcsActionSource::process(void*, bool&) () from /usr/lib/galera/libgalera_smm.so
            #43 0x00007f23268cc52d in galera::ReplicatorSMM::async_recv(void*) () from /usr/lib/galera/libgalera_smm.so
            #44 0x00007f23268e284b in galera_recv () from /usr/lib/galera/libgalera_smm.so
            #45 0x0000000000824904 in wsrep_replication_process (thd=0x7f2308000cf8) at /mariadb/10.4c/sql/wsrep_thd.cc:362
            #46 0x000000000081b7fa in start_wsrep_THD (arg=0x8248b0 <wsrep_replication_process(THD*)>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2101
            #47 0x00007f2328650fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
            #48 0x00007f23274f188f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
            

            This same problem should exist in 10.3 and likely also in 10.2, maybe even earlier major versions.

            marko Marko Mäkelä added a comment - Here is one regular (but non-deterministic) test crash that leads to me believe that the problem could be in Galera: 10.4 67e3d1ee93505e320bdb4343a8a8be827183a58e CURRENT_TEST: galera.MW-286 mysqltest: At line 26: query 'ALTER TABLE t1 ADD PRIMARY KEY (f1)' failed with wrong errno 2013: 'Lost connection to MySQL server during query', instead of 0...   The result from queries just before the failure was: connection node_1; CREATE TABLE ten (f1 INTEGER) Engine=InnoDB; INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB; INSERT INTO t1 (f1) SELECT 000000 + (10000 * a1.f1) + (1000 * a2.f1) + (100 * a3.f1) + (10 * a4.f1) + a5.f1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5; INSERT INTO t1 (f1) SELECT 100000 + (10000 * a1.f1) + (1000 * a2.f1) + (100 * a3.f1) + (10 * a4.f1) + a5.f1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5;; connection node_2; SET GLOBAL wsrep_desync = TRUE; SET wsrep_on = FALSE; ALTER TABLE t1 ADD PRIMARY KEY (f1); … Version: '10.4.1-MariaDB-debug-log' socket: '/dev/shm/10.4c/mysql-test/var/tmp/63/mysqld.2.sock' port: 17261 Source distribution 2018-12-14 10:52:58 0 [Note] WSREP: Member 1.0 (jyty) desyncs itself from group 2018-12-14 10:52:58 0 [Note] WSREP: Shifting SYNCED -> DONOR/DESYNCED (TO: 4) mysqld: /mariadb/10.4c/storage/innobase/include/sync0policy.ic:64: void MutexDebug<TTASEventMutex<GenericPolicy> >::enter(const Mutex *, const char *, unsigned int) [Mutex = TTASEventMutex<GenericPolicy>]: Assertion `!is_owned()' failed. … #10 PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (n_spins=30, n_delay=4, name=0x121cc79 "/mariadb/10.4c/storage/innobase/lock/lock0lock.cc", line=6398, this=<optimized out>) at /mariadb/10.4c/storage/innobase/include/ib0mutex.h:590 #11 lock_trx_handle_wait (trx=0x7f2316024208) at /mariadb/10.4c/storage/innobase/lock/lock0lock.cc:6398 #12 0x0000000000a990c2 in innobase_kill_query (thd=0x7f2298000cf8) at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:5133 #13 0x00000000008c0eb7 in kill_handlerton (thd=0x7f2298000cf8, plugin=<optimized out>, level=0x7f2324103d8c) at /mariadb/10.4c/sql/handler.cc:813 #14 0x00000000006a62cf in plugin_foreach_with_mask (thd=0x7f2298000cf8, func=0x8c0e80 <kill_handlerton(THD*, st_plugin_int**, void*)>, type=<optimized out>, state_mask=8, arg=0x7f2324103d8c) at /mariadb/10.4c/sql/sql_plugin.cc:2432 #15 0x00000000008c0e47 in ha_kill_query (thd=0x7f2298000cf8, level=<optimized out>) at /mariadb/10.4c/sql/handler.cc:820 #16 0x000000000064481e in THD::awake_no_mutex (this=0x7f2298000cf8, state_to_set=<optimized out>) at /mariadb/10.4c/sql/sql_class.cc:1836 #17 0x000000000081bfda in THD::awake (this=0x7f2298000cf8, state_to_set=KILL_QUERY) at /mariadb/10.4c/sql/sql_class.h:3291 #18 wsrep_thd_awake (thd=0x7f2298000cf8, signal=<optimized out>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2635 #19 0x0000000000a95064 in wsrep_innobase_kill_one_trx (bf_thd_ptr=0x7f2308000cf8, bf_trx=<optimized out>, victim_trx=<optimized out>, signal=<optimized out>) at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:18711 #20 0x0000000000a9b3f2 in wsrep_abort_transaction (bf_thd=<optimized out>, victim_thd=<optimized out>, signal=1 '\001') at /mariadb/10.4c/storage/innobase/handler/ha_innodb.cc:18787 #21 0x00000000008cd3fa in ha_abort_transaction (bf_thd=0x7f2308000cf8, victim_thd=0x7f2298000cf8, signal=1 '\001') at /mariadb/10.4c/sql/handler.cc:6564 #22 0x000000000082527b in wsrep_abort_thd (bf_thd_ptr=0x7f2308000cf8, victim_thd_ptr=0x7f2298000cf8, signal=1 '\001') at /mariadb/10.4c/sql/wsrep_thd.cc:655 #23 0x000000000081b4b1 in wsrep_grant_mdl_exception (requestor_ctx=<optimized out>, ticket=<optimized out>, key=<optimized out>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2006 #24 0x000000000079abcb in MDL_lock::can_grant_lock (this=0x7f2298107dd8, type_arg=<optimized out>, requestor_ctx=0x7f2308000e18, ignore_lock_priority=<optimized out>) at /mariadb/10.4c/sql/mdl.cc:1765 #25 0x000000000079b428 in MDL_context::try_acquire_lock_impl (this=0x7f2308000e18, mdl_request=0x7f23088d5da0, out_ticket=0x7f2324104180) at /mariadb/10.4c/sql/mdl.cc:2116 #26 0x000000000079b7c7 in MDL_context::acquire_lock (this=0x7f2308000e18, mdl_request=0x7f23088d5da0, lock_wait_timeout=86400) at /mariadb/10.4c/sql/mdl.cc:2263 #27 0x00000000006274eb in open_table_get_mdl_lock (thd=0x7f2308000cf8, ot_ctx=0x7f2324104ab0, mdl_request=0x7f23088d5da0, flags=0, mdl_ticket=0x7f2324104478) at /mariadb/10.4c/sql/sql_base.cc:1592 #28 0x0000000000626676 in open_table (thd=0x7f2308000cf8, table_list=0x7f23088d5958, ot_ctx=0x7f2324104ab0) at /mariadb/10.4c/sql/sql_base.cc:1852 #29 0x00000000006297df in open_and_process_table (thd=<optimized out>, lex=<optimized out>, tables=<optimized out>, counter=<optimized out>, flags=<optimized out>, prelocking_strategy=<optimized out>, ot_ctx=<optimized out>, has_prelocking_list=<optimized out>) at /mariadb/10.4c/sql/sql_base.cc:3724 #30 open_tables (thd=0x7f2308000cf8, options=..., start=0x7f2324104b48, counter=0x7f2324104b44, flags=0, prelocking_strategy=0x7f2324104c00) at /mariadb/10.4c/sql/sql_base.cc:4255 #31 0x000000000062be3b in open_and_lock_tables (thd=0x7f2308000cf8, options=..., tables=<optimized out>, derived=false, flags=0, prelocking_strategy=0x7f2324104c00) at /mariadb/10.4c/sql/sql_base.cc:5134 #32 0x00000000009d7d0f in open_and_lock_tables (thd=0x2, tables=0x0, derived=false, flags=0) at /mariadb/10.4c/sql/sql_base.h:507 #33 Rows_log_event::do_apply_event (this=0x7f230802c8a8, rgi=<optimized out>) at /mariadb/10.4c/sql/log_event.cc:11286 #34 0x00000000005eddfa in Log_event::apply_event (this=0x7f230802c8a8, rgi=0x7f23080229e0) at /mariadb/10.4c/sql/log_event.h:1481 #35 0x00000000008234e1 in wsrep_apply_events (thd=<optimized out>, events_buf=<optimized out>, buf_len=493589) at /mariadb/10.4c/sql/wsrep_applier.cc:166 #36 wsrep_apply_cb (ctx=<optimized out>, buf=0x7f23180c3128, buf_len=<optimized out>, flags=<optimized out>, meta=<optimized out>) at /mariadb/10.4c/sql/wsrep_applier.cc:269 #37 0x00007f232688323b in galera::TrxHandle::apply(void*, wsrep_cb_status (*)(void*, void const*, unsigned long, unsigned int, wsrep_trx_meta const*), wsrep_trx_meta const&) const () from /usr/lib/galera/libgalera_smm.so #38 0x00007f23268c6203 in ?? () from /usr/lib/galera/libgalera_smm.so #39 0x00007f23268c8ba8 in galera::ReplicatorSMM::apply_trx(void*, galera::TrxHandle*) () from /usr/lib/galera/libgalera_smm.so #40 0x00007f23268cbede in galera::ReplicatorSMM::process_trx(void*, galera::TrxHandle*) () from /usr/lib/galera/libgalera_smm.so #41 0x00007f23268a8288 in galera::GcsActionSource::dispatch(void*, gcs_action const&, bool&) () from /usr/lib/galera/libgalera_smm.so #42 0x00007f23268a8ea8 in galera::GcsActionSource::process(void*, bool&) () from /usr/lib/galera/libgalera_smm.so #43 0x00007f23268cc52d in galera::ReplicatorSMM::async_recv(void*) () from /usr/lib/galera/libgalera_smm.so #44 0x00007f23268e284b in galera_recv () from /usr/lib/galera/libgalera_smm.so #45 0x0000000000824904 in wsrep_replication_process (thd=0x7f2308000cf8) at /mariadb/10.4c/sql/wsrep_thd.cc:362 #46 0x000000000081b7fa in start_wsrep_THD (arg=0x8248b0 <wsrep_replication_process(THD*)>) at /mariadb/10.4c/sql/wsrep_mysqld.cc:2101 #47 0x00007f2328650fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486 #48 0x00007f23274f188f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 This same problem should exist in 10.3 and likely also in 10.2, maybe even earlier major versions.

            rpizzi are BACKUP LOCKs being used?

            Roel Roel Van de Paar added a comment - rpizzi are BACKUP LOCKs being used?

            No backup involved in this scenario

            rpizzi Rick Pizzi (Inactive) added a comment - No backup involved in this scenario

            This issue has never reproduced since the MDEV-18464 fix

            sysprg Julius Goryavsky added a comment - This issue has never reproduced since the MDEV-18464 fix

            People

              sysprg Julius Goryavsky
              rpizzi Rick Pizzi (Inactive)
              Votes:
              2 Vote for this issue
              Watchers:
              10 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.