Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-28948

FLUSH BINARY LOGS waits/hangs on mysql_mutex_unlock(&LOCK_index) forever

    XMLWordPrintable

    Details

      Description

      The following thread hangs forever while executing the FLUSH BINARY LOGS:

      Thread 1044 (Thread 0x7fcfae319700 (LWP 112070)):
      #0  0x00007fde4ea07a35 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
      No symbol table info available.
      #1  0x00005642bbfd68ec in inline_mysql_cond_wait (src_file=0x5642bc72b7b8 "/home/jenkins/workspace/Build-Package/padding_for_CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX_ON_ES_BACKUP_DEBUGSOURCE/sql/log.cc", src_line=4491, mutex=<optimized out>, that=0x5642bd05e6c0 <mysql_bin_log+1248>) at /usr/src/debug/MariaDB-/src_0/include/mysql/psi/mysql_thread.h:1233
              result = <optimized out>
      #2  MYSQL_BIN_LOG::wait_for_last_checkpoint_event (this=this@entry=0x5642bd05e1e0 <mysql_bin_log>) at /usr/src/debug/MariaDB-/src_0/sql/log.cc:4491
      No locals.
      #3  0x00005642bbdf1017 in reload_acl_and_cache (thd=thd@entry=0x7fcfcc2ce168, options=1024, tables=tables@entry=0x0, write_to_binlog=write_to_binlog@entry=0x7fcfae317e70) at /usr/src/debug/MariaDB-/src_0/sql/sql_reload.cc:162
              drop_gtid_domain = <optimized out>
              result = false
              tmp_write_to_binlog = 0
      #4  0x00005642bbcd979d in mysql_execute_command (thd=thd@entry=0x7fcfcc2ce168) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:5524
              write_to_binlog = 1
              res = 0
              lex = 0x7fcfcc2d2088
              trace_command = {<Json_writer_struct> = {_vptr.Json_writer_struct = 0x5642bce1a0d0 <vtable for Json_writer_object+16>, my_writer = 0x0, context = {writer = 0x0}, closed = false}, <No data fields>}
              trace_command_steps = {<Json_writer_struct> = {_vptr.Json_writer_struct = 0x5642bce1a0f0 <vtable for Json_writer_array+16>, my_writer = 0x0, context = {writer = 0x0}, closed = false}, <No data fields>}
              orig_binlog_format = BINLOG_FORMAT_ROW
              up_result = 0
              rpl_filter = <optimized out>
              ots = {ctx = 0x7fcfcc2d1d38, traceable = false}
              orig_current_stmt_binlog_format = BINLOG_FORMAT_ROW
              select_lex = 0x7fcfcc2d2978
              first_table = 0x0
              all_tables = 0x0
              unit = 0x7fcfcc2d2150
              have_table_map_for_update = false
      #5  0x00005642bbcde1f5 in mysql_parse (thd=thd@entry=0x7fcfcc2ce168, rawbuf=rawbuf@entry=0x7fcfcc0e0d50 "flush binary logs", length=length@entry=17, parser_state=parser_state@entry=0x7fcfae3182e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:8127
              found_semicolon = <optimized out>
              error = <optimized out>
              lex = 0x7fcfcc2d2088
              err = <optimized out>
      #6  0x00005642bbcdea5c in wsrep_mysql_parse (thd=thd@entry=0x7fcfcc2ce168, rawbuf=0x7fcfcc0e0d50 "flush binary logs", length=17, parser_state=parser_state@entry=0x7fcfae3182e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:7930
              is_autocommit = true
              retry_autocommit = false
      #7  0x00005642bbce17e1 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x7fcfcc2ce168, packet=packet@entry=0x7fcfcc282549 "flush binary logs", packet_length=packet_length@entry=17, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:1877
              packet_end = 0x7fcfcc0e0d61 ""
              parser_state = {m_lip = {lookahead_token = -1, lookahead_yylval = 0x0, m_thd = 0x7fcfcc2ce168, m_ptr = 0x7fcfcc0e0d62 "", m_tok_start = 0x7fcfcc0e0d62 "", m_tok_end = 0x7fcfcc0e0d62 "", m_end_of_query = 0x7fcfcc0e0d61 "", m_tok_start_prev = 0x7fcfcc0e0d61 "", m_buf = 0x7fcfcc0e0d50 "flush binary logs", m_buf_length = 17, m_echo = true, m_echo_saved = false, m_cpp_buf = 0x7fcfcc0e0db8 "flush binary logs", m_cpp_ptr = 0x7fcfcc0e0dc9 "", m_cpp_tok_start = 0x7fcfcc0e0dc9 "", m_cpp_tok_start_prev = 0x7fcfcc0e0dc9 "", m_cpp_tok_end = 0x7fcfcc0e0dc9 "", m_body_utf8 = 0x0, m_body_utf8_ptr = 0x0, m_cpp_utf8_processed_ptr = 0x0, next_state = MY_LEX_END, found_semicolon = 0x0, ignore_space = false, stmt_prepare_mode = false, multi_statements = true, yylineno = 1, m_digest = 0x0, in_comment = NO_COMMENT, in_comment_saved = NO_COMMENT, m_cpp_text_start = 0x7fcfcc0e0fc7 "1", m_cpp_text_end = 0x7fcfcc0e0fc8 "", m_underscore_cs = 0x0}, m_yacc = {yacc_yyss = 0x0, yacc_yyvs = 0x0, m_set_signal_info = {m_item = {0x0 <repeats 12 times>}}, m_lock_type = TL_READ_DEFAULT, m_mdl_type = MDL_SHARED_READ}, m_digest_psi = 0x0}
              net = 0x7fcfcc2ce3e8
              do_end_of_statement = true
              __FUNCTION__ = "dispatch_command"
              error = false
              drop_more_results = false
      #8  0x00005642bbce231f in do_command (thd=0x7fcfcc2ce168) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:1371
              return_value = <optimized out>
              packet = 0x7fcfcc282548 "\003flush binary logs"
              packet_length = <optimized out>
              net = 0x7fcfcc2ce3e8
              command = COM_QUERY
              __FUNCTION__ = "do_command"
      #9  0x00005642bbdd22c9 in do_handle_one_connection (connect=<optimized out>, connect@entry=0x5642f81cb3a8, put_in_cache=put_in_cache@entry=true) at /usr/src/debug/MariaDB-/src_0/sql/sql_connect.cc:1418
              create_user = true
              thr_create_utime = <optimized out>
              thd = 0x7fcfcc2ce168
      #10 0x00005642bbdd2574 in handle_one_connection (arg=arg@entry=0x5642f81cb3a8) at /usr/src/debug/MariaDB-/src_0/sql/sql_connect.cc:1312
              connect = 0x5642f81cb3a8
      #11 0x00005642bc1683a7 in pfs_spawn_thread (arg=0x5642f89df1f8) at /usr/src/debug/MariaDB-/src_0/storage/perfschema/pfs.cc:2201
              typed_arg = 0x5642f89df1f8
              user_arg = 0x5642f81cb3a8
              pfs = <optimized out>
              user_start_routine = 0x5642bbdd2540 <handle_one_connection(void*)>
              klass = <optimized out>
      #12 0x00007fde4ea03ea5 in start_thread () from /lib64/libpthread.so.0
      No symbol table info available.
      #13 0x00007fde4df1e9fd in clone () from /lib64/libc.so.6
      No symbol table info available.
      

      Only killing the mariadbd process helps. Code review seems to give a hint that wait is here:

       4263 bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
       4264                                rpl_gtid *init_state, uint32 init_state_len      ,
       4265                                ulong next_log_number)
       4266 {
      ...
       4464   if (!is_relay_log)
       4465   { 
       4466     xid_count_per_binlog *b;
       4467     /*
       4468       Remove all entries in the xid_count list except the last. 
       4469       Normally we will just be deleting all the entries that we waited for       to   
       4470       drop to zero above. But if we fail during RESET MASTER for some reas      on    
       4471       then we will not have created any new log file, and we may keep the       last  
       4472       of the old entries.
       4473     */
       4474     mysql_mutex_lock(&LOCK_xid_list);
       4475     for (;;)
       4476     { 
       4477       b= binlog_xid_count_list.head();
       4478       DBUG_ASSERT(b /* List can never become empty. */);
       4479       if (b->binlog_id == current_binlog_id)
       4480         break;
       4481       DBUG_ASSERT(b->xid_count == 0);
       4482       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
       4483                            "xid_list_entry for %s (%lu)", b);
       4484       delete binlog_xid_count_list.get();
       4485     }
       4486     mysql_cond_broadcast(&COND_xid_list);
       4487     reset_master_pending--;
       4488     mysql_mutex_unlock(&LOCK_xid_list);
       4489   }
       4490 
       4491   mysql_mutex_unlock(&LOCK_index);
       4492   mysql_mutex_unlock(&LOCK_log);
       4493   DBUG_RETURN(error);
       4494 }
      

      There is a backtrace and full backtrace attached to the related customer issue. This happens on a Galera node that seems to be used an async replication master for a couple of slaves too.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              Elkin Andrei Elkin
              Reporter:
              valerii Valerii Kravchuk
              Votes:
              0 Vote for this issue
              Watchers:
              7 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved:

                  Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.