Details
-
Bug
-
Status: Closed (View Workflow)
-
Critical
-
Resolution: Fixed
-
10.5.12, 10.5.15
Description
The following thread hangs forever while executing the FLUSH BINARY LOGS:
Thread 1044 (Thread 0x7fcfae319700 (LWP 112070)):
|
#0 0x00007fde4ea07a35 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
|
No symbol table info available.
|
#1 0x00005642bbfd68ec in inline_mysql_cond_wait (src_file=0x5642bc72b7b8 "/home/jenkins/workspace/Build-Package/padding_for_CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX_ON_ES_BACKUP_DEBUGSOURCE/sql/log.cc", src_line=4491, mutex=<optimized out>, that=0x5642bd05e6c0 <mysql_bin_log+1248>) at /usr/src/debug/MariaDB-/src_0/include/mysql/psi/mysql_thread.h:1233
|
result = <optimized out>
|
#2 MYSQL_BIN_LOG::wait_for_last_checkpoint_event (this=this@entry=0x5642bd05e1e0 <mysql_bin_log>) at /usr/src/debug/MariaDB-/src_0/sql/log.cc:4491
|
No locals.
|
#3 0x00005642bbdf1017 in reload_acl_and_cache (thd=thd@entry=0x7fcfcc2ce168, options=1024, tables=tables@entry=0x0, write_to_binlog=write_to_binlog@entry=0x7fcfae317e70) at /usr/src/debug/MariaDB-/src_0/sql/sql_reload.cc:162
|
drop_gtid_domain = <optimized out>
|
result = false
|
tmp_write_to_binlog = 0
|
#4 0x00005642bbcd979d in mysql_execute_command (thd=thd@entry=0x7fcfcc2ce168) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:5524
|
write_to_binlog = 1
|
res = 0
|
lex = 0x7fcfcc2d2088
|
trace_command = {<Json_writer_struct> = {_vptr.Json_writer_struct = 0x5642bce1a0d0 <vtable for Json_writer_object+16>, my_writer = 0x0, context = {writer = 0x0}, closed = false}, <No data fields>}
|
trace_command_steps = {<Json_writer_struct> = {_vptr.Json_writer_struct = 0x5642bce1a0f0 <vtable for Json_writer_array+16>, my_writer = 0x0, context = {writer = 0x0}, closed = false}, <No data fields>}
|
orig_binlog_format = BINLOG_FORMAT_ROW
|
up_result = 0
|
rpl_filter = <optimized out>
|
ots = {ctx = 0x7fcfcc2d1d38, traceable = false}
|
orig_current_stmt_binlog_format = BINLOG_FORMAT_ROW
|
select_lex = 0x7fcfcc2d2978
|
first_table = 0x0
|
all_tables = 0x0
|
unit = 0x7fcfcc2d2150
|
have_table_map_for_update = false
|
#5 0x00005642bbcde1f5 in mysql_parse (thd=thd@entry=0x7fcfcc2ce168, rawbuf=rawbuf@entry=0x7fcfcc0e0d50 "flush binary logs", length=length@entry=17, parser_state=parser_state@entry=0x7fcfae3182e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:8127
|
found_semicolon = <optimized out>
|
error = <optimized out>
|
lex = 0x7fcfcc2d2088
|
err = <optimized out>
|
#6 0x00005642bbcdea5c in wsrep_mysql_parse (thd=thd@entry=0x7fcfcc2ce168, rawbuf=0x7fcfcc0e0d50 "flush binary logs", length=17, parser_state=parser_state@entry=0x7fcfae3182e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:7930
|
is_autocommit = true
|
retry_autocommit = false
|
#7 0x00005642bbce17e1 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x7fcfcc2ce168, packet=packet@entry=0x7fcfcc282549 "flush binary logs", packet_length=packet_length@entry=17, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:1877
|
packet_end = 0x7fcfcc0e0d61 ""
|
parser_state = {m_lip = {lookahead_token = -1, lookahead_yylval = 0x0, m_thd = 0x7fcfcc2ce168, m_ptr = 0x7fcfcc0e0d62 "", m_tok_start = 0x7fcfcc0e0d62 "", m_tok_end = 0x7fcfcc0e0d62 "", m_end_of_query = 0x7fcfcc0e0d61 "", m_tok_start_prev = 0x7fcfcc0e0d61 "", m_buf = 0x7fcfcc0e0d50 "flush binary logs", m_buf_length = 17, m_echo = true, m_echo_saved = false, m_cpp_buf = 0x7fcfcc0e0db8 "flush binary logs", m_cpp_ptr = 0x7fcfcc0e0dc9 "", m_cpp_tok_start = 0x7fcfcc0e0dc9 "", m_cpp_tok_start_prev = 0x7fcfcc0e0dc9 "", m_cpp_tok_end = 0x7fcfcc0e0dc9 "", m_body_utf8 = 0x0, m_body_utf8_ptr = 0x0, m_cpp_utf8_processed_ptr = 0x0, next_state = MY_LEX_END, found_semicolon = 0x0, ignore_space = false, stmt_prepare_mode = false, multi_statements = true, yylineno = 1, m_digest = 0x0, in_comment = NO_COMMENT, in_comment_saved = NO_COMMENT, m_cpp_text_start = 0x7fcfcc0e0fc7 "1", m_cpp_text_end = 0x7fcfcc0e0fc8 "", m_underscore_cs = 0x0}, m_yacc = {yacc_yyss = 0x0, yacc_yyvs = 0x0, m_set_signal_info = {m_item = {0x0 <repeats 12 times>}}, m_lock_type = TL_READ_DEFAULT, m_mdl_type = MDL_SHARED_READ}, m_digest_psi = 0x0}
|
net = 0x7fcfcc2ce3e8
|
do_end_of_statement = true
|
__FUNCTION__ = "dispatch_command"
|
error = false
|
drop_more_results = false
|
#8 0x00005642bbce231f in do_command (thd=0x7fcfcc2ce168) at /usr/src/debug/MariaDB-/src_0/sql/sql_parse.cc:1371
|
return_value = <optimized out>
|
packet = 0x7fcfcc282548 "\003flush binary logs"
|
packet_length = <optimized out>
|
net = 0x7fcfcc2ce3e8
|
command = COM_QUERY
|
__FUNCTION__ = "do_command"
|
#9 0x00005642bbdd22c9 in do_handle_one_connection (connect=<optimized out>, connect@entry=0x5642f81cb3a8, put_in_cache=put_in_cache@entry=true) at /usr/src/debug/MariaDB-/src_0/sql/sql_connect.cc:1418
|
create_user = true
|
thr_create_utime = <optimized out>
|
thd = 0x7fcfcc2ce168
|
#10 0x00005642bbdd2574 in handle_one_connection (arg=arg@entry=0x5642f81cb3a8) at /usr/src/debug/MariaDB-/src_0/sql/sql_connect.cc:1312
|
connect = 0x5642f81cb3a8
|
#11 0x00005642bc1683a7 in pfs_spawn_thread (arg=0x5642f89df1f8) at /usr/src/debug/MariaDB-/src_0/storage/perfschema/pfs.cc:2201
|
typed_arg = 0x5642f89df1f8
|
user_arg = 0x5642f81cb3a8
|
pfs = <optimized out>
|
user_start_routine = 0x5642bbdd2540 <handle_one_connection(void*)>
|
klass = <optimized out>
|
#12 0x00007fde4ea03ea5 in start_thread () from /lib64/libpthread.so.0
|
No symbol table info available.
|
#13 0x00007fde4df1e9fd in clone () from /lib64/libc.so.6
|
No symbol table info available.
|
Only killing the mariadbd process helps. Code review seems to give a hint that wait is here:
4263 bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
|
4264 rpl_gtid *init_state, uint32 init_state_len ,
|
4265 ulong next_log_number)
|
4266 {
|
...
|
4464 if (!is_relay_log)
|
4465 {
|
4466 xid_count_per_binlog *b;
|
4467 /*
|
4468 Remove all entries in the xid_count list except the last.
|
4469 Normally we will just be deleting all the entries that we waited for to
|
4470 drop to zero above. But if we fail during RESET MASTER for some reas on
|
4471 then we will not have created any new log file, and we may keep the last
|
4472 of the old entries.
|
4473 */
|
4474 mysql_mutex_lock(&LOCK_xid_list);
|
4475 for (;;)
|
4476 {
|
4477 b= binlog_xid_count_list.head();
|
4478 DBUG_ASSERT(b /* List can never become empty. */);
|
4479 if (b->binlog_id == current_binlog_id)
|
4480 break;
|
4481 DBUG_ASSERT(b->xid_count == 0);
|
4482 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
|
4483 "xid_list_entry for %s (%lu)", b);
|
4484 delete binlog_xid_count_list.get();
|
4485 }
|
4486 mysql_cond_broadcast(&COND_xid_list);
|
4487 reset_master_pending--;
|
4488 mysql_mutex_unlock(&LOCK_xid_list);
|
4489 }
|
4490
|
4491 mysql_mutex_unlock(&LOCK_index);
|
4492 mysql_mutex_unlock(&LOCK_log);
|
4493 DBUG_RETURN(error);
|
4494 }
|
There is a backtrace and full backtrace attached to the related customer issue. This happens on a Galera node that seems to be used an async replication master for a couple of slaves too.
Attachments
Issue Links
- relates to
-
MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file for recovery too early
-
- Closed
-
-
MDEV-27127 slave hung at shutdown
-
- Open
-
Howdy, Brandon.
There two commits on bb-10.5-andrei that comprise a backporting solution
which makes sure the "manual" (via setting of need_unlog) xid unlogging works with
1pc branch (to my examination the 2pc branch [of {{ordered_commit}}] is correct]).
10.6 branch was special that
MDEV-26031reinforced binlog_commit is always runas the 1st hton commit in the 1pc case.
Like the 1st commit message admits the backport is a maximum what I've found that we can do now. The matter of course is complicated by the presence of galera and perhaps the latter plays
its critical role to hit the issue. I am leaving that possibility open, and a new bug would be reported if this backport won't help.