|
I finally repeated this with rr replay (MDEV-22179), cherry-picking a version of the 10.2-22179 branch to 10.5. I also had to make safe_process safer, to get a complete trace:
diff --git a/mysql-test/lib/My/SafeProcess/safe_process.cc b/mysql-test/lib/My/SafeProcess/safe_process.cc
|
index 4d0d1e2a3a0..abc167a4300 100644
|
--- a/mysql-test/lib/My/SafeProcess/safe_process.cc
|
+++ b/mysql-test/lib/My/SafeProcess/safe_process.cc
|
@@ -144,7 +144,7 @@ static int kill_child(bool was_killed)
|
message("Killing child: %d", child_pid);
|
// Terminate whole process group
|
if (! was_killed)
|
- kill(-child_pid, SIGKILL);
|
+ kill(-child_pid, SIGQUIT);
|
|
pid_t ret_pid= waitpid(child_pid, &status, 0);
|
if (ret_pid == child_pid)
|
Furthermore, I improved the probability with the following patch:
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
|
index 00d11d1806e..50decfe7aac 100644
|
--- a/storage/innobase/dict/dict0dict.cc
|
+++ b/storage/innobase/dict/dict0dict.cc
|
@@ -356,6 +356,7 @@ dict_table_close(
|
if (!thd || !mdl) {
|
} else if (MDL_context *mdl_context= static_cast<MDL_context*>(
|
thd_mdl_context(thd))) {
|
+ os_thread_sleep(10000);
|
mdl_context->release_lock(mdl);
|
}
|
}
|
The error was reported here:
|
10.5 0d6d63e1505bbf9898d46fa641a7a0579a4ae460
|
Thread 3 hit Breakpoint 1, my_error (nr=1205, MyFlags=0) at /mariadb/10.5m/mysys/my_error.c:113
|
113 DBUG_ENTER("my_error");
|
#0 my_error (nr=1205, MyFlags=0) at /mariadb/10.5m/mysys/my_error.c:113
|
#1 0x000055788770c0ef in MDL_context::acquire_lock (this=0x7ff638000e68, mdl_request=0x7ff6cc0aa3a0, lock_wait_timeout=0) at /mariadb/10.5m/sql/mdl.cc:2322
|
#2 0x000055788770d055 in MDL_context::upgrade_shared_lock (this=0x7ff638000e68, mdl_ticket=0x7ff638022be0, new_type=MDL_EXCLUSIVE, lock_wait_timeout=0) at /mariadb/10.5m/sql/mdl.cc:2583
|
#3 0x0000557887690878 in mysql_inplace_alter_table (thd=0x7ff638000d48, table_list=0x7ff638011c28, table=0x7ff6341992f8, altered_table=0x7ff6cc0aa7e0, ha_alter_info=0x7ff6cc0abb30,
|
inplace_supported=HA_ALTER_INPLACE_INSTANT, target_mdl_request=<optimized out>, alter_ctx=0x7ff6cc0abbd0) at /mariadb/10.5m/sql/sql_table.cc:7797
|
#4 mysql_alter_table (thd=<optimized out>, new_db=<optimized out>, new_name=<optimized out>, create_info=<optimized out>, table_list=0x7ff638011c28, alter_info=<optimized out>, order_num=<optimized out>,
|
order=<optimized out>, ignore=<optimized out>, if_exists=<optimized out>) at /mariadb/10.5m/sql/sql_table.cc:10457
|
#5 0x0000557887707b6d in Sql_cmd_alter_table::execute (this=<optimized out>, thd=0x7ff638000d48) at /mariadb/10.5m/sql/sql_alter.cc:532
|
#6 0x00005578875d31da in mysql_execute_command (thd=0x7ff638000d48) at /mariadb/10.5m/sql/sql_parse.cc:5950
|
#7 0x00005578875ce39d in mysql_parse (thd=0x7ff638000d48, rawbuf=0x7ff638011af0 "ALTER TABLE t_permanent_innodb ADD COLUMN col2 INT", length=<optimized out>, parser_state=<optimized out>,
|
is_com_multi=<optimized out>, is_next_command=<optimized out>) at /mariadb/10.5m/sql/sql_parse.cc:7992
|
Thread 13 (a purge worker task) is executing the added sleep, waiting to release the MDL. Below is some more output. The output of thread apply all backtrace was too long for Jira.
|
10.5 0d6d63e1505bbf9898d46fa641a7a0579a4ae460
|
(rr) up
|
#1 0x000055788770c0ef in MDL_context::acquire_lock (this=0x7ff638000e68, mdl_request=0x7ff6cc0aa3a0, lock_wait_timeout=0) at /mariadb/10.5m/sql/mdl.cc:2322
|
2322 my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
|
(rr) p *this
|
$2 = {m_wait = {m_LOCK_wait_status = {m_mutex = {global = pthread_mutex_t = {Type = Error check, Status = Not acquired, Robust = No, Shared = No, Protocol = None}, mutex = pthread_mutex_t = {Type = Normal,
|
Status = Not acquired, Robust = No, Shared = No, Protocol = None}, file = 0x557888085b9d "/mariadb/10.5m/include/mysql/psi/mysql_thread.h", name = 0x5578880de49f "m_LOCK_wait_status", line = 1257,
|
count = 0, create_flags = 0, active_flags = 0, id = 266, thread = 0, locked_mutex = 0x7ff638007ae8, used_mutex = 0x7ff638007b58, prev = 0x0, next = 0x0}, m_psi = 0x7ff6e187c240}, m_COND_wait_status = {
|
m_cond = pthread_cond_t = {Threads known to still execute a wait function = 0, Clock ID = CLOCK_REALTIME, Shared = No}, m_psi = 0x5578899528c0}, m_wait_status = MDL_wait::GRANTED}, m_tickets = {
|
{<I_P_List_null_counter> = {<No data fields>}, <I_P_List_no_push_back<MDL_ticket>> = {<No data fields>}, m_first = 0x7ff63801c520},
|
{<I_P_List_null_counter> = {<No data fields>}, <I_P_List_no_push_back<MDL_ticket>> = {<No data fields>}, m_first = 0x7ff638022be0},
|
{<I_P_List_null_counter> = {<No data fields>}, <I_P_List_no_push_back<MDL_ticket>> = {<No data fields>}, m_first = 0x0}}, m_owner = 0x7ff638000e18, m_needs_thr_lock_abort = false, m_LOCK_waiting_for = {
|
m_prlock = {lock = pthread_mutex_t = {Type = Normal, Status = Not acquired, Robust = No, Shared = No, Protocol = None}, no_active_readers = pthread_cond_t = {
|
Threads known to still execute a wait function = 0, Clock ID = CLOCK_REALTIME, Shared = No}, active_readers = 0, writers_waiting_readers = 0, active_writer = 0 '\000', writer_thread = 0},
|
m_psi = 0x7ff6dcd16540}, m_waiting_for = 0x0, m_pins = 0x557889d05ca8}
|
(rr) p *mdl_request
|
$3 = {type = MDL_EXCLUSIVE, duration = MDL_TRANSACTION, next_in_list = 0x5578876be101
|
<open_table_from_share(THD*, TABLE_SHARE*, st_mysql_const_lex_string const*, unsigned int, unsigned int, unsigned int, TABLE*, bool, List<String>*)+1985>, prev_in_list = 0x20753ff5b63ef300, ticket = 0x0,
|
key = {m_length = 25, m_db_name_length = 4, m_hash_value = 1289468714,
|
m_ptr = "\002test\000t_permanent_innodb\000\240%\210\020\000\000\000\340\f\000\070\366\177\000\000\374\227%\210xU\000\000\313\022\000\000\000\000\000\000\360\244\n\314\366\177\000\000\060\250\n\314\366\177\000\000\000\363>\266\000\000\000\000H\r\000\070\366\177\000\000\017v\f\210xU\000\000\321f\f\210xU\000\000\a\000\000\200\240\020\000\000\020\246\n\314\366\177\000\000\324|\v\210xU\000\000\031\006\000\000\000\000\000\000\360\244\n\314\366\177\000\000\b\370\376\207xU\000\000\000\363>\266\365?u '\000\000\000\000\000\000\000 \302\065\210xU\000\000\340\f\000\070\366\177\000\000\060\302\065\210xU\000\000=\000\000\000\000\000\000\000"...}, m_src_file = 0x5578880de2ce "/mariadb/10.5m/sql/mdl.cc", m_src_line = 2581}
|
(rr) thread 13
|
(rr) frame 11
|
(rr) p *mdl
|
$5 = {<MDL_wait_for_subgraph> = {_vptr$MDL_wait_for_subgraph = 0x557888818fe8 <vtable for MDL_ticket+16>}, next_in_context = 0x0, prev_in_context = 0x557889ce31e8, next_in_lock = 0x0,
|
prev_in_lock = 0x7ff638022bf8, m_type = MDL_SHARED, m_duration = MDL_EXPLICIT, m_ctx = 0x557889ce30e8, m_lock = 0x7ff6342015b8, m_psi = 0x557889d2b0c0}
|
(rr) p *mdl.m_lock
|
$6 = {key = {m_length = 25, m_db_name_length = 4, m_hash_value = 1289468714, m_ptr = "\002test\000t_permanent_innodb\000", '\245' <repeats 362 times>}, m_rwlock = {m_prlock = {lock = pthread_mutex_t = {
|
Type = Normal, Status = Not acquired, Robust = No, Shared = No, Protocol = None}, no_active_readers = pthread_cond_t = {Threads known to still execute a wait function = 0, Clock ID = CLOCK_REALTIME,
|
Shared = No}, active_readers = 0, writers_waiting_readers = 0, active_writer = 0 '\000', writer_thread = 0}, m_psi = 0x7ff6dcd12a40}, m_granted = {
|
m_list = {<I_P_List_null_counter> = {<No data fields>}, <I_P_List_fast_push_back<MDL_ticket>> = {m_last = 0x7ff6a8000db8}, m_first = 0x7ff638022be0}, m_bitmap = 34}, m_waiting = {
|
m_list = {<I_P_List_null_counter> = {<No data fields>}, <I_P_List_fast_push_back<MDL_ticket>> = {m_last = 0x7ff6342017e0}, m_first = 0x0}, m_bitmap = 0}, m_hog_lock_count = 0,
|
m_strategy = 0x557888a0de98 <MDL_lock::m_object_lock_strategy>}
|
(rr) thread 13
|
[Switching to thread 13 (Thread 1437863.1438123)]
|
#0 0x0000000070000002 in ?? ()
|
(rr) frame 11
|
#11 0x0000557887e9b759 in dict_table_close (table=<optimized out>, dict_locked=false, try_drop=<optimized out>, thd=0x557889ce2fc8, mdl=0x7ff6a8000da0) at /mariadb/10.5m/storage/innobase/dict/dict0dict.cc:359
|
359 os_thread_sleep(10000);
|
(rr) p *mdl
|
$5 = {<MDL_wait_for_subgraph> = {_vptr$MDL_wait_for_subgraph = 0x557888818fe8 <vtable for MDL_ticket+16>}, next_in_context = 0x0, prev_in_context = 0x557889ce31e8, next_in_lock = 0x0,
|
prev_in_lock = 0x7ff638022bf8, m_type = MDL_SHARED, m_duration = MDL_EXPLICIT, m_ctx = 0x557889ce30e8, m_lock = 0x7ff6342015b8, m_psi = 0x557889d2b0c0}
|
(rr) p *mdl.m_lock
|
$6 = {key = {m_length = 25, m_db_name_length = 4, m_hash_value = 1289468714, m_ptr = "\002test\000t_permanent_innodb\000", '\245' <repeats 362 times>}, m_rwlock = {m_prlock = {lock = pthread_mutex_t = {
|
Type = Normal, Status = Not acquired, Robust = No, Shared = No, Protocol = None}, no_active_readers = pthread_cond_t = {Threads known to still execute a wait function = 0, Clock ID = CLOCK_REALTIME,
|
Shared = No}, active_readers = 0, writers_waiting_readers = 0, active_writer = 0 '\000', writer_thread = 0}, m_psi = 0x7ff6dcd12a40}, m_granted = {
|
m_list = {<I_P_List_null_counter> = {<No data fields>}, <I_P_List_fast_push_back<MDL_ticket>> = {m_last = 0x7ff6a8000db8}, m_first = 0x7ff638022be0}, m_bitmap = 34}, m_waiting = {
|
m_list = {<I_P_List_null_counter> = {<No data fields>}, <I_P_List_fast_push_back<MDL_ticket>> = {m_last = 0x7ff6342017e0}, m_first = 0x0}, m_bitmap = 0}, m_hog_lock_count = 0,
|
m_strategy = 0x557888a0de98 <MDL_lock::m_object_lock_strategy>}
|
|