Details
-
Bug
-
Status: Open (View Workflow)
-
Critical
-
Resolution: Unresolved
-
10.4.13
-
None
-
linux OS 8.1
Description
When we execute select for update SQL through airflow, it causes MariaDB crash。
Trying to get some variables.
Some pointers may be invalid and cause the dump to abort.
Query (0x7f9cb0948970): SELECT task_instance.try_number AS task_instance_try_number, task_instance.task_id AS task_instance_task_id, task_instance.dag_id AS task_instance_dag_id, task_instance.execution_date AS task_instance_execution_date, task_instance.start_date AS task_instance_start_date, task_instance.end_date AS task_instance_end_date, task_instance.duration AS task_instance_duration, task_instance.state AS task_instance_state, task_instance.max_tries AS task_instance_max_tries, task_instance.hostname AS task_instance_hostname, task_instance.unixname AS task_instance_unixname, task_instance.job_id AS task_instance_job_id, task_instance.pool AS task_instance_pool, task_instance.pool_slots AS task_instance_pool_slots, task_instance.queue AS task_instance_queue, task_instance.priority_weight AS task_instance_priority_weight, task_instance.operator AS task_instance_operator, task_instance.queued_dttm AS task_instance_queued_dttm, task_instance.pid AS task_instance_pid, task_instance.executor_config AS task_instance_executor_config FROM task_instance WHERE (task_instance.dag_id = 'BROAD_P_IPDP_ACBAL_STA_F' AND task_instance.task_id = 'check_mysql_pythonOperatorIPDP_CONFIRMATION_TXRECORD_I' AND task_instance.execution_date = '2020-07-20 01:00:00' OR task_instance.dag_id = 'BROAD_P_IPDP_ACBAL_STA_F' AND task_instance.task_id = 'check_mysql_pythonOperatorIPDP_PRODUCT_NAV_INFO_F' AND task_instance.execution_date = '2020-07-20 01:00:00' OR task_instance.dag_id = 'BROAD_P_GJJ_BAT_CARD_BIND_STA_F' AND task_instance.task_id = 'pre-step-GJJ_BAT_CARD_BIND_DETAIL_RECEIVE_I' AND task_instance.execution_date = '2020-07-20 01:00:00' OR task_instance.dag_id = 'BROAD_P_GJJ_BAT_CARD_BIND_STA_F' AND task_instance.task_id = 'pre-step-GJJ_BAT_CARD_BIND_SUM_RECEIVE_I' AND task_instance.execution_date = '2020-07-20 01:00:00' OR task_instance.dag_id = 'BROAD_P_GJJ_BAT_CARD_BIND_STA_F' AND task_instance.task_id = 'pre-step-GJJ_BAT_TRADE_DETAIL_I' AND task_instance.execution_date = '2020-07-20 01:00:00' OR task_instance.dag_id = 'BROAD_P_GJJ_BAT_CARD_BIND_STA_F' AND task_instance.task_id = 'pre-step-GJJ_LOCAL_TRAN_GJJ_F' AND task_instance.execution_date = '2020-07-20 01:00:00') AND task_instance.state IN ('scheduled') FOR UPDATE
Connection ID (thread ID): 38313056
Status: NOT_KILLED
Attachments
Issue Links
- relates to
-
MDEV-21345 Deadlocks when two transactions update the same row at the same time
-
- Open
-
-
MDEV-23939 Crash with mariadb 10.4: storage/innobase/row/row0sel.cc:4480: row_search_mvcc Assertion `prebuilt->sql_stat_start || trx->state == TRX_STATE_ACTIVE || (prebuilt->table->no_rollback() && trx->state == TRX_STATE_NOT_STARTED)’ failed.
-
- Closed
-
-
MDEV-26291 ER_FILSORT_ABORT triggers - crash query when "for update" in a select statement
-
- Closed
-
Activity
zhangwf, in 233.42_err_20200729.log the stack trace is not fully resolved:
where mysqld died. If you see no messages after this, something went
|
terribly wrong...
|
stack_bottom = 0x7f9cf93e4e50 thread_stack 0x49000
|
Fatal signal 11 while backtracing
|
/usr/local/mysql/bin/mysqld(my_print_stacktrace+0x2e)[0x556ea7203e2e]
|
/usr/local/mysql/bin/mysqld(handle_fatal_signal+0x30f)[0x556ea6c14b3f]
|
Fatal signal 11 while backtracing
|
/lib64/libpthread.so.0(+0x12dc0)[0x7f9e46cdfdc0]
|
Fatal signal 11 while backtracing
|
/lib64/libc.so.6(gsignal+0x10f)[0x7f9e459f18df]
|
/lib64/libc.so.6(abort+0x127)[0x7f9e459dbcf5]
|
Fatal signal 11 while backtracing
|
/usr/local/mysql/bin/mysqld(+0x629e48)[0x556ea6914e48]
|
/usr/local/mysql/bin/mysqld(+0xb1d72e)[0x556ea6e0872e]
|
/usr/local/mysql/bin/mysqld(+0xb1d86b)[0x556ea6e0886b]
|
/usr/local/mysql/bin/mysqld(+0xb1e61a)[0x556ea6e0961a]
|
/usr/local/mysql/bin/mysqld(+0xb1eae3)[0x556ea6e09ae3]
|
/usr/local/mysql/bin/mysqld(+0x629062)[0x556ea6914062]
|
/usr/local/mysql/bin/mysqld(+0xbb9109)[0x556ea6ea4109]
|
/usr/local/mysql/bin/mysqld(+0xad8bb8)[0x556ea6dc3bb8]
|
/usr/local/mysql/bin/mysqld(_ZN7handler17ha_index_read_mapEPhPKhm16ha_rkey_function+0x1c8)[0x556ea6c19a28]
|
/usr/local/mysql/bin/mysqld(+0x768cb7)[0x556ea6a53cb7]
|
/usr/local/mysql/bin/mysqld(_Z10sub_selectP4JOINP13st_join_tableb+0x1a9)[0x556ea6a458b9]
|
That is a bug in our diagnostic code (actually 2 bugs because we seem to hit SIGSEGV while trying to generate the stack trace).
An incomplete stack trace like this makes very difficult to determine the reason of the crash. Can you please try to resolve the numeric addresses in the stack trace output? If https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mysqld/ is not applicable (you cannot generate a core dump), I would suggest the following approach to decode something like
gdb /usr/local/bin/mysqld
|
list *0x629e48
|
list *0xb1d72e
|
…
|
list *0x768cb7
|
quit
|
If the short addresses do not work, try the long ones, although they could differ from run to run due to Address Space Layout Randomization (ASLR).
"..." was meant to mean all the addresses
long resolve of stack |
(gdb) list *0x629e48
|
0x629e48 is in grant_reload(THD*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_acl.cc:1809).
|
1804 Class that represents a collection of grant tables.
|
1805 */
|
1806 class Grant_tables
|
1807 {
|
1808 public:
|
1809 Grant_tables() : p_user_table(&m_user_table_json) { }
|
1810
|
1811 int open_and_lock(THD *thd, int which_tables, enum thr_lock_type lock_type)
|
1812 {
|
1813 DBUG_ENTER("Grant_tables::open_and_lock");
|
(gdb) list *0x556ea6914e48
|
(gdb) list *0xb1d72e
|
0xb1d72e is in row_count_rtree_recs(row_prebuilt_t*, unsigned long*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/data0data.ic:407).
|
402 ut_ad(heap);
|
403
|
404 buf_size = DTUPLE_EST_ALLOC(n_fields + n_v_fields);
|
405 buf = mem_heap_alloc(heap, buf_size);
|
406
|
407 tuple = dtuple_create_from_mem(buf, buf_size, n_fields, n_v_fields);
|
408
|
409 return(tuple);
|
410 }
|
411
|
(gdb) list *0xb1d86b
|
0xb1d86b is in row_count_rtree_recs(row_prebuilt_t*, unsigned long*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/mem0mem.ic:405).
|
400 mem_heap_t* heap)
|
401 {
|
402 mem_block_t* block;
|
403 mem_block_t* prev_block;
|
404
|
405 block = UT_LIST_GET_LAST(heap->base);
|
406
|
407 if (heap->free_block) {
|
408 mem_heap_free_block_free(heap);
|
409 }
|
(gdb) list *0xb1e61a
|
0xb1e61a is in row_undo_search_clust_to_pcur(undo_node_t*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/data0data.ic:449).
|
444 ulint n_v_fields = dtuple_get_n_v_fields(tuple);
|
445 dtuple_t* new_tuple = dtuple_create_with_vcol(
|
446 heap, n_fields, n_v_fields);
|
447 ulint i;
|
448
|
449 for (i = 0; i < n_fields; i++) {
|
450 dfield_copy(dtuple_get_nth_field(new_tuple, i),
|
451 dtuple_get_nth_field(tuple, i));
|
452 }
|
453
|
(gdb) list *0xb1eae3
|
0xb1eae3 is in row_undo_step(que_thr_t*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/trx0undo.ic:52).
|
47
|
48 roll_ptr = (roll_ptr_t) is_insert << ROLL_PTR_INSERT_FLAG_POS
|
49 | (roll_ptr_t) rseg_id << ROLL_PTR_RSEG_ID_POS
|
50 | (roll_ptr_t) page_no << ROLL_PTR_PAGE_POS
|
51 | offset;
|
52 return(roll_ptr);
|
53 }
|
54
|
55 /***********************************************************************//**
|
56 Decodes a roll pointer. */
|
(gdb) list *0x629062
|
0x629062 is in merge_role_routine_grant_privileges(char const*, char const*, role_hash_t*, HASH*, ACL_ROLE*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_alloc.h:39).
|
34 return thd_alloc(thd_get_current_thd(), size);
|
35 }
|
36 static void *operator new[](size_t size, MEM_ROOT *mem_root) throw ()
|
37 { return alloc_root(mem_root, size); }
|
38 static void *operator new(size_t size, MEM_ROOT *mem_root) throw()
|
39 { return alloc_root(mem_root, size); }
|
40 static void operator delete(void *ptr, size_t size) { TRASH_FREE(ptr, size); }
|
41 static void operator delete(void *, MEM_ROOT *){}
|
42 static void operator delete[](void *, MEM_ROOT *)
|
43 { /* never called */ }
|
(gdb) list *0xbb9109
|
0xbb9109 is in buf_flush_sync_all_buf_pools() (/usr/include/c++/8/bits/atomic_base.h:468).
|
463 /usr/include/c++/8/bits/atomic_base.h: No such file or directory.
|
(gdb) list *0xad8bb8
|
0xad8bb8 is in rec_init_offsets_temp(unsigned char const*, dict_index_t const*, unsigned short*, unsigned long, dict_col_t::def_t const*, rec_comp_status_t) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/rem0rec.h:341).
|
336 @param[in,out] header variable header of a REC_STATUS_INSTANT record
|
337 @return number of added fields */
|
338 inline unsigned rec_get_n_add_field(const byte*& header)
|
339 {
|
340 unsigned n_fields_add = *--header;
|
341 if (n_fields_add < 0x80) {
|
342 ut_ad(rec_get_n_add_field_len(n_fields_add) == 1);
|
343 return n_fields_add;
|
344 }
|
345
|
(gdb) list *(_ZN7handler17ha_index_read_mapEPhPKhm16ha_rkey_function+0x1c8)
|
0x8b1fc8 is in handler::ha_index_read_map(unsigned char*, unsigned char const*, unsigned long, ha_rkey_function) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_class.h:3988).
|
3983 */
|
3984 inline void set_killed(killed_state killed_arg,
|
3985 int killed_errno_arg= 0,
|
3986 const char *killed_err_msg_arg= 0)
|
3987 {
|
3988 mysql_mutex_lock(&LOCK_thd_kill);
|
3989 set_killed_no_mutex(killed_arg, killed_errno_arg, killed_err_msg_arg);
|
3990 mysql_mutex_unlock(&LOCK_thd_kill);
|
3991 }
|
3992 /*
|
(gdb) list *(0x768cb7)
|
0x768cb7 is in st_key::actual_rec_per_key(unsigned int) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/table.cc:9070).
|
9065 }
|
9066
|
9067
|
9068 double KEY::actual_rec_per_key(uint i)
|
9069 {
|
9070 if (rec_per_key == 0)
|
9071 return 0;
|
9072 return (is_statistics_from_stat_tables ?
|
9073 read_stats->get_avg_frequency(i) : (double) rec_per_key[i]);
|
9074 }
|
(gdb) list *(_Z10sub_selectP4JOINP13st_join_tableb+0x1a9)
|
0x6dcfa9 is in sub_select(JOIN*, st_join_table*, bool) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:20374).
|
20369 /*
|
20370 Note: psergey has added the 2nd part of the following condition; the
|
20371 change should probably be made in 5.1, too.
|
20372 */
|
20373 bool skip_over= FALSE;
|
20374 while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
|
20375 {
|
20376 if (join_tab->loosescan_match_tab &&
|
20377 join_tab->loosescan_match_tab->found_match)
|
20378 {
|
(gdb) list *(_ZN4JOIN10exec_innerEv+0xc73)
|
0x702a73 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_error.h:1007.
|
1002
|
1003 void reset_diagnostics_area();
|
1004
|
1005 bool is_set() const { return m_status != DA_EMPTY; }
|
1006
|
1007 bool is_error() const { return m_status == DA_ERROR; }
|
1008
|
1009 bool is_eof() const { return m_status == DA_EOF; }
|
1010
|
1011 bool is_ok() const { return m_status == DA_OK; }
|
(gdb) list *(_ZN4JOIN4execEv+0x33)
|
0x702cd3 is in JOIN::exec() (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:4242).
|
4237 select_lex->select_number))
|
4238 dbug_serve_apcs(thd, 1);
|
4239 );
|
4240 ANALYZE_START_TRACKING(&explain->time_tracker);
|
4241 exec_inner();
|
4242 ANALYZE_STOP_TRACKING(&explain->time_tracker);
|
4243
|
4244 DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
|
4245 if (dbug_user_var_equals_int(thd,
|
4246 "show_explain_probe_select_id",
|
(gdb) list *((_Z12mysql_selectP3THDP10TABLE_LISTjR4ListI4ItemEPS4_jP8st_orderS9_S7_S9_yP13select_resultP18st_select_lex_unitP13st_select_lex+0x186))
|
0x701076 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:4695.
|
4690 {
|
4691 THD_STAGE_INFO(thd, stage_end);
|
4692 err|= (int)(select_lex->cleanup());
|
4693 DBUG_RETURN(err || thd->is_error());
|
4694 }
|
4695 DBUG_RETURN(join->error ? join->error: err);
|
4696 }
|
4697
|
4698
|
4699 /*****************************************************************************
|
(gdb) list *((_Z13handle_selectP3THDP3LEXP13select_resultm+0x1c7))
|
0x7019f7 is in handle_select(THD*, LEX*, select_result*, unsigned long) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:428).
|
423 }
|
424 DBUG_PRINT("info",("res: %d report_error: %d", res,
|
425 thd->is_error()));
|
426 res|= thd->is_error();
|
427 if (unlikely(res))
|
428 result->abort_result_set();
|
429 if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors))
|
430 {
|
431 /*
|
432 If LIMIT ROWS EXAMINED interrupted query execution, issue a warning,
|
(gdb) list *(+0x6167d7)
|
0x6167d7 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sp_head.cc:5110.
|
5105 if (instr_copy_struct == NULL || add_instr(instr_copy_struct))
|
5106 return true;
|
5107
|
5108 sp_instr_copen *instr_copen=
|
5109 new (thd->mem_root) sp_instr_copen(instructions(), spcont, coffset);
|
5110 if (instr_copen == NULL || add_instr(instr_copen))
|
5111 return true;
|
5112
|
5113 sp_instr_cfetch *instr_cfetch=
|
5114 new (thd->mem_root) sp_instr_cfetch(instructions(),
|
(gdb) list *(_Z16dispatch_command19enum_server_commandP3THDPcjbb+0x2e80)
|
0x6acea0 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_profile.h:312.
|
307 void status_change(const char *status_arg,
|
308 const char *function_arg,
|
309 const char *file_arg, unsigned int line_arg)
|
310 {
|
311 if (unlikely(current))
|
312 current->new_status(status_arg, function_arg, file_arg, line_arg);
|
313 }
|
314
|
315 inline void set_thd(THD *thd_arg)
|
316 {
|
(gdb) list *(_Z10do_commandP3THD+0x109)
|
0x6acfc9 is in do_command(THD*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_parse.cc:1359).
|
1354 /* Restore read timeout value */
|
1355 my_net_set_read_timeout(net, thd->variables.net_read_timeout);
|
1356
|
1357 DBUG_ASSERT(packet_length);
|
1358 DBUG_ASSERT(!thd->apc_target.is_enabled());
|
1359 return_value= dispatch_command(command, thd, packet+1,
|
1360 (uint) (packet_length-1), FALSE, FALSE);
|
1361 DBUG_ASSERT(!thd->apc_target.is_enabled());
|
1362
|
1363 out:
|
(gdb) list *((_Z24do_handle_one_connectionP7CONNECT+0x1fa))
|
0x78f68a is in do_handle_one_connection(CONNECT*) (/usr/src/debug/MariaDB-10.4.13/src_0/include/mysql/psi/mysql_socket.h:150).
|
145 #endif
|
146 )
|
147 {
|
148 #ifdef HAVE_PSI_SOCKET_INTERFACE
|
149 if (socket.m_psi != NULL)
|
150 PSI_SOCKET_CALL(set_socket_thread_owner)(socket.m_psi);
|
151 #endif
|
152 }
|
153
|
154 /**
|
short resolve of stack |
0x629e48 is in grant_reload(THD*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_acl.cc:1809).
|
0xb1d72e is in row_count_rtree_recs(row_prebuilt_t*, unsigned long*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/data0data.ic:407).
|
0xb1d86b is in row_count_rtree_recs(row_prebuilt_t*, unsigned long*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/mem0mem.ic:405).
|
0xb1e61a is in row_undo_search_clust_to_pcur(undo_node_t*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/data0data.ic:449).
|
0xb1eae3 is in row_undo_step(que_thr_t*) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/trx0undo.ic:52).
|
0x629062 is in merge_role_routine_grant_privileges(char const*, char const*, role_hash_t*, HASH*, ACL_ROLE*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_alloc.h:39).
|
0xbb9109 is in buf_flush_sync_all_buf_pools() (/usr/include/c++/8/bits/atomic_base.h:468).
|
0xad8bb8 is in rec_init_offsets_temp(unsigned char const*, dict_index_t const*, unsigned short*, unsigned long, dict_col_t::def_t const*, rec_comp_status_t) (/usr/src/debug/MariaDB-10.4.13/src_0/storage/innobase/include/rem0rec.h:341).
|
0x8b1fc8 is in handler::ha_index_read_map(unsigned char*, unsigned char const*, unsigned long, ha_rkey_function) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_class.h:3988).
|
0x768cb7 is in st_key::actual_rec_per_key(unsigned int) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/table.cc:9070).
|
0x6dcfa9 is in sub_select(JOIN*, st_join_table*, bool) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:20374).
|
0x702a73 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_error.h:1007.
|
0x702cd3 is in JOIN::exec() (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:4242).
|
0x701076 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:4695.
|
0x7019f7 is in handle_select(THD*, LEX*, select_result*, unsigned long) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_select.cc:428).
|
0x6167d7 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sp_head.cc:5110.
|
0x6acea0 is at /usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_profile.h:312.
|
0x6acfc9 is in do_command(THD*) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/sql_parse.cc:1359).
|
0x78f68a is in do_handle_one_connection(CONNECT*) (/usr/src/debug/MariaDB-10.4.13/src_0/include/mysql/psi/mysql_socket.h:150).
|
{noformat)
|
|
|
{noformat:title=signal in backtrace}
|
(gdb) list *(my_print_stacktrace+0x2e)
|
0xdae5ae is in my_print_stacktrace (/usr/src/debug/MariaDB-10.4.13/src_0/mysys/stacktrace.c:269).
|
264 void my_print_stacktrace(uchar* stack_bottom, ulong thread_stack,
|
265 my_bool silent __attribute__((unused)))
|
266 {
|
267 void *addrs[128];
|
268 char **strings __attribute__((unused)) = NULL;
|
269 int n = backtrace(addrs, array_elements(addrs));
|
270 my_safe_printf_stderr("stack_bottom = %p thread_stack 0x%lx\n",
|
271 stack_bottom, thread_stack);
|
272 #if HAVE_MY_ADDR_RESOLVE
|
273 if (print_with_addr_resolve(addrs, n))
|
(gdb) list *(handle_fatal_signal+0x30f)
|
0x8ab92f is in handle_fatal_signal(int) (/usr/src/debug/MariaDB-10.4.13/src_0/sql/signal_handler.cc:267).
|
262 }
|
263
|
264 my_safe_printf_stderr("\nConnection ID (thread ID): %lu\n",
|
265 (ulong) thd->thread_id);
|
266 my_safe_printf_stderr("Status: %s\n\n", kreason);
|
267 my_safe_printf_stderr("%s", "Optimizer switch: ");
|
268 ulonglong optsw= thd->variables.optimizer_switch;
|
269 for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1)
|
270 {
|
271 if (i)
|
I haven't identified any other backtrace that is similar currently in JIRA
This looks like it is the same problem as in MDEV-23939 (which itself relates to MDEV-20666).
zhangwf, are you still affected by this issue?
danblack, thank you for your help in resolving the stack trace, but it does not make sense to me. For MDEV-20666, a reproducible test case exists. I think that this bug as well as MDEV-23939 could be fixed by fixing MDEV-20666.
Thanks for the bug report. To ensure we have enough information to try to reproduce this can you config:
`
`