Details
-
Bug
-
Status: Open (View Workflow)
-
Major
-
Resolution: Unresolved
-
12.3.1
-
None
Description
MDEV-39341 HammerDB TPROC-C testing of 659fe18b99d shown that, when some contention in InnoDB index locks is released, Opt_trace_start::init() starts popping up in the top% functions by perf report.
The function, at least in a RelWithDebInfo build with GCC 15.2.1, gets compiled to:
(gdb) disassemble /s Opt_trace_start::init
|
Dump of assembler code for function _ZN15Opt_trace_start4initEP3THDP10TABLE_LIST16enum_sql_commandP4ListI12set_var_baseEPKcmPK15charset_info_st:
|
/home/avetere/repos/personal/server/sql/opt_trace.cc:
|
494 {
|
495 /*
|
496 if optimizer trace is enabled and the statement we have is traceable,
|
497 then we start the context.
|
498 */
|
499 const ulonglong var= thd->variables.optimizer_trace;
|
0x00000000004d4470 <+0>: push rbp
|
0x00000000004d4471 <+1>: mov r10,rsi
|
0x00000000004d4474 <+4>: mov rsi,r9
|
0x00000000004d4477 <+7>: mov rbp,rsp
|
0x00000000004d447a <+10>: push r15
|
0x00000000004d447c <+12>: push r14
|
0x00000000004d447e <+14>: push r13
|
0x00000000004d4480 <+16>: push r12
|
0x00000000004d4482 <+18>: push rbx
|
0x00000000004d4483 <+19>: sub rsp,0x28
|
0x00000000004d4487 <+23>: mov rax,QWORD PTR [r10+0x950]
|
|
|
500 traceable= FALSE;
|
0x00000000004d448e <+30>: mov r9,QWORD PTR [rbp+0x10]
|
0x00000000004d4492 <+34>: mov BYTE PTR [rdi+0x8],0x0
|
|
|
501 if (unlikely(var & Opt_trace_context::FLAG_ENABLED) &&
|
0x00000000004d4496 <+38>: mov r11,QWORD PTR [rbp+0x18]
|
|
|
505 !thd->system_thread &&
|
0x00000000004d449a <+42>: test al,0x1
|
0x00000000004d449c <+44>: jne 0x4d44b0 <_ZN15Opt_trace_start4initEP3THDP10TABLE_LIST16enum_sql_commandP4ListI12set_var_baseEPKcmPK15charset_info_st+64>
|
|
|
516 }
|
517 }
|
0x00000000004d449e <+46>: lea rsp,[rbp-0x28]
|
0x00000000004d44a2 <+50>: pop rbx
|
0x00000000004d44a3 <+51>: pop r12
|
0x00000000004d44a5 <+53>: pop r13
|
0x00000000004d44a7 <+55>: pop r14
|
0x00000000004d44a9 <+57>: pop r15
|
0x00000000004d44ab <+59>: pop rbp
|
0x00000000004d44ac <+60>: ret
|
0x00000000004d44ad <+61>: nop DWORD PTR [rax]
|
...
|
So if the flag optimizer_trace is not enabled, which by default is not (and most likely is not during HammerDB testing), the function does early return, still after loading the flag, and doing push/pop to setup and dismantle the stack. This could be related to the measured overhead.
Below an extract of the perf report results from MDEV-39341:
12.3.1-commit-21a0714a118 (12.3.1)
|
3.63%,mariadbd, [.] ssux_lock_impl<true>::rd_lock_spin()
|
1.72%,mariadbd, [.] int page_cur_dtuple_cmp<false>(dtuple_t const&, unsigned char const*, dict_index_t const&, unsigned short*, unsigned long)
|
1.52%,mariadbd, [.] cmp_dtuple_rec_bytes(unsigned char const*, dict_index_t const&, dtuple_t const&, int*, unsigned long)
|
1.48%,[kernel.kallsyms], [k] native_queued_spin_lock_slowpath
|
1.45%,mariadbd, [.] page_cur_search_with_match(dtuple_t const*, page_cur_mode_t, unsigned short*, unsigned short*, page_cur_t*, rtr_info*)
|
1.37%,mariadbd, [.] buf_page_get_gen(page_id_t, unsigned long, rw_lock_type_t, buf_block_t*, unsigned long, mtr_t*, dberr_t*)
|
|
|
12.3.2-commit-82ba33a7997 (12.3 on Apr 2nd 2026)
|
3.97%,mariadbd, [.] ssux_lock_impl<true>::rd_lock_spin()
|
1.60%,mariadbd, [.] cmp_dtuple_rec_bytes(unsigned char const*, dict_index_t const&, dtuple_t const&, int*, unsigned long)
|
1.58%,mariadbd, [.] int page_cur_dtuple_cmp<false>(dtuple_t const&, unsigned char const*, dict_index_t const&, unsigned short*, unsigned long)
|
1.24%,mariadbd, [.] buf_page_get_gen(page_id_t, unsigned long, rw_lock_type_t, buf_block_t*, unsigned long, mtr_t*, dberr_t*)
|
1.15%,mariadbd, [.] page_cur_search_with_match(dtuple_t const*, page_cur_mode_t, unsigned short*, unsigned short*, page_cur_t*, rtr_info*)
|
1.12%,mariadbd, [.] Opt_trace_start::init(THD*, TABLE_LIST*, enum_sql_command, List<set_var_base>*, char const*, unsigned long, charset_info_st const*)
|
|
|
12.3.2-commit-659fe18b99d (above + #4975)
|
1.60%,mariadbd, [.] cmp_dtuple_rec_bytes(unsigned char const*, dict_index_t const&, dtuple_t const&, int*, unsigned long)
|
1.49%,mariadbd, [.] int page_cur_dtuple_cmp<false>(dtuple_t const&, unsigned char const*, dict_index_t const&, unsigned short*, unsigned long)
|
1.24%,mariadbd, [.] buf_page_get_gen(page_id_t, unsigned long, rw_lock_type_t, buf_block_t*, unsigned long, mtr_t*, dberr_t*)
|
1.21%,mariadbd, [.] Opt_trace_start::init(THD*, TABLE_LIST*, enum_sql_command, List<set_var_base>*, char const*, unsigned long, charset_info_st const*)
|
1.15%,mariadbd, [.] page_cur_search_with_match(dtuple_t const*, page_cur_mode_t, unsigned short*, unsigned short*, page_cur_t*, rtr_info*)
|
1.11%,mariadbd, [.] ssux_lock_impl<true>::rd_lock_spin()
|
Attachments
Issue Links
- relates to
-
MDEV-39341 HammerDB TPROC-C performance drops after 1 minute with increased futex contention
-
- In Testing
-