Details
-
Bug
-
Status: Open (View Workflow)
-
Major
-
Resolution: Unresolved
-
10.6, 10.11
Description
mleich produced an interesting core dump where many threads end up waiting on an index page of a innodb_file_per_table=0 table.
ssh pluto
|
gdb -c /data/results/1733769117/002802/1/data/core.926429 /data/Server_bin/10.6-MDEV-24035_fast/bin/mariadbd
|
One thread is trying to upgrade its shared latch to exclusive:
10.6-MDEV-24035 15abe5a8eba370f9d53419d44c219ec9ae679326 based on 10.6 7372ecc396517839a1546a8c1dd9192711922ef1 |
Thread 26 (Thread 0x7f35b0892640 (LWP 926988)):
|
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
|
#1 0x000055e03329d540 in ssux_lock_impl<true>::wait (lk=<optimized out>, this=0x7f35880598a0) at /data/Server/10.6-MDEV-24035/storage/innobase/sync/srw_lock.cc:260
|
#2 ssux_lock_impl<true>::wr_wait (this=this@entry=0x7f35880598a0, lk=<optimized out>) at /data/Server/10.6-MDEV-24035/storage/innobase/sync/srw_lock.cc:371
|
#3 0x000055e032be4718 in ssux_lock_impl<true>::u_wr_upgrade (this=0x7f35880598a0) at /data/Server/10.6-MDEV-24035/storage/innobase/include/srw_lock.h:299
|
#4 sux_lock<ssux_lock_impl<true> >::u_x_upgrade (this=0x7f35880598a0) at /data/Server/10.6-MDEV-24035/storage/innobase/include/sux_lock.h:439
|
#5 mtr_t::index_lock_upgrade (this=this@entry=0x7f35b088feb0) at /data/Server/10.6-MDEV-24035/storage/innobase/btr/btr0cur.cc:1721
|
#6 0x000055e032be4783 in btr_cur_t::pessimistic_search_leaf (this=this@entry=0x7f35b088f680, tuple=tuple@entry=0x7f352403c288, mode=mode@entry=PAGE_CUR_LE, mtr=mtr@entry=0x7f35b088feb0) at /data/Server/10.6-MDEV-24035/storage/innobase/btr/btr0cur.cc:1751
|
#7 0x000055e0332c831b in btr_cur_t::search_leaf (this=this@entry=0x7f35b088f680, tuple=tuple@entry=0x7f352403c288, mode=mode@entry=PAGE_CUR_LE, latch_mode=<optimized out>, latch_mode@entry=1098, mtr=mtr@entry=0x7f35b088feb0) at /data/Server/10.6-MDEV-24035/storage/innobase/btr/btr0cur.cc:1573
|
With tfaas print mtr.m_memo I found out that only this thread as well as Thread 12 are holding a latch on this index page. Thread 12 is blocked on ibuf.index->lock:
10.6-MDEV-24035 15abe5a8eba370f9d53419d44c219ec9ae679326 based on 10.6 7372ecc396517839a1546a8c1dd9192711922ef1 |
#5 sux_lock<ssux_lock_impl<true> >::x_lock (for_io=false, this=0x55e0354ec8f0) at /data/Server/10.6-MDEV-24035/storage/innobase/include/sux_lock.h:429
|
#6 mtr_t::x_lock (lock=0x55e0354ec8f0, this=0x7f357ae37af0) at /data/Server/10.6-MDEV-24035/storage/innobase/include/mtr0mtr.h:271
|
#7 ibuf_delete_rec (page_id=..., pcur=0x7f357ae37640, search_tuple=0x7f35580204f8, mtr=0x7f357ae37af0) at /data/Server/10.6-MDEV-24035/storage/innobase/ibuf/ibuf0ibuf.cc:3978
|
#8 0x000055e033214567 in ibuf_merge_or_delete_for_page (block=block@entry=0x7f35c2074f00, page_id=..., zip_size=<optimized out>) at /data/Server/10.6-MDEV-24035/storage/innobase/ibuf/ibuf0ibuf.cc:4321
|
#9 0x000055e032be6a5f in buf_page_ibuf_merge_try (block=block@entry=0x7f35c2074f00, rw_latch=2, err=err@entry=0x7f357ae3808c) at /data/Server/10.6-MDEV-24035/storage/innobase/include/buf0buf.h:649
|
#10 0x000055e032be6e2c in buf_page_get_low (page_id=..., zip_size=<optimized out>, rw_latch=<optimized out>, guess=<optimized out>, mode=<optimized out>, mtr=<optimized out>, err=<optimized out>, allow_ibuf_merge=<optimized out>) at /data/Server/10.6-MDEV-24035/storage/innobase/buf/buf0buf.cc:3015
|
#11 0x000055e0332c7eac in btr_cur_t::search_leaf (this=this@entry=0x7f357ae38680, tuple=tuple@entry=0x7f3564030f08, mode=mode@entry=PAGE_CUR_LE, latch_mode=<optimized out>, latch_mode@entry=66, mtr=mtr@entry=0x7f357ae38eb0) at /data/Server/10.6-MDEV-24035/storage/innobase/btr/btr0cur.cc:1268
|
#12 0x000055e033262cad in row_ins_sec_index_entry_low (flags=0, mode=BTR_MODIFY_LEAF, index=0x7f3588059728, offsets_heap=<optimized out>, heap=0x7f3558059e40, entry=0x7f3564030f08, trx_id=0, thr=0x7f3564031340) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3014
|
That latch is held by Thread 47, which in turn is waiting for ibuf_mutex, which Thread 12 is holding:
10.6-MDEV-24035 15abe5a8eba370f9d53419d44c219ec9ae679326 based on 10.6 7372ecc396517839a1546a8c1dd9192711922ef1 |
#4 0x000055e03321180f in inline_mysql_mutex_lock (that=0x55e0345a3a60 <ibuf_mutex>) at /data/Server/10.6-MDEV-24035/include/mysql/psi/mysql_thread.h:752
|
#5 ibuf_remove_free_page () at /data/Server/10.6-MDEV-24035/storage/innobase/ibuf/ibuf0ibuf.cc:1931
|
#6 0x000055e0332119e0 in ibuf_free_excess_pages () at /data/Server/10.6-MDEV-24035/storage/innobase/ibuf/ibuf0ibuf.cc:1993
|
#7 0x000055e0332663f3 in row_ins_sec_index_entry (index=0x7f3588059728, entry=0x7f3554053df8, thr=0x7f350404d0a0, check_foreign=<optimized out>) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3331
|
#8 0x000055e033266726 in row_ins_index_entry (thr=0x7f350404d0a0, entry=<optimized out>, index=<optimized out>) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3371
|
#9 row_ins_index_entry_step (thr=0x7f350404d0a0, node=0x7f350404cd98) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3535
|
#10 row_ins (thr=0x7f350404d0a0, node=0x7f350404cd98) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3660
|
#11 row_ins_step (thr=thr@entry=0x7f350404d0a0) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0ins.cc:3789
|
#12 0x000055e033273663 in row_insert_for_mysql (mysql_rec=mysql_rec@entry=0x7f3504044f28 "\377\371F\275|z", ' ' <repeats 119 times>, prebuilt=0x7f350404c638, ins_mode=ROW_INS_NORMAL) at /data/Server/10.6-MDEV-24035/storage/innobase/row/row0mysql.cc:1288
|
It seems that ibuf_free_excess_pages() should be rewritten something like this:
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
|
index 3e8f7888959..1fcab67e413 100644
|
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
|
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
|
@@ -1976,23 +1976,18 @@ ibuf_free_excess_pages(void)
|
/*========================*/
|
{
|
if (UNIV_UNLIKELY(!ibuf.index)) return;
|
+ mysql_mutex_lock(&ibuf_mutex);
|
/* Free at most a few pages at a time, so that we do not delay the
|
requested service too much */
|
|
- for (ulint i = 0; i < 4; i++) {
|
-
|
- ibool too_much_free;
|
-
|
- mysql_mutex_lock(&ibuf_mutex);
|
- too_much_free = ibuf_data_too_much_free();
|
- mysql_mutex_unlock(&ibuf_mutex);
|
-
|
- if (!too_much_free) {
|
- return;
|
- }
|
-
|
+ for (ulint i = 4; i-- && ibuf_data_too_much_free());
|
+ mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
|
ibuf_remove_free_page();
|
+ mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
|
+ mysql_mutex_unlock(&ibuf_mutex);
|
+ mysql_mutex_lock(&ibuf_mutex);
|
}
|
+ mysql_mutex_unlock(&ibuf_mutex);
|
}
|
|
#ifdef UNIV_DEBUG |
Also ibuf_remove_free_page() would have to be adjusted so that it would not acquire these mutexes.
Last but not least, all use of these two mutexes and ibuf.index->lock need to be reviewed, and debug assertions "not holding this latch" added before some mutex or rw-lock waits. SAFE_MUTEX does cover mysql_mutex_t but not any rw-lock, such as ibuf.index->lock.
Side note: The use of innodb_file_per_table=0 immediately made me think of MDEV-29930, but unlike that one, this run used innodb_undo_tablespaces=3.
Attachments
Issue Links
- relates to
-
MDEV-27734 Set innodb_change_buffering=none by default
- Closed
-
MDEV-29694 Remove the InnoDB change buffer
- Closed