[MDEV-29930] Server hang with innodb_file_per_table=0, innodb_undo_tablespaces=0 - Jira

Details

Type: Bug
Status: Open (View Workflow)
Priority: Major
Resolution: Unresolved
Affects Version/s: 10.6, 10.7(EOL), 10.8(EOL), 10.9(EOL), 10.10(EOL), 10.11
Fix Version/s: 10.6, 10.11
Component/s: Storage Engine - InnoDB
Labels:
- hang

Description

mleich produced a core dump where InnoDB is hanging as follows.

Thread 43 is holding exclusive fil_system.sys_space->latch and waiting for a latch on the clustered index root page, which is exclusively locked by Thread 25, which is waiting for the fil_system.sys_space->latch. Stack traces:

bb-10.6-MDEV-29835 5d32d49cdac0936012914e11492c75b88c5307b9
Thread 25 (Thread 0x7f04c70f4700 (LWP 3792727)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x0000563f9e0e4c30 in srw_mutex_impl<false>::wait (lk=2147483650, this=0x6120000231e8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/sync/srw_lock.cc:238
#2 srw_mutex_impl<false>::wait_and_lock (this=this@entry=0x6120000231e8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/sync/srw_lock.cc:326
#3 0x0000563f9de18e3c in srw_mutex_impl<false>::wr_lock (this=0x6120000231e8) at /usr/include/c++/9/bits/atomic_base.h:539
#4 ssux_lock_impl<false>::wr_lock (this=0x6120000231e8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/include/srw_lock.h:257
#5 fil_space_t::x_lock (this=0x612000023140) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/include/fil0fil.h:1058
#6 mtr_t::x_lock_space (this=this@entry=0x7f04c70ee990, space=space@entry=0x612000023140) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/mtr/mtr0mtr.cc:816
#7 0x0000563f9e46fa13 in fsp_reserve_free_extents (n_reserved=n_reserved@entry=0x7f04c70ee330, space=space@entry=0x612000023140, n_ext=n_ext@entry=2, alloc_type=alloc_type@entry=FSP_UNDO, mtr=mtr@entry=0x7f04c70ee990, n_pages=n_pages@entry=2) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/fsp/fsp0fsp.cc:2391
#8 0x0000563f9e1a7006 in trx_undo_seg_create (space=0x612000023140, rseg_hdr=rseg_hdr@entry=0x7f04e8f034f0, id=id@entry=0x7f04c70ee620, err=err@entry=0x7f04c70ee870, mtr=mtr@entry=0x7f04c70ee990) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/trx/trx0undo.cc:500
#9 0x0000563f9e1a8598 in trx_undo_create (trx=trx@entry=0x7f04e9c09440, rseg=rseg@entry=0x563f9fbcf380 <trx_sys+33024>, undo=undo@entry=0x7f04e9c09d90, err=err@entry=0x7f04c70ee870, mtr=mtr@entry=0x7f04c70ee990) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/trx/trx0undo.cc:1250
#10 0x0000563f9e1ad36c in trx_undo_assign_low (trx=trx@entry=0x7f04e9c09440, rseg=rseg@entry=0x563f9fbcf380 <trx_sys+33024>, undo=undo@entry=0x7f04e9c09d90, err=err@entry=0x7f04c70ee870, mtr=mtr@entry=0x7f04c70ee990) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/trx/trx0undo.cc:1424
#11 0x0000563f9e1365ed in trx_undo_report_row_operation (thr=thr@entry=0x6210019504c0, index=index@entry=0x616000743208, clust_entry=clust_entry@entry=0x6170004a4888, update=update@entry=0x0, cmpl_info=cmpl_info@entry=0, rec=rec@entry=0x0, offsets=<optimized out>, roll_ptr=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/trx/trx0rec.cc:1889
#12 0x0000563f9e222616 in btr_cur_ins_lock_and_undo (flags=flags@entry=0, cursor=cursor@entry=0x7f04c70ef840, entry=entry@entry=0x6170004a4888, thr=thr@entry=0x6210019504c0, mtr=mtr@entry=0x7f04c70efc50, inherit=inherit@entry=0x7f04c70ef190) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/btr/btr0cur.cc:3172
#13 0x0000563f9e22f3af in btr_cur_optimistic_insert (flags=flags@entry=0, cursor=cursor@entry=0x7f04c70ef840, offsets=offsets@entry=0x7f04c70ef760, heap=heap@entry=0x7f04c70ef740, entry=entry@entry=0x6170004a4888, rec=rec@entry=0x7f04c70ef780, big_rec=<optimized out>, n_ext=0, thr=<optimized out>, mtr=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/btr/btr0cur.cc:3401
…
#27 0x0000563f9c96b2d1 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x62b00026f218, packet=packet@entry=0x629000eb0219 "INSERT INTO `table100_innodb_int_autoinc` ( `col_varchar_255_ucs2_key`, `col_enum_ucs2`, `pk` ) VALUES ( REPEAT( _UCS2 0xFC, 88 ), REPEAT( _LATIN1 0xB82, 55 ), CONVERT( 'nyvoycvkwzuzafewftjyajgpenqfea"..., packet_length=packet_length@entry=402, blocking=blocking@entry=true) at /data/Server/bb-10.6-MDEV-29835A/sql/sql_class.h:1362
…
Thread 43 (Thread 0x7f04c720c700 (LWP 3792726)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x0000563f9e0e4eab in srw_mutex_impl<true>::wait (lk=2147483650, this=0x7f04e8f09dd8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/sync/srw_lock.cc:238
#2 srw_mutex_impl<true>::wait_and_lock (this=this@entry=0x7f04e8f09dd8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/sync/srw_lock.cc:326
#3 0x0000563f9e1e3dde in srw_mutex_impl<true>::wr_lock (this=0x7f04e8f09dd8) at /usr/include/c++/9/bits/atomic_base.h:413
#4 ssux_lock_impl<true>::u_lock (this=0x7f04e8f09dd8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/include/srw_lock.h:250
#5 sux_lock<ssux_lock_impl<true> >::u_lock (this=0x7f04e8f09dd8) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/include/sux_lock.h:378
#6 btr_page_alloc_low (index=index@entry=0x616000743208, hint_page_no=hint_page_no@entry=919, file_direction=file_direction@entry=113 'q', level=level@entry=0, mtr=mtr@entry=0x7f04c7207480, init_mtr=init_mtr@entry=0x7f04c7207480, err=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/btr/btr0btr.cc:531
#7 0x0000563f9e1e4018 in btr_page_alloc (index=index@entry=0x616000743208, hint_page_no=919, file_direction=file_direction@entry=113 'q', level=level@entry=0, mtr=mtr@entry=0x7f04c7207480, init_mtr=init_mtr@entry=0x7f04c7207480, err=0x7f04c72072a0) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/include/dict0mem.h:1216
#8 0x0000563f9e2457c4 in btr_store_big_rec_extern_fields (pcur=pcur@entry=0x611000190900, offsets=<optimized out>, big_rec_vec=<optimized out>, btr_mtr=btr_mtr@entry=0x7f04c7208020, op=op@entry=BTR_STORE_UPDATE) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/btr/btr0cur.cc:6981
#9 0x0000563f9e094c72 in row_upd_clust_rec (flags=flags@entry=0, node=node@entry=0x6230002ff4d0, index=index@entry=0x616000743208, offsets=offsets@entry=0x7f04c7207d40, offsets_heap=offsets_heap@entry=0x7f04c7207be0, thr=thr@entry=0x6230002ffb70, mtr=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/row/row0upd.cc:2425
#10 0x0000563f9e09d67d in row_upd_clust_step (node=node@entry=0x6230002ff4d0, thr=thr@entry=0x6230002ffb70) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/row/row0upd.cc:2651
#11 0x0000563f9e09df80 in row_upd (node=node@entry=0x6230002ff4d0, thr=thr@entry=0x6230002ffb70) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/row/row0upd.cc:2752
#12 0x0000563f9e09ebb8 in row_upd_step (thr=thr@entry=0x6230002ffb70) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/row/row0upd.cc:2894
#13 0x0000563f9dfc1c22 in row_update_for_mysql (prebuilt=0x6230002fe988) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/row/row0mysql.cc:1690
#14 0x0000563f9dbd09d8 in ha_innobase::update_row (this=0x61d000995cb8, old_row=<optimized out>, new_row=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/storage/innobase/handler/ha_innodb.cc:8658
#15 0x0000563f9d20f9c2 in handler::ha_update_row (this=0x61d000995cb8, old_data=0x6260002ce578 "", new_data=0x6260002cd138 "") at /data/Server/bb-10.6-MDEV-29835A/sql/handler.cc:7619
#16 0x0000563f9cc9ff87 in mysql_update (thd=thd@entry=0x62b00025a218, table_list=<optimized out>, fields=..., values=..., conds=<optimized out>, order_num=<optimized out>, order=<optimized out>, limit=18446744073709551569, ignore=<optimized out>, found_return=<optimized out>, updated_return=<optimized out>) at /data/Server/bb-10.6-MDEV-29835A/sql/sql_update.cc:1087
#17 0x0000563f9c97b2fe in mysql_execute_command (thd=thd@entry=0x62b00025a218, is_called_from_prepared_stmt=is_called_from_prepared_stmt@entry=false) at /data/Server/bb-10.6-MDEV-29835A/sql/sql_limit.h:85
#18 0x0000563f9c93cd24 in mysql_parse (thd=thd@entry=0x62b00025a218, rawbuf=<optimized out>, length=<optimized out>, parser_state=parser_state@entry=0x7f04c720a130) at /data/Server/bb-10.6-MDEV-29835A/sql/sql_parse.cc:8016
#19 0x0000563f9c96b2d1 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x62b00025a218, packet=packet@entry=0x629000e7e219 "UPDATE `table100_innodb_int_autoinc` SET `col_longtext_ucs2` = REPEAT( _UCS2 0x6A2, 193 ) WHERE `pk` != CONVERT( 'lucyycvudjxjxzxuyfcoekngbfghdzoyfzwxbtnfcruncyegayljpvrntajwwjlabfnrjccsatyzboxyjcojxc"..., packet_length=packet_length@entry=259, blocking=blocking@entry=true) at /data/Server/bb-10.6-MDEV-29835A/sql/sql_class.h:1362

I believe that the minimum test case should be something like the following:

SET GLOBAL innodb_file_per_table=0;

CREATE TABLE t(pk INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b BLOB) ENGINE=InnoDB;

INSERT INTO t SELECT seq,REPEAT('x',20000) FROM seq_1_to_100000;

and concurrent UPDATE t SET b=CONCAT(b,'y') WHERE pk=… from at least 2 threads.

A possible fix might be to exclusively acquire fil_system.sys_space->latch upfront for every operation that might need to allocate a page, for tables stored in the system tablespace.

Attachments

- Sort By Name
- Sort By Date
- Ascending
- Descending
- Thumbnails
- List
- Download All

Freeze1_MDEV-29930.cfg
44 kB
2022-11-02 13:32
Freeze1_MDEV-29930-nano.yy
0.4 kB
2022-11-02 13:30

Issue Links

is blocked by

MDEV-29835 Partial server freeze

Closed

relates to

MDEV-29983 Deprecate innodb_file_per_table

Closed

MDEV-30638 Deadlock due to updating InnoDB statistics

Closed

Activity

Ascending order - Click to sort in descending order

Matthias Leich added a comment - 2022-11-02 13:39 - edited

RQG

-------

git clone https://github.com/mleich1/rqg --branch experimental RQG

Freeze1_MDEV-29930-nano.yy (simplified RQG grammar, only two concurrent sessions are required):

thread1:

    UPDATE table100_innodb_int_autoinc SET _field_no_pk = value WHERE `col_enum_utf8` IS NULL ;

thread1_connect: ;

thread1_init: ;

thread2:

    INSERT INTO table100_innodb_int_autoinc ( col_char_255_utf8 , col_longtext_ucs2_key , col_enum_utf8 ) VALUES ( value , value , value ) ;

thread2_connect: ;

thread2_init: ;

value:

    REPEAT( _UCS2 _hex , _tinyint_unsigned ) ;

Freeze1_MDEV-29930.cfg : Config file for the simplifier

./REPLAY_SIMP.sh Freeze1_MDEV-29930.cfg <path to installed MariaDB>

Some results of testing with the simplified test on

origin/bb-10.6-MDEV-29835 5d32d49cdac0936012914e11492c75b88c5307b9 2022-10-31T12:06:50+02:00

innodb_file_per_table=0, innodb_undo_tablespaces=0  47 RQG tests, 10 replays

innodb_file_per_table=1, innodb_undo_tablespaces=0  ~ 1000 RQG tests, no replay

innodb_file_per_table=0, innodb_undo_tablespaces=2  ~ 2000 RQG tests, no replay

Matthias Leich added a comment - 2022-11-02 13:39 - edited RQG ------- git clone https://github.com/mleich1/rqg --branch experimental RQG Freeze1_MDEV-29930-nano.yy (simplified RQG grammar, only two concurrent sessions are required): thread1: UPDATE table100_innodb_int_autoinc SET _field_no_pk = value WHERE `col_enum_utf8` IS NULL ; thread1_connect: ; thread1_init: ; thread2: INSERT INTO table100_innodb_int_autoinc ( col_char_255_utf8 , col_longtext_ucs2_key , col_enum_utf8 ) VALUES ( value , value , value ) ; thread2_connect: ; thread2_init: ; value: REPEAT( _UCS2 _hex , _tinyint_unsigned ) ; Freeze1_MDEV-29930.cfg : Config file for the simplifier ./REPLAY_SIMP.sh Freeze1_MDEV-29930.cfg <path to installed MariaDB> Some results of testing with the simplified test on origin/bb-10.6-MDEV-29835 5d32d49cdac0936012914e11492c75b88c5307b9 2022-10-31T12:06:50+02:00 innodb_file_per_table=0, innodb_undo_tablespaces=0 47 RQG tests, 10 replays innodb_file_per_table=1, innodb_undo_tablespaces=0 ~ 1000 RQG tests, no replay innodb_file_per_table=0, innodb_undo_tablespaces=2 ~ 2000 RQG tests, no replay

Marko Mäkelä added a comment - 2023-02-13 14:38

This is conceptually related to ~~MDEV-30638~~. The only case when the undo log is located in the same tablespace with the table is when both are in the system tablespace, or in the temporary tablespace. Quoting and amending my comment from there:

The relevant latching order here should be as follows:

1. Acquire dict_index_t::lock in non-shared mode.

2. Acquire the index root page latch in non-shared mode.

3. Possibly acquire further index page latches in accordance with the WL#6326 rules explained in ~~MDEV-29835~~. (When not holding exclusive dict_index_t::lock, acquire the index page latches in strict root-to-leaf, left-to-right order, to avoid deadlocks.)

4. Acquire the tablespace latch.

5. Acquire latches on the allocation data structures. These are protected by the tablespace latch.

6. Possibly allocate and write some pages.

The scenario of ~~MDEV-30638~~ applied to a pessimistic operation (one where not only an index leaf page needs to be modified). The hang here involves an optimistic operation where no index pages will be allocated or freed.

The hang here involves the allocation of undo log pages, not index pages. That can also happen in an optimistic operation, either because this is the first undo log record to be written by the transaction, or because the undo log record does not fit in the last allocated page.

To fix this, we might do one or both of the following:

Always use the BTR_MODIFY_ROOT_AND_LEAF mode also for "optimistic" modifications when the table and undo logs reside in the system tablespace.
Possibly do something similar for the temporary tablespace, or find out if such a hang is possible for temporary tablespaces.

Marko Mäkelä added a comment - 2023-02-13 14:38 This is conceptually related to MDEV-30638 . The only case when the undo log is located in the same tablespace with the table is when both are in the system tablespace, or in the temporary tablespace. Quoting and amending my comment from there: The relevant latching order here should be as follows: Acquire dict_index_t::lock in non-shared mode. Acquire the index root page latch in non-shared mode. Possibly acquire further index page latches in accordance with the WL#6326 rules explained in MDEV-29835 . (When not holding exclusive dict_index_t::lock , acquire the index page latches in strict root-to-leaf, left-to-right order, to avoid deadlocks.) Acquire the tablespace latch. Acquire latches on the allocation data structures. These are protected by the tablespace latch. Possibly allocate and write some pages. The scenario of MDEV-30638 applied to a pessimistic operation (one where not only an index leaf page needs to be modified). The hang here involves an optimistic operation where no index pages will be allocated or freed. The hang here involves the allocation of undo log pages , not index pages . That can also happen in an optimistic operation, either because this is the first undo log record to be written by the transaction, or because the undo log record does not fit in the last allocated page. To fix this, we might do one of/or two things: Always use the BTR_MODIFY_ROOT_AND_LEAF mode also for "optimistic" modifications when the table and undo logs reside in the system tablespace. Possibly do something similar for the temporary tablespace, or find out if such a hang is possible for temporary tablespaces.

Marko Mäkelä added a comment - 2023-02-13 14:41

In a hang that I just analyzed, another thread was waiting for a leaf page latch held by an optimistic insert, which in turn was waiting for the system tablespace latch in trx_undo_report_row_operation().

But, in that hang, the thread identified by fil_system.sys_space.latch_owner is actually executing a double btr_compress() and waiting for an exclusive level-1 page latch. This is wrong, because the thread is holding a non-exclusive index latch and holding at least one level-0 (leaf) page latch:

(gdb) p *mtr.m_memo.small@9

$11 = {{object = 0x61600070a980, type = MTR_MEMO_SX_LOCK}, {object = 0x7f8faa829560, type = MTR_MEMO_PAGE_X_FIX}, {object = 0x7f8faa84bf80, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa840f30,

    type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa8418f0, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa842380, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x612000002740, type = MTR_MEMO_SPACE_X_LOCK}, {

    object = 0x7f8faa821840, type = MTR_MEMO_PAGE_SX_FIX}, {object = 0x7f8faa8418f0, type = MTR_MEMO_PAGE_X_FIX}}

(gdb) p/x ((buf_page_t*)mtr.m_memo.small[5].object).frame[64]@2

$12 = {0x0, 0x0}

In other words, this is a ~~MDEV-29835~~ hang. I think that we must fix the remaining cases of that bug first and then check if this bug is a mere consequence of that hang.

Here is the relevant part of stack trace of the offending thread:

#8  0x0000555832679b5c in btr_can_merge_with_page (cursor=cursor@entry=0x7f8f8e0e17d0, page_no=page_no@entry=1049, merge_block=merge_block@entry=0x7f8f8e0e0930, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:5455

#9  0x000055583269927b in btr_compress (cursor=cursor@entry=0x7f8f8e0e17d0, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:3775

#10 0x00005558326cd7b5 in btr_cur_compress_if_useful (cursor=cursor@entry=0x7f8f8e0e17d0, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4323

#11 0x00005558326eacd1 in btr_cur_pessimistic_delete (err=err@entry=0x7f8f8e0e1520, has_reserved_extents=has_reserved_extents@entry=1, cursor=cursor@entry=0x7f8f8e0e17d0, flags=flags@entry=16, rollback=rollback@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4783

#12 0x00005558326eb25b in btr_cur_node_ptr_delete (parent=parent@entry=0x7f8f8e0e17d0, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4821

#13 0x0000555832699698 in btr_compress (cursor=cursor@entry=0x7f8f8e0e2310, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:3893

#14 0x00005558326cd7b5 in btr_cur_compress_if_useful (cursor=cursor@entry=0x7f8f8e0e2310, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4323

#15 0x00005558326eacd1 in btr_cur_pessimistic_delete (err=err@entry=0x7f8f8e0e2260, has_reserved_extents=has_reserved_extents@entry=0, cursor=cursor@entry=0x7f8f8e0e2310, flags=flags@entry=0, rollback=rollback@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4783

#16 0x00005558324f9899 in row_purge_remove_sec_if_poss_tree (node=node@entry=0x61a000009fb0, index=index@entry=0x61600070a808, entry=entry@entry=0x619001287308) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/row/row0purge.cc:410

Marko Mäkelä added a comment - 2023-02-13 14:41 In a hang that I just analyzed, another thread was waiting for a leaf page latch that was held by an optimistic insert, waiting for the system tablespace latch in trx_undo_report_row_operation() . But, in that hang, the thread identified by fil_system.sys_space.latch_owner is actually executing a double btr_compress() and waiting for an exclusive level-1 page latch. This is wrong, because the thread is holding a non-exclusive index latch and holding at least one level-0 (leaf) page latch: (gdb) p *mtr.m_memo.small@9 $11 = {{object = 0x61600070a980, type = MTR_MEMO_SX_LOCK}, {object = 0x7f8faa829560, type = MTR_MEMO_PAGE_X_FIX}, {object = 0x7f8faa84bf80, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa840f30, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa8418f0, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x7f8faa842380, type = MTR_MEMO_PAGE_X_MODIFY}, {object = 0x612000002740, type = MTR_MEMO_SPACE_X_LOCK}, { object = 0x7f8faa821840, type = MTR_MEMO_PAGE_SX_FIX}, {object = 0x7f8faa8418f0, type = MTR_MEMO_PAGE_X_FIX}} (gdb) p/x ((buf_page_t*)mtr.m_memo.small[5].object).frame[64]@2 $12 = {0x0, 0x0} In other words, this is a MDEV-29835 hang. I think that we must fix the remaining cases of that bug first and then check if this bug is a mere consequence of that hang. 
Here is the relevant part of stack trace of the offending thread: #8 0x0000555832679b5c in btr_can_merge_with_page (cursor=cursor@entry=0x7f8f8e0e17d0, page_no=page_no@entry=1049, merge_block=merge_block@entry=0x7f8f8e0e0930, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:5455 #9 0x000055583269927b in btr_compress (cursor=cursor@entry=0x7f8f8e0e17d0, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:3775 #10 0x00005558326cd7b5 in btr_cur_compress_if_useful (cursor=cursor@entry=0x7f8f8e0e17d0, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4323 #11 0x00005558326eacd1 in btr_cur_pessimistic_delete (err=err@entry=0x7f8f8e0e1520, has_reserved_extents=has_reserved_extents@entry=1, cursor=cursor@entry=0x7f8f8e0e17d0, flags=flags@entry=16, rollback=rollback@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4783 #12 0x00005558326eb25b in btr_cur_node_ptr_delete (parent=parent@entry=0x7f8f8e0e17d0, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4821 #13 0x0000555832699698 in btr_compress (cursor=cursor@entry=0x7f8f8e0e2310, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0btr.cc:3893 #14 0x00005558326cd7b5 in btr_cur_compress_if_useful (cursor=cursor@entry=0x7f8f8e0e2310, adjust=adjust@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4323 #15 0x00005558326eacd1 in btr_cur_pessimistic_delete 
(err=err@entry=0x7f8f8e0e2260, has_reserved_extents=has_reserved_extents@entry=0, cursor=cursor@entry=0x7f8f8e0e2310, flags=flags@entry=0, rollback=rollback@entry=false, mtr=mtr@entry=0x7f8f8e0e25f0) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/btr/btr0cur.cc:4783 #16 0x00005558324f9899 in row_purge_remove_sec_if_poss_tree (node=node@entry=0x61a000009fb0, index=index@entry=0x61600070a808, entry=entry@entry=0x619001287308) at /data/Server/bb-10.6-MDEV-26055-2-MDEV-26827-MDEV-30134-MDEV-30638/storage/innobase/row/row0purge.cc:410

MariaDB Server

Server hang with innodb_file_per_table=0, innodb_undo_tablespaces=0

Details

Description

Attachments

Attachments

Issue Links

Activity

People

Dates

Git Integration