Details
-
Bug
-
Status: Closed (View Workflow)
-
Major
-
Resolution: Not a Bug
-
N/A
-
None
Description
Start by applying the attached patch to 10.11, revision 6ebdd3013a18b01dbecec76b870810329eb76586. It contains 17 additional random usleeps in InnoDB's locking code in storage/innobase/lock/lock0lock.cc (with a few matching extra brackets were needed). Then run the following code using a C based client but not the CLI.
Note: Most of the code in this testcase is repeated to trigger the issue more readily. Using a patched tree, the issue should show as good as every run.
# mysqld options possibly required for replay: --innodb_change_buffering=inserts --innodb_fast_shutdown=1 --innodb_file_per_table=1 |
CREATE DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t; |
SELECT SLEEP(0.2); |
CREATE DEFINER=root@localhost EVENT e2 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t; |
SELECT SLEEP(0.2); |
CREATE DEFINER=root@localhost EVENT e3 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t; |
SELECT SLEEP(0.2); |
CREATE DEFINER=root@localhost EVENT e4 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t; |
SELECT SLEEP(0.1); |
CREATE DEFINER=root@localhost EVENT e4 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t; |
SET GLOBAL event_scheduler=ON; |
CREATE TABLE t (id INT); |
INSERT INTO t VALUES (1),(1); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=134217728; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=134217728; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=134217728; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=134217728; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=134217728; |
SELECT SLEEP(3); |
SET GLOBAL innodb_buffer_pool_size=21474836480; |
SELECT SLEEP(3); |
Leads to:
10.11.0 ba875e939619baefb08936863a889830f595e426 (Debug) |
Core was generated by `/test/SLEEP_MD071022-mariadb-10.11.0-linux-x86_64-dbg/bin/mysqld --no-defaults'.
|
Program terminated with signal SIGSEGV, Segmentation fault.
|
#0 lock_sys_t::hash_table::resize (
|
this=this@entry=0x55dcb133ba98 <lock_sys+88>, n=n@entry=6553600)
|
at /test/10.11_dbg/storage/innobase/lock/lock0lock.cc:123
|
[Current thread is 1 (Thread 0x15192d1d4700 (LWP 2940395))]
|
(gdb) bt
|
#0 lock_sys_t::hash_table::resize (this=this@entry=0x55dcb133ba98 <lock_sys+88>, n=n@entry=6553600) at /test/10.11_dbg/storage/innobase/lock/lock0lock.cc:123
|
#1 0x000055dcafbc307b in lock_sys_t::resize (this=0x55dcb133ba40 <lock_sys>, n_cells=6553600) at /test/10.11_dbg/storage/innobase/lock/lock0lock.cc:464
|
#2 0x000055dcafdecf88 in buf_pool_t::resize (this=this@entry=0x55dcb0a1f000 <buf_pool>) at /test/10.11_dbg/storage/innobase/buf/buf0buf.cc:1813
|
#3 0x000055dcafde1065 in buf_resize_callback () at /test/10.11_dbg/storage/innobase/buf/buf0buf.cc:1860
|
#4 0x000055dcafefbe0d in tpool::task_group::execute (this=0x55dcb1365be0 <single_threaded_group>, t=t@entry=0x55dcb1365b40 <buf_resize_task>) at /test/10.11_dbg/tpool/task_group.cc:70
|
#5 0x000055dcafefbe95 in tpool::task::execute (this=0x55dcb1365b40 <buf_resize_task>) at /test/10.11_dbg/tpool/task.cc:32
|
#6 0x000055dcafef98ab in tpool::thread_pool_generic::worker_main (this=0x55dcb18a14b0, thread_var=0x55dcb18a1f60) at /test/10.11_dbg/tpool/tpool_generic.cc:580
|
#7 0x000055dcafefafde in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__t=<optimized out>, __f=<optimized out>) at /usr/include/c++/9/bits/invoke.h:89
|
#8 std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=<optimized out>) at /usr/include/c++/9/bits/invoke.h:95
|
#9 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=<optimized out>) at /usr/include/c++/9/thread:244
|
#10 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=<optimized out>) at /usr/include/c++/9/thread:251
|
#11 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=<optimized out>) at /usr/include/c++/9/thread:195
|
#12 0x0000151946c11de4 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
|
#13 0x0000151946d2b609 in start_thread (arg=<optimized out>) at pthread_create.c:477
|
#14 0x0000151946917133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
|
10.11.0 ba875e939619baefb08936863a889830f595e426 (Debug) |
2022-10-15 14:37:32 0 [Note] /test/SLEEP_MD071022-mariadb-10.11.0-linux-x86_64-dbg/bin/mysqld: ready for connections.
|
Version: '10.11.0-MariaDB-debug' socket: '/test/SLEEP_MD071022-mariadb-10.11.0-linux-x86_64-dbg/socket.sock' port: 12293 MariaDB Server
|
2022-10-15 14:37:33 6 [Note] Event Scheduler: scheduler thread started with id 6
|
2022-10-15 14:37:33 0 [Note] InnoDB: Resizing buffer pool from 128.000MiB to 20.000GiB (unit = 2.000MiB).
|
2022-10-15 14:37:33 0 [Note] InnoDB: Disabling adaptive hash index.
|
2022-10-15 14:37:33 0 [Note] InnoDB: Withdrawing blocks to be shrunken.
|
2022-10-15 14:37:33 0 [Note] InnoDB: Latching entire buffer pool.
|
2022-10-15 14:37:33 0 [Note] InnoDB: Resizing buffer pool from 64 chunks to 10240 chunks.
|
2022-10-15 14:37:34 0 [Note] InnoDB: 10176 chunks (1282170 blocks) were added.
|
2022-10-15 14:37:34 0 [Note] InnoDB: Resizing other hash tables.
|
-EOL-
|
With this part alone from the sleeps patch (the one that introduces the loop as marko explained)
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 1c11efafc7a..d3331b0c196 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -118,7 +118,7 @@ void lock_sys_t::hash_table::resize(ulint n)
else
{
lock_t *next= static_cast<lock_t*>(c->node);
- while (next->hash)
+ while (next->hash) usleep(1000000);
next= next->hash;
next->hash= lock;
}
and the test case which is basically the same as in the description, I only converted it into an infinite loop
--source include/have_innodb.inc
--delimiter $$
LOOP
reproducible easily enough, both via MTR and the client (for the latter, --delimiter needs to be changed to DELIMITER), with a regular sigsegv-ed stack trace
10.11 6ebdd3013a18b01dbecec76b870810329eb76586 + patch above
#3 <signal handler called>
#4 0x000056377b853fa2 in lock_sys_t::hash_table::resize (this=0x56378de2a7a0 <lock_sys+96>, n=40960) at /data/src/10.11-bug/storage/innobase/lock/lock0lock.cc:123
#5 0x000056377b85788c in lock_sys_t::resize (this=0x56378de2a740 <lock_sys>, n_cells=40960) at /data/src/10.11-bug/storage/innobase/lock/lock0lock.cc:463
#6 0x000056377bf54a7a in buf_pool_t::resize (this=0x56378aebbb40 <buf_pool>) at /data/src/10.11-bug/storage/innobase/buf/buf0buf.cc:1813
#7 0x000056377bf23c37 in buf_resize_callback () at /data/src/10.11-bug/storage/innobase/buf/buf0buf.cc:1860
#8 0x000056377c2fc998 in tpool::task_group::execute (this=0x56378de612a0 <single_threaded_group>, t=0x56378de61360 <buf_resize_task>) at /data/src/10.11-bug/tpool/task_group.cc:70
#9 0x000056377c2fdd97 in tpool::task::execute (this=0x56378de61360 <buf_resize_task>) at /data/src/10.11-bug/tpool/task.cc:32
#10 0x000056377c2dceb9 in tpool::thread_pool_generic::worker_main (this=0x618000000880, thread_var=0x630000020400) at /data/src/10.11-bug/tpool/tpool_generic.cc:580
#11 0x000056377c2fbc94 in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__f=@0x604000000d28: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x56377c2dc8d4 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>, __t=@0x604000000d20: 0x618000000880) at /usr/include/c++/10/bits/invoke.h:73
#12 0x000056377c2fb642 in std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=@0x604000000d28: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x56377c2dc8d4 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>) at /usr/include/c++/10/bits/invoke.h:95
#13 0x000056377c2fb419 in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=0x604000000d18) at /usr/include/c++/10/thread:264
#14 0x000056377c2fb29a in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=0x604000000d18) at /usr/include/c++/10/thread:271
#15 0x000056377c2fb262 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=0x604000000d10) at /usr/include/c++/10/thread:215
#16 0x00007f3129738ed0 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#17 0x00007f3129844ea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
#18 0x00007f3128ac7aef in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95