|
I executed in GDB
set log MDEV-29799.txt
|
set pagination off
|
set print pretty on
|
set print frame-arguments all
|
thread apply all bt
|
info threads
|
show scheduler-locking
|
show schedule-multiple
|
show non-stop
|
show target-async
|
And attached the same as MDEV-29799.txt.
|
|
Thank you. Would the following patch fix this?
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
|
index 0d6a069d50d..5582dbce6a6 100644
|
--- a/storage/innobase/lock/lock0lock.cc
|
+++ b/storage/innobase/lock/lock0lock.cc
|
@@ -460,9 +460,11 @@ void lock_sys_t::resize(ulint n_cells)
|
/* Buffer pool resizing is rarely initiated by the user, and this
|
would exceed the maximum size of a memory transaction. */
|
LockMutexGuard g{SRW_LOCK_CALL};
|
+ mysql_mutex_lock(&wait_mutex);
|
rec_hash.resize(n_cells);
|
prdt_hash.resize(n_cells);
|
prdt_page_hash.resize(n_cells);
|
+ mysql_mutex_unlock(&wait_mutex);
|
}
|
|
/** Closes the lock system at database shutdown. */
|
|
|
Testing patch.
|
|
The sleeps.diff introduces an infinite while loop in lock_sys_t::hash_table::resize(ulint) due to missing braces. That will cause the buffer pool resizing thread to hang while holding exclusive lock_sys.latch. A mystery remains: What is sending the SIGSEGV to the process, on an instruction that did not dereference any memory, and without triggering the signal handler. I was suspecting the Linux kernel’s out-of-memory killer, but that should use SIGKILL.
|
|
Confirmed that the system did not run OOM and the process (before SIGSEGV crashing) took at max 0.3% of memory. dmesg also did not show any system operation on the process in question. Want to do a bit more research, self-assigning.
|
|
With this part alone from the sleeps patch (the one that introduces the loop as marko explained)
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
|
index 1c11efafc7a..d3331b0c196 100644
|
--- a/storage/innobase/lock/lock0lock.cc
|
+++ b/storage/innobase/lock/lock0lock.cc
|
@@ -118,7 +118,7 @@ void lock_sys_t::hash_table::resize(ulint n)
|
else
|
{
|
lock_t *next= static_cast<lock_t*>(c->node);
|
- while (next->hash)
|
+ while (next->hash) usleep(1000000);
|
next= next->hash;
|
next->hash= lock;
|
}
|
and the test case, which is basically the same as in the description; I only converted it into an infinite loop:
--source include/have_innodb.inc
|
|
CREATE DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t;
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e2 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t;
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e3 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t;
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e4 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO DELETE FROM t;
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e5 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO INSERT INTO t VALUES (1),(1);
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e6 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO INSERT INTO t VALUES (1),(1);
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e7 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO INSERT INTO t VALUES (1),(1);
|
SELECT SLEEP(0.1);
|
CREATE DEFINER=root@localhost EVENT e8 ON SCHEDULE EVERY '1' SECOND COMMENT 'a' DO INSERT INTO t VALUES (1),(1);
|
SELECT SLEEP(0.1);
|
CREATE TABLE t (id INT) ENGINE=InnoDB;
|
SET GLOBAL event_scheduler=ON;
|
--delimiter $$
|
BEGIN NOT ATOMIC
|
DECLARE CONTINUE HANDLER FOR 1210 BEGIN END;
|
LOOP
|
INSERT INTO t VALUES (1),(1);
|
SET GLOBAL innodb_buffer_pool_size=21474836480;
|
SELECT SLEEP(3);
|
SET GLOBAL innodb_buffer_pool_size=134217728;
|
SELECT SLEEP(3);
|
END LOOP;
|
END $$
|
--delimiter ;
|
reproducible easily enough, both via MTR and the client (for the latter, --delimiter needs to be changed to DELIMITER), with a regular sigsegv-ed stack trace
|
10.11 6ebdd3013a18b01dbecec76b870810329eb76586 + patch above
|
#3 <signal handler called>
|
#4 0x000056377b853fa2 in lock_sys_t::hash_table::resize (this=0x56378de2a7a0 <lock_sys+96>, n=40960) at /data/src/10.11-bug/storage/innobase/lock/lock0lock.cc:123
|
#5 0x000056377b85788c in lock_sys_t::resize (this=0x56378de2a740 <lock_sys>, n_cells=40960) at /data/src/10.11-bug/storage/innobase/lock/lock0lock.cc:463
|
#6 0x000056377bf54a7a in buf_pool_t::resize (this=0x56378aebbb40 <buf_pool>) at /data/src/10.11-bug/storage/innobase/buf/buf0buf.cc:1813
|
#7 0x000056377bf23c37 in buf_resize_callback () at /data/src/10.11-bug/storage/innobase/buf/buf0buf.cc:1860
|
#8 0x000056377c2fc998 in tpool::task_group::execute (this=0x56378de612a0 <single_threaded_group>, t=0x56378de61360 <buf_resize_task>) at /data/src/10.11-bug/tpool/task_group.cc:70
|
#9 0x000056377c2fdd97 in tpool::task::execute (this=0x56378de61360 <buf_resize_task>) at /data/src/10.11-bug/tpool/task.cc:32
|
#10 0x000056377c2dceb9 in tpool::thread_pool_generic::worker_main (this=0x618000000880, thread_var=0x630000020400) at /data/src/10.11-bug/tpool/tpool_generic.cc:580
|
#11 0x000056377c2fbc94 in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__f=@0x604000000d28: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x56377c2dc8d4 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>, __t=@0x604000000d20: 0x618000000880) at /usr/include/c++/10/bits/invoke.h:73
|
#12 0x000056377c2fb642 in std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=@0x604000000d28: (void (tpool::thread_pool_generic::*)(tpool::thread_pool_generic * const, tpool::worker_data *)) 0x56377c2dc8d4 <tpool::thread_pool_generic::worker_main(tpool::worker_data*)>) at /usr/include/c++/10/bits/invoke.h:95
|
#13 0x000056377c2fb419 in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=0x604000000d18) at /usr/include/c++/10/thread:264
|
#14 0x000056377c2fb29a in std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=0x604000000d18) at /usr/include/c++/10/thread:271
|
#15 0x000056377c2fb262 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=0x604000000d10) at /usr/include/c++/10/thread:215
|
#16 0x00007f3129738ed0 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
|
#17 0x00007f3129844ea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
|
#18 0x00007f3128ac7aef in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
|
|
|
Roel clarified to me that he is disabling a number of sigaction() calls. That explains why no signal handler was called on the SIGSEGV.
Usually, SIGSEGV is flagged on the instruction that immediately follows the one that caused the fault. In the debugging session that I conducted with Roel a couple of days ago, the signal was flagged on an instruction that was 2 instructions after the one that read next->hash:
0x0000561b190b84cb <+813>: cmpq $0x0,0x20(%r12)
|
0x0000561b190b84d1 <+819>: jne 0x561b190b84a3 <lock_sys_t::hash_table::resize(unsigned long)+773>
|
=> 0x0000561b190b84d3 <+821>: mov %rbx,0x20
|
The sleeps.diff does introduce an infinite loop and therefore will cause buffer pool resizing to hang in those cases where the code will be executed. But, it is also exposing a race condition that I am yet to analyze: both next and next->hash are expected to remain constant in such infinite loop, but instead, we have next=nullptr at some point. A simpler patch for exposing the same race condition might be the following:
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
|
index 0d6a069d50d..27ca8a4c694 100644
|
--- a/storage/innobase/lock/lock0lock.cc
|
+++ b/storage/innobase/lock/lock0lock.cc
|
@@ -119,7 +119,7 @@ void lock_sys_t::hash_table::resize(ulint n)
|
{
|
lock_t *next= static_cast<lock_t*>(c->node);
|
while (next->hash)
|
- next= next->hash;
|
+ std::this_thread::yield();
|
next->hash= lock;
|
}
|
lock= next;
|
|
|
My version of the patch may be too simplified, because I removed the assignment that used to be the while loop body, while Roel’s patch and the simplified version that elenst posted would move that assignment after the loop.
If we have next->hash==nullptr, then the while loop will not be entered. With sleeps.diff the code would become the following:
while (next->hash) ...; // not taken
|
next= next->hash; // next=nullptr!
|
next->hash= lock; // undefined behaviour: dereferencing next==nullptr
|
Therefore, it remains possible that the sleeps.diff is actually causing a problem.
So far, I was unable to reproduce the problem with any variation of my simple patch that I tried: replacing std::this_thread::yield() with usleep(1000000) or adding the assignment after the loop. Roel, can you try it once more, removing the assignment after the infinite while loop?
Note: I invoked elenst's test on 10.6 with ./mtr --mysqld=--event-scheduler=ON to avoid warnings that the event scheduler is switched off. (I don't think that this code has been changed between 10.6 and 10.11.)
|
|
Confirmed with an UBSAN build that the direct assignment of next= next->hash; without the while (next->hash) loop leads to the undefined behaviour as described in the last comment (null pointer access). Closed as not a bug. It was a valuable learning experience.
The random sleeps setup/testing was based on some ideas we had previously discussed, and the buffer pool resizing was not specifically targeted but part of the input SQL.
|
|
The approach of injecting random sleeps here and there is valid and useful, but unfortunately in this case it was done incorrectly and actually caused a bug by itself.
Also I learned that SIGSEGV may be flagged a few instructions after the guilty one. This was not a waste of time.
|