#3 0x00007f3289be9412 in __GI___assert_fail (assertion=0x5635057c8076 "table2->cached", file=0x5635057c7c10 "/home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc", line=1274,
function=0x5635057cbc80 <dict_table_t::add_to_cache()::__PRETTY_FUNCTION__> "void dict_table_t::add_to_cache()") at assert.c:101
#4 0x00005635050d0188 in dict_table_t::add_to_cache (this=0x7f31d804ccf0) at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:1272
#5 0x00005635050c3670 in dict_create_table_step (thr=0x7f31ec023650) at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0crea.cc:1275
#6 0x0000563504ea04ce in que_thr_step (thr=0x7f31ec023650) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1047
#7 0x0000563504ea06fd in que_run_threads_low (thr=0x7f31ec023650) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1101
#8 0x0000563504ea094b in que_run_threads (thr=0x7f31ec023650) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1141
#9 0x0000563504f0c92e in row_create_table_for_mysql (table=0x7f31d804ccf0, trx=0x7f326fc09888, mode=FIL_ENCRYPTION_DEFAULT, key_id=1) at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0mysql.cc:2427
#10 0x00005635051614ec in fts_create_one_index_table (trx=0x7f326fc09888, index=0x7f31e4093740, fts_table=0x7f32500e52e0, heap=0x7f31ec0684e0) at /home/mleich/10.3-MDEV-22633/storage/innobase/fts/fts0fts.cc:1996
#11 0x000056350516179c in fts_create_index_tables (trx=0x7f326fc09888, index=0x7f31e4093740, id=1192) at /home/mleich/10.3-MDEV-22633/storage/innobase/fts/fts0fts.cc:2068
add_fts_doc_id_idx=false) at /home/mleich/10.3-MDEV-22633/storage/innobase/handler/handler0alter.cc:5861
#13 0x0000563504dbee2e in ha_innobase::prepare_inplace_alter_table (this=0x7f31e0083570, altered_table=0x7f31ef4b2be8, ha_alter_info=0x7f32500e61e0) at /home/mleich/10.3-MDEV-22633/storage/innobase/handler/handler0alter.cc:6921
#14 0x0000563504b4da44 in handler::ha_prepare_inplace_alter_table (this=0x7f31e0083570, altered_table=0x7f31ef4b2be8, ha_alter_info=0x7f32500e61e0) at /home/mleich/10.3-MDEV-22633/sql/handler.cc:4563
#15 0x00005635049095f0 in mysql_inplace_alter_table (thd=0x7f31ec000ce8, table_list=0x7f31ec010c98, table=0x7f31e0080cd8, altered_table=0x7f31ef4b2be8, ha_alter_info=0x7f32500e61e0, inplace_supported=HA_ALTER_INPLACE_NOCOPY_LOCK,
target_mdl_request=0x7f32500e6360, alter_ctx=0x7f32500e6910) at /home/mleich/10.3-MDEV-22633/sql/sql_table.cc:7597
The last shown element of the hash table (element index 2978) is a pointer to our table2. We should find out when that pointer was created, or why it was not erased when the object was invalidated.
(rr) break dict_mem_table_free
(rr) cond 1 table==$2
(rr) watch -l ((void**)0x7f3204089ce0)[2978]
(rr) run
The watchpoint will be hit at when 293280, for filling in our bogus pointer. It is not a table at all!
Thread 22 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
(rr) when
Current event: 293280
(rr) bt
#0 0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
#1 0x000056350505e0be in btr_search_info_update_slow (info=0x563508c69308, cursor=0x563508c86d78)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:1578
#2 0x000056350502c3cb in btr_search_info_update (index=0x563508c68f20, cursor=0x563508c86d78)
at /home/mleich/10.3-MDEV-22633/storage/innobase/include/btr0sea.ic:84
#3 0x0000563505034344 in btr_cur_search_to_nth_level_func (index=0x563508c68f20, level=0, tuple=0x7f32200ddea8, mode=PAGE_CUR_LE,
The interesting moment occurs in dict_resize() at when 477227. Both dict_resize() and dict_table_t::add_to_cache() are protected by dict_sys->mutex, so the problem should reside in dict_resize(). Let us rewind to that event again:
(rr) run 477220
…
(rr) c
Continuing.
Thread 1 hit Hardware watchpoint 3: -location dict_sys.table_hash
Old value = (hash_table_t *) 0x563508c65250
New value = (hash_table_t *) 0x563508c652a0
dict_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:6305
The array is clear at this point, as expected. The memory was allocated by ut_malloc_nokey() in hash_create(). (Side note: something calloc()-like would avoid the need of the separate hash_table_clear() step.
The real culprit seems to be the following, which is apparently accessing a mem_heap_t that should have been freed as part of the dict_resize():
The problem is that we should invoke mem_heap_free() and zero out the pointer. When re-enabling the adaptive hash index, we should invoke mem_heap_create(). In its current form, the btr_search_sys->hash_tables are pointing to memory that has not been properly allocated from the buffer pool. It could even point to memory that was released to the operating system when the buffer pool was shrunk.
Marko Mäkelä
added a comment - Here is how I debugged it:
(rr) p *dict_sys.table_hash
$1 = {type = HASH_TABLE_SYNC_NONE, adaptive = 0, n_cells = 37441,
array = 0x7f3204089ce0, n_sync_obj = 0, sync_obj = {mutexes = 0x0,
rw_locks = 0x0}, heaps = 0x0, heap = 0x0, magic_n = 76561114}
(rr) p table2
$2 = (dict_table_t *) 0x7f31ba7e90d8
(rr) set print elements 40000
(rr) set max-value-size 300000
(rr) p *(void**)0x7f3204089ce0@37441
$3 = {0x0, 0x0, 0x0, 0x0, 0x7f31cc076ca0, 0x0 <repeats 224 times>, 0x563508bc92b0, 0x0 <repeats 55 times>, 0x7f31d7470ce0,
0x0 <repeats 228 times>, 0x7f31cc064db0, 0x0 <repeats 91 times>, 0x7f31e00997a0, 0x0 <repeats 42 times>, 0x7f31ef4be070,
0x0 <repeats 121 times>, 0x7f31cc0643b0, 0x0 <repeats 89 times>, 0x7f31e009b400, 0x0 <repeats 254 times>, 0x7f31e009cdb0,
0x0 <repeats 139 times>, 0x7f31d746b8b0, 0x0 <repeats 114 times>, 0x7f31e009d920, 0x0 <repeats 1606 times>, 0x7f31ba7e90d8,
…
The last shown element of the hash table (element index 2978) is a pointer to our table2 . We should find out when that pointer was created, or why it was not erased when the object was invalidated.
(rr) break dict_mem_table_free
(rr) cond 1 table==$2
(rr) watch -l ((void**)0x7f3204089ce0)[2978]
(rr) run
The watchpoint will be hit at when 293280, for filling in our bogus pointer. It is not a table at all!
Thread 22 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
(rr) when
Current event: 293280
(rr) bt
#0 0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
#1 0x000056350505e0be in btr_search_info_update_slow (info=0x563508c69308, cursor=0x563508c86d78)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:1578
#2 0x000056350502c3cb in btr_search_info_update (index=0x563508c68f20, cursor=0x563508c86d78)
at /home/mleich/10.3-MDEV-22633/storage/innobase/include/btr0sea.ic:84
#3 0x0000563505034344 in btr_cur_search_to_nth_level_func (index=0x563508c68f20, level=0, tuple=0x7f32200ddea8, mode=PAGE_CUR_LE,
latch_mode=2, cursor=0x563508c86d78, ahi_latch=0x0, file=0x56350574a0e0 "/home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc",
line=1043, mtr=0x7f3227ffe290, autoinc=0) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0cur.cc:2347
#4 0x0000563504f3c06a in btr_pcur_open_low (index=0x563508c68f20, level=0, tuple=0x7f32200ddea8, mode=PAGE_CUR_LE, latch_mode=2,
cursor=0x563508c86d78, file=0x56350574a0e0 "/home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc", line=1043, autoinc=0,
mtr=0x7f3227ffe290) at /home/mleich/10.3-MDEV-22633/storage/innobase/include/btr0pcur.ic:441
#5 0x0000563504f3f2d8 in row_search_on_row_ref (pcur=0x563508c86d78, mode=2, table=0x563508c668b0, ref=0x7f32200ddea8, mtr=0x7f3227ffe290)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc:1043
#6 0x0000563504f314ee in row_purge_reposition_pcur (mode=2, node=0x563508c86cd0, mtr=0x7f3227ffe290)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:78
#7 0x0000563504f316e1 in row_purge_remove_clust_if_poss_low (node=0x563508c86cd0, mode=2)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:123
#8 0x0000563504f31ae6 in row_purge_remove_clust_if_poss (node=0x563508c86cd0)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:191
#9 0x0000563504f33678 in row_purge_del_mark (node=0x563508c86cd0) at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:781
#10 0x0000563504f351cb in row_purge_record_func (node=0x563508c86cd0, undo_rec=0x7f32200230b8 "", thr=0x563508c86c10, updated_extern=false)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1191
#11 0x0000563504f354dc in row_purge (node=0x563508c86cd0, undo_rec=0x7f32200230b8 "", thr=0x563508c86c10)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1257
#12 0x0000563504f35734 in row_purge_step (thr=0x563508c86c10) at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1316
#13 0x0000563504ea0464 in que_thr_step (thr=0x563508c86c10) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1039
#14 0x0000563504ea06fd in que_run_threads_low (thr=0x563508c86c10) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1101
#15 0x0000563504ea094b in que_run_threads (thr=0x563508c86c10) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1141
#16 0x0000563504fbd73f in trx_purge (n_purge_threads=4, truncate=false, slot=0x563505f47708 <srv_sys+328>)
at /home/mleich/10.3-MDEV-22633/storage/innobase/trx/trx0purge.cc:1606
#17 0x0000563504f8c547 in srv_do_purge (n_total_purged=0x7f3227ffeeb0, slot=0x563505f47708 <srv_sys+328>)
at /home/mleich/10.3-MDEV-22633/storage/innobase/srv/srv0srv.cc:2628
#18 0x0000563504f8ca29 in srv_purge_coordinator_thread (arg=0x0) at /home/mleich/10.3-MDEV-22633/storage/innobase/srv/srv0srv.cc:2760
#19 0x00007f328a8f06db in start_thread (arg=0x7f3227fff700) at pthread_create.c:463
#20 0x00007f3289cda88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
A bit later, the block will be freed, and reused again:
Thread 2 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x7f31ba7e90d8
New value = (void *) 0x0
mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
437 }
(rr) when
Current event: 477029
(rr) bt
#0 mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
#1 0x00005635050540b2 in mem_heap_empty (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/include/mem0mem.ic:293
#2 0x000056350505a19d in btr_search_disable (need_mutex=true) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:468
#3 0x00005635050763ac in buf_pool_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:2744
#4 0x0000563505077e2f in buf_resize_thread () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:3201
#5 0x00007f328a8f06db in start_thread (arg=0x7f3217dff700) at pthread_create.c:463
#6 0x00007f3289cda88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(rr) c
Continuing.
[rr 17978 477030]2020-05-19 19:14:23 0 [Note] InnoDB: disabled adaptive hash index.
[rr 17978 477034]2020-05-19 19:14:23 0 [Note] InnoDB: Withdrawing blocks to be shrunken.
[rr 17978 477038]2020-05-19 19:14:23 0 [Note] InnoDB: Latching whole of buffer pool.
[rr 17978 477058]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : resizing with chunks 2 to 6.
[rr 17978 477204]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : 4 chunks (1536 blocks) were added.
[rr 17978 477208]2020-05-19 19:14:23 0 [Note] InnoDB: Resizing hash tables.
[rr 17978 477212]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : hash tables were resized.
[rr 17978 477216]2020-05-19 19:14:23 0 [Note] InnoDB: Resizing also other hash tables.
[rr 17978 477228]2020-05-19 19:14:23 0 [Note] InnoDB: Resized hash tables at lock_sys, adaptive hash index, dictionary.
[rr 17978 477232]2020-05-19 19:14:23 0 [Note] InnoDB: Completed to resize buffer pool from 50331648 to 150994944.
[rr 17978 477244]2020-05-19 19:14:23 0 [Note] InnoDB: Re-enabled adaptive hash index.
[rr 17978 477248]2020-05-19 19:14:23 0 [Note] InnoDB: Completed resizing buffer pool at 200519 19:14:23.
[Switching to Thread 17978.25129]
Thread 24 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
(rr) when
Current event: 477400
(rr) c
Continuing.
[rr 17978 480487]2020-05-19 19:14:24 94 [Warning] Aborted connection 94 to db: 'test' user: 'root' host: 'localhost' (CLOSE_CONNECTION)
[rr 17978 481493]mysqld: /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:1274: void dict_table_t::add_to_cache(): Assertion `table2->cached' failed.
Thread 38 received signal SIGABRT, Aborted.
So, apparently we are dealing with a bogus hash table. Let us set a watchpoint on it and rerun:
Thread 1 hit Hardware watchpoint 3: -location dict_sys.table_hash
Old value = (hash_table_t *) 0x0
New value = (hash_table_t *) 0x563508c65250
dict_init () at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:1091
1091 buf_pool_get_curr_size()
(rr) c
Continuing.
[rr 17978 2599]2020-05-19 19:10:16 0 [Note] InnoDB: 128 out of 128 rollback segments are active.
…
Thread 15 hit Hardware watchpoint 3: -location dict_sys.table_hash
Old value = (hash_table_t *) 0x563508c65250
New value = (hash_table_t *) 0x563508c652a0
dict_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:6305
6305 buf_pool_get_curr_size()
Current event: 477227
[rr 17978 477228]2020-05-19 19:14:23 0 [Note] InnoDB: Resized hash tables at lock_sys, adaptive hash index, dictionary.
[rr 17978 477232]2020-05-19 19:14:23 0 [Note] InnoDB: Completed to resize buffer pool from 50331648 to 150994944.
[rr 17978 477244]2020-05-19 19:14:23 0 [Note] InnoDB: Re-enabled adaptive hash index.
[rr 17978 477248]2020-05-19 19:14:23 0 [Note] InnoDB: Completed resizing buffer pool at 200519 19:14:23.
[Switching to Thread 17978.25129]
Thread 24 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
(rr)
Continuing.
[rr 17978 480487]2020-05-19 19:14:24 94 [Warning] Aborted connection 94 to db: 'test' user: 'root' host: 'localhost' (CLOSE_CONNECTION)
[rr 17978 481493]mysqld: /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:1274: void dict_table_t::add_to_cache(): Assertion `table2->cached' failed.
Thread 33 received signal SIGABRT, Aborted.
The interesting moment occurs in dict_resize() at when 477227. Both dict_resize() and dict_table_t::add_to_cache() are protected by dict_sys->mutex , so the problem should reside in dict_resize() . Let us rewind to that event again:
(rr) run 477220
…
(rr) c
Continuing.
Thread 1 hit Hardware watchpoint 3: -location dict_sys.table_hash
Old value = (hash_table_t *) 0x563508c65250
New value = (hash_table_t *) 0x563508c652a0
dict_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:6305
6305 buf_pool_get_curr_size()
(rr) list
6300 dict_sys->table_hash = hash_create(
6301 buf_pool_get_curr_size()
6302 / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE));
…
(rr) when
Current event: 477227
(rr) p *dict_sys.table_hash
$20 = {type = HASH_TABLE_SYNC_NONE, adaptive = 0, n_cells = 37441, array = 0x7f3204089ce0, n_sync_obj = 0, sync_obj = {mutexes = 0x0,
rw_locks = 0x0}, heaps = 0x0, heap = 0x0, magic_n = 76561114}
(rr) p *(void**)0x7f3204089ce0@37441(rr) i lo
block = 0x7f31ba7e90d8
ahi_latch = 0x563508b4a890
table = 0x563508b8dcb0
heap = 0x7f320408f980
(rr) cond 5 block==0x7f320408f980
$21 = {0x0 <repeats 37441 times>}
The array is clear at this point, as expected. The memory was allocated by ut_malloc_nokey() in hash_create() . (Side note: something calloc() -like would avoid the need of the separate hash_table_clear() step.
The real culprit seems to be the following, which is apparently accessing a mem_heap_t that should have been freed as part of the dict_resize() :
(rr) c
Continuing.
[rr 17978 477228]2020-05-19 19:14:23 0 [Note] InnoDB: Resized hash tables at lock_sys, adaptive hash index, dictionary.
[rr 17978 477232]2020-05-19 19:14:23 0 [Note] InnoDB: Completed to resize buffer pool from 50331648 to 150994944.
[rr 17978 477244]2020-05-19 19:14:23 0 [Note] InnoDB: Re-enabled adaptive hash index.
[rr 17978 477248]2020-05-19 19:14:23 0 [Note] InnoDB: Completed resizing buffer pool at 200519 19:14:23.
[Switching to Thread 17978.25129]
Thread 24 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
(rr) bt
#0 0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
#1 0x000056350505db39 in btr_search_build_page_hash_index (index=0x563508c68f20, block=0x7f31ba7d93c0, ahi_latch=0x563508b4a890, n_fields=2,
n_bytes=0, left_side=0) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:1504
#2 0x000056350505e129 in btr_search_info_update_slow (info=0x563508c69308, cursor=0x563508c86658)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:1595
#3 0x000056350502c3cb in btr_search_info_update (index=0x563508c68f20, cursor=0x563508c86658)
at /home/mleich/10.3-MDEV-22633/storage/innobase/include/btr0sea.ic:84
#4 0x0000563505034344 in btr_cur_search_to_nth_level_func (index=0x563508c68f20, level=0, tuple=0x7f321c0270c0, mode=PAGE_CUR_LE,
latch_mode=2, cursor=0x563508c86658, ahi_latch=0x0, file=0x56350574a0e0 "/home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc",
line=1043, mtr=0x7f3226afc340, autoinc=0) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0cur.cc:2347
#5 0x0000563504f3c06a in btr_pcur_open_low (index=0x563508c68f20, level=0, tuple=0x7f321c0270c0, mode=PAGE_CUR_LE, latch_mode=2,
cursor=0x563508c86658, file=0x56350574a0e0 "/home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc", line=1043, autoinc=0,
mtr=0x7f3226afc340) at /home/mleich/10.3-MDEV-22633/storage/innobase/include/btr0pcur.ic:441
#6 0x0000563504f3f2d8 in row_search_on_row_ref (pcur=0x563508c86658, mode=2, table=0x563508c668b0, ref=0x7f321c0270c0, mtr=0x7f3226afc340)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0row.cc:1043
#7 0x0000563504f314ee in row_purge_reposition_pcur (mode=2, node=0x563508c865b0, mtr=0x7f3226afc340)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:78
#8 0x0000563504f316e1 in row_purge_remove_clust_if_poss_low (node=0x563508c865b0, mode=2)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:123
#9 0x0000563504f31ae6 in row_purge_remove_clust_if_poss (node=0x563508c865b0)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:191
#10 0x0000563504f33678 in row_purge_del_mark (node=0x563508c865b0) at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:781
#11 0x0000563504f351cb in row_purge_record_func (node=0x563508c865b0, undo_rec=0x7f32200c0e68 "", thr=0x563508c864f0, updated_extern=false)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1191
#12 0x0000563504f354dc in row_purge (node=0x563508c865b0, undo_rec=0x7f32200c0e68 "", thr=0x563508c864f0)
at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1257
#13 0x0000563504f35734 in row_purge_step (thr=0x563508c864f0) at /home/mleich/10.3-MDEV-22633/storage/innobase/row/row0purge.cc:1316
#14 0x0000563504ea0464 in que_thr_step (thr=0x563508c864f0) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1039
#15 0x0000563504ea06fd in que_run_threads_low (thr=0x563508c864f0) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1101
#16 0x0000563504ea094b in que_run_threads (thr=0x563508c864f0) at /home/mleich/10.3-MDEV-22633/storage/innobase/que/que0que.cc:1141
#17 0x0000563504f8bfd4 in srv_task_execute (slot=0x563505f47778 <srv_sys+440>)
at /home/mleich/10.3-MDEV-22633/storage/innobase/srv/srv0srv.cc:2482
#18 0x0000563504f8c1f3 in srv_worker_thread (arg=0x0) at /home/mleich/10.3-MDEV-22633/storage/innobase/srv/srv0srv.cc:2530
#19 0x00007f328a8f06db in start_thread (arg=0x7f3226afd700) at pthread_create.c:463
#20 0x00007f3289cda88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(rr) when
Current event: 477400
(rr) i lo
block = 0x7f31ba7e90d8
ahi_latch = 0x563508b4a890
table = 0x563508b8dcb0
heap = 0x7f320408f980
We must find out how and when the heap above was created, and why it was not invalidated.
(rr) break mem0mem.ic:391
(rr) cond 4 block==0x7f320408f980
(rr) run 400000
…
Thread 28 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x7f31ba7e90d8
New value = (void *) 0x0
mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
437 }
(rr) bt
#0 mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
#1 0x00005635050540b2 in mem_heap_empty (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/include/mem0mem.ic:293
#2 0x000056350505a19d in btr_search_disable (need_mutex=true) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:468
#3 0x00005635050763ac in buf_pool_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:2744
#4 0x0000563505077e2f in buf_resize_thread () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:3201
#5 0x00007f328a8f06db in start_thread (arg=0x7f3217dff700) at pthread_create.c:463
#6 0x00007f3289cda88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(rr) when
Current event: 477029
(rr) c
Thread 28 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x7f31ba7e90d8
New value = (void *) 0x0
mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
437 }
(rr) bt
#0 mem_heap_free_block_free (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/mem/mem0mem.cc:437
#1 0x00005635050540b2 in mem_heap_empty (heap=0x7f320408f980) at /home/mleich/10.3-MDEV-22633/storage/innobase/include/mem0mem.ic:293
#2 0x000056350505a19d in btr_search_disable (need_mutex=true) at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:468
#3 0x00005635050763ac in buf_pool_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:2744
#4 0x0000563505077e2f in buf_resize_thread () at /home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc:3201
#5 0x00007f328a8f06db in start_thread (arg=0x7f3217dff700) at pthread_create.c:463
#6 0x00007f3289cda88f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(rr) when
Current event: 477029
(rr) c
Continuing.
[rr 17978 477030]2020-05-19 19:14:23 0 [Note] InnoDB: disabled adaptive hash index.
[rr 17978 477034]2020-05-19 19:14:23 0 [Note] InnoDB: Withdrawing blocks to be shrunken.
[rr 17978 477038]2020-05-19 19:14:23 0 [Note] InnoDB: Latching whole of buffer pool.
[rr 17978 477058]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : resizing with chunks 2 to 6.
[rr 17978 477204]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : 4 chunks (1536 blocks) were added.
[rr 17978 477208]2020-05-19 19:14:23 0 [Note] InnoDB: Resizing hash tables.
[rr 17978 477212]2020-05-19 19:14:23 0 [Note] InnoDB: buffer pool 0 : hash tables were resized.
[rr 17978 477216]2020-05-19 19:14:23 0 [Note] InnoDB: Resizing also other hash tables.
Thread 28 hit Hardware watchpoint 3: -location dict_sys.table_hash
Old value = (hash_table_t *) 0x563508c65250
New value = (hash_table_t *) 0x563508c652a0
dict_resize () at /home/mleich/10.3-MDEV-22633/storage/innobase/dict/dict0dict.cc:6305
6305 buf_pool_get_curr_size()
(rr) when
Current event: 477227
(rr) c
Continuing.
[rr 17978 477228]2020-05-19 19:14:23 0 [Note] InnoDB: Resized hash tables at lock_sys, adaptive hash index, dictionary.
[rr 17978 477232]2020-05-19 19:14:23 0 [Note] InnoDB: Completed to resize buffer pool from 50331648 to 150994944.
[rr 17978 477244]2020-05-19 19:14:23 0 [Note] InnoDB: Re-enabled adaptive hash index.
[rr 17978 477248]2020-05-19 19:14:23 0 [Note] InnoDB: Completed resizing buffer pool at 200519 19:14:23.
[Switching to Thread 17978.25129]
Thread 24 hit Hardware watchpoint 2: -location ((void**)0x7f3204089ce0)[2978]
Old value = (void *) 0x0
New value = (void *) 0x7f31ba7e90d8
0x00005635050594db in btr_search_check_free_space_in_heap (index=0x563508c68f20)
at /home/mleich/10.3-MDEV-22633/storage/innobase/btr/btr0sea.cc:209
209 heap->free_block = block;
Because the breakpoint was not hit, the heap was created much earlier, because . But, above we can already see the bug:
(rr) list btr0sea.cc:468
463 buf_pool_clear_hash_index();
464
465 /* Clear the adaptive hash index. */
466 for (ulint i = 0; i < btr_ahi_parts; ++i) {
467 hash_table_clear(btr_search_sys->hash_tables[i]);
468 mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
469 }
470
471 btr_search_x_unlock_all();
472 }
The problem is that we should invoke mem_heap_free() and zero out the pointer. When re-enabling the adaptive hash index, we should invoke mem_heap_create() . In its current form, the btr_search_sys->hash_tables are pointing to memory that has not been properly allocated from the buffer pool. It could even point to memory that was released to the operating system when the buffer pool was shrunk.
Basically, we should move some code from btr_search_sys_free() to btr_search_disable() and some from btr_search_sys_create() to btr_search_enable().
Marko Mäkelä
added a comment - As far as I can tell, this bug should affect MySQL 5.7 and 8.0 as well. The fix should involve replacing the following code in btr_search_disable() :
for (ulint i = 0; i < btr_ahi_parts; ++i) {
hash_table_clear(btr_search_sys->hash_tables[i]);
mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
}
Basically, we should move some code from btr_search_sys_free() to btr_search_disable() and some from btr_search_sys_create() to btr_search_enable() .
The patch looks OK to me. I would suggest one more improvement: defer the allocation of btr_search_sys->hash_tables to btr_search_enable(). That is, on startup, invoke btr_search_enable() if the setting is initially enabled.
mleich, please test this also with multiple buffer pool instances.
Marko Mäkelä
added a comment - The patch looks OK to me. I would suggest one more improvement: defer the allocation of btr_search_sys->hash_tables to btr_search_enable() . That is, on startup, invoke btr_search_enable() if the setting is initially enabled.
mleich , please test this also with multiple buffer pool instances.
People
Thirunarayanan Balathandayuthapani
Matthias Leich
Votes:
0Vote for this issue
Watchers:
3Start watching this issue
Dates
Created:
Updated:
Resolved:
Git Integration
Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.
The table2 looks like buf_block_t*. There is no table creation with table2 address.
(rr) p table2
$2 = (dict_table_t *) 0x7f31ba7e90d8
(rr) p *(buf_block_t*) $14
$15 = {page = {id = {m_space = 4294967295, m_page_no = 4294967295},
hash = 0x0, size = {m_physical = 65536, m_logical = 65536,
m_is_compressed = 0}, buf_fix_count = 0, io_fix = BUF_IO_NONE,
state = BUF_BLOCK_MEMORY, flush_type = 0, buf_pool_index = 0, zip = {
data = 0x0, m_start = 0, m_external = false, m_end = 0,
m_nonempty = 0, n_blobs = 0, ssize = 0}, real_size = 0, slot = 0x0,
in_page_hash = 0, in_zip_hash = 0, list = {prev = 0x0, next = 0x0},
in_flush_list = 0, in_free_list = 0, flush_observer = 0x0,
newest_modification = 32952548, oldest_modification = 0, LRU = {
prev = 0x0, next = 0x0}, in_LRU_list = 0, old = 0,
freed_page_clock = 4763, access_time = 421127849,
file_page_was_freed = 0}, frame = 0x7f31bbc10000 "",
lock = {<latch_t> = {
_vptr.latch_t = 0x563505e08a18 <vtable for rw_lock_t+16>,
m_id = LATCH_ID_BUF_BLOCK_LOCK, m_rw_lock = true},
lock_word = 536870912, waiters = 0, sx_recursive = 0,
writer_is_wait_ex = false, writer_thread = 0, event = 0x7f3204033860,
wait_ex_event = 0x7f32040338e0,
cfile_name = 0x5635057ac860 "/home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc",
last_x_file_name = 0x5635056d7f98 "/home/mleich/10.3-MDEV-22633/storage/innobase/ibuf/ibuf0ibuf.cc", cline = 1575, is_block_lock = 1,
last_x_line = 4445, count_os_wait = 0, list = {prev = 0x7f31ba7e92a8,
next = 0x7f31ba7e8fd0}, pfs_psi = 0x0, debug_list = {count = 0,
start = 0x0, end = 0x0, node = &rw_lock_debug_t::list, init = 51966},
level = SYNC_LEVEL_VARYING}, unzip_LRU = {prev = 0x0, next = 0x0},
in_unzip_LRU_list = 0, in_withdraw_list = 0, lock_hash_val = 1235,
modify_clock = 3, n_hash_helps = 0, n_bytes = 0, n_fields = 1,
left_side = true, n_pointers = 0, curr_n_fields = 0, curr_n_bytes = 0,
curr_left_side = 0, index = 0x0, skip_flush_check = false,
debug_latch = {<latch_t> = {
_vptr.latch_t = 0x563505e08a18 <vtable for rw_lock_t+16>,
m_id = LATCH_ID_BUF_BLOCK_LOCK, m_rw_lock = true},
lock_word = 536870912, waiters = 0, sx_recursive = 0,
writer_is_wait_ex = false, writer_thread = 0, event = 0x7f3204033960,
wait_ex_event = 0x7f32040339e0,
cfile_name = 0x5635057ac860 "/home/mleich/10.3-MDEV-22633/storage/innobase/buf/buf0buf.cc", last_x_file_name = 0x56350576c770 "not yet reserved",
cline = 1578, is_block_lock = 0, last_x_line = 0, count_os_wait = 0,
list = {prev = 0x7f31ba7e9470, next = 0x7f31ba7e9198}, pfs_psi = 0x0,
debug_list = {count = 0, start = 0x0, end = 0x0,
node = &rw_lock_debug_t::list, init = 51966},
level = SYNC_LEVEL_VARYING}, mutex = {m_impl = {m_lock_word = 0,
m_event = 0x7f32040337e0,
m_policy = {<MutexDebug<TTASEventMutex<BlockMutexPolicy> >> = {
_vptr.MutexDebug = 0x563505e08ac0 <vtable for BlockMutexPolicy<TTASEventMutex<BlockMutexPolicy> >+16>, m_magic_n = 979585,
m_context = {<latch_t> = {
_vptr.latch_t = 0x563505e08830 <vtable for MutexDebug<TTASEventMutex<BlockMutexPolicy> >::Context+16>, m_id = LATCH_ID_BUF_BLOCK_MUTEX,
m_rw_lock = false}, m_mutex = 0x0, m_filename = 0x0,
m_line = 0, m_thread_id = 18446744073709551615}},
m_count = 0x563508ae80e0, m_id = LATCH_ID_BUF_BLOCK_MUTEX}},
m_ptr = 0x0}}
Page could have re-initialised by the following code:
/* wait for the number of blocks fit to the new size (if needed)*/
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool = buf_pool_from_array(i);
if (buf_pool->curr_size < buf_pool->old_size) {
should_retry_withdraw |=
buf_pool_withdraw_blocks(buf_pool);
}
}
buf_page_realloc() initializes the page id with ULINT32_UNDEFINED