Details
Description
Since 2020-08-24, the unit.lf test has frequently failed in buildbot on aarch64, and a few times on ppc64le.
This has been occurring since the attempted fix in MDEV-27088. The unit.lf test now runs enough iterations to frequently expose its faulty implementation.
An example of a stalled test:
gdb of lf-t stalled on aarch64 from 10.9-43fa8e0b8f3bae1ff8493cfd3adb39443da6a809 |
(gdb) directory /source
|
Source directories searched: /source:$cdir:$cwd
|
(gdb) thread apply all bt -frame-arguments all full
|
|
Thread 2 (Thread 0xffff23fff120 (LWP 7523) "lf-t"):
|
#0 lf_pinbox_real_free (pins=0xffff980017d8) at /home/mdborg/mariadb-server-10.8/mysys/lf_alloc-pin.c:376
|
a = 0xffff23ffe3d8
|
b = 0xffff23ffe3f0
|
c = 0xffff23ffe3e0
|
cur = 0xffff34001518
|
npins = 25
|
list = 0xffff34001518
|
addr = 0xffff23ffe390
|
first = 0xfffff74f8aaf
|
last = 0xffff34001518
|
var = <optimized out>
|
stack_ends_here = <optimized out>
|
pinbox = 0xaaaac84743f8 <lf_allocator>
|
#1 0x0000aaaac811963c in lf_pinbox_free (pins=pins@entry=0xffff980017d8, addr=addr@entry=0xffff68001ea8) at /home/mdborg/mariadb-server-10.8/mysys/lf_alloc-pin.c:271
|
No locals.
|
#2 0x0000aaaac8116874 in test_lf_alloc (arg=<optimized out>) at /home/mdborg/mariadb-server-10.8/unittest/mysys/lf-t.c:90
|
node1 = 0xffff68001ea8
|
node2 = 0xffff28001db8
|
m = 9409
|
x = <optimized out>
|
y = 0
|
pins = 0xffff980017d8
|
#3 0x0000ffff9fdad5c8 in start_thread (arg=0x0) at ./nptl/pthread_create.c:442
|
ret = <optimized out>
|
pd = 0x0
|
out = <optimized out>
|
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {281471285719328, 281474830928560, 281474830928558, 8448352, 281474830928559, 0, 281471277268992, 8448352, 281473365200928, 281471277268992, 281471285717056, 4514424196649599986, 0, 4514424198325567406, 0, 0, 0, 0, 0, 0, 0, 0}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
|
not_first_call = 0
|
--Type <RET> for more, q to quit, c to continue without paging--
|
#4 0x0000ffff9fe15d1c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79
|
No locals.
|
|
Thread 1 (Thread 0xffff9ff25020 (LWP 7491) "lf-t"):
|
#0 __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=7523, futex_word=0xffff23fff1f0) at ./nptl/futex-internal.c:57
|
_x3tmp = 0
|
_x0tmp = 281471285719536
|
_x0 = 281471285719536
|
_x3 = 0
|
_x4tmp = 0
|
_x1tmp = 265
|
_x1 = 265
|
_x4 = 0
|
_x5tmp = 4294967295
|
_x2tmp = 7523
|
_x2 = 7523
|
_x5 = 4294967295
|
_x8 = 98
|
_sys_result = <optimized out>
|
sc_cancel_oldtype = 0
|
sc_ret = <optimized out>
|
_sys_result = <optimized out>
|
_x5tmp = <optimized out>
|
_x4tmp = <optimized out>
|
_x3tmp = <optimized out>
|
_x2tmp = <optimized out>
|
_x1tmp = <optimized out>
|
_x0tmp = <optimized out>
|
_x0 = <optimized out>
|
_x1 = <optimized out>
|
_x2 = <optimized out>
|
_x3 = <optimized out>
|
--Type <RET> for more, q to quit, c to continue without paging--c
|
_x4 = <optimized out>
|
_x5 = <optimized out>
|
_x8 = <optimized out>
|
#1 __futex_abstimed_wait_common (cancel=true, private=128, abstime=0x0, clockid=0, expected=7523, futex_word=0xffff23fff1f0) at ./nptl/futex-internal.c:87
|
err = <optimized out>
|
clockbit = 256
|
op = 265
|
err = <optimized out>
|
clockbit = <optimized out>
|
op = <optimized out>
|
#2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0xffff23fff1f0, expected=7523, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=128) at ./nptl/futex-internal.c:139
|
No locals.
|
#3 0x0000ffff9fdaef2c in __pthread_clockjoin_ex (threadid=281471285719328, thread_return=thread_return@entry=0x0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, block=block@entry=true) at ./nptl/pthread_join_common.c:105
|
ret = <optimized out>
|
_buffer = {__routine = 0xffff9fdaedb0 <cleanup>, __arg = 0xffff23fff548, __canceltype = -1611481088, __prev = 0x0}
|
tid = <optimized out>
|
pd = 0xffff23fff120
|
self = <optimized out>
|
result = 0
|
pd_result = <optimized out>
|
#4 0x0000ffff9fdaedb0 in ___pthread_join (threadid=<optimized out>, thread_return=thread_return@entry=0x0) at ./nptl/pthread_join.c:24
|
No locals.
|
#5 0x0000aaaac8116c54 in test_concurrently (test=test@entry=0xaaaac8157e10 "lf_alloc (with my_thread_init)", handler=handler@entry=0xaaaac8116800 <test_lf_alloc>, n=n@entry=30, m=<optimized out>, m@entry=30000) at /home/mdborg/mariadb-server-10.8/unittest/mysys/thr_template.c:46
|
threads = 0xaaaae03e27a0
|
i = 1
|
now = 3568672611079524
|
#6 0x0000aaaac8116da0 in do_tests () at /home/mdborg/mariadb-server-10.8/unittest/mysys/lf-t.c:188
|
No locals.
|
#7 0x0000aaaac81165ec in main (argc=<optimized out>, argv=<optimized out>) at /home/mdborg/mariadb-server-10.8/unittest/mysys/thr_template.c:67
|
No locals.
|
(gdb) p *lf_allocator
|
Structure has no component named operator*.
|
(gdb) p lf_allocator
|
$1 = {pinbox = {pinarray = {level = {0xffff98000d10, 0x0, 0x0, 0x0}, size_of_element = 184}, free_func = 0xaaaac81193e4 <alloc_free>, free_func_arg = 0xaaaac84743f8 <lf_allocator>,
|
free_ptr_offset = 0, pinstack_top_ver = 2006515725, pins_in_array = 30}, top = 0xffff48004cc8 "x\033", element_size = 8, mallocs = 0, constructor = 0x0, destructor = 0x0}
|
(gdb) p lf_hash
|
$2 = {array = {level = {0x0, 0x0, 0x0, 0x0}, size_of_element = 8}, alloc = {pinbox = {pinarray = {level = {0x0, 0x0, 0x0, 0x0}, size_of_element = 184},
|
free_func = 0xaaaac81193e4 <alloc_free>, free_func_arg = 0xaaaac8474358 <lf_hash+40>, free_ptr_offset = 8, pinstack_top_ver = 0, pins_in_array = 0}, top = 0x0, element_size = 36,
|
mallocs = 0, constructor = 0x0, destructor = 0x0}, get_key = 0x0, initializer = 0xaaaac8119c24 <default_initializer(LF_HASH*, void*, void const*)>,
|
hash_function = 0xaaaac8119bf0 <calc_hash(CHARSET_INFO*, uchar const*, size_t)>, charset = 0xaaaac83ff888 <my_charset_bin>, key_offset = 0, key_length = 4, element_size = 4, flags = 1,
|
size = 1, count = 0}
|
mbeck, svoj, if you have a moment/interest, can you please check the implementation again.
Attachments
Issue Links
- relates to
-
MDEV-12897 unit.lf failed in buildbot
- Closed
-
MDEV-27088 Server crash on ARM (WMM architecture) due to missing barriers in lf-hash
- Closed
-
MDEV-31151 Inaccurate stack size calculation caused stack overflow in pinbox allocator
- Closed