Details
-
Bug
-
Status: Closed (View Workflow)
-
Blocker
-
Resolution: Fixed
-
10.4.14, 10.2(EOL), 10.3(EOL), 10.5
-
None
Description
A customer is experiencing crashes when an ETL query is executed.
See stack trace and details.
Attachments
Issue Links
- relates to
-
MDEV-22556 Incorrect result for window function when using encrypt-tmp-files=ON
-
- Closed
-
-
MDEV-24045 Swtich to use mysql_file_pread/pwrite instead of mysql_file_read/write while reading and writing to IO_CACHE wherever feasible
-
- Open
-
Activity
reproducible test case, with encrypt-tmp-files=ON
set sort_buffer_size= 2000; |
create table t1( a DECIMAL(12,0) DEFAULT NULL, b VARCHAR(20) DEFAULT NULL, c DECIMAL(12,0) DEFAULT NULL)engine=INNODB; |
insert into t1 select seq, seq, seq from seq_1_to_5000; |
select count(*) from (select a, b, c, sum(b) OVER (PARTITION BY a) FROM t1)q; |
The stacktrace for the failure on debug build
Program terminated with signal SIGABRT, Aborted.
|
#0 __pthread_kill (threadid=<optimized out>, signo=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:56
|
56 ../sysdeps/unix/sysv/linux/pthread_kill.c: No such file or directory.
|
[Current thread is 1 (Thread 0x7fad85c67700 (LWP 240685))]
|
#0 __pthread_kill (threadid=<optimized out>, signo=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:56
|
#1 0x000055fb818aaa89 in my_write_core (sig=6) at /home/varun/MariaDB/10.4/mysys/stacktrace.c:386
|
#2 0x000055fb80f35f5f in handle_fatal_signal (sig=6) at /home/varun/MariaDB/10.4/sql/signal_handler.cc:343
|
#3 <signal handler called>
|
#4 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
|
#5 0x00007fad8caa3859 in __GI_abort () at abort.c:79
|
#6 0x00007fad8caa3729 in __assert_fail_base (fmt=0x7fad8cc39588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x55fb81a17768 "pos_in_file % info->buffer_length == 0", file=0x55fb81a17728 "/home/varun/MariaDB/10.4/sql/mf_iocache_encr.cc", line=82, function=<optimized out>) at assert.c:92
|
#7 0x00007fad8cab4f36 in __GI___assert_fail (assertion=0x55fb81a17768 "pos_in_file % info->buffer_length == 0", file=0x55fb81a17728 "/home/varun/MariaDB/10.4/sql/mf_iocache_encr.cc", line=82, function=0x55fb81a17790 "int my_b_encr_read(IO_CACHE*, uchar*, size_t)") at assert.c:101
|
#8 0x000055fb80dd0f25 in my_b_encr_read (info=0x7fad3013dfa8, Buffer=0x7fad30135398 "\230\177\034\060\255\177", Count=8) at /home/varun/MariaDB/10.4/sql/mf_iocache_encr.cc:82
|
#9 0x000055fb818813cf in _my_b_read (info=0x7fad3013dfa8, Buffer=0x7fad30135398 "\230\177\034\060\255\177", Count=8) at /home/varun/MariaDB/10.4/mysys/mf_iocache.c:577
|
#10 0x000055fb810ef848 in my_b_read (info=0x7fad3013dfa8, Buffer=0x7fad30135398 "\230\177\034\060\255\177", Count=8) at /home/varun/MariaDB/10.4/include/my_sys.h:532
|
#11 0x000055fb810f0782 in rr_from_tempfile (info=0x7fad85c64c20) at /home/varun/MariaDB/10.4/sql/records.cc:499
|
#12 0x000055fb80b1d00d in READ_RECORD::read_record (this=0x7fad85c64c20) at /home/varun/MariaDB/10.4/sql/records.h:70
|
#13 0x000055fb80e174ee in compute_window_func (thd=0x7fad30000d90, window_functions=..., cursor_managers=..., tbl=0x7fad30061ed8, filesort_result=0x7fad3013df80) at /home/varun/MariaDB/10.4/sql/sql_window.cc:2823
|
#14 0x000055fb80e17ae3 in Window_func_runner::exec (this=0x7fad30069cf8, thd=0x7fad30000d90, tbl=0x7fad30061ed8, filesort_result=0x7fad3013df80) at /home/varun/MariaDB/10.4/sql/sql_window.cc:2968
|
#15 0x000055fb80e17bd6 in Window_funcs_sort::exec (this=0x7fad30069cf0, join=0x7fad30016f78, keep_filesort_result=true) at /home/varun/MariaDB/10.4/sql/sql_window.cc:2996
|
#16 0x000055fb80e18186 in Window_funcs_computation::exec (this=0x7fad30019410, join=0x7fad30016f78, keep_last_filesort_result=true) at /home/varun/MariaDB/10.4/sql/sql_window.cc:3123
|
#17 0x000055fb80c67f66 in AGGR_OP::end_send (this=0x7fad30019080) at /home/varun/MariaDB/10.4/sql/sql_select.cc:28780
|
#18 0x000055fb80c508df in sub_select_postjoin_aggr (join=0x7fad30016f78, join_tab=0x7fad30069490, end_of_records=true) at /home/varun/MariaDB/10.4/sql/sql_select.cc:20182
|
#19 0x000055fb80c50c52 in sub_select (join=0x7fad30016f78, join_tab=0x7fad300690e8, end_of_records=true) at /home/varun/MariaDB/10.4/sql/sql_select.cc:20417
|
#20 0x000055fb80c503b8 in do_select (join=0x7fad30016f78, procedure=0x0) at /home/varun/MariaDB/10.4/sql/sql_select.cc:20008
|
#21 0x000055fb80c24ddf in JOIN::exec_inner (this=0x7fad30016f78) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4496
|
#22 0x000055fb80c23f0e in JOIN::exec (this=0x7fad30016f78) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4278
|
#23 0x000055fb80c25662 in mysql_select (thd=0x7fad30000d90, tables=0x7fad30014a30, wild_num=0, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2416184064, result=0x7fad30016e90, unit=0x7fad30015118, select_lex=0x7fad30013ba8) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4713
|
#24 0x000055fb80b78e30 in mysql_derived_fill (thd=0x7fad30000d90, lex=0x7fad30004c00, derived=0x7fad30015950) at /home/varun/MariaDB/10.4/sql/sql_derived.cc:1255
|
#25 0x000055fb80b75de4 in mysql_handle_single_derived (lex=0x7fad30004c00, derived=0x7fad30015950, phases=96) at /home/varun/MariaDB/10.4/sql/sql_derived.cc:206
|
#26 0x000055fb80c3e8bd in st_join_table::preread_init (this=0x7fad3006af88) at /home/varun/MariaDB/10.4/sql/sql_select.cc:13562
|
#27 0x000055fb80c50cdc in sub_select (join=0x7fad30016920, join_tab=0x7fad3006af88, end_of_records=false) at /home/varun/MariaDB/10.4/sql/sql_select.cc:20434
|
#28 0x000055fb80c5034e in do_select (join=0x7fad30016920, procedure=0x0) at /home/varun/MariaDB/10.4/sql/sql_select.cc:20006
|
#29 0x000055fb80c24ddf in JOIN::exec_inner (this=0x7fad30016920) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4496
|
#30 0x000055fb80c23f0e in JOIN::exec (this=0x7fad30016920) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4278
|
#31 0x000055fb80c25662 in mysql_select (thd=0x7fad30000d90, tables=0x7fad30015950, wild_num=0, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7fad300168f8, unit=0x7fad30004cc0, select_lex=0x7fad30013548) at /home/varun/MariaDB/10.4/sql/sql_select.cc:4713
|
Patch addressing part of the problem
http://lists.askmonty.org/pipermail/commits/2020-October/014343.html
Ok, so I see two issues that have caused this MDEV.
Both are caused by two features not taking each other into account:
- There are special kinds of IO_CACHE objects that read (and write) encrypted data.
- Window functions code uses init_slave_io_cache() to create "slave" IO caches that read the same data.
Issue #1: memory allocation.
An IO_CACHE that uses encryption uses a larger buffer (it needs space for the encrypted data, decrypted data, IO_CACHE_CRYPT struct to describe encryption parameters etc).
The logic to do this is here in init_io_cache():
if (type == SEQ_READ_APPEND) |
buffer_block *= 2;
|
else if (cache_myflags & MY_ENCRYPT) |
buffer_block= 2*(buffer_block + MY_AES_BLOCK_SIZE) + sizeof(IO_CACHE_CRYPT); |
...
|
if ((info->buffer= (uchar*) my_malloc(buffer_block, flags)) != 0) |
init_slave_io_cache() doesn't know about this. It allocates memory like so:
if (!(slave_buf= (uchar*)my_malloc(master->buffer_length, MYF(0)))) |
This is why we get a memory overrun.
Varun's patch tries to fix this by copying the logic:
- if (!(slave_buf= (uchar*)my_malloc(master->buffer_length, MYF(0))))
|
+ buffer_length= master->buffer_length;
|
+ if (master->type == SEQ_READ_APPEND)
|
+ buffer_length *= 2;
|
+ else if (master->myflags & MY_ENCRYPT)
|
+ buffer_length= 2*(buffer_length + MY_AES_BLOCK_SIZE) + sizeof(IO_CACHE_CRYPT);
|
+ |
I would suggest a dedicated member saying how much was allocated.
e.g. change IO_CACHE::alloced_buffer from bool to size_t.
Issue #2: IO_CACHE::seek_not_done
When IO_CACHE objects are cloned, they still share the file descriptor.
This means, operation on one IO_CACHE may change the file read position which will confuse other IO_CACHEs using it.
In order to avoid this, there is IO_CACHE::next_file_user which is a circular list of objects using the same file. If an IO_CACHE operates on the file, it it sets IO_CACHE::seek_not_done=1 for every other IO_CACHE in the list.
_my_b_cache_read has pieces like:
if (info->next_file_user) |
{
|
IO_CACHE *c;
|
for (c= info->next_file_user; |
c!= info;
|
c= c->next_file_user)
|
{
|
c->seek_not_done= 1;
|
}
|
}
|
But my_b_encr_read doesn't do this. This needs to be fixed.
varun but you've also mentioned there was a 3rd problem? I can't observe it on my side- when I apply the attached patch, the example works correctly for me.
psergey the problem you mentioned in Issue #2 is the one I was talking about.
I added a hack in the patch to do it
diff --git a/sql/sql_window.cc b/sql/sql_window.cc
|
index abb3937096f..6fc2f65b299 100644 |
--- a/sql/sql_window.cc
|
+++ b/sql/sql_window.cc
|
@@ -2865,6 +2865,8 @@ bool compute_window_func(THD *thd, |
save_window_function_values(window_functions, tbl, rowid_buf);
|
|
rownum++;
|
+ if (info.io_cache) |
+ info.io_cache->seek_not_done= true; |
}
|
So your approach covers it
Draft patch: https://github.com/MariaDB/server/commit/e877e752ba2c0f107d4732301dc4a41d2d57a79d .
Input provided over email.
Two concerns about the write -> pwrite conversion:
Concern #1:
There are two calls in mf_iocache.c left which depend on the file's current position:
mf_iocache.c|1707| if (mysql_file_write(info->file,Buffer, length, info->myflags | MY_NABP))
|
mf_iocache.c|1828| if (mysql_file_write(info->file, info->write_buffer, length,
|
Imagine the following situation:
Execution before this patch:
// open file for writing
|
mysql_file_write(...) // A call made at some other point than the above two calls
|
mysql_file_write(...) // One of the above two calls.
|
After the patch this becomes:
// open file for writing
|
mysql_file_pwrite(...) // Note that this doesn't change the file position
|
mysql_file_write(...) // One of the above two calls.
|
// ^ the above writes at "current position" which is 0, so the write goes not
|
// where it was intended.
|
Is there any logic which would allow to say that the above will not happen?
Concern #2 is this code in mi_check.c:
old_file=info->dfile;
|
info->dfile=info->rec_cache.file;
|
Does the code that follows this assume that info->dfile is seeked to some particular offset?
if (sort_info->current_key) |
{
|
key=info->lastkey+info->s->base.max_key_length;
|
if ((error=(*info->s->read_rnd)(info,sort_param->record,info->lastpos,0)) && |
error != HA_ERR_RECORD_DELETED)
|
{
|
mi_check_print_error(param,"Can't read record to be removed"); |
info->dfile=old_file;
|
DBUG_RETURN(1);
|
}
|
|
for (i=0 ; i < sort_info->current_key ; i++) |
{
|
uint key_length=_mi_make_key(info,i,key,sort_param->record,info->lastpos);
|
if (_mi_ck_delete(info,i,key,key_length)) |
{
|
mi_check_print_error(param,"Can't delete key %d from record to be removed",i+1); |
info->dfile=old_file;
|
DBUG_RETURN(1);
|
}
|
}
|
if (sort_param->calc_checksum) |
param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record);
|
}
|
error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info);
|
info->dfile=old_file; /* restore actual value */ |
.. and I have the same question about sort_delete_record() in storage/maria/ma_check.c.
It also has
row_info->dfile.file= row_info->rec_cache.file;
|
if (flush_io_cache(&row_info->rec_cache)) |
DBUG_RETURN(1);
|
...
|
Before the patch, row_info->rec_cache.file was "seeked" to somewhere, now it is not.
Any thoughts? If it is not immediately clear, let's ask Monty (this is very old code so I dont think anybody else rememers).
No problem with mi_check or ma_check(), They are taking the file descriptor of the newly re-created data file and removing a duplicate row from it. The remove-row code is using pread/pwrite and thus is not affected by the position of the IO_CACHE file descriptor and will not affect the IO_CACHE code
Take-aways from yesterday discussion:
- We cannot change mysql_file_read -> mysql_file_pread everywhere (as sometimes IO_CACHE is used to read from FIFOs)
- The impact of the "use pread" approach is much larger than we anticipated.
Due to this:
- Lets switch to the older version of the patch, with seeks and updating seek_not_done for other slaves.
- Then, we will make the larger patch which uses pread where appropriate and push that to a development version.
Ok to push the "conservative" fix for aec8072f99ef4b1a4a966692a1cb761abde96cf0 .
varun please file an MDEV for the "complete" fix and link with this one
rpizzi can you also attach the global variables list