|
The issue doesn't have anything to do with the amount of memory actually.
It fails here:
(gdb) wher
|
#0 ha_innobase::open (this=0x7ffe541f1598, name=0x7ffe5407fcd0 "./test/t1") at /home/psergey/dev-git/10.3/storage/innobase/handler/ha_innodb.cc:6105
|
#1 0x0000555555f438af in handler::ha_open (this=0x7ffe541f1598, table_arg=0x7ffe541c51e0, name=0x7ffe5407fcd0 "./test/t1", mode=33, test_if_locked=2, mem_root=0x7ffe540062a0, partitions_to_open=0x0) at /home/psergey/dev-git/10.3/sql/handler.cc:2760
|
#2 0x0000555555f431f9 in handler::clone (this=0x7ffe541c6688, name=0x7ffe5407fcd0 "./test/t1", mem_root=0x7ffe540062a0) at /home/psergey/dev-git/10.3/sql/handler.cc:2663
|
#3 0x000055555616b325 in ha_innobase::clone (this=0x7ffe541c6688, name=0x7ffe5407fcd0 "./test/t1", mem_root=0x7ffe540062a0) at /home/psergey/dev-git/10.3/storage/innobase/handler/ha_innodb.cc:6470
|
#4 0x0000555555de89f0 in DsMrr_impl::setup_two_handlers (this=0x7ffe541c6ad8) at /home/psergey/dev-git/10.3/sql/multi_range_read.cc:1060
|
#5 0x0000555555de8566 in DsMrr_impl::dsmrr_init (this=0x7ffe541c6ad8, h_arg=0x7ffe541c6688, seq_funcs=0x7ffe541c65c8, seq_init_param=0x7ffe5406d420, n_ranges=1, mode=1028, buf=0x7ffe5406cee0) at /home/psergey/dev-git/10.3/sql/multi_range_read.cc:959
|
#6 0x0000555556186480 in ha_innobase::multi_range_read_init (this=0x7ffe541c6688, seq=0x7ffe541c65c8, seq_init_param=0x7ffe5406d420, n_ranges=1, mode=1028, buf=0x7ffe5406cee0) at /home/psergey/dev-git/10.3/storage/innobase/handler/ha_innodb.cc:20419
|
#7 0x000055555676bdc6 in ha_partition::multi_range_read_init (this=0x7ffe541c5e28, seq=0x7ffff016cde0, seq_init_param=0x7ffe541ef170, n_ranges=1, mrr_mode=1028, buf=0x7ffe54261750) at /home/psergey/dev-git/10.3/sql/ha_partition.cc:6467
|
#8 0x00005555560c3d4f in QUICK_RANGE_SELECT::reset (this=0x7ffe541ef170) at /home/psergey/dev-git/10.3/sql/opt_range.cc:11450
|
#9 0x0000555555f374b2 in find_all_keys (thd=0x7ffe54000d60, param=0x7ffff016d080, select=0x7ffe541f03c8, fs_info=0x7ffe54090760, buffpek_pointers=0x7ffff016d280, tempfile=0x7ffff016d110, pq=0x0, found_rows=0x7ffe54090940) at /home/psergey/dev-git/10.3/sql/filesort.cc:763
|
#10 0x0000555555f35a7b in filesort (thd=0x7ffe54000d60, table=0x7ffe541c51e0, filesort=0x7ffe541f0918, tracker=0x7ffe541f0fb8, join=0x7ffe54015530, first_table_bit=1) at /home/psergey/dev-git/10.3/sql/filesort.cc:268
|
#11 0x0000555555ca7a61 in create_sort_index (thd=0x7ffe54000d60, join=0x7ffe54015530, tab=0x7ffe541ed268, fsort=0x7ffe541f0918) at /home/psergey/dev-git/10.3/sql/sql_select.cc:22714
|
#12 0x0000555555ca1d06 in st_join_table::sort_table (this=0x7ffe541ed268) at /home/psergey/dev-git/10.3/sql/sql_select.cc:20480
|
#13 0x0000555555ca18e1 in join_init_read_record (tab=0x7ffe541ed268) at /home/psergey/dev-git/10.3/sql/sql_select.cc:20421
|
#14 0x0000555555c9f679 in sub_select (join=0x7ffe54015530, join_tab=0x7ffe541ed268, end_of_records=false) at /home/psergey/dev-git/10.3/sql/sql_select.cc:19502
|
#15 0x0000555555c9eb65 in do_select (join=0x7ffe54015530, procedure=0x0) at /home/psergey/dev-git/10.3/sql/sql_select.cc:19045
|
#16 0x0000555555c75eba in JOIN::exec_inner (this=0x7ffe54015530) at /home/psergey/dev-git/10.3/sql/sql_select.cc:4044
|
|
|
When the table is opened at the start of the query, the call stack is as follows:
ha_partition::open (this=0x7ffe540261b8, name=0x7ffe540247f0 "./test/t1", mode=2, test_if_locked=18) at /home/psergey/dev-git/10.3/sql/ha_partition.cc:3589
|
this calls create_partition_name() and then calls:
ha_partition::open_read_partitions (this=0x7ffe540261b8, name_buff=0x7ffff016ce60 "./test/t1#P#p1", ...) at /home/psergey/dev-git/10.3/sql/ha_partition.cc:8449
|
This calls
ha_innobase::open (this=0x7ffe5403e078, name=0x7ffff016ce60 "./test/t1#P#p1") ...
|
Note the name including the partition suffix.
|
|
MRR support into ha_partition was added in 10.3, by this patch :
commit 8eeb689e9fc57afe19a8dbff354b5f9f167867a9
|
Author: Monty <monty@mariadb.org> Wed Jul 5 18:20:06 2017
|
Committer: Monty <monty@mariadb.org> Sun Dec 3 14:58:34 2017
|
|
Adding multi_range_read support to partitions
|
|
|
A patch to fix handler::clone to work when the cloned table is a partition:
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
|
index 016de40e3bc..331aafb89dd 100644
|
--- a/storage/innobase/handler/ha_innodb.cc
|
+++ b/storage/innobase/handler/ha_innodb.cc
|
@@ -6467,7 +6467,7 @@ ha_innobase::clone(
|
DBUG_ENTER("ha_innobase::clone");
|
|
ha_innobase* new_handler = static_cast<ha_innobase*>(
|
- handler::clone(name, mem_root));
|
+ handler::clone(m_prebuilt->table->name.m_name, mem_root));
|
|
if (new_handler != NULL) {
|
DBUG_ASSERT(new_handler->m_prebuilt != NULL);
|
The issue was that the passed "name" parameter holds the table name, not the partition name. In InnoDB, we can get the name from the ha_innobase object we are making a clone of.
Another patch to handle cleanup after the MRR scan is done:
(The issue was that if you call ha_innobase_object->multi_range_read_init() it will have inited=RND, not inited=INDEX. ha_partition would see inited!=INDEX, and not call index_end(), and later we assert in ha_innobase::external_unlock())
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
|
index 5a78249644d..ec3c0715f45 100644
|
--- a/sql/ha_partition.cc
|
+++ b/sql/ha_partition.cc
|
@@ -5481,6 +5481,13 @@ int ha_partition::index_end()
|
if ((tmp= (*file)->ha_index_end()))
|
error= tmp;
|
}
|
+ else if ((*file)->inited == RND)
|
+ {
|
+ // Possible due to MRR
|
+ int tmp;
|
+ if ((tmp= (*file)->ha_rnd_end()))
|
+ error= tmp;
|
+ }
|
} while (*(++file));
|
destroy_record_priority_queue();
|
DBUG_RETURN(error);
|
|
|
But the issues do not end here. I put a breakpoint in ha_innobase::multi_range_read_init:
Thread 38 "mysqld" hit Breakpoint 17, ha_innobase::multi_range_read_init (this=0x7ffe5402b798, seq=0x7ffe5402b6d8, seq_init_param=0x7ffe54011450, n_ranges=1, mode=1028, buf=0x7ffe540113b8) at /home/psergey/dev-git/10.3/storage/innobase/handler/ ha_innodb.cc:20418
|
$389 = (ha_innobase * const) 0x7ffe5402b798
|
$390 = {buffer = 0x7ffe540378b8 '\245' <repeats 200 times>..., buffer_end = 0x7ffe540778b8 "h4z\025", end_of_used_area = 0x7ffe540378b8 '\245' <repeats 200 times>...}
|
(gdb) c
|
Continuing.
|
...
Thread 38 "mysqld" hit Breakpoint 17, ha_innobase::multi_range_read_init (this=0x7ffe5402bf48, seq=0x7ffe5402b6d8, seq_init_param=0x7ffe54011468, n_ranges=1, mode=1028, buf=0x7ffe540113d0) at /home/psergey/dev-git/10.3/storage/innobase/handler/ ha_innodb.cc:20418
|
$391 = (ha_innobase * const) 0x7ffe5402bf48
|
$392 = {buffer = 0x7ffe540378b8 "\\", buffer_end = 0x7ffe540778b8 "h4z\025", end_of_used_area = 0x7ffe540378b8 "\\"}
|
(gdb) c
|
Continuing.
|
...
Thread 38 "mysqld" hit Breakpoint 17, ha_innobase::multi_range_read_init (this=0x7ffe5402c6f8, seq=0x7ffe5402b6d8, seq_init_param=0x7ffe54011480, n_ranges=1, mode=1028, buf=0x7ffe540113e8) at /home/psergey/dev-git/10.3/storage/innobase/handler/ ha_innodb.cc:20418
|
$393 = (ha_innobase * const) 0x7ffe5402c6f8
|
$394 = {buffer = 0x7ffe540378b8 "`", buffer_end = 0x7ffe540778b8 "h4z\025", end_of_used_area = 0x7ffe540378b8 "`"}
|
One can see that different ha_innobase objects get the same buffer space!
|
|
The query I was debugging was:
select * from t1 force index (key_col) where key_col < 10;
|
the records are not required to be returned in order (and indeed, the execution enters ha_partition::handle_unordered_scan_next_partition).
Still, ha_partitiion code first calls multi_range_range_read_init() for all partitions, then calls multi_range_read_next for them. This means that it can't use the provided buffer space for partition #1, then for partition #2, etc. It should divide the buffer space between the partitions. Each partition will get 1/N -th of buffer space. This will make the MRR scan less efficient.
|
|
As of the issue title, this seems not to be InnoDB-specific - just reproduced the same with MySIAM & Aria on customer request.
|
|
The patch is in bb-10.3-mdev20611 branch
|
|
The fix is pushed into 10.3 branch
|
|
Hi, Sergei,
Many thanks for your quick help on this.
Apologies for the stupid question, is this fix going to 10.4 mainline also?
It seems to be equally affected as 10.3, but ticket only says 10.3 got
updated.
With my best regards,
Assen
On Tue, 2019-11-19 at 11:14 +0000, Sergei Petrunia (Jira) wrote:
[
https://jira.mariadb.org/browse/MDEV-20611?focusedWorklogId=62211&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-62211
]
Sergei Petrunia logged work on MDEV-20611:
------------------------------------------
Author: Sergei Petrunia
Created on: 2019-11-19 11:13
Start Date: 2019-11-19 11:13
Worklog Time Spent: 1d
Issue Time Tracking
-------------------
Time Spent: 2d 1.5h (was: 1d 1.5h)
Worklog Id: (was: 62211)
MRR scan over partitioned InnoDB table produces "Out of memory" error
----------------------------------------------------------------------
Key: MDEV-20611
URL:
https://jira.mariadb.org/browse/MDEV-20611
Project: MariaDB Server
Issue Type: Bug
Components: Partitioning
Affects Versions: 10.3
Reporter: Sergei Petrunia
Assignee: Sergei Petrunia
Priority: Major
Fix For: 10.3.21
Original Estimate: 0d
Time Spent: 2d 1.5h
Remaining Estimate: 0d
Run this testcase
|
--source include/have_partition.inc
|
|
--source include/have_innodb.inc
|
|
create table ten(a int);
|
|
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
|
create table one_k(a int);
|
|
insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
|
|
create table t1 (
|
|
ID bigint(20) NOT NULL AUTO_INCREMENT,
|
|
part_id int,
|
|
key_col int,
|
|
col2 int,
|
|
key(key_col),
|
|
PRIMARY KEY (ID,part_id)
|
|
) ENGINE=InnoDB
|
|
PARTITION BY RANGE (part_id)
|
|
(PARTITION p1 VALUES LESS THAN (3) ENGINE = InnoDB,
|
|
PARTITION p2 VALUES LESS THAN (7) ENGINE = InnoDB,
|
|
PARTITION p3 VALUES LESS THAN (10) ENGINE = InnoDB
|
|
);
|
|
insert into t1 select
|
|
A.a+10*B.a,
|
|
A.a,
|
|
B.a,
|
|
123456
|
|
from ten A, ten B;
|
|
set optimizer_switch='mrr=on';
|
|
explain
|
|
select * from t1 force index (key_col) where key_col < 10;
|
|
select * from t1 force index (key_col) where key_col < 10;
|
|
drop table ten,one_k,t1;
|
|
And it will fail with:
|
mysqltest: At line 35: query 'select * from t1 force index (key_col)
|
where key_col < 10' failed: 1041: Out of memory.
|
|
–
This message was sent by Atlassian Jira
(v7.13.1#713001)
–
Assen Totin
Senior Engineer R&D
mariadb.com
<https://www.google.com/url?q=https%3A%2F%2Fmariadb.com%2F&sa=D&sntz=1&usg=AFQjCNFS4fJgsXfpe7F6PINCUzBni5FtsA>
<https://www.google.com/url?q=https%3A%2F%2Ftwitter.com%2Fmariadb&sa=D&sntz=1&usg=AFQjCNERCXZqmq46PUqauiXzqbzGqgx9kQ>
<https://www.google.com/url?q=https%3A%2F%2Fwww.facebook.com%2FMariaDB.dbms%2F&sa=D&sntz=1&usg=AFQjCNF1Dsdcq_ddJzBgHTcTNYMpWunUZQ>
<https://www.google.com/url?q=https%3A%2F%2Fwww.linkedin.com%2Fcompany%2Fmariadb-corporation%2F&sa=D&sntz=1&usg=AFQjCNGxMQaiP0NxDaeYJLxzWpSGQl4z-A>
|
|
Hi assen.totin,
Yes, the fix will be merged into 10.4 tree before the next 10.4 release. I've now set the fixVersion accordingly.
|