|
Here if we rewrite the query to have th entire HAVING clause in the WHERE clause, the crash happens on 10.3 too
|
SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2) AND (a, a) IN (SELECT 'qux', 'qux');
|
|
|
Indeed, and even on 10.2, only on 10.2 and 10.3 the stack trace does not have the frame Charset::set_charset (at least on my machine):
|
10.2 51b7438d
|
#3 <signal handler called>
|
#4 0x00005575f71ddc62 in String::copy (this=0x7f8974017c20, str=...) at /data/src/10.2/sql/sql_string.cc:176
|
#5 0x00005575f73d38e5 in Item_cache_str::cache_value (this=0x7f8974017b18) at /data/src/10.2/sql/item.cc:10086
|
#6 0x00005575f73d41e5 in Item_cache_row::cache_value (this=0x7f8974017578) at /data/src/10.2/sql/item.cc:10209
|
#7 0x00005575f73e4a01 in Item_in_optimizer::val_int (this=0x7f89740174a0) at /data/src/10.2/sql/item_cmpfunc.cc:1588
|
#8 0x00005575f73b6185 in Item::val_bool (this=0x7f89740174a0) at /data/src/10.2/sql/item.cc:112
|
#9 0x00005575f71a7eac in Item::eval_const_cond (this=0x7f89740174a0) at /data/src/10.2/sql/item.h:1190
|
#10 0x00005575f7188680 in Item::remove_eq_conds (this=0x7f89740174a0, thd=0x7f8974000d90, cond_value=0x7f898b7fc0d4, top_level_arg=false) at /data/src/10.2/sql/sql_select.cc:15936
|
#11 0x00005575f7187e2b in Item_cond::remove_eq_conds (this=0x7f8974015a80, thd=0x7f8974000d90, cond_value=0x7f8974015fc8, top_level_arg=true) at /data/src/10.2/sql/sql_select.cc:15759
|
#12 0x00005575f71875d5 in optimize_cond (join=0x7f8974015cb8, conds=0x7f8974015a80, join_list=0x7f8974005268, ignore_on_conds=false, cond_value=0x7f8974015fc8, cond_equal=0x7f89740160f0, flags=1) at /data/src/10.2/sql/sql_select.cc:15492
|
#13 0x00005575f71612fe in JOIN::optimize_inner (this=0x7f8974015cb8) at /data/src/10.2/sql/sql_select.cc:1372
|
#14 0x00005575f71603b8 in JOIN::optimize (this=0x7f8974015cb8) at /data/src/10.2/sql/sql_select.cc:1117
|
#15 0x00005575f71698f6 in mysql_select (thd=0x7f8974000d90, tables=0x7f8974012960, wild_num=0, fields=..., conds=0x7f8974015a80, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7f8974015c98, unit=0x7f8974004988, select_lex=0x7f89740050c8) at /data/src/10.2/sql/sql_select.cc:3822
|
#16 0x00005575f715db00 in handle_select (thd=0x7f8974000d90, lex=0x7f89740048c8, result=0x7f8974015c98, setup_tables_done_option=0) at /data/src/10.2/sql/sql_select.cc:361
|
#17 0x00005575f7128453 in execute_sqlcom_select (thd=0x7f8974000d90, all_tables=0x7f8974012960) at /data/src/10.2/sql/sql_parse.cc:6247
|
#18 0x00005575f711ee02 in mysql_execute_command (thd=0x7f8974000d90) at /data/src/10.2/sql/sql_parse.cc:3558
|
#19 0x00005575f712c1fb in mysql_parse (thd=0x7f8974000d90, rawbuf=0x7f89740126f8 "SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2) AND (a, a) IN (SELECT 'qux', 'qux')", length=86, parser_state=0x7f898b7fd5f0, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:7762
|
#20 0x00005575f711a4f4 in dispatch_command (command=COM_QUERY, thd=0x7f8974000d90, packet=0x7f8974008b51 "SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2) AND (a, a) IN (SELECT 'qux', 'qux')", packet_length=86, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:1827
|
#21 0x00005575f7118fef in do_command (thd=0x7f8974000d90) at /data/src/10.2/sql/sql_parse.cc:1381
|
#22 0x00005575f72733ee in do_handle_one_connection (connect=0x5575faf739c0) at /data/src/10.2/sql/sql_connect.cc:1336
|
#23 0x00005575f7273153 in handle_one_connection (arg=0x5575faf739c0) at /data/src/10.2/sql/sql_connect.cc:1241
|
#24 0x00005575f7a9ae86 in pfs_spawn_thread (arg=0x5575faf56d80) at /data/src/10.2/storage/perfschema/pfs.cc:1869
|
#25 0x00007f8995bbc609 in start_thread (arg=<optimized out>) at pthread_create.c:477
|
#26 0x00007f8995798293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
|
I've updated the affects/fix versions, please feel free to modify the title to something more appropriate.
|
|
Here the where clause after mutiple equalities are created and then constants are propagated
(<in_optimizer>(((subquery#2),(subquery#2)),<exists>(subquery#3)) and multiple equal((subquery#2), t1.a)"
|
|
|
Ok trying to do the equality propagation manually, this query is the same as the above
SELECT a FROM t1 WHERE ((SELECT MIN(b) FROM t2), (SELECT MIN(b) FROM t2)) IN (SELECT 'qux', 'qux');
|
So when i run this query I get
MariaDB [test]> SELECT a FROM t1 WHERE ((SELECT MIN(b) FROM t2), (SELECT MIN(b) FROM t2)) IN (SELECT 'qux', 'qux');
|
ERROR 1235 (42000): This version of MariaDB doesn't yet support 'SUBQUERY in ROW in left expression of IN/ALL/ANY'
|
igor do you think the equality propagation in the example above needs to be done? When I run the query by doing the propagation manually it says that subqueries are not allowed in ROW in left expression of IN/ALL/ANY subquery
|
|
After some analysis I came to the real cause of the problem with the query:
SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2) AND (a, a) IN (SELECT 'qux', 'qux');
|
in 10.2+.
The following diff demonstrates the problematic code. The similar changes are needed for all corresponding functions of item_subselect.cc where forced_const were introduced by Timour in 2012 (see the commit c04786d3e3d4fad53b46604ce37643f3ea4da1fa).
--- a/sql/item_subselect.cc
|
+++ b/sql/item_subselect.cc
|
@@ -1352,7 +1352,11 @@ String *Item_singlerow_subselect::val_str(String *str)
|
{
|
DBUG_ASSERT(fixed == 1);
|
if (forced_const)
|
+ {
|
+ if ((null_value= value->null_value))
|
+ return 0;
|
return value->val_str(str);
|
+ }
|
if (!exec() && !value->null_value)
|
{
|
null_value= FALSE;
|
The original test of Elena should not be added to the patch fixing the bug in 10.2 as no multiple equalities are built in HAVING in 10.2 (due to some bug in check_simple_equality() fixed by Galina in 10.4 in her development for pushdown from HAVING to WHERE). Elena's test case should be added after I fix MDEV-23983.
|
|
Patch
http://lists.askmonty.org/pipermail/commits/2021-January/014434.html
|
|
When executing the query
|
SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2) AND (a, a) IN (SELECT 'qux', 'qux');
|
at the optimization stage substitute (a,a) for (sq,sq) where sq the item created for the subquery
(SELECT MIN(b) FROM t2)ive
|
As the condition (sq, sql) IN (SELECT 'qux', 'qux') is a constant and inexpensive remove_eq_cond tries to evaluate it. When doing this it tries to cache the value of the row (sq,sq). Here it fails after having successfully evaluated the first occurrence of sq. For the second occurrence it uses the value of the first occurrence but does not update properly the flag null_value set previously to false by the call of Item_singlerow_subselect::bring_value() for the first occurrence of sq. This causes a crash in the call of Item_singlerow_subselect::cache_value() for the second occurrence of sq.
Interesting enough that one can't reproduce the problem when sq is encountered not in a row.
|
|
If we fill the table t2 with several rows
INSERT INTO t2 VALUES ('xxx'), ('yyy');
|
INSERT INTO t2 SELECT * FROM t2;
|
INSERT INTO t2 SELECT * FROM t2;
|
and slightly change the query for:
SELECT a FROM t1 WHERE a = (SELECT MIN(b) FROM t2 WHERE b='aaa') AND (a, a) IN (SELECT
|
'qux', 'qux');
|
the problem persists as far the subquery sq remains inexpensive.
Yet if we add more records to t2
INSERT INTO t2 SELECT * FROM t2;
|
INSERT INTO t2 SELECT * FROM t2;
|
INSERT INTO t2 SELECT * FROM t2;
|
INSERT INTO t2 SELECT * FROM t2;
|
INSERT INTO t2 SELECT * FROM t2;
|
the query finishes successfully.
In this case no multiple equality is built and no substitution into the row (a,a) are done.
|
|
This fix for the problem looks better and more adequate as the one suggested above as it touches the code only of one function Item_singlerow_subselect::bring_value().
void Item_singlerow_subselect::bring_value()
|
{
|
+ if (forced_const)
|
+ return;
|
if (!exec() && assigned())
|
- null_value= 0;
|
+ null_value= forced_const && max_columns == 1 ? value->null_value : 0;
|
else
|
reset();
|
}
|
The patch shows that the problem is actually with this function, not with val_* methods of Item_singlerow_subselect.
It's not clear how to fix the problem for multi-column single-row subselects. However I failed to build a legitimate query with multi-column single-row subselect that would demonstrate the problem
|
|
It looks like that he problem could be resolved by evaluating the subquery sq when we build Item_equal for the equality. I did not check this variant though.
|
|
This bug concerns queries that hardly can be expected in applications.
|
|
Patch
http://lists.askmonty.org/pipermail/commits/2021-January/014439.html
|
|
It looks like that Item_cache_row::bring_value() has lines that can be removed without causing any problem:
--- a/sql/item.cc
|
+++ b/sql/item.cc
|
@@ -10298,8 +10298,6 @@ void Item_cache_row::bring_value()
|
return;
|
example->bring_value();
|
null_value= example->null_value;
|
- for (uint i= 0; i < item_count; i++)
|
- values[i]->bring_value();
|
}
|
They became unneeded after the following commit:
commit da7646b64c7e76930e05c0235c724872416ba1dc
|
Author: Alexey Kopytov <Alexey.Kopytov@Sun.com> Thu Sep 9 07:44:53 2010
|
Committer: Alexey Kopytov <Alexey.Kopytov@Sun.com> Thu Sep 9 07:44:53 2010
|
|
|
Interesting that the following patch also fixes the problem:
--- a/sql/item_subselect.cc
|
+++ b/sql/item_subselect.cc
|
@@ -1308,8 +1308,18 @@ bool Item_singlerow_subselect::null_inside()
|
|
void Item_singlerow_subselect::bring_value()
|
{
|
+ if (!unit->uncacheable && engine->is_executed())
|
+ return;
|
if (!exec() && assigned())
|
+ {
|
null_value= 0;
|
+ if (!unit->uncacheable)
|
+ {
|
+ null_value= 1;
|
+ for (uint i= 0; i < max_columns ;i++)
|
+ null_value&= row[i]->null_value;
|
+ }
|
+ }
|
else
|
reset();
|
}
|
This patch does not call exec() for a cacheable subquery if it has been already executed.
It looks like we still have some inconsistency in how we set null_value flags for Item_row objects and Item_cache_row objects
|
|
Made a couple of patch With the help of monty.
Patch1:
https://github.com/MariaDB/server/commit/d6782fcded7cfe5f561359124153d5deaba28ec9
Patch2:
https://github.com/MariaDB/server/commit/a87320e335b6345f1ce04eea8b790a741c1ec23e
|
|
I continued investigating the problem and came to the conclusion that it is a result of different interpretations of the null_value flag for row values. Some code assumes that the flag null_value for a row is set to true if every component of the row has null_value flag set to true (see, for example, the code that compares two row values). In other pieces of code the flag is set to true if there is a component with its null_value flag set to true:
bool Item_cache_row::cache_value()
|
{
|
if (!example)
|
return FALSE;
|
value_cached= TRUE;
|
null_value= 0;
|
example->bring_value();
|
for (uint i= 0; i < item_count; i++)
|
{
|
values[i]->cache_value();
|
null_value|= values[i]->null_value;
|
}
|
return TRUE;
|
}
|
Another function that uses such interpretation of the null_value flag in row items is Item_in_optimizer::val_int().
In the following patch I tried to make sure that we use only the first interpretation of the null_value flag in row item (it seems to me that was the original one).
diff --git a/sql/item.cc b/sql/item.cc
|
index e633964..36b7a3e 100644
|
--- a/sql/item.cc
|
+++ b/sql/item.cc
|
@@ -10218,12 +10218,13 @@ bool Item_cache_row::cache_value()
|
if (!example)
|
return FALSE;
|
value_cached= TRUE;
|
- null_value= 0;
|
example->bring_value();
|
+ null_value= 1;
|
for (uint i= 0; i < item_count; i++)
|
{
|
+ values[i]->bring_value();
|
values[i]->cache_value();
|
- null_value|= values[i]->null_value;
|
+ null_value&= values[i]->null_value;
|
}
|
return TRUE;
|
}
|
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
|
index d16c741..ecaeb39 100644
|
--- a/sql/item_cmpfunc.cc
|
+++ b/sql/item_cmpfunc.cc
|
@@ -1596,7 +1596,7 @@ longlong Item_in_optimizer::val_int()
|
DBUG_RETURN(res);
|
}
|
|
- if (cache->null_value)
|
+ if (cache->null_value || cache->null_inside())
|
{
|
DBUG_PRINT("info", ("Left NULL..."));
|
/*
|
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
|
index 802bfca..56b3b1b 100644
|
--- a/sql/item_subselect.cc
|
+++ b/sql/item_subselect.cc
|
@@ -831,7 +831,8 @@ bool Item_subselect::expr_cache_is_needed(THD *thd)
|
|
inline bool Item_in_subselect::left_expr_has_null()
|
{
|
- return (*(optimizer->get_cache()))->null_value;
|
+ return (*(optimizer->get_cache()))->null_value ||
|
+ (*(optimizer->get_cache()))->null_inside();
|
}
|
|
|
@@ -1299,7 +1300,17 @@ bool Item_singlerow_subselect::null_inside()
|
void Item_singlerow_subselect::bring_value()
|
{
|
if (!exec() && assigned())
|
- null_value= 0;
|
+ {
|
+ null_value= 1;
|
+ for (uint i= 0; i < max_columns ; i++)
|
+ {
|
+ if (!row[i]->null_value)
|
+ {
|
+ null_value= 0;
|
+ return;
|
+ }
|
+ }
|
+ }
|
else
|
reset();
|
}
|
All tests passed with this patch (as well as the reported test case of course). Yet I'm not sure that I covered all pieces of code where the interpretations of the null_value flag in rows differed.
|