[MDEV-25362] Incorrect name resolution for subqueries in ON expressions Created: 2021-04-07  Updated: 2021-04-22  Resolved: 2021-04-19

Status: Closed
Project: MariaDB Server
Component/s: Server
Affects Version/s: 10.2, 10.3, 10.4, 10.5, 10.6
Fix Version/s: 10.2.38, 10.3.29, 10.4.19, 10.5.10, 10.6.0

Type: Bug Priority: Major
Reporter: Sergei Petrunia Assignee: Igor Babaev
Resolution: Fixed Votes: 0
Labels: None

Issue Links:
Relates
relates to MDEV-25346 Server crashes in Item_field::fix_out... Closed

 Description   

(I've found this issue when fixing MDEV-25202. the fix is a part of MDEV-25202 fix )
ON expressions inside a join nest cannot refer to tables outside the nest. This can be "circumvented" by putting the outside reference into a subquery.

create table t1 (a int, b int);
create table t2 (c int, d int);
create table t3 (e int, f int);
create table t4 (g int, h int);

A reference to t1.a in the ON expression produces an error, as expected:

explain 
select * 
from 
  t1 left join 
   (t2 
    join
    t3 on 
    (t3.f=t1.a)
   ) on (t2.c=t1.a );

ERROR 1054 (42S22): Unknown column 't1.a' in 'on clause'

A reference from a subquery does not produce an error:

explain 
select * 
from 
  t1 left join 
   (t2 
    join 
    t3 on 
    (t3.f=(select max(g) from t4 where t4.h=t1.a))
   ) on (t2.c=t1.a );

This is wrong/



 Comments   
Comment by Sergei Petrunia [ 2021-04-07 ]

http://lists.askmonty.org/pipermail/commits/2021-April/014539.html

Comment by Igor Babaev [ 2021-04-07 ]

Sergey,

The above fix should be applied to 10.4 (without test cases). The test cases must be added to the corresponding fix that would be good for 10.2, 10.3.

Comment by Oleksandr Byelkin [ 2021-04-08 ]

OK

Comment by Sergei Petrunia [ 2021-04-08 ]

The above suggested fix for this issue causes MDEV-25346. Apply the patch for MDEV-25346 together with the above fix.

Comment by Oleksandr Byelkin [ 2021-04-16 ]

OK as a fix for 10.6

Comment by Oleksandr Byelkin [ 2021-04-16 ]

the patch for 10.2 is OK to push

Comment by Igor Babaev [ 2021-04-19 ]

Here's the diff for those who will merge the patch into 10.4:

diff --git a/mysql-test/main/join_outer.result b/mysql-test/main/join_outer.result
index c92b8e6..362335a 100644
--- a/mysql-test/main/join_outer.result
+++ b/mysql-test/main/join_outer.result
@@ -2687,6 +2687,46 @@ id       timestamp       modifiedBy      id      REV     REVTYPE profile_id      id      REV     person_id       id      REV
 DROP TABLE t1,t2,t3,t4;
 # end of 10.1 tests
 #
+# MDEV-25362: name resolution for subqueries in ON expressions
+#
+create table t1 (a int, b int);
+create table t2 (c int, d int);
+create table t3 (e int, f int);
+create table t4 (g int, h int);
+explain
+select *
+from
+t1 left join
+(t2
+join
+t3 on
+(t3.f=t1.a)
+) on (t2.c=t1.a );
+ERROR 42S22: Unknown column 't1.a' in 'on clause'
+explain
+select *
+from
+t1 left join
+(t2
+join
+t3 on
+(t3.f=(select max(g) from t4 where t4.h=t1.a))
+) on (t2.c=t1.a );
+ERROR 42S22: Unknown column 't1.a' in 'where clause'
+drop table t1,t2,t3,t4;
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2 (b int);
+insert into t2 values (1),(2);
+create table t3 (c int);
+insert into t3 values (1),(2);
+select * from ( select * from t1 left join t2
+on b in (select x from t3 as sq1)
+) as sq2;
+ERROR 42S22: Unknown column 'x' in 'field list'
+drop table t1,t2,t3;
+# end of 10.2 tests
+#
 # MDEV-17518: Range optimization doesn't use ON expressions from nested outer joins
 #
 create table t1(a int);
diff --git a/mysql-test/main/join_outer.test b/mysql-test/main/join_outer.test
index f835d8a..bf75a52 100644
--- a/mysql-test/main/join_outer.test
+++ b/mysql-test/main/join_outer.test
@@ -2192,6 +2192,56 @@ DROP TABLE t1,t2,t3,t4;
 --echo # end of 10.1 tests
 
 --echo #
+--echo # MDEV-25362: name resolution for subqueries in ON expressions
+--echo #
+
+create table t1 (a int, b int);
+create table t2 (c int, d int);
+create table t3 (e int, f int);
+create table t4 (g int, h int);
+
+--error ER_BAD_FIELD_ERROR
+explain
+select *
+from
+  t1 left join
+   (t2
+    join
+    t3 on
+    (t3.f=t1.a)
+   ) on (t2.c=t1.a );
+
+# This must produce an error:
+--error ER_BAD_FIELD_ERROR
+explain
+select *
+from
+  t1 left join
+   (t2
+    join
+    t3 on
+    (t3.f=(select max(g) from t4 where t4.h=t1.a))
+   ) on (t2.c=t1.a );
+
+drop table t1,t2,t3,t4;
+
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2 (b int);
+insert into t2 values (1),(2);
+create table t3 (c int);
+insert into t3 values (1),(2);
+
+--error ER_BAD_FIELD_ERROR
+select * from ( select * from t1 left join t2
+                              on b in (select x from t3 as sq1)
+              ) as sq2;
+
+drop table t1,t2,t3;
+
+--echo # end of 10.2 tests
+
+--echo #
 --echo # MDEV-17518: Range optimization doesn't use ON expressions from nested outer joins
 --echo #
 create table t1(a int);
diff --git a/mysql-test/main/join_outer_jcl6.result b/mysql-test/main/join_outer_jcl6.result
index 2c7dc7b..2810ffa 100644
--- a/mysql-test/main/join_outer_jcl6.result
+++ b/mysql-test/main/join_outer_jcl6.result
@@ -2694,6 +2694,46 @@ id       timestamp       modifiedBy      id      REV     REVTYPE profile_id      id      REV     person_id       id      REV
 DROP TABLE t1,t2,t3,t4;
 # end of 10.1 tests
 #
+# MDEV-25362: name resolution for subqueries in ON expressions
+#
+create table t1 (a int, b int);
+create table t2 (c int, d int);
+create table t3 (e int, f int);
+create table t4 (g int, h int);
+explain
+select *
+from
+t1 left join
+(t2
+join
+t3 on
+(t3.f=t1.a)
+) on (t2.c=t1.a );
+ERROR 42S22: Unknown column 't1.a' in 'on clause'
+explain
+select *
+from
+t1 left join
+(t2
+join
+t3 on
+(t3.f=(select max(g) from t4 where t4.h=t1.a))
+) on (t2.c=t1.a );
+ERROR 42S22: Unknown column 't1.a' in 'where clause'
+drop table t1,t2,t3,t4;
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2 (b int);
+insert into t2 values (1),(2);
+create table t3 (c int);
+insert into t3 values (1),(2);
+select * from ( select * from t1 left join t2
+on b in (select x from t3 as sq1)
+) as sq2;
+ERROR 42S22: Unknown column 'x' in 'field list'
+drop table t1,t2,t3;
+# end of 10.2 tests
+#
 # MDEV-17518: Range optimization doesn't use ON expressions from nested outer joins
 #
 create table t1(a int);
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 500cb43..2ad31ae 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -596,6 +596,32 @@ bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived)
 }
 
 
+ /*
+  @brief
+    Prevent name resolution out of context of ON expressions in derived tables
+
+  @param
+    join_list  list of tables used in from list of a derived
+
+  @details
+    The function sets the Name_resolution_context::outer_context to NULL
+    for all ON expressions contexts in the given join list. It does this
+    recursively for all nested joins the list contains.
+*/
+
+static void nullify_outer_context_for_on_clauses(List<TABLE_LIST>& join_list)
+{
+  List_iterator<TABLE_LIST> li(join_list);
+  while (TABLE_LIST *table= li++)
+  {
+    if (table->on_context)
+      table->on_context->outer_context= NULL;
+    if (table->nested_join)
+      nullify_outer_context_for_on_clauses(table->nested_join->join_list);
+  }
+}
+
+
 /*
   Create temporary table structure (but do not fill it)
 
@@ -760,7 +786,12 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
   /* prevent name resolving out of derived table */
   for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
   {
+    // Prevent it for the WHERE clause
     sl->context.outer_context= 0;
+
+    // And for ON clauses, if there are any
+    nullify_outer_context_for_on_clauses(*sl->join_list);
+
     if (!derived->is_with_table_recursive_reference() ||
         (!derived->with->with_anchor && 
          !derived->with->is_with_prepared_anchor()))
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index ecde337..5338fad 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -9391,7 +9391,7 @@ SELECT_LEX *LEX::parsed_subselect(SELECT_LEX_UNIT *unit)
               (curr_sel == NULL && current_select == &builtin_select));
   if (curr_sel)
   {
-    curr_sel->register_unit(unit, &curr_sel->context);
+    curr_sel->register_unit(unit, context_stack.head());
     curr_sel->add_statistics(unit);
   }

In brief: do not merge the changes from sql_parse.cc, add the change from the above diff to sql_lex.cc instead.

Comment by Igor Babaev [ 2021-04-19 ]

A fix for this bug was pushed into 10.2. The fix is good for 10.3 as well, but not for 10.4+. The tested diff for a fix good for 10.4+ is provided in a JIRA comment (see the result of testing in bb-10.4-igor). The difference between 10.2 and 10.4 fixes is minor (a couple of lines in one file).

Generated at Thu Feb 08 09:37:07 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.