Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-27510

Query returns wrong result when using split optimization

Details

    Description

      test case, that was reported in MDEV-27132 by mhadji@gmail.com

      --source include/have_innodb.inc
       
      CREATE TABLE raw_a (
        date varchar(100),
        v varchar(100),
        c char(5),
        foo bigint(20)
      ) ENGINE=InnoDB;
       
      INSERT INTO raw_a VALUES ('20211231','A','bh',1),('20211231','B','bh',0),('20211231','C','bh',0),('20211231','D','bh',0),('20211231','E','bh',0),('20211231','G','bh',0),('20211231','H','bh',0),('20211231','I','bh',3),('20211231','J','bh',0),('20211231','K','bh',0),('20211231','A','bl',9),('20211231','B','bl',0),('20211231','C','bl',0),('20211231','D','bl',0),('20211231','E','bl',0),('20211231','G','bl',0),('20211231','H','bl',0),('20211231','I','bl',7),('20211231','J','bl',0),('20211231','K','bl',0),('20211231','M','bl',0),('20211231','N','bl',3),('20211231','R','bl',9),('20211231','S','bl',0),('20211231','T','bl',0),('20211231','U','bl',0),('20211231','V','bl',0);
       
      CREATE TABLE raw_b (
        date int(11) NOT NULL,
        v varchar(50),
        t varchar(512),
        c varchar(50),
        o varchar(10),
        sh varchar(10),
        p char(5),
        s varchar(20),
        foo bigint(20),
        PRIMARY KEY (date,v,t,c,o,sh,p,s),
        KEY date (date,c,v)
      ) ENGINE=InnoDB;
       
      INSERT INTO raw_b VALUES (20211231,'A','a','bl','a','f','0','[0-5]',2),(20211231,'A','a','bl','a','f','0','[6-9)',2),(20211231,'A','b','bl','a','f','0','[0-5]',2),(20211231,'A','b','bl','a','f','0','[6-9)',2),(20211231,'A','c','bh','a','f','0','[0-5]',2),(20211231,'A','c','bh','a','f','0','[10-15)',2),(20211231,'A','c','bh','a','f','0','[15-20)',2),(20211231,'A','c','bh','a','f','0','[20-30)',2),(20211231,'A','c','bh','a','f','0','[6-9)',2),(20211231,'A','c','bl','a','f','0','[0-5]',2),(20211231,'A','c','bl','a','f','0','[20-30)',2),(20211231,'A','c','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','f','0','[0-5]',2),(20211231,'A','cc','bl','a','f','0','[10-15)',2),(20211231,'A','cc','bl','a','f','0','[15-20)',2),(20211231,'A','cc','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','LO','0','[0-5]',2),(20211231,'A','ccc','bl','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[10-15)',2),(20211231,'A','d','bh','a','f','0','[20-30)',2),(20211231,'A','d','bh','a','f','0','[6-9)',2),(20211231,'A','e','bh','a','f','0','[0-5]',2),(20211231,'A','e','bh','a','f','0','[10-15)',2),(20211231,'A','e','bh','a','f','0','[20-30)',2),(20211231,'A','e','bh','a','f','0','[6-9)',2),(20211231,'A','f','bh','a','f','0','[6-9)',2),(20211231,'A','g','bh','a','f','0','[0-5]',2),(20211231,'A','g','bh','a','f','0','[10-15)',2),(20211231,'A','g','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','0','[0-5]',2),(20211231,'A','h','bh','a','f','0','[10-15)',2),(20211231,'A','h','bh','a','f','0','[15-20)',2),(20211231,'A','h','bh','a','f','0','[20-30)',2),(20211231,'A','h','bh','a','f','0','[30-inf)',2),(20211231,'A','h','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','1-4','[10-15)',2),(20211231,'A','h','bh','a','f','<0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[20-30)',2),(20211231,'A','i','bh','a','f','0','[6-9)',2),(20211231,'A','j','bh','a','f','0','[0-5]',2),(20211231,'A','j','bh','a','f','0','[6-9)',2),(20211231,'A','k','bh','a','f','0','[0-5]',2),(20211231,'A','k','bh','a','f','0','[10-15)',2),(20211231,'A','k','bh','a','f','0','[15-20)',2),(20211231,'A','k','bh','a','f','0','[20-30)',2),(20211231,'A','k','bh','a','f','0','[30-inf)',2),(20211231,'A','k','bh','a','f','0','[6-9)',2),(20211231,'A','l','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[10-15)',2),(20211231,'A','m','bh','a','f','0','[15-20)',2),(20211231,'A','m','bh','a','f','0','[20-30)',2),(20211231,'A','m','bh','a','f','0','[6-9)',2),(20211231,'A','m','bh','a','f','>4','[10-15)',2),(20211231,'A','n','bh','a','f','0','[0-5]',2),(20211231,'A','n','bl','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','0','[0-5]',2),(20211231,'A','p','bh','a','f','0','[10-15)',2),(20211231,'A','p','bh','a','f','0','[15-20)',2),(20211231,'A','p','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','<0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[10-15)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[15-20)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[20-30)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[30-inf)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[6-9)',2);
       
      SELECT a.date, a.c, b.foo, a.foo
          FROM
              (SELECT date,c,SUM(foo) as foo
               FROM raw_b
               WHERE date >= 20211231
               GROUP BY date,c
              ) as b,
              (SELECT date, c, SUM(foo) as foo
               FROM raw_a
               WHERE date >= 20211231
               GROUP BY date,c
              ) as a where a.date = b.date AND a.c = b.c;
       
       set optimizer_switch="split_materialized=off";
       
      SELECT a.date, a.c, b.foo, a.foo
          FROM
              (SELECT date,c,SUM(foo) as foo
               FROM raw_b
               WHERE date >= 20211231
               GROUP BY date,c
              ) as b,
              (SELECT date, c, SUM(foo) as foo
               FROM raw_a
               WHERE date >= 20211231
               GROUP BY date,c
              ) as a where a.date = b.date AND a.c = b.c;
      

      MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +----------+------+------+------+
      | date     | c    | foo  | foo  |
      +----------+------+------+------+
      | 20211231 | bl   |  142 |   28 |
      +----------+------+------+------+
      1 row in set (0.016 sec)
       
      MariaDB [test]> set optimizer_switch="split_materialized=off";
      Query OK, 0 rows affected (0.001 sec)
       
      MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +----------+------+------+------+
      | date     | c    | foo  | foo  |
      +----------+------+------+------+
      | 20211231 | bh   |  114 |    4 |
      | 20211231 | bl   |   28 |   28 |
      +----------+------+------+------+
      2 rows in set (0.008 sec)
      

      MariaDB [test]> explain extended SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      | id   | select_type     | table      | type | possible_keys | key     | key_len | ref        | rows | filtered | Extra                                        |
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      |    1 | PRIMARY         | <derived3> | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where                                  |
      |    1 | PRIMARY         | <derived2> | ref  | key0          | key0    | 56      | a.date,a.c | 2    |   100.00 | Using where                                  |
      |    3 | DERIVED         | raw_a      | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where; Using temporary; Using filesort |
      |    2 | LATERAL DERIVED | raw_b      | ref  | PRIMARY,date  | PRIMARY | 4       | a.date     | 1    |   100.00 | Using where; Using temporary; Using filesort |
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      4 rows in set, 1 warning (0.005 sec)
       
      Note (Code 1003): /* select#1 */ select `a`.`date` AS `date`,`a`.`c` AS `c`,`b`.`foo` AS `foo`,`a`.`foo` AS `foo` from (/* select#2 */ select `test`.`raw_b`.`date` AS `date`,`test`.`raw_b`.`c` AS `c`,sum(`test`.`raw_b`.`foo`) AS `foo` from `test`.`raw_b` where `test`.`raw_b`.`date` >= 20211231 and `test`.`raw_b`.`date` = `a`.`date` and `test`.`raw_b`.`c` = `a`.`c` group by `test`.`raw_b`.`date`) `b` join (/* select#3 */ select `test`.`raw_a`.`date` AS `date`,`test`.`raw_a`.`c` AS `c`,sum(`test`.`raw_a`.`foo`) AS `foo` from `test`.`raw_a` where `test`.`raw_a`.`date` >= 20211231 group by `test`.`raw_a`.`date`,`test`.`raw_a`.`c`) `a` where `a`.`date` = `b`.`date` and `a`.`c` = `b`.`c`
      

      Attachments

        Issue Links

          Activity

            A fix for this bug was pushed into 10.3.

            igor Igor Babaev (Inactive) added a comment - A fix for this bug was pushed into 10.3.

            OK to push

            sanja Oleksandr Byelkin added a comment - OK to push

            When we return to the tree just before the commit 480a06718d137c9ee7784012ccb609b9e79ff08c we see that result sets for the above queries are correct. So this is a regression bug. The regression appeared in 10.3.29.

            igor Igor Babaev (Inactive) added a comment - When we return to the tree just before the commit 480a06718d137c9ee7784012ccb609b9e79ff08c we see that result sets for the above queries are correct. So this is a regression bug. The regression appeared in 10.3.29.

            For the query using tables raw_a, raw_b the output from EXPLAIN FORMAT=JSON looks as follows when 'split_materialized' is set to 'on'.

            EXPLAIN
            {
              "query_block": {
                "select_id": 1,
                "table": {
                  "table_name": "<derived3>",
                  "access_type": "ALL",
                  "rows": 27,
                  "filtered": 100,
                  "attached_condition": "a.`date` is not null and a.c is not null",
                  "materialized": {
                    "query_block": {
                      "select_id": 3,
                      "filesort": {
                        "sort_key": "raw_a.`date`, raw_a.c",
                        "temporary_table": {
                          "table": {
                            "table_name": "raw_a",
                            "access_type": "ALL",
                            "rows": 27,
                            "filtered": 100,
                            "attached_condition": "raw_a.`date` >= 20211231"
                          }
                        }
                      }
                    }
                  }
                },
                "table": {
                  "table_name": "<derived2>",
                  "access_type": "ref",
                  "possible_keys": ["key0"],
                  "key": "key0",
                  "key_length": "56",
                  "used_key_parts": ["date", "c"],
                  "ref": ["a.date", "a.c"],
                  "rows": 2,
                  "filtered": 100,
                  "attached_condition": "a.`date` = b.`date` and a.c = b.c",
                  "materialized": {
                    "lateral": 1,
                    "query_block": {
                      "select_id": 2,
                      "outer_ref_condition": "a.`date` is not null",
                      "filesort": {
                        "sort_key": "raw_b.`date`",
                        "temporary_table": {
                          "table": {
                            "table_name": "raw_b",
                            "access_type": "ref",
                            "possible_keys": ["PRIMARY", "date"],
                            "key": "PRIMARY",
                            "key_length": "4",
                            "used_key_parts": ["date"],
                            "ref": ["a.date"],
                            "rows": 1,
                            "filtered": 100,
                            "attached_condition": "raw_b.`date` <=> a.`date` and raw_b.`date` >= 20211231"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
            

            Here we see that the condition and 'a.c = b.c' is not among the conjuncts of the where condition of the derived table, though the function remove_const() removes 'c' from the group list of the derived table 'b' considering a.c as a constant when joining rows of the lateral derived table.

            igor Igor Babaev (Inactive) added a comment - For the query using tables raw_a, raw_b the output from EXPLAIN FORMAT=JSON looks as follows when 'split_materialized' is set to 'on'. EXPLAIN { "query_block": { "select_id": 1, "table": { "table_name": "<derived3>", "access_type": "ALL", "rows": 27, "filtered": 100, "attached_condition": "a.`date` is not null and a.c is not null", "materialized": { "query_block": { "select_id": 3, "filesort": { "sort_key": "raw_a.`date`, raw_a.c", "temporary_table": { "table": { "table_name": "raw_a", "access_type": "ALL", "rows": 27, "filtered": 100, "attached_condition": "raw_a.`date` >= 20211231" } } } } } }, "table": { "table_name": "<derived2>", "access_type": "ref", "possible_keys": ["key0"], "key": "key0", "key_length": "56", "used_key_parts": ["date", "c"], "ref": ["a.date", "a.c"], "rows": 2, "filtered": 100, "attached_condition": "a.`date` = b.`date` and a.c = b.c", "materialized": { "lateral": 1, "query_block": { "select_id": 2, "outer_ref_condition": "a.`date` is not null", "filesort": { "sort_key": "raw_b.`date`", "temporary_table": { "table": { "table_name": "raw_b", "access_type": "ref", "possible_keys": ["PRIMARY", "date"], "key": "PRIMARY", "key_length": "4", "used_key_parts": ["date"], "ref": ["a.date"], "rows": 1, "filtered": 100, "attached_condition": "raw_b.`date` <=> a.`date` and raw_b.`date` >= 20211231" } } } } } } } } Here we see that the condition and 'a.c = b.c' is not among the conjuncts of the where condition of the derived table, though the function remove_const() removes 'c' from the group list of the derived table 'b' considering a.c as a constant when joining rows of the lateral derived table.

            When debugging we see that the condition

            from_agg_items.ledger_id = charges.from_ledger_id
            

            has been actually pushed into WHERE of the derived table, but later is removed by the code:

                if (left_item->type() == Item::FIELD_ITEM &&
                    is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
                    (!retain_ref_cond ||
                     !test_if_ref(root_cond, (Item_field*) left_item,right_item)))
                {
                  cond->marker=3;
                  return (COND*) 0;
                }
            

            (see sql_select.cc: make_cond_for_table_from_pred())
            The code was added in the commit that tried to fix the bug MDEV-25128:

            commit 480a06718d137c9ee7784012ccb609b9e79ff08c
            Author:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
            Committer:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
             
            MDEV-25128 Wrong result from join with materialized semi-join and
                       splittable derived
            

            igor Igor Babaev (Inactive) added a comment - When debugging we see that the condition from_agg_items.ledger_id = charges.from_ledger_id has been actually pushed into WHERE of the derived table, but later is removed by the code: if (left_item->type() == Item::FIELD_ITEM && is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) && (!retain_ref_cond || !test_if_ref(root_cond, (Item_field*) left_item,right_item))) { cond->marker=3; return (COND*) 0; } (see sql_select.cc: make_cond_for_table_from_pred()) The code was added in the commit that tried to fix the bug MDEV-25128 : commit 480a06718d137c9ee7784012ccb609b9e79ff08c Author: Igor Babaev <igor@askmonty.org> Tue Mar 23 20:54:54 2021 Committer: Igor Babaev <igor@askmonty.org> Tue Mar 23 20:54:54 2021   MDEV-25128 Wrong result from join with materialized semi-join and splittable derived

            People

              igor Igor Babaev (Inactive)
              alice Alice Sherepa
              Votes:
              1 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.