Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-27510

Query returns wrong result when using split optimization

Details

    Description

      test case, that was reported in MDEV-27132 by mhadji@gmail.com

      --source include/have_innodb.inc
       
      CREATE TABLE raw_a (
        date varchar(100),
        v varchar(100),
        c char(5),
        foo bigint(20)
      ) ENGINE=InnoDB;
       
      INSERT INTO raw_a VALUES ('20211231','A','bh',1),('20211231','B','bh',0),('20211231','C','bh',0),('20211231','D','bh',0),('20211231','E','bh',0),('20211231','G','bh',0),('20211231','H','bh',0),('20211231','I','bh',3),('20211231','J','bh',0),('20211231','K','bh',0),('20211231','A','bl',9),('20211231','B','bl',0),('20211231','C','bl',0),('20211231','D','bl',0),('20211231','E','bl',0),('20211231','G','bl',0),('20211231','H','bl',0),('20211231','I','bl',7),('20211231','J','bl',0),('20211231','K','bl',0),('20211231','M','bl',0),('20211231','N','bl',3),('20211231','R','bl',9),('20211231','S','bl',0),('20211231','T','bl',0),('20211231','U','bl',0),('20211231','V','bl',0);
       
      CREATE TABLE raw_b (
        date int(11) NOT NULL,
        v varchar(50),
        t varchar(512),
        c varchar(50),
        o varchar(10),
        sh varchar(10),
        p char(5),
        s varchar(20),
        foo bigint(20),
        PRIMARY KEY (date,v,t,c,o,sh,p,s),
        KEY date (date,c,v)
      ) ENGINE=InnoDB;
       
      INSERT INTO raw_b VALUES (20211231,'A','a','bl','a','f','0','[0-5]',2),(20211231,'A','a','bl','a','f','0','[6-9)',2),(20211231,'A','b','bl','a','f','0','[0-5]',2),(20211231,'A','b','bl','a','f','0','[6-9)',2),(20211231,'A','c','bh','a','f','0','[0-5]',2),(20211231,'A','c','bh','a','f','0','[10-15)',2),(20211231,'A','c','bh','a','f','0','[15-20)',2),(20211231,'A','c','bh','a','f','0','[20-30)',2),(20211231,'A','c','bh','a','f','0','[6-9)',2),(20211231,'A','c','bl','a','f','0','[0-5]',2),(20211231,'A','c','bl','a','f','0','[20-30)',2),(20211231,'A','c','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','f','0','[0-5]',2),(20211231,'A','cc','bl','a','f','0','[10-15)',2),(20211231,'A','cc','bl','a','f','0','[15-20)',2),(20211231,'A','cc','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','LO','0','[0-5]',2),(20211231,'A','ccc','bl','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[10-15)',2),(20211231,'A','d','bh','a','f','0','[20-30)',2),(20211231,'A','d','bh','a','f','0','[6-9)',2),(20211231,'A','e','bh','a','f','0','[0-5]',2),(20211231,'A','e','bh','a','f','0','[10-15)',2),(20211231,'A','e','bh','a','f','0','[20-30)',2),(20211231,'A','e','bh','a','f','0','[6-9)',2),(20211231,'A','f','bh','a','f','0','[6-9)',2),(20211231,'A','g','bh','a','f','0','[0-5]',2),(20211231,'A','g','bh','a','f','0','[10-15)',2),(20211231,'A','g','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','0','[0-5]',2),(20211231,'A','h','bh','a','f','0','[10-15)',2),(20211231,'A','h','bh','a','f','0','[15-20)',2),(20211231,'A','h','bh','a','f','0','[20-30)',2),(20211231,'A','h','bh','a','f','0','[30-inf)',2),(20211231,'A','h','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','1-4','[10-15)',2),(20211231,'A','h','bh','a','f','<0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[20-30)',2),(20211231,'A','i','bh','a','f','0','[6-9)',2),(20211231,'A','j','bh','a','f','0','[0-5]',2),(20211231,'A','j','bh','a','f','0','[6-9)',2),(20211231,'A','k','bh','a','f','0','[0-5]',2),(20211231,'A','k','bh','a','f','0','[10-15)',2),(20211231,'A','k','bh','a','f','0','[15-20)',2),(20211231,'A','k','bh','a','f','0','[20-30)',2),(20211231,'A','k','bh','a','f','0','[30-inf)',2),(20211231,'A','k','bh','a','f','0','[6-9)',2),(20211231,'A','l','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[10-15)',2),(20211231,'A','m','bh','a','f','0','[15-20)',2),(20211231,'A','m','bh','a','f','0','[20-30)',2),(20211231,'A','m','bh','a','f','0','[6-9)',2),(20211231,'A','m','bh','a','f','>4','[10-15)',2),(20211231,'A','n','bh','a','f','0','[0-5]',2),(20211231,'A','n','bl','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','0','[0-5]',2),(20211231,'A','p','bh','a','f','0','[10-15)',2),(20211231,'A','p','bh','a','f','0','[15-20)',2),(20211231,'A','p','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','<0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[10-15)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[15-20)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[20-30)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[30-inf)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[6-9)',2);
       
      SELECT a.date, a.c, b.foo, a.foo
          FROM
              (SELECT date,c,SUM(foo) as foo
               FROM raw_b
               WHERE date >= 20211231
               GROUP BY date,c
              ) as b,
              (SELECT date, c, SUM(foo) as foo
               FROM raw_a
               WHERE date >= 20211231
               GROUP BY date,c
              ) as a where a.date = b.date AND a.c = b.c;
       
       set optimizer_switch="split_materialized=off";
       
      SELECT a.date, a.c, b.foo, a.foo
          FROM
              (SELECT date,c,SUM(foo) as foo
               FROM raw_b
               WHERE date >= 20211231
               GROUP BY date,c
              ) as b,
              (SELECT date, c, SUM(foo) as foo
               FROM raw_a
               WHERE date >= 20211231
               GROUP BY date,c
              ) as a where a.date = b.date AND a.c = b.c;
      

      MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +----------+------+------+------+
      | date     | c    | foo  | foo  |
      +----------+------+------+------+
      | 20211231 | bl   |  142 |   28 |
      +----------+------+------+------+
      1 row in set (0.016 sec)
       
      MariaDB [test]> set optimizer_switch="split_materialized=off";
      Query OK, 0 rows affected (0.001 sec)
       
      MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +----------+------+------+------+
      | date     | c    | foo  | foo  |
      +----------+------+------+------+
      | 20211231 | bh   |  114 |    4 |
      | 20211231 | bl   |   28 |   28 |
      +----------+------+------+------+
      2 rows in set (0.008 sec)
      

      MariaDB [test]> explain extended SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      | id   | select_type     | table      | type | possible_keys | key     | key_len | ref        | rows | filtered | Extra                                        |
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      |    1 | PRIMARY         | <derived3> | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where                                  |
      |    1 | PRIMARY         | <derived2> | ref  | key0          | key0    | 56      | a.date,a.c | 2    |   100.00 | Using where                                  |
      |    3 | DERIVED         | raw_a      | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where; Using temporary; Using filesort |
      |    2 | LATERAL DERIVED | raw_b      | ref  | PRIMARY,date  | PRIMARY | 4       | a.date     | 1    |   100.00 | Using where; Using temporary; Using filesort |
      +------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
      4 rows in set, 1 warning (0.005 sec)
       
      Note (Code 1003): /* select#1 */ select `a`.`date` AS `date`,`a`.`c` AS `c`,`b`.`foo` AS `foo`,`a`.`foo` AS `foo` from (/* select#2 */ select `test`.`raw_b`.`date` AS `date`,`test`.`raw_b`.`c` AS `c`,sum(`test`.`raw_b`.`foo`) AS `foo` from `test`.`raw_b` where `test`.`raw_b`.`date` >= 20211231 and `test`.`raw_b`.`date` = `a`.`date` and `test`.`raw_b`.`c` = `a`.`c` group by `test`.`raw_b`.`date`) `b` join (/* select#3 */ select `test`.`raw_a`.`date` AS `date`,`test`.`raw_a`.`c` AS `c`,sum(`test`.`raw_a`.`foo`) AS `foo` from `test`.`raw_a` where `test`.`raw_a`.`date` >= 20211231 group by `test`.`raw_a`.`date`,`test`.`raw_a`.`c`) `a` where `a`.`date` = `b`.`date` and `a`.`c` = `b`.`c`
      

      Attachments

        Issue Links

          Activity

            When debugging we see that the condition

            from_agg_items.ledger_id = charges.from_ledger_id
            

            has been actually pushed into WHERE of the derived table, but later is removed by the code:

                if (left_item->type() == Item::FIELD_ITEM &&
                    is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
                    (!retain_ref_cond ||
                     !test_if_ref(root_cond, (Item_field*) left_item,right_item)))
                {
                  cond->marker=3;
                  return (COND*) 0;
                }
            

            (see sql_select.cc: make_cond_for_table_from_pred())
            The code was added in the commit that tried to fix the bug MDEV-25128:

            commit 480a06718d137c9ee7784012ccb609b9e79ff08c
            Author:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
            Committer:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
             
            MDEV-25128 Wrong result from join with materialized semi-join and
                       splittable derived
            

            igor Igor Babaev (Inactive) added a comment - When debugging we see that the condition from_agg_items.ledger_id = charges.from_ledger_id has been actually pushed into WHERE of the derived table, but later is removed by the code: if (left_item->type() == Item::FIELD_ITEM && is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) && (!retain_ref_cond || !test_if_ref(root_cond, (Item_field*) left_item,right_item))) { cond->marker=3; return (COND*) 0; } (see sql_select.cc: make_cond_for_table_from_pred()) The code was added in the commit that tried to fix the bug MDEV-25128 : commit 480a06718d137c9ee7784012ccb609b9e79ff08c Author: Igor Babaev <igor@askmonty.org> Tue Mar 23 20:54:54 2021 Committer: Igor Babaev <igor@askmonty.org> Tue Mar 23 20:54:54 2021   MDEV-25128 Wrong result from join with materialized semi-join and splittable derived

            For the query using tables raw_a, raw_b the output from EXPLAIN FORMAT=JSON looks as follows when 'split_materialized' is set to 'on'.

            EXPLAIN
            {
              "query_block": {
                "select_id": 1,
                "table": {
                  "table_name": "<derived3>",
                  "access_type": "ALL",
                  "rows": 27,
                  "filtered": 100,
                  "attached_condition": "a.`date` is not null and a.c is not null",
                  "materialized": {
                    "query_block": {
                      "select_id": 3,
                      "filesort": {
                        "sort_key": "raw_a.`date`, raw_a.c",
                        "temporary_table": {
                          "table": {
                            "table_name": "raw_a",
                            "access_type": "ALL",
                            "rows": 27,
                            "filtered": 100,
                            "attached_condition": "raw_a.`date` >= 20211231"
                          }
                        }
                      }
                    }
                  }
                },
                "table": {
                  "table_name": "<derived2>",
                  "access_type": "ref",
                  "possible_keys": ["key0"],
                  "key": "key0",
                  "key_length": "56",
                  "used_key_parts": ["date", "c"],
                  "ref": ["a.date", "a.c"],
                  "rows": 2,
                  "filtered": 100,
                  "attached_condition": "a.`date` = b.`date` and a.c = b.c",
                  "materialized": {
                    "lateral": 1,
                    "query_block": {
                      "select_id": 2,
                      "outer_ref_condition": "a.`date` is not null",
                      "filesort": {
                        "sort_key": "raw_b.`date`",
                        "temporary_table": {
                          "table": {
                            "table_name": "raw_b",
                            "access_type": "ref",
                            "possible_keys": ["PRIMARY", "date"],
                            "key": "PRIMARY",
                            "key_length": "4",
                            "used_key_parts": ["date"],
                            "ref": ["a.date"],
                            "rows": 1,
                            "filtered": 100,
                            "attached_condition": "raw_b.`date` <=> a.`date` and raw_b.`date` >= 20211231"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
            

            Here we see that the condition and 'a.c = b.c' is not among the conjuncts of the where condition of the derived table, though the function remove_const() removes 'c' from the group list of the derived table 'b' considering a.c as a constant when joining rows of the lateral derived table.

            igor Igor Babaev (Inactive) added a comment - For the query using tables raw_a, raw_b the output from EXPLAIN FORMAT=JSON looks as follows when 'split_materialized' is set to 'on'. EXPLAIN { "query_block": { "select_id": 1, "table": { "table_name": "<derived3>", "access_type": "ALL", "rows": 27, "filtered": 100, "attached_condition": "a.`date` is not null and a.c is not null", "materialized": { "query_block": { "select_id": 3, "filesort": { "sort_key": "raw_a.`date`, raw_a.c", "temporary_table": { "table": { "table_name": "raw_a", "access_type": "ALL", "rows": 27, "filtered": 100, "attached_condition": "raw_a.`date` >= 20211231" } } } } } }, "table": { "table_name": "<derived2>", "access_type": "ref", "possible_keys": ["key0"], "key": "key0", "key_length": "56", "used_key_parts": ["date", "c"], "ref": ["a.date", "a.c"], "rows": 2, "filtered": 100, "attached_condition": "a.`date` = b.`date` and a.c = b.c", "materialized": { "lateral": 1, "query_block": { "select_id": 2, "outer_ref_condition": "a.`date` is not null", "filesort": { "sort_key": "raw_b.`date`", "temporary_table": { "table": { "table_name": "raw_b", "access_type": "ref", "possible_keys": ["PRIMARY", "date"], "key": "PRIMARY", "key_length": "4", "used_key_parts": ["date"], "ref": ["a.date"], "rows": 1, "filtered": 100, "attached_condition": "raw_b.`date` <=> a.`date` and raw_b.`date` >= 20211231" } } } } } } } } Here we see that the condition and 'a.c = b.c' is not among the conjuncts of the where condition of the derived table, though the function remove_const() removes 'c' from the group list of the derived table 'b' considering a.c as a constant when joining rows of the lateral derived table.

            When we return to the tree just before the commit 480a06718d137c9ee7784012ccb609b9e79ff08c we see that result sets for the above queries are correct. So this is a regression bug. The regression appeared in 10.3.29.

            igor Igor Babaev (Inactive) added a comment - When we return to the tree just before the commit 480a06718d137c9ee7784012ccb609b9e79ff08c we see that result sets for the above queries are correct. So this is a regression bug. The regression appeared in 10.3.29.

            OK to push

            sanja Oleksandr Byelkin added a comment - OK to push

            A fix for this bug was pushed into 10.3.

            igor Igor Babaev (Inactive) added a comment - A fix for this bug was pushed into 10.3.

            People

              igor Igor Babaev (Inactive)
              alice Alice Sherepa
              Votes:
              1 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.