[MDEV-27510] Query returns wrong result when using split optimization Created: 2022-01-14  Updated: 2024-01-18  Resolved: 2022-01-26

Status: Closed
Project: MariaDB Server
Component/s: Optimizer
Affects Version/s: 10.3, 10.4, 10.5, 10.6, 10.7
Fix Version/s: 10.3.33, 10.4.23, 10.5.14, 10.6.6, 10.7.2

Type: Bug Priority: Blocker
Reporter: Alice Sherepa Assignee: Igor Babaev
Resolution: Fixed Votes: 1
Labels: regression

Issue Links:
Duplicate
duplicates MDEV-27694 regression? Join using derived with a... Closed
Problem/Incident
causes MDEV-32784 Perfromance Degradation of Joins on V... Confirmed
Relates
relates to MDEV-27132 Wrong result from query when using sp... Closed
relates to MDEV-26965 Left join to derived table on multipl... Closed

 Description   

test case, that was reported in MDEV-27132 by mhadji@gmail.com

--source include/have_innodb.inc
 
CREATE TABLE raw_a (
  date varchar(100),
  v varchar(100),
  c char(5),
  foo bigint(20)
) ENGINE=InnoDB;
 
INSERT INTO raw_a VALUES ('20211231','A','bh',1),('20211231','B','bh',0),('20211231','C','bh',0),('20211231','D','bh',0),('20211231','E','bh',0),('20211231','G','bh',0),('20211231','H','bh',0),('20211231','I','bh',3),('20211231','J','bh',0),('20211231','K','bh',0),('20211231','A','bl',9),('20211231','B','bl',0),('20211231','C','bl',0),('20211231','D','bl',0),('20211231','E','bl',0),('20211231','G','bl',0),('20211231','H','bl',0),('20211231','I','bl',7),('20211231','J','bl',0),('20211231','K','bl',0),('20211231','M','bl',0),('20211231','N','bl',3),('20211231','R','bl',9),('20211231','S','bl',0),('20211231','T','bl',0),('20211231','U','bl',0),('20211231','V','bl',0);
 
CREATE TABLE raw_b (
  date int(11) NOT NULL,
  v varchar(50),
  t varchar(512),
  c varchar(50),
  o varchar(10),
  sh varchar(10),
  p char(5),
  s varchar(20),
  foo bigint(20),
  PRIMARY KEY (date,v,t,c,o,sh,p,s),
  KEY date (date,c,v)
) ENGINE=InnoDB;
 
INSERT INTO raw_b VALUES (20211231,'A','a','bl','a','f','0','[0-5]',2),(20211231,'A','a','bl','a','f','0','[6-9)',2),(20211231,'A','b','bl','a','f','0','[0-5]',2),(20211231,'A','b','bl','a','f','0','[6-9)',2),(20211231,'A','c','bh','a','f','0','[0-5]',2),(20211231,'A','c','bh','a','f','0','[10-15)',2),(20211231,'A','c','bh','a','f','0','[15-20)',2),(20211231,'A','c','bh','a','f','0','[20-30)',2),(20211231,'A','c','bh','a','f','0','[6-9)',2),(20211231,'A','c','bl','a','f','0','[0-5]',2),(20211231,'A','c','bl','a','f','0','[20-30)',2),(20211231,'A','c','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','f','0','[0-5]',2),(20211231,'A','cc','bl','a','f','0','[10-15)',2),(20211231,'A','cc','bl','a','f','0','[15-20)',2),(20211231,'A','cc','bl','a','f','0','[6-9)',2),(20211231,'A','cc','bl','a','LO','0','[0-5]',2),(20211231,'A','ccc','bl','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[0-5]',2),(20211231,'A','d','bh','a','f','0','[10-15)',2),(20211231,'A','d','bh','a','f','0','[20-30)',2),(20211231,'A','d','bh','a','f','0','[6-9)',2),(20211231,'A','e','bh','a','f','0','[0-5]',2),(20211231,'A','e','bh','a','f','0','[10-15)',2),(20211231,'A','e','bh','a','f','0','[20-30)',2),(20211231,'A','e','bh','a','f','0','[6-9)',2),(20211231,'A','f','bh','a','f','0','[6-9)',2),(20211231,'A','g','bh','a','f','0','[0-5]',2),(20211231,'A','g','bh','a','f','0','[10-15)',2),(20211231,'A','g','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','0','[0-5]',2),(20211231,'A','h','bh','a','f','0','[10-15)',2),(20211231,'A','h','bh','a','f','0','[15-20)',2),(20211231,'A','h','bh','a','f','0','[20-30)',2),(20211231,'A','h','bh','a','f','0','[30-inf)',2),(20211231,'A','h','bh','a','f','0','[6-9)',2),(20211231,'A','h','bh','a','f','1-4','[10-15)',2),(20211231,'A','h','bh','a','f','<0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[0-5]',2),(20211231,'A','i','bh','a','f','0','[20-30)',2),(20211231,'A','i','bh','a','f','0','[6-9)',2),(20211231,'A','j','bh','a','f','0','[0-5]',2),(20211231,'A','j','bh','a','f','0','[6-9)',2),(20211231,'A','k','bh','a','f','0','[0-5]',2),(20211231,'A','k','bh','a','f','0','[10-15)',2),(20211231,'A','k','bh','a','f','0','[15-20)',2),(20211231,'A','k','bh','a','f','0','[20-30)',2),(20211231,'A','k','bh','a','f','0','[30-inf)',2),(20211231,'A','k','bh','a','f','0','[6-9)',2),(20211231,'A','l','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[0-5]',2),(20211231,'A','m','bh','a','f','0','[10-15)',2),(20211231,'A','m','bh','a','f','0','[15-20)',2),(20211231,'A','m','bh','a','f','0','[20-30)',2),(20211231,'A','m','bh','a','f','0','[6-9)',2),(20211231,'A','m','bh','a','f','>4','[10-15)',2),(20211231,'A','n','bh','a','f','0','[0-5]',2),(20211231,'A','n','bl','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[0-5]',2),(20211231,'A','o','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','0','[0-5]',2),(20211231,'A','p','bh','a','f','0','[10-15)',2),(20211231,'A','p','bh','a','f','0','[15-20)',2),(20211231,'A','p','bh','a','f','0','[6-9)',2),(20211231,'A','p','bh','a','f','<0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[0-5]',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[10-15)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[15-20)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[20-30)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[30-inf)',2),(20211231,'Afdaskjlhfaslkjfhasdklfhdskla','j','bh','a','f','0','[6-9)',2);
 
SELECT a.date, a.c, b.foo, a.foo
    FROM
        (SELECT date,c,SUM(foo) as foo
         FROM raw_b
         WHERE date >= 20211231
         GROUP BY date,c
        ) as b,
        (SELECT date, c, SUM(foo) as foo
         FROM raw_a
         WHERE date >= 20211231
         GROUP BY date,c
        ) as a where a.date = b.date AND a.c = b.c;
 
 set optimizer_switch="split_materialized=off";
 
SELECT a.date, a.c, b.foo, a.foo
    FROM
        (SELECT date,c,SUM(foo) as foo
         FROM raw_b
         WHERE date >= 20211231
         GROUP BY date,c
        ) as b,
        (SELECT date, c, SUM(foo) as foo
         FROM raw_a
         WHERE date >= 20211231
         GROUP BY date,c
        ) as a where a.date = b.date AND a.c = b.c;

MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
+----------+------+------+------+
| date     | c    | foo  | foo  |
+----------+------+------+------+
| 20211231 | bl   |  142 |   28 |
+----------+------+------+------+
1 row in set (0.016 sec)
 
MariaDB [test]> set optimizer_switch="split_materialized=off";
Query OK, 0 rows affected (0.001 sec)
 
MariaDB [test]>  SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
+----------+------+------+------+
| date     | c    | foo  | foo  |
+----------+------+------+------+
| 20211231 | bh   |  114 |    4 |
| 20211231 | bl   |   28 |   28 |
+----------+------+------+------+
2 rows in set (0.008 sec)

MariaDB [test]> explain extended SELECT a.date, a.c, b.foo, a.foo     FROM         (SELECT date,c,SUM(foo) as foo          FROM raw_b          WHERE date >= 20211231          GROUP BY date,c         ) as b,         (SELECT date, c, SUM(foo) as foo          FROM raw_a          WHERE date >= 20211231          GROUP BY date,c         ) as a where a.date = b.date AND a.c = b.c;
+------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
| id   | select_type     | table      | type | possible_keys | key     | key_len | ref        | rows | filtered | Extra                                        |
+------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
|    1 | PRIMARY         | <derived3> | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where                                  |
|    1 | PRIMARY         | <derived2> | ref  | key0          | key0    | 56      | a.date,a.c | 2    |   100.00 | Using where                                  |
|    3 | DERIVED         | raw_a      | ALL  | NULL          | NULL    | NULL    | NULL       | 27   |   100.00 | Using where; Using temporary; Using filesort |
|    2 | LATERAL DERIVED | raw_b      | ref  | PRIMARY,date  | PRIMARY | 4       | a.date     | 1    |   100.00 | Using where; Using temporary; Using filesort |
+------+-----------------+------------+------+---------------+---------+---------+------------+------+----------+----------------------------------------------+
4 rows in set, 1 warning (0.005 sec)
 
Note (Code 1003): /* select#1 */ select `a`.`date` AS `date`,`a`.`c` AS `c`,`b`.`foo` AS `foo`,`a`.`foo` AS `foo` from (/* select#2 */ select `test`.`raw_b`.`date` AS `date`,`test`.`raw_b`.`c` AS `c`,sum(`test`.`raw_b`.`foo`) AS `foo` from `test`.`raw_b` where `test`.`raw_b`.`date` >= 20211231 and `test`.`raw_b`.`date` = `a`.`date` and `test`.`raw_b`.`c` = `a`.`c` group by `test`.`raw_b`.`date`) `b` join (/* select#3 */ select `test`.`raw_a`.`date` AS `date`,`test`.`raw_a`.`c` AS `c`,sum(`test`.`raw_a`.`foo`) AS `foo` from `test`.`raw_a` where `test`.`raw_a`.`date` >= 20211231 group by `test`.`raw_a`.`date`,`test`.`raw_a`.`c`) `a` where `a`.`date` = `b`.`date` and `a`.`c` = `b`.`c`



 Comments   
Comment by Igor Babaev [ 2022-01-23 ]

This comment was originally added by dan.howard in the entry for the closed bug MDEV-25714
@Igor Babaev

On version 10.5.13, the test script below reliably exhibits the bug. I run the setup script to create the tables and populate them with some dummy data. The query below counts the number of transaction_item rows for each (ledger_id, charge_id) pair. I've been careful in my test data to ensure that there is only ever 1 transaction_item row for each (ledger_id, charge_id) pair. Usually when I first run the query, I get the correct results (this is obvious because we see from_num_rows=1 on every row in the results set). After a short time (less than 1 minute for me), I start getting the incorrect results, and we see from_num_rows=2 on every row in the results set. I've captured below the output from EXPLAIN FORMAT=JSON for the same query, before and after it starts failing.

Note that that the number of rows of dummy data I have seems to be significant. The more rows I have, the faster the query starts giving incorrect results.

Setup:

DROP TABLE IF EXISTS transaction_items;
DROP TABLE IF EXISTS transactions;
DROP TABLE IF EXISTS charges;
DROP TABLE IF EXISTS ledgers;
CREATE TABLE ledgers (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(32)
);
CREATE TABLE charges (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  from_ledger_id BIGINT UNSIGNED NOT NULL,
  to_ledger_id BIGINT UNSIGNED NOT NULL,
  amount INT NOT NULL,
  CONSTRAINT fk_charge_from_ledger FOREIGN KEY (from_ledger_id) REFERENCES ledgers (id) ON DELETE
CASCADE ON UPDATE RESTRICT,
  CONSTRAINT fk_charge_to_ledger FOREIGN KEY (to_ledger_id) REFERENCES ledgers (id) ON DELETE CASCADE ON UPDATE RESTRICT
);
CREATE TABLE transactions (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  ledger_id BIGINT UNSIGNED NOT NULL,
  CONSTRAINT fk_transactions_ledger FOREIGN KEY (ledger_id) REFERENCES ledgers (id) ON DELETE CASCADE ON UPDATE RESTRICT
);
CREATE TABLE transaction_items (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  transaction_id BIGINT UNSIGNED NOT NULL,
  charge_id BIGINT UNSIGNED,
  amount INT NOT NULL,
  CONSTRAINT fk_items_transaction FOREIGN KEY (transaction_id) REFERENCES transactions (id) ON DELETE CASCADE ON UPDATE RESTRICT,
  CONSTRAINT fk_items_charge FOREIGN KEY (charge_id) REFERENCES charges (id) ON DELETE CASCADE ON UPDATE RESTRICT
);
INSERT INTO `ledgers` (`id`, `name`) VALUES (1, 'Anna'), (2, 'John'), (3, 'Fred');
INSERT INTO `charges` (`id`, `from_ledger_id`, `to_ledger_id`, `amount`) VALUES (1, 2, 1, 200), (2, 1, 2, 330), (3, 1, 2, 640), (4, 3, 1, 640), (5, 3, 2, 1000);
INSERT INTO `charges` (`id`, `from_ledger_id`, `to_ledger_id`, `amount`) VALUES (6, 3, 1, 660), (7, 2, 3, 650), (8, 3, 2, 160), (9, 2, 1, 740), (10, 3, 2, 310);
INSERT INTO `charges` (`id`, `from_ledger_id`, `to_ledger_id`, `amount`) VALUES (11, 2, 1, 640), (12, 3, 2, 240), (13, 3, 2, 340), (14, 2, 1, 720), (15, 2, 3, 100);
INSERT INTO `charges` (`id`, `from_ledger_id`, `to_ledger_id`, `amount`) VALUES (16, 2, 3, 980), (17, 2, 1, 80), (18, 1, 2, 760), (19, 2, 3, 740), (20, 2, 1, 990);
INSERT INTO `transactions` (`id`, `ledger_id`) VALUES (2, 1), (3, 1), (5, 1), (8, 1), (12, 1), (18, 1), (22, 1), (28, 1), (34, 1), (35, 1);
INSERT INTO `transactions` (`id`, `ledger_id`) VALUES (40, 1), (1, 2), (4, 2), (6, 2), (10, 2), (13, 2), (16, 2), (17, 2), (20, 2), (21, 2);
INSERT INTO `transactions` (`id`, `ledger_id`) VALUES (24, 2), (26, 2), (27, 2), (29, 2), (31, 2), (33, 2), (36, 2), (37, 2), (39, 2), (7, 3);
INSERT INTO `transactions` (`id`, `ledger_id`) VALUES (9, 3), (11, 3), (14, 3), (15, 3), (19, 3), (23, 3), (25, 3), (30, 3), (32, 3), (38, 3);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (1, 1, 1, -200), (2, 2, 1, 200), (3, 3, 2, -330), (4, 4, 2, 330), (5, 5, 3, -640);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (6, 6, 3, 640), (7, 7, 4, -640), (8, 8, 4, 640), (9, 9, 5, -1000), (10, 10, 5, 1000);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (11, 11, 6, -660), (12, 12, 6, 660), (13, 13, 7, -650), (14, 14, 7, 650), (15, 15, 8, -160);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (16, 16, 8, 160), (17, 17, 9, -740), (18, 18, 9, 740), (19, 19, 10, -310), (20, 20, 10, 310);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (21, 21, 11, -640), (22, 22, 11, 640), (23, 23, 12, -240), (24, 24, 12, 240), (25, 25, 13, -340);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (26, 26, 13, 340), (27, 27, 14, -720), (28, 28, 14, 720), (29, 29, 15, -100), (30, 30, 15, 100);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (31, 31, 16, -980), (32, 32, 16, 980), (33, 33, 17, -80), (34, 34, 17, 80), (35, 35, 18, -760);
INSERT INTO `transaction_items` (`id`, `transaction_id`, `charge_id`, `amount`) VALUES (36, 36, 18, 760), (37, 37, 19, -740), (38, 38, 19, 740), (39, 39, 20, -990), (40, 40, 20, 990);

The query:

SELECT
    charges.id,
    charges.from_ledger_id,
    charges.to_ledger_id,
    from_agg_items.num_rows AS from_num_rows
FROM charges
LEFT JOIN (
    SELECT
        transactions.ledger_id,
        transaction_items.charge_id,
        count(*) as num_rows
    FROM transaction_items
    INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    GROUP BY transactions.ledger_id, transaction_items.charge_id
) AS from_agg_items ON from_agg_items.charge_id = charges.id AND from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;

EXPLAIN result when the query is returning correct results:

{
 "query_block": {
   "select_id": 1,
   "table": {
     "table_name": "charges",
     "access_type": "ALL",
     "possible_keys": ["fk_charge_to_ledger"],
     "rows": 20,
     "filtered": 40,
     "attached_condition": "charges.to_ledger_id = 2"
   },
   "table": {
     "table_name": "<derived2>",
     "access_type": "ref",
     "possible_keys": ["key0"],
     "key": "key0",
     "key_length": "18",
     "used_key_parts": ["ledger_id", "charge_id"],
     "ref": ["bugtest.charges.from_ledger_id", "bugtest.charges.id"],
     "rows": 4,
     "filtered": 100,
     "materialized": {
       "query_block": {
         "select_id": 2,
         "filesort": {
           "sort_key": "transactions.ledger_id, transaction_items.charge_id",
           "temporary_table": {
             "table": {
               "table_name": "transaction_items",
               "access_type": "ALL",
               "possible_keys": ["fk_items_transaction", "fk_items_charge"],
               "rows": 40,
               "filtered": 100
             },
             "table": {
               "table_name": "transactions",
               "access_type": "eq_ref",
               "possible_keys": ["PRIMARY", "fk_transactions_ledger"],
               "key": "PRIMARY",
               "key_length": "8",
               "used_key_parts": ["id"],
               "ref": ["bugtest.transaction_items.transaction_id"],
               "rows": 1,
               "filtered": 100
             }
           }
         }
       }
     }
   }
}

After a short time (less than 1 minute usually), the query will start returning the wrong results.

EXPLAIN result when the query is returning incorrect results:

{
 "query_block": {
   "select_id": 1,
   "table": {
     "table_name": "charges",
     "access_type": "ALL",
     "possible_keys": ["fk_charge_to_ledger"],
     "rows": 20,
     "filtered": 35,
     "attached_condition": "charges.to_ledger_id = 2"
   },
   "table": {
     "table_name": "<derived2>",
     "access_type": "ref",
     "possible_keys": ["key0"],
     "key": "key0",
     "key_length": "18",
     "used_key_parts": ["ledger_id", "charge_id"],
     "ref": ["bugtest.charges.from_ledger_id", "bugtest.charges.id"],
     "rows": 2,
     "filtered": 100,
     "materialized": {
       "lateral": 1,
       "query_block": {
         "select_id": 2,
         "table": {
           "table_name": "transaction_items",
           "access_type": "ref",
           "possible_keys": ["fk_items_transaction", "fk_items_charge"],
           "key": "fk_items_charge",
           "key_length": "9",
           "used_key_parts": ["charge_id"],
           "ref": ["bugtest.charges.id"],
           "rows": 1,
           "filtered": 100
         },
         "table": {
           "table_name": "transactions",
           "access_type": "eq_ref",
           "possible_keys": ["PRIMARY", "fk_transactions_ledger"],
           "key": "PRIMARY",
           "key_length": "8",
           "used_key_parts": ["id"],
           "ref": ["bugtest.transaction_items.transaction_id"],
           "rows": 1,
           "filtered": 100
         }
       }
     }
   }
 }
}

Comment by Igor Babaev [ 2022-01-23 ]

The test case from danhowardmws demonstrates the same bug as the test case from mhadji@gmail.com.
Yet the last test case is much more stable: it demonstrates the same problem after adding the statement

ANALYZE TABLE ledgers, charges, transactions, transaction_items;

just before executing the query.
If we add

ANALYZE TABLE raw_a, raw_b;

before the query of the first test case the optimizer chooses an execution plan that does not employ the split optimization even when 'split_materialized' is set to 'on'.

Comment by Igor Babaev [ 2022-01-23 ]

The following variant of the above test case that uses only MyISAM tables demonstrate the same problem when 'split_materialized' is set to 'on':

CREATE TABLE ledgers (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(32)
) ENGINE=MyISAM;
 
CREATE TABLE charges (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  from_ledger_id BIGINT UNSIGNED NOT NULL,
  to_ledger_id BIGINT UNSIGNED NOT NULL,
  amount INT NOT NULL,
  KEY fk_charge_from_ledger (from_ledger_id),
  KEY fk_charge_to_ledger (to_ledger_id)
) ENGINE=MyISAM;
 
CREATE TABLE transactions (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  ledger_id BIGINT UNSIGNED NOT NULL,
  KEY fk_transactions_ledger (ledger_id)
) ENGINE=MyISAM;
 
CREATE TABLE transaction_items (
  id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  transaction_id BIGINT UNSIGNED NOT NULL,
  charge_id BIGINT UNSIGNED,
  amount INT NOT NULL,
  KEY fk_items_transaction (transaction_id),
  KEY fk_items_charge (charge_id)
) ENGINE=MyISAM;
 
INSERT INTO ledgers (id, name) VALUES
(1, 'Anna'), (2, 'John'), (3, 'Fred');
 
INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
(1, 2, 1, 200), (2, 1, 2, 330), (3, 1, 2, 640), (4, 3, 1, 640), (5, 3, 2, 1000),
(6, 3, 1, 660), (7, 2, 3, 650), (8, 3, 2, 160), (9, 2, 1, 740), (10, 3, 2, 310),
(11, 2, 1, 640), (12, 3, 2, 240), (13, 3, 2, 340), (14, 2, 1, 720),
(15, 2, 3, 100),
(16, 2, 3, 980), (17, 2, 1, 80), (18, 1, 2, 760), (19, 2, 3, 740),
(20, 2, 1, 990);
 
INSERT INTO transactions (id, ledger_id) VALUES
(2, 1), (3, 1), (5, 1), (8, 1), (12, 1), (18, 1), (22, 1), (28, 1),
(34, 1), (35, 1),
(40, 1), (1, 2), (4, 2), (6, 2), (10, 2), (13, 2), (16, 2), (17, 2),
(20, 2), (21, 2),
(24, 2), (26, 2), (27, 2), (29, 2), (31, 2), (33, 2), (36, 2), (37, 2),
(39, 2), (7, 3),
(9, 3), (11, 3), (14, 3), (15, 3), (19, 3), (23, 3), (25, 3), (30, 3),
(32, 3), (38, 3);
 
INSERT INTO transaction_items (id, transaction_id, charge_id, amount) VALUES
(1, 1, 1, -200), (2, 2, 1, 200), (3, 3, 2, -330), (4, 4, 2, 330),
(5, 5, 3, -640),
(6, 6, 3, 640), (7, 7, 4, -640), (8, 8, 4, 640), (9, 9, 5, -1000),
(10, 10, 5, 1000),
(11, 11, 6, -660), (12, 12, 6, 660), (13, 13, 7, -650), (14, 14, 7, 650),
(15, 15, 8, -160),
(16, 16, 8, 160), (17, 17, 9, -740), (18, 18, 9, 740), (19, 19, 10, -310),
(20, 20, 10, 310),
(21, 21, 11, -640), (22, 22, 11, 640), (23, 23, 12, -240), (24, 24, 12, 240),
(25, 25, 13, -340),
(26, 26, 13, 340), (27, 27, 14, -720), (28, 28, 14, 720), (29, 29, 15, -100),
(30, 30, 15, 100),
(31, 31, 16, -980), (32, 32, 16, 980), (33, 33, 17, -80), (34, 34, 17, 80),
(35, 35, 18, -760),
(36, 36, 18, 760), (37, 37, 19, -740), (38, 38, 19, 740), (39, 39, 20, -990),
(40, 40, 20, 990);
 
ANALYZE TABLE ledgers, charges, transactions, transaction_items;
 
SELECT
    charges.id,
    charges.from_ledger_id,
    charges.to_ledger_id,
    from_agg_items.num_rows AS from_num_rows
FROM charges
INNER JOIN (
    SELECT
        transactions.ledger_id,
        transaction_items.charge_id,
        count(*) as num_rows
    FROM transaction_items
    INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    GROUP BY transactions.ledger_id, transaction_items.charge_id
) AS from_agg_items
ON from_agg_items.charge_id = charges.id AND
   from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;

MariaDB [test]> SELECT
    ->     charges.id,
    ->     charges.from_ledger_id,
    ->     charges.to_ledger_id,
    ->     from_agg_items.num_rows AS from_num_rows
    -> FROM charges
    -> INNER JOIN (
    ->     SELECT
    ->         transactions.ledger_id,
    ->         transaction_items.charge_id,
    ->         count(*) as num_rows
    ->     FROM transaction_items
    ->     INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    ->     GROUP BY transactions.ledger_id, transaction_items.charge_id
    -> ) AS from_agg_items
    -> ON from_agg_items.charge_id = charges.id AND
    ->    from_agg_items.ledger_id = charges.from_ledger_id
    -> WHERE charges.to_ledger_id = 2;
+----+----------------+--------------+---------------+
| id | from_ledger_id | to_ledger_id | from_num_rows |
+----+----------------+--------------+---------------+
|  2 |              1 |            2 |             2 |
|  3 |              1 |            2 |             2 |
|  5 |              3 |            2 |             2 |
|  8 |              3 |            2 |             2 |
| 10 |              3 |            2 |             2 |
| 12 |              3 |            2 |             2 |
| 13 |              3 |            2 |             2 |
| 18 |              1 |            2 |             2 |
+----+----------------+--------------+---------------+

We see that the values in the column are incorrect. Let's compare the above result set with the result set returned by the query when 'split_materialized' is set to 'off'

MariaDB [test]> set optimizer_switch='split_materialized=off'; 
Query OK, 0 rows affected (0.000 sec)
 
MariaDB [test]> SELECT
    ->     charges.id,
    ->     charges.from_ledger_id,
    ->     charges.to_ledger_id,
    ->     from_agg_items.num_rows AS from_num_rows
    -> FROM charges
    -> INNER JOIN (
    ->     SELECT
    ->         transactions.ledger_id,
    ->         transaction_items.charge_id,
    ->         count(*) as num_rows
    ->     FROM transaction_items
    ->     INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    ->     GROUP BY transactions.ledger_id, transaction_items.charge_id
    -> ) AS from_agg_items
    -> ON from_agg_items.charge_id = charges.id AND
    ->    from_agg_items.ledger_id = charges.from_ledger_id
    -> WHERE charges.to_ledger_id = 2;
+----+----------------+--------------+---------------+
| id | from_ledger_id | to_ledger_id | from_num_rows |
+----+----------------+--------------+---------------+
|  2 |              1 |            2 |             1 |
|  3 |              1 |            2 |             1 |
|  5 |              3 |            2 |             1 |
|  8 |              3 |            2 |             1 |
| 10 |              3 |            2 |             1 |
| 12 |              3 |            2 |             1 |
| 13 |              3 |            2 |             1 |
| 18 |              1 |            2 |             1 |
+----+----------------+--------------+---------------+
8 rows in set (0.001 sec)

Comment by Igor Babaev [ 2022-01-24 ]

The query above uses INNER JOIN instead of LEFT JOIN as it was in the original reported query.
However we see the same problem in the result set from the query with LEFT JOIN as well:

MariaDB [test]> INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
    -> (101, 4, 2, 100), (102, 7, 2, 200);
Query OK, 2 rows affected (0.001 sec)
Records: 2  Duplicates: 0  Warnings: 0
 
MariaDB [test]> SELECT
    ->     charges.id,
    ->     charges.from_ledger_id,
    ->     charges.to_ledger_id,
    ->     from_agg_items.num_rows AS from_num_rows
    -> FROM charges
    -> LEFT JOIN (
    ->     SELECT
    ->         transactions.ledger_id,
    ->         transaction_items.charge_id,
    ->         count(*) as num_rows
    ->     FROM transaction_items
    ->     INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    ->     GROUP BY transactions.ledger_id, transaction_items.charge_id
    -> ) AS from_agg_items
    -> ON from_agg_items.charge_id = charges.id AND
    ->    from_agg_items.ledger_id = charges.from_ledger_id
    -> WHERE charges.to_ledger_id = 2;
+-----+----------------+--------------+---------------+
| id  | from_ledger_id | to_ledger_id | from_num_rows |
+-----+----------------+--------------+---------------+
|   2 |              1 |            2 |             2 |
|   3 |              1 |            2 |             2 |
|   5 |              3 |            2 |             2 |
|   8 |              3 |            2 |             2 |
|  10 |              3 |            2 |             2 |
|  12 |              3 |            2 |             2 |
|  13 |              3 |            2 |             2 |
|  18 |              1 |            2 |             2 |
| 101 |              4 |            2 |          NULL |
| 102 |              7 |            2 |          NULL |
+-----+----------------+--------------+---------------+
10 rows in set (0.001 sec)

Comment by Igor Babaev [ 2022-01-24 ]

Let's try to figure out why we have this problem. Let's get the output from EXPLAIN FORMAT=JSON for the query when 'split_materialized' is set to 'on':

 MariaDB [test]> EXPLAIN FORMAT=JSON
    -> SELECT
    ->     charges.id,
    ->     charges.from_ledger_id,
    ->     charges.to_ledger_id,
    ->     from_agg_items.num_rows AS from_num_rows
    -> FROM charges
    -> INNER JOIN (
    ->     SELECT
    ->         transactions.ledger_id,
    ->         transaction_items.charge_id,
    ->         count(*) as num_rows
    ->     FROM transaction_items
    ->     INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
    ->     GROUP BY transactions.ledger_id, transaction_items.charge_id
    -> ) AS from_agg_items
    -> ON from_agg_items.charge_id = charges.id AND
    ->    from_agg_items.ledger_id = charges.from_ledger_id
    -> WHERE charges.to_ledger_id = 2;
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| EXPLAIN                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| {
  "query_block": {
    "select_id": 1,
    "table": {
      "table_name": "charges",
      "access_type": "ref",
      "possible_keys": ["PRIMARY", "fk_charge_from_ledger", "fk_charge_to_ledger"],
      "key": "fk_charge_to_ledger",
      "key_length": "8",
      "used_key_parts": ["to_ledger_id"],
      "ref": ["const"],
      "rows": 10,
      "filtered": 100
    },
    "table": {
      "table_name": "<derived2>",
      "access_type": "ref",
      "possible_keys": ["key0"],
      "key": "key0",
      "key_length": "17",
      "used_key_parts": ["ledger_id", "charge_id"],
      "ref": ["test.charges.from_ledger_id", "test.charges.id"],
      "rows": 2,
      "filtered": 100,
      "materialized": {
        "lateral": 1,
        "query_block": {
          "select_id": 2,
          "table": {
            "table_name": "transaction_items",
            "access_type": "ref",
            "possible_keys": ["fk_items_transaction", "fk_items_charge"],
            "key": "fk_items_charge",
            "key_length": "9",
            "used_key_parts": ["charge_id"],
            "ref": ["test.charges.id"],
            "rows": 2,
            "filtered": 100
          },
          "table": {
            "table_name": "transactions",
            "access_type": "eq_ref",
            "possible_keys": ["PRIMARY", "fk_transactions_ledger"],
            "key": "PRIMARY",
            "key_length": "8",
            "used_key_parts": ["id"],
            "ref": ["test.transaction_items.transaction_id"],
            "rows": 1,
            "filtered": 100
          }
        }
      }
    }
  }
} |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

We see that aggregation of the rows of the lateral derived table does not require sorting. It means the optimizer thinks that lateral derived table contains only one row for each row from the left join operand. But this can be guaranteed only if both conditions

  from_agg_items.charge_id = charges.id
and
  from_agg_items.ledger_id = charges.from_ledger_id

are pushed into the derived table agg_items.
Yet we don't see it for the second condition.

Comment by Igor Babaev [ 2022-01-24 ]

When debugging we see that the condition

from_agg_items.ledger_id = charges.from_ledger_id

has been actually pushed into WHERE of the derived table, but later is removed by the code:

    if (left_item->type() == Item::FIELD_ITEM &&
        is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
        (!retain_ref_cond ||
         !test_if_ref(root_cond, (Item_field*) left_item,right_item)))
    {
      cond->marker=3;
      return (COND*) 0;
    }

(see sql_select.cc: make_cond_for_table_from_pred())
The code was added in the commit that tried to fix the bug MDEV-25128:

commit 480a06718d137c9ee7784012ccb609b9e79ff08c
Author:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
Committer:	Igor Babaev <igor@askmonty.org>  Tue Mar 23 20:54:54 2021
 
MDEV-25128 Wrong result from join with materialized semi-join and
           splittable derived

Comment by Igor Babaev [ 2022-01-25 ]

For the query using tables raw_a, raw_b the output from EXPLAIN FORMAT=JSON looks as follows when 'split_materialized' is set to 'on'.

EXPLAIN
{
  "query_block": {
    "select_id": 1,
    "table": {
      "table_name": "<derived3>",
      "access_type": "ALL",
      "rows": 27,
      "filtered": 100,
      "attached_condition": "a.`date` is not null and a.c is not null",
      "materialized": {
        "query_block": {
          "select_id": 3,
          "filesort": {
            "sort_key": "raw_a.`date`, raw_a.c",
            "temporary_table": {
              "table": {
                "table_name": "raw_a",
                "access_type": "ALL",
                "rows": 27,
                "filtered": 100,
                "attached_condition": "raw_a.`date` >= 20211231"
              }
            }
          }
        }
      }
    },
    "table": {
      "table_name": "<derived2>",
      "access_type": "ref",
      "possible_keys": ["key0"],
      "key": "key0",
      "key_length": "56",
      "used_key_parts": ["date", "c"],
      "ref": ["a.date", "a.c"],
      "rows": 2,
      "filtered": 100,
      "attached_condition": "a.`date` = b.`date` and a.c = b.c",
      "materialized": {
        "lateral": 1,
        "query_block": {
          "select_id": 2,
          "outer_ref_condition": "a.`date` is not null",
          "filesort": {
            "sort_key": "raw_b.`date`",
            "temporary_table": {
              "table": {
                "table_name": "raw_b",
                "access_type": "ref",
                "possible_keys": ["PRIMARY", "date"],
                "key": "PRIMARY",
                "key_length": "4",
                "used_key_parts": ["date"],
                "ref": ["a.date"],
                "rows": 1,
                "filtered": 100,
                "attached_condition": "raw_b.`date` <=> a.`date` and raw_b.`date` >= 20211231"
              }
            }
          }
        }
      }
    }
  }
}

Here we see that the condition and 'a.c = b.c' is not among the conjuncts of the where condition of the derived table, though the function remove_const() removes 'c' from the group list of the derived table 'b' considering a.c as a constant when joining rows of the lateral derived table.

Comment by Igor Babaev [ 2022-01-25 ]

When we return to the tree just before the commit 480a06718d137c9ee7784012ccb609b9e79ff08c we see that result sets for the above queries are correct. So this is a regression bug. The regression appeared in 10.3.29.

Comment by Oleksandr Byelkin [ 2022-01-25 ]

OK to push

Comment by Igor Babaev [ 2022-01-26 ]

A fix for this bug was pushed into 10.3.

Generated at Thu Feb 08 09:53:27 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.