|
Could you please add your .cnf file(s)? I could not repeat the issue:
MariaDB [test]> select version();
|
+----------------+
|
| version() |
|
+----------------+
|
| 10.8.4-MariaDB |
|
+----------------+
|
1 row in set (0,000 sec)
|
|
MariaDB [test]> CREATE TABLE Parent (ParentID int, Value1 int);
|
Query OK, 0 rows affected (0,053 sec)
|
|
MariaDB [test]> CREATE TABLE Child (ParentID int, ChildID int);
|
Query OK, 0 rows affected (0,033 sec)
|
|
MariaDB [test]> INSERT INTO Parent (ParentID, Value1)
|
-> VALUES(1, 1),
|
-> (2, NULL),
|
-> (3, 3),
|
-> (4, NULL),
|
-> (5, 5),
|
-> (6, 6),
|
-> (7, 1);
|
Query OK, 7 rows affected (0,012 sec)
|
Records: 7 Duplicates: 0 Warnings: 0
|
|
MariaDB [test]> INSERT INTO Child (ParentID, ChildID)
|
-> VALUES(1, 11),
|
-> (2, 21),
|
-> (2, 22),
|
-> (3, 31),
|
-> (3, 32),
|
-> (3, 33),
|
-> (4, 41),
|
-> (4, 42),
|
-> (4, 43),
|
-> (4, 44),
|
-> (6, 61),
|
-> (6, 62),
|
-> (6, 63),
|
-> (6, 64),
|
-> (6, 65),
|
-> (6, 66),
|
-> (7, 77);
|
Query OK, 17 rows affected (0,013 sec)
|
Records: 17 Duplicates: 0 Warnings: 0
|
|
MariaDB [test]> SELECT
|
-> `p_1`.`ParentID`,
|
-> `p_1`.`Value1`
|
-> FROM
|
-> `Parent` `p_2`
|
-> INNER JOIN `Child` `c_1` ON `p_2`.`ParentID` = `c_1`.`ParentID`
|
-> INNER JOIN `Parent` `p` ON `p_2`.`ParentID` = `p`.`ParentID`
|
-> INNER JOIN `Parent` `p_1` ON `p_2`.`ParentID` = `p_1`.`ParentID`
|
-> INNER JOIN `Child` `c4` ON `c4`.`ParentID` = `p_1`.`ParentID`
|
-> WHERE
|
-> EXISTS(
|
-> SELECT
|
-> *
|
-> FROM
|
-> `Parent` `p_3`
|
-> WHERE
|
-> EXISTS(
|
-> SELECT
|
-> *
|
-> FROM
|
-> `Child` `c_2`
|
-> WHERE
|
-> `c_2`.`ParentID` > 1 AND `c_2`.`ParentID` = `p_3`.`ParentID`
|
-> ) AND
|
-> `p_3`.`ParentID` = `p_1`.`ParentID`
|
-> ) AND
|
-> `c4`.`ParentID` % 2 = 0 AND
|
-> EXISTS(
|
-> SELECT
|
-> *
|
-> FROM
|
-> `Child` `c_3`
|
-> WHERE
|
-> `c_3`.`ParentID` > 1 AND `c_3`.`ParentID` = `p`.`ParentID`
|
-> ) AND
|
-> `c_1`.`ParentID` > 1 AND
|
-> EXISTS(
|
-> SELECT
|
-> *
|
-> FROM
|
-> `Child` `c_4`
|
-> WHERE
|
-> `c_4`.`ParentID` > 1 AND `c_4`.`ParentID` = `p_2`.`ParentID`
|
-> );
|
+----------+--------+
|
| ParentID | Value1 |
|
+----------+--------+
|
| 2 | NULL |
|
| 2 | NULL |
|
| 2 | NULL |
|
| 2 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 4 | NULL |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
| 6 | 6 |
|
+----------+--------+
|
56 rows in set (0,002 sec)
|
|
|
Sorry about that. Can confirm it is not reproduced with attached scripts, but still reproduced in our more rich environment. I will try to identify what is missing and report back a bit later.
|
|
Found it. Add following index: CREATE INDEX IX_ChildIndex ON Child (ParentID)
|
|
Thank you!
The bug is reproducible on non-debug builds on 10.6-10.9 with InnoDB, not on 10.3-10.5, not on 10.10, not with Aria/Myisam. On debug versions the results are correct.
The regression is after https://github.com/MariaDB/server/commit/64f24b776d commit "greedy_search() and best_extension_by_limited_search() scrambled table order"
--source include/have_innodb.inc
|
|
create table parent (parentid int, value1 int) engine=innodb;
|
|
create table child (parentid int, childid int) engine=innodb;
|
create index ix_childindex on child (parentid);
|
|
|
insert into parent (parentid, value1)
|
values(1, 1),
|
(2, null),
|
(3, 3),
|
(4, null),
|
(5, 5),
|
(6, 6),
|
(7, 1);
|
|
insert into child (parentid, childid)
|
values(1, 11),
|
(2, 21),
|
(2, 22),
|
(3, 31),
|
(3, 32),
|
(3, 33),
|
(4, 41),
|
(4, 42),
|
(4, 43),
|
(4, 44),
|
(6, 61),
|
(6, 62),
|
(6, 63),
|
(6, 64),
|
(6, 65),
|
(6, 66),
|
(7, 77);
|
|
select
|
count(`p_1`.`parentid`)
|
from
|
`parent` `p_2`
|
inner join `child` `c_1` on `p_2`.`parentid` = `c_1`.`parentid`
|
inner join `parent` `p` on `p_2`.`parentid` = `p`.`parentid`
|
inner join `parent` `p_1` on `p_2`.`parentid` = `p_1`.`parentid`
|
inner join `child` `c4` on `c4`.`parentid` = `p_1`.`parentid`
|
|
where exists
|
( select *
|
from `parent` `p_3`
|
where exists
|
( select *
|
from `child` `c_2`
|
where `c_2`.`parentid` > 1
|
and `c_2`.`parentid` = `p_3`.`parentid` )
|
and `p_3`.`parentid` = `p_1`.`parentid` )
|
and `c4`.`parentid` % 2 = 0
|
and exists
|
( select *
|
from `child` `c_3`
|
where `c_3`.`parentid` > 1
|
and `c_3`.`parentid` = `p`.`parentid` )
|
and `c_1`.`parentid` > 1
|
and exists
|
( select *
|
from `child` `c_4`
|
where `c_4`.`parentid` > 1
|
and `c_4`.`parentid` = `p_2`.`parentid` );
|
expected result - 56, wrong -71.
|
|
dluk, by the way, if you need a workaround, set optimizer_prune_level=1 should restore the old behavior, disabling the new optimization that fails to handle your query.
|
|
Analyzing...
Query structure:
p2 c_1 p p_1 c4
|
semi-join (/*id=2*/ p_3)
|
semi-join (/*id=4*/ c_3)
|
semi-join (/*id=5*/c_4)
|
(Note that the grand-child EXISTS subquery that selects from c_2 was not converted into an IN and so it was not converted to a semi-join, either).
EXPLAIN when it produces a wrong result:
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+----------------------------------------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+----------------------------------------------------------------+
|
| 1 | PRIMARY | p_2 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 1 | PRIMARY | <subquery5> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 1 | PRIMARY | c_3 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index; Start temporary |
|
| 1 | PRIMARY | c_1 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | c4 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | p | ALL | NULL | NULL | NULL | NULL | 7 | Using where; End temporary; Using join buffer (flat, BNL join) |
|
| 1 | PRIMARY | p_1 | ALL | NULL | NULL | NULL | NULL | 7 | Using where; Using join buffer (incremental, BNL join) |
|
| 1 | PRIMARY | <subquery2> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 5 | MATERIALIZED | c_4 | range | ix_childindex | ix_childindex | 5 | NULL | 16 | Using where; Using index |
|
| 2 | MATERIALIZED | p_3 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 3 | DEPENDENT SUBQUERY | c_2 | ref | ix_childindex | ix_childindex | 5 | test.p_3.parentid | 1 | Using where; Using index |
|
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+----------------------------------------------------------------+
|
Note that it uses an "uncommon" way to remove duplicates for c_3:
c_3 Start temporary
|
c_1
|
c4
|
p End temporary; Using join buffer
|
UPD: well, but the query plan still seems to be valid. The semi-join that contains c_3 is correlated with table p, that's why "End temporary" is at table p.
|
|
If I do
set join_cache_level=0;
|
set optimizer_switch='loosescan=off';
|
I get the same query plan but without the join buffer:
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+------------------------------+
|
| 1 | PRIMARY | p_2 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 1 | PRIMARY | <subquery5> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 1 | PRIMARY | c_3 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index; Start temporary |
|
| 1 | PRIMARY | c_1 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | c4 | ref | ix_childindex | ix_childindex | 5 | test.p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | p | ALL | NULL | NULL | NULL | NULL | 7 | Using where; End temporary |
|
| 1 | PRIMARY | p_1 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 1 | PRIMARY | <subquery2> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 5 | MATERIALIZED | c_4 | range | ix_childindex | ix_childindex | 5 | NULL | 16 | Using where; Using index |
|
| 2 | MATERIALIZED | p_3 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 3 | DEPENDENT SUBQUERY | c_2 | ref | ix_childindex | ix_childindex | 5 | test.p_3.parentid | 1 | Using where; Using index |
|
+------+--------------------+-------------+--------+---------------+---------------+---------+-------------------+------+------------------------------+
|
The SELECT produces 56 rows (the correct number).
|
|
As for why debug build is different from release build:
The debug build uses a different query plan.
Comparing the traces. The first big difference (which eventually causes pruning to work for one binary but not for the other) is here:
--- trace-rel.txt
|
+++ trace-dbg.txt
|
...
|
"table": "c_4",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "ix_childindex",
|
"used_range_estimates": false,
|
"reason": "not better than ref estimates",
|
- "rows": 1,
|
- "cost": 7.000939884,
|
+ "rows": 3,
|
+ "cost": 7.002819652,
|
"chosen": true
|
},
|
The numbers come from InnoDB's index statistics:
Release build:
MariaDB [test]> show keys from child;
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Ignored |
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
| child | 1 | ix_childindex | 1 | parentid | A | 17 | NULL | NULL | YES | BTREE | | | NO |
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
Debug build:
MariaDB [test]> show keys from child;
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Ignored |
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
| child | 1 | ix_childindex | 1 | parentid | A | 6 | NULL | NULL | YES | BTREE | | | NO |
|
+-------+------------+---------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+
|
|
|
Ok, I think I know where the problem is:
The issue occurs when DuplicateWeedout optimization is applied together with Join Buffer, and also there is an SJ-Materialization-lookup table in the join prefix.
The query plan of our query has <subquery5>+eq_ref before the "Start temporary"... "End temporary".
The "End temporary" comes together with "Using join buffer".
+------+-------------+-------------+--------+---------------+---------------+---------+--------------+------+----------------------------------------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+-------------+--------+---------------+---------------+---------+--------------+------+----------------------------------------------------------------+
|
| 1 | PRIMARY | p_2 | ALL | NULL | NULL | NULL | NULL | 7 | Using where |
|
| 1 | PRIMARY | <subquery5> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 1 | PRIMARY | c_3 | ref | ix_childindex | ix_childindex | 5 | p_2.parentid | 1 | Using index; Start temporary |
|
| 1 | PRIMARY | c_1 | ref | ix_childindex | ix_childindex | 5 | p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | c4 | ref | ix_childindex | ix_childindex | 5 | p_2.parentid | 1 | Using index |
|
| 1 | PRIMARY | p | ALL | NULL | NULL | NULL | NULL | 7 | Using where; End temporary; Using join buffer (flat, BNL join) |
|
I see that init_dups_weedout() includes the following rowids into the temp. table:
init_dups_weedout
|
adding: p_2
|
adding: sj-materialize // Questionable: it uses eq_ref and so isn't different.
|
skipping: c_3 // Right, it's in the subquery
|
adding: c_1
|
adding: c4
|
adding: p
|
It uses this loop:
for (JOIN_TAB *j=join->join_tab + first_table;
|
j < join->join_tab + first_table + n_tables; j++)
|
On the other hand, JOIN_CACHE::create_remaining_fields() prepares to store rowids for these tables:
JOIN_CACHE::create_remaining_fields
|
creating rowid for p_2
|
creating rowid for c_1
|
creating rowid for c4
|
It uses this loop:
for (tab= start_tab; tab != join_tab;
|
tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
|
Note that this loop does miss the <sj-materialize> table (due to WITHOUT_BUSH_ROOTS).
Looking at what SJ_TMP_TABLE::sj_weedout_check_row():
sj_weedout_check_row: OK
|
p_2:000000000201;
|
sj-materialize:38D409A8FF7F0000;
|
c_1:000000000208;
|
c4:000000000208;
|
p:000000000201;
|
That is, DuplicateWeedout code does include the <sj-materialize> table in the checks. But the join buffering code doesn't save/restore its rowid.
So, SJ-Materialization code takes whatever the last rowid was. If it happens to be incorrect one, we get duplicates
Example:
sj_weedout_check_row: DUP-2 p_2:000000000203;sj-materialize:38D409A8FF7F0000;c_1:00000000020D;c4:00000000020D;p:000000000203;
|
sj_weedout_check_row: OK p_2:000000000203;sj-materialize:48D409A8FF7F0000;c_1:00000000020D;c4:00000000020E;p:000000000203;
|
Note that the rowids of p_2 are the same, but the rowids of sj-materialize are different.
|
|
Another question is why does setup_semijoin_dups_elimination()/sj_table_is_included() doesn't recognize that the table <sj-materialize> uses eq_ref and so doesn't have to be in the DuplicateWeedout's rowid...
This is because eq_ref access for <sj-materialize> is constructed in setup_sj_materialization_part2(), which is called after
setup_semijoin_dups_elimination().
|
|
The patch is in: bb-10.9-mdev29382
Note that the problem actually affects earlier versions of MariaDB. We should probably fix them, too.
|
|
psergei
Here's a test case that demonstrates the problem for both innodb and myisam and for Debug and Release builds of 10.7 and 10.4 (other versions was not just checked).
--source include/have_sequence.inc
|
|
--echo #
|
--echo # MDEV-29382: Query returns wrong number of records
|
--echo #
|
|
create table t1 (parentid int, value1 int);
|
|
create table t2 (parentid int, childid int);
|
create index ix_childindex on t2 (parentid);
|
|
insert into t1 (parentid, value1)
|
values(1, 1),
|
(2, null),
|
(3, 3),
|
(4, null),
|
(5, 5),
|
(6, 6),
|
(7, 1);
|
|
insert into t1 select seq, 1 from seq_10_to_100;
|
|
insert into t2 (parentid, childid)
|
values(1, 11),
|
(2, 21),
|
(2, 22),
|
(3, 31),
|
(3, 32),
|
(3, 33),
|
(4, 41),
|
(4, 42),
|
(4, 43),
|
(4, 44),
|
(6, 61),
|
(6, 62),
|
(6, 63),
|
(6, 64),
|
(6, 65),
|
(6, 66),
|
(7, 77);
|
|
insert into t2 select seq, seq+100 from seq_10_to_100;
|
|
analyze table t1,t2;
|
|
set optimizer_prune_level=1;
|
|
let $q=
|
select
|
count(p.parentid)
|
from
|
t1 p_2
|
inner join t2 c_1 on p_2.parentid = c_1.parentid
|
inner join t1 p on p_2.parentid = p.parentid
|
inner join t2 c4 on c4.parentid = p_2.parentid
|
where
|
p.parentid in (
|
select c_3.parentid
|
from t2 c_3
|
where c_3.parentid > 1
|
)
|
and
|
c_1.parentid > 1
|
and
|
p_2.parentid in ( select c_4.parentid
|
from t2 c_4
|
where
|
c_4.parentid > 1 and c_4.childid < 100
|
)
|
;
|
|
eval explain $q;
|
eval $q;
|
show status like 'Last_query_cost';
|
|
|
--echo # check that the result set is as expected
|
set optimizer_switch='semijoin=off';
|
eval $q;
|
|
drop table t1,t2;
|
|
set optimizer_switch= default;
|
What I don't like about this test case is that uses set optimizer_prune_level=1.
Try to get rid of this setting using additional tables and indexes for them.
|
|
I cannot approve the patch yet. See my testcase and check it for 10.3.
|