|
A temporary workaround for this issue is to assign the value 'exists_to_in=off' to the option optimizer_switch. That is, just before run the statement
CALL qry_exporterImages_mariaDbTest(3569,4,628);
|
execute the following one
set optimizer_switch='exists_to_in=off';
|
|
|
The same issue happens in case the query containing inside a body of the stored procedure be run as a prepared statement.
|
|
The original query encapsulated inside the store route contains a query that uses mix of CTEs and views. Experiments with the original query shows that CTE doesn't affect on producing different result sets on first and second execution and can be eliminated.
Below is the simplified prepared statement that produces different result sets on first ans second execution.
DROP VIEW IF EXISTS `vu_productimages_retailergroup_mariaDbTest_1`;
|
|
CREATE VIEW `vu_productimages_retailergroup_mariaDbTest_1` AS
|
SELECT
|
`pimg`.`product_key` AS `product_key`,
|
1,
|
`prgi`.`image_order` AS `image_order`
|
FROM
|
(
|
(
|
`productimages_mariaDbTest` `pimg`
|
JOIN `product_retailer_group` `prg`
|
ON(`prg`.`product_key` = `pimg`.`product_key`)
|
)
|
JOIN `product_retailer_group_images` `prgi`
|
ON(`prgi`.`image_id` = `pimg`.`id` AND `prgi`.`prg_id` = `prg`.`id`)
|
)
|
UNION
|
SELECT
|
`pimg`.`product_key` AS `product_key`,
|
`pd`.`dealerID` AS `retailer_id`,
|
`pimg`.`image_order` AS `image_order`
|
FROM
|
(
|
`productimages_mariaDbTest` `pimg`
|
JOIN
|
(
|
SELECT `pdsub`.`dealerID` AS `dealerID`,`pm`.`product_key` AS `product_key`
|
FROM
|
(
|
`productdealers_mariaDbTest` `pdsub` JOIN `product_map_mariaDbTest` `pm`
|
ON(`pm`.`product_key` = `pdsub`.`product_key`)
|
)
|
) `pd`
|
ON(`pimg`.`product_key` = `pd`.`product_key`)
|
)
|
WHERE
|
! EXISTS
|
(
|
SELECT `pd2`.`id`,`pd2`.`product_key`
|
FROM
|
(
|
(
|
(
|
SELECT `pdsub`.`id` AS `id`,`pm`.`product_key` AS `product_key`
|
FROM
|
(
|
`productdealers_mariaDbTest` `pdsub`
|
JOIN `product_map_mariaDbTest` `pm`
|
ON(`pm`.`product_key` = `pdsub`.`product_key`)
|
)
|
) `pd2`
|
JOIN `product_retailer_group` `prg`
|
ON (`prg`.`product_key` = `pd2`.`product_key` )
|
)
|
JOIN `item_creation_retailer_group_retailers_mariaDbTest` `icrgr`
|
ON(`icrgr`.`group_id` = `prg`.`ic_retailer_group_id`)
|
)
|
WHERE
|
`pd2`.`product_key` = `pd`.`product_key`
|
AND
|
`icrgr`.`dealerid` = `pd`.`dealerID`
|
);
|
|
PREPARE stmt FROM "
|
SELECT
|
product_key,
|
image_order
|
FROM vu_productimages_retailergroup_mariaDbTest_1";
|
|
|
It is interesting that If comment out output of the second field (1 and `pd`.`dealerID` AS `retailer_id`) then result set is the same for both PS and regular mode and it differs from output without modification. (result set with modification contains 13 rows whereas output w/o modification contains 530 rows when is run in regular mode or on first execution in PS mode).
|
|
Below is the simplest test case where second execution of the Prepare Statement produces different result set than its first execution. Hope, this test case and the customer's bug have the same root cause.
|
--disable_warnings
|
DROP TABLE IF EXISTS t1, t2, t3, t4, t5;
|
--enable_warnings
|
|
CREATE TABLE t1 (
|
product_key int
|
);
|
|
INSERT INTO t1 VALUES (3569);
|
|
CREATE TABLE t2 (
|
id int,
|
product_key int,
|
dealerid int
|
);
|
INSERT INTO t2 VALUES
|
(16494, 3569, 4),
|
(16949, 3569, 112);
|
|
CREATE TABLE t3 (
|
product_key int
|
);
|
|
INSERT INTO t3 VALUES (3569);
|
|
CREATE TABLE t4 (
|
group_id int,
|
product_key int
|
);
|
|
INSERT INTO t4 VALUES (117, 3569);
|
|
CREATE TABLE t5 (
|
group_id int,
|
dealerid int
|
);
|
|
INSERT INTO t5 VALUES (117, 4);
|
|
PREPARE stmt FROM "
|
SELECT * FROM
|
(
|
t1
|
JOIN
|
(
|
SELECT t2.dealerid AS dealerid, t3.product_key AS product_key
|
FROM
|
(
|
t2 JOIN t3
|
ON(t3.product_key = t2.product_key)
|
)
|
) pd
|
ON(t1.product_key = pd.product_key)
|
)
|
WHERE
|
! EXISTS
|
(
|
SELECT pd2.id, pd2.product_key
|
FROM
|
(
|
(
|
(
|
SELECT t2.id AS id, t3.product_key AS product_key
|
FROM
|
(
|
t2 JOIN t3
|
ON(t3.product_key = t2.product_key)
|
)
|
) pd2
|
JOIN t4
|
ON (t4.product_key = pd2.product_key)
|
)
|
JOIN t5 icrgr
|
ON(icrgr.group_id = t4.group_id)
|
)
|
WHERE
|
pd2.product_key = pd.product_key
|
AND
|
icrgr.dealerid = pd.dealerid
|
)";
|
|
--echo # It is expected a result set containing only one record (3569, 112, 3569)
|
EXECUTE stmt;
|
--echo # The second execution of this Prepared Statement should produce the same results set
|
EXECUTE stmt;
|
|
--echo Clean up
|
DEALLOCATE PREPARE stmt;
|
DROP TABLE t1, t2, t3, t4, t5;
|
|
|
This is a simplified version of the test case above reproducing the problem:
CREATE TABLE t1 (
|
product_key int,
|
dealerid int
|
);
|
INSERT INTO t1 VALUES
|
(3569, 4),
|
(3569, 112);
|
|
CREATE TABLE t2 (
|
id int,
|
product_key int
|
);
|
INSERT INTO t2 VALUES
|
(16494, 3569),
|
(16494, 3569);
|
|
CREATE TABLE t3 (
|
dealerid int
|
);
|
INSERT INTO t3 VALUES (4), (5);
|
|
set optimizer_switch='subquery_cache=off';
|
|
EXPLAIN EXTENDED
|
SELECT * FROM t1
|
WHERE
|
! EXISTS
|
(
|
SELECT dt.id
|
FROM (SELECT id, product_key FROM t2) dt, t3
|
WHERE
|
dt.product_key = t1.product_key AND
|
t3.dealerid = t1.dealerid
|
);
|
|
SELECT * FROM t1
|
WHERE
|
! EXISTS
|
(
|
SELECT dt.id
|
FROM (SELECT id, product_key FROM t2) dt, t3
|
WHERE
|
dt.product_key = t1.product_key AND
|
t3.dealerid = t1.dealerid
|
);
|
|
PREPARE stmt FROM "
|
SELECT * FROM t1
|
WHERE
|
! EXISTS
|
(
|
SELECT dt.id
|
FROM (SELECT id, product_key FROM t2) dt, t3
|
WHERE
|
dt.product_key = t1.product_key AND
|
t3.dealerid = t1.dealerid
|
)";
|
|
EXECUTE stmt;
|
|
EXECUTE stmt;
|
|
DEALLOCATE PREPARE stmt;
|
|
DROP TABLE t1,t2,t3;
|
|
|
Let's consider how we come to wrong results from the second execution of PS/SP on the following simple test case:
|
CREATE TABLE t1 (a int, b int);
|
INSERT INTO t1 VALUES (3569, 4), (3569, 112);
|
|
CREATE TABLE t2 (c int, a int);
|
INSERT INTO t2 VALUES (16494, 3569), (16494, 3569);
|
|
PREPARE stmt FROM "
|
SELECT * FROM t1
|
WHERE EXISTS (SELECT dt.c FROM (SELECT c, a FROM t2) dt WHERE dt.a = t1.a)
|
";
|
EXECUTE stmt;
|
EXECUTE stmt;
|
The EXECUTE comands return correspondingly
MariaDB [test]> EXECUTE stmt;
|
+------+------+
|
| a | b |
|
+------+------+
|
| 3569 | 4 |
|
| 3569 | 112 |
|
+------+------+
|
2 rows in set
|
|
MariaDB [test]> EXECUTE stmt;
|
Empty set
|
At the first execution of the query some permanent transformation of it are performed.
The first transformation is re-writing the EXISTS sbbquery into an IN subquery. The transformation is performed by the call conds->walk(&Item::exists2in_processor, 0, thd). Before the call the query tree looks like this:
"select t1.a AS a,t1.b AS b from t1
|
where <in_optimizer>(1,exists(select t2.c from (select t2.c AS c,t2.a AS a from t2) dt where t2.a = t1.a limit 1))"
|
Here t2.c from the select list of the exists subquery and t2.a from the where condition of this subquery are
actually the objects of the type Item_direct_view_ref constructed when dt.c and dt.a were resolved against the derived table dt. We should be noted here that both objects are allocated not on the statement memory, but on the execution memory and they refer to some items in the translation table constructed on the statement memory. This doesn't look good.
Let ref(dt.c) be the pointer in the item_list of the original exists subquery. After the prepare phase this pointer refers to the Item_direct_view_ref(dt.c). As the latter is allocated on the execution memory the change must be rolled back after the execution of the query. The location of the pointer ref(dt.c) is in the statement query. If some permanent transformation changes the pointer in this location then the roll back operation effectively cancels this change breaking the result of the transformation.
During the exists_to_in transformation the subquery
"select t2.c from (select t2.c AS c,t2.a AS a from t2) dt where t2.a = t1.a limit 1"
|
is converted into the subquery
"select t2.a from (select t2.c AS c,t2.a AS a from t2) dt where 1 limit 1"
|
Note that the Item_direct_view_ref object t2.c from the select list of the subquery is replaced with the Item_direct_view_ref object t2.a and this replacement is supposed to be permanent as the transformation is permanent. So pointer ref(dt.c) is preplaced with the pointer ref(dt.a)
After the transformation of the query looks like this
select t1.a AS a,t1.b AS b from t1
|
where <in_optimizer>(t1.a,t1.a in (select t2.a from (select t2.c AS c,t2.a AS a from t2) dt where 1))"
|
The next transformation is applied to the the subquery in the IN predicate. The derived table used in the subquery is merged into the the subquery and we get
"select t2.a from (t2) where 1"
|
Finally conversion to a semi-join is applied to the IN predicate. As a result the main query is transformed into:
"select t1.a AS a,t1.b AS b from t1 semi join (t2) where t1.a=t2.a
|
Here (t2) is actually semi-join nest "select t2.a from t2".
At the end of the execution roll-back operation replaces t2.a with dt.c in the above the above subquery.
At the second execution dt.c is resolved against t2.c and the materialization of the semi-join chosen by the planner is performed not for "select t2.a from t2", but rather for "select t2.c from t2".
|