Details
-
Bug
-
Status: Open (View Workflow)
-
Major
-
Resolution: Unresolved
-
10.5
-
None
Description
create table ten(a int primary key); |
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); |
 |
create table one_k(a int primary key); |
insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; |
create table t10 (a int, b int, c int); |
insert into t10 select a,a,a from one_k; |
create table t11 (a int, b int, c int); |
insert into t11 select a,a,a from one_k; |
explain select * from t10 where a in (select a from t11 where b<200);
|
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+
|
| 1 | PRIMARY | t10 | ALL | NULL | NULL | NULL | NULL | 1000 | |
|
| 1 | PRIMARY | <subquery2> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | |
|
| 2 | MATERIALIZED | t11 | ALL | NULL | NULL | NULL | NULL | 1000 | Using where |
|
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+
|
3 rows in set (0.01 sec)
|
Note that ANALYZE FORMAT=JSON output doesn't include the time to do materialization:
analyze format=json select * from t10 where a in (select a from t11 where b<200)\G
|
*************************** 1. row ***************************
|
ANALYZE: {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 101.372774,
|
"table": {
|
"table_name": "t10",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 1000,
|
"r_rows": 1000,
|
"r_table_time_ms": 23.69744705,
|
"r_other_time_ms": 8.525119753,
|
"filtered": 100,
|
"r_filtered": 100
|
},
|
"table": {
|
"table_name": "<subquery2>",
|
"access_type": "eq_ref",
|
"possible_keys": ["distinct_key"],
|
"key": "distinct_key",
|
"key_length": "4",
|
"used_key_parts": ["a"],
|
"ref": ["func"],
|
"r_loops": 1000,
|
"rows": 1,
|
"r_rows": 0.2,
|
"r_table_time_ms": 9.660541515,
|
"r_other_time_ms": 8.798404243,
|
"filtered": 100,
|
"r_filtered": 100,
|
"materialized": {
|
"unique": 1,
|
"query_block": {
|
"select_id": 2,
|
"table": {
|
"table_name": "t11",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 1000,
|
"r_rows": 1000,
|
"r_table_time_ms": 37.91860276,
|
"r_other_time_ms": 12.66568104,
|
"filtered": 100,
|
"r_filtered": 20,
|
"attached_condition": "t11.b < 200"
|
}
|
}
|
}
|
}
|
}
|
}
|
The issue only affects semi-join. Let's try a non-semijoin materialization:
explain select * from t10 where a in (select a from t11 where b<200) or b <3;
|
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+
|
| 1 | PRIMARY | t10 | ALL | NULL | NULL | NULL | NULL | 1000 | Using where |
|
| 2 | MATERIALIZED | t11 | ALL | NULL | NULL | NULL | NULL | 1000 | Using where |
|
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+
|
analyze format=json select * from t10 where a in (select a from t11 where b<200) or b <3 \G
|
*************************** 1. row ***************************
|
ANALYZE: {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 124.4985902,
|
"table": {
|
"table_name": "t10",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 1000,
|
"r_rows": 1000,
|
"r_table_time_ms": 24.17382037,
|
"r_other_time_ms": 36.00676106,
|
"filtered": 100,
|
"r_filtered": 20,
|
"attached_condition": "<in_optimizer>(t10.a,t10.a in (subquery#2)) or t10.b < 3"
|
},
|
"subqueries": [
|
{
|
"query_block": {
|
"select_id": 2,
|
"r_loops": 1,
|
"r_total_time_ms": 64.01030056,
|
"table": {
|
"table_name": "t11",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 1000,
|
"r_rows": 1000,
|
"r_table_time_ms": 41.05231854,
|
"r_other_time_ms": 22.83537212,
|
"filtered": 100,
|
"r_filtered": 20,
|
"attached_condition": "t11.b < 200"
|
}
|
}
|
}
|
]
|
}
|
}
|
here, one can see that query_block with select_id=2 has r_total_time_ms.