Details
Description
create table t1 (a int); |
insert into t1 values (1),(2),(3); |
|
create table t2 (a int); |
insert into t2 values (10),(11); |
analyze format=json
|
select * from (select a from t1 union all select a from t2) TBL; |
Shows something like this:
"table": {
|
"table_name": "<derived2>",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 5,
|
"r_rows": 5,
|
"r_table_time_ms": 0.039263025,
|
"r_other_time_ms": 0.028563309,
|
"filtered": 100,
|
"r_filtered": 100,
|
"materialized": {
|
"query_block": {
|
"union_result": {
|
"table_name": "<union2,3>",
|
"access_type": "ALL",
|
"r_loops": 0,
|
"r_rows": null,
|
note r_loops=0, r_rows:null. This is clearly incorrect.
Note that regular UNION (not UNION ALL) one gets correct result:
"materialized": {
|
"query_block": {
|
"union_result": {
|
"table_name": "<union2,3>",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"r_rows": 5,
|
Attachments
Activity
Mysql-8 output for UNION:
| {
|
"query_block": {
|
"select_id": 1,
|
"cost_info": {
|
"query_cost": "3.06"
|
},
|
"table": {
|
"table_name": "TBL",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 5,
|
"rows_produced_per_join": 5,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "2.56",
|
"eval_cost": "0.50",
|
"prefix_cost": "3.06",
|
"data_read_per_join": "80"
|
},
|
"used_columns": [
|
"a"
|
],
|
"materialized_from_subquery": {
|
"using_temporary_table": true,
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"union_result": {
|
"using_temporary_table": true,
|
"table_name": "<union2,3>",
|
"access_type": "ALL",
|
"query_specifications": [
|
{
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"select_id": 2,
|
"cost_info": {
|
"query_cost": "0.55"
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 3,
|
"rows_produced_per_join": 3,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "0.25",
|
"eval_cost": "0.30",
|
"prefix_cost": "0.55",
|
"data_read_per_join": "24"
|
},
|
"used_columns": [
|
"a"
|
]
|
}
|
}
|
},
|
{
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"select_id": 3,
|
"cost_info": {
|
"query_cost": "0.45"
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 2,
|
"rows_produced_per_join": 2,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "0.25",
|
"eval_cost": "0.20",
|
"prefix_cost": "0.45",
|
"data_read_per_join": "16"
|
},
|
"used_columns": [
|
"a"
|
]
|
}
|
}
|
}
|
]
|
}
|
}
|
}
|
}
|
}
|
} |
|
and for UNION ALL:
"query_block": {
|
"select_id": 1,
|
"cost_info": {
|
"query_cost": "3.06"
|
},
|
"table": {
|
"table_name": "TBL",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 5,
|
"rows_produced_per_join": 5,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "2.56",
|
"eval_cost": "0.50",
|
"prefix_cost": "3.06",
|
"data_read_per_join": "80"
|
},
|
"used_columns": [
|
"a"
|
],
|
"materialized_from_subquery": {
|
"using_temporary_table": true,
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"union_result": {
|
"using_temporary_table": false,
|
"query_specifications": [
|
{
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"select_id": 2,
|
"cost_info": {
|
"query_cost": "0.55"
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 3,
|
"rows_produced_per_join": 3,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "0.25",
|
"eval_cost": "0.30",
|
"prefix_cost": "0.55",
|
"data_read_per_join": "24"
|
},
|
"used_columns": [
|
"a"
|
]
|
}
|
}
|
},
|
{
|
"dependent": false,
|
"cacheable": true,
|
"query_block": {
|
"select_id": 3,
|
"cost_info": {
|
"query_cost": "0.45"
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ALL",
|
"rows_examined_per_scan": 2,
|
"rows_produced_per_join": 2,
|
"filtered": "100.00",
|
"cost_info": {
|
"read_cost": "0.25",
|
"eval_cost": "0.20",
|
"prefix_cost": "0.45",
|
"data_read_per_join": "16"
|
},
|
"used_columns": [
|
"a"
|
]
|
}
|
}
|
}
|
]
|
}
|
}
|
}
|
}
|
}
|
} |
|
The diff is:
28,30c27
|
< "using_temporary_table": true, |
< "table_name": "<union2,3>", |
< "access_type": "ALL", |
---
|
> "using_temporary_table": false, |
Regarding the second question "In the code that prints tabular explain, locate the position where it either prints or doesn't print the line that is the difference between UNION and UNION ALL":
sql_explain.cc int Explain_union::print_explain() |
<...>
|
if (!using_tmp)
|
return 0;
|
|
/* Print a line with "UNIT RESULT" */
|
List<Item> item_list;
|
Item *item_null= new (mem_root) Item_null(thd);
|
There's no such check in int Explain_union::print_explain_json() though.
sql_explain.cc int Explain_union::print_explain_json() |
<...>
|
make_union_table_name(table_name_buffer);
|
writer->add_member("table_name").add_str(table_name_buffer);
|
writer->add_member("access_type").add_str("ALL"); // not very useful
|
I suggest changing it in the following way:
sql_explain.cc int Explain_union::print_explain_json() |
<...>
|
if (using_tmp)
|
{
|
make_union_table_name(table_name_buffer);
|
writer->add_member("table_name").add_str(table_name_buffer);
|
writer->add_member("access_type").add_str("ALL"); // not very useful
|
}
|
I would also consider adding "using_temporary_table" field to explicitly display whether the temp table was created and to be more compatible with MySQL.
After closer examination I suggest to go even further and put "r_rows" and "r_loops" members under the same condition:
int Explain_union::print_explain_json() |
if (using_tmp)
|
{
|
make_union_table_name(table_name_buffer);
|
writer->add_member("table_name").add_str(table_name_buffer);
|
writer->add_member("access_type").add_str("ALL"); // not very useful
|
|
/* r_loops (not present in tabular output) */
|
if (is_analyze)
|
{
|
writer->add_member("r_loops").add_ll(
|
fake_select_lex_tracker.get_loops());
|
}
|
|
/* `r_rows` */
|
if (is_analyze)
|
{
|
writer->add_member("r_rows");
|
if (fake_select_lex_tracker.has_scans())
|
writer->add_double(fake_select_lex_tracker.get_avg_rows());
|
else
|
writer->add_null();
|
}
|
}
|
So "r_rows" and "r_loops" will not be printed in case when the temporary table is not created for union.
oleg.smirnov, ok please proceed to create a patch following the above suggestions.
I think it's a good idea to print (or not print) "using_temporary_table": true inside the "union_result" node.
I see that MySQL has two "using_temporary_table":
"materialized_from_subquery": {
|
"using_temporary_table": true,
|
...
|
"union_result": {
|
"using_temporary_table": true,
|
As far as I understand, one will always have using_temporary_table inside materialized_from_subquery...
Perhaps they've added that member only so that the user is able to see which parts of query use temp. tables...
I think the goal of "make EXPLAIN FORMAT=JSON indicate which parts of query plan create temp. tables" has some merit but it's outside of the scope of this MDEV.
A question to consider: what should fixVersion be? ANALYZE FORMAT=JSON was introduced in 10.3 ...
Looks like the implementation of ANALYZE FORMAT=JSON ignores the optimization introduced in https://jira.mariadb.org/browse/MDEV-334. Needs to be discussed, probably the problem is not only about "r_rows" and "r_loops" but the whole reflection of the UNION ALL optimization in the output.