Details
-
Bug
-
Status: Closed (View Workflow)
-
Major
-
Resolution: Incomplete
-
10.5
Description
This is to track analysis of a join optimization problem reported privately. (Will make the take-aways public as soon as one can separate them from private user data)
Attachments
Activity
ISSUE1
Occurs easily when ref access uses range access' estimates (used_range_estimates=true). They scan the same # rows, and ref access scans one range while range access in general scans more than one range. Putting this into the cost formula, we get the higher cost for range access.
ISSUE2
Is a known (and big) issue. It is filed as MDEV-23707 (not sure which one is the primary, either that one or some of the linked MDEVs). It should be fixed but the fix for it will be large and probably too intrusive to put into stable versions.
ISSUE3
Is something new. I'll investigate it.
I can reproduce it locally.
For the derived table, I get:
"considered_execution_plans": [
|
{
|
"plan_prefix": [],
|
"table": "t",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "PRIMARY",
|
"used_range_estimates": true,
|
"rows": 70,
|
"cost": 1.094197968,
|
"chosen": true
|
},
|
{
|
"access_type": "range",
|
"resulting_rows": 70,
|
"cost": 14.98919797,
|
"chosen": false
|
}
|
],
|
"chosen_access_method": {
|
"type": "ref",
|
"records": 70,
|
"cost": 1.094197968,
|
"uses_join_buffering": false
|
}
|
},
|
"rows_for_plan": 70,
|
"cost_for_plan": 15.09419797,
|
"selectivity": 7.256593e-4,
|
"estimated_join_cardinality": 0.050796152
|
}
|
]
|
and this causes the optimization to go bad in the upper query:
"considered_execution_plans": [
|
{
|
"plan_prefix": [],
|
"table": "<derived2>",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "scan",
|
"resulting_rows": 0,
|
"cost": 85.14490999,
|
"chosen": true
|
}
|
],
|
"chosen_access_method": {
|
"type": "scan",
|
"records": 0,
|
"cost": 85.14490999,
|
"uses_join_buffering": false
|
}
|
},
|
"rows_for_plan": 0,
|
"cost_for_plan": 85.14490999,
|
....
"rest_of_plan": [
|
{
|
"plan_prefix": ["<derived2>"],
|
"table": "t10",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "a",
|
"used_range_estimates": false,
|
"cause": "not available",
|
"rows": 1,
|
"cost": 0,
|
Debugging the way the estimates are computed for the derived tables
The first piece:
|
(gdb) wher
|
#0 st_select_lex::increase_derived_records (this=0x7ffe64015ab0, records=70) at /home/psergey/dev-git/10.5/sql/sql_lex.cc:5534
|
#1 0x0000555555eb4dab in make_join_statistics (join=0x7ffe6496d7b0, tables_list=..., keyuse_array=0x7ffe6496daa0) at /home/psergey/dev-git/10.5/sql/sql_select.cc:5580
|
#2 0x0000555555ea885b in JOIN::optimize_inner (this=0x7ffe6496d7b0) at /home/psergey/dev-git/10.5/sql/sql_select.cc:2251
|
#3 0x0000555555ea6379 in JOIN::optimize (this=0x7ffe6496d7b0) at /home/psergey/dev-git/10.5/sql/sql_select.cc:1627
|
#4 0x0000555555dff102 in mysql_derived_optimize (thd=0x7ffe64000d78, lex=0x7ffe64004e50, derived=0x7ffe64019480) at /home/psergey/dev-git/10.5/sql/sql_derived.cc:1026
|
#5 0x0000555555dfcad0 in mysql_handle_single_derived (lex=0x7ffe64004e50, derived=0x7ffe64019480, phases=4) at /home/psergey/dev-git/10.5/sql/sql_derived.cc:206
|
#6 0x0000555555ea7e92 in JOIN::optimize_inner (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:2083
|
#7 0x0000555555ea6379 in JOIN::optimize (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:1627
|
The loop in make_join_statistics doesn't care about selectivities:
/* |
Calculate estimated number of rows for materialized derived
|
table/view.
|
*/
|
for (i= 0; i < join->table_count ; i++) |
if (double rr= join->best_positions[i].records_read) |
records= COST_MULT(records, rr);
|
ha_rows rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records; |
The value is stored in SELECT_LEX::result->est_records.
Then, we get here:
(gdb) wher
|
#0 TABLE_LIST::fetch_number_of_rows (this=0x7ffe64019480) at /home/psergey/dev-git/10.5/sql/table.cc:9308
|
#1 0x0000555555eb20c6 in make_join_statistics (join=0x7ffe6496d158, tables_list=..., keyuse_array=0x7ffe6496d448) at /home/psergey/dev-git/10.5/sql/sql_select.cc:4886
|
#2 0x0000555555ea885b in JOIN::optimize_inner (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:2251
|
#3 0x0000555555ea6379 in JOIN::optimize (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:1627
|
#4 0x0000555555eb15b7 in mysql_select (thd=0x7ffe64000d78, tables=0x7ffe64019480, fields=..., conds=0x7ffe6401a530, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748612, result=0x7ffe6401b070, unit=0x7ffe64004f18, select_lex=0x7ffe640154f8) at /home/psergey/dev-git/10.5/sql/sql_select.cc:4644
|
#5 0x0000555555eeff9c in mysql_explain_union (thd=0x7ffe64000d78, unit=0x7ffe64004f18, result=0x7ffe6401b070) at /home/psergey/dev-git/10.5/sql/sql_select.cc:27220
|
and do this:
if (is_materialized_derived() && !fill_me) |
{
|
table->file->stats.records= get_unit()->result->est_records;
|
set_if_bigger(table->file->stats.records, 2);
|
table->used_stat_records= table->file->stats.records;
|
and store E(#records) in table->used_stat_records.
#0 0x0000555555eca82a in st_join_table::scan_time (this=0x7ffe64975fb0) at /home/psergey/dev-git/10.5/sql/sql_select.cc:13510
|
#1 0x0000555555eb3f71 in make_join_statistics (join=0x7ffe6496d158, tables_list=..., keyuse_array=0x7ffe6496d448) at /home/psergey/dev-git/10.5/sql/sql_select.cc:5385
|
#2 0x0000555555ea885b in JOIN::optimize_inner (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:2251
|
#3 0x0000555555ea6379 in JOIN::optimize (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:1627
|
#4 0x0000555555eb15b7 in mysql_select (thd=0x7ffe64000d78, tables=0x7ffe64019480, fields=..., conds=0x7ffe6401a530, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748612, result=0x7ffe6401b070, unit=0x7ffe64004f18, select_lex=0x7ffe640154f8) at /home/psergey/dev-git/10.5/sql/sql_select.cc:4644
|
sets this:
found_records= records=table->stat_records();
|
Then we get here:
(gdb) wher
|
#0 st_join_table::choose_best_splitting (this=0x7ffe64974160, record_count=1, remaining_tables=3) at /home/psergey/dev-git/10.5/sql/opt_split.cc:1017
|
#1 0x0000555555eb99bc in best_access_path (join=0x7ffe6496d158, s=0x7ffe64974160, remaining_tables=3, join_positions=0x7ffe64974ad0, idx=0, disable_jbuf=true, record_count=1, pos=0x7ffe64974ad0, loose_scan_pos=0x7fffdc1abf60) at /home/psergey/dev-git/10.5/sql/sql_select.cc:7360
|
#2 0x0000555555ebfff9 in best_extension_by_limited_search (join=0x7ffe6496d158, remaining_tables=3, idx=0, record_count=1, read_time=0, search_depth=62, prune_level=1, use_cond_selectivity=4) at /home/psergey/dev-git/10.5/sql/sql_select.cc:9541
|
#3 0x0000555555ebdf7c in greedy_search (join=0x7ffe6496d158, remaining_tables=3, search_depth=62, prune_level=1, use_cond_selectivity=4) at /home/psergey/dev-git/10.5/sql/sql_select.cc:8745
|
#4 0x0000555555ebd1c9 in choose_plan (join=0x7ffe6496d158, join_tables=3) at /home/psergey/dev-git/10.5/sql/sql_select.cc:8310
|
#5 0x0000555555eb4b4f in make_join_statistics (join=0x7ffe6496d158, tables_list=..., keyuse_array=0x7ffe6496d448) at /home/psergey/dev-git/10.5/sql/sql_select.cc:5550
|
#6 0x0000555555ea885b in JOIN::optimize_inner (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:2251
|
#7 0x0000555555ea6379 in JOIN::optimize (this=0x7ffe6496d158) at /home/psergey/dev-git/10.5/sql/sql_select.cc:1627
|
and execute this:
records= (ha_rows)spl_opt_info->unsplit_card;
and records becomes 0.
More details: spl_opt_info->unsplit_card has type double while JOIN_TAB::records is of type integer.
Part #1: http://lists.askmonty.org/pipermail/commits/2021-January/014450.html . I think, this should go into any stable version of MariaDB.
Part #2: http://lists.askmonty.org/pipermail/commits/2021-January/014451.html - this should go into the development version (10.6)
Then
{
"plan_prefix": [],
"table": "t",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "PRIMARY",
"used_range_estimates": true,
"rows": 35,
"cost": 1.086814621,
"chosen": true
},
{
"access_type": "range",
"resulting_rows": 35,
"cost": 8.106814621,
"chosen": false
}
],
"chosen_access_method": {
"type": "ref",
"records": 35,
"cost": 1.086814621,
"uses_join_buffering": false
}
},
"rows_for_plan": 35,
"cost_for_plan": 8.086814621,
"selectivity": 0.013565891,
"estimated_join_cardinality": 0.474806202
}
Three issues: