Details
-
Bug
-
Status: Stalled (View Workflow)
-
Critical
-
Resolution: Unresolved
-
None
-
None
-
None
Description
The query has many subqueries with EXISTS (SELECT ...)
10.5 can handle all 26 of these.
10.11 can reach up to 11 of these at which point it gets exponentially slower for each subquery added.
Attachments
- 1.zip
- 73 kB
- CS0757389_TEST.zip
- 559 kB
- otf.json
- 71 kB
- ots.json
- 1.02 MB
Activity
We can simplify our test query to
CREATE TABLE t1 ( t1a int, PRIMARY KEY (t1a)); |
insert into t1 values (1), (2), (4); |
|
CREATE TABLE t2 ( t2a int, PRIMARY KEY (t2a)); |
insert into t2 values (4), (5), (8); |
|
analyze table t1,t2; |
|
CREATE ALGORITHM = TEMPTABLE VIEW v1 AS |
SELECT t2.t2a |
FROM t2 JOIN t1 on t2.t2a = t1.t1a; |
|
CREATE ALGORITHM = TEMPTABLE VIEW v2 AS |
SELECT t2.t2a |
FROM t2 JOIN t1 on t2.t2a = t1.t1a; |
|
SELECT count(v1.t2a) |
FROM v1 |
JOIN v2 a on v1.t2a = a.t2a |
JOIN v2 b on v1.t2a = b.t2a |
JOIN v2 c on v1.t2a = c.t2a |
JOIN v2 d on v1.t2a = d.t2a |
JOIN v2 e on v1.t2a = e.t2a |
JOIN v2 f on v1.t2a = f.t2a |
JOIN v2 g on v1.t2a = g.t2a; |
and we note that this still takes a few seconds on 10.6 after commit 432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba
If we materialize the tables used, like this
create table tv1 AS |
SELECT t2.t2a |
FROM t2 JOIN t1 on t2.t2a = t1.t1a; |
|
create table tv2 AS |
SELECT t2.t2a |
FROM t2 JOIN t1 on t2.t2a = t1.t1a; |
|
SELECT count(tv1.t2a) |
FROM tv1 |
JOIN tv2 a on tv1.t2a = a.t2a |
JOIN tv2 b on tv1.t2a = b.t2a |
JOIN tv2 c on tv1.t2a = c.t2a |
JOIN tv2 d on tv1.t2a = d.t2a |
JOIN tv2 e on tv1.t2a = e.t2a |
JOIN tv2 f on tv1.t2a = f.t2a |
JOIN tv2 g on tv1.t2a = g.t2a; |
we see it runs 3 orders of magnitude faster.
Attached are 2 optimizer traces, one for the former (ots.json), one for the latter (otf.json).
An observable difference lies in the element best_access_path
{\
|
"plan_prefix": [\
|
"tv1",\
|
"a",\
|
"b",\
|
"c",\
|
"d",\
|
"e",\
|
"f",\
|
"g"\
|
],\
|
"table": "i",\
|
"best_access_path": {\
|
"considered_access_paths": [\
|
{\
|
"access_type": "scan",\
|
"resulting_rows": 1,\
|
"cost": 1,\
|
"chosen": true\
|
}\
|
],\
|
"chosen_access_method": {\
|
"type": "scan",\
|
"records": 1,\
|
"cost": 1,\
|
"uses_join_buffering": true\
|
}\
|
},\
|
"rows_for_plan": 1,\
|
"cost_for_plan": 10.8,\
|
"pruned_by_heuristic": true\
|
}\
|
vs
"plan_prefix": [\
|
"<derived2>",\
|
"<derived3>",\
|
"<derived4>",\
|
"<derived5>",\
|
"<derived6>",\
|
"<derived7>",\
|
"<derived8>",\
|
"<derived9>"\
|
],\
|
"table": "<derived10>",\
|
"best_access_path": {\
|
"considered_access_paths": [\
|
{\
|
"access_type": "ref",\
|
"index": "key0",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": true\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key1",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key2",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key3",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key4",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key5",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key6",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "ref",\
|
"index": "key7",\
|
"rec_per_key_stats_missing": true,\
|
"used_range_estimates": false,\
|
"cause": "not available",\
|
"rows": 10.17857143,\
|
"cost": 35496856021,\
|
"chosen": false,\
|
"cause": "cost"\
|
},\
|
{\
|
"access_type": "scan",\
|
"resulting_rows": 285,\
|
"cost": 1171397025,\
|
"chosen": false\
|
}\
|
],\
|
"chosen_access_method": {\
|
"type": "ref",\
|
"records": 10.17857143,\
|
"cost": 35496856021,\
|
"uses_join_buffering": false\
|
}\
|
},\
|
So 432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba is the problem commit
Before this commit
in best_extension_by_limited_search
we hit
trace_one_table.add("pruned_by_heuristic", true);
at recursion level 9
we are looking for next table,
evaluating first table at position 9
│ 10031 if (best_record_count >= current_record_count && │
|
│ 10032 best_read_time >= current_read_time && │
|
│ 10033 /* TODO: What is the reasoning behind this condition? */ │
|
│ 10034 (!(s->key_dependent & allowed_tables & remaining_tables) || │
|
│ 10035 join->positions[idx].records_read < 2.0)) │
|
│ 10036 { │
|
│ > 10037 best_record_count= current_record_count; │
|
│ 10038 best_read_time= current_read_time; │
|
│ 10039 }
|
this happens, context
(gdb) p s->table->alias.Ptr
$12 = 0x7fa90c02b420 "v2"
(gdb) p s->key_dependent
$13 = 0
so next we look through this
│ 9960 for (pos= join->best_ref + idx ; (s= *pos) ; pos++) │
(gdb) p (*pos)->table->alias.Ptr
$16 = 0x7fa90c02b420 "v2"
(they are all v2)
then this happens
│ 10026 if (best_record_count > current_record_count ||
|
│ 10027 best_read_time > current_read_time || │
|
│ 10028 (idx == join->const_tables && // 's' is the first table in the QEP │
|
│ 10029 s->table == join->sort_by_table)) │
|
│ 10030 { │
|
│ 10031 if (best_record_count >= current_record_count && │
|
│ 10032 best_read_time >= current_read_time && │
|
│ 10033 /* TODO: What is the reasoning behind this condition? */ │
|
│ 10034 (!(s->key_dependent & allowed_tables & remaining_tables) || │
|
│ 10035 join->positions[idx].records_read < 2.0)) │
|
│ 10036 { │
|
│ 10037 best_record_count= current_record_count; │
|
│ 10038 best_read_time= current_read_time; │
|
│ 10039 } │
|
│ 10040 } │
|
│ 10041 else │
|
│ 10042 { │
|
│B+ 10043 DBUG_EXECUTE("opt", print_plan(join, idx+1, │
|
│ 10044 current_record_count, │
|
│ 10045 read_time, │
|
│ 10046 current_read_time, │
|
│ 10047 "pruned_by_heuristic");); │
|
│ > 10048 trace_one_table.add("pruned_by_heuristic", true); │
|
│ 10049 restore_prev_nj_state(s); │
|
│ 10050 restore_prev_sj_state(remaining_tables, s, idx); │
|
│ 10051 continue; │
|
│ 10052 } │
|
│ 10053 }
|
and the execution is fast.
s->key_dependent is NEVER set to anything after make_join_statistics()
│ > 5230 bzero((void*)stat, sizeof(JOIN_TAB)* table_count);
|
after commit 432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba
we get down to here
│ > 10205 if (best_record_count > current_record_count || │
|
│ 10206 best_read_time > current_read_time || │
|
│ 10207 (idx == join->const_tables && // 's' is the first table in the QEP │
|
│ 10208 s->table == join->sort_by_table)) │
|
│ 10209 {
|
(gdb) p best_record_count
$3 = 1.7976931348623157e+308
(gdb) p current_record_count
$4 = 285
then
│ 10218 if (best_record_count >= current_record_count &&
|
│ 10219 best_read_time >= current_read_time &&
|
│B+> 10220 (!(position->key_dependent & allowed_tables) ||
|
│ 10221 position->records_read < 2.0))
|
│ 10222 {
|
│ 10223 best_record_count= current_record_count;
|
│ 10224 best_read_time= current_read_time;
|
│ 10225 }
|
│ 10226 }
|
(gdb) p position->key_dependent & allowed_tables
$6 = 2
so best_record_count is never set at this position (9)
position->key_dependent set here
│ > 8643 pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
|
│ 8644 key_dependent & remaining_tables);
|
key_dependent set in best_access_path()
│ 7898 if (all_parts & 1)
|
│ > 7899 key_dependent|= key_parts_dependent;
|
key_parts_dependent set earlier
│ 7890 else if (!(found_part & keyuse->keypart_map))
|
│ > 7891 key_parts_dependent|= keyuse->used_tables;
|
(gdb) p keyuse->used_tables
$17 = 2
(gdb) p dbp(keyuse)
$18 = 0x556e2247c020 <dbug_big_buffer> "TABLE:v2,Name:key1,length:7,usable parts:1,parts:1,(KEY_PART_INFO *):
Value:[(FIELD_ITEM*)<maybe_null><fixed>(`v2`.`id2`)]"
this is set in add_key_use() here
(gdb) p key_field->val
$25 = (Item_field *) 0x7f0d88029e28
(gdb) p dbp(key_field->val)
$26 = 0x556e2247c020 <dbug_big_buffer> "0x7f0d82034b68:
"
(gdb) p key_field->val->used_tables()
$27 = 2
(gdb) p key_field->val->field->table->map
$31 = 2
this set in TABLE_LIST::set_tablenr()
│ > 2381 table->map= table_map(1) << new_tablenr; │
|
Back to the question....
│ 10218 if (best_record_count >= current_record_count && │
|
│ 10219 best_read_time >= current_read_time && │
|
│B+> 10220 (!(position->key_dependent & allowed_tables) || │
|
│ 10221 position->records_read < 2.0)) │
|
│ 10222 {
|
Why this test?
possible commit after which we are slower
https://github.com/MariaDB/server/commit/432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba
│ 10218 if (best_record_count >= current_record_count &&
│ 10219 best_read_time >= current_read_time &&
│B+> 10220 (!(position->key_dependent & allowed_tables) ||
│ 10221 position->records_read < 2.0))
│ 10222 {
│ 10223 best_record_count= current_record_count;
│ 10224 best_read_time= current_read_time;
│ 10225 }
│ 10226 }
in an example query (slow after commit, fast before),
(gdb) p position->key_dependent & allowed_tables
$6 = 2
so best_record_count is never set at this position (where it was before), subsequently, the next loop doesn't hit
DBUG_EXECUTE("opt", print_plan(join, idx+1, ... "pruned_by_heuristic"););
breaking the loop, instead exploring all possible join combinations.
key_dependent set in best_access_path()
│ 7898 if (all_parts & 1)
│ > 7899 key_dependent|= key_parts_dependent;
key_parts_dependent set earlier
│ 7890 else if (!(found_part & keyuse->keypart_map))
│ > 7891 key_parts_dependent|= keyuse->used_tables;
(gdb) p keyuse->used_tables
$17 = 2
(gdb) p dbp(keyuse)
$18 = 0x556e2247c020 <dbug_big_buffer> "TABLE:v2,Name:key1,length:7,usable parts:1,parts:1,(KEY_PART_INFO *):{v2.id2=NULL}Value:[(FIELD_ITEM*)<maybe_null><fixed>(`v2`.`id2`)]"
this is set in add_key_use() flowing in from TABLE_LIST::set_tablenr()
│ > 2381 table->map= table_map(1) << new_tablenr;
so the question i had and mentioned in sql_processor days ago was
│B+> 10220 (!(position->key_dependent & allowed_tables) ||
what are we trying to do here? I do not follow.
Prior to this commit,
│ 10033 /* TODO: What is the reasoning behind this condition? */
│ 10034 (!(s->key_dependent & allowed_tables & remaining_tables) ||
s->key_dependent is never set beyond its initial zero value construction (for the test query).