Details
-
Task
-
Status: In Testing (View Workflow)
-
Critical
-
Resolution: Unresolved
-
None
Description
Selectivity computations can produce poor estimates when multi-part keys are involved.
Motivating example:
create table t1 ( |
id int auto_increment primary key, |
a int, |
b int, |
f varchar(256) default 'xxx', |
index idx1(a), |
index idx2(b) |
);
|
insert into t1(a,b) select rand(13) * 1000 mod 200, rand(17) * 1000 mod 50 from seq_1_to_1000; |
create index idx3 on t1(a,b); |
# Use query plans that are easier to read: |
set optimizer_switch='rowid_filter=off'; |
First, let's use single-column indexes only:
explain extended select * from t1 ignore index (idx3) |
where a between 50 and 80 and b between 33 and 41; |
+------+-------------+-------+-------+---------------+------+---------+------+------+----------+------------------------------------+ |
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra | |
+------+-------------+-------+-------+---------------+------+---------+------+------+----------+------------------------------------+ |
| 1 | SIMPLE | t1 | range | idx1,idx2 | idx1 | 5 | NULL | 149 | 17.50 | Using index condition; Using where | |
+------+-------------+-------+-------+---------------+------+---------+------+------+----------+------------------------------------+ |
The estimated #rows in the output is 149 * 0.175= 26 rows.
The estimates are obtained from
"range_scan_alternatives": [ |
{
|
"index": "idx1", |
"ranges": ["(50) <= (a) <= (80)"], |
"rowid_ordered": false, |
"using_mrr": false, |
"index_only": false, |
"rows": 149, |
"cost": 0.1537364, |
"chosen": true |
},
|
{
|
"index": "idx2", |
"ranges": ["(33) <= (b) <= (41)"], |
"rowid_ordered": false, |
"using_mrr": false, |
"index_only": false, |
"rows": 175, |
"cost": 0.17956584, |
"chosen": false, |
"cause": "cost" |
}
|
Now, with three indexes:
explain extended select * from t1 where a between 50 and 80 and b between 33 and 41; |
+------+-------------+-------+-------+----------------+------+---------+------+------+----------+-----------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+------+-------------+-------+-------+----------------+------+---------+------+------+----------+-----------------------+
|
| 1 | SIMPLE | t1 | range | idx1,idx2,idx3 | idx3 | 10 | NULL | 144 | 100.00 | Using index condition |
|
+------+-------------+-------+-------+----------------+------+---------+------+------+----------+-----------------------+
|
It is using this plan
"chosen_range_access_summary": { |
"range_access_plan": { |
"type": "range_scan", |
"index": "idx3", |
"rows": 144, |
"ranges": ["(50,33) <= (a,b) <= (80,41)"] |
},
|
"rows_for_plan": 144, |
"cost_for_plan": 0.1487692, |
"chosen": true |
}
|
It has these estimate numbers at its disposal:
"range_scan_alternatives": [ |
{
|
"index": "idx1", |
"ranges": ["(50) <= (a) <= (80)"], |
"rowid_ordered": false, |
"using_mrr": false, |
"index_only": false, |
"rows": 149, |
"cost": 0.1537364, |
"chosen": true |
},
|
{
|
"index": "idx2", |
"ranges": ["(33) <= (b) <= (41)"], |
"rowid_ordered": false, |
"using_mrr": false, |
"index_only": false, |
"rows": 175, |
"cost": 0.17956584, |
"chosen": false, |
"cause": "cost" |
},
|
{
|
"index": "idx3", |
"ranges": ["(50,33) <= (a,b) <= (80,41)"], |
"rowid_ordered": false, |
"using_mrr": false, |
"index_only": false, |
"rows": 144, |
"cost": 0.1487692, |
"chosen": true |
}
|
],
|
and the point is that it could come up with a tighter estimate of output bound by taking into account the estimates on a and b separately.
Igor's point is that "keyparts >=1 do not matter for non-equalities".
Attachments
Issue Links
- is part of
-
MDEV-23707 Fix condition selectivity computation for join prefixes
- Stalled