|
Mysql 8.0.15
mysql> select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31';
|
+----------+
|
| count(*) |
|
+----------+
|
| 262740 |
|
+----------+
|
1 row in set (0.12 sec)
|
|
mysql> explain select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31';
|
+----+-------------+-------+------------+-------+---------------+--------------+---------+-------------+--------+----------+--------------------------+
|
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+----+-------------+-------+------------+-------+---------------+--------------+---------+-------------+--------+----------+--------------------------+
|
| 1 | SIMPLE | t2 | NULL | index | id2_valid_to | id2_valid_to | 12 | NULL | 261800 | 33.33 | Using where; Using index |
|
| 1 | SIMPLE | t | NULL | ref | id2_valid_to | id2_valid_to | 4 | test.t2.id2 | 268 | 100.00 | Using index |
|
+----+-------------+-------+------------+-------+---------------+--------------+---------+-------------+--------+----------+--------------------------+
|
2 rows in set, 1 warning (0.00 sec)
|
|
|
| select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31' | {
|
"steps": [
|
{
|
"join_preparation": {
|
"select#": 1,
|
"steps": [
|
{
|
"expanded_query": "/* select#1 */ select count(0) AS `count(*)` from (`t` join `t` `t2` on(((`t`.`id2` = `t2`.`id2`) and (`t2`.`valid_to` >= '9999-12-31'))))"
|
},
|
{
|
"transformations_to_nested_joins": {
|
"transformations": [
|
"JOIN_condition_to_WHERE",
|
"parenthesis_removal"
|
],
|
"expanded_query": "/* select#1 */ select count(0) AS `count(*)` from `t` join `t` `t2` where ((`t`.`id2` = `t2`.`id2`) and (`t2`.`valid_to` >= '9999-12-31'))"
|
}
|
}
|
]
|
}
|
},
|
{
|
"join_optimization": {
|
"select#": 1,
|
"steps": [
|
{
|
"condition_processing": {
|
"condition": "WHERE",
|
"original_condition": "((`t`.`id2` = `t2`.`id2`) and (`t2`.`valid_to` >= '9999-12-31'))",
|
"steps": [
|
{
|
"transformation": "equality_propagation",
|
"resulting_condition": "((`t2`.`valid_to` >= '9999-12-31') and multiple equal(`t`.`id2`, `t2`.`id2`))"
|
},
|
{
|
"transformation": "constant_propagation",
|
"resulting_condition": "((`t2`.`valid_to` >= '9999-12-31') and multiple equal(`t`.`id2`, `t2`.`id2`))"
|
},
|
{
|
"transformation": "trivial_condition_removal",
|
"resulting_condition": "((`t2`.`valid_to` >= '9999-12-31') and multiple equal(`t`.`id2`, `t2`.`id2`))"
|
}
|
]
|
}
|
},
|
{
|
"substitute_generated_columns": {
|
}
|
},
|
{
|
"table_dependencies": [
|
{
|
"table": "`t`",
|
"row_may_be_null": false,
|
"map_bit": 0,
|
"depends_on_map_bits": [
|
]
|
},
|
{
|
"table": "`t` `t2`",
|
"row_may_be_null": false,
|
"map_bit": 1,
|
"depends_on_map_bits": [
|
]
|
}
|
]
|
},
|
{
|
"ref_optimizer_key_uses": [
|
{
|
"table": "`t`",
|
"field": "id2",
|
"equals": "`t2`.`id2`",
|
"null_rejecting": false
|
},
|
{
|
"table": "`t` `t2`",
|
"field": "id2",
|
"equals": "`t`.`id2`",
|
"null_rejecting": false
|
}
|
]
|
},
|
{
|
"rows_estimation": [
|
{
|
"table": "`t`",
|
"table_scan": {
|
"rows": 261800,
|
"cost": 152.25
|
}
|
},
|
{
|
"table": "`t` `t2`",
|
"table_scan": {
|
"rows": 261800,
|
"cost": 152.25
|
}
|
}
|
]
|
},
|
{
|
"considered_execution_plans": [
|
{
|
"plan_prefix": [
|
],
|
"table": "`t`",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"usable": false,
|
"chosen": false
|
},
|
{
|
"rows_to_scan": 261800,
|
"filtering_effect": [
|
],
|
"final_filtering_effect": 1,
|
"access_type": "scan",
|
"resulting_rows": 261800,
|
"cost": 26332,
|
"chosen": true
|
}
|
]
|
},
|
"condition_filtering_pct": 100,
|
"rows_for_plan": 261800,
|
"cost_for_plan": 26332,
|
"rest_of_plan": [
|
{
|
"plan_prefix": [
|
"`t`"
|
],
|
"table": "`t` `t2`",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"rows": 268.79,
|
"cost": 7.14e6,
|
"chosen": true
|
},
|
{
|
"access_type": "scan",
|
"chosen": false,
|
"cause": "covering_index_better_than_full_scan"
|
}
|
]
|
},
|
"condition_filtering_pct": 100,
|
"rows_for_plan": 7.04e7,
|
"cost_for_plan": 7.16e6,
|
"chosen": true
|
}
|
]
|
},
|
{
|
"plan_prefix": [
|
],
|
"table": "`t` `t2`",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"usable": false,
|
"chosen": false
|
},
|
{
|
"rows_to_scan": 261800,
|
"filtering_effect": [
|
],
|
"final_filtering_effect": 0.3333,
|
"access_type": "scan",
|
"resulting_rows": 87258,
|
"cost": 26332,
|
"chosen": true
|
}
|
]
|
},
|
"condition_filtering_pct": 100,
|
"rows_for_plan": 87258,
|
"cost_for_plan": 26332,
|
"rest_of_plan": [
|
{
|
"plan_prefix": [
|
"`t` `t2`"
|
],
|
"table": "`t`",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"rows": 268.79,
|
"cost": 2.38e6,
|
"chosen": true
|
},
|
{
|
"access_type": "scan",
|
"chosen": false,
|
"cause": "covering_index_better_than_full_scan"
|
}
|
]
|
},
|
"condition_filtering_pct": 100,
|
"rows_for_plan": 2.35e7,
|
"cost_for_plan": 2.4e6,
|
"chosen": true
|
}
|
]
|
}
|
]
|
},
|
{
|
"attaching_conditions_to_tables": {
|
"original_condition": "((`t`.`id2` = `t2`.`id2`) and (`t2`.`valid_to` >= '9999-12-31'))",
|
"attached_conditions_computation": [
|
],
|
"attached_conditions_summary": [
|
{
|
"table": "`t` `t2`",
|
"attached": "(`t2`.`valid_to` >= '9999-12-31')"
|
},
|
{
|
"table": "`t`",
|
"attached": "(`t`.`id2` = `t2`.`id2`)"
|
}
|
]
|
}
|
},
|
{
|
"optimizing_distinct_group_by_order_by": {
|
}
|
},
|
{
|
"finalizing_table_conditions": [
|
{
|
"table": "`t` `t2`",
|
"original_table_condition": "(`t2`.`valid_to` >= '9999-12-31')",
|
"final_table_condition ": "(`t2`.`valid_to` >= '9999-12-31')"
|
},
|
{
|
"table": "`t`",
|
"original_table_condition": "(`t`.`id2` = `t2`.`id2`)",
|
"final_table_condition ": null
|
}
|
]
|
},
|
{
|
"refine_plan": [
|
{
|
"table": "`t` `t2`"
|
},
|
{
|
"table": "`t`"
|
}
|
]
|
},
|
{
|
"considering_tmp_tables": [
|
]
|
}
|
]
|
}
|
},
|
{
|
"join_execution": {
|
"select#": 1,
|
"steps": [
|
]
|
}
|
}
|
]
|
} | 0 | 0 |
|
|
MariaDB 10.4.3
|
MariaDB [test]> select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31';
|
+----------+
|
| count(*) |
|
+----------+
|
| 262148 |
|
+----------+
|
1 row in set (15.226 sec)
|
|
MariaDB [test]> analyze select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31';
|
+------+-------------+-------+-------+---------------+--------------+---------+------------+--------+-----------+----------+------------+--------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | r_rows | filtered | r_filtered | Extra |
|
+------+-------------+-------+-------+---------------+--------------+---------+------------+--------+-----------+----------+------------+--------------------------+
|
| 1 | SIMPLE | t | index | id2_valid_to | id2_valid_to | 12 | NULL | 261800 | 262148.00 | 100.00 | 100.00 | Using index |
|
| 1 | SIMPLE | t2 | ref | id2_valid_to | id2_valid_to | 4 | test.t.id2 | 138 | 263.07 | 100.00 | 0.38 | Using where; Using index |
|
+------+-------------+-------+-------+---------------+--------------+---------+------------+--------+-----------+----------+------------+--------------------------+
|
2 rows in set (14.841 sec)
|
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 15141,
|
"table": {
|
"table_name": "t",
|
"access_type": "index",
|
"possible_keys": ["id2_valid_to"],
|
"key": "id2_valid_to",
|
"key_length": "12",
|
"used_key_parts": ["id2", "valid_to"],
|
"r_loops": 1,
|
"rows": 261800,
|
"r_rows": 262148,
|
"r_total_time_ms": 46.387,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ref",
|
"possible_keys": ["id2_valid_to"],
|
"key": "id2_valid_to",
|
"key_length": "4",
|
"used_key_parts": ["id2"],
|
"ref": ["test.t.id2"],
|
"r_loops": 262148,
|
"rows": 138,
|
"r_rows": 263.07,
|
"r_total_time_ms": 10883,
|
"filtered": 100,
|
"r_filtered": 0.3801,
|
"attached_condition": "t2.valid_to >= '9999-12-31'",
|
"using_index": true
|
}
|
}
|
} |
|
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
1 row in set (15.139 sec)
|
|
| select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31' | {
|
"steps": [
|
{
|
"join_preparation": {
|
"select_id": 1,
|
"steps": [
|
{
|
"expanded_query": "select count(0) AS `count(*)` from (t join t t2 on(t.id2 = t2.id2 and t2.valid_to >= '9999-12-31'))"
|
}
|
]
|
}
|
},
|
{
|
"join_optimization": {
|
"select_id": 1,
|
"steps": [
|
{
|
"condition_processing": {
|
"condition": "WHERE",
|
"original_condition": "t.id2 = t2.id2 and t2.valid_to >= '9999-12-31'",
|
"steps": [
|
{
|
"transformation": "equality_propagation",
|
"resulting_condition": "t2.valid_to >= '9999-12-31' and multiple equal(t.id2, t2.id2)"
|
},
|
{
|
"transformation": "constant_propagation",
|
"resulting_condition": "t2.valid_to >= '9999-12-31' and multiple equal(t.id2, t2.id2)"
|
},
|
{
|
"transformation": "trivial_condition_removal",
|
"resulting_condition": "t2.valid_to >= '9999-12-31' and multiple equal(t.id2, t2.id2)"
|
}
|
]
|
}
|
},
|
{
|
"table_dependencies": [
|
{
|
"table": "t",
|
"row_may_be_null": false,
|
"map_bit": 0,
|
"depends_on_map_bits": []
|
},
|
{
|
"table": "t2",
|
"row_may_be_null": false,
|
"map_bit": 1,
|
"depends_on_map_bits": []
|
}
|
]
|
},
|
{
|
"ref_optimizer_key_uses": [
|
{
|
"table": "t",
|
"field": "id2",
|
"equals": "t2.id2",
|
"null_rejecting": false
|
},
|
{
|
"table": "t2",
|
"field": "id2",
|
"equals": "t.id2",
|
"null_rejecting": false
|
}
|
]
|
},
|
{
|
"rows_estimation": [
|
{
|
"table": "t",
|
"table_scan": {
|
"rows": 261800,
|
"cost": 609
|
}
|
},
|
{
|
"selectivity_for_indexes": [],
|
"selectivity_for_columns": [],
|
"cond_selectivity": 1
|
},
|
{
|
"table": "t2",
|
"table_scan": {
|
"rows": 261800,
|
"cost": 609
|
}
|
}
|
]
|
},
|
{
|
"considered_execution_plans": [
|
{
|
"plan_prefix": [],
|
"table": "t",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "scan",
|
"resulting_rows": 261800,
|
"cost": 609,
|
"chosen": true
|
}
|
]
|
},
|
"rest_of_plan": [
|
{
|
"plan_prefix": ["t"],
|
"table": "t2",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"rows": 138,
|
"cost": 268856,
|
"chosen": true
|
},
|
{
|
"type": "scan",
|
"chosen": false,
|
"cause": "cost"
|
}
|
]
|
}
|
}
|
]
|
},
|
{
|
"plan_prefix": [],
|
"table": "t2",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "scan",
|
"resulting_rows": 261800,
|
"cost": 609,
|
"chosen": true
|
}
|
]
|
},
|
"rest_of_plan": [
|
{
|
"plan_prefix": ["t2"],
|
"table": "t",
|
"best_access_path": {
|
"considered_access_paths": [
|
{
|
"access_type": "ref",
|
"index": "id2_valid_to",
|
"rows": 138,
|
"cost": 268856,
|
"chosen": true
|
},
|
{
|
"type": "scan",
|
"chosen": false,
|
"cause": "cost"
|
}
|
]
|
},
|
"pruned_by_cost": true
|
}
|
]
|
}
|
]
|
},
|
{
|
"attaching_conditions_to_tables": {
|
"original_condition": "t2.id2 = t.id2 and t2.valid_to >= '9999-12-31'",
|
"attached_conditions_computation": [],
|
"attached_conditions_summary": [
|
{
|
"table": "t",
|
"attached": null
|
},
|
{
|
"table": "t2",
|
"attached": "t2.valid_to >= '9999-12-31'"
|
}
|
]
|
}
|
}
|
]
|
}
|
},
|
{
|
"join_execution": {
|
"select_id": 1,
|
"steps": []
|
}
|
}
|
]
|
} | 0 | 0 |
|
|
|
|
For this particular test case is would seem that SINGLE precision histograms provide a better estimate for the filtered clause than DOUBLE precision histograms, however it is a simple coincidence because:
The total number of unique values in id2 column (1001) is 0.38% of the total number of rows inserted which matches closely with a histogram with 254 buckets, where each bucket holds roughly 0.39% of the total rows in the set.
A histogram with 127 buckets will have each bucket hold double the number of rows.
Next I will describe how we arrive to the selectivity values:
Histogram::range_selectivity ends up being called with the parameters min_pos=1 and max_pos=1
Thread 36 "mysqld" hit Breakpoint 2, Histogram::range_selectivity (this=0x7ffe2007f9b8, min_pos=1, max_pos=1)
|
|
// Relevant code executed below:
|
|
│270 double bucket_sel= 1.0/(get_width() + 1);
|
│271 uint min= find_bucket(min_pos, TRUE);
|
│272 uint max= find_bucket(max_pos, FALSE);
|
│273 sel= bucket_sel * (max - min + 1);
|
│274 return sel;
|
With these parameters, and the values in the the "found" buckets (min and max) are always the last bucket in the set, which means that the selectivity is always bucket_sel.
bucket_sel is based off the width of a bucket. The width of a bucket means the percentage of rows from the table that fit into each bucket.
It is very easy to "trick" this piece of code into outputting a bad selectivity value (compared to what is the true selectivity of the condition). Just double the number of rows with valid_to='9999-12-31', by running this set of statements before ANALYZE TABLE t PERSISTENT FOR ALL in the test case and you'll get:
create temporary table t_min as select min(id) id from t group by id2;
|
update t, t_min set t.valid_to='9999-12-31' where t.id = t_min.id;
|
commit;
|
Now we have both the "minimum ids" and the "maximum ids" with valid_to set to '9999-12-31'.
Compute SIMPLE_PREC_HB histograms and run the Select query again:
analyze select count(*) from t join t t2 on t.id2 = t2.id2 and t2.valid_to >='9999-12-31';
|
+------+-------------+-------+-------+---------------+--------------+---------+-------------+--------+-----------+----------+------------+--------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | r_rows | filtered | r_filtered | Extra |
|
+------+-------------+-------+-------+---------------+--------------+---------+-------------+--------+-----------+----------+------------+--------------------------+
|
| 1 | SIMPLE | t2 | index | id2_valid_to | id2_valid_to | 12 | NULL | 262152 | 262152.00 | 0.39 | 0.76 | Using where; Using index |
|
| 1 | SIMPLE | t | ref | id2_valid_to | id2_valid_to | 4 | test.t2.id2 | 261 | 261.89 | 100.00 | 100.00 | Using index |
|
+------+-------------+-------+-------+---------------+--------------+---------+-------------+--------+-----------+----------+------------+--------------------------+
|
Another way to look at a SINGLE_PREC histogram:
The interval between min_value and max_value for the column we have collected a histogram is split into 255 segments. We can then compute how many rows fall into each segment. With the current implementation the smallest value we can get when it comes to selectivity estimates for a range condition is equal to 0.39% (The selectivity corresponding to 1 bucket out of 255 buckets).
For DOUBLE_PREC histograms, the smallest selectivity value we can get for a range condition is double that of SINGLE_PREC histograms as we can only hold half the number of buckets.
However, the advantage of DOUBLE_PREC histograms is that the min_value, max_value interval is split into 255 * 255 segments. This means that for range conditions spanning less than 1/255th of the range [min_value, max_value], the histogram will better convey the true percentage of rows that match said range.
igor let me know if this Analysis makes sense to you and if it helps.
|