Details
-
Bug
-
Status: Confirmed (View Workflow)
-
Major
-
Resolution: Unresolved
-
10.1(EOL), 10.2(EOL), 10.3(EOL), 10.4(EOL), 10.5
Description
Consider an example:
create table i ( |
id int, |
sku int, |
col1 int, |
key(id) |
);
|
insert into i select seq, seq, seq from seq_1_to_10000; |
create table s ( |
sku int, |
col1 int, |
key(sku) |
);
|
insert into s select seq, seq from seq_1_to_20000; |
explain
|
SELECT |
DISTINCT i.col1 |
FROM i JOIN s |
WHERE |
i.sku = s.sku
|
ORDER BY |
i.id DESC LIMIT 1; |
The query plan for this is to use the index over (i.id) to satisfy the ORDER BY ... LIMIT (and shortcut the execution early):
+------+-------------+-------+-------+---------------+------+---------+----------+------+------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+-------+-------+---------------+------+---------+----------+------+------------------------------+
|
| 1 | SIMPLE | i | index | NULL | id | 5 | NULL | 1 | Using where; Using temporary |
|
| 1 | SIMPLE | s | ref | sku | sku | 5 | j5.i.sku | 1 | Using index; Distinct |
|
+------+-------------+-------+-------+---------------+------+---------+----------+------+------------------------------+
|
The problem is, handling DISTINCT actually prevents the short-cutting.
MariaDB's duplicate-removal algorithm enumerates the whole join output.
analyze format=json
|
SELECT DISTINCT i.col1 FROM i JOIN s WHERE i.sku = s.sku
|
ORDER BY i.id DESC LIMIT 1;
|
ANALYZE FORMAT=JSON shows:
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 650.5171852,
|
"temporary_table": {
|
"table": {
|
"table_name": "i",
|
"access_type": "index",
|
"key": "id",
|
"key_length": "5",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 10266,
|
"r_rows": 10000,
|
"r_table_time_ms": 399.2319244,
|
"r_other_time_ms": 10.25677143,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "i.sku is not null"
|
},
|
"table": {
|
"table_name": "s",
|
"access_type": "ref",
|
"possible_keys": ["sku"],
|
"key": "sku",
|
"key_length": "5",
|
"used_key_parts": ["sku"],
|
"ref": ["j5.i.sku"],
|
"r_loops": 10000,
|
"rows": 1,
|
"r_rows": 1,
|
"r_table_time_ms": 227.2785918,
|
"r_other_time_ms": 13.72163759,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true,
|
"distinct": true
|
}
|
}
|
}
|
}
|
Note the "r_rows": 10000. It has computed the whole join output.
Let's run the same query without DISTINCT:
analyze format=json
|
SELECT i.col1 FROM i JOIN s WHERE i.sku = s.sku
|
ORDER BY i.id DESC LIMIT 1;
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 0.626986227,
|
"table": {
|
"table_name": "i",
|
"access_type": "index",
|
"key": "id",
|
"key_length": "5",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 10266,
|
"r_rows": 1,
|
"r_table_time_ms": 0.314612616,
|
"r_other_time_ms": 0.023000019,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "i.sku is not null"
|
},
|
"table": {
|
"table_name": "s",
|
"access_type": "ref",
|
"possible_keys": ["sku"],
|
"key": "sku",
|
"key_length": "5",
|
"used_key_parts": ["sku"],
|
"ref": ["j5.i.sku"],
|
"r_loops": 1,
|
"rows": 1,
|
"r_rows": 1,
|
"r_table_time_ms": 0.136636567,
|
"r_other_time_ms": 0.125355838,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
}
|