Details

Type: Bug
Status: Stalled
Priority: Minor
Resolution: Unresolved
Affects Version/s: 10.5.9, 10.2 (EOL), 10.3 (EOL), 10.4 (EOL), 10.5
Description
A customer shared an interesting test result and was wondering why the query is faster with CONCAT(col) in the WHERE clause than without it. I've attached the test schema and data.
- This is the initial table schema and query:
CREATE TABLE F (
  id INTEGER NOT NULL,
  dept CHAR(4),
  dest CHAR(4),
  t TIMESTAMP NOT NULL,

  PRIMARY KEY (id),
  KEY t (t)
);

CREATE TABLE M (
  station CHAR(4) NOT NULL,
  t_start DATETIME NOT NULL,
  t_end DATETIME DEFAULT NULL,
  windspeed INTEGER unsigned DEFAULT NULL,
  winddir INTEGER unsigned DEFAULT NULL,
  PRIMARY KEY (station, t_start)
);

SELECT SQL_NO_CACHE F.id, F.t, M.t_start, M.t_end, M.winddir
FROM F JOIN M ON (M.t_start BETWEEN F.t - INTERVAL 1 HOUR AND F.t AND M.t_end >= F.t)
WHERE F.t BETWEEN '2021-01-01' AND '2021-01-03'
  AND M.station = 'LFBO';
+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
| id   | select_type | table | type  | possible_keys | key     | key_len | ref   | rows  | Extra                    |
+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
|    1 | SIMPLE      | F     | range | t             | t       | 4       | NULL  |   266 | Using where; Using index |
|    1 | SIMPLE      | M     | ref   | PRIMARY       | PRIMARY | 12      | const | 42462 | Using where              |
+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
- ANALYZE output:
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 40612.55196,
"table": {
"table_name": "F",
"access_type": "range",
"possible_keys": ["t"],
"key": "t",
"key_length": "4",
"used_key_parts": ["t"],
"r_loops": 1,
"rows": 266,
"r_rows": 266,
"r_table_time_ms": 1.012544094,
"r_other_time_ms": 1.524286562,
"filtered": 100,
"r_filtered": 100,
"attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
"using_index": true
},
"table": {
"table_name": "M",
"access_type": "ref",
"possible_keys": ["PRIMARY"],
"key": "PRIMARY",
"key_length": "12",
"used_key_parts": ["station"],
"ref": ["const"],
"r_loops": 266,
"rows": 42462,
"r_rows": 21285,
"r_table_time_ms": 18581.56254,
"r_other_time_ms": 22028.44358,
"filtered": 100,
"r_filtered": 0.004698144,
"attached_condition": "m.station = 'LFBO' and m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t"
}
}
}
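The ANALYZE output already shows where the time goes: used_key_parts lists only station, because the bounds on t_start depend on F.t and cannot become part of a const ref lookup. Each of the 266 outer rows therefore rescans every 'LFBO' row. A rough sketch of the per-loop work (my own illustration, not from the customer):

-- Plan 1 per-loop work: the ref lookup on PRIMARY matches on station only,
-- so every outer row effectively re-runs this scan and then filters on t_start/t_end:
SELECT COUNT(*) FROM M WHERE station = 'LFBO';
-- ANALYZE reports r_rows = 21285 per loop; 266 loops * 21285 rows is roughly
-- 5.7M row reads, which lines up with the ~40.6s total runtime.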
With the initial query and index, it was extremely slow, so I proposed changing the `M` table's primary key to lead with `t_start`, so the range condition on `t_start` can use the index:
MariaDB [raymond]> alter table m drop primary key, add primary key(t_start, station);
Query OK, 0 rows affected (0.415 sec)
Records: 0  Duplicates: 0  Warnings: 0
+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
| id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                           |
+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
|    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL |    266 | Using where; Using index                        |
|    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Using where; Using join buffer (flat, BNL join) |
+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
- ANALYZE output:
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 4726.954647,
"table": {
"table_name": "F",
"access_type": "range",
"possible_keys": ["t"],
"key": "t",
"key_length": "4",
"used_key_parts": ["t"],
"r_loops": 1,
"rows": 266,
"r_rows": 266,
"r_table_time_ms": 0.802683315,
"r_other_time_ms": 0.913815331,
"filtered": 100,
"r_filtered": 100,
"attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
"using_index": true
},
"block-nl-join": {
"table": {
"table_name": "M",
"access_type": "ALL",
"possible_keys": ["PRIMARY"],
"r_loops": 1,
"rows": 249541,
"r_rows": 250119,
"r_table_time_ms": 753.0043604,
"r_other_time_ms": 3972.225239,
"filtered": 100,
"r_filtered": 8.509949264,
"attached_condition": "m.station = 'LFBO'"
},
"buffer_type": "flat",
"buffer_size": "2Kb",
"join_type": "BNL",
"attached_condition": "m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t",
"r_filtered": 0.004698144
}
}
}
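For reference, the win here comes from scanning M once instead of 266 times (my own arithmetic from the ANALYZE numbers above):

-- Plan 2 cost, per the ANALYZE output: one full scan of M (r_loops = 1,
-- r_rows = 250119) with the station filter applied during the scan, then the
-- join condition checked against the buffered F rows in memory.
SELECT 266 * 21285 AS plan1_row_reads, 250119 AS plan2_row_reads;
-- ~5.7M row reads drop to ~250K, matching the drop from ~40.6s to ~4.7s.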
With that change, the query got much faster than the first plan. However, the customer noted that it got even faster still with CONCAT() in the WHERE clause.
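The customer's exact rewritten query was not attached; a form that would yield the plan below is wrapping the station comparison in CONCAT(), so it no longer looks like an indexable equality to the optimizer (this reconstruction is my assumption, not the customer's verbatim query):

-- Hypothetical reconstruction; the customer's exact CONCAT query was not shared.
SELECT SQL_NO_CACHE F.id, F.t, M.t_start, M.t_end, M.winddir
FROM F JOIN M ON (M.t_start BETWEEN F.t - INTERVAL 1 HOUR AND F.t AND M.t_end >= F.t)
WHERE F.t BETWEEN '2021-01-01' AND '2021-01-03'
  AND CONCAT(M.station) = 'LFBO';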
+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
| id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                          |
+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
|    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL |    266 | Using where; Using index                       |
|    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Range checked for each record (index map: 0x1) |
+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
- ANALYZE output:
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 67.0752722,
"table": {
"table_name": "F",
"access_type": "range",
"possible_keys": ["t"],
"key": "t",
"key_length": "4",
"used_key_parts": ["t"],
"r_loops": 1,
"rows": 266,
"r_rows": 266,
"r_table_time_ms": 0.802407781,
"r_other_time_ms": 2.921898929,
"filtered": 100,
"r_filtered": 100,
"attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
"using_index": true
},
"range-checked-for-each-record": {
"keys": ["PRIMARY"],
"r_keys": {
"full_scan": 0,
"index_merge": 0,
"range": {
"PRIMARY": 266
}
},
"table": {
"table_name": "M",
"access_type": "ALL",
"possible_keys": ["PRIMARY"],
"r_loops": 266,
"rows": 249541,
"r_rows": 37.36842105,
"r_table_time_ms": 31.15710876,
"r_other_time_ms": 32.18475629,
"filtered": 100,
"r_filtered": 2.676056338
}
}
}
}
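Reading this plan: "Range checked for each record" means that for each of the 266 rows from F, the optimizer re-checks whether a range scan on PRIMARY is possible, and here it is every time (r_keys shows range.PRIMARY: 266). Each loop then reads only ~37 rows. In effect every outer row runs something like the following (my sketch; @f_t stands for the current F.t value):

-- Plan 3 per-loop work (illustration; @f_t is the current outer row's F.t):
SET @f_t = '2021-01-01 12:00:00';  -- example value from the tested range
SELECT M.t_start, M.t_end, M.winddir
FROM M
WHERE M.t_start BETWEEN @f_t - INTERVAL 1 HOUR AND @f_t  -- range on the PK prefix t_start
  AND M.station = 'LFBO'
  AND M.t_end >= @f_t;
-- r_rows = 37.4 per loop; 266 * 37 ~= 10K row reads total, versus ~250K in
-- plan 2 and ~5.7M in plan 1, which accounts for the 67ms runtime.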
What would be the logical explanation for this dramatic query performance improvement?