Details
Type: Bug
Status: Closed
Priority: Major
Resolution: Incomplete
Affects Version/s: 10.11.7
Fix Version/s: None
Component/s: None
Description
Hello,
We have been running a 3-node MariaDB 10.3 Galera Cluster containing two large tables (>500M rows each). The primary key of these tables is composite and, when the tables were initially created, it was (incorrectly) defined with the least selective column placed first. The table definition resembles the following structure:
CREATE TABLE `problem_table` (
  `server` int(11) NOT NULL,   -- low cardinality
  `registration` int NOT NULL, -- high cardinality
  `account` int NOT NULL,
  `datetime` datetime NOT NULL,
  PRIMARY KEY (`server`, `registration`),
  KEY `idx_account_server_datetime` (`account`, `server`, `datetime`)
) ENGINE=InnoDB
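A minimal sketch of how the cardinality skew can be checked, assuming the table definition above:

-- In SHOW INDEX output, the Cardinality of the first PRIMARY KEY part (`server`)
-- is small, while the cumulative cardinality of (`server`, `registration`) is
-- roughly the row count, because the primary key is unique.
SHOW INDEX FROM problem_table;

-- Measuring the skew directly (slower, but exact):
SELECT COUNT(DISTINCT server)       AS distinct_servers,
       COUNT(DISTINCT registration) AS distinct_registrations,
       COUNT(DISTINCT account)      AS distinct_accounts
FROM problem_table;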
On MariaDB 10.3, queries on this table were running just fine. Queries which included the server/account predicates were correctly using the idx_account_server_datetime index and executed immediately.
After the upgrade to 10.11, queries that included the server/account predicates needed tens of seconds or even minutes to execute. Examining the execution plan showed that such queries, rather than using the idx_account_server_datetime index, were using the primary key, but only utilising the server part of the key.
analyze format=json
SELECT d.registration
FROM accounts AS mu
INNER JOIN problem_table AS d
  ON d.account = mu.account_num
  AND d.server = mu.server
WHERE mu.account_num IN (54009816, 54009817, 54064221, 54159404, 54159405, 54159407)
Results in the following execution plan:
{
  "query_optimization": {
    "r_total_time_ms": 0.605693292
  },
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 101981.6707,
    "nested_loop": [
      {
        "table": {
          "table_name": "mu",
          "access_type": "range",
          "possible_keys": ["idx_account_num"],
          "key": "idx_account_num",
          "key_length": "8",
          "used_key_parts": ["account_num"],
          "r_loops": 1,
          "rows": 6,
          "r_rows": 6,
          "r_table_time_ms": 0.102108941,
          "r_other_time_ms": 0.052517189,
          "r_engine_stats": {
            "pages_accessed": 18
          },
          "filtered": 100,
          "r_filtered": 100,
          "attached_condition": "mydb.mu.account_num in (54009816,54009817,54064221,54159404,54159405,54159407)",
          "using_index": true
        }
      },
      {
        "table": {
          "table_name": "d",
          "access_type": "ref",
          "possible_keys": ["PRIMARY", "idx_account_server_datetime"],
          "key": "PRIMARY",
          "key_length": "4",
          "used_key_parts": ["server"],
          "ref": ["mydb.mu.server"],
          "r_loops": 6,
          "rows": 89,
          "r_rows": 20279561,
          "r_table_time_ms": 80010.61336,
          "r_other_time_ms": 21970.88978,
          "r_engine_stats": {
            "pages_accessed": 6445148,
            "old_rows_read": 35
          },
          "filtered": 100,
          "r_filtered": 2.900641357,
          "attached_condition": "mydb.d.account = server.mu.account_num"
        }
      }
    ]
  }
}
As you can see, when joining the two tables, rather than using the idx_account_server_datetime index, the optimizer uses only the leftmost part of the primary key. We ran ANALYZE and OPTIMIZE on the tables to try to fix the issue, without any noticeable improvement. Please note that if we use the FORCE INDEX syntax, the query executes immediately:
analyze format=json
SELECT d.registration
FROM accounts AS mu
INNER JOIN problem_table AS d FORCE INDEX (idx_account_server_datetime)
  ON d.account = mu.account_num
  AND d.server = mu.server
WHERE mu.account_num IN (54009816, 54009817, 54064221, 54159404, 54159405, 54159407)
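For reference, a minimal sketch of the ANALYZE/OPTIMIZE statistics refresh described above (the exact statements and options are an assumption):

-- Sketch only: the actual statements/options used may have differed.
-- Rebuild the table and refresh InnoDB index statistics:
OPTIMIZE TABLE problem_table;
ANALYZE TABLE problem_table;
-- Optionally collect engine-independent (persistent) statistics as well:
ANALYZE TABLE problem_table PERSISTENT FOR ALL;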