|
Attached mysqldump for test tables.
|
|
Joey Mart, thanks for the report.
Confirmed with a test case below. Execution of query with "using temprorary" takes longer.
CREATE TABLE `t1` (id char(32) NOT NULL PRIMARY KEY);
|
CREATE TABLE `t2` (id char(32) NOT NULL PRIMARY KEY);
|
|
INSERT INTO t1 SELECT seq FROM seq_1_to_1000000;
|
INSERT INTO t2 SELECT seq FROM seq_5000_to_500000;
|
|
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t1.id ;
|
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t2.id ;
|
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t1.id ;
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 5401.8,
|
"filesort": {
|
"sort_key": "t1.`id`",
|
"r_loops": 1,
|
"r_total_time_ms": 156.94,
|
"r_used_priority_queue": false,
|
"r_output_rows": 495001,
|
"r_sort_passes": 4,
|
"r_buffer_size": "2047Kb",
|
"temporary_table": {
|
"table": {
|
"table_name": "t2",
|
"access_type": "index",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 484131,
|
"r_rows": 495001,
|
"r_total_time_ms": 886.6,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 495001,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 4122.1,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
}
|
}
|
}
|
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t2.id ;
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 2471.7,
|
"table": {
|
"table_name": "t2",
|
"access_type": "index",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 484131,
|
"r_rows": 495001,
|
"r_total_time_ms": 163.77,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 495001,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 2235,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
}
|
|
|
Still reproducible on the current MariaDB 10.2 (and I expect that on MariaDB 10.3 it will be reproducible, too).
This case should be handled by "orderby_uses_equalities" optimization. However, I have orderby_uses_equalities=on on my server, and I still observe the following
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t1.id ;
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 13620,
|
"filesort": {
|
"sort_key": "t1.`id`",
|
"r_loops": 1,
|
"r_total_time_ms": 461.05,
|
"r_used_priority_queue": false,
|
"r_output_rows": 495001,
|
"r_sort_passes": 4,
|
"r_buffer_size": "2047Kb",
|
"temporary_table": {
|
"table": {
|
"table_name": "t2",
|
"access_type": "index",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 425764,
|
"r_rows": 495001,
|
"r_total_time_ms": 1768.8,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 495001,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 9524.7,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
}
|
}
|
} |
|
Join order is (t2, t1), ORDER BY t1.id causes filesort to be used (why is orderby_uses_equalities not working?)
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t2.id ;
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 13234,
|
"table": {
|
"table_name": "t2",
|
"access_type": "index",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 425764,
|
"r_rows": 495001,
|
"r_total_time_ms": 1751.9,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 495001,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 10387,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
} |
|
Join order is (t2, t1), ORDER BY t2.id uses the index in t2 (as expected).
|
|
varun, please investigate.
|
|
Observation:
If i change the type field id of table t1 and t2 from char(32) to int, then i see the orderby_uses_equalities being used , so we don't see any temporary table while sorting.
Output for ANALYZE format=json with order by using t1.id
ANALYZE FORMAT=JSON SELECT t1.id FROM t1 INNER JOIN t2 ON t1.id = t2.id order by t1.id ;
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 0.6928,
|
"table": {
|
"table_name": "t2",
|
"access_type": "index",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "4",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 10,
|
"r_rows": 10,
|
"r_total_time_ms": 0.1476,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "4",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 10,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 0.4508,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
} |
|
|
|
Ok, so `orderby_uses_equalities` is unable use this equality because of its datatype.
|
|
A little background about equality propagation. There are two kinds of equality propagation.
1. Full substitution - this allows to make inferences like X=Y AND cond(X) -> X=Y AND cond(Y).
2. Substitution for the purpose of comparison (S4PC) allows to make inferences like X=Y and X < expr -> X=Y and Y < expr
Full substitution is often unusable for [VAR]CHAR. The common reasons are case-insensitive comparisons ( 'a'='A' ) and padding ('A' = 'A '). However, they don't prevent S4PC.
My point is that orderby_uses_equalities should use S4PC. While this bug looks like it is relying on Full Substitution instead.
|
|
Example of S4PC in action:
MariaDB [test]> show create table t1;
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
| Table | Create Table |
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
| t1 | CREATE TABLE `t1` (
|
`id` char(32) NOT NULL,
|
PRIMARY KEY (`id`)
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
1 row in set (0.000 sec)
|
10:45
MariaDB [test]> show create table t2;
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
| Table | Create Table |
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
| t2 | CREATE TABLE `t2` (
|
`id` char(32) NOT NULL,
|
PRIMARY KEY (`id`)
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
|
+-------+-----------------------------------------------------------------------------------------------------------+
|
1 row in set (0.000 sec)
|
analyze format=json select * from t1,t2 where t1.id > 'a' and t2.id=t1.id
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 0.5222,
|
"table": {
|
"table_name": "t2",
|
"access_type": "range",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"r_loops": 1,
|
"rows": 3,
|
"r_rows": 3,
|
"r_total_time_ms": 0.1872,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "t2.`id` > 'a'",
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "eq_ref",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "32",
|
"used_key_parts": ["id"],
|
"ref": ["test.t2.id"],
|
"r_loops": 3,
|
"rows": 1,
|
"r_rows": 1,
|
"r_total_time_ms": 0.1543,
|
"filtered": 100,
|
"r_filtered": 100,
|
"using_index": true
|
}
|
}
|
} |
|
|
|
varun, please find the code that does S4PC and then let's
- verify that orderby_uses_equalities uses full substitution instead
- discuss whether it could use S4PC also.
(TODO: does orderby_uses_equalities handle cases where full substitution applies while S4PC does not? e.g. {{ t1.datetime_col=t2.datetime_col ORDER BY MONTH(t1.datetime_col)}} ? )
|
|
For S4PC we will not have a problem even if we have a function in the order by list.
lets take a query for example:
select * from t1,t2 where t1.id = t2.id order by length(t1.id)
so the substitution of t1.id with t2.id would not happen inside the length(..) function. In the optimization orderby_uses_equalites we do substitution only for the top level items(not functional items).
|