|
--source include/have_innodb.inc
|
|
drop table if exists t1, t2;
|
create table t1 (pk int auto_increment primary key, i int, j int, index(i,j)) engine=InnoDB;
|
begin;
|
insert into t1 values
|
(null,1,1),(null,2,2),(null,3,3),(null,4,4),
|
(null,5,5),(null,6,6),(null,7,7),(null,8,8);
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
commit;
|
|
create table t2 (pk int auto_increment primary key, i int, j int, index(i,j)) engine=InnoDB;
|
insert into t2 select * from t1;
|
|
begin;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
insert into t1 select null, i, j from t1;
|
commit;
|
|
analyze table t1, t2;
|
|
explain
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j;
|
explain
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j order by null;
|
|
set profiling = 1;
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j;
|
show profile for query 1;
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j order by null;
|
show profile for query 2;
|
|
# set profiling = 0;
|
drop table t1, t2;
|
|
Queries
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j;
|
select t1.j, t2.j, count(distinct t1.pk) from t1 inner join t2 on t1.i = t2.i group by t1.j, t2.j order by null;
|
|
10.1.8+ explains
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| 1 | SIMPLE | t2 | NULL | index | i | i | 10 | NULL | 128 | 100.00 | Using where; Using index; Using temporary; Using filesort |
|
| 1 | SIMPLE | t1 | NULL | ref | i | i | 5 | test.t2.i | 18729 | 100.00 | Using index |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
2 rows in set, 1 warning (0.00 sec)
|
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| 1 | SIMPLE | t2 | NULL | index | i | i | 10 | NULL | 128 | 100.00 | Using where; Using index; Using temporary; Using filesort |
|
| 1 | SIMPLE | t1 | NULL | ref | i | i | 5 | test.t2.i | 18729 | 100.00 | Using index |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
Values for 'Sorting result' are highly volatile, but they are present in both profiles and are considerable comparing to others.
|
10.1.8+ Profile 1
|
+------------------------------+----------+
|
| Status | Duration |
|
+------------------------------+----------+
|
| starting | 0.000041 |
|
| checking permissions | 0.000009 |
|
| checking permissions | 0.000008 |
|
| Opening tables | 0.000015 |
|
| After opening tables | 0.000011 |
|
| System lock | 0.000008 |
|
| Table lock | 0.000011 |
|
| init | 0.000015 |
|
| optimizing | 0.000011 |
|
| statistics | 0.000019 |
|
| preparing | 0.000018 |
|
| executing | 0.000008 |
|
| Copying to tmp table | 0.000036 |
|
| Copying to tmp table | 0.287073 |
|
| converting HEAP to Aria | 0.070330 |
|
| Copying to tmp table on disk | 1.992319 |
|
| Sorting result | 1.166406 |
|
| Sending data | 0.896152 |
|
| end | 0.000092 |
|
| removing tmp table | 0.000015 |
|
| end | 0.000008 |
|
| removing tmp table | 0.004670 |
|
| end | 0.000017 |
|
| query end | 0.000015 |
|
| closing tables | 0.000009 |
|
| Unlocking tables | 0.000021 |
|
| freeing items | 0.000016 |
|
| removing tmp table | 0.000009 |
|
| freeing items | 0.000008 |
|
| updating status | 0.000077 |
|
| cleaning up | 0.000013 |
|
+------------------------------+----------+
|
|
10.1.8+ Profile 2
|
+------------------------------+----------+
|
| Status | Duration |
|
+------------------------------+----------+
|
| starting | 0.000058 |
|
| checking permissions | 0.000009 |
|
| checking permissions | 0.000037 |
|
| Opening tables | 0.000020 |
|
| After opening tables | 0.000009 |
|
| System lock | 0.000009 |
|
| Table lock | 0.000011 |
|
| init | 0.000020 |
|
| optimizing | 0.000016 |
|
| statistics | 0.000029 |
|
| preparing | 0.000043 |
|
| executing | 0.000008 |
|
| Copying to tmp table | 0.000030 |
|
| Copying to tmp table | 0.289974 |
|
| converting HEAP to Aria | 0.073210 |
|
| Copying to tmp table on disk | 1.915759 |
|
| Sorting result | 0.515669 |
|
| Sending data | 1.845258 |
|
| end | 0.000034 |
|
| removing tmp table | 0.000013 |
|
| end | 0.000007 |
|
| removing tmp table | 0.004524 |
|
| end | 0.000013 |
|
| query end | 0.000014 |
|
| closing tables | 0.000009 |
|
| Unlocking tables | 0.000021 |
|
| freeing items | 0.000014 |
|
| removing tmp table | 0.000009 |
|
| freeing items | 0.000008 |
|
| updating status | 0.000078 |
|
| cleaning up | 0.000012 |
|
+------------------------------+----------+
|
It looks similar on MySQL 5.5, but quite different on 5.6/5.7.
Both plains have filesort; and both profiles have 'Sorting result', but the value is negligible comparing to others.
Don't pay attention to absolute values, it's a debug build.
|
5.7.9 Explains
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| 1 | SIMPLE | t2 | NULL | index | i | i | 10 | NULL | 128 | 100.00 | Using where; Using index; Using temporary; Using filesort |
|
| 1 | SIMPLE | t1 | NULL | ref | i | i | 5 | test.t2.i | 18729 | 100.00 | Using index |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
| 1 | SIMPLE | t2 | NULL | index | i | i | 10 | NULL | 128 | 100.00 | Using where; Using index; Using temporary; Using filesort |
|
| 1 | SIMPLE | t1 | NULL | ref | i | i | 5 | test.t2.i | 18729 | 100.00 | Using index |
|
+----+-------------+-------+------------+-------+---------------+------+---------+-----------+-------+----------+-----------------------------------------------------------+
|
|
5.7.9 Profile 1
|
+---------------------------+-----------+
|
| Status | Duration |
|
+---------------------------+-----------+
|
| starting | 0.000105 |
|
| checking permissions | 0.000012 |
|
| checking permissions | 0.000012 |
|
| Opening tables | 0.000071 |
|
| init | 0.000047 |
|
| System lock | 0.000035 |
|
| optimizing | 0.000020 |
|
| statistics | 0.000049 |
|
| preparing | 0.000028 |
|
| Creating tmp table | 0.000092 |
|
| Sorting result | 0.000019 |
|
| executing | 0.000009 |
|
| Sending data | 7.695084 |
|
| converting HEAP to ondisk | 15.788881 |
|
| Sending data | 22.940116 |
|
| Creating sort index | 14.980626 |
|
| end | 0.000031 |
|
| query end | 0.000038 |
|
| removing tmp table | 0.071810 |
|
| query end | 0.000076 |
|
| removing tmp table | 0.000027 |
|
| query end | 0.000016 |
|
| closing tables | 0.000051 |
|
| freeing items | 0.000038 |
|
| removing tmp table | 0.000016 |
|
| freeing items | 0.000104 |
|
| cleaning up | 0.000040 |
|
+---------------------------+-----------+
|
|
5.7.9 Profile 2
|
+---------------------------+-----------+
|
| Status | Duration |
|
+---------------------------+-----------+
|
| starting | 0.000127 |
|
| checking permissions | 0.000012 |
|
| checking permissions | 0.000011 |
|
| Opening tables | 0.000037 |
|
| init | 0.000057 |
|
| System lock | 0.000035 |
|
| optimizing | 0.000026 |
|
| statistics | 0.000079 |
|
| preparing | 0.000033 |
|
| Creating tmp table | 0.000106 |
|
| Sorting result | 0.000018 |
|
| executing | 0.000009 |
|
| Sending data | 6.841433 |
|
| converting HEAP to ondisk | 14.928534 |
|
| Sending data | 24.134702 |
|
| Creating sort index | 15.795815 |
|
| end | 0.000032 |
|
| query end | 0.000032 |
|
| removing tmp table | 0.072903 |
|
| query end | 0.000143 |
|
| removing tmp table | 0.000022 |
|
| query end | 0.000017 |
|
| closing tables | 0.000036 |
|
| freeing items | 0.000070 |
|
| removing tmp table | 0.000024 |
|
| freeing items | 0.000103 |
|
| cleaning up | 0.000042 |
|
+---------------------------+-----------+
|
|
|
EXPLAIN FORMAT=JSON on 10.2:
"query_block": {
|
"select_id": 1,
|
"filesort": {
|
"sort_key": "t1.j, t2.j",
|
"temporary_table": {
|
"table": {
|
"table_name": "t1",
|
"access_type": "index",
|
"possible_keys": ["i"],
|
"key": "i",
|
"key_length": "10",
|
"used_key_parts": ["i", "j"],
|
"rows": 131380,
|
"filtered": 100,
|
"attached_condition": "(t1.i is not null)",
|
"using_index": true
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ref",
|
"possible_keys": ["i"],
|
"key": "i",
|
"key_length": "5",
|
"used_key_parts": ["i"],
|
"ref": ["j22.t1.i"],
|
"rows": 8,
|
"filtered": 100,
|
"using_index": true
|
}
|
}
|
}
|
}
|
|
|
Note that it doesn't do sorting if I take the query with ORDER BY null and replace count(distinct t1.pk) with count(distinct t1.pk).
Sorting is necessary to compute COUNT(DISTINCT).
When we compute COUNT(t1.pk), the optimizer works as follows:
- create a temporary (work) table: create table tmp ( t1_j, t2_j , value_of_count_pk int, primary key(t1_j, t2_j));
- run the join. For every record combination, locate its row in the temp. table, update of value_of_count_pk
The above works, because intermediate state of COUNT(t1.pk) is just one number, "count value so far".
When we compute COUNT(DISTINCT t1.pk), the intermediate state is "count value so far" and also "all values of t1.pk seen so far". This set is quite big and won't fit into a field in a temp.table.
So, the optimizer uses a different strategy here. It works as follows:
- create a temporary table
- Run the join. Sort its output by t1_j, t2_j.
- Read the join output in the sorted order. When we do this, GROUP BY groups follow one another. That way, we first compute COUNT(DISTINCT) for the first group in the group by, then for the second, and so forth.
This execution strategy needs to sorting regardless of whether the query output needs to be sorted.
(If the above explanation is not clear, checkout also http://www.slideshare.net/SergeyPetrunya/how-mysql-handles-order-by-group-by-and-distinct . The case we're talking about here is in slide #18 "Handling DISTINCT aggregates").
|