Details
Description
Here is the test case
the dataset:
CREATE TABLE t1 (c1 text, c2 int); |
INSERT INTO t1 VALUES ('a',1), ('c',3), ('g',7), ('d',4), ('c',3); |
CREATE TABLE t2 (c1 text, c2 int); |
INSERT INTO t2 VALUES ('b',2), ('c',3); |
CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; |
Now i run the query
MariaDB [test]> SELECT v1.c1, v1.c2, t2.c1, t2.c2 FROM v1, t2 WHERE v1.c1=t2.c1;
|
+------+------+------+------+
|
| c1 | c2 | c1 | c2 |
|
+------+------+------+------+
|
| c | 3 | c | 3 |
|
| c | 3 | c | 3 |
|
+------+------+------+------+
|
2 rows in set (0.006 sec)
|
|
MariaDB [test]> set @@join_cache_level=4;
|
Query OK, 0 rows affected (0.000 sec)
|
|
MariaDB [test]> SELECT v1.c1, v1.c2, t2.c1, t2.c2 FROM v1, t2 WHERE v1.c1=t2.c1;
|
+------+------+------+------+
|
| c1 | c2 | c1 | c2 |
|
+------+------+------+------+
|
| a | 1 | b | 2 |
|
| a | 1 | c | 3 |
|
| c | 3 | b | 2 |
|
| c | 3 | c | 3 |
|
| g | 7 | b | 2 |
|
| g | 7 | c | 3 |
|
| d | 4 | b | 2 |
|
| d | 4 | c | 3 |
|
| c | 3 | b | 2 |
|
| c | 3 | c | 3 |
|
+------+------+------+------+
|
10 rows in set (0.005 sec)
|
So we see here that raising the value of join_cache_level from 2 to 4 gives incorrect result
Attachments
Issue Links
- blocks
-
MDEV-15253 Default optimizer setting changes for MariaDB 10.4
-
- Closed
-
- relates to
-
MDEV-16714 Derived table for hash join is created with a key
-
- Stalled
-
-
MDEV-16337 Setting join_cache_level=4 changes efficient ref access plan to an inefficient hash join
-
- Open
-
Activity
Output for analyze
MariaDB [test]> analyze format=json SELECT v1.c1, v1.c2, t2.c1, t2.c2 FROM v1, t2 WHERE v1.c1=t2.c1;
|
| {
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 2.6403,
|
"table": {
|
"table_name": "t2",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 2,
|
"r_rows": 2,
|
"r_total_time_ms": 0.0973,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "t2.c1 is not null"
|
},
|
"block-nl-join": {
|
"table": {
|
"table_name": "<derived2>",
|
"access_type": "hash_ALL",
|
"possible_keys": ["key0"],
|
"key": "#hash#key0",
|
"key_length": "3",
|
"used_key_parts": ["c1"],
|
"ref": ["test.t2.c1"],
|
"r_loops": 1,
|
"rows": 5,
|
"r_rows": 5,
|
"r_total_time_ms": 0.2898,
|
"filtered": 40,
|
"r_filtered": 100
|
},
|
"buffer_type": "flat",
|
"buffer_size": "256Kb",
|
"join_type": "BNLH",
|
"r_filtered": 100,
|
"materialized": {
|
"query_block": {
|
"select_id": 2,
|
"r_loops": 1,
|
"r_total_time_ms": 0.5638,
|
"table": {
|
"table_name": "t1",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 5,
|
"r_rows": 5,
|
"r_total_time_ms": 0.2038,
|
"filtered": 100,
|
"r_filtered": 100
|
}
|
}
|
}
|
}
|
}
|
} |
|
|
Some observations:
1) the plan shows that we are doing a hash join but we are using the derived keys for the derived tables. If we have derived keys for a derived table we always do a ref access.
2)If I set optimizer_switch='derived_with_keys=off', we get correct results and see hash join being implemented using a hash key
+------+-------------+------------+----------+---------------+-----------+---------+------------+------+--------------------------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+------------+----------+---------------+-----------+---------+------------+------+--------------------------------------------------+
|
| 1 | PRIMARY | t2 | ALL | NULL | NULL | NULL | NULL | 2 | Using where |
|
| 1 | PRIMARY | <derived2> | hash_ALL | NULL | #hash#$hj | 3 | test.t2.c1 | 4 | Using where; Using join buffer (flat, BNLH join) |
|
| 2 | DERIVED | t1 | ALL | NULL | NULL | NULL | NULL | 4 | |
|
+------+-------------+------------+----------+---------------+-----------+---------+------------+------+--------------------------------------------------+
|
3) We should note that the fields of the table are of type TEXT, if I change them to VARCHAR or CHAR, I get correct results also with derived keys. The query plan is
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+
|
| 1 | PRIMARY | t2 | ALL | NULL | NULL | NULL | NULL | 2 | Using where |
|
| 1 | PRIMARY | <derived2> | hash_ALL | key0 | #hash#key0 | 13 | test.t2.c1 | 5 | Using join buffer (flat, BNLH join) |
|
| 2 | DERIVED | t1 | ALL | NULL | NULL | NULL | NULL | 5 | |
|
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+
|
|
MariaDB [test]> show create table t1;
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
| Table | Create Table |
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
| t1 | CREATE TABLE `t1` (
|
`c1` varchar(10) DEFAULT NULL,
|
`c2` int(11) DEFAULT NULL
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
MariaDB [test]> show create table t2;
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
| Table | Create Table |
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
| t2 | CREATE TABLE `t2` (
|
`c1` varchar(10) DEFAULT NULL,
|
`c2` int(11) DEFAULT NULL
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
|
+-------+-------------------------------------------------------------------------------------------------------------------------+
|
4) Also according to the documentation BLOB and TEXT columns also can be indexed, but a prefix length must be given, so what should we do during the creation of derived_keys over such fields.
As discussed on the optimizer call, igor confirms that we should not use derived keys for doing a hash join, so if we have derived keys we should prefer a REF access instead of HASH JOIN
Explain output with join_cache_level=2
explain
|
SELECT v1.c1, v1.c2, t2.c1, t2.c2 FROM v1, t2 WHERE v1.c1=t2.c1;
|
id select_type table type possible_keys key key_len ref rows Extra
|
1 PRIMARY t2 ALL NULL NULL NULL NULL 2
|
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
|
2 DERIVED t1 ALL NULL NULL NULL NULL 5
|
An observation about derived_with_keys optimization ( not really related to this patch):
MariaDB [j5]> create table t20 (
|
-> a varchar(750),
|
-> b varchar(750)
|
-> );
|
Query OK, 0 rows affected (0.19 sec)
|
|
MariaDB [j5]> insert into t20 select a,a from one_k;
|
Query OK, 1000 rows affected (0.26 sec)
|
Records: 1000 Duplicates: 0 Warnings: 0
|
|
MariaDB [j5]> create table t21 as select a from t20 limit 10;
|
Query OK, 10 rows affected (0.18 sec)
|
Records: 10 Duplicates: 0 Warnings: 0
|
|
MariaDB [j5]>
|
MariaDB [j5]> create ALGORITHM=TEMPTABLE view v20 as select a,b from t20;
|
Query OK, 0 rows affected (0.04 sec)
|
|
MariaDB [j5]> set join_cache_level=default;
|
Query OK, 0 rows affected (0.00 sec)
|
MariaDB [j5]> explain select * from t20, v20 where t20.a=v20.a;
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
| 1 | PRIMARY | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | Using where |
|
| 1 | PRIMARY | <derived2> | ref | key0 | key0 | 753 | j5.t20.a | 10 | |
|
| 2 | DERIVED | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | |
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
3 rows in set (0.01 sec)
|
|
MariaDB [j5]> explain select * from t20, v20 where t20.b=v20.b;
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
| 1 | PRIMARY | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | Using where |
|
| 1 | PRIMARY | <derived2> | ref | key0 | key0 | 753 | j5.t20.b | 10 | |
|
| 2 | DERIVED | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | |
|
+------+-------------+------------+------+---------------+------+---------+----------+------+-------------+
|
3 rows in set (0.01 sec)
|
|
MariaDB [j5]> explain select * from t20, v20 where t20.a=v20.a and t20.b=v20.b;
|
+------+-------------+------------+------+---------------+------+---------+------+------+-------------------------------------------------+
|
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|
+------+-------------+------------+------+---------------+------+---------+------+------+-------------------------------------------------+
|
| 1 | PRIMARY | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | |
|
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 1109 | Using where; Using join buffer (flat, BNL join) |
|
| 2 | DERIVED | t20 | ALL | NULL | NULL | NULL | NULL | 1109 | |
|
+------+-------------+------------+------+---------------+------+---------+------+------+-------------------------------------------------+
|
3 rows in set (0.01 sec)
|
Note that derived_with_keys is used for either of the equalities but is not
used at all when both equalities are present.
This is a consequence of a solution used in this patch:
commit f735822720b5e004d0f9cc3f490242c154cfbbac
|
Author: Igor Babaev <igor@askmonty.org> Fri Aug 1 09:17:43 2014
|
Committer: Igor Babaev <igor@askmonty.org> Fri Aug 1 09:17:43 2014
|
|
Fixed bug mdev-5721.
|
Do not define a look-up key for a temporary table if its length
|
exceeds the maximum length of such keys.
|
THere, generate_derived_keys_for_table will call TABLE::check_tmp_key() for one
two-component key, which will find it too long and refuse to create it.
Creating a one-component key will not be considered.
On the question of how derived_with_keys feature relates to hash joins.
The workflow for derived_with_keys is:
- Inside update_ref_and_keys(), a KEYUSE object is created representing derived_tbl.col=... equality
- generate_derived_keys() is called
- * which calls TABLE::check_tmp_key to check that a key can indeed be created.
When derived-with-keys feature works "on its own", it will not create KEYUSE objects representing equalities on blob columns (like derived_tbl.blob_column = ...)
The code to prevent that is pasted below, look for the !(field->flags & BLOB_FLAG) check :
add_key_field(JOIN *join,
|
KEY_FIELD **key_fields,uint and_level, Item_func *cond,
|
Field *field, bool eq_func, Item **value, uint num_values, |
table_map usable_tables, SARGABLE_PARAM **sargables)
|
{
|
uint optimize= 0;
|
if (eq_func && |
((join->is_allowed_hash_join_access() &&
|
field->hash_join_is_possible() &&
|
!(field->table->pos_in_table_list->is_materialized_derived() &&
|
field->table->created)) ||
|
(field->table->pos_in_table_list->is_materialized_derived() &&
|
!field->table->created && !(field->flags & BLOB_FLAG))))
|
{
|
optimize= KEY_OPTIMIZE_EQ;
|
}
|
however, if hash join is enabled, the first part of the above condition is satisfied, and the KEYUSE object is created.
Output for explain
MariaDB [test]> explain SELECT v1.c1, v1.c2, t2.c1, t2.c2 FROM v1, t2 WHERE v1.c1=t2.c1;
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+
| 1 | PRIMARY | t2 | ALL | NULL | NULL | NULL | NULL | 2 | Using where |
| 1 | PRIMARY | <derived2> | hash_ALL | key0 | #hash#key0 | 3 | test.t2.c1 | 5 | Using join buffer (flat, BNLH join) |
| 2 | DERIVED | t1 | ALL | NULL | NULL | NULL | NULL | 5 | |
+------+-------------+------------+----------+---------------+------------+---------+------------+------+-------------------------------------+