|
OK the plans look similar but they are not exactly the same, lets analyze the output of ANALYZE FORMAT=JSON for the queries with different join_buffer_size.
When we have a smaller join_buffer_size in that case we disallow using join buffering for a table (t3 here)
Output of ANALYZE with join_buffer_size= 2097152
ANALYZE FORMAT=JSON
|
SELECT * FROM t1, t2 LEFT JOIN t3 ON (c3 = c2)
|
WHERE ( i2, pk1 ) IN ( SELECT i3, i4 FROM t3 A, t4 B);
|
ANALYZE
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 0.4434,
|
"table": {
|
"table_name": "B",
|
"access_type": "system",
|
"r_loops": 0,
|
"rows": 1,
|
"r_rows": null,
|
"filtered": 100,
|
"r_filtered": null
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "const",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "4",
|
"used_key_parts": ["pk1"],
|
"ref": ["const"],
|
"r_loops": 0,
|
"rows": 1,
|
"r_rows": null,
|
"filtered": 100,
|
"r_filtered": null
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 2,
|
"r_rows": 2,
|
"r_total_time_ms": 0.0201,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "t2.i2 is not null"
|
},
|
"duplicates_removal": {
|
"block-nl-join": {
|
"table": {
|
"table_name": "A",
|
"access_type": "hash_index",
|
"possible_keys": ["i3"],
|
"key": "#hash#i3:i3",
|
"key_length": "5:5",
|
"used_key_parts": ["i3"],
|
"ref": ["test.t2.i2"],
|
"r_loops": 1,
|
"rows": 3,
|
"r_rows": 3,
|
"r_total_time_ms": 0.055,
|
"filtered": 66.667,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"buffer_type": "flat",
|
"buffer_size": "2048Kb",
|
"join_type": "BNLH",
|
"r_filtered": 100
|
}
|
},
|
"table": {
|
"table_name": "t3",
|
"access_type": "hash_ALL",
|
"possible_keys": ["c3"],
|
"key": "#hash#c3",
|
"key_length": "2",
|
"used_key_parts": ["c3"],
|
"ref": ["test.t2.c2"],
|
"r_loops": 2,
|
"rows": 3,
|
"r_rows": 3,
|
"r_total_time_ms": 0.0289,
|
"filtered": 33.333,
|
"r_filtered": 100,
|
"attached_condition": "trigcond(trigcond(t2.c2 is not null))"
|
}
|
}
|
}
|
Output for ANALYZE FORMAT=JSON with join_buffer_size= 262144 (gives correct result)
ANALYZE FORMAT=JSON
|
SELECT * FROM t1, t2 LEFT JOIN t3 ON (c3 = c2)
|
WHERE ( i2, pk1 ) IN ( SELECT i3, i4 FROM t3 A, t4 B);
|
ANALYZE
|
{
|
"query_block": {
|
"select_id": 1,
|
"r_loops": 1,
|
"r_total_time_ms": 0.3191,
|
"table": {
|
"table_name": "B",
|
"access_type": "system",
|
"r_loops": 0,
|
"rows": 1,
|
"r_rows": null,
|
"filtered": 100,
|
"r_filtered": null
|
},
|
"table": {
|
"table_name": "t1",
|
"access_type": "const",
|
"possible_keys": ["PRIMARY"],
|
"key": "PRIMARY",
|
"key_length": "4",
|
"used_key_parts": ["pk1"],
|
"ref": ["const"],
|
"r_loops": 0,
|
"rows": 1,
|
"r_rows": null,
|
"filtered": 100,
|
"r_filtered": null
|
},
|
"table": {
|
"table_name": "t2",
|
"access_type": "ALL",
|
"r_loops": 1,
|
"rows": 2,
|
"r_rows": 2,
|
"r_total_time_ms": 0.0172,
|
"filtered": 100,
|
"r_filtered": 100,
|
"attached_condition": "t2.i2 is not null"
|
},
|
"duplicates_removal": {
|
"block-nl-join": {
|
"table": {
|
"table_name": "A",
|
"access_type": "hash_index",
|
"possible_keys": ["i3"],
|
"key": "#hash#i3:i3",
|
"key_length": "5:5",
|
"used_key_parts": ["i3"],
|
"ref": ["test.t2.i2"],
|
"r_loops": 1,
|
"rows": 3,
|
"r_rows": 3,
|
"r_total_time_ms": 0.0422,
|
"filtered": 66.667,
|
"r_filtered": 100,
|
"using_index": true
|
},
|
"buffer_type": "flat",
|
"buffer_size": "256Kb",
|
"join_type": "BNLH",
|
"r_filtered": 100
|
}
|
},
|
"block-nl-join": {
|
"table": {
|
"table_name": "t3",
|
"access_type": "hash_ALL",
|
"possible_keys": ["c3"],
|
"key": "#hash#c3",
|
"key_length": "2",
|
"used_key_parts": ["c3"],
|
"ref": ["test.t2.c2"],
|
"r_loops": 1,
|
"rows": 3,
|
"r_rows": 3,
|
"r_total_time_ms": 0.0132,
|
"filtered": 33.333,
|
"r_filtered": 100
|
},
|
"buffer_type": "flat",
|
"buffer_size": "256Kb",
|
"join_type": "BNLH",
|
"attached_condition": "trigcond(trigcond(t2.c2 is not null))",
|
"r_filtered": null
|
}
|
}
|
}
|
|
The problem with the first output (with smaller join buffer), it is still using hash key for table t3 which is incorrect. I think instead of using hash-key we could use ref access on key (c3) here instead. Also by not being able to do the lookup the ON condition is not evaluated (c3= c2) and hence all rows pass the where clause and we return more results.
|