See details in the attached test case. If we create the right spread of values in the key and pick the range just right, when rowid_filter optimizer switch is enabled, the optimizer will sacrifice a part of the key for the rowid_filter. As a result we use only the u_delete_user and end up examining 5K+ rows out of 10K in the table. If we disable rowid_filter, the user_name constraint is being used, and we examine only 1 row.
let $a= `select concat((select nm from t1 orderby RAND(13) limit 1),'%')`;
eval select'$a'as a;
set optimizer_switch='rowid_filter=off';
eval
explain
select * from t1 where nm like'$a'AND fl2 = 0;
show status like'Last_query_cost';
eval
analyze format=json
select * from t1 where nm like'$a'AND fl2 = 0;
eval
select * from t1 where nm like'$a'AND fl2 = 0;
set optimizer_switch='rowid_filter=on';
eval
explain
select * from t1 where nm like'$a'AND fl2 = 0;
show status like'Last_query_cost';
eval
analyze format=json
select * from t1 where nm like'$a'AND fl2 = 0;
eval
select * from t1 where nm like'$a'AND fl2 = 0;
set optimizer_switch='rowid_filter=default';
droptable t1;
When running this test case we see:
set optimizer_switch='rowid_filter=off';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx1,idx2 idx1 35 NULL 1 Using index condition; Using where
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 2.707390
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.0397,
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["idx1", "idx2"],
"key": "idx1",
"key_length": "35",
"used_key_parts": ["nm"],
"r_loops": 1,
"rows": 1,
"r_rows": 1,
"r_total_time_ms": 0.0257,
"filtered": 49.2,
"r_filtered": 100,
"index_condition": "t1.nm like '683%'",
"attached_condition": "t1.fl2 = 0"
}
}
}
select * from t1 where nm like '683%' AND fl2 = 0;
pk nm fl1 fl2
232 683 0 0
set optimizer_switch='rowid_filter=on';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref|filter idx1,idx2 idx2|idx1 2|35 const 492 (0%) Using where; Using rowid filter
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 1.431790
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.2755,
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["idx1", "idx2"],
"key": "idx2",
"key_length": "2",
"used_key_parts": ["fl2"],
"ref": ["const"],
"rowid_filter": {
"range": {
"key": "idx1",
"used_key_parts": ["nm"]
},
"rows": 1,
"selectivity_pct": 0.1,
"r_rows": 1,
"r_selectivity_pct": 0.2,
"r_buffer_size": 0,
"r_filling_time_ms": 0.0203
},
"r_loops": 1,
"rows": 492,
"r_rows": 1,
"r_total_time_ms": 0.2607,
"filtered": 0.1,
"r_filtered": 100,
"attached_condition": "t1.nm like '683%'"
}
}
}
Why does the optimizer choose the plan that employs rowid filter optimization when it's allowed, though the execution by this plan takes much more time than the execution without rowid filter ("r_total_time_ms": 0.2755 vs "r_total_time_ms": 0.0257)? Why is the cost of the plan that uses rowid filter is so cheap?
Igor Babaev (Inactive)
added a comment - Here' a synthetic test case that demonstrate the problem:
--source include/have_sequence.inc
create table t1 (
pk int primary key auto_increment,
nm varchar (32),
fl1 tinyint default 0,
fl2 tinyint default 0,
index idx1(nm, fl1),
index idx2(fl2)
) engine=myisam;
create table name (
pk int primary key auto_increment,
nm bigint
) engine=myisam;
create table flag2 (
pk int primary key auto_increment,
fl2 tinyint
) engine=myisam;
insert into name (nm) select seq from seq_1_to_1000 order by rand(17);
insert into flag2(fl2) select seq mod 2 from seq_1_to_1000 order by rand(19);
insert into t1(nm,fl2)
select nm, fl2 from name , flag2 where name .pk = flag2.pk;
drop table name , flag2;
select * from t1 limit 20;
analyze table t1 persistent for all ;
let $a= ` select concat(( select nm from t1 order by RAND(13) limit 1), '%' )`;
eval select '$a' as a;
set optimizer_switch= 'rowid_filter=off' ;
eval
explain
select * from t1 where nm like '$a' AND fl2 = 0;
show status like 'Last_query_cost' ;
eval
analyze format=json
select * from t1 where nm like '$a' AND fl2 = 0;
eval
select * from t1 where nm like '$a' AND fl2 = 0;
set optimizer_switch= 'rowid_filter=on' ;
eval
explain
select * from t1 where nm like '$a' AND fl2 = 0;
show status like 'Last_query_cost' ;
eval
analyze format=json
select * from t1 where nm like '$a' AND fl2 = 0;
eval
select * from t1 where nm like '$a' AND fl2 = 0;
set optimizer_switch= 'rowid_filter=default' ;
drop table t1;
When running this test case we see:
set optimizer_switch='rowid_filter=off';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx1,idx2 idx1 35 NULL 1 Using index condition; Using where
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 2.707390
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.0397,
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["idx1", "idx2"],
"key": "idx1",
"key_length": "35",
"used_key_parts": ["nm"],
"r_loops": 1,
"rows": 1,
"r_rows": 1,
"r_total_time_ms": 0.0257,
"filtered": 49.2,
"r_filtered": 100,
"index_condition": "t1.nm like '683%'",
"attached_condition": "t1.fl2 = 0"
}
}
}
select * from t1 where nm like '683%' AND fl2 = 0;
pk nm fl1 fl2
232 683 0 0
set optimizer_switch='rowid_filter=on';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref|filter idx1,idx2 idx2|idx1 2|35 const 492 (0%) Using where; Using rowid filter
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 1.431790
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.2755,
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["idx1", "idx2"],
"key": "idx2",
"key_length": "2",
"used_key_parts": ["fl2"],
"ref": ["const"],
"rowid_filter": {
"range": {
"key": "idx1",
"used_key_parts": ["nm"]
},
"rows": 1,
"selectivity_pct": 0.1,
"r_rows": 1,
"r_selectivity_pct": 0.2,
"r_buffer_size": 0,
"r_filling_time_ms": 0.0203
},
"r_loops": 1,
"rows": 492,
"r_rows": 1,
"r_total_time_ms": 0.2607,
"filtered": 0.1,
"r_filtered": 100,
"attached_condition": "t1.nm like '683%'"
}
}
}
Why does the optimizer choose the plan that employs rowid filter optimization when it's allowed, though the execution by this plan takes much more time than the execution without rowid filter ("r_total_time_ms": 0.2755 vs "r_total_time_ms": 0.0257)? Why is the cost of the plan that uses rowid filter is so cheap?
A fix for this bug was pushed into 10.4. A slightly different patch was applied to 10.5. The latter should be merged upstream.
Igor Babaev (Inactive)
added a comment - A fix for this bug was pushed into 10.4. A slightly different patch was applied to 10.5. The latter should be merged upstream.
Here' a synthetic test case that demonstrate the problem:
--source include/have_sequence.inc
) engine=myisam;
) engine=myisam;
fl2 tinyint
) engine=myisam;
eval
explain
eval
analyze format=json
eval
eval
explain
eval
analyze format=json
eval
When running this test case we see:
set optimizer_switch='rowid_filter=off';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx1,idx2 idx1 35 NULL 1 Using index condition; Using where
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 2.707390
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.0397,
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["idx1", "idx2"],
"key": "idx1",
"key_length": "35",
"used_key_parts": ["nm"],
"r_loops": 1,
"rows": 1,
"r_rows": 1,
"r_total_time_ms": 0.0257,
"filtered": 49.2,
"r_filtered": 100,
"index_condition": "t1.nm like '683%'",
"attached_condition": "t1.fl2 = 0"
}
}
}
select * from t1 where nm like '683%' AND fl2 = 0;
pk nm fl1 fl2
232 683 0 0
set optimizer_switch='rowid_filter=on';
explain
select * from t1 where nm like '683%' AND fl2 = 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref|filter idx1,idx2 idx2|idx1 2|35 const 492 (0%) Using where; Using rowid filter
show status like 'Last_query_cost';
Variable_name Value
Last_query_cost 1.431790
analyze format=json
select * from t1 where nm like '683%' AND fl2 = 0;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 0.2755,
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["idx1", "idx2"],
"key": "idx2",
"key_length": "2",
"used_key_parts": ["fl2"],
"ref": ["const"],
"rowid_filter": {
"range": {
"key": "idx1",
"used_key_parts": ["nm"]
},
"rows": 1,
"selectivity_pct": 0.1,
"r_rows": 1,
"r_selectivity_pct": 0.2,
"r_buffer_size": 0,
"r_filling_time_ms": 0.0203
},
"r_loops": 1,
"rows": 492,
"r_rows": 1,
"r_total_time_ms": 0.2607,
"filtered": 0.1,
"r_filtered": 100,
"attached_condition": "t1.nm like '683%'"
}
}
}
Why does the optimizer choose the plan that employs rowid filter optimization when it's allowed, though the execution by this plan takes much more time than the execution without rowid filter ("r_total_time_ms": 0.2755 vs "r_total_time_ms": 0.0257)? Why is the cost of the plan that uses rowid filter is so cheap?