Details
-
Task
-
Status: Open (View Workflow)
-
Minor
-
Resolution: Unresolved
-
None
-
None
Description
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query...
EXPLAIN
|
SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' |
this result in 446196 rows (read all table), and changing it to:
EXPLAIN
|
SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') |
result in 16 rows and 'using where' (an index)
the interesting part is....
column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd'
|
—
sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field
the idea is...
optimize "enum_field!=enum_value" removing the enum_value from enum declaration list.
the result is an index being used instead of a full table scan
—
A second optimization is:
WHERE sit_spa='z'
since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where'
check that enum is a diferent field type (for where part, select part and order by/group by parts)...
for sit_spa enum ('d','s') not null
enum_index 0 = ""
enum_index 1 = 'd'
enum_index 2 = 's'
for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL
when using enum_value ='3' and enum_index=2, example: enum ('1','3')
if we execute, WHERE enum='2'
the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3')
—
implementation ?
from sergei emails:
We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example
WHERE a != 10
|
to
WHERE a < 10 OR a > 10
|
So, perhaps, all you need to do is to make sure this code is used for your ENUM queries.
—
but reading sql_select.cc we have remove_eq_conds that check conditions one by one, and can rewrite the condition or return a impossible where, or a always true condition
i think that get_mm_leaf is something that only works with index (i'm wrong?)
—
maybe it should be implemented in opt_range.cc at get_mm_leaf() function? or in sql_select.cc remove_eq_conds function?
something like:
/*
|
MDEV-4419
|
IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR
|
|
any enum_field OP const_value should be executed in all enum values,
|
to optimize the query, since ENUM is a 'bitmap' of all possible values
|
we can rewrited it to IN/NOT IN operator, this allow index usage
|
|
examples:
|
|
for NULL possible fields:
|
a ENUM ('a','b','c')
|
internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c'
|
for NOT NULL fields:
|
a ENUM ('a','b','c') NOT NULL
|
internally we have: 0=>'',1=>'a',2=>'b',3=>'c'
|
|
example 1) (impossible where)
|
WHERE a = 'd'
|
'' = 'd' ? false
|
'a' = 'd' ? false
|
'b' = 'd' ? false
|
'c' = 'd' ? false
|
|
rewrite to
|
(1=0) - always false, impossble where
|
|
example 2) (allways true)
|
a < 'd' will result:
|
'' < 'd' ? true
|
'a' < 'd' ? true
|
'b' < 'd' ? true
|
'c' < 'd' ? true
|
|
rewriting to :
|
a IN ('','a','b','c') - always true
|
|
example 3) (some values of enum, using IN)
|
a LIKE '%a' will result:
|
'' LIKE '%a' ? false
|
'a' LIKE '%a' ? true
|
'b' LIKE '%a' ? false
|
'c' LIKE '%a' ? false
|
|
we have 1 true, and 3 falses, better use IN ()
|
rewriting to :
|
|
using IN: a IN ('a')
|
using NOT IN: a NOT IN ('','b','c')
|
or using ENUM INDEX:
|
using IN: a IN (1)
|
using NOT IN: a NOT IN (0,2,3)
|
|
|
example 4) (some values of enum, using NOT IN)
|
a != 'b' will result:
|
'' != 'b' ? true
|
'a' != 'b' ? true
|
'b' != 'b' ? false
|
'c' != 'b' ? true
|
|
we have 3 true, and 1 false, better use NOT IN ()
|
rewriting to:
|
|
using IN: a IN ('','a','c')
|
using NOT IN: a NOT IN ('b')
|
or using ENUM INDEX:
|
using IN: a IN (0,1,3)
|
using NOT IN: a NOT IN (2)
|
*/
|
if (field->real_type() == MYSQL_TYPE_ENUM)
|
{
|
uint count_true_list =0;
|
uint count_false_list=0;
|
bool true_false_list[];
|
uint enum_field_counter=0;
|
char buffer[255];
|
String enum_item(buffer, sizeof(buffer), res.charset());
|
|
/* seek all values and check how many true/false we got */
|
resize_true_false_list[]
|
set true_false_list to false;
|
/* true_false_list start from 0 to enum values +1, NULL = index 0, */
|
uint *len= field->typelib->type_lengths;
|
for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++)
|
{
|
enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors);
|
if (type == Item_func::IN_FUNC)
|
{
|
/* search in each IN value */
|
while( .. in_values .. )
|
{
|
|
if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) ||
|
(enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) {
|
count_true_list ++;
|
true_false_list[enum_field_counter]=true;
|
} else {
|
count_false_list ++;
|
}
|
}
|
} else if (type == Item_func::BETWEEN) {
|
/* two args */
|
// don't know what to do...
|
goto end;
|
} else {
|
/* only one value */
|
if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) ||
|
(enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) {
|
count_true_list ++;
|
true_false_list[enum_field_counter]=true;
|
} else {
|
count_false_list ++;
|
}
|
}
|
}
|
/* check about always true (discart operation) / false (impossible where) */
|
if (count_false_list == 0)
|
{
|
tree= &null_element; /* always true */
|
goto end;
|
}
|
if (count_true_list == 0)
|
{
|
tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */
|
goto end;
|
}
|
/* rewrite the operation using IN / NOT IN () */
|
if (count_false_list > count_true_list)
|
inv=true; /* add NOT before IN () */
|
type=Item_func::IN_FUNC;
|
for each true_false_list
|
{
|
if(inv)
|
add respective enum_value of true_false_list when it's false
|
else
|
add respective enum_value of true_false_list when it's true
|
}
|
goto end;
|
}
|
Attachments
Activity
Field | Original Value | New Value |
---|---|---|
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' this result in 446196 rows (read all table), and changing it to: EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') result in 16 rows and 'using where' (an index) the interesting part is.... column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan thanks guys, sorry for many posts today, i'm updating databases to mariadb 10.0.1, and checking logs to optimize tables |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan thanks guys, sorry for many posts today, i'm updating databases to mariadb 10.0.1, and checking logs to optimize tables |
Attachment | mdev4419.tar.gz [ 21900 ] |
Affects Version/s | 10.0.1 [ 11400 ] | |
Environment | linux 64 bits | |
Issue Type | Bug [ 1 ] | Task [ 3 ] |
Labels | optimizer |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan thanks guys, sorry for many posts today, i'm updating databases to mariadb 10.0.1, and checking logs to optimize tables |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. |
Comment |
[ reading this: http://dev.mysql.com/doc/internals/en/optimizer-code-concepts.html#optimizer-definitions i will try to understand this: The optimize_cond() and opt_sum_query() routines perform transformations. The make_join_statistics() routine puts together all the information it can find about indexes that might be useful for accessing the query's tables. should i look at ('transformation' part)? JOIN::optimize() /* optimizer is from here ... */ optimize_cond() opt_sum_query() or i'm going to the wrong direction to write a patch for this optimization? ] |
Comment |
[ i was reading a bit the source of mariadb 10.0.1 at line 13553 of sql_select.cc " See BUG#12594011" maybe something in this lines could do this, i don't know how to code, but something like: if (field type = enum) and const_item and (operation = '!=' or operation = 'not in') read all values from enum and put in new list ('values from enum','a','b','c'...) remove the const_itens from the new list rewrite condition to " IN ()" with the new list maybe this could work? i'm not a expert in source code of mariadb ] |
Summary | ENUM OPTIMIZATION - could use index | ENUM OPTIMIZATION - could use index, could return 'Impossible Where' |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ while() { if (type == Item_func::IN_FUNC) { /* search in each IN value */ while() { if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } else { /* only one value */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } renum_field_counter++; } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ while() { if (type == Item_func::IN_FUNC) { /* search in each IN value */ while() { if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } else { /* only one value */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } renum_field_counter++; } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ for(enum_field_counter =0; enum_field_counter < field->typelib->count; enum_field_counter++){ { if (type == Item_func::IN_FUNC) { /* search in each IN value */ while() { if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } else { /* only one value */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ for(enum_field_counter =0; enum_field_counter < field->typelib->count; enum_field_counter++){ { if (type == Item_func::IN_FUNC) { /* search in each IN value */ while() { if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } else { /* only one value */ if () { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string) || (enum_int_value == in_int_value && !in_value_is_string)) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string) || (enum_int_value == int_value && !value_is_string)) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string) || (enum_int_value == in_int_value && !in_value_is_string)) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string) || (enum_int_value == int_value && !value_is_string)) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) || (enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) || (enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Summary | ENUM OPTIMIZATION - could use index, could return 'Impossible Where' | SQL PARSER - Enum Optimization - some queries could use index and could return 'Impossible Where' when using ENUM fields and constants outside ENUM declaration |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) || (enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) || (enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function? or in sql_select.cc remove_eq_conds function? something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) || (enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) || (enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Description |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function? or in sql_select.cc remove_eq_conds function? something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) || (enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) || (enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Hi guys, i found a optimization (maybe a bug?) that could be 'easly' optimized, check this query... {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa!='mov' {code} this result in 446196 rows (read all table), and changing it to: {code:sql} EXPLAIN SELECT COUNT(*)FROM dig_lotes WHERE lote_tipo='r' AND sit_spa IN ('','d','s','p','a') {code} result in 16 rows and 'using where' (an index) the interesting part is.... {noformat} column 'sit_spa' is ENUM('d','s','p','a','mov') NOT NULL DEFAULT 'd' {noformat} --- sit_spa!='mov' could be optimized via ENUM declaration to: 'd','s','p','a' plus '' if database accepted a value outside enum declaration, ex: a insert of 'xyz' result in '' enum value + warning in this field the idea is... optimize "enum_field!=enum_value" removing the enum_value from enum declaration list. the result is an index being used instead of a full table scan --- A second optimization is: WHERE sit_spa='z' since 'z' isn't in 'd','s','p','a','mov', we could return 'impossible where' check that enum is a diferent field type (for where part, select part and order by/group by parts)... for sit_spa enum ('d','s') not null enum_index 0 = "" enum_index 1 = 'd' enum_index 2 = 's' for sit_spa enum ('d','s') (without not null), enum_index NULL = NULL when using enum_value ='3' and enum_index=2, example: enum ('1','3') if we execute, WHERE enum='2' the enum_value='2' don't exists, the parser check for enum_index, and found '3' as result (since enum_index='2', is enum_value='3') --- implementation ? from sergei emails: We have a range optimizer (in opt_range.cc), it can already do pretty, much what you say, it can change, for example {code} WHERE a != 10 {code} to {code} WHERE a < 10 OR a > 10 {code} So, perhaps, all you need to do is to make sure this code is used for your ENUM queries. --- but reading sql_select.cc we have remove_eq_conds that check conditions one by one, and can rewrite the condition or return a impossible where, or a always true condition i think that get_mm_leaf is something that only works with index (i'm wrong?) --- maybe it should be implemented in opt_range.cc at get_mm_leaf() function? or in sql_select.cc remove_eq_conds function? something like: {code} /* MDEV-4419 IMPOSSIBLE WHERE FOR ENUM FIELDS / REWRITE OF ENUM FIELDS OPERATOR any enum_field OP const_value should be executed in all enum values, to optimize the query, since ENUM is a 'bitmap' of all possible values we can rewrited it to IN/NOT IN operator, this allow index usage examples: for NULL possible fields: a ENUM ('a','b','c') internally we have: NULL,0=>'',1=>'a',2=>'b',3=>'c' for NOT NULL fields: a ENUM ('a','b','c') NOT NULL internally we have: 0=>'',1=>'a',2=>'b',3=>'c' example 1) (impossible where) WHERE a = 'd' '' = 'd' ? false 'a' = 'd' ? false 'b' = 'd' ? false 'c' = 'd' ? false rewrite to (1=0) - always false, impossble where example 2) (allways true) a < 'd' will result: '' < 'd' ? true 'a' < 'd' ? true 'b' < 'd' ? true 'c' < 'd' ? true rewriting to : a IN ('','a','b','c') - always true example 3) (some values of enum, using IN) a LIKE '%a' will result: '' LIKE '%a' ? false 'a' LIKE '%a' ? true 'b' LIKE '%a' ? false 'c' LIKE '%a' ? false we have 1 true, and 3 falses, better use IN () rewriting to : using IN: a IN ('a') using NOT IN: a NOT IN ('','b','c') or using ENUM INDEX: using IN: a IN (1) using NOT IN: a NOT IN (0,2,3) example 4) (some values of enum, using NOT IN) a != 'b' will result: '' != 'b' ? true 'a' != 'b' ? true 'b' != 'b' ? false 'c' != 'b' ? true we have 3 true, and 1 false, better use NOT IN () rewriting to: using IN: a IN ('','a','c') using NOT IN: a NOT IN ('b') or using ENUM INDEX: using IN: a IN (0,1,3) using NOT IN: a NOT IN (2) */ if (field->real_type() == MYSQL_TYPE_ENUM) { uint count_true_list =0; uint count_false_list=0; bool true_false_list[]; uint enum_field_counter=0; char buffer[255]; String enum_item(buffer, sizeof(buffer), res.charset()); /* seek all values and check how many true/false we got */ resize_true_false_list[] set true_false_list to false; /* true_false_list start from 0 to enum values +1, NULL = index 0, */ uint *len= field->typelib->type_lengths; for (enum_field_counter=0,const char **pos= field->typelib->type_names; *pos; pos++, len++, enum_field_counter++) { enum_string_value= enum_item.copy(*pos, *len, charset(), res.charset(), &dummy_errors); if (type == Item_func::IN_FUNC) { /* search in each IN value */ while( .. in_values .. ) { if ((enum_str_value == in_str_value && in_value_is_string && compare_function(enum_str,in_str)) || (enum_int_value == in_int_value && !in_value_is_string && compare_function(enum_int,in_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } else if (type == Item_func::BETWEEN) { /* two args */ // don't know what to do... goto end; } else { /* only one value */ if ((enum_str_value == str_value && value_is_string && compare_function(enum_str,value_str)) || (enum_int_value == int_value && !value_is_string && compare_function(enum_int,value_int))) { count_true_list ++; true_false_list[enum_field_counter]=true; } else { count_false_list ++; } } } /* check about always true (discart operation) / false (impossible where) */ if (count_false_list == 0) { tree= &null_element; /* always true */ goto end; } if (count_true_list == 0) { tree->type= SEL_ARG::IMPOSSIBLE; /* impossible where */ goto end; } /* rewrite the operation using IN / NOT IN () */ if (count_false_list > count_true_list) inv=true; /* add NOT before IN () */ type=Item_func::IN_FUNC; for each true_false_list { if(inv) add respective enum_value of true_false_list when it's false else add respective enum_value of true_false_list when it's true } goto end; } {code} |
Fix Version/s | 10.1.0 [ 12200 ] |
Fix Version/s | 10.2.0 [ 14601 ] | |
Fix Version/s | 10.1.0 [ 12200 ] |
Workflow | defaullt [ 27210 ] | MariaDB v2 [ 46635 ] |
Workflow | MariaDB v2 [ 46635 ] | MariaDB v3 [ 67353 ] |
Fix Version/s | 10.2 [ 14601 ] |
Workflow | MariaDB v3 [ 67353 ] | MariaDB v4 [ 130104 ] |
Hi Roberto,
Please provide the full data dump for dig_lotes.
Another question,
>> optimize "enum_field!=enum_value" removing the enum_value from enum declaration list.
Would you be willing to implement the optiization and provide a patch?