|
Trying to find where exactly it happens. It's not equality propagation. Equality propagation code sees that the charset is "complex" and doesn't create an Item_equal:
#0 my_propagate_complex (cs=0x555556b94d60, str=0x0, length=0) at /home/psergey/dev-git/10.1-explain-json-r4/strings/ctype-simple.c:1683
|
#1 0x0000555555a50463 in check_simple_equality (left_item=0x7fffc6443898, right_item=0x7fffc64439a0, item=0x7fffc6443a38, cond_equal=0x7fffc6bb3a80) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:12415
|
#2 0x0000555555a50886 in check_equality (thd=0x7fffd0366070, item=0x7fffc6443a38, cond_equal=0x7fffc6bb3a80, eq_list=0x7fffc6bb3af0) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:12559
|
#3 0x0000555555a50991 in build_equal_items_for_cond (thd=0x7fffd0366070, cond=0x7fffc6443f88, inherited=0x0, link_item_fields=true) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:12662
|
#4 0x0000555555a51125 in build_equal_items (join=0x7fffc6444170, cond=0x7fffc6443f88, inherited=0x0, join_list=0x7fffd036a5f0, ignore_on_conds=false, cond_equal_ref=0x7fffc64445c0, link_equal_fields=true) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:12871
|
#5 0x0000555555a54535 in optimize_cond (join=0x7fffc6444170, conds=0x7fffc6443f88, join_list=0x7fffd036a5f0, ignore_on_conds=false, cond_value=0x7fffc6444498, cond_equal=0x7fffc64445c0, flags=1) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:14418
|
#6 0x0000555555a3237c in JOIN::optimize_inner (this=0x7fffc6444170) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:1199
|
#7 0x0000555555a31b02 in JOIN::optimize (this=0x7fffc6444170) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:1023
|
#8 0x0000555555a39bff in mysql_select (thd=0x7fffd0366070, rref_pointer_array=0x7fffd036a6f0, tables=0x7fffc64432a0, wild_num=1, fields=..., conds=0x7fffc6443f88, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7fffc6444150, unit=0x7fffd0369d90, select_lex=0x7fffd036a478) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:3306
|
#9 0x0000555555a2fd0c in handle_select (thd=0x7fffd0366070, lex=0x7fffd0369cc8, result=0x7fffc6444150, setup_tables_done_option=0) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:372
|
#10 0x0000555555a02328 in execute_sqlcom_select (thd=0x7fffd0366070, all_tables=0x7fffc64432a0) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_parse.cc:5675
|
#11 0x00005555559f89f2 in mysql_execute_command (thd=0x7fffd0366070) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_parse.cc:2806
|
#12 0x0000555555a0543b in mysql_parse (thd=0x7fffd0366070, rawbuf=0x7fffc6443088 "SELECT * FROM t1 WHERE a='ae' AND a LIKE 'ä'", length=45, parser_state=0x7fffc6bb5090) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_parse.cc:6946
|
|
|
There is a process called "constant propagation" which is done in propagate_cond_constants() and it doesn't rely on equality propagation.
For the example query, "a LIKE 'ä'" is changed into 'ae' like 'ä' here:
#0 change_cond_ref_to_const (thd=0x7fffd0366070, save_list=0x7fffc6bb3ba0, and_father=0x7fffc6443f88, cond=0x7fffc6443de8, field=0x7fffc6443898, value=0x7fffc64439a0) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:13588
|
#1 0x0000555555a529b8 in change_cond_ref_to_const (thd=0x7fffd0366070, save_list=0x7fffc6bb3ba0, and_father=0x7fffc6443f88, cond=0x7fffc6443f88, field=0x7fffc6443898, value=0x7fffc64439a0) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:13542
|
#2 0x0000555555a532a2 in propagate_cond_constants (thd=0x7fffd0366070, save_list=0x7fffc6bb3ba0, and_father=0x7fffc6443f88, cond=0x7fffc6443a38) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:13652
|
#3 0x0000555555a52f44 in propagate_cond_constants (thd=0x7fffd0366070, save_list=0x0, and_father=0x7fffc6443f88, cond=0x7fffc6443f88) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:13619
|
#4 0x0000555555a54589 in optimize_cond (join=0x7fffc6444170, conds=0x7fffc6443f88, join_list=0x7fffd036a5f0, ignore_on_conds=false, cond_value=0x7fffc6444498, cond_equal=0x7fffc64445c0, flags=1) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:14422
|
#5 0x0000555555a3237c in JOIN::optimize_inner (this=0x7fffc6444170) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:1199
|
#6 0x0000555555a31b02 in JOIN::optimize (this=0x7fffc6444170) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:1023
|
#7 0x0000555555a39bff in mysql_select (thd=0x7fffd0366070, rref_pointer_array=0x7fffd036a6f0, tables=0x7fffc64432a0, wild_num=1, fields=..., conds=0x7fffc6443f88, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7fffc6444150, unit=0x7fffd0369d90, select_lex=0x7fffd036a478) at /home/psergey/dev-git/10.1-explain-json-r4/sql/sql_select.cc:3306
|
|
|
The reason the submitted patch works is this code:
change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
|
Item *and_father, Item *cond,
|
Item *field, Item *value)
|
{
|
...
if (cond->eq_cmp_result() == Item::COND_OK)
|
return; // Not a boolean function
|
I am not sure what the exact meaning of COND_OK vs COND_TRUE is.
|
|
What is the meaning of these Item::COND_... constants, anyway...
remove_eq_conds uses this encoding:
cond_value is set to according:
COND_OK query is possible (field = constant)
COND_TRUE always true ( 1 = 1 )
COND_FALSE always false ( 1 = 2 )
But it seems the meaning is different when one is looking at the return value
of Item::eq_cmp_result():
COND_UNDEF - no item has COND_UNDEF.
COND_OK
this is what default Item::eq_cmp_resut() returns.
COND_TRUE is unconditionally returned by:
- Item_func_eq
- Item_func_equal ( the x<=>y comparion)
- Item_func_ge
- Item_func_le
- Item_func_like (this is what Bar's patch suggests to change)
COND_FALSE is returned unconditionally by
- Item_func_gt
- Item_func_lt
- Item_func_ne
|
|
So, one could guess that
- COND_TRUE/COND_FALSE means "it's a boolean comparison function". COND_TRUE means that func(x,x)=true, COND_FALSE means that "func(x,x)=false".
- COND_OK means something other than boolean comparison function.
In this interpretation, the code in change_cond_ref_to_const() makes sense. The function does not to equality substitution. It does "substitution of equal fields as long as they are in the context of binary comparisons". This allows to substitute in a greater number of cases (like in this example).
|
|
Will discuss with igor today evening.
|
|
Discussed, ok to push/
|
|
The same problem happens if I swap the constants in the conditon (i.e. pass 'ä' in equality and 'ae' in LIKE):
SET NAMES utf8 COLLATE utf8_german2_ci;
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_german2_ci);
|
INSERT INTO t1 VALUES ('ae'),('ä');
|
SELECT * FROM t1 WHERE a='ä';
|
SELECT * FROM t1 WHERE a LIKE 'ae';
|
SELECT * FROM t1 WHERE a='ä' AND a LIKE 'ae';
|
The first query correctly returns 2 rows.
The second query correctly returns 1 row.
The third query returns no rows, which is wrong.
|
|
The same effect is observed using trailing spaces instead of a contraction:
SET NAMES utf8 COLLATE utf8_unicode_ci;
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
|
INSERT INTO t1 VALUES ('a '),('a');
|
SELECT * FROM t1 WHERE a='a';
|
SELECT * FROM t1 WHERE a LIKE 'a ';
|
SELECT * FROM t1 WHERE a='a' AND a LIKE 'a ';
|
The first query correctly returns 2 rows.
The second query correctly returns 1 row.
The third query returns no rows, which is wrong.
|
|
it's also reproducible even with simple collations:
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
INSERT INTO t1 VALUES ('a'),('a ');
|
SET NAMES latin1;
|
SELECT * FROM t1 WHERE CONCAT(a)='a';
|
SELECT * FROM t1 WHERE CONCAT(a) LIKE 'a ';
|
SELECT * FROM t1 WHERE CONCAT(a)='a' AND CONCAT(a) LIKE 'a ';
|
The first query correctly returns two rows.
The second query correctly returns one rows.
The third query returns no rows (while one row is expected).
|
|
It's also reproducible if the field is on the right side of LIKE:
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
INSERT INTO t1 VALUES ('a'),('a ');
|
SET NAMES latin1;
|
SELECT * FROM t1 WHERE 'a'=CONCAT(a);
|
SELECT * FROM t1 WHERE 'a ' LIKE CONCAT(a);
|
SELECT * FROM t1 WHERE 'a'=CONCAT(a) AND 'a ' LIKE CONCAT(a);
|
The first query correctly returns two rows.
The second query correctly returns one rows.
The third query returns no rows (while one row is expected).
|
|
More examples:
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
INSERT INTO t1 VALUES ('%'),('% ');
|
SET NAMES latin1;
|
SELECT * FROM t1 WHERE '% '=CONCAT(a);
|
SELECT * FROM t1 WHERE 'a' LIKE CONCAT(a);
|
SELECT * FROM t1 WHERE '% '=CONCAT(a) AND 'a' LIKE CONCAT(a);
|
The first query correctly returns two rows.
The second query correctly returns one rows.
The third query returns no rows (while one row is expected).
|
|
This example also demonstrates a wrong behaviour, but different from the previous ones:
The last query returns two rows:
- unlike no rows in the previous examples
- and instead of one row, which would be a correct result
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
INSERT INTO t1 VALUES ('%'),('% ');
|
SET NAMES latin1;
|
SELECT * FROM t1 WHERE '%'=CONCAT(a);
|
SELECT * FROM t1 WHERE 'a' LIKE CONCAT(a);
|
SELECT * FROM t1 WHERE '%'=CONCAT(a) AND 'a' LIKE CONCAT(a);
|
The first query correctly returns two rows.
The second query correctly returns one rows.
The third query returns two rows (while one row is expected).
|