Details
-
Bug
-
Status: Closed (View Workflow)
-
Blocker
-
Resolution: Fixed
-
5.5(EOL), 10.0(EOL)
-
None
Description
The test case shows that SELECT returns a row that doesn't match the WHERE condition. This only happens when the column is not indexed, if there's an index the result is correct (no rows found).
DROP DATABASE IF EXISTS `strict_unicode_test`; |
SET NAMES 'utf8' COLLATE 'utf8_unicode_ci'; |
SET SESSION sql_mode='STRICT_ALL_TABLES'; |
CREATE DATABASE `strict_unicode_test` COLLATE utf8_unicode_ci; |
USE `strict_unicode_test`; |
SHOW VARIABLES LIKE 'coll%'; |
SHOW VARIABLES LIKE 'character%'; |
DROP TABLE IF EXISTS `users`; |
CREATE TABLE `users` ( |
`ID` bigint(20) unsigned NOT NULL AUTO_INCREMENT, |
`login` varchar(60) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', |
PRIMARY KEY (`ID`) |
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; |
INSERT INTO users (login) VALUES ('admin'); |
SELECT * FROM `users` WHERE `login` = 'admin��'; |
SHOW WARNINGS;
|
Attachments
Issue Links
Activity
Perhaps broken bytes should be compared as a pseudo-character which is greater than any possible normal character,
or less than any possible normal character. This would preserve behaviour for #2, #3, #4.
"Greater than any possible character" is easier to implement.
Summary:
N | Field collation | character_set_connection | Index | Result | Warning |
---|---|---|---|---|---|
1 | utf8_unicode_ci | utf8 | no | 'a' | - |
2 | utf8_unicode_ci | utf8 | yes | empty set | Incorrect string value |
3 | utf8_general_ci | utf8 | no | empty set | - |
4 | utf8_general_ci | utf8 | yes | empty set | Incorrect string value |
5 | utf16_unicode_ci | utf8 | no | error: illegal mix of collations | - |
6 | utf16_unicode_ci | utf8 | yes | error: illegal mix of collations | - |
7 | utf8_unicode_ci | utf16 | no | 'a????' | - |
8 | utf8_unicode_ci | utf16 | yes | 'a????' | - |
With the column collation utf8_general_ci or utf8_unicode_ci and with character_set_connection=utf16:
SET NAMES 'utf8', character_set_connection=utf16;
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it return 'a????' both without and with the index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+-------+
|
| id | a |
|
+----+-------+
|
| 4 | a???? |
|
+----+-------+
|
1 row in set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+-------+
|
| id | a |
|
+----+-------+
|
| 4 | a???? |
|
+----+-------+
|
1 row in set (0.00 sec)
|
With the column collation utf16_unicode_ci:
SET NAMES 'utf8';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it returns:
ERROR 1267 (HY000): Illegal mix of collations (utf16_unicode_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation '='
|
both without and with the index.
The error is returned because the string cannot be safely converted from 3-byte utf8 to utf16.
The same error is returned if I change the collation to utf16_general_ci.
With the default collation utf8_general_ci:
SET NAMES 'utf8';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) COLLATE utf8_general_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it returns empty set without indexes and empty set with warning with the index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set, 1 warning (0.00 sec)
|
|
mysql> SHOW WARNINGS;
|
+---------+------+--------------------------------------------------------------------+
|
| Level | Code | Message |
|
+---------+------+--------------------------------------------------------------------+
|
| Warning | 1366 | Incorrect string value: '\xF0\x9D\x8C\x86' for column 'a' at row 1 |
|
+---------+------+--------------------------------------------------------------------+
|
1 row in set (0.00 sec)
|
A smaller test case:
SET NAMES 'utf8' COLLATE 'utf8_unicode_ci';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
Notice, it returns 'a' without index and empty set with a warning with index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+---+
|
| id | a |
|
+----+---+
|
| 1 | a |
|
+----+---+
|
1 row in set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set, 1 warning (0.00 sec)
|
|
mysql> SHOW WARNINGS;
|
+---------+------+--------------------------------------------------------------------+
|
| Level | Code | Message |
|
+---------+------+--------------------------------------------------------------------+
|
| Warning | 1366 | Incorrect string value: '\xF0\x9D\x8C\x86' for column 'a' at row 1 |
|
+---------+------+--------------------------------------------------------------------+
|
1 row in set (0.00 sec)
|
I think the following failure is related to the described problem (please extract it into a separate issue if it turns out to be different):
Failing test case |
DROP TABLE IF EXISTS t1, t2; |
|
CREATE TABLE t1 (f1 CHAR(3)) ENGINE=MyISAM CHARSET utf8; |
INSERT INTO t1 VALUES ('foo'),('bar'); |
CREATE TABLE t2 (s char(3)) ENGINE=MyISAM CHARSET latin1; |
INSERT INTO t2 VALUES ('qux'),('qqq'); |
|
# The following result is correct: |
SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL; |
|
# But this is not: |
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL; |
|
# And even this works, though it shouldn't: |
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL; |
|
DROP TABLE t1, t2; |
Results |
MariaDB [test]> # The following result is correct: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL; |
Empty set (0.00 sec) |
|
MariaDB [test]>
|
MariaDB [test]> # But this is not: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL; |
+------+ |
| f1 |
|
+------+ |
| foo |
|
| bar |
|
+------+ |
2 rows in set (0.00 sec) |
|
MariaDB [test]>
|
MariaDB [test]> # And even this works, though it shouldn't: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL; |
+------+ |
| f1 |
|
+------+ |
| foo |
|
| bar |
|
+------+ |
2 rows in set (0.00 sec) |
I think it's related, because if t2 is also UTF8, there is no problem:
Success story |
DROP TABLE IF EXISTS t1, t2; |
|
CREATE TABLE t1 (f1 CHAR(3)) ENGINE=MyISAM CHARSET utf8; |
INSERT INTO t1 VALUES ('foo'),('bar'); |
CREATE TABLE t2 (s char(3)) ENGINE=MyISAM CHARSET utf8; |
INSERT INTO t2 VALUES ('qux'),('qqq'); |
|
# The following result is correct: |
SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL; |
|
# Not this is okay too: |
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL; |
|
# And this one produces the expected error message: |
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL; |
|
DROP TABLE t1, t2; |
Results |
MariaDB [test]> # The following result is correct: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL; |
Empty set (0.00 sec) |
|
MariaDB [test]>
|
MariaDB [test]> # Not this is okay too: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL; |
Empty set (0.00 sec) |
|
MariaDB [test]>
|
MariaDB [test]> # And this one produces the expected error message: |
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL; |
ERROR 1242 (21000): Subquery returns more than 1 row |
Consistent behavior for all cases and all charsets will be implemented in 10.1, see
MDEV-8036