|
I think the following failure is related to the described problem (please extract it into a separate issue if it turns out to be different):
|
Failing test case
|
DROP TABLE IF EXISTS t1, t2;
|
|
CREATE TABLE t1 (f1 CHAR(3)) ENGINE=MyISAM CHARSET utf8;
|
INSERT INTO t1 VALUES ('foo'),('bar');
|
CREATE TABLE t2 (s char(3)) ENGINE=MyISAM CHARSET latin1;
|
INSERT INTO t2 VALUES ('qux'),('qqq');
|
|
# The following result is correct:
|
SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL;
|
|
# But this is not:
|
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL;
|
|
# And even this works, though it shouldn't:
|
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL;
|
|
DROP TABLE t1, t2;
|
|
Results
|
MariaDB [test]> # The following result is correct:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL;
|
Empty set (0.00 sec)
|
|
MariaDB [test]>
|
MariaDB [test]> # But this is not:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL;
|
+------+
|
| f1 |
|
+------+
|
| foo |
|
| bar |
|
+------+
|
2 rows in set (0.00 sec)
|
|
MariaDB [test]>
|
MariaDB [test]> # And even this works, though it shouldn't:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL;
|
+------+
|
| f1 |
|
+------+
|
| foo |
|
| bar |
|
+------+
|
2 rows in set (0.00 sec)
|
I think it's related, because if t2 is also UTF8, there is no problem:
|
Success story
|
DROP TABLE IF EXISTS t1, t2;
|
|
CREATE TABLE t1 (f1 CHAR(3)) ENGINE=MyISAM CHARSET utf8;
|
INSERT INTO t1 VALUES ('foo'),('bar');
|
CREATE TABLE t2 (s char(3)) ENGINE=MyISAM CHARSET utf8;
|
INSERT INTO t2 VALUES ('qux'),('qqq');
|
|
# The following result is correct:
|
SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL;
|
|
# Not this is okay too:
|
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL;
|
|
# And this one produces the expected error message:
|
SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL;
|
|
DROP TABLE t1, t2;
|
|
Results
|
MariaDB [test]> # The following result is correct:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < 'qux') IS NULL;
|
Empty set (0.00 sec)
|
|
MariaDB [test]>
|
MariaDB [test]> # Not this is okay too:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2 LIMIT 1)) IS NULL;
|
Empty set (0.00 sec)
|
|
MariaDB [test]>
|
MariaDB [test]> # And this one produces the expected error message:
|
MariaDB [test]> SELECT * FROM t1 WHERE (f1 < (SELECT s FROM t2)) IS NULL;
|
ERROR 1242 (21000): Subquery returns more than 1 row
|
|
|
A smaller test case:
SET NAMES 'utf8' COLLATE 'utf8_unicode_ci';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
Notice, it returns 'a' without index and empty set with a warning with index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+---+
|
| id | a |
|
+----+---+
|
| 1 | a |
|
+----+---+
|
1 row in set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set, 1 warning (0.00 sec)
|
|
mysql> SHOW WARNINGS;
|
+---------+------+--------------------------------------------------------------------+
|
| Level | Code | Message |
|
+---------+------+--------------------------------------------------------------------+
|
| Warning | 1366 | Incorrect string value: '\xF0\x9D\x8C\x86' for column 'a' at row 1 |
|
+---------+------+--------------------------------------------------------------------+
|
1 row in set (0.00 sec)
|
|
|
With the default collation utf8_general_ci:
SET NAMES 'utf8';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) COLLATE utf8_general_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it returns empty set without indexes and empty set with warning with the index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
Empty set, 1 warning (0.00 sec)
|
|
mysql> SHOW WARNINGS;
|
+---------+------+--------------------------------------------------------------------+
|
| Level | Code | Message |
|
+---------+------+--------------------------------------------------------------------+
|
| Warning | 1366 | Incorrect string value: '\xF0\x9D\x8C\x86' for column 'a' at row 1 |
|
+---------+------+--------------------------------------------------------------------+
|
1 row in set (0.00 sec)
|
|
|
With the column collation utf16_unicode_ci:
SET NAMES 'utf8';
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it returns:
ERROR 1267 (HY000): Illegal mix of collations (utf16_unicode_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation '='
|
both without and with the index.
The error is returned because the string cannot be safely converted from 3-byte utf8 to utf16.
The same error is returned if I change the collation to utf16_general_ci.
|
|
With the column collation utf8_general_ci or utf8_unicode_ci and with character_set_connection=utf16:
SET NAMES 'utf8', character_set_connection=utf16;
|
SET SESSION sql_mode='STRICT_ALL_TABLES';
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (
|
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
|
a varchar(60) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL DEFAULT '',
|
PRIMARY KEY (id)
|
) ENGINE=InnoDB;
|
INSERT INTO t1 (a) VALUES ('a'),('a??'),('a???'),('a????');
|
SELECT * FROM t1 WHERE a = 'a��';
|
ALTER TABLE t1 ADD KEY(a);
|
SELECT * FROM t1 WHERE a = 'a��';
|
it return 'a????' both without and with the index:
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+-------+
|
| id | a |
|
+----+-------+
|
| 4 | a???? |
|
+----+-------+
|
1 row in set (0.00 sec)
|
|
mysql> ALTER TABLE t1 ADD KEY(a);
|
Query OK, 0 rows affected (0.06 sec)
|
Records: 0 Duplicates: 0 Warnings: 0
|
|
mysql> SELECT * FROM t1 WHERE a = 'a��';
|
+----+-------+
|
| id | a |
|
+----+-------+
|
| 4 | a???? |
|
+----+-------+
|
1 row in set (0.00 sec)
|
|
|
Summary:
| N |
Field collation |
character_set_connection |
Index |
Result |
Warning |
| 1 |
utf8_unicode_ci |
utf8 |
no |
'a' |
- |
| 2 |
utf8_unicode_ci |
utf8 |
yes |
empty set |
Incorrect string value |
| 3 |
utf8_general_ci |
utf8 |
no |
empty set |
- |
| 4 |
utf8_general_ci |
utf8 |
yes |
empty set |
Incorrect string value |
| 5 |
utf16_unicode_ci |
utf8 |
no |
error: illegal mix of collations |
- |
| 6 |
utf16_unicode_ci |
utf8 |
yes |
error: illegal mix of collations |
- |
| 7 |
utf8_unicode_ci |
utf16 |
no |
'a????' |
- |
| 8 |
utf8_unicode_ci |
utf16 |
yes |
'a????' |
- |
|
|
Perhaps broken bytes should be compared as a pseudo-character which is greater than any possible normal character,
or less than any possible normal character. This would preserve behaviour for #2, #3, #4.
"Greater than any possible character" is easier to implement.
|
|
Consistent behavior for all cases and all charsets will be implemented in 10.1, see MDEV-8036
|