Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-30661

UPPER() returns an empty string for U+0251 in uca1400 collations for utf8

Details

    Description

      The problem described in MDEV-30556 is repeatable in 10.10 with uca1400 collations:

      -- Scratch table that collects every code point whose LOWER() or UPPER()
      -- result comes back empty.
      CREATE OR REPLACE TABLE bad_case_folding
      (
        code INT NOT NULL,
        c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE uca1400_ai_ci NOT NULL
      );
      DELIMITER $$
       
      -- Walk the code point range 0..0x10FFFF, one character at a time.
      FOR code IN 0..0x10FFFF
      DO
        BEGIN
          -- Declared with the same uca1400_ai_ci collation as column c, so the check
          -- below exercises uca1400 case folding itself rather than
          -- utf8mb4_unicode_520_ci (the collation MDEV-30556 is about).
          DECLARE str TEXT CHARACTER SET utf8mb4 COLLATE uca1400_ai_ci DEFAULT CHAR(code USING utf32);
          -- A non-empty one-character string must not fold to a zero-length string.
          IF LENGTH(LOWER(str))=0 OR LENGTH(UPPER(str))=0 THEN
            INSERT INTO bad_case_folding (code, c) VALUES (code, str);
          END IF;
        END;
      END FOR;
      $$
      DELIMITER ;
       
      -- ORDER BY keeps the report deterministic and matches the faster variant.
      SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM bad_case_folding ORDER BY code;
      

      +-----------+---------------+---------------+----+
      | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c  |
      +-----------+---------------+---------------+----+
      | 23A       |               | C8BA          | Ⱥ  |
      | 23E       |               | C8BE          | Ⱦ  |
      | 23F       | C8BF          |               | ȿ  |
      | 240       | C980          |               | ɀ  |
      | 250       | C990          |               | ɐ  |
      | 251       | C991          |               | ɑ  |
      | 252       | C992          |               | ɒ  |
      | 26B       | C9AB          |               | ɫ  |
      | 271       | C9B1          |               | ɱ  |
      | 27D       | C9BD          |               | ɽ  |
      +-----------+---------------+---------------+----+
      

      Or a faster test version:

      -- Quick reproduction: seed a fixed list of code points instead of scanning
      -- the whole range.
      CREATE OR REPLACE TABLE bad_case_folding (
        code INT NOT NULL,
        c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE uca1400_ai_ci NOT NULL DEFAULT ''
      );

      -- Only the code points are inserted; c starts out as its '' default.
      INSERT INTO bad_case_folding (code)
      VALUES
        (0x23A),
        (0x23E),
        (0x23F),
        (0x240),
        (0x250),
        (0x251),
        (0x252),
        (0x26B),
        (0x271),
        (0x27D);

      -- No WHERE clause on purpose: every row's c is derived from its own code.
      UPDATE bad_case_folding
      SET c = CHAR(code USING utf32);

      -- Show the code point, both case conversions as hex, and the character.
      SELECT
        HEX(code),
        HEX(LOWER(c)),
        HEX(UPPER(c)),
        c
      FROM bad_case_folding
      ORDER BY code;
      

      +-----------+---------------+---------------+----+
      | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c  |
      +-----------+---------------+---------------+----+
      | 23A       |               | C8BA          | Ⱥ  |
      | 23E       |               | C8BE          | Ⱦ  |
      | 23F       | C8BF          |               | ȿ  |
      | 240       | C980          |               | ɀ  |
      | 250       | C990          |               | ɐ  |
      | 251       | C991          |               | ɑ  |
      | 252       | C992          |               | ɒ  |
      | 26B       | C9AB          |               | ɫ  |
      | 271       | C9B1          |               | ɱ  |
      | 27D       | C9BD          |               | ɽ  |
      +-----------+---------------+---------------+----+
      

      Attachments

        Issue Links

          Activity

            bar Alexander Barkov created issue -
            bar Alexander Barkov made changes -
            Field Original Value New Value
            bar Alexander Barkov made changes -
            Description The problem described in MDEV-30556 is repeatable in 10.10 with uca1400 collations:

            {code:sql}
            CREATE OR REPLACE TABLE bad_case_folding
            (
              code INT NOT NULL,
              c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci NOT NULL
            );
            DELIMITER $$

            FOR code IN 0..0x10FFFF
            DO
              BEGIN
                DECLARE str TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci DEFAULT CHAR(code USING utf32);
                IF LENGTH(LOWER(str))=0 OR LENGTH(UPPER(str))=0 THEN
                  INSERT INTO bad_case_folding VALUES (code, str);
                END IF;
              END;
            END FOR;
            $$
            DELIMITER ;

            SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM bad_case_folding;
            {code}
            {noformat}
            +-----------+---------------+---------------+----+
            | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c |
            +-----------+---------------+---------------+----+
            | 23A | | C8BA | Ⱥ |
            | 23E | | C8BE | Ⱦ |
            | 23F | C8BF | | ȿ |
            | 240 | C980 | | ɀ |
            | 250 | C990 | | ɐ |
            | 251 | C991 | | ɑ |
            | 252 | C992 | | ɒ |
            | 26B | C9AB | | ɫ |
            | 271 | C9B1 | | ɱ |
            | 27D | C9BD | | ɽ |
            +-----------+---------------+---------------+----+
            {noformat}


            Or a faster test version:

            {code:sql}
            CREATE OR REPLACE TABLE bad_case_folding
            (
              code INT NOT NULL,
              c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci NOT NULL DEFAULT ''
            );
            INSERT INTO bad_case_folding (code) VALUES (0x23A),(0x23E),(0x23F),(0x240),(0x250),(0x251),(0x252),(0x26B),(0x271),(0x27D);
            UPDATE bad_case_folding SET c=CHAR(code USING utf32);
            SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM bad_case_folding ORDER BY code;
            {code}
            {noformat}
            +-----------+---------------+---------------+----+
            | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c |
            +-----------+---------------+---------------+----+
            | 23A | | C8BA | Ⱥ |
            | 23E | | C8BE | Ⱦ |
            | 23F | C8BF | | ȿ |
            | 240 | C980 | | ɀ |
            | 250 | C990 | | ɐ |
            | 251 | C991 | | ɑ |
            | 252 | C992 | | ɒ |
            | 26B | C9AB | | ɫ |
            | 271 | C9B1 | | ɱ |
            | 27D | C9BD | | ɽ |
            +-----------+---------------+---------------+----+
            {noformat}

            The problem described in MDEV-30556 is repeatable in 10.10 with uca1400 collations:

            {code:sql}
            CREATE OR REPLACE TABLE bad_case_folding
            (
              code INT NOT NULL,
              c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE uca1400_ai_ci NOT NULL
            );
            DELIMITER $$

            FOR code IN 0..0x10FFFF
            DO
              BEGIN
                DECLARE str TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci DEFAULT CHAR(code USING utf32);
                IF LENGTH(LOWER(str))=0 OR LENGTH(UPPER(str))=0 THEN
                  INSERT INTO bad_case_folding VALUES (code, str);
                END IF;
              END;
            END FOR;
            $$
            DELIMITER ;

            SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM bad_case_folding;
            {code}
            {noformat}
            +-----------+---------------+---------------+----+
            | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c |
            +-----------+---------------+---------------+----+
            | 23A | | C8BA | Ⱥ |
            | 23E | | C8BE | Ⱦ |
            | 23F | C8BF | | ȿ |
            | 240 | C980 | | ɀ |
            | 250 | C990 | | ɐ |
            | 251 | C991 | | ɑ |
            | 252 | C992 | | ɒ |
            | 26B | C9AB | | ɫ |
            | 271 | C9B1 | | ɱ |
            | 27D | C9BD | | ɽ |
            +-----------+---------------+---------------+----+
            {noformat}


            Or a faster test version:

            {code:sql}
            CREATE OR REPLACE TABLE bad_case_folding
            (
              code INT NOT NULL,
              c VARCHAR(32) CHARACTER SET utf8mb4 COLLATE uca1400_ai_ci NOT NULL DEFAULT ''
            );
            INSERT INTO bad_case_folding (code) VALUES (0x23A),(0x23E),(0x23F),(0x240),(0x250),(0x251),(0x252),(0x26B),(0x271),(0x27D);
            UPDATE bad_case_folding SET c=CHAR(code USING utf32);
            SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM bad_case_folding ORDER BY code;
            {code}
            {noformat}
            +-----------+---------------+---------------+----+
            | HEX(code) | HEX(LOWER(c)) | HEX(UPPER(c)) | c |
            +-----------+---------------+---------------+----+
            | 23A | | C8BA | Ⱥ |
            | 23E | | C8BE | Ⱦ |
            | 23F | C8BF | | ȿ |
            | 240 | C980 | | ɀ |
            | 250 | C990 | | ɐ |
            | 251 | C991 | | ɑ |
            | 252 | C992 | | ɒ |
            | 26B | C9AB | | ɫ |
            | 271 | C9B1 | | ɱ |
            | 27D | C9BD | | ɽ |
            +-----------+---------------+---------------+----+
            {noformat}

            bar Alexander Barkov made changes -
            bar Alexander Barkov made changes -
            bar Alexander Barkov made changes -
            bar Alexander Barkov made changes -
            bar Alexander Barkov made changes -
            Fix Version/s 10.10.4 [ 28522 ]
            Fix Version/s 10.11.3 [ 28524 ]
            Fix Version/s 10.10 [ 27530 ]
            Fix Version/s 10.11 [ 27614 ]
            bar Alexander Barkov made changes -
            Status Open [ 1 ] In Progress [ 3 ]
            bar Alexander Barkov made changes -
            Resolution Fixed [ 1 ]
            Status In Progress [ 3 ] Closed [ 6 ]
            danblack Daniel Black made changes -
            bar Alexander Barkov made changes -
            Fix Version/s 11.0.2 [ 28706 ]

            People

              bar Alexander Barkov
              bar Alexander Barkov
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.