Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-21065

UNIQUE constraint causes a query with string comparison to omit a row in the result set

Details

    Description

      Consider the following test case:

      CREATE TABLE t0(c0 INT UNIQUE);
      INSERT INTO t0 VALUES (NULL), (NULL), (NULL), (NULL), (1), (0);
      SELECT * FROM t0 WHERE c0 < '\n2'; -- expected rows with c0=0 and c0=1, actual: only row with c0=0 is fetched
      

      The query only fetches a single row. This is unexpected, because the expression in the WHERE clause evaluates to TRUE for two records:

      SELECT c0 < '\n2' FROM t0; -- evaluates to TRUE for two rows
      

      When removing the UNIQUE constraint from the column, or inserting fewer NULL values, the query works as expected.

      Attachments

        Issue Links

          Activity

            alice Alice Sherepa added a comment -

            Thanks! Reproducible on 5.5-10.4:

            CREATE TABLE t0(c0 INT UNIQUE) engine=myisam;
            INSERT INTO t0 VALUES (NULL), (NULL), (NULL), (NULL), (1), (0);
            SELECT * FROM t0 WHERE c0 < '\n2';
             
            set optimizer_trace='enabled=on';
            SELECT * FROM t0 WHERE c0 < '\n2';
            select * from information_schema.optimizer_trace limit 1;
             
            SELECT * FROM t0 WHERE c0 < '\n2' {
              "steps": [
                {
                  "join_preparation": {
                    "select_id": 1,
                    "steps": [
                      {
                        "expanded_query": "select t0.c0 AS c0 from t0 where t0.c0 < '\n2'"
                      }
                    ]
                  }
                },
                {
                  "join_optimization": {
                    "select_id": 1,
                    "steps": [
                      {
                        "condition_processing": {
                          "condition": "WHERE",
                          "original_condition": "t0.c0 < '\n2'",
                          "steps": [
                            {
                              "transformation": "equality_propagation",
                              "resulting_condition": "t0.c0 < '\n2'"
                            },
                            {
                              "transformation": "constant_propagation",
                              "resulting_condition": "t0.c0 < '\n2'"
                            },
                            {
                              "transformation": "trivial_condition_removal",
                              "resulting_condition": "t0.c0 < '\n2'"
                            }
                          ]
                        }
                      },
                      {
                        "table_dependencies": [
                          {
                            "table": "t0",
                            "row_may_be_null": false,
                            "map_bit": 0,
                            "depends_on_map_bits": []
                          }
                        ]
                      },
                      {
                        "ref_optimizer_key_uses": []
                      },
                      {
                        "rows_estimation": [
                          {
                            "table": "t0",
                            "range_analysis": {
                              "table_scan": {
                                "rows": 6,
                                "cost": 5.3103
                              },
                              "potential_range_indexes": [
                                {
                                  "index": "c0",
                                  "usable": true,
                                  "key_parts": ["c0"]
                                }
                              ],
                              "best_covering_index_scan": {
                                "index": "c0",
                                "cost": 1.314,
                                "chosen": true
                              },
                              "setup_range_conditions": [],
                              "group_index_range": {
                                "chosen": false,
                                "cause": "no group by or distinct"
                              },
                              "analyzing_range_alternatives": {
                                "range_scan_alternatives": [
                                  {
                                    "index": "c0",
                                    "ranges": ["(NULL) < (c0) <= (0)"],
                                    "rowid_ordered": false,
                                    "using_mrr": false,
                                    "index_only": true,
                                    "rows": 1,
                                    "cost": 1.3023,
                                    "chosen": true
                                  }
                                ],
                                "analyzing_roworder_intersect": {
                                  "cause": "too few roworder scans"
                                },
                                "analyzing_index_merge_union": []
                              },
                              "chosen_range_access_summary": {
                                "range_access_plan": {
                                  "type": "range_scan",
                                  "index": "c0",
                                  "rows": 1,
                                  "ranges": ["(NULL) < (c0) <= (0)"]
                                },
                                "rows_for_plan": 1,
                                "cost_for_plan": 1.3023,
                                "chosen": true
                              }
                            }
                          },
                          {
                            "selectivity_for_indexes": [
                              {
                                "index_name": "c0",
                                "selectivity_from_index": 0.1667
                              }
                            ],
                            "selectivity_for_columns": [],
                            "cond_selectivity": 0.1667
                          }
                        ]
                      },
                      {
                        "considered_execution_plans": [
                          {
                            "plan_prefix": [],
                            "table": "t0",
                            "best_access_path": {
                              "considered_access_paths": [
                                {
                                  "access_type": "range",
                                  "resulting_rows": 1,
                                  "cost": 1.3023,
                                  "chosen": true
                                }
                              ],
                              "chosen_access_method": {
                                "type": "range",
                                "records": 1,
                                "cost": 1.3023,
                                "uses_join_buffering": false,
                                "filter_used": false
                              }
                            },
                            "rows_for_plan": 1,
                            "cost_for_plan": 1.5023,
                            "estimated_join_cardinality": 1
                          }
                        ]
                      },
                      {
                        "best_join_order": ["t0"]
                      },
                      {
                        "attaching_conditions_to_tables": {
                          "original_condition": "t0.c0 < '\n2'",
                          "attached_conditions_computation": [],
                          "attached_conditions_summary": [
                            {
                              "table": "t0",
                              "attached": "t0.c0 < '\n2'"
                            }
                          ]
                        }
                      }
                    ]
                  }
                },
                {
                  "join_execution": {
                    "select_id": 1,
                    "steps": []
                  }
                }
              ]
            } 
            

            alice Alice Sherepa added a comment - Thanks! Reproducible on 5.5-10.4: CREATE TABLE t0(c0 INT UNIQUE) engine=myisam; INSERT INTO t0 VALUES (NULL), (NULL), (NULL), (NULL), (1), (0); SELECT * FROM t0 WHERE c0 < '\n2';   set optimizer_trace='enabled=on'; SELECT * FROM t0 WHERE c0 < '\n2'; select * from information_schema.optimizer_trace limit 1;   SELECT * FROM t0 WHERE c0 < '\n2' { "steps": [ { "join_preparation": { "select_id": 1, "steps": [ { "expanded_query": "select t0.c0 AS c0 from t0 where t0.c0 < '\n2'" } ] } }, { "join_optimization": { "select_id": 1, "steps": [ { "condition_processing": { "condition": "WHERE", "original_condition": "t0.c0 < '\n2'", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "t0.c0 < '\n2'" }, { "transformation": "constant_propagation", "resulting_condition": "t0.c0 < '\n2'" }, { "transformation": "trivial_condition_removal", "resulting_condition": "t0.c0 < '\n2'" } ] } }, { "table_dependencies": [ { "table": "t0", "row_may_be_null": false, "map_bit": 0, "depends_on_map_bits": [] } ] }, { "ref_optimizer_key_uses": [] }, { "rows_estimation": [ { "table": "t0", "range_analysis": { "table_scan": { "rows": 6, "cost": 5.3103 }, "potential_range_indexes": [ { "index": "c0", "usable": true, "key_parts": ["c0"] } ], "best_covering_index_scan": { "index": "c0", "cost": 1.314, "chosen": true }, "setup_range_conditions": [], "group_index_range": { "chosen": false, "cause": "no group by or distinct" }, "analyzing_range_alternatives": { "range_scan_alternatives": [ { "index": "c0", "ranges": ["(NULL) < (c0) <= (0)"], "rowid_ordered": false, "using_mrr": false, "index_only": true, "rows": 1, "cost": 1.3023, "chosen": true } ], "analyzing_roworder_intersect": { "cause": "too few roworder scans" }, "analyzing_index_merge_union": [] }, "chosen_range_access_summary": { "range_access_plan": { "type": "range_scan", "index": "c0", "rows": 1, "ranges": ["(NULL) < (c0) <= (0)"] }, "rows_for_plan": 1, "cost_for_plan": 1.3023, "chosen": true } } }, { "selectivity_for_indexes": [ { "index_name": "c0", "selectivity_from_index": 0.1667 } ], "selectivity_for_columns": [], "cond_selectivity": 0.1667 } ] }, { "considered_execution_plans": [ { "plan_prefix": [], "table": "t0", "best_access_path": { "considered_access_paths": [ { "access_type": "range", "resulting_rows": 1, "cost": 1.3023, "chosen": true } ], "chosen_access_method": { "type": "range", "records": 1, "cost": 1.3023, "uses_join_buffering": false, "filter_used": false } }, "rows_for_plan": 1, "cost_for_plan": 1.5023, "estimated_join_cardinality": 1 } ] }, { "best_join_order": ["t0"] }, { "attaching_conditions_to_tables": { "original_condition": "t0.c0 < '\n2'", "attached_conditions_computation": [], "attached_conditions_summary": [ { "table": "t0", "attached": "t0.c0 < '\n2'" } ] } } ] } }, { "join_execution": { "select_id": 1, "steps": [] } } ] }
            igor Igor Babaev added a comment -

            With such population of the table

            INSERT INTO t0 VALUES (NULL), (NULL), (NULL), (1), (0);
            

            we have the execution plan using full index plan:

            MariaDB [test]> EXPLAIN SELECT * FROM t0 WHERE c0 < '\n2';
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
            | id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
            |    1 | SIMPLE      | t0    | index | c0            | c0   | 5       | NULL | 5    | Using where; Using index |
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
            

            and we have the correct result set for the query

            MariaDB [test]> SELECT * FROM t0 WHERE c0 < '\n2';
            +------+
            | c0   |
            +------+
            |    0 |
            |    1 |
            +------+
            

            If we add an extra NULL to the table we get an execution plan that uses range index scan and a wrong result set:

            MariaDB [test]> INSERT INTO t0 VALUES (NULL);
             
            MariaDB [test]> EXPLAIN SELECT * FROM t0 WHERE c0 < '\n2';
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
            | id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
            |    1 | SIMPLE      | t0    | range | c0            | c0   | 5       | NULL | 1    | Using where; Using index |
            +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
             
            MariaDB [test]> SELECT * FROM t0 WHERE c0 < '\n2';
            +------+
            | c0   |
            +------+
            |    0 |
            +------+
            

            So range index scan does not work properly here.

            igor Igor Babaev added a comment - With such population of the table INSERT INTO t0 VALUES ( NULL ), ( NULL ), ( NULL ), (1), (0); we have the execution plan using full index plan: MariaDB [test]> EXPLAIN SELECT * FROM t0 WHERE c0 < '\n2'; +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+ | 1 | SIMPLE | t0 | index | c0 | c0 | 5 | NULL | 5 | Using where; Using index | +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+ and we have the correct result set for the query MariaDB [test]> SELECT * FROM t0 WHERE c0 < '\n2'; +------+ | c0 | +------+ | 0 | | 1 | +------+ If we add an extra NULL to the table we get an execution plan that uses range index scan and a wrong result set: MariaDB [test]> INSERT INTO t0 VALUES (NULL);   MariaDB [test]> EXPLAIN SELECT * FROM t0 WHERE c0 < '\n2'; +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+ | 1 | SIMPLE | t0 | range | c0 | c0 | 5 | NULL | 1 | Using where; Using index | +------+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+   MariaDB [test]> SELECT * FROM t0 WHERE c0 < '\n2'; +------+ | c0 | +------+ | 0 | +------+ So range index scan does not work properly here.
            igor Igor Babaev added a comment -

            Here's how we get a wrong result set from the query

             SELECT * FROM t0 WHERE c0 < '\n2'
            

            for the reported test case.

            1. As we have and an index defined on c0 the optimizer tries to evaluate possible range access for the sargable condition c0 < '\n2'.

            2.Doing this the optimizer calls get_quick_record_count() that calls SQL_SELECT::test_quick_select() that tries to build a SEL_ARG tree for the condition c0 < '\n2' by an invocation of get_full_func_mm_tree_for_args() .

            3. The last call brings us to a call of Item_bool_func::get_mm_leaf that calls Field::get_mm_leaf_int for the field c0.

            4. The latter function first calls Item::save_in_field_no_warnings() for the item in the right
            part of the condition '\n2'. This function brings us to a call of Field_long::store() that tries to get the value of '\n2' to store it in the field for the column c0. It gets the value by a call of get_int() that returns 0 as the constant starts from the character '\n'.

            5. When the control returns back to Field::get_mm_leaf_int() the function Field::stored_field_make_mm_leaf() is invoked to build a SEL_ARG node for the range c0 < '\n2'. When doing this it is checked whether the range interval is right open or right closed. Here the code wants to be smart and to make the decision it calls stored_field_cmp_to_item(). The latter compares two values: one that were stored as right bound of the range (0) and '\n2' and it compares them as doubles. The string '\n2' is converted to double with '\n' skipped returning 2 and as 0 < 2 the interval is considered as right closed, because by all means this might happen only for conditions like int_field < 2.3. So as a result we have the range NULL < x <= 0.

            6. The executioner just scans this range and returns 1 row.

            igor Igor Babaev added a comment - Here's how we get a wrong result set from the query SELECT * FROM t0 WHERE c0 < '\n2' for the reported test case. 1. As we have and an index defined on c0 the optimizer tries to evaluate possible range access for the sargable condition c0 < '\n2'. 2.Doing this the optimizer calls get_quick_record_count() that calls SQL_SELECT::test_quick_select() that tries to build a SEL_ARG tree for the condition c0 < '\n2' by an invocation of get_full_func_mm_tree_for_args() . 3. The last call brings us to a call of Item_bool_func::get_mm_leaf that calls Field::get_mm_leaf_int for the field c0. 4. The latter function first calls Item::save_in_field_no_warnings() for the item in the right part of the condition '\n2'. This function brings us to a call of Field_long::store() that tries to get the value of '\n2' to store it in the field for the column c0. It gets the value by a call of get_int() that returns 0 as the constant starts from the character '\n'. 5. When the control returns back to Field::get_mm_leaf_int() the function Field::stored_field_make_mm_leaf() is invoked to build a SEL_ARG node for the range c0 < '\n2'. When doing this it is checked whether the range interval is right open or right closed. Here the code wants to be smart and to make the decision it calls stored_field_cmp_to_item(). The latter compares two values: one that were stored as right bound of the range (0) and '\n2' and it compares them as doubles. The string '\n2' is converted to double with '\n' skipped returning 2 and as 0 < 2 the interval is considered as right closed, because by all means this might happen only for conditions like int_field < 2.3. So as a result we have the range NULL < x <= 0. 6. The executioner just scans this range and returns 1 row.
            igor Igor Babaev added a comment -

            The above analysis was done for 10.4. Yet 5.5 follows a similar path here though the code was re-engineered.

            The easiest way to fix the problem is to force my_strntoull10rnd_8bit() to skip '\n' and some other characters in the same way as the conversion from string to double does it.

            Still I would prefer to convert '\n2' to double and keep the result as the right bound of the range. However I don't know how to do it even for 10.4.

            My experiments when I tried to change the code of SEL_ARG *Field::get_mm_leaf_int
            replacing the code

              int err= value->save_in_field_no_warnings(this, 1);
            

            with the code

              Type_handler_hybrid_field_type cmp(type_handler_for_comparison());
              if (cmp.aggregate_for_comparison(value->type_handler_for_comparison()))
              {
                // At fix_fields() time we checked that "field" and "item" are comparable
                DBUG_ASSERT(0);
                return 0;
              }
              int err= cmp.type_handler()->Item_save_in_field(value, this, 1);
            

            did not succeed.

            That's why I re-assign this bug to Bar. Hopefully my analysis will help him.

            igor Igor Babaev added a comment - The above analysis was done for 10.4. Yet 5.5 follows a similar path here though the code was re-engineered. The easiest way to fix the problem is to force my_strntoull10rnd_8bit() to skip '\n' and some other characters in the same way as the conversion from string to double does it. Still I would prefer to convert '\n2' to double and keep the result as the right bound of the range. However I don't know how to do it even for 10.4. My experiments when I tried to change the code of SEL_ARG *Field::get_mm_leaf_int replacing the code int err= value->save_in_field_no_warnings(this, 1); with the code Type_handler_hybrid_field_type cmp(type_handler_for_comparison()); if (cmp.aggregate_for_comparison(value->type_handler_for_comparison())) { // At fix_fields() time we checked that "field" and "item" are comparable DBUG_ASSERT(0); return 0; } int err= cmp.type_handler()->Item_save_in_field(value, this, 1); did not succeed. That's why I re-assign this bug to Bar. Hopefully my analysis will help him.

            igor, thanks for the analysis. It's very helpful indeed.

            bar Alexander Barkov added a comment - igor , thanks for the analysis. It's very helpful indeed.
            bar Alexander Barkov added a comment - - edited

            The inconsistency can be reproduced with this query:

            SELECT CAST('\n2' AS DOUBLE) AS dbl, CAST('\n2' AS DECIMAL) AS dcml, CAST('\n2' AS INT) AS intg;
            

            +------+------+------+
            | dbl  | dcml | intg |
            +------+------+------+
            |    2 |    2 |    0 |
            +------+------+------+
            

            The CAST to INT invokes my_strtoll10(), so it should also be fixed.

            bar Alexander Barkov added a comment - - edited The inconsistency can be reproduced with this query: SELECT CAST ( '\n2' AS DOUBLE ) AS dbl, CAST ( '\n2' AS DECIMAL ) AS dcml, CAST ( '\n2' AS INT ) AS intg; +------+------+------+ | dbl | dcml | intg | +------+------+------+ | 2 | 2 | 0 | +------+------+------+ The CAST to INT invokes my_strtoll10() , so it should also be fixed.
            bar Alexander Barkov added a comment - - edited

            Handling in the leading space characters in various string-to-number conversion routines is done indeed in a different way for different numeric data types. DOUBLE and DECIMAL routines do skip characters like '\r' and '\n', while INT routines do not:

            • string-to-double: my_strtod_int():

                 case '\t':
                 case '\n':
                 case '\v':
                 case '\f':
                 case '\r':
                 case ' ':
                 continue; 
              

            • string-to-decimal: internal_str2dec()

                while (s < end_of_string && my_isspace(&my_charset_latin1, *s))
                  s++;
              

            • string-to-int: my_strntoull10rnd_8bit()

                /* Skip leading spaces and tabs */
                for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
              

            • string-to-int: my_strtoll10()

                 while (s != end && (*s == ' ' || *s == '\t'))
                   s++;  
              

            bar Alexander Barkov added a comment - - edited Handling in the leading space characters in various string-to-number conversion routines is done indeed in a different way for different numeric data types. DOUBLE and DECIMAL routines do skip characters like '\r' and '\n', while INT routines do not: string-to-double: my_strtod_int(): case '\t' : case '\n' : case '\v' : case '\f' : case '\r' : case ' ' : continue ; string-to-decimal: internal_str2dec() while (s < end_of_string && my_isspace(&my_charset_latin1, *s)) s++; string-to-int: my_strntoull10rnd_8bit() /* Skip leading spaces and tabs */ for ( ; str < end && (*str == ' ' || *str == '\t' ) ; str++); string-to-int: my_strtoll10() while (s != end && (*s == ' ' || *s == '\t' )) s++;

            People

              bar Alexander Barkov
              mrigger Manuel Rigger
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.