Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-30532

Wrong index chosen by the optimizer for ORDER BY

Details

    Description

      With the optimizer switch:

      index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=off,condition_pushdown_from_having=on

      and optimizer_use_condition_selectivity set to 1, and task_small1 table loaded from the attachment, we get the following:

      analyze format=json SELECT task0.sys_id
      FROM   task_small1 task0
      WHERE 
          ( task0.sys_created_on >= '2020-01-01 06:00:00'           
          AND task0.sys_created_on <= '2020-08-11 04:59:59' )       AND task0.a_ref_4 = '1d979f'  
             AND task0.sys_class_name = 'c'       ORDER  BY task0.sys_created_on DESC LIMIT  0, 20;
       
      ANALYZE
      {
        "query_block": {
          "select_id": 1,
          "r_loops": 1,
          "r_total_time_ms": 2.8276,
          "table": {
            "table_name": "task0",
            "access_type": "range",
            "possible_keys": [
              "task_index_created",
              "release_task_ref2",
              "task_task_class_created"
            ],
            "key": "task_index_created",
            "key_length": "6",
            "used_key_parts": ["sys_created_on"],
            "r_loops": 1,
            "rows": 35807,
            "r_rows": 2004,
            "r_total_time_ms": 2.7249,
            "filtered": 22.336,
            "r_filtered": 0.998,
            "attached_condition": "task0.a_ref_4 <=> '1d979f' and task0.sys_created_on >= '2020-01-01 06:00:00' and task0.sys_created_on <= '2020-08-11 04:59:59' and task0.a_ref_4 = '1d979f' and task0.sys_class_name = 'c'"
          }
        }
      

      It should be using task_task_class_created, which is more specific and still can be used for ORDER BY. I have debugged it with optimizer tracing and in gdb, and got to the point that test_if_skip_sort_order() for some reason is getting called with release_task_ref2 key in tab->ref.key. I attempted to recreate the bug artificially with hand-crafted tables and generated data, but was not successful. The case worked properly choosing the more specific key, and test_if_skip_sort_order was getting called with tab->ref.key set to the more specific key, equivalent of task_task_class_created in this case.

      In this example, the query still runs fast in spite of using the wrong key, but this is a trimmed/obfuscated production data. On the actual production data the choice of the wrong key results in a factor of 100 slower performance.

      Attachments

        1. bad.test.txz
          844 kB
        2. bad.test.xz
          845 kB
        3. good.test.xz
          641 kB
        4. task_small1_dump.sql
          4.98 MB

        Issue Links

          Activity

            yury.chaikou Yury Chaikou added a comment -

            Aleksey, we are trying to apply the fix to 10.4.20 but some differences are not trivial to resolve. Could you please port it to 10.4.20 as well?

            yury.chaikou Yury Chaikou added a comment - Aleksey, we are trying to apply the fix to 10.4.20 but some differences are not trivial to resolve. Could you please port it to 10.4.20 as well?
            spachev Sasha Pachev added a comment -

            The following "hammer-slammer" patch fixes the problem for this test case:

            diff --git a/sql/sql_select.cc b/sql/sql_select.cc
            index ccfe9bb..0b676b9 100644
            --- a/sql/sql_select.cc
            +++ b/sql/sql_select.cc
            @@ -28507,7 +28507,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                           (select_limit <= MY_MIN(quick_records,best_records) ?
                            keyinfo->user_defined_key_parts < best_key_parts :
                            quick_records < best_records) ||
            -              (!is_best_covering && is_covering))
            +              (!is_best_covering && is_covering) || (keyinfo->user_defined_key_parts > best_key_parts && select_limit <= MY_MIN(quick_records,best_records) && quick_records < best_records))
                       {
                         possible_key.add("chosen", true);
                         best_key= nr;
            

            Is the original code check correct? Having more key parts is not always bad, as in this case.

            spachev Sasha Pachev added a comment - The following "hammer-slammer" patch fixes the problem for this test case: diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ccfe9bb..0b676b9 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -28507,7 +28507,7 @@ static bool get_range_limit_read_cost( const JOIN_TAB *tab, (select_limit <= MY_MIN(quick_records,best_records) ? keyinfo->user_defined_key_parts < best_key_parts : quick_records < best_records) || - (!is_best_covering && is_covering)) + (!is_best_covering && is_covering) || (keyinfo->user_defined_key_parts > best_key_parts && select_limit <= MY_MIN(quick_records,best_records) && quick_records < best_records)) { possible_key.add( "chosen" , true ); best_key= nr; Is the original code check correct? Having more key parts is not always bad, as in this case.

            yury.chaikou Do you follow https://mariadbcorp.atlassian.net/browse/SAMU-97 ?
            There is a branch 10.4.20samu-midenok-samu-97 with patches applied.

            midenok Aleksey Midenkov added a comment - yury.chaikou Do you follow https://mariadbcorp.atlassian.net/browse/SAMU-97 ? There is a branch 10.4.20samu-midenok-samu-97 with patches applied.
            spachev Sasha Pachev added a comment -

            Better simple patch:

            diff --git a/sql/sql_select.cc b/sql/sql_select.cc
            index ccfe9bb..5629a26 100644
            --- a/sql/sql_select.cc
            +++ b/sql/sql_select.cc
            @@ -28507,7 +28507,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                           (select_limit <= MY_MIN(quick_records,best_records) ?
                            keyinfo->user_defined_key_parts < best_key_parts :
                            quick_records < best_records) ||
            -              (!is_best_covering && is_covering))
            +              (!is_best_covering && is_covering) || (keyinfo->user_defined_key_parts >= best_key_parts && select_limit <= M
                       {
                         possible_key.add("chosen", true);
                         best_key= nr;
            
            

            it additionally fixes a production data case which we have not yet isolated to a test case we can share which 10.4.20samu-midenok-samu-97 does not (due to the preference for an index with fewer key parts). It seems that if we say - if the range is good and select limit is OK, choose the index regardless of the key parts, our queries are happier.

            spachev Sasha Pachev added a comment - Better simple patch: diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ccfe9bb..5629a26 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -28507,7 +28507,7 @@ static bool get_range_limit_read_cost( const JOIN_TAB *tab, (select_limit <= MY_MIN(quick_records,best_records) ? keyinfo->user_defined_key_parts < best_key_parts : quick_records < best_records) || - (!is_best_covering && is_covering)) + (!is_best_covering && is_covering) || (keyinfo->user_defined_key_parts >= best_key_parts && select_limit <= M { possible_key.add( "chosen" , true ); best_key= nr; it additionally fixes a production data case which we have not yet isolated to a test case we can share which 10.4.20samu-midenok-samu-97 does not (due to the preference for an index with fewer key parts). It seems that if we say - if the range is good and select limit is OK, choose the index regardless of the key parts, our queries are happier.
            psergei Sergei Petrunia added a comment - - edited

            Running the test in 11.0 produces

            good_index
            drop table task_small1
            

            ANALYZE
            {
              "query_optimization": {
                "r_total_time_ms": 0.485830854
              },
              "query_block": {
                "select_id": 1,
                "cost": 7.09745112,
                "r_loops": 1,
                "r_total_time_ms": 8.972108247,
                "nested_loop": [
                  {
                    "table": {
                      "table_name": "task0",
                      "access_type": "range",
                      "possible_keys": [
                        "task_index_created",
                        "release_task_ref2",
                        "task_task_class_created"
                      ],
                      "key": "task_task_class_created",
                      "key_length": "89",
                      "used_key_parts": ["sys_class_name", "sys_created_on"],
                      "loops": 1,
                      "r_loops": 1,
                      "rows": 21318,
                      "r_rows": 379,
                      "cost": 7.09745112,
                      "r_table_time_ms": 8.916235096,
                      "r_other_time_ms": 0.052128368,
                      "filtered": 12.02679348,
                      "r_filtered": 5.277044855,
                      "attached_condition": "task0.a_ref_4 <=> '1d979f' and task0.a_ref_4 = '1d979f' and task0.sys_class_name = 'c' and task0.sys_created_on >= '2020-01-01 06:00:00' and task0.sys_created_on <= '2020-08-11 04:59:59'"
                    }
                  }
                ]
              }
            }
            

            as the report says

            It should be using task_task_class_created

            and it's using that index.

            psergei Sergei Petrunia added a comment - - edited Running the test in 11.0 produces good_index drop table task_small1 ANALYZE { "query_optimization": { "r_total_time_ms": 0.485830854 }, "query_block": { "select_id": 1, "cost": 7.09745112, "r_loops": 1, "r_total_time_ms": 8.972108247, "nested_loop": [ { "table": { "table_name": "task0", "access_type": "range", "possible_keys": [ "task_index_created", "release_task_ref2", "task_task_class_created" ], "key": "task_task_class_created", "key_length": "89", "used_key_parts": ["sys_class_name", "sys_created_on"], "loops": 1, "r_loops": 1, "rows": 21318, "r_rows": 379, "cost": 7.09745112, "r_table_time_ms": 8.916235096, "r_other_time_ms": 0.052128368, "filtered": 12.02679348, "r_filtered": 5.277044855, "attached_condition": "task0.a_ref_4 <=> '1d979f' and task0.a_ref_4 = '1d979f' and task0.sys_class_name = 'c' and task0.sys_created_on >= '2020-01-01 06:00:00' and task0.sys_created_on <= '2020-08-11 04:59:59'" } } ] } } as the report says It should be using task_task_class_created and it's using that index.

            People

              rob.schwyzer@mariadb.com Rob Schwyzer (Inactive)
              spachev Sasha Pachev
              Votes:
              1 Vote for this issue
              Watchers:
              8 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.