Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-38464

best_access_path doesn't set keyread_tmp for prefix const lookups

    XMLWordPrintable

Details

    • Q1/2026 Server Development, Q1/2026 Server Maintenance

    Description

      How to find the problem in the code

      In pre-11.0 code, best_access_path() doesn't set keyread_tmp to any value when computing access path for ref(const) over a prefix.
      As a result, we use whatever keyread_tmp value was left from the index that we have considered before this, or the initial value of keyread_tmp=0.

      The code goes like this:

              if (found_part == all_key_parts && !ref_or_null_part)
              {                                         /* use eq key */
                ...
                    if (table->opt_range_keys.is_set(key))
                    {
                      records= (double) table->opt_range[key].rows;
                      trace_access_idx.add("used_range_estimates", true);
                      tmp= adjust_quick_cost(table->opt_range[key].cost,
                                             table->opt_range[key].rows);
                      keyread_tmp= table->file->keyread_time(key, 1,
                                                             table->opt_range[key].
                                                             rows);
                      goto got_cost;
                    }
                  ...
              got_cost:
                  tmp= COST_MULT(tmp, record_count);
                  keyread_tmp= COST_MULT(keyread_tmp, record_count);
                }
              }
      

      We call adjust_quick_cost() and then set keyread_tmp and then jump to got_cost. Ok.

      Now, the else branch which handles index prefix lookups:

              else
              {
                ...
                  if (table->opt_range_keys.is_set(key) &&
                      all_used_equalities_are_const && // (C1)
                      table->opt_range[key].key_parts == max_key_part &&      //(C2)
                      table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3)
                  {
                    records= (double) table->opt_range[key].rows;
                    tmp= adjust_quick_cost(table->opt_range[key].cost,
                                           table->opt_range[key].rows);
                    trace_access_idx.add("used_range_estimates", true);
                    goto got_cost2;
                
              got_cost2:
                  tmp= COST_MULT(tmp, record_count);
                  keyread_tmp= COST_MULT(keyread_tmp, record_count);
                }
      

      We also call adjust_quick_cost() and ... do not set keyread_tmp. It has whatever value it had from the previous index.

      One can construct an example where this will hold: keyread_tmp > tmp

      keyread_tmp is then used by the rowid filter code:

              double key_access_cost=
      	       type == JT_EQ_REF ? 0.5 * tmp : MY_MIN(tmp, keyread_tmp);
              double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0);
      

      For (keyread_tmp > tmp) this will produce access_cost_factor=0.0 . This means "index reads are free, all cost come from fetching the rows".

      The testcase

      Apply this patch, first:

      diff --git a/sql/sql_select.cc b/sql/sql_select.cc
      index f7c0b4cc884..a74095f812c 100644
      --- a/sql/sql_select.cc
      +++ b/sql/sql_select.cc
      @@ -8500,6 +8500,8 @@ best_access_path(JOIN      *join,
                     tmp= adjust_quick_cost(table->opt_range[key].cost,
                                            table->opt_range[key].rows);
                     trace_access_idx.add("used_range_estimates", true);
      +
      +              fprintf(stderr, "AABB: not setting keyread_tmp\n");
                     goto got_cost2;
                   }
                   else
      @@ -8642,6 +8644,17 @@ best_access_path(JOIN      *join,
                                                     found_ref);
             } /* not ft_key */
       
      +      if (records < DBL_MAX && (found_part & 1))
      +      {
      +        fprintf(stderr, 
      +                "AABB: query %s\n"
      +                "AABB: idx=%d table=%s, key=%s: tmp=%g keyread_tmp=%g\n",
      +                thd->query(), idx,
      +                table->alias.c_ptr_safe(),
      +                table->key_info[key].name.str,
      +                tmp, keyread_tmp);
      +      }
      +
             if (records < DBL_MAX &&
                (found_part & 1) &&   // start_key->key can be used for index access
                 (table->file->index_flags(start_key->key,0,1) &
      

      Then fill the tables

      create table t1 (
        a int, 
        b int,
        c int
      );
      insert into t1 select seq,seq,seq from seq_1_to_100;
       
      create table t2 (
        a int,
        b int,
        c int,
        d int,
        e int,
        f int,
        filler1 char(255),
        filler2 char(255),
        filler3 char(255),
        index(a, d, e, f),
        index(b, d)
      );
       
      insert into t2 
      select 
        mod(seq, 10),
        mod(seq, 100),
        12345,
        12345,
        123,
        123,
        uuid(), uuid(), uuid()
      from 
        seq_1_to_10000;
      analyze table t1,t2;
      

      Run the query:

      explain
      select * from t1,t2
      where 
        t2.a=t1.a and 
        t2.b=2;
      

      and observe this in the server stderr:

      ...
      AABB: not setting keyread_tmp
      AABB: query explain select * from t1,t2 where    t2.a=t1.a and    t2.b=2
      AABB: idx=1 table=t2, key=b: tmp=10014.5 keyread_tmp=11501.1
      

      and note that keyread_tmp > tmp .

      Are later versions affected?

      No, they use this call:

                    records= (double) table->opt_range[key].rows;
                    table->opt_range[key].get_costs(&tmp);
      

      which sets all cost components:

      void TABLE::OPT_RANGE::get_costs(ALL_READ_COST *res)
      {
        res->index_cost=       cost.index_cost;
        res->row_cost=         cost.row_cost;
        res->copy_cost=        cost.copy_cost;
        res->max_index_blocks= max_index_blocks;
        res->max_row_blocks=   max_row_blocks;
      }
      

      Attachments

        Activity

          People

            psergei Sergei Petrunia
            psergei Sergei Petrunia
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.