Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-26090

where CONCAT(column) = 'xxx' is much faster than without concat condition

    XMLWordPrintable

Details

    • Bug
    • Status: Stalled (View Workflow)
    • Minor
    • Resolution: Unresolved
    • 10.5.9, 10.2(EOL), 10.3(EOL), 10.4(EOL), 10.5
    • 10.11
    • Optimizer

    Description

      Customer shared the interesting test result and was wondering why where CONCAT(col) is faster without CONCAT. I've attached test schema and data.

      • this is the initial table schema and query.

      CREATE TABLE F (
      id INTEGER NOT NULL,
      dept CHAR(4),
      dest CHAR(4),
      t TIMESTAMP NOT NULL,
       
      PRIMARY KEY (id),
      KEY t (t)
      );
       
      CREATE TABLE M (
      station CHAR(4) NOT NULL,
      t_start DATETIME NOT NULL,
      t_end DATETIME DEFAULT NULL,
      windspeed INTEGER unsigned DEFAULT NULL,
      winddir INTEGER unsigned DEFAULT NULL,
      PRIMARY KEY (station,t_start)
      );
      

      SELECT SQL_NO_CACHE F.id, F.t, M.t_start, M.t_end, M.winddir 
      FROM F JOIN M ON (M.t_start BETWEEN F.t - INTERVAL 1 HOUR AND F.t AND M.t_end >= F.t)
      WHERE F.t BETWEEN '2021-01-01' AND '2021-01-03' 
      AND M.station = 'LFBO';
      

      +------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
      | id   | select_type | table | type  | possible_keys | key     | key_len | ref   | rows  | Extra                    |
      +------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
      |    1 | SIMPLE      | F     | range | t             | t       | 4       | NULL  | 266   | Using where; Using index |
      |    1 | SIMPLE      | M     | ref   | PRIMARY       | PRIMARY | 12      | const | 42462 | Using where              |
      +------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+
      

      • analyze output

        | {
          "query_block": {
            "select_id": 1,
            "r_loops": 1,
            "r_total_time_ms": 40612.55196,
            "table": {
              "table_name": "F",
              "access_type": "range",
              "possible_keys": ["t"],
              "key": "t",
              "key_length": "4",
              "used_key_parts": ["t"],
              "r_loops": 1,
              "rows": 266,
              "r_rows": 266,
              "r_table_time_ms": 1.012544094,
              "r_other_time_ms": 1.524286562,
              "filtered": 100,
              "r_filtered": 100,
              "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
              "using_index": true
            },
            "table": {
              "table_name": "M",
              "access_type": "ref",
              "possible_keys": ["PRIMARY"],
              "key": "PRIMARY",
              "key_length": "12",
              "used_key_parts": ["station"],
              "ref": ["const"],
              "r_loops": 266,
              "rows": 42462,
              "r_rows": 21285,
              "r_table_time_ms": 18581.56254,
              "r_other_time_ms": 22028.44358,
              "filtered": 100,
              "r_filtered": 0.004698144,
              "attached_condition": "m.station = 'LFBO' and m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t"
            }
          }
        } |
        

      With initial query and index, it was extremely slow. So, I proposed change index of `m` table to use primary key

      MariaDB [raymond]> alter table m drop primary key, add primary key(t_start, station);
      Query OK, 0 rows affected (0.415 sec)               
      Records: 0  Duplicates: 0  Warnings: 0
      

      +------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
      | id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                           |
      +------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
      |    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL | 266    | Using where; Using index                        |
      |    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Using where; Using join buffer (flat, BNL join) |
      +------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+
      

      • analyze output
      • | {
          "query_block": {
            "select_id": 1,
            "r_loops": 1,
            "r_total_time_ms": 4726.954647,
            "table": {
              "table_name": "F",
              "access_type": "range",
              "possible_keys": ["t"],
              "key": "t",
              "key_length": "4",
              "used_key_parts": ["t"],
              "r_loops": 1,
              "rows": 266,
              "r_rows": 266,
              "r_table_time_ms": 0.802683315,
              "r_other_time_ms": 0.913815331,
              "filtered": 100,
              "r_filtered": 100,
              "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
              "using_index": true
            },
            "block-nl-join": {
              "table": {
                "table_name": "M",
                "access_type": "ALL",
                "possible_keys": ["PRIMARY"],
                "r_loops": 1,
                "rows": 249541,
                "r_rows": 250119,
                "r_table_time_ms": 753.0043604,
                "r_other_time_ms": 3972.225239,
                "filtered": 100,
                "r_filtered": 8.509949264,
                "attached_condition": "m.station = 'LFBO'"
              },
              "buffer_type": "flat",
              "buffer_size": "2Kb",
              "join_type": "BNL",
              "attached_condition": "m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t",
              "r_filtered": 0.004698144
            }
          }
        } |
        

      Then, it got much faster compared to 1st one. However, customer noted that query got even faster with concat on where clause.

      +------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
      | id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                          |
      +------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
      |    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL | 266    | Using where; Using index                       |
      |    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Range checked for each record (index map: 0x1) |
      +------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+
      

      • analyze output

        | {
          "query_block": {
            "select_id": 1,
            "r_loops": 1,
            "r_total_time_ms": 67.0752722,
            "table": {
              "table_name": "F",
              "access_type": "range",
              "possible_keys": ["t"],
              "key": "t",
              "key_length": "4",
              "used_key_parts": ["t"],
              "r_loops": 1,
              "rows": 266,
              "r_rows": 266,
              "r_table_time_ms": 0.802407781,
              "r_other_time_ms": 2.921898929,
              "filtered": 100,
              "r_filtered": 100,
              "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",
              "using_index": true
            },
            "range-checked-for-each-record": {
              "keys": ["PRIMARY"],
              "r_keys": {
                "full_scan": 0,
                "index_merge": 0,
                "range": {
                  "PRIMARY": 266
                }
              },
              "table": {
                "table_name": "M",
                "access_type": "ALL",
                "possible_keys": ["PRIMARY"],
                "r_loops": 266,
                "rows": 249541,
                "r_rows": 37.36842105,
                "r_table_time_ms": 31.15710876,
                "r_other_time_ms": 32.18475629,
                "filtered": 100,
                "r_filtered": 2.676056338
              }
            }
          }
        } |
        

      what would be the logical explanation on this dramatic query performance improvement?

      Attachments

        Activity

          People

            psergei Sergei Petrunia
            allen.lee@mariadb.com Allen Lee (Inactive)
            Votes:
            2 Vote for this issue
            Watchers:
            6 Start watching this issue

            Dates

              Created:
              Updated:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.