[MDEV-26090] where CONCAT(column) = 'xxx' is much faster than without concat condition - Jira

XML

Word

Printable

Customer shared the interesting test result and was wondering why where CONCAT(col) is faster without CONCAT. I've attached test schema and data.

CREATE TABLE F (

id INTEGER NOT NULL,

dept CHAR(4),

dest CHAR(4),

t TIMESTAMP NOT NULL,

PRIMARY KEY (id),

KEY t (t)

);

CREATE TABLE M (

station CHAR(4) NOT NULL,

t_start DATETIME NOT NULL,

t_end DATETIME DEFAULT NULL,

windspeed INTEGER unsigned DEFAULT NULL,

winddir INTEGER unsigned DEFAULT NULL,

PRIMARY KEY (station,t_start)

);

SELECT SQL_NO_CACHE F.id, F.t, M.t_start, M.t_end, M.winddir

FROM F JOIN M ON (M.t_start BETWEEN F.t - INTERVAL 1 HOUR AND F.t AND M.t_end >= F.t)

WHERE F.t BETWEEN '2021-01-01' AND '2021-01-03'

AND M.station = 'LFBO';

+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+

| id   | select_type | table | type  | possible_keys | key     | key_len | ref   | rows  | Extra                    |

+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+

|    1 | SIMPLE      | F     | range | t             | t       | 4       | NULL  | 266   | Using where; Using index |

|    1 | SIMPLE      | M     | ref   | PRIMARY       | PRIMARY | 12      | const | 42462 | Using where              |

+------+-------------+-------+-------+---------------+---------+---------+-------+-------+--------------------------+

analyze output

| {

  "query_block": {

    "select_id": 1,

    "r_loops": 1,

    "r_total_time_ms": 40612.55196,

    "table": {

      "table_name": "F",

      "access_type": "range",

      "possible_keys": ["t"],

      "key": "t",

      "key_length": "4",

      "used_key_parts": ["t"],

      "r_loops": 1,

      "rows": 266,

      "r_rows": 266,

      "r_table_time_ms": 1.012544094,

      "r_other_time_ms": 1.524286562,

      "filtered": 100,

      "r_filtered": 100,

      "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",

      "using_index": true

},

    "table": {

      "table_name": "M",

      "access_type": "ref",

      "possible_keys": ["PRIMARY"],

      "key": "PRIMARY",

      "key_length": "12",

      "used_key_parts": ["station"],

      "ref": ["const"],

      "r_loops": 266,

      "rows": 42462,

      "r_rows": 21285,

      "r_table_time_ms": 18581.56254,

      "r_other_time_ms": 22028.44358,

      "filtered": 100,

      "r_filtered": 0.004698144,

      "attached_condition": "m.station = 'LFBO' and m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t"

} |

With initial query and index, it was extremely slow. So, I proposed change index of `m` table to use primary key

MariaDB [raymond]> alter table m drop primary key, add primary key(t_start, station);

Query OK, 0 rows affected (0.415 sec)

Records: 0  Duplicates: 0  Warnings: 0

+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+

| id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                           |

+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+

|    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL | 266    | Using where; Using index                        |

|    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Using where; Using join buffer (flat, BNL join) |

+------+-------------+-------+-------+---------------+------+---------+------+--------+-------------------------------------------------+

| {

  "query_block": {

    "select_id": 1,

    "r_loops": 1,

    "r_total_time_ms": 4726.954647,

    "table": {

      "table_name": "F",

      "access_type": "range",

      "possible_keys": ["t"],

      "key": "t",

      "key_length": "4",

      "used_key_parts": ["t"],

      "r_loops": 1,

      "rows": 266,

      "r_rows": 266,

      "r_table_time_ms": 0.802683315,

      "r_other_time_ms": 0.913815331,

      "filtered": 100,

      "r_filtered": 100,

      "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",

      "using_index": true

},

    "block-nl-join": {

      "table": {

        "table_name": "M",

        "access_type": "ALL",

        "possible_keys": ["PRIMARY"],

        "r_loops": 1,

        "rows": 249541,

        "r_rows": 250119,

        "r_table_time_ms": 753.0043604,

        "r_other_time_ms": 3972.225239,

        "filtered": 100,

        "r_filtered": 8.509949264,

        "attached_condition": "m.station = 'LFBO'"

},

      "buffer_type": "flat",

      "buffer_size": "2Kb",

      "join_type": "BNL",

      "attached_condition": "m.t_start between f.t - interval 1 hour and f.t and m.t_end >= f.t",

      "r_filtered": 0.004698144

} |

Then, it got much faster compared to 1st one. However, customer noted that query got even faster with concat on where clause.

+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+

| id   | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                          |

+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+

|    1 | SIMPLE      | F     | range | t             | t    | 4       | NULL | 266    | Using where; Using index                       |

|    1 | SIMPLE      | M     | ALL   | PRIMARY       | NULL | NULL    | NULL | 249541 | Range checked for each record (index map: 0x1) |

+------+-------------+-------+-------+---------------+------+---------+------+--------+------------------------------------------------+

analyze output

| {

  "query_block": {

    "select_id": 1,

    "r_loops": 1,

    "r_total_time_ms": 67.0752722,

    "table": {

      "table_name": "F",

      "access_type": "range",

      "possible_keys": ["t"],

      "key": "t",

      "key_length": "4",

      "used_key_parts": ["t"],

      "r_loops": 1,

      "rows": 266,

      "r_rows": 266,

      "r_table_time_ms": 0.802407781,

      "r_other_time_ms": 2.921898929,

      "filtered": 100,

      "r_filtered": 100,

      "attached_condition": "f.t between '2021-01-01 00:00:00.000000' and '2021-01-03 00:00:00.000000'",

      "using_index": true

},

    "range-checked-for-each-record": {

      "keys": ["PRIMARY"],

      "r_keys": {

        "full_scan": 0,

        "index_merge": 0,

        "range": {

          "PRIMARY": 266

},

      "table": {

        "table_name": "M",

        "access_type": "ALL",

        "possible_keys": ["PRIMARY"],

        "r_loops": 266,

        "rows": 249541,

        "r_rows": 37.36842105,

        "r_table_time_ms": 31.15710876,

        "r_other_time_ms": 32.18475629,

        "filtered": 100,

        "r_filtered": 2.676056338

} |

what would be the logical explanation on this dramatic query performance improvement?

Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.