[MDEV-23331] ANALYZE FORMAT=JSON doesn't show materialization time for SJ-Materialized subqueries Created: 2020-07-29  Updated: 2021-03-19

Status: Open
Project: MariaDB Server
Component/s: Optimizer
Affects Version/s: 10.5
Fix Version/s: 10.6

Type: Bug Priority: Major
Reporter: Sergei Petrunia Assignee: Sergei Petrunia
Resolution: Unresolved Votes: 0
Labels: None


 Description   

create table ten(a int primary key);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 
create table one_k(a int primary key);
insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
create table t10 (a int, b int, c int);
insert into t10 select a,a,a from one_k;
create table t11 (a int, b int, c int);
insert into t11 select a,a,a from one_k;                                                                                     

explain select * from t10 where a in (select a from t11 where b<200);
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+                 
| id   | select_type  | table       | type   | possible_keys | key          | key_len | ref  | rows | Extra       |
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+   
|    1 | PRIMARY      | t10         | ALL    | NULL          | NULL         | NULL    | NULL | 1000 |             | 
|    1 | PRIMARY      | <subquery2> | eq_ref | distinct_key  | distinct_key | 4       | func | 1    |             |    
|    2 | MATERIALIZED | t11         | ALL    | NULL          | NULL         | NULL    | NULL | 1000 | Using where |
+------+--------------+-------------+--------+---------------+--------------+---------+------+------+-------------+
3 rows in set (0.01 sec)

Note that ANALYZE FORMAT=JSON output doesn't include the time to do materialization:

analyze format=json select * from t10 where a in (select a from t11 where b<200)\G
*************************** 1. row ***************************
ANALYZE: {
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 101.372774,
    "table": {
      "table_name": "t10",
      "access_type": "ALL",
      "r_loops": 1,
      "rows": 1000,
      "r_rows": 1000,
      "r_table_time_ms": 23.69744705,
      "r_other_time_ms": 8.525119753,
      "filtered": 100,
      "r_filtered": 100
    },
    "table": {
      "table_name": "<subquery2>",
      "access_type": "eq_ref",
      "possible_keys": ["distinct_key"],
      "key": "distinct_key",
      "key_length": "4",
      "used_key_parts": ["a"],
      "ref": ["func"],
      "r_loops": 1000,
      "rows": 1,
      "r_rows": 0.2,
      "r_table_time_ms": 9.660541515,
      "r_other_time_ms": 8.798404243,
      "filtered": 100,
      "r_filtered": 100,
      "materialized": {
        "unique": 1,
        "query_block": {
          "select_id": 2,
          "table": {
            "table_name": "t11",
            "access_type": "ALL",
            "r_loops": 1,
            "rows": 1000,
            "r_rows": 1000,
            "r_table_time_ms": 37.91860276,
            "r_other_time_ms": 12.66568104,
            "filtered": 100,
            "r_filtered": 20,
            "attached_condition": "t11.b < 200"
          }
        }
      }
    }
  }
}

The issue only affects semi-join. Let's try a non-semijoin materialization:

explain select * from t10 where a in (select a from t11 where b<200) or b <3;
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+
| id   | select_type  | table | type | possible_keys | key  | key_len | ref  | rows | Extra       |
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+
|    1 | PRIMARY      | t10   | ALL  | NULL          | NULL | NULL    | NULL | 1000 | Using where |
|    2 | MATERIALIZED | t11   | ALL  | NULL          | NULL | NULL    | NULL | 1000 | Using where |
+------+--------------+-------+------+---------------+------+---------+------+------+-------------+

analyze format=json select * from t10 where a in (select a from t11 where b<200) or b <3 \G
*************************** 1. row ***************************
ANALYZE: {
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 124.4985902,
    "table": {
      "table_name": "t10",
      "access_type": "ALL",
      "r_loops": 1,
      "rows": 1000,
      "r_rows": 1000,
      "r_table_time_ms": 24.17382037,
      "r_other_time_ms": 36.00676106,
      "filtered": 100,
      "r_filtered": 20,
      "attached_condition": "<in_optimizer>(t10.a,t10.a in (subquery#2)) or t10.b < 3"
    },
    "subqueries": [
      {
        "query_block": {
          "select_id": 2,
          "r_loops": 1,
          "r_total_time_ms": 64.01030056,
          "table": {
            "table_name": "t11",
            "access_type": "ALL",
            "r_loops": 1,
            "rows": 1000,
            "r_rows": 1000,
            "r_table_time_ms": 41.05231854,
            "r_other_time_ms": 22.83537212,
            "filtered": 100,
            "r_filtered": 20,
            "attached_condition": "t11.b < 200"
          }
        }
      }
    ]
  }
}

here, one can see that query_block with select_id=2 has r_total_time_ms.


Generated at Thu Feb 08 09:21:36 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.