Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-27699

ANALYZE FORMAT=JSON fields are incorrect for UNION ALL queries

Details

    Description

      create table t1 (a int);
      insert into t1 values (1),(2),(3);
       
      create table t2 (a int);
      insert into t2 values (10),(11);
      

      analyze format=json 
      select * from (select a from t1 union all select a from t2) TBL;
      

      Shows something like this:

              "table": {
                "table_name": "<derived2>",
                "access_type": "ALL",
                "r_loops": 1,
                "rows": 5,
                "r_rows": 5,
                "r_table_time_ms": 0.039263025,
                "r_other_time_ms": 0.028563309,
                "filtered": 100,
                "r_filtered": 100,
                "materialized": {
                  "query_block": {
                    "union_result": {
                      "table_name": "<union2,3>",
                      "access_type": "ALL",
                      "r_loops": 0,
                      "r_rows": null,
      

      note r_loops=0, r_rows:null. This is clearly incorrect.

      Note that regular UNION (not UNION ALL) one gets correct result:

                "materialized": {
                  "query_block": {
                    "union_result": {
                      "table_name": "<union2,3>",
                      "access_type": "ALL",
                      "r_loops": 1,
                      "r_rows": 5,
      

      Attachments

        Activity

          oleg.smirnov Oleg Smirnov added a comment -

          Looks like the implementation of ANALYZE FORMAT=JSON ignores the optimization introduced in https://jira.mariadb.org/browse/MDEV-334. Needs to be discussed, probably the problem is not only about "r_rows" and "r_loops" but the whole reflection of the UNION ALL optimization in the output.

          oleg.smirnov Oleg Smirnov added a comment - Looks like the implementation of ANALYZE FORMAT=JSON ignores the optimization introduced in https://jira.mariadb.org/browse/MDEV-334 . Needs to be discussed, probably the problem is not only about "r_rows" and "r_loops" but the whole reflection of the UNION ALL optimization in the output.
          oleg.smirnov Oleg Smirnov added a comment -

          Mysql-8 output for UNION:

          | {
            "query_block": {
              "select_id": 1,
              "cost_info": {
                "query_cost": "3.06"
              },
              "table": {
                "table_name": "TBL",
                "access_type": "ALL",
                "rows_examined_per_scan": 5,
                "rows_produced_per_join": 5,
                "filtered": "100.00",
                "cost_info": {
                  "read_cost": "2.56",
                  "eval_cost": "0.50",
                  "prefix_cost": "3.06",
                  "data_read_per_join": "80"
                },
                "used_columns": [
                  "a"
                ],
                "materialized_from_subquery": {
                  "using_temporary_table": true,
                  "dependent": false,
                  "cacheable": true,
                  "query_block": {
                    "union_result": {
                      "using_temporary_table": true,
                      "table_name": "<union2,3>",
                      "access_type": "ALL",
                      "query_specifications": [
                        {
                          "dependent": false,
                          "cacheable": true,
                          "query_block": {
                            "select_id": 2,
                            "cost_info": {
                              "query_cost": "0.55"
                            },
                            "table": {
                              "table_name": "t1",
                              "access_type": "ALL",
                              "rows_examined_per_scan": 3,
                              "rows_produced_per_join": 3,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "0.25",
                                "eval_cost": "0.30",
                                "prefix_cost": "0.55",
                                "data_read_per_join": "24"
                              },
                              "used_columns": [
                                "a"
                              ]
                            }
                          }
                        },
                        {
                          "dependent": false,
                          "cacheable": true,
                          "query_block": {
                            "select_id": 3,
                            "cost_info": {
                              "query_cost": "0.45"
                            },
                            "table": {
                              "table_name": "t2",
                              "access_type": "ALL",
                              "rows_examined_per_scan": 2,
                              "rows_produced_per_join": 2,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "0.25",
                                "eval_cost": "0.20",
                                "prefix_cost": "0.45",
                                "data_read_per_join": "16"
                              },
                              "used_columns": [
                                "a"
                              ]
                            }
                          }
                        }
                      ]
                    }
                  }
                }
              }
            }
          } |
          

          and for UNION ALL:

            "query_block": {
              "select_id": 1,
              "cost_info": {
                "query_cost": "3.06"
              },
              "table": {
                "table_name": "TBL",
                "access_type": "ALL",
                "rows_examined_per_scan": 5,
                "rows_produced_per_join": 5,
                "filtered": "100.00",
                "cost_info": {
                  "read_cost": "2.56",
                  "eval_cost": "0.50",
                  "prefix_cost": "3.06",
                  "data_read_per_join": "80"
                },
                "used_columns": [
                  "a"
                ],
                "materialized_from_subquery": {
                  "using_temporary_table": true,
                  "dependent": false,
                  "cacheable": true,
                  "query_block": {
                    "union_result": {
                      "using_temporary_table": false,
                      "query_specifications": [
                        {
                          "dependent": false,
                          "cacheable": true,
                          "query_block": {
                            "select_id": 2,
                            "cost_info": {
                              "query_cost": "0.55"
                            },
                            "table": {
                              "table_name": "t1",
                              "access_type": "ALL",
                              "rows_examined_per_scan": 3,
                              "rows_produced_per_join": 3,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "0.25",
                                "eval_cost": "0.30",
                                "prefix_cost": "0.55",
                                "data_read_per_join": "24"
                              },
                              "used_columns": [
                                "a"
                              ]
                            }
                          }
                        },
                        {
                          "dependent": false,
                          "cacheable": true,
                          "query_block": {
                            "select_id": 3,
                            "cost_info": {
                              "query_cost": "0.45"
                            },
                            "table": {
                              "table_name": "t2",
                              "access_type": "ALL",
                              "rows_examined_per_scan": 2,
                              "rows_produced_per_join": 2,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "0.25",
                                "eval_cost": "0.20",
                                "prefix_cost": "0.45",
                                "data_read_per_join": "16"
                              },
                              "used_columns": [
                                "a"
                              ]
                            }
                          }
                        }
                      ]
                    }
                  }
                }
              }
            }
          } |
          

          The diff is:

          28,30c27
          <             "using_temporary_table": true,
          <             "table_name": "<union2,3>",
          <             "access_type": "ALL",
          ---
          >             "using_temporary_table": false,
          

          oleg.smirnov Oleg Smirnov added a comment - Mysql-8 output for UNION: | { "query_block": { "select_id": 1, "cost_info": { "query_cost": "3.06" }, "table": { "table_name": "TBL", "access_type": "ALL", "rows_examined_per_scan": 5, "rows_produced_per_join": 5, "filtered": "100.00", "cost_info": { "read_cost": "2.56", "eval_cost": "0.50", "prefix_cost": "3.06", "data_read_per_join": "80" }, "used_columns": [ "a" ], "materialized_from_subquery": { "using_temporary_table": true, "dependent": false, "cacheable": true, "query_block": { "union_result": { "using_temporary_table": true, "table_name": "<union2,3>", "access_type": "ALL", "query_specifications": [ { "dependent": false, "cacheable": true, "query_block": { "select_id": 2, "cost_info": { "query_cost": "0.55" }, "table": { "table_name": "t1", "access_type": "ALL", "rows_examined_per_scan": 3, "rows_produced_per_join": 3, "filtered": "100.00", "cost_info": { "read_cost": "0.25", "eval_cost": "0.30", "prefix_cost": "0.55", "data_read_per_join": "24" }, "used_columns": [ "a" ] } } }, { "dependent": false, "cacheable": true, "query_block": { "select_id": 3, "cost_info": { "query_cost": "0.45" }, "table": { "table_name": "t2", "access_type": "ALL", "rows_examined_per_scan": 2, "rows_produced_per_join": 2, "filtered": "100.00", "cost_info": { "read_cost": "0.25", "eval_cost": "0.20", "prefix_cost": "0.45", "data_read_per_join": "16" }, "used_columns": [ "a" ] } } } ] } } } } } } | and for UNION ALL: "query_block": { "select_id": 1, "cost_info": { "query_cost": "3.06" }, "table": { "table_name": "TBL", "access_type": "ALL", "rows_examined_per_scan": 5, "rows_produced_per_join": 5, "filtered": "100.00", "cost_info": { "read_cost": "2.56", "eval_cost": "0.50", "prefix_cost": "3.06", "data_read_per_join": "80" }, "used_columns": [ "a" ], "materialized_from_subquery": { "using_temporary_table": true, "dependent": false, "cacheable": true, "query_block": { "union_result": { "using_temporary_table": false, "query_specifications": [ { "dependent": false, "cacheable": true, "query_block": { "select_id": 2, "cost_info": { "query_cost": "0.55" }, "table": { "table_name": "t1", "access_type": "ALL", "rows_examined_per_scan": 3, "rows_produced_per_join": 3, "filtered": "100.00", "cost_info": { "read_cost": "0.25", "eval_cost": "0.30", "prefix_cost": "0.55", "data_read_per_join": "24" }, "used_columns": [ "a" ] } } }, { "dependent": false, "cacheable": true, "query_block": { "select_id": 3, "cost_info": { "query_cost": "0.45" }, "table": { "table_name": "t2", "access_type": "ALL", "rows_examined_per_scan": 2, "rows_produced_per_join": 2, "filtered": "100.00", "cost_info": { "read_cost": "0.25", "eval_cost": "0.20", "prefix_cost": "0.45", "data_read_per_join": "16" }, "used_columns": [ "a" ] } } } ] } } } } } } | The diff is: 28,30c27 < "using_temporary_table" : true , < "table_name" : "<union2,3>" , < "access_type" : "ALL" , --- > "using_temporary_table" : false ,
          oleg.smirnov Oleg Smirnov added a comment -

          Regarding the second question "In the code that prints tabular explain, locate the position where it either prints or doesn't print the line that is the difference between UNION and UNION ALL":

          sql_explain.cc int Explain_union::print_explain()

            <...>
            if (!using_tmp)
              return 0;
           
            /* Print a line with "UNIT RESULT" */
            List<Item> item_list;
            Item *item_null= new (mem_root) Item_null(thd);
          

          There's no such check in int Explain_union::print_explain_json() though.

          sql_explain.cc int Explain_union::print_explain_json()

            <...>
              make_union_table_name(table_name_buffer);
              writer->add_member("table_name").add_str(table_name_buffer);
              writer->add_member("access_type").add_str("ALL"); // not very useful
          

          I suggest changing it in the following way:

          sql_explain.cc int Explain_union::print_explain_json()

            <...>
              if (using_tmp)
              {
                 make_union_table_name(table_name_buffer);
                 writer->add_member("table_name").add_str(table_name_buffer);
                 writer->add_member("access_type").add_str("ALL"); // not very useful
              }
          

          I would also consider adding "using_temporary_table" field to explicitly display whether the temp table was created and to be more compatible with MySQL.

          oleg.smirnov Oleg Smirnov added a comment - Regarding the second question "In the code that prints tabular explain, locate the position where it either prints or doesn't print the line that is the difference between UNION and UNION ALL": sql_explain.cc int Explain_union::print_explain() <...> if (!using_tmp) return 0;   /* Print a line with "UNIT RESULT" */ List<Item> item_list; Item *item_null= new (mem_root) Item_null(thd); There's no such check in int Explain_union::print_explain_json() though. sql_explain.cc int Explain_union::print_explain_json() <...> make_union_table_name(table_name_buffer); writer->add_member("table_name").add_str(table_name_buffer); writer->add_member("access_type").add_str("ALL"); // not very useful I suggest changing it in the following way: sql_explain.cc int Explain_union::print_explain_json() <...> if (using_tmp) { make_union_table_name(table_name_buffer); writer->add_member("table_name").add_str(table_name_buffer); writer->add_member("access_type").add_str("ALL"); // not very useful } I would also consider adding "using_temporary_table" field to explicitly display whether the temp table was created and to be more compatible with MySQL.
          oleg.smirnov Oleg Smirnov added a comment - - edited

          After closer examination I suggest to go even further and put "r_rows" and "r_loops" members under the same condition:

          int Explain_union::print_explain_json()

            if (using_tmp)
            {
              make_union_table_name(table_name_buffer);
              writer->add_member("table_name").add_str(table_name_buffer);
              writer->add_member("access_type").add_str("ALL"); // not very useful
           
              /* r_loops (not present in tabular output) */
              if (is_analyze)
              {
                writer->add_member("r_loops").add_ll(
                    fake_select_lex_tracker.get_loops());
              }
           
              /* `r_rows` */
              if (is_analyze)
              {
                writer->add_member("r_rows");
                if (fake_select_lex_tracker.has_scans())
                  writer->add_double(fake_select_lex_tracker.get_avg_rows());
                else
                  writer->add_null();
              }
            }
          

          So "r_rows" and "r_loops" will not be printed in case when the temporary table is not created for union.

          oleg.smirnov Oleg Smirnov added a comment - - edited After closer examination I suggest to go even further and put "r_rows" and "r_loops" members under the same condition: int Explain_union::print_explain_json() if (using_tmp) { make_union_table_name(table_name_buffer); writer->add_member("table_name").add_str(table_name_buffer); writer->add_member("access_type").add_str("ALL"); // not very useful   /* r_loops (not present in tabular output) */ if (is_analyze) { writer->add_member("r_loops").add_ll( fake_select_lex_tracker.get_loops()); }   /* `r_rows` */ if (is_analyze) { writer->add_member("r_rows"); if (fake_select_lex_tracker.has_scans()) writer->add_double(fake_select_lex_tracker.get_avg_rows()); else writer->add_null(); } } So "r_rows" and "r_loops" will not be printed in case when the temporary table is not created for union.

          oleg.smirnov, ok please proceed to create a patch following the above suggestions.

          I think it's a good idea to print (or not print) "using_temporary_table": true inside the "union_result" node.

          I see that MySQL has two "using_temporary_table":

                "materialized_from_subquery": {
                  "using_temporary_table": true,
          ...
                    "union_result": {
                      "using_temporary_table": true,
          

          As far as I understand, one will always have using_temporary_table inside materialized_from_subquery...
          Perhaps they've added that member only so that the user is able to see which parts of query use temp. tables...
          I think the goal of "make EXPLAIN FORMAT=JSON indicate which parts of query plan create temp. tables" has some merit but it's outside of the scope of this MDEV.

          psergei Sergei Petrunia added a comment - oleg.smirnov , ok please proceed to create a patch following the above suggestions. I think it's a good idea to print (or not print) "using_temporary_table": true inside the "union_result" node. I see that MySQL has two "using_temporary_table" : "materialized_from_subquery": { "using_temporary_table": true, ... "union_result": { "using_temporary_table": true, As far as I understand, one will always have using_temporary_table inside materialized_from_subquery ... Perhaps they've added that member only so that the user is able to see which parts of query use temp. tables... I think the goal of "make EXPLAIN FORMAT=JSON indicate which parts of query plan create temp. tables" has some merit but it's outside of the scope of this MDEV.

          A question to consider: what should fixVersion be? ANALYZE FORMAT=JSON was introduced in 10.3 ...

          psergei Sergei Petrunia added a comment - A question to consider: what should fixVersion be? ANALYZE FORMAT=JSON was introduced in 10.3 ...

          Ok to push into 10.3.

          psergei Sergei Petrunia added a comment - Ok to push into 10.3.
          oleg.smirnov Oleg Smirnov added a comment - Pushed to 10.3: https://github.com/MariaDB/server/commit/7498978e6af867c3351f8a1392a0de66d2dfae24

          People

            oleg.smirnov Oleg Smirnov
            psergei Sergei Petrunia
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.