Uploaded image for project: 'MariaDB ColumnStore'
  1. MariaDB ColumnStore
  2. MCOL-4719

ColumnStore ignores where clause: inline view with a window function

Details

    • Bug
    • Status: Closed (View Workflow)
    • Blocker
    • Resolution: Fixed
    • 5.5.1
    • 6.2.1, 6.2.2
    • None
    • None
    • 10.5.9-6-MariaDB-enterprise-log
      Columnstore 5.5
    • 2021-9, 2021-10

    Description

      When columnstore_select_handler=on this query should return only 2 rows. Instead it returns all rows.

      create database test1;
      use test1;
       
      CREATE TABLE test_table (
      category CHAR(1),
      count INTEGER(1)
      ) ENGINE=COLUMNSTORE;
       
      INSERT INTO test_table (category, count) VALUES ('A', 1);
      INSERT INTO test_table (category, count) VALUES ('A', 2);
      INSERT INTO test_table (category, count) VALUES ('B', 3);
      INSERT INTO test_table (category, count) VALUES ('B', 4);
       
      set columnstore_select_handler=off;
      select * FROM (
      SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
      FROM test_table
      ) a
      where ratio > .5;
      ratio
      0.6667
      0.5714
       
      set columnstore_select_handler=on;
      select * FROM (
      SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
      FROM test_table
      ) a
      where ratio > .5;
      ratio
      0.6667
      0.3333
      0.5714
      0.4286
      

      Attachments

        Activity

          edward Edward Stoever created issue -
          edward Edward Stoever made changes -
          Field Original Value New Value
          Description {{monospaced text}}
          create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286
          {{create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286}}
          edward Edward Stoever made changes -
          Description {{create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286}}
          create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286
          edward Edward Stoever made changes -
          Description create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286
          When columnstore_select_handler=on this query should return only 2 rows. Instead it returns all rows.
          {noformat}
          create database test1;
          use test1;

          CREATE TABLE test_table (
          category CHAR(1),
          count INTEGER(1)
          ) ENGINE=COLUMNSTORE;

          INSERT INTO test_table (category, count) VALUES ('A', 1);
          INSERT INTO test_table (category, count) VALUES ('A', 2);
          INSERT INTO test_table (category, count) VALUES ('B', 3);
          INSERT INTO test_table (category, count) VALUES ('B', 4);

          set columnstore_select_handler=off;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.5714

          set columnstore_select_handler=on;
          select * FROM (
          SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio
          FROM test_table
          ) a
          where ratio > .5;
          ratio
          0.6667
          0.3333
          0.5714
          0.4286
          {noformat}
          toddstoffel Todd Stoffel (Inactive) made changes -
          Rank Ranked higher
          gdorman Gregory Dorman (Inactive) made changes -
          Priority Major [ 3 ] Blocker [ 1 ]
          gdorman Gregory Dorman (Inactive) made changes -
          Fix Version/s 6.1.1 [ 25600 ]
          gdorman Gregory Dorman (Inactive) made changes -
          Assignee David Hall [ david.hall ]
          toddstoffel Todd Stoffel (Inactive) made changes -
          Rank Ranked higher
          gdorman Gregory Dorman (Inactive) made changes -
          Fix Version/s 6.5.1 [ 25801 ]
          Fix Version/s 6.1.1 [ 25600 ]
          David.Hall David Hall (Inactive) added a comment - - edited

          Another, perhaps more palatable, workaround:
          select * FROM ( SELECT (count / SUM(count) OVER (PARTITION BY category)) AS ratio FROM test_table ) a having ratio > .5;
          --------

          ratio

          --------

          0.6667
          0.5714

          --------

          David.Hall David Hall (Inactive) added a comment - - edited Another, perhaps more palatable, workaround: select * FROM ( SELECT (count / SUM(count) OVER (PARTITION BY category)) AS ratio FROM test_table ) a having ratio > .5; -------- ratio -------- 0.6667 0.5714 --------

          I have run this test on 1.1, 1.2, and 1.4. All showed the same deviant behavior.

          David.Hall David Hall (Inactive) added a comment - I have run this test on 1.1, 1.2, and 1.4. All showed the same deviant behavior.

          Analysis:
          Window Functions and aggregates aren't complete until well after filtering is done. This is why you can't use a window function in a WHERE clause. If you try, it will generate an error:

          SELECT SUM(count) OVER (PARTITION BY category) AS x FROM test_table where SUM(count) OVER (PARTITION BY category) < 6;
          ERROR 4015 (HY000): Window function is allowed only in SELECT list and ORDER BY clause
          

          Optimization attempts to push filters from the outer select to the inner select. This isn't done if the filter is on a result of an aggregation or window function. When the inner select doesn't have the operator, it notices that the filter is on the window function and doesn't optimize.

          But when the inner function has an operator that includes the window function, the code only sees the operator and attempts to optimize, causing an illegal filter on the result of a window function which appears to turn into a noop in the lower code.

          The fix is to have the code delve into the operator (or function or whatever) to see what windows or aggregates might lurk inside and suppress the optimization accordingly.

          David.Hall David Hall (Inactive) added a comment - Analysis: Window Functions and aggregates aren't complete until well after filtering is done. This is why you can't use a window function in a WHERE clause. If you try, it will generate an error: SELECT SUM(count) OVER (PARTITION BY category) AS x FROM test_table where SUM(count) OVER (PARTITION BY category) < 6 ; ERROR 4015 (HY000): Window function is allowed only in SELECT list and ORDER BY clause Optimization attempts to push filters from the outer select to the inner select. This isn't done if the filter is on a result of an aggregation or window function. When the inner select doesn't have the operator, it notices that the filter is on the window function and doesn't optimize. But when the inner function has an operator that includes the window function, the code only sees the operator and attempts to optimize, causing an illegal filter on the result of a window function which appears to turn into a noop in the lower code. The fix is to have the code delve into the operator (or function or whatever) to see what windows or aggregates might lurk inside and suppress the optimization accordingly.
          David.Hall David Hall (Inactive) made changes -
          Fix Version/s 6.2.1 [ 26035 ]
          Fix Version/s 6.3.1 [ 25801 ]
          David.Hall David Hall (Inactive) made changes -
          Sprint 2021-9 [ 541 ]

          QA
          I added the following two queries to mtr using the above defined table:

          # a subselect with an arithmetic operator on a window function
          select * FROM (SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio FROM test_table) a where ratio > .5;
          +--------+
          | ratio  |
          +--------+
          | 0.6667 |
          | 0.5714 |
          +--------+
          
          

          # a subselect with a function containing an arithmetic operator on a window function
          select * FROM (SELECT round(0+SUM(count) OVER (PARTITION BY category),3) AS x FROM test_table ) a where x>3;
          +-------+
          | x     |
          +-------+
          | 7.000 |
          | 7.000 |
          +-------+
          
          

          You should be able to create a subquery with any number of nested functions and operators with internal aggregates and window functions.

          David.Hall David Hall (Inactive) added a comment - QA I added the following two queries to mtr using the above defined table: # a subselect with an arithmetic operator on a window function select * FROM (SELECT count / SUM(count) OVER (PARTITION BY category) AS ratio FROM test_table) a where ratio > . 5 ; +--------+ | ratio | +--------+ | 0.6667 | | 0.5714 | +--------+ # a subselect with a function containing an arithmetic operator on a window function select * FROM (SELECT round( 0 +SUM(count) OVER (PARTITION BY category), 3 ) AS x FROM test_table ) a where x> 3 ; +-------+ | x | +-------+ | 7.000 | | 7.000 | +-------+ You should be able to create a subquery with any number of nested functions and operators with internal aggregates and window functions.
          David.Hall David Hall (Inactive) made changes -
          Assignee David Hall [ david.hall ] Gagan Goel [ tntnatbry ]
          Status Open [ 1 ] In Review [ 10002 ]
          David.Hall David Hall (Inactive) made changes -
          Sprint 2021-9 [ 541 ] 2021-9, 2021-10 [ 541, 549 ]
          tntnatbry Gagan Goel (Inactive) made changes -
          Status In Review [ 10002 ] In Testing [ 10301 ]
          tntnatbry Gagan Goel (Inactive) made changes -
          Assignee Gagan Goel [ tntnatbry ] Daniel Lee [ dleeyh ]

          Build verified: 6.2.1-1 ( #2921)

          dleeyh Daniel Lee (Inactive) added a comment - Build verified: 6.2.1-1 ( #2921)
          dleeyh Daniel Lee (Inactive) made changes -
          Resolution Fixed [ 1 ]
          Status In Testing [ 10301 ] Closed [ 6 ]
          toddstoffel Todd Stoffel (Inactive) made changes -
          Rank Ranked higher
          David.Hall David Hall (Inactive) made changes -
          Fix Version/s 6.2.2 [ 27000 ]
          Richard Richard Stracke made changes -
          Assignee Daniel Lee [ dleeyh ] Richard Stracke [ richard ]
          Richard Richard Stracke made changes -
          Assignee Richard Stracke [ richard ] Daniel Lee [ dleeyh ]
          mariadb-jira-automation Jira Automation (IT) made changes -
          Zendesk Related Tickets 112592

          People

            dleeyh Daniel Lee (Inactive)
            edward Edward Stoever
            Votes:
            1 Vote for this issue
            Watchers:
            5 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.