Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-26337

subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values

    XMLWordPrintable

Details

    Description

      Wrong result, duplicate group by values and wrong aggregation "row"-data (row 1,2 and 5,6):

      CREATE OR REPLACE TABLE test_kpi_date (
        the_date date NOT NULL PRIMARY KEY
      );
      

      the_date	    org_type	org_id	dept_id	cco_stk_ttl
      2021-08-12	dis	10	ADVB	1                            <- ?????
      2021-08-12	dis	10	ADVB	4096                         <- ?????
      2021-08-12	dis	11	ADVB	2
      2021-08-12	dis	12	ADVB	<null>
      2021-08-12	reg	1	ADVB	1                           <- ????? 
      2021-08-12	reg	1	ADVB	4098                        <- ?????
      2021-08-12	dis	21	ADVB	96
      2021-08-12	dis	22	ADVB	256
      2021-08-12	dis	23	ADVB	512
      2021-08-12	reg	2	ADVB	864
      2021-08-12	nsc	MMD	ADVB	4963
      

      generated with:

      CREATE OR REPLACE TABLE test_kpi_date (
        "id" int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
        the_date date NOT NULL
      );
      

      result looks good:

      the_date	    org_type	org_id	dept_id	cco_stk_ttl
      2021-08-12	dis	10	ADVB	4097
      2021-08-12	dis	11	ADVB	2
      2021-08-12	dis	12	ADVB	<null>
      2021-08-12	reg	1	ADVB	4099
      2021-08-12	dis	21	ADVB	96
      2021-08-12	dis	22	ADVB	256
      2021-08-12	dis	23	ADVB	512
      2021-08-12	reg	2	ADVB	864
      2021-08-12	nsc	MMD	ADVB	4963
      

      It looks like MDEV-25714.

      Test Data:

       SET sql_mode = 'REAL_AS_FLOAT,PIPES_AS_CONCAT,ANSI_QUOTES,IGNORE_SPACE,ANSI,ONLY_FULL_GROUP_BY'
       
      CREATE OR REPLACE TABLE test_kpi_date (
        the_date date NOT NULL
      , PRIMARY KEY ("the_date")
      );
       
      SELECT *
      FROM test_kpi_date tkd 
       
      INSERT INTO test_kpi_date VALUES (current_date), (current_date-1), (current_date-2), (current_date-3);
       
      CREATE OR REPLACE TABLE test_org_district (
        "id" int(11) NOT NULL AUTO_INCREMENT,
        "nsc_id" char(5) NOT NULL,
        "dept_id" char(4) NOT NULL,
        "district_id" char(3) NOT NULL,
        "region_id" char(2) NOT NULL,
        PRIMARY KEY ("id"),
        UNIQUE KEY "dept_district" ("dept_id","district_id"),
        KEY "region_id" ("dept_id","region_id")
      );
       
      INSERT INTO test_org_district (nsc_id,dept_id,district_id,region_id) VALUES
         ('MMD','ADVB','10','1'),
         ('MMD','ADVB','11','1'),
         ('MMD','ADVB','21','2'),
         ('MMD','ADVB','22','2');
       
      CREATE OR REPLACE TABLE "test_org_partner" (
        "dept_id" char(4) CHARACTER SET utf8 NOT NULL,
        "ptn_id" char(5) CHARACTER SET utf8,
        "district_id" char(3) CHARACTER SET utf8 NOT NULL DEFAULT '0',
        "nsc_id" char(5) CHARACTER SET utf8 NOT NULL,
        PRIMARY KEY ("ptn_id","dept_id"),
        KEY "dept_ptn" ("dept_id","ptn_id"),
        KEY "dept_district_ptn" ("dept_id","district_id","ptn_id")
      );
       
      INSERT INTO test_org_partner (dept_id,ptn_id,district_id,nsc_id) VALUES
         ('ADVB','10001','10','MMD'),
         ('ADVB','10002','10','MMD'),
         ('ADVB','10003','10','MMD'),
         ('ADVB','11001','11','MMD'),
         ('ADVB','11002','11','MMD'),
         ('ADVB','10740','21','MMD'),
         ('ADVB','10741','21','MMD'),
         ('ADVB','10429','22','MMD'),
         ('ADVB','10771','23','MMD');
       
       CREATE OR REPLACE TABLE "test_kpi_measure" (
        "the_date" date DEFAULT NULL,
        "ptn_id" char(5) CHARACTER SET utf8 NOT NULL DEFAULT '',
        "cco_stk_ttl" int(11) DEFAULT NULL,
        PRIMARY KEY ("the_date","ptn_id"),
        KEY "ptn_id_idx" ("ptn_id")
      );
       
      INSERT INTO test_kpi_measure (the_date,ptn_id,cco_stk_ttl) VALUES
         (current_date-1,'10001',4096),
         (current_date-1,'10002',1),
         (current_date-2,'10002',null),
         (current_date-1,'11001',2),
         (current_date-2,'11001',4),
         (current_date-1,'11003',1024),
         (current_date-2,'11003',2048),
         (current_date-1,'10580',8),
         (current_date-1,'10499',16),
         (current_date-1,'10740',32),
         (current_date-1,'10741',64),
         (current_date-2,'10741',128),
         (current_date-1,'10429',256),
         (current_date-1,'10771',512);
      

      SQL with wrong result:

       SELECT org.the_date
                  , org.org_type
                  , org.org_id
                  , org.dept_id
                  , msr.`cco_stk_ttl`
              FROM (
                SELECT cal.the_date
                     , org.dept_id
                     , case when org.region_id   <=> null then 'nsc'
                            when org.district_id <=> null then 'reg'
                            else 'dis'
                       end as org_type
                     , coalesce(org.district_id, org.region_id, 'MMD') as org_id
                     , org.district_id
                     , org.region_id
                FROM test_kpi_date cal
                CROSS JOIN test_org_district org
                WHERE org.nsc_id = 'MMD' and org.dept_id IN ('ADVB')
                AND cal.the_date = CURRENT_DATE() - INTERVAL 1 DAY
                GROUP BY cal.the_date, org.dept_id, org.region_id, org.district_id WITH ROLLUP
                HAVING NOT (cal.the_date IS NULL OR org.dept_id IS NULL)
              ) org
              LEFT JOIN (
                SELECT  sub.the_date
                      , dis.dept_id
                      , dis.region_id
                      , dis.district_id
                      , SUM(sub.cco_stk_ttl) as cco_stk_ttl
                FROM test_kpi_measure sub
                JOIN test_org_partner org  ON org.ptn_id = sub.ptn_id
                JOIN test_org_district dis ON dis.dept_id = org.dept_id AND dis.district_id = org.district_id
                WHERE dis.nsc_id = 'MMD' and dis.dept_id IN ('ADVB')
      --          AND sub.the_date = CURRENT_DATE() - INTERVAL 1 DAY
                GROUP BY sub.the_date, dis.dept_id, dis.region_id, dis.district_id WITH ROLLUP
              ) msr
              ON    msr.the_date     =  org.the_date
                and msr.dept_id      <=> org.dept_id
                and msr.region_id   <=> org.region_id
                and msr.district_id <=> org.district_id
      

      the result is correct if there is no index on the column the_date:

      CREATE OR REPLACE TABLE test_kpi_date (
        "id" int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
        the_date date NOT NULL
      -- , PRIMARY KEY ("the_date")
      -- , UNIQUE (the_date)
      );
      

      You can see, we try to use different definitions with the same behavior (wrong) when the_date get an index (PRIMARY/UNIQUE).
      Without UNIQUE/PRIMARY Index on the_date it works fine.

      Attachments

        Issue Links

          Activity

            People

              psergei Sergei Petrunia
              fglueck Frank Glück
              Votes:
              3 Vote for this issue
              Watchers:
              8 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.