[MDEV-31115] Covering index not compute order by limit with EITS while index dive could tell if limit can be evaluated Created: 2023-04-23  Updated: 2023-04-24

Status: Open
Project: MariaDB Server
Component/s: Optimizer
Affects Version/s: 10.9.5
Fix Version/s: None

Type: Bug Priority: Minor
Reporter: VAROQUI Stephane Assignee: Unassigned
Resolution: Unresolved Votes: 0
Labels: None


 Description   

CREATE TABLE `data_value_svar` (
  `entity_uri` varchar(255) NOT NULL,
  `predicate_uri` enum('address','associationId','brandLabel','cedexCode','cityLabel','closedCompanies','closingDate','closureDate','companies','companiesPercent','companySizeType','corporateLabel','corporateOfficer','countryLabel','definiteArticle','email','establishments','financialReportLastYear','freelanceEstablishmentType','hasCompanies','hasEstablishments','initials','inseeCode','isSeasonalBusiness','label','lastmodDate','latitude','legalForm','legalFormCode','longitude','nafClassificationRanking','nafClassificationRankingsEndDate','nafClassificationRankingsStartDate','nafCode','nearestNeighborCode','nearestNeighborDepartmentCode','nearestNeighborTownCode','netProfit','nicCode','partitiveArticle','partOfClerkOffice','partOfCompany','partOfDepartment','partOfNafClassification','partOfTown','partOfTownLabel','phone','population','postalCode','preposition','registeredCompanies','registrationDate','returnOnSales','shortLabel','sirenCode','siretCode','slug','storeTypeBySurface','tradeLabel','trueURI','turnover','turnoverRank','vatCode','workforce') NOT NULL,
  `value` varchar(255) DEFAULT NULL,
  `date` date NOT NULL,
  `category` enum('bodacc','datainfogreffe','demo','eco','generic','geo','h-generic','h-postprocess','rdfs','sirene') NOT NULL,
  `value_crc` int(10) unsigned GENERATED ALWAYS AS (crc32(`value`)) VIRTUAL,
  `entity_crc` int(10) unsigned GENERATED ALWAYS AS (crc32(`entity_uri`)) VIRTUAL,
  `uri_type` bigint(20) unsigned GENERATED ALWAYS AS (conv(substr(cast(sha(substring_index(`entity_uri`,'-',1)) as char charset utf8mb3),1,16),16,10)) VIRTUAL,
  `uri_id` bigint(20) unsigned GENERATED ALWAYS AS (substring_index(`entity_uri`,'-',-1)) VIRTUAL,
  `numeric_value` decimal(24,5) GENERATED ALWAYS AS (if(`value` regexp '^[\\-\\+]?\\d+(\\.\\d+)?(e[\\-\\+]?\\d+)?$',cast(`value` as decimal(24,5)),NULL)) VIRTUAL,
  `type_uri_crc` int(10) unsigned DEFAULT NULL,
  `status` tinyint(4) DEFAULT NULL,
  UNIQUE KEY `idx_uni` (`predicate_uri`,`uri_type`,`uri_id`,`date`),
  KEY `category` (`category`),
  KEY `pve` (`predicate_uri`,`value_crc`,`entity_crc`),
  KEY `pne` (`predicate_uri`,`numeric_value`,`entity_crc`),
  KEY `pev` (`predicate_uri`,`entity_crc`,`numeric_value`),
  KEY `dpen` (`date`,`predicate_uri`,`entity_crc`,`numeric_value`),
  KEY `stpdn` (`status`,`type_uri_crc`,`predicate_uri`,`date`,`numeric_value`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci ROW_FORMAT=DYNAMIC `PAGE_COMPRESSED`=1

We notice different execution plan when only on a constante change and this should not happen Company vs company

explain SELECT       d1.entity_uri,      d1.value ,     d1.numeric_value,    d1.entity_crc  FROM      data_value_svar  d1   WHERE    d1.predicate_uri = 'netProfit' AND     d1.date
 = '2017-01-01'  AND    d1.status = 1 AND    d1.type_uri_crc = CRC32('company') ORDER BY d1.numeric_value DESC;
+------+-------------+-------+------+--------------------------------+-------+---------+-------------------------+------+-------------+
| id   | select_type | table | type | possible_keys                  | key   | key_len | ref                     | rows | Extra       |
+------+-------------+-------+------+--------------------------------+-------+---------+-------------------------+------+-------------+
|    1 | SIMPLE      | d1    | ref  | idx_uni,pve,pne,pev,dpen,stpdn | stpdn | 11      | const,const,const,const | 1    | Using where |
+------+-------------+-------+------+--------------------------------+-------+---------+-------------------------+------+-------------+
1 row in set (0.001 sec)

MariaDB [directory_company]>  SELECT       d1.entity_uri,      d1.value ,     d1.numeric_value,    d1.entity_crc  FROM      data_value_svar  d1   WHERE    d1.predicate_uri = 'netProfit' AND     d1.date= '2017-01-01'  AND    d1.status = 1 AND    d1.type_uri_crc = CRC32('company') ORDER BY d1.numeric_value DESC;
Empty set (0.001 sec)

MariaDB [directory_company]> explain SELECT       d1.entity_uri,      d1.value ,     d1.numeric_value,    d1.entity_crc  FROM      data_value_svar  d1   WHERE    d1.predicate_uri = 'netProfit' AND     d1.date = '2017-01-01'  AND    d1.status = 1 AND    d1.type_uri_crc = CRC32('Company') ORDER BY d1.numeric_value DESC;
+------+-------------+-------+------+--------------------------------+------+---------+-------------+--------+-----------------------------+
| id   | select_type | table | type | possible_keys                  | key  | key_len | ref         | rows   | Extra                       |
+------+-------------+-------+------+--------------------------------+------+---------+-------------+--------+-----------------------------+
|    1 | SIMPLE      | d1    | ref  | idx_uni,pve,pne,pev,dpen,stpdn | dpen | 4       | const,const | 945042 | Using where; Using filesort |
+------+-------------+-------+------+--------------------------------+------+---------+-------------+--------+-----------------------------+
1 row in set (0.001 sec)

Our final query should be forced

SELECT d1.entity_uri,  d1.value as sortingValue , d0.value as matchingValue   FROM   (SELECT       d1.entity_uri,      d1.value ,     d1.numeric_value,    d1.entity_crc  FROM
 data_value_svar  d1  FORCE INDEX(stpdn)  WHERE    d1.predicate_uri = 'netProfit' AND     d1.date = '2017-01-01'  AND    d1.status = 1 AND    d1.type_uri_crc = CRC32('Company') ORDER BY d1.numeric_value DESC
) as d1  INNER JOIN   data_value_svar as d0 ON (   d1.entity_crc  = d0.entity_crc AND   d0.predicate_uri = 'partOfDepartment' AND   d0.value_crc = CRC32('departement-01') AND  d0.value = 'departement-01' AND
  d1.entity_uri  = d0.entity_uri )   ORDER BY d1.numeric_value DESC  LIMIT 5\G
*************************** 1. row ***************************
   entity_uri: entreprise-344844998
 sortingValue: 29378000
matchingValue: departement-01
*************************** 2. row ***************************
   entity_uri: entreprise-483018370
 sortingValue: 27203170
matchingValue: departement-01
*************************** 3. row ***************************
   entity_uri: entreprise-779306471
 sortingValue: 17262000
matchingValue: departement-01
*************************** 4. row ***************************
   entity_uri: entreprise-713780278
 sortingValue: 11076000
matchingValue: departement-01
*************************** 5. row ***************************
   entity_uri: entreprise-969509892
 sortingValue: 9368880
matchingValue: departement-01
5 rows in set (0.019 sec)

When unforced index

ANALYZE SELECT d1.entity_uri,  d1.value as sortingValue , d0.value as matchingValue   FROM   (SELECT       d1.entity_uri,      d1.value ,     d1.numeric_value,    d1.entity_crc  F
ROM      data_value_svar  d1   WHERE    d1.predicate_uri = 'netProfit' AND     d1.date = '2017-01-01'  AND    d1.status = 1 AND    d1.type_uri_crc = CRC32('Company') ORDER BY d1.numeric_value DESC ) as d1  INNE
_uri  = d0.entity_uri )   ORDER BY d1.numeric_value DESC  LIMIT 5\G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: d0
         type: ref
possible_keys: idx_uni,pve,pne,pev
          key: pve
      key_len: 6
          ref: const,const
         rows: 192804
       r_rows: 90239.00
     filtered: 100.00
   r_filtered: 100.00
        Extra: Using where; Using temporary; Using filesort
*************************** 2. row ***************************
           id: 1
  select_type: SIMPLE
        table: d1
         type: ref
possible_keys: idx_uni,pve,pne,pev,dpen,stpdn
          key: pev
      key_len: 6
          ref: const,directory_company.d0.entity_crc
         rows: 1
       r_rows: 0.48
     filtered: 0.37
   r_filtered: 7.78
        Extra: Using where
2 rows in set (0.981 sec)

This is a case for LIMIT evaluation the optimizer choose the plan with less work

MariaDB [directory_company]> analyze SELECT count(*)  FROM  (SELECT    d1.entity_uri,   d1.value ,   d1.numeric_value,  d1.entity_crc FROM   data_value_svar d1 FORCE INDEX(stpdn) WHERE  d1.predicate_uri = 'netProfit' AND   d1.date = '2017-01-01' AND  d1.status = 1 AND  d1.type_uri_crc = CRC32('company') ORDER BY d1.numeric_value DESC ) as d1 INNER JOIN  data_value_svar as d0 ON (  d1.entity_crc = d0.entity_crc AND  d0.predicate_uri = 'partOfDepartment' AND  d0.value_crc = CRC32('departement-01') AND d0.value = 'departement-01' AND  d1.entity_uri = d0.entity_uri )  ORDER BY d1.numeric_value DESC \G
*************************** 1. row ***************************
      id: 1
 select_type: SIMPLE
    table: d1
     type: ref
possible_keys: stpdn
     key: stpdn
   key_len: 11
     ref: const,const,const,const
     rows: 873394
    r_rows: 437459.00
   filtered: 100.00
  r_filtered: 100.00
    Extra: Using where
*************************** 2. row ***************************
      id: 1
 select_type: SIMPLE
    table: d0
     type: ref
possible_keys: idx_uni,pve,pne,pev
     key: pve
   key_len: 11
     ref: const,const,directory_company.d1.entity_crc
     rows: 1
    r_rows: 0.01
   filtered: 2.34
  r_filtered: 99.52
    Extra: Using where
2 rows in set (3.878 sec)
 
MariaDB [directory_company]> analyze SELECT count(*)  FROM  (SELECT    d1.entity_uri,   d1.value ,   d1.numeric_value,  d1.entity_crc FROM   data_value_svar d1  WHERE  d1.predicate_uri = 'netProfit' AND   d1.date = '2017-01-01' AND  d1.status = 1 AND  d1.type_uri_crc = CRC32('company') ORDER BY d1.numeric_value DESC ) as d1 INNER JOIN  data_value_svar as d0 ON (  d1.entity_crc = d0.entity_crc AND  d0.predicate_uri = 'partOfDepartment' AND  d0.value_crc = CRC32('departement-01') AND d0.value = 'departement-01' AND  d1.entity_uri = d0.entity_uri )  ORDER BY d1.numeric_value DESC \G
*************************** 1. row ***************************
      id: 1
 select_type: SIMPLE
    table: d0
     type: ref
possible_keys: idx_uni,pve,pne,pev
     key: pve
   key_len: 6
     ref: const,const
     rows: 192804
    r_rows: 90239.00
   filtered: 2.34
  r_filtered: 100.00
    Extra: Using where
*************************** 2. row ***************************
      id: 1
 select_type: SIMPLE
    table: d1
     type: ref
possible_keys: idx_uni,pve,pne,pev,dpen,stpdn
     key: pev
   key_len: 6
     ref: const,directory_company.d0.entity_crc
     rows: 1
    r_rows: 0.48
   filtered: 9.60
  r_filtered: 7.78
    Extra: Using where
2 rows in set (0.884 sec)


Generated at Thu Feb 08 10:21:24 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.