[MDEV-20204] Index prefix + nested loop used instead of straight index access under some specific cases Created: 2019-07-29  Updated: 2019-08-04

Status: Open
Project: MariaDB Server
Component/s: Optimizer
Affects Version/s: 10.4.6
Fix Version/s: 10.4

Type: Bug Priority: Minor
Reporter: Slawomir Pryczek Assignee: Igor Babaev
Resolution: Unresolved Votes: 0
Labels: None


 Description   

I noticed that under some specific cases suboptimal execution plan will be picked, which could result in pefrormance degradation from 1-2s to several minutes on larger tables. Reporting as minor because not sure if under some circumstances eq_ref won't be faster than ref and [ref on PK] can be forced using FORCE INDEX.

CREATE TABLE IF NOT EXISTS `stats_cr` (
  `creative_id` int(11) NOT NULL,
  `domain_id` int(11) NOT NULL,
  `date` date NOT NULL,
  `impressions` int(11) NOT NULL,
  PRIMARY KEY (`date`,`creative_id`,`domain_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
 
INSERT INTO stats_cr SELECT
	seq/2, seq%5000, DATE_ADD('2019-01-01', INTERVAL seq/150000 DAY), RAND()*500 from seq_1_to_1000000;
 
 
CREATE TABLE `domain` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `url` varchar(255) NOT NULL,
  `network_id` smallint(6) NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `url` (`network_id`,`url`) USING BTREE,
  KEY `url_2` (`url`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
 
INSERT INTO domain SELECT
	NULL, concat(seq, '.com'), seq%10 from seq_1_to_50000;
 
#insert network_id=100 with much less number of items than the rest of networks
INSERT INTO domain SELECT
	NULL, concat(seq, '.com'), 100 from seq_1_to_1000;	 

Test query, when network_id = 1 to 10, domain table will be joined using primary key + eq_ref. When network_id = 100 "url" index will be used and there'll be a loop over all domains contained in network 100.

SELECT
							network_id, 
							SUM(impressions) 
						FROM `stats_cr`
						INNER JOIN `domain` ON `domain_id` = `domain`.`id`
						WHERE `date` = '2019-01-01'
						AND domain.network_id = 100
						AND creative_id between 1 and 2000
						GROUP BY `date` DESC;

{
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 14.502,
    "table": {
      "table_name": "stats_cr",
      "access_type": "range",
      "possible_keys": ["PRIMARY"],
      "key": "PRIMARY",
      "key_length": "7",
      "used_key_parts": ["date", "creative_id"],
      "r_loops": 1,
      "rows": 4000,
      "r_rows": 4000,
      "r_total_time_ms": 1.0506,
      "filtered": 0.4007,
      "r_filtered": 100,
      "attached_condition": "stats_cr.`date` = DATE'2019-01-01' and stats_cr.creative_id between 1 and 2000"
    },
    "table": {
      "table_name": "domain",
      "access_type": "eq_ref",
      "possible_keys": ["PRIMARY", "url"],
      "key": "PRIMARY",
      "key_length": "4",
      "used_key_parts": ["id"],
      "ref": ["test.stats_cr.domain_id"],
      "r_loops": 4000,
      "rows": 1,
      "r_rows": 1,
      "r_total_time_ms": 10.762,
      "filtered": 18.499,
      "r_filtered": 10,
      "attached_condition": "domain.network_id = 1"
    }
  }
}

Second example for network_id = 100

{
  "query_block": {
    "select_id": 1,
    "r_loops": 1,
    "r_total_time_ms": 1173.1,
    "table": {
      "table_name": "stats_cr",
      "access_type": "range",
      "possible_keys": ["PRIMARY"],
      "key": "PRIMARY",
      "key_length": "7",
      "used_key_parts": ["date", "creative_id"],
      "r_loops": 1,
      "rows": 4000,
      "r_rows": 4000,
      "r_total_time_ms": 1.0456,
      "filtered": 0.4007,
      "r_filtered": 100,
      "attached_condition": "stats_cr.`date` = DATE'2019-01-01' and stats_cr.creative_id between 1 and 2000"
    },
    "table": {
      "table_name": "domain",
      "access_type": "ref",
      "possible_keys": ["PRIMARY", "url"],
      "key": "url",
      "key_length": "2",
      "used_key_parts": ["network_id"],
      "ref": ["const"],
      "r_loops": 4000,
      "rows": 1000,
      "r_rows": 1000,
      "r_total_time_ms": 983.44,
      "filtered": 100,
      "r_filtered": 0,
      "attached_condition": "domain.`id` = stats_cr.domain_id",
      "using_index": true
    }
  }
}


Generated at Thu Feb 08 08:57:37 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.