[MDEV-33344] REGEXP empty string inconsistent Created: 2024-01-31  Updated: 2024-02-01

Status: In Testing
Project: MariaDB Server
Component/s: Data Manipulation - Subquery
Affects Version/s: 10.6.12, 10.11.6
Fix Version/s: 10.6, 10.11, 11.0

Type: Bug Priority: Major
Reporter: Phteven Jaßnowski Assignee: Sergei Golubchik
Resolution: Unresolved Votes: 0
Labels: consistency
Environment:

Debian 11
Windows Server 2016


Attachments: JPEG File Basic Condition.jpg     JPEG File InconsistentSelect.jpg     JPEG File InconsistentWhere.jpg     JPEG File ManualEscape.jpg    

 Description   

In this case, a static string is to be compared against the saved patterns using saved REGEXP patterns.
The pattern data field can be empty.
According to direct SQL, an empty pattern string is equivalent to '.*'.
However, when used within a query, the escape proves to be unreliable.
Depending on the data lines queried, the regexp query for the empty pattern is valid and invalid once. The same behavior occurs when the regexp is used within a Where query.
If the queried pattern is masked with the condition "if empty to '.*'", the result is consistent.

Please find an example Statement to create an small Datatable with the mentioned Results below:

DROP TABLE IF EXISTS data_table;
 
CREATE TABLE `data_table` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `where_1` int(11) NOT NULL,
  `where_2` int(11) NOT NULL,
  `where_3` int(11) NOT NULL,
  `pattern` varchar(64) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 
INSERT INTO `data_table` (`id`, `where_1`, `where_2`, `where_3`, `pattern`) VALUES
(9,	11,	40,	1,	''),
(10,	11,	10,	1,	''),
(11,	11,	500,	1,	''),
(12,	12,	500,	1,	''),
(13,	12,	20,	1,	''),
(14,	12,	40,	1,	''),
(15,	11,	920,	1,	''),
(16,	11,	930,	1,	''),
(17,	11,	920,	1,	''),
(18,	11,	930,	1,	''),
(19,	11,	551,	0,	''),
(20,	11,	520,	1,	'/Testwert1/'),
(21,	12,	510,	1,	'/Testwert2*/'),
(22,	11,	60,	0,	''),
(23,	11,	510,	1,	'/Testwert3/'),
(24,	12,	920,	1,	''),
(25,	12,	930,	1,	''),
(26,	12,	920,	1,	''),
(27,	12,	930,	1,	''),
(28,	12,	60,	0,	''),
(29,	12,	510,	1,	'/Testwert1A/'),
(30,	11,	30,	1,	''),
(31,	12,	30,	1,	''),
(32,	12,	30,	1,	'/Testwert4/'),
(33,	12,	30,	1,	'/^(?![\\s|\\S]*Testwert4[\\s|\\S]*)\\b/');
 
-- Basic Dataset that will be used in REGEXP Query
SELECT *
FROM `data_table`
WHERE `where_1` = '11' AND `where_2` IN (30,510,530,531,532,520);
 
-- REGEXP against an empty pattern field
SELECT 'This is a test string' REGEXP '' AS 'regexp', LENGTH('') AS 'length';
 
-- REGEXP with two pattern lines. Empty pattern is valid
SELECT *, 'This is a test string' REGEXP pattern AS 'regexp', LENGTH(pattern) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532, 520) AND where_3 = 1;
 
-- REGEPX with one pattern line. Empty pattern is invalid
SELECT *, 'This is a test string' REGEXP pattern AS 'regexp', LENGTH(pattern) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532) AND where_3 = 1
ORDER BY id ASC;
 
-- REGEXP with one pattern line in reverse order. Empty pattern is valid
SELECT *, 'This is a test string' REGEXP pattern AS 'regexp', LENGTH(pattern) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532) AND where_3 = 1
ORDER BY id DESC;
 
-- REGEXP with one line as the second WHERE condition. Empty pattern is valid
SELECT *, 'This is a test string' REGEXP pattern AS 'regexp', LENGTH(pattern) AS 'length'
FROM data_table
WHERE where_1 = 11
AND 'This is a test string' REGEXP pattern
AND where_2 IN (30, 510, 530, 531, 532)
AND where_3 = 1;
 
-- REGEXP with one line as the forth WHERE condition. Empty pattern is invalid
SELECT *, 'This is a test string' REGEXP pattern AS 'regexp', LENGTH(pattern) AS 'length'
FROM data_table
WHERE where_1 = 11
AND where_2 IN (30, 510, 530, 531, 532)
AND where_3 = 1 
AND 'This is a test string' REGEXP pattern;
 
-- Below are the same statment with an IF(pattern = '', '.*', pattern) escape. All results are consitent
 
SELECT *, 'This is a test string' REGEXP IF(pattern = '', '.*', pattern) AS 'regexp', LENGTH(IF(pattern = '', '.*', pattern)) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532, 520) AND where_3 = 1;
 
SELECT *, 'This is a test string' REGEXP IF(pattern = '', '.*', pattern) AS 'regexp', LENGTH(IF(pattern = '', '.*', pattern)) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532) AND where_3 = 1
ORDER BY id ASC;
 
SELECT *, 'This is a test string' REGEXP IF(pattern = '', '.*', pattern) AS 'regexp', LENGTH(IF(pattern = '', '.*', pattern)) AS 'length'
FROM data_table
WHERE where_1 = 11 AND where_2 IN (30, 510, 530, 531, 532) AND where_3 = 1
ORDER BY id DESC;
 
SELECT *, 'This is a test string' REGEXP IF(pattern = '', '.*', pattern) AS 'regexp', LENGTH(IF(pattern = '', '.*', pattern)) AS 'length'
FROM data_table
WHERE where_1 = 11
AND 'This is a test string' REGEXP IF(pattern = '', '.*', pattern)
AND where_2 IN (30, 510, 530, 531, 532)
AND where_3 = 1;
 
SELECT *, 'This is a test string' REGEXP IF(pattern = '', '.*', pattern) AS 'regexp', LENGTH(IF(pattern = '', '.*', pattern)) AS 'length'
FROM data_table
WHERE where_1 = 11
AND where_2 IN (30, 510, 530, 531, 532)
AND where_3 = 1 
AND 'This is a test string' REGEXP IF(pattern = '', '.*', pattern);


Generated at Thu Feb 08 10:38:12 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.