The commit starts validating the whole load file against the file character set. BLOBs have always been copied 1:1 (no character set translation - only escape sequences are processed). If the load file uses UTF-8, the contents of BLOB columns cannot be expected to be valid UTF-8, because binary data can contain byte sequences that are invalid in UTF-8 and no charset conversion is applied to them.
Starting with this commit, LOAD DATA rejects non-UTF8 sequences in blob fields.
Martin Koegler
added a comment -
--- sql/sql_load.cc.orig 2016-10-18 17:45:32.156615718 +0200
+++ sql/sql_load.cc 2016-10-18 17:49:19.990569542 +0200
@@ -90,7 +90,7 @@
String &field_term,String &line_start,String &line_term,
String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
~READ_INFO();
- int read_field();
+ int read_field(CHARSET_INFO *field_charset);
int read_fixed_length(void);
int next_line(void);
char unescape(char chr);
@@ -1040,7 +1040,7 @@
uchar *pos;
Item *real_item;
- if (read_info.read_field())
+ if (read_info.read_field(item->real_item()->collation.collation))
break;
/* If this line is to be skipped we don't want to fill field or var */
@@ -1527,10 +1527,13 @@
}
-int READ_INFO::read_field()
+int READ_INFO::read_field(CHARSET_INFO *field_charset)
{
int chr,found_enclosed_char;
uchar *to,*new_buffer;
+ CHARSET_INFO *read_charset = this->read_charset;
+ if (field_charset == &my_charset_bin)
+ read_charset = &my_charset_bin;
found_null=0;
if (found_end_of_line)
Author: Thayumanavar S <thayumanavar.x.sachithanantha@oracle.com>
Date: Mon Jun 20 11:35:43 2016 +0530
BUG#23080148 - BACKPORT BUG 14653594 AND BUG 20683959 TO
MYSQL-5.5
The bug asks for a backport of bug#14653594 and bug#20683959. This
is required because of the fact that if replication is enabled, master
transaction can commit whereas slave can't commit due to not exact
'environment'. This manifestation is seen in bug#22024200.
koeglermar,
Thanks for the report and test case.
Did you also file a bug report to MySQL?
Elena Stepanova
added a comment - Thanks for the report. In server trees, it's this revision:
commit 9f7288e2e0179db478d20c74f57b5c7d6c95f793 4a3f1c1f104cbfeb6d31ee02788589151b131eca
Author: Thayumanavar S <thayumanavar.x.sachithanantha@oracle.com>
Date: Mon Jun 20 11:35:43 2016 +0530
BUG#23080148 - BACKPORT BUG 14653594 AND BUG 20683959 TO
MYSQL-5.5
The bug asks for a backport of bug#14653594 and bug#20683959. This
is required because of the fact that if replication is enabled, master
transaction can commit whereas slave can't commit due to not exact
'environment'. This manifestation is seen in bug#22024200.
koeglermar ,
Thanks for the report and test case.
Did you also file a bug report to MySQL?
I filed no upstream bug, because I have not tested with MySQL.
PS: JIRA didn't send an email notification, so I did not notice the new comment earlier.
Martin Koegler
added a comment - - edited I filed no upstream bug, because I have not tested with MySQL.
PS: JIRA didn't send an email notification, so I did not notice the new comment earlier.
CREATE TABLE t1 (a mediumblob NOT NULL) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
LOAD DATA LOCAL INFILE '/tmp/test.txt' INTO TABLE t1 CHARSET utf8 FIELDS TERMINATED BY ';' ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n';
SELECT HEX(a) FROM t1;
It loads the data without errors and returns this result as expected:
+----------+
| HEX(a) |
+----------+
| 25AAABAC |
+----------+
10.2 also works fine.
Alexander Barkov
added a comment - - edited In the current 5.5.55 tree it seems to work fine.
I create the file:
$ printf "\x22\x25\xaa\xab\xac\x22\x0a" >/tmp/test.txt
and run this SQL script:
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a mediumblob NOT NULL ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE =latin1_general_ci;
LOAD DATA LOCAL INFILE '/tmp/test.txt' INTO TABLE t1 CHARSET utf8 FIELDS TERMINATED BY ';' ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' ;
SELECT HEX(a) FROM t1;
It loads the data without errors and returns this result as expected:
+----------+
| HEX(a) |
+----------+
| 25AAABAC |
+----------+
10.2 also works fine.
I believe that MariaDB is still treating UTF8 binary data incorrectly while using load infile. I have a test case that seems to introduce an extra escape char into saved binary blob data when using UTF8. Please see linked Jira - MDEV-12240.
Kind regards,
Bill
Bill Nokes
added a comment - Hi,
I believe that MariaDB is still treating UTF8 binary data incorrectly while using load infile. I have a test case that seems to introduce an extra escape char into saved binary blob data when using UTF8. Please see linked Jira - MDEV-12240 .
Kind regards,
Bill
People
Sergei Golubchik
Martin Koegler
Votes:
0Vote for this issue
Watchers:
8Start watching this issue
Dates
Created:
Updated:
Resolved:
Git Integration
Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.
{"report":{"fcp":1164.1999998092651,"ttfb":336.5,"pageVisibility":"visible","entityId":58558,"key":"jira.project.issue.view-issue","isInitial":true,"threshold":1000,"elementTimings":{},"userDeviceMemory":8,"userDeviceProcessors":64,"apdex":0.5,"journeyId":"c43b55de-28eb-43da-ae17-83553bb69836","navigationType":0,"readyForUser":1274.1999998092651,"redirectCount":0,"resourceLoadedEnd":1475.1999998092651,"resourceLoadedStart":342.5,"resourceTiming":[{"duration":304.3999996185303,"initiatorType":"link","name":"https://jira.mariadb.org/s/2c21342762a6a02add1c328bed317ffd-CDN/lu2bv2/820016/12ta74/0a8bac35585be7fc6c9cc5a0464cd4cf/_/download/contextbatch/css/_super/batch.css","startTime":342.5,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":342.5,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":646.8999996185303,"responseStart":0,"secureConnectionStart":0},{"duration":304.20000076293945,"initiatorType":"link","name":"https://jira.mariadb.org/s/7ebd35e77e471bc30ff0eba799ebc151-CDN/lu2bv2/820016/12ta74/2380add21a9a1006587582385952de73/_/download/contextbatch/css/jira.browse.project,project.issue.navigator,jira.view.issue,jira.general,jira.global,atl.general,-_super/batch.css?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&slack-enabled=true","startTime":342.8999996185303,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":342.8999996185303,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":647.1000003814697,"responseStart":0,"secureConnectionStart":0},{"duration":313.29999923706055,"initiatorType":"script","name":"https://jira.mariadb.org/s/e9b27a47da5fb0f74a35acd57e9847fb-CDN/lu2bv2/820016/12ta74/0a8bac35585be7fc6c9cc5a0464cd4cf/_/download/contextbatch/js/_super/batch.js?locale=en","startTime":343.1000003814697,"connectEnd":343.1000003814697,"connectStart":343.1000003814697,"domainLookupEnd":343.1000003814697,"domainLookupStart":343.1000003814697,"f
etchStart":343.1000003814697,"redirectEnd":0,"redirectStart":0,"requestStart":343.1000003814697,"responseEnd":656.3999996185303,"responseStart":656.3999996185303,"secureConnectionStart":343.1000003814697},{"duration":353.19999980926514,"initiatorType":"script","name":"https://jira.mariadb.org/s/c32eb0da7ad9831253f8397e6cc26afd-CDN/lu2bv2/820016/12ta74/2380add21a9a1006587582385952de73/_/download/contextbatch/js/jira.browse.project,project.issue.navigator,jira.view.issue,jira.general,jira.global,atl.general,-_super/batch.js?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&locale=en&slack-enabled=true","startTime":343.30000019073486,"connectEnd":343.30000019073486,"connectStart":343.30000019073486,"domainLookupEnd":343.30000019073486,"domainLookupStart":343.30000019073486,"fetchStart":343.30000019073486,"redirectEnd":0,"redirectStart":0,"requestStart":343.30000019073486,"responseEnd":696.5,"responseStart":696.5,"secureConnectionStart":343.30000019073486},{"duration":357.1000003814697,"initiatorType":"script","name":"https://jira.mariadb.org/s/bc0bcb146314416123c992714ee00ff7-CDN/lu2bv2/820016/12ta74/c92c0caa9a024ae85b0ebdbed7fb4bd7/_/download/contextbatch/js/atl.global,-_super/batch.js?locale=en","startTime":343.5,"connectEnd":343.5,"connectStart":343.5,"domainLookupEnd":343.5,"domainLookupStart":343.5,"fetchStart":343.5,"redirectEnd":0,"redirectStart":0,"requestStart":343.5,"responseEnd":700.6000003814697,"responseStart":700.5,"secureConnectionStart":343.5},{"duration":357.8999996185303,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2bv2/820016/12ta74/1.0/_/download/batch/jira.webresources:calendar-en/jira.webresources:calendar-en.js","startTime":343.80000019073486,"connectEnd":343.80000019073486,"connectStart":343.80000019073486,"domainLookupEnd":343.80000019073486,"domainLookupStart":343.80000019073486,"fetchStart":343.80000019073486,"redirectEnd":0,"redirectStart":0,"requestStart":343.80000
019073486,"responseEnd":701.6999998092651,"responseStart":701.6999998092651,"secureConnectionStart":343.80000019073486},{"duration":358.80000019073486,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2bv2/820016/12ta74/1.0/_/download/batch/jira.webresources:calendar-localisation-moment/jira.webresources:calendar-localisation-moment.js","startTime":343.8999996185303,"connectEnd":343.8999996185303,"connectStart":343.8999996185303,"domainLookupEnd":343.8999996185303,"domainLookupStart":343.8999996185303,"fetchStart":343.8999996185303,"redirectEnd":0,"redirectStart":0,"requestStart":343.8999996185303,"responseEnd":702.6999998092651,"responseStart":702.6999998092651,"secureConnectionStart":343.8999996185303},{"duration":467.3999996185303,"initiatorType":"link","name":"https://jira.mariadb.org/s/b04b06a02d1959df322d9cded3aeecc1-CDN/lu2bv2/820016/12ta74/a2ff6aa845ffc9a1d22fe23d9ee791fc/_/download/contextbatch/css/jira.global.look-and-feel,-_super/batch.css","startTime":344.1000003814697,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":344.1000003814697,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":811.5,"responseStart":0,"secureConnectionStart":0},{"duration":359,"initiatorType":"script","name":"https://jira.mariadb.org/rest/api/1.0/shortcuts/820016/47140b6e0a9bc2e4913da06536125810/shortcuts.js?context=issuenavigation&context=issueaction","startTime":344.30000019073486,"connectEnd":344.30000019073486,"connectStart":344.30000019073486,"domainLookupEnd":344.30000019073486,"domainLookupStart":344.30000019073486,"fetchStart":344.30000019073486,"redirectEnd":0,"redirectStart":0,"requestStart":344.30000019073486,"responseEnd":703.3000001907349,"responseStart":703.3000001907349,"secureConnectionStart":344.30000019073486},{"duration":467.19999980926514,"initiatorType":"link","name":"https://jira.mariadb.org/s/3ac36323ba5e4eb0af2aa7ac7211b4bb-CDN/lu2bv2/820016/12ta74/d176f0986
478cc64f24226b3d20c140d/_/download/contextbatch/css/com.atlassian.jira.projects.sidebar.init,-_super,-project.issue.navigator,-jira.view.issue/batch.css?jira.create.linked.issue=true","startTime":344.5,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":344.5,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":811.6999998092651,"responseStart":0,"secureConnectionStart":0},{"duration":359.30000019073486,"initiatorType":"script","name":"https://jira.mariadb.org/s/719848dd97ebe0663199f49a3936487a-CDN/lu2bv2/820016/12ta74/d176f0986478cc64f24226b3d20c140d/_/download/contextbatch/js/com.atlassian.jira.projects.sidebar.init,-_super,-project.issue.navigator,-jira.view.issue/batch.js?jira.create.linked.issue=true&locale=en","startTime":344.69999980926514,"connectEnd":344.69999980926514,"connectStart":344.69999980926514,"domainLookupEnd":344.69999980926514,"domainLookupStart":344.69999980926514,"fetchStart":344.69999980926514,"redirectEnd":0,"redirectStart":0,"requestStart":344.69999980926514,"responseEnd":704,"responseStart":704,"secureConnectionStart":344.69999980926514},{"duration":762.6000003814697,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2bv2/820016/12ta74/1.0/_/download/batch/jira.webresources:bigpipe-js/jira.webresources:bigpipe-js.js","startTime":350.8999996185303,"connectEnd":350.8999996185303,"connectStart":350.8999996185303,"domainLookupEnd":350.8999996185303,"domainLookupStart":350.8999996185303,"fetchStart":350.8999996185303,"redirectEnd":0,"redirectStart":0,"requestStart":350.8999996185303,"responseEnd":1113.5,"responseStart":1113.5,"secureConnectionStart":350.8999996185303},{"duration":1124.1999998092651,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2bv2/820016/12ta74/1.0/_/download/batch/jira.webresources:bigpipe-init/jira.webresources:bigpipe-init.js","startTime":351,"connectEnd":351,"connectStart":351,
"domainLookupEnd":351,"domainLookupStart":351,"fetchStart":351,"redirectEnd":0,"redirectStart":0,"requestStart":351,"responseEnd":1475.1999998092651,"responseStart":1475.1999998092651,"secureConnectionStart":351},{"duration":272.19999980926514,"initiatorType":"xmlhttprequest","name":"https://jira.mariadb.org/rest/webResources/1.0/resources","startTime":843.6999998092651,"connectEnd":843.6999998092651,"connectStart":843.6999998092651,"domainLookupEnd":843.6999998092651,"domainLookupStart":843.6999998092651,"fetchStart":843.6999998092651,"redirectEnd":0,"redirectStart":0,"requestStart":843.6999998092651,"responseEnd":1115.8999996185303,"responseStart":1115.8999996185303,"secureConnectionStart":843.6999998092651},{"duration":356.20000076293945,"initiatorType":"script","name":"https://www.google-analytics.com/analytics.js","startTime":1154.3999996185303,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":1154.3999996185303,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":1510.6000003814697,"responseStart":0,"secureConnectionStart":0}],"fetchStart":0,"domainLookupStart":0,"domainLookupEnd":0,"connectStart":0,"connectEnd":0,"requestStart":147,"responseStart":337,"responseEnd":342,"domLoading":341,"domInteractive":1515,"domContentLoadedEventStart":1515,"domContentLoadedEventEnd":1571,"domComplete":1826,"loadEventStart":1826,"loadEventEnd":1827,"userAgent":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; 
+claudebot@anthropic.com)","marks":[{"name":"bigPipe.sidebar-id.start","time":1478.1000003814697},{"name":"bigPipe.sidebar-id.end","time":1479},{"name":"bigPipe.activity-panel-pipe-id.start","time":1479.1000003814697},{"name":"bigPipe.activity-panel-pipe-id.end","time":1482.8999996185303},{"name":"activityTabFullyLoaded","time":1592.5}],"measures":[],"correlationId":"20bf6f08c7ad9c","effectiveType":"4g","downlink":10,"rtt":0,"serverDuration":112,"dbReadsTimeInMs":10,"dbConnsTimeInMs":18,"applicationHash":"9d11dbea5f4be3d4cc21f03a88dd11d8c8687422","experiments":[]}}
--- sql/sql_load.cc.orig 2016-10-18 17:45:32.156615718 +0200
+++ sql/sql_load.cc 2016-10-18 17:49:19.990569542 +0200
@@ -90,7 +90,7 @@
String &field_term,String &line_start,String &line_term,
String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
~READ_INFO();
- int read_field();
+ int read_field(CHARSET_INFO *field_charset);
int read_fixed_length(void);
int next_line(void);
char unescape(char chr);
@@ -1040,7 +1040,7 @@
uchar *pos;
Item *real_item;
- if (read_info.read_field())
+ if (read_info.read_field(item->real_item()->collation.collation))
break;
/* If this line is to be skipped we don't want to fill field or var */
@@ -1527,10 +1527,13 @@
}
-int READ_INFO::read_field()
+int READ_INFO::read_field(CHARSET_INFO *field_charset)
{
int chr,found_enclosed_char;
uchar *to,*new_buffer;
+ CHARSET_INFO *read_charset = this->read_charset;
+ if (field_charset == &my_charset_bin)
+ read_charset = &my_charset_bin;
found_null=0;
if (found_end_of_line)