Details
Description
After the "fix" for MDEV-11687 we always use posix_fallocate() in the os_file_set_size (see https://github.com/MariaDB/server/blob/b0763f509a3f120e882cee2810495d169ff344a4/storage/innobase/os/os0file.cc#L4926):
...
|
# ifdef HAVE_POSIX_FALLOCATE
|
int err; |
do { |
if (fstat(file, &statbuf)) { |
err = errno;
|
} else { |
os_offset_t current_size = statbuf.st_size;
|
if (current_size >= size) { |
return true; |
}
|
current_size &= ~4095ULL;
|
err = posix_fallocate(file, current_size,
|
size - current_size);
|
}
|
} while (err == EINTR |
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
|
...
|
while in some cases (like ALTER TABLE ... ALGORITHM=COPY for COMPRESSED table and datadir on a NAS/NFS mount) using alternative allocation method provides performance benefits.
Consider the following test case on 10.1 that proves the point to some extent on a local NFS mount on current Ubuntu 22.04:
openxs@ao756:~/dbs/maria10.1/mysql-test$ ./mtr --mysqld="--innodb_file_format=Barracuda" --mysqld="--innodb_buffer_pool_size=32M" --mysqld="--innodb_use_fallocate=1" innodb.slow_alter
|
Logging: ./mtr --mysqld=--innodb_file_format=Barracuda --mysqld=--innodb_buffer_pool_size=32M --mysqld=--innodb_use_fallocate=1 innodb.slow_alter
|
vardir: /home/openxs/dbs/maria10.1/mysql-test/var
|
Checking leftover processes...
|
Removing old var directory...
|
- WARNING: Using the 'mysql-test/var' symlink
|
Creating var directory '/home/openxs/dbs/maria10.1/mysql-test/var'...
|
Checking supported features...
|
MariaDB Version 10.1.49-MariaDB
|
- SSL connections supported
|
Sphinx 'indexer' binary not found, sphinx suite will be skipped
|
Collecting tests...
|
Installing system database...
|
|
==============================================================================
|
|
TEST RESULT TIME (ms) or COMMENT
|
--------------------------------------------------------------------------
|
|
worker[1] Using MTR_BUILD_THREAD 300, with reserved ports 16000..16019
|
set default_storage_engine=innodb;
|
show global variables like '%fallocate'||
|
Variable_name Value
|
innodb_use_fallocate ON
|
create table tbig(id int auto_increment primary key, c1 char(100)) ROW_FORMAT=COMPRESSED;
|
insert into tbig(c1) values (repeat('a', 100));
|
select @now := now(6)||
|
@now := now(6)
|
2023-09-27 13:31:58.256905
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
select timestampdiff(microsecond, @now, now(6))||
|
timestampdiff(microsecond, @now, now(6))
|
86978103
|
show table status like 'tbig'||
|
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
|
tbig InnoDB 10 Compressed 1048731 60 63258624 0 0 3145728 1376221 2023-09-27 13:31:57 NULL NULL latin1_swedish_ci NULL row_format=COMPRESSED
|
set session profiling = ON||
|
select count(*) from tbig||
|
count(*)
|
1048576
|
alter table tbig add column c2 char(200) default 'b'||
|
set session profiling = OFF||
|
show profiles||
|
Query_ID Duration Query
|
1 1.69141316 select count(*) from tbig
|
2 385.22975249 alter table tbig add column c2 char(200) default 'b'
|
drop table tbig||
|
innodb.slow_alter 'xtradb' [ pass ] 474460
|
--------------------------------------------------------------------------
|
The servers were restarted 0 times
|
Spent 474.460 of 562 seconds executing testcases
|
|
Completed: All 1 tests were successful.
|
|
openxs@ao756:~/dbs/maria10.1/mysql-test$ ./mtr --mysqld="--innodb_file_format=Barracuda" --mysqld="--innodb_buffer_pool_size=32M" --mysqld="--innodb_use_fallocate=0" innodb.slow_alter
|
Logging: ./mtr --mysqld=--innodb_file_format=Barracuda --mysqld=--innodb_buffer_pool_size=32M --mysqld=--innodb_use_fallocate=0 innodb.slow_alter
|
vardir: /home/openxs/dbs/maria10.1/mysql-test/var
|
Checking leftover processes...
|
Removing old var directory...
|
- WARNING: Using the 'mysql-test/var' symlink
|
Creating var directory '/home/openxs/dbs/maria10.1/mysql-test/var'...
|
Checking supported features...
|
MariaDB Version 10.1.49-MariaDB
|
- SSL connections supported
|
Sphinx 'indexer' binary not found, sphinx suite will be skipped
|
Collecting tests...
|
Installing system database...
|
|
==============================================================================
|
|
TEST RESULT TIME (ms) or COMMENT
|
--------------------------------------------------------------------------
|
|
worker[1] Using MTR_BUILD_THREAD 300, with reserved ports 16000..16019
|
set default_storage_engine=innodb;
|
show global variables like '%fallocate'||
|
Variable_name Value
|
innodb_use_fallocate OFF
|
create table tbig(id int auto_increment primary key, c1 char(100)) ROW_FORMAT=COMPRESSED;
|
insert into tbig(c1) values (repeat('a', 100));
|
select @now := now(6)||
|
@now := now(6)
|
2023-09-27 13:46:34.280800
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
insert into tbig(c1) select c1 from tbig||
|
select timestampdiff(microsecond, @now, now(6))||
|
timestampdiff(microsecond, @now, now(6))
|
86275891
|
show table status like 'tbig'||
|
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
|
tbig InnoDB 10 Compressed 1048681 59 62734336 0 0 3145728 1376221 2023-09-27 13:46:33 NULL NULL latin1_swedish_ci NULL row_format=COMPRESSED
|
set session profiling = ON||
|
select count(*) from tbig||
|
count(*)
|
1048576
|
alter table tbig add column c2 char(200) default 'b'||
|
set session profiling = OFF||
|
show profiles||
|
Query_ID Duration Query
|
1 1.91823956 select count(*) from tbig
|
2 377.63216720 alter table tbig add column c2 char(200) default 'b'
|
drop table tbig||
|
innodb.slow_alter 'xtradb' [ pass ] 466519
|
--------------------------------------------------------------------------
|
The servers were restarted 0 times
|
Spent 466.519 of 555 seconds executing testcases
|
|
Completed: All 1 tests were successful.
|
|
openxs@ao756:~/dbs/maria10.1/mysql-test$ ls -l | grep var
|
drwxrwxr-x 8 openxs openxs 4096 вер 27 10:56 oldvar
|
lrwxrwxrwx 1 openxs openxs 4 вер 27 12:09 var -> /mnt
|
openxs@ao756:~/dbs/maria10.1/mysql-test$ mount | grep /mnt
|
127.0.0.1:/home/openxs/share on /mnt type nfs4 (rw,relatime,vers=4.2,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=127.0.0.1,local_lock=none,addr=127.0.0.1)
|
Here we have 378 seconds to ALTER the table that is twice as large as the buffer pool with innodb_use_fallocate = OFF vs 385 where it is ON. I expect bigger difference when table size increases, but wanted to keep the test fast enough to be pracical on my slow HDD.
Users reported much more serious impact (that prevents the use of 10.4+ for table terabytes in size) and upgrade from pre-10.2.4 versions.
Attachments
Issue Links
- relates to
-
MDEV-11687 innodb_use_fallocate has no effect
- Closed