|
The problem is not UUID related. MyISAM does not compress descending indexes well.
This script demonstrates the same problem with VARCHAR:
SET NAMES latin1;
|
CREATE OR REPLACE TABLE t1 (a VARCHAR(8) CHARACTER SET latin1, KEY(a)) ENGINE=MyISAM;
|
delimiter $
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
FOR c3 IN 0x61..0x7A DO
|
INSERT INTO t1 VALUES (CONCAT('aaaa',CHAR(c0), CHAR(c1), CHAR(c2), CHAR(c3)));
|
END FOR;
|
END FOR;
|
END FOR;
|
END FOR;
|
$
|
delimiter ;
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1';
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 3836928 | 9139520 | 0.4198 |
|
+--------------+-------------+--------------------------+
|
SET NAMES latin1;
|
CREATE OR REPLACE TABLE t1 (a VARCHAR(8) CHARACTER SET latin1, KEY(a DESC)) ENGINE=MyISAM;
|
delimiter $
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
FOR c3 IN 0x61..0x7A DO
|
INSERT INTO t1 VALUES (CONCAT('aaaa', CHAR(c0), CHAR(c1), CHAR(c2), CHAR(c3)));
|
END FOR;
|
END FOR;
|
END FOR;
|
END FOR;
|
$
|
delimiter ;
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1';
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 7424000 | 9139520 | 0.8123 |
|
+--------------+-------------+--------------------------+
|
Notice:
- Ascending index size is 0.4198 of the data size
- Descending index size is 0.8123 of the data size
|
|
The problem is also repeatable with Aria+VARCHAR:
SET NAMES latin1;
|
CREATE OR REPLACE TABLE t1 (a VARCHAR(6) CHARACTER SET latin1, KEY(a)) ENGINE=Aria;
|
delimiter $
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
INSERT INTO t1 VALUES (CONCAT('aaa', CHAR(c0), CHAR(c1), CHAR(c2)));
|
END FOR;
|
END FOR;
|
END FOR;
|
$
|
delimiter ;
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1';
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 253952 | 770048 | 0.3298 |
|
+--------------+-------------+--------------------------+
|
SET NAMES latin1;
|
CREATE OR REPLACE TABLE t1 (a VARCHAR(6) CHARACTER SET latin1, KEY(a DESC)) ENGINE=Aria;
|
delimiter $
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
INSERT INTO t1 VALUES (CONCAT('aaa', CHAR(c0), CHAR(c1), CHAR(c2)));
|
END FOR;
|
END FOR;
|
END FOR;
|
$
|
delimiter ;
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1';
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 466944 | 770048 | 0.6064 |
|
+--------------+-------------+--------------------------+
|
|
|
Key packing is completely symmetrical. You can get better or worse results depending on the order of insertion. Here's a complete test case, based on yours:
SET NAMES latin1;
|
CREATE OR REPLACE TABLE t1 (a VARCHAR(8) CHARACTER SET latin1, KEY(a)) ENGINE=MyISAM;
|
CREATE OR REPLACE TABLE t2 (a VARCHAR(8) CHARACTER SET latin1, KEY(a DESC)) ENGINE=MyISAM;
|
CREATE OR REPLACE TABLE t3 (a VARCHAR(8) CHARACTER SET latin1, KEY(a)) ENGINE=MyISAM;
|
CREATE OR REPLACE TABLE t4 (a VARCHAR(8) CHARACTER SET latin1, KEY(a DESC)) ENGINE=MyISAM;
|
delimiter $;
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
FOR c3 IN 0x61..0x7A DO
|
INSERT INTO t1 VALUES (CONCAT('aaaa',CHAR(c0), CHAR(c1), CHAR(c2), CHAR(c3)));
|
INSERT INTO t2 VALUES (CONCAT('aaaa',CHAR(c0), CHAR(c1), CHAR(c2), CHAR(c3)));
|
INSERT INTO t3 VALUES (CONCAT('aaaa',CHAR(219-c0), CHAR(219-c1), CHAR(219-c2), CHAR(219-c3)));
|
INSERT INTO t4 VALUES (CONCAT('aaaa',CHAR(219-c0), CHAR(219-c1), CHAR(219-c2), CHAR(219-c3)));
|
END FOR;
|
END FOR;
|
END FOR;
|
END FOR;
|
$
|
delimiter ;$
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME LIKE 't_';
|
drop table t1, t2, t3, t4;
|
as you can see, t1 is like in your first test case, ascending indexes, insertions happen in the ascending order. t2 has descending indexes, t3 has ascending indexes but insertions happen in the descending order, t4 has descending indexes again.
The result is
| t1 |
3836928 |
9139520 |
0.4198 |
ASC index ASC insert |
| t2 |
7424000 |
9139520 |
0.8123 |
DESC index ASC insert |
| t3 |
7424000 |
9139520 |
0.8123 |
ASC index DESC insert |
| t4 |
3836928 |
9139520 |
0.4198 |
DESC index DESC insert |
as you can see, values are packed more tightly when you insert in the index order.
|
|
The reason for this is that when you insert a new key, we only consider packing that key compared to the previous key.
To keep the code simple and not increase insert time too much, we are not considering packing the next key.
This means that if you insert ANDER and then ANDERSON, this will be stored approximately as:
ANDER <ref_to_record> <5>SON <ref_to_record>
If we insert ANDERSON and then ANDER, we get:
ANDER <ref_to_record> ANDERSON <ref_to_record>
It is of course possible to (optionally?) add repacking of the next key, but I am not sure if the increased insert time is worth it.
Note that OPTIMIZE TABLE will sort the index before inserting, which will ensure maxium key packing.
|
|
The problem is also repeatable with not compressed keys:
DELIMITER $
|
CREATE OR REPLACE PROCEDURE p1(insert_desc BOOL, dt TEXT)
|
BEGIN
|
DECLARE cmd TEXT DEFAULT 'CREATE TABLE t1 '
|
'(a DATATYPE CHARACTER SET latin1, KEY(a)) '
|
'ENGINE=MyISAM PACK_KEYS=0';
|
SET NAMES latin1;
|
DROP TABLE IF EXISTS t1;
|
EXECUTE IMMEDIATE REPLACE(cmd, 'DATATYPE', dt);
|
SHOW CREATE TABLE t1;
|
FOR c0 IN 0x61..0x7A DO
|
FOR c1 IN 0x61..0x7A DO
|
FOR c2 IN 0x61..0x7A DO
|
IF NOT insert_desc
|
THEN
|
INSERT INTO t1 VALUES (CONCAT('aaaa',CHAR(c0), CHAR(c1), CHAR(c2)));
|
ELSE
|
INSERT INTO t1 VALUES (CONCAT('aaaa',CHAR(219-c0), CHAR(219-c1), CHAR(219-c2)));
|
END IF;
|
END FOR;
|
END FOR;
|
END FOR;
|
SELECT INDEX_LENGTH, DATA_LENGTH, INDEX_LENGTH/DATA_LENGTH
|
FROM INFORMATION_SCHEMA.TABLES
|
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1';
|
END;
|
$
|
DELIMITER ;
|
with VARCHAR:
CALL p1(false, 'VARCHAR(8)');
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 277504 | 351520 | 0.7894 |
|
+--------------+-------------+--------------------------+
|
CALL p1(true, 'VARCHAR(8)');
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 536576 | 351520 | 1.5264 |
|
+--------------+-------------+--------------------------+
|
and with CHAR:
CALL p1(false, 'CHAR(8)');
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 276480 | 158184 | 1.7478 |
|
+--------------+-------------+--------------------------+
|
CALL p1(true, 'CHAR(8)');
|
+--------------+-------------+--------------------------+
|
| INDEX_LENGTH | DATA_LENGTH | INDEX_LENGTH/DATA_LENGTH |
|
+--------------+-------------+--------------------------+
|
| 535552 | 158184 | 3.3856 |
|
+--------------+-------------+--------------------------+
|
|