|
I create and populate a table as follows:
SET NAMES utf8;
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci) ENGINE=ColumnStore;
|
INSERT INTO t1 VALUES (0xE9);
|
SELECT a, HEX(a) FROM t1;
|
+------+--------+
|
| a | HEX(a) |
|
+------+--------+
|
| é | E9 |
|
+------+--------+
|
Looks good so far.
Now I'm checking min and max values for the underlying extent:
SELECT
|
c.table_schema,
|
c.table_name,
|
c.column_name,
|
hex(e.min_value),
|
hex(e.max_value)
|
FROM
|
information_schema.columnstore_extents e,
|
information_schema.columnstore_columns c
|
WHERE c.table_schema='test'
|
AND c.table_name='t1'
|
AND c.column_name='a'
|
AND c.object_id=e.object_id;
|
+--------------+------------+-------------+------------------+------------------+
|
| table_schema | table_name | column_name | hex(e.min_value) | hex(e.max_value) |
|
+--------------+------------+-------------+------------------+------------------+
|
| test | t1 | a | FFFFFFFFFFFFFFE9 | FFFFFFFFFFFFFF80 |
|
+--------------+------------+-------------+------------------+------------------+
|
The min value does not look fine - for some reasons the 0xE9 went to lowest byte, while all higher bytes were filled with 0xFF. I'd expect:
- Either the result in hex(e.min_value) to be just 0xE9 - this is a CHAR(1) column. It's not clear why it gets converted to uint64_t.
- Or at least 0xE9 goes to the highest byte.
The max value does not look fine either - it contains something not correlating with the character 0xE9 at all.
Now let's check how these min/max values affect WHERE:
Non-matching blocks do not get eliminated
Now I run a query with a value which is not in the table:
DO calsettrace(1);
|
SELECT * FROM t1 WHERE a='o';
|
SELECT
|
regexp_substr(calgettrace(1),'(BlocksTouched-[^;]*);') AS touched,
|
regexp_substr(calgettrace(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
DO calsettrace(0);
|
Empty set, 1 warning (0.045 sec)
|
+------------------+------------------------------+
|
| touched | eliminated |
|
+------------------+------------------------------+
|
| BlocksTouched-1; | PartitionBlocksEliminated-0; |
|
+------------------+------------------------------+
|
It correctly returned empty set, but the "touched" and "eliminated" values are wrong. It should have eliminated this block.
More examples with CHAR(1)
CHAR(1) CHARACER SET cp1251
SET NAMES utf8;
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a CHAR(1) CHARACTER SET cp1251) ENGINE=ColumnStore;
|
INSERT INTO t1 VALUES ('г' /*_cp1251 x'E3'*/);
|
SELECT a, HEX(a) FROM t1;
|
+------+--------+
|
| a | HEX(a) |
|
+------+--------+
|
| г | E3 |
|
+------+--------+
|
Looks good so far.
SELECT
|
c.table_schema,
|
c.table_name,
|
c.column_name,
|
hex(e.min_value),
|
hex(e.max_value)
|
FROM
|
information_schema.columnstore_extents e,
|
information_schema.columnstore_columns c
|
WHERE c.table_schema='test'
|
AND c.table_name='t1'
|
AND c.column_name='a'
|
AND c.object_id=e.object_id;
|
+--------------+------------+-------------+------------------+------------------+
|
| table_schema | table_name | column_name | hex(e.min_value) | hex(e.max_value) |
|
+--------------+------------+-------------+------------------+------------------+
|
| test | t1 | a | FFFFFFFFFFFFFFE3 | FFFFFFFFFFFFFF80 |
|
+--------------+------------+-------------+------------------+------------------+
|
max_value looks wrong.
DO calsettrace(1);
|
SELECT * FROM t1 WHERE a='д'/*_cp1251 x'E4'*/;
|
SELECT
|
regexp_substr(calgettrace(1),'(BlocksTouched-[^;]*);') AS touched,
|
regexp_substr(calgettrace(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
DO calsettrace(0);
|
Empty set, 1 warning (0.046 sec)
|
+------------------+------------------------------+
|
| touched | eliminated |
|
+------------------+------------------------------+
|
| BlocksTouched-1; | PartitionBlocksEliminated-0; |
|
+------------------+------------------------------+
|
Looks wrong. The block should have been eliminated.
CHAR(1) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci
SET NAMES utf8;
|
DROP TABLE IF EXISTS t1;
|
CREATE TABLE t1 (a CHAR(1) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci) ENGINE=ColumnStore;
|
INSERT INTO t1 VALUES ('a');
|
SELECT a, HEX(a) FROM t1;
|
+------+--------+
|
| a | HEX(a) |
|
+------+--------+
|
| a | 61 |
|
+------+--------+
|
Looks good so far.
SELECT
|
c.table_schema,
|
c.table_name,
|
c.column_name,
|
hex(e.min_value),
|
hex(e.max_value)
|
FROM
|
information_schema.columnstore_extents e,
|
information_schema.columnstore_columns c
|
WHERE c.table_schema='test'
|
AND c.table_name='t1'
|
AND c.column_name='a'
|
AND c.object_id=e.object_id;
|
+--------------+------------+-------------+------------------+------------------+
|
| table_schema | table_name | column_name | hex(e.min_value) | hex(e.max_value) |
|
+--------------+------------+-------------+------------------+------------------+
|
| test | t1 | a | 61 | FFFFFFFF80000000 |
|
+--------------+------------+-------------+------------------+------------------+
|
max_value looks wrong.
DO calsettrace(1);
|
SELECT * FROM t1 WHERE a='b';
|
SELECT
|
regexp_substr(calgettrace(1),'(BlocksTouched-[^;]*);') AS touched,
|
regexp_substr(calgettrace(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
DO calsettrace(0);
|
Empty set, 1 warning (0.040 sec)
|
+------------------+------------------------------+
|
| touched | eliminated |
|
+------------------+------------------------------+
|
| BlocksTouched-1; | PartitionBlocksEliminated-0; |
|
+------------------+------------------------------+
|
Looks wrong. The block should have been eliminated.
|