Details
-
Bug
-
Status: Open (View Workflow)
-
Minor
-
Resolution: Unresolved
-
6.1.1
-
None
Description
Consider the following scenario. Create two ColumnStore tables, each with a single TEXT column:
create table cs1(t text) engine=columnstore;
|
create table cs2(t text) engine=columnstore;
|
Populate both tables with the string from the attached file:
cpimport -s '|' test cs1 text_column_length_65280.txt
|
cpimport -s '|' test cs2 text_column_length_65280.txt
|
Now the query returns 0 records:
select cs1.t from cs1,cs2 where cs1.t=cs2.t;
|
The problem is that TypelessDataEncoder/Decoder fails to encode the length of key column values that are 65280 bytes or longer.
Here is the patch for the problem.
diff --git a/utils/joiner/tuplejoiner.cpp b/utils/joiner/tuplejoiner.cpp
|
index 983cb4787..71a1f0bac 100644
|
--- a/utils/joiner/tuplejoiner.cpp
|
+++ b/utils/joiner/tuplejoiner.cpp
|
@@ -1257,16 +1257,23 @@ public: |
{ }
|
bool store(uint8_t* to, uint32_t& off, uint32_t keylen) const |
{
|
- if (mLength >= 65280) // We encode length into two bytes below |
+ if (mLength > 65534) // We encode length into two bytes below |
{
|
- throw runtime_error("Cannot join strings greater than 65279 B"); |
+ throw runtime_error("Cannot join strings greater than 64KB - 2"); |
}
|
|
if (off + mLength + 2 > keylen) |
return true; |
-
|
- to[off++]= mLength / 0xFF;
|
- to[off++]= mLength % 0xFF;
|
+ if (UNLIKELY(mLength > 65025)) |
+ {
|
+ to[off++] = 0xFF;
|
+ to[off++]= mLength & 0x00FF;
|
+ }
|
+ else |
+ {
|
+ to[off++]= mLength / 0xFF;
|
+ to[off++]= mLength % 0xFF;
|
+ }
|
/*
|
QQ: perhaps now when we put length,
|
we don't need to stop at '\0' bytes any more. |
@@ -1310,7 +1317,13 @@ public: |
uint32_t scanStringLength()
|
{
|
checkAvailableData(2);
|
- uint32_t res = ((uint32_t) mPtr[0]) * 255 + mPtr[1];
|
+ uint32_t res = 0;
|
+ if (UNLIKELY(mPtr[0] == 0xFF)) |
+ {
|
+ res = (uint32_t)0xFF00 + mPtr[1];
|
+ }
|
+ else |
+ res = ((uint32_t) mPtr[0]) * 255 + mPtr[1];
|
mPtr += 2;
|
return res; |
}
|