diff --git a/utils/common/collation.h b/utils/common/collation.h
|
index 8d4306213..1152b8baf 100644
|
--- a/utils/common/collation.h
|
+++ b/utils/common/collation.h
|
@@ -109,11 +109,14 @@ namespace datatypes
|
|
class MariaDBHasher
|
{
|
+ static const ulong mPart1DefValue = 1;
|
+ static const ulong mPart2DefValue = 4;
|
+
|
ulong mPart1;
|
ulong mPart2;
|
public:
|
MariaDBHasher()
|
- :mPart1(1), mPart2(4)
|
+ :mPart1(mPart1DefValue), mPart2(mPart2DefValue)
|
{ }
|
MariaDBHasher & add(CHARSET_INFO * cs, const char *str, size_t length)
|
{
|
@@ -124,9 +127,13 @@ public:
|
{
|
return add(cs, str.str(), str.length());
|
}
|
- uint32_t finalize() const
|
+ uint64_t finalize() const
|
+ {
|
+ return mPart1;
|
+ }
|
+ bool wasUsed() const
|
{
|
- return (uint32_t) mPart1;
|
+ return mPart1 != mPart1DefValue || mPart2 != mPart2DefValue;
|
}
|
};
|
|
diff --git a/utils/common/hasher.h b/utils/common/hasher.h
|
index 1633986a2..3f761b6bb 100644
|
--- a/utils/common/hasher.h
|
+++ b/utils/common/hasher.h
|
@@ -402,7 +402,6 @@ private:
|
uint32_t fCmpLen;
|
};
|
|
-
|
}
|
|
#endif // UTILS_HASHER_H
|
diff --git a/utils/common/hashfamily.h b/utils/common/hashfamily.h
|
new file mode 100644
|
index 000000000..ef2f3ec25
|
--- /dev/null
|
+++ b/utils/common/hashfamily.h
|
@@ -0,0 +1,60 @@
|
+/* Copyright (C) 2021 MariaDB Corporation.
|
+
|
+ This program is free software; you can redistribute it and/or
|
+ modify it under the terms of the GNU General Public License
|
+ as published by the Free Software Foundation; version 2 of
|
+ the License.
|
+
|
+ This program is distributed in the hope that it will be useful,
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
+ GNU General Public License for more details.
|
+
|
+ You should have received a copy of the GNU General Public License
|
+ along with this program; if not, write to the Free Software
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
+ MA 02110-1301, USA. */
|
+
|
+#ifndef UTILS_HASHFAMILY_H
|
+#define UTILS_HASHFAMILY_H
|
+
|
+#include "hasher.h"
|
+#include "collation.h"
|
+
|
+namespace utils
|
+{
|
+
|
+class HashFamily // Combines a Hasher_r hash and a MariaDBHasher hash into one 64-bit value.
|
+{
|
+ public:
|
+ HashFamily(const utils::Hasher_r& h,
|
+ const uint64_t intermediateHash,
|
+ const uint64_t len,
|
+ const datatypes::MariaDBHasher& hM) : mHasher(h),
|
+ mMariaDBHasher(hM),
|
+ mHasher_rHash(intermediateHash),
|
+ mHasher_rLen(len) // NOTE(review): uint64_t len is narrowed to the uint32_t member
|
+ { }
|
+
|
+ // Returns (seed * factor + <Hasher_r hash>) * factor + <MariaDBHasher hash>.
|
+ // Algorithm, seed and factor: https://stackoverflow.com/questions/1646807/quick-and-simple-hash-code-combinations
|
+ inline uint64_t finalize() const
|
+ {
|
+ // The MariaDBHasher term is always mixed in. A variant that returned only
|
+ // mHasher.finalize(...) when !mMariaDBHasher.wasUsed() is currently disabled.
|
+ return (seed * factor + mHasher.finalize(mHasher_rHash, mHasher_rLen)) * factor + mMariaDBHasher.finalize();
|
+
|
+ }
|
+ private:
|
+ constexpr static uint64_t seed = 1009ULL;
|
+ constexpr static uint64_t factor = 9176ULL;
|
+
|
+ const utils::Hasher_r& mHasher; // reference member: referent must stay alive until finalize() is called
|
+ const datatypes::MariaDBHasher& mMariaDBHasher; // reference member: same lifetime requirement
|
+ const uint64_t mHasher_rHash; // passed as the first argument to mHasher.finalize()
|
+ const uint32_t mHasher_rLen; // passed as the second argument to mHasher.finalize()
|
+};
|
+
|
+}
|
+#endif
|
+// vim:ts=2 sw=2:
|
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
|
index ad4e78067..463c50b73 100644
|
--- a/utils/rowgroup/rowgroup.h
|
+++ b/utils/rowgroup/rowgroup.h
|
@@ -60,6 +60,7 @@
|
#include "../winport/winport.h"
|
|
#include "collation.h"
|
+#include "common/hashfamily.h"
|
|
|
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
|
@@ -558,7 +559,10 @@ public:
|
// a fcn to check the type defs seperately doesn't exist yet. No normalization.
|
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
inline uint64_t hash() const; // generates a hash for all cols
|
- inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
+ inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
|
+ const utils::Hasher_r& h,
|
+ const uint32_t col,
|
+ uint32_t& intermediateHash) const;
|
inline void colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols) const
|
{
|
@@ -930,7 +934,10 @@ inline utils::ConstString Row::getConstString(uint32_t colIndex) const
|
}
|
|
|
-inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t col) const
|
+inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
|
+ const utils::Hasher_r& h,
|
+ const uint32_t col,
|
+ uint32_t& intermediateHash) const
|
{
|
switch (getColType(col))
|
{
|
@@ -940,12 +947,14 @@ inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t co
|
case execplan::CalpontSystemCatalog::TEXT:
|
{
|
CHARSET_INFO *cs = getCharset(col);
|
- h.add(cs, getConstString(col));
|
+ hM.add(cs, getConstString(col));
|
break;
|
}
|
default:
|
- h.add(&my_charset_bin, getShortConstString(col));
|
+ {
|
+ intermediateHash = h((const char*) &data[offsets[col]], colWidths[col], intermediateHash);
|
break;
|
+ }
|
}
|
}
|
|
@@ -1417,17 +1426,21 @@ inline uint64_t Row::hash() const
|
|
inline uint64_t Row::hash(uint32_t lastCol) const
|
{
|
- datatypes::MariaDBHasher h;
|
-
|
+ // Use two hash classes: MariaDBHasher for text-based, collation-aware
|
+ // data types and Hasher_r for all other data types. utils::HashFamily
|
+ // combines both hashers' results into the hash returned here.
|
+ utils::Hasher_r h;
|
+ datatypes::MariaDBHasher hM;
|
+ uint32_t intermediateHash = 0; // running Hasher_r value, updated by colUpdateMariaDBHasher()
|
// Sometimes we ask this to hash 0 bytes, and it comes through looking like
|
// lastCol = -1. Return 0.
|
if (lastCol >= columnCount)
|
return 0;
|
|
|
for (uint32_t i = 0; i <= lastCol; i++)
|
- colUpdateMariaDBHasher(h, i);
|
+ colUpdateMariaDBHasher(hM, h, i, intermediateHash);
|
|
|
- return h.finalize();
|
+ return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize(); // NOTE(review): lastCol << 2 is forwarded as the Hasher_r length argument - confirm intended
|
}
|
|
inline bool Row::equals(const Row& r2) const
|
root@ip-172-31-3-254:/data/mdb-server/storage/columnstore/columnstore# git --no-pager diff HEAD~
|
diff --git a/utils/common/collation.h b/utils/common/collation.h
|
index 8d4306213..1152b8baf 100644
|
--- a/utils/common/collation.h
|
+++ b/utils/common/collation.h
|
@@ -109,11 +109,14 @@ namespace datatypes
|
|
class MariaDBHasher
|
{
|
+ static const ulong mPart1DefValue = 1;
|
+ static const ulong mPart2DefValue = 4;
|
+
|
ulong mPart1;
|
ulong mPart2;
|
public:
|
MariaDBHasher()
|
- :mPart1(1), mPart2(4)
|
+ :mPart1(mPart1DefValue), mPart2(mPart2DefValue)
|
{ }
|
MariaDBHasher & add(CHARSET_INFO * cs, const char *str, size_t length)
|
{
|
@@ -124,9 +127,13 @@ public:
|
{
|
return add(cs, str.str(), str.length());
|
}
|
- uint32_t finalize() const
|
+ uint64_t finalize() const
|
+ {
|
+ return mPart1;
|
+ }
|
+ bool wasUsed() const
|
{
|
- return (uint32_t) mPart1;
|
+ return mPart1 != mPart1DefValue || mPart2 != mPart2DefValue;
|
}
|
};
|
|
diff --git a/utils/common/hasher.h b/utils/common/hasher.h
|
index 1633986a2..3f761b6bb 100644
|
--- a/utils/common/hasher.h
|
+++ b/utils/common/hasher.h
|
@@ -402,7 +402,6 @@ private:
|
uint32_t fCmpLen;
|
};
|
|
-
|
}
|
|
#endif // UTILS_HASHER_H
|
diff --git a/utils/common/hashfamily.h b/utils/common/hashfamily.h
|
new file mode 100644
|
index 000000000..ef2f3ec25
|
--- /dev/null
|
+++ b/utils/common/hashfamily.h
|
@@ -0,0 +1,60 @@
|
+/* Copyright (C) 2021 MariaDB Corporation.
|
+
|
+ This program is free software; you can redistribute it and/or
|
+ modify it under the terms of the GNU General Public License
|
+ as published by the Free Software Foundation; version 2 of
|
+ the License.
|
+
|
+ This program is distributed in the hope that it will be useful,
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
+ GNU General Public License for more details.
|
+
|
+ You should have received a copy of the GNU General Public License
|
+ along with this program; if not, write to the Free Software
|
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
+ MA 02110-1301, USA. */
|
+
|
+#ifndef UTILS_HASHFAMILY_H
|
+#define UTILS_HASHFAMILY_H
|
+
|
+#include "hasher.h"
|
+#include "collation.h"
|
+
|
+namespace utils
|
+{
|
+
|
+class HashFamily // Combines a Hasher_r hash and a MariaDBHasher hash into one 64-bit value.
|
+{
|
+ public:
|
+ HashFamily(const utils::Hasher_r& h,
|
+ const uint64_t intermediateHash,
|
+ const uint64_t len,
|
+ const datatypes::MariaDBHasher& hM) : mHasher(h),
|
+ mMariaDBHasher(hM),
|
+ mHasher_rHash(intermediateHash),
|
+ mHasher_rLen(len) // NOTE(review): uint64_t len is narrowed to the uint32_t member
|
+ { }
|
+
|
+ // Returns (seed * factor + <Hasher_r hash>) * factor + <MariaDBHasher hash>.
|
+ // Algorithm, seed and factor: https://stackoverflow.com/questions/1646807/quick-and-simple-hash-code-combinations
|
+ inline uint64_t finalize() const
|
+ {
|
+ // The MariaDBHasher term is always mixed in. A variant that returned only
|
+ // mHasher.finalize(...) when !mMariaDBHasher.wasUsed() is currently disabled.
|
+ return (seed * factor + mHasher.finalize(mHasher_rHash, mHasher_rLen)) * factor + mMariaDBHasher.finalize();
|
+
|
+ }
|
+ private:
|
+ constexpr static uint64_t seed = 1009ULL;
|
+ constexpr static uint64_t factor = 9176ULL;
|
+
|
+ const utils::Hasher_r& mHasher; // reference member: referent must stay alive until finalize() is called
|
+ const datatypes::MariaDBHasher& mMariaDBHasher; // reference member: same lifetime requirement
|
+ const uint64_t mHasher_rHash; // passed as the first argument to mHasher.finalize()
|
+ const uint32_t mHasher_rLen; // passed as the second argument to mHasher.finalize()
|
+};
|
+
|
+}
|
+#endif
|
+// vim:ts=2 sw=2:
|
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
|
index ad4e78067..463c50b73 100644
|
--- a/utils/rowgroup/rowgroup.h
|
+++ b/utils/rowgroup/rowgroup.h
|
@@ -60,6 +60,7 @@
|
#include "../winport/winport.h"
|
|
#include "collation.h"
|
+#include "common/hashfamily.h"
|
|
|
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
|
@@ -558,7 +559,10 @@ public:
|
// a fcn to check the type defs seperately doesn't exist yet. No normalization.
|
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
inline uint64_t hash() const; // generates a hash for all cols
|
- inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
+ inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
|
+ const utils::Hasher_r& h,
|
+ const uint32_t col,
|
+ uint32_t& intermediateHash) const;
|
inline void colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols) const
|
{
|
@@ -930,7 +934,10 @@ inline utils::ConstString Row::getConstString(uint32_t colIndex) const
|
}
|
|
|
-inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t col) const
|
+inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
|
+ const utils::Hasher_r& h,
|
+ const uint32_t col,
|
+ uint32_t& intermediateHash) const
|
{
|
switch (getColType(col))
|
{
|
@@ -940,12 +947,14 @@ inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t co
|
case execplan::CalpontSystemCatalog::TEXT:
|
{
|
CHARSET_INFO *cs = getCharset(col);
|
- h.add(cs, getConstString(col));
|
+ hM.add(cs, getConstString(col));
|
break;
|
}
|
default:
|
- h.add(&my_charset_bin, getShortConstString(col));
|
+ {
|
+ intermediateHash = h((const char*) &data[offsets[col]], colWidths[col], intermediateHash);
|
break;
|
+ }
|
}
|
}
|
|
@@ -1417,17 +1426,21 @@ inline uint64_t Row::hash() const
|
|
inline uint64_t Row::hash(uint32_t lastCol) const
|
{
|
- datatypes::MariaDBHasher h;
|
-
|
+ // Use two hash classes: MariaDBHasher for text-based, collation-aware
|
+ // data types and Hasher_r for all other data types. utils::HashFamily
|
+ // combines both hashers' results into the hash returned here.
|
+ utils::Hasher_r h;
|
+ datatypes::MariaDBHasher hM;
|
+ uint32_t intermediateHash = 0; // running Hasher_r value, updated by colUpdateMariaDBHasher()
|
// Sometimes we ask this to hash 0 bytes, and it comes through looking like
|
// lastCol = -1. Return 0.
|
if (lastCol >= columnCount)
|
return 0;
|
|
|
for (uint32_t i = 0; i <= lastCol; i++)
|
- colUpdateMariaDBHasher(h, i);
|
+ colUpdateMariaDBHasher(hM, h, i, intermediateHash);
|
|
|
- return h.finalize();
|
+ return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize(); // NOTE(review): lastCol << 2 is forwarded as the Hasher_r length argument - confirm intended
|
}
|
|
inline bool Row::equals(const Row& r2) const
|