some hash improvements: generate uint64_t instead of size_t

This commit is contained in:
Martin Leitner-Ankerl
2022-08-20 15:33:33 +02:00
parent 277696b8ed
commit d6dc7759b5
7 changed files with 73 additions and 40 deletions

View File

@@ -6,6 +6,7 @@ Checks: '*,
-llvmlibc*,
-bugprone-easily-swappable-parameters,
-cert-err58-cpp,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-llvm-header-guard,

View File

@@ -149,7 +149,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
return (static_cast<uint64_t>(p[0]) << 16U) | (static_cast<uint64_t>(p[k >> 1U]) << 8U) | p[k - 1];
}
[[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
[[maybe_unused]] [[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f),
UINT64_C(0xe7037ed1a0b428db),
UINT64_C(0x8ebc6af09c88c6e3),
@@ -203,45 +203,45 @@ static inline void mum(uint64_t* a, uint64_t* b) {
} // namespace detail::wyhash
template <typename T, typename Enable = void>
struct hash : public std::hash<T> {
struct hash {
using is_avalanching = void;
auto operator()(T const& obj) const noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>())))
-> size_t {
return static_cast<size_t>(detail::wyhash::hash(std::hash<T>::operator()(obj)));
-> uint64_t {
return detail::wyhash::hash(std::hash<T>{}(obj));
}
};
template <typename CharT>
struct hash<std::basic_string<CharT>> {
using is_avalanching = void;
auto operator()(std::basic_string<CharT> const& str) const noexcept -> size_t {
return static_cast<size_t>(detail::wyhash::hash(str.data(), sizeof(CharT) * str.size()));
auto operator()(std::basic_string<CharT> const& str) const noexcept -> uint64_t {
return detail::wyhash::hash(str.data(), sizeof(CharT) * str.size());
}
};
template <typename CharT>
struct hash<std::basic_string_view<CharT>> {
using is_avalanching = void;
auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> size_t {
return static_cast<size_t>(detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size()));
auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> uint64_t {
return detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size());
}
};
template <class T>
struct hash<T*> {
using is_avalanching = void;
auto operator()(T* ptr) const noexcept -> size_t {
auto operator()(T* ptr) const noexcept -> uint64_t {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr)));
return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr));
}
};
template <class T>
struct hash<std::unique_ptr<T>> {
using is_avalanching = void;
auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> size_t {
auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> uint64_t {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
}
};
@@ -250,27 +250,27 @@ struct hash<std::shared_ptr<T>> {
using is_avalanching = void;
auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> uint64_t {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
}
};
template <typename Enum>
struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
using is_avalanching = void;
auto operator()(Enum e) const noexcept -> size_t {
auto operator()(Enum e) const noexcept -> uint64_t {
using Underlying = typename std::underlying_type_t<Enum>;
return static_cast<size_t>(detail::wyhash::hash(static_cast<Underlying>(e)));
return detail::wyhash::hash(static_cast<Underlying>(e));
}
};
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
# define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T) \
template <> \
struct hash<T> { \
using is_avalanching = void; \
auto operator()(T const& obj) const noexcept -> size_t { \
return static_cast<size_t>(detail::wyhash::hash(static_cast<uint64_t>(obj))); \
} \
# define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T) \
template <> \
struct hash<T> { \
using is_avalanching = void; \
auto operator()(T const& obj) const noexcept -> uint64_t { \
return detail::wyhash::hash(static_cast<uint64_t>(obj)); \
} \
}
# if defined(__GNUC__) && !defined(__clang__)
@@ -432,16 +432,20 @@ private:
return static_cast<dist_and_fingerprint_type>(x - Bucket::DIST_INC);
}
// The goal of mixed_hash is to always produce a high quality 64bit hash.
template <typename K>
[[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
if constexpr (is_detected_v<detect_avalanching, Hash>) {
# if SIZE_MAX == UINT32_MAX
// On 32bit systems we still want 64bit hashes
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
# else
return m_hash(key);
# endif
// we know that the hash is good because is_avalanching.
if constexpr (sizeof(decltype(m_hash(key))) < sizeof(uint64_t)) {
// 32bit hash and is_avalanching => multiply with a constant to avalanche bits upwards
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
} else {
// 64bit and is_avalanching => only use the hash itself.
return m_hash(key);
}
} else {
// not is_avalanching => apply wyhash
return wyhash::hash(m_hash(key));
}
}

View File

@@ -10,15 +10,15 @@ namespace checksum {
// final step from MurmurHash3
[[nodiscard]] static inline auto mix(uint64_t k) -> uint64_t {
k ^= k >> 33;
k ^= k >> 33U;
k *= 0xff51afd7ed558ccdULL;
k ^= k >> 33;
k ^= k >> 33U;
k *= 0xc4ceb9fe1a85ec53ULL;
k ^= k >> 33;
k ^= k >> 33U;
return k;
}
[[nodiscard]] static inline auto mix(std::string_view data) -> uint64_t {
[[maybe_unused]] [[nodiscard]] static inline auto mix(std::string_view data) -> uint64_t {
constexpr uint64_t FNV_offset_basis = UINT64_C(14695981039346656037);
constexpr uint64_t FNV_prime = UINT64_C(1099511628211);
@@ -30,12 +30,12 @@ namespace checksum {
return val;
}
[[nodiscard]] static inline auto mix(Counter::Obj const& cdv) -> uint64_t {
[[maybe_unused]] [[nodiscard]] static inline auto mix(Counter::Obj const& cdv) -> uint64_t {
return mix(cdv.get());
}
// from boost::hash_combine, with additional fmix64 of value
[[nodiscard]] static inline auto combine(uint64_t seed, uint64_t value) -> uint64_t {
[[maybe_unused]] [[nodiscard]] static inline auto combine(uint64_t seed, uint64_t value) -> uint64_t {
return seed ^ (value + 0x9e3779b9 + (seed << 6U) + (seed >> 2U));
}

View File

@@ -60,6 +60,7 @@ test_sources = [
'unit/reserve_and_assign.cpp',
'unit/reserve.cpp',
'unit/set.cpp',
'unit/std_hash.cpp',
'unit/swap.cpp',
'unit/transparent.cpp',
'unit/try_emplace.cpp',

View File

@@ -19,12 +19,12 @@ TEST_CASE("iterators_erase") {
map[Counter::Obj(i * 101, counts)] = Counter::Obj(i * 101, counts);
}
auto it = map.find(Counter::Obj(20 * 101, counts));
auto it = map.find(Counter::Obj(size_t{20} * 101, counts));
REQUIRE(map.size() == 100);
REQUIRE(map.end() != map.find(Counter::Obj(20 * 101, counts)));
REQUIRE(map.end() != map.find(Counter::Obj(size_t{20} * 101, counts)));
it = map.erase(it);
REQUIRE(map.size() == 99);
REQUIRE(map.end() == map.find(Counter::Obj(20 * 101, counts)));
REQUIRE(map.end() == map.find(Counter::Obj(size_t{20} * 101, counts)));
it = map.begin();
size_t currentSize = map.size();

27
test/unit/std_hash.cpp Normal file
View File

@@ -0,0 +1,27 @@
#include <ankerl/unordered_dense.h>
#include <doctest.h>
#include <cstddef> // for size_t
#include <string> // for allocator, string, operator==
#include <utility> // for pair, move
#include <vector> // for vector
// Minimal key type used by this test: wraps a single 64-bit value.
struct Foo {
    uint64_t i;
};

// std::hash specialization for Foo. The result is intentionally trivial
// (stored value plus one, converted to size_t) so it is easy to predict.
template <>
struct std::hash<Foo> {
    auto operator()(Foo const& value) const noexcept -> size_t {
        // Unsigned 64-bit arithmetic: wraps around to 0 when i is the maximum value.
        auto const bumped = value.i + 1;
        return static_cast<size_t>(bumped);
    }
};
// Verifies that ankerl::unordered_dense::hash works for a type that only
// provides a std::hash specialization, and that its result equals the hash of
// the std::hash value taken as a plain uint64_t.
TEST_CASE("std_hash") {
    auto const foo = Foo{12345};

    // The std::hash specialization yields the stored value plus one.
    REQUIRE(std::hash<Foo>{}(foo) == 12346U);

    // unordered_dense::hash expands that into a full 64-bit hash; the test
    // expects hashing the integer 12346 directly to give the same value.
    constexpr auto expected = UINT64_C(0x3F645BE4CE24110C);
    REQUIRE(ankerl::unordered_dense::hash<Foo>{}(foo) == expected);
    REQUIRE(ankerl::unordered_dense::hash<uint64_t>{}(12346U) == expected);
}

View File

@@ -23,17 +23,17 @@ public:
using hash_type = ankerl::unordered_dense::hash<std::string_view>;
using is_transparent = void;
[[nodiscard]] auto operator()(const char* str) const -> size_t {
[[nodiscard]] auto operator()(const char* str) const -> uint64_t {
++m_num_charstar;
return hash_type{}(str);
}
[[nodiscard]] auto operator()(std::string_view str) const -> size_t {
[[nodiscard]] auto operator()(std::string_view str) const -> uint64_t {
++m_num_stringview;
return hash_type{}(str);
}
[[nodiscard]] auto operator()(std::string const& str) const -> size_t {
[[nodiscard]] auto operator()(std::string const& str) const -> uint64_t {
++m_num_string;
return hash_type{}(str);
}