mirror of
https://github.com/martinus/unordered_dense.git
synced 2026-01-18 17:21:27 +01:00
some hash improvements: generate uint64_t instead of size_t
This commit is contained in:
@@ -6,6 +6,7 @@ Checks: '*,
|
||||
-llvmlibc*,
|
||||
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-cert-err58-cpp,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
|
||||
-llvm-header-guard,
|
||||
|
||||
@@ -149,7 +149,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
|
||||
return (static_cast<uint64_t>(p[0]) << 16U) | (static_cast<uint64_t>(p[k >> 1U]) << 8U) | p[k - 1];
|
||||
}
|
||||
|
||||
[[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
|
||||
[[maybe_unused]] [[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
|
||||
static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f),
|
||||
UINT64_C(0xe7037ed1a0b428db),
|
||||
UINT64_C(0x8ebc6af09c88c6e3),
|
||||
@@ -203,45 +203,45 @@ static inline void mum(uint64_t* a, uint64_t* b) {
|
||||
} // namespace detail::wyhash
|
||||
|
||||
template <typename T, typename Enable = void>
|
||||
struct hash : public std::hash<T> {
|
||||
struct hash {
|
||||
using is_avalanching = void;
|
||||
auto operator()(T const& obj) const noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>())))
|
||||
-> size_t {
|
||||
return static_cast<size_t>(detail::wyhash::hash(std::hash<T>::operator()(obj)));
|
||||
-> uint64_t {
|
||||
return detail::wyhash::hash(std::hash<T>{}(obj));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
struct hash<std::basic_string<CharT>> {
|
||||
using is_avalanching = void;
|
||||
auto operator()(std::basic_string<CharT> const& str) const noexcept -> size_t {
|
||||
return static_cast<size_t>(detail::wyhash::hash(str.data(), sizeof(CharT) * str.size()));
|
||||
auto operator()(std::basic_string<CharT> const& str) const noexcept -> uint64_t {
|
||||
return detail::wyhash::hash(str.data(), sizeof(CharT) * str.size());
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
struct hash<std::basic_string_view<CharT>> {
|
||||
using is_avalanching = void;
|
||||
auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> size_t {
|
||||
return static_cast<size_t>(detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size()));
|
||||
auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> uint64_t {
|
||||
return detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size());
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct hash<T*> {
|
||||
using is_avalanching = void;
|
||||
auto operator()(T* ptr) const noexcept -> size_t {
|
||||
auto operator()(T* ptr) const noexcept -> uint64_t {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
|
||||
return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr)));
|
||||
return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr));
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct hash<std::unique_ptr<T>> {
|
||||
using is_avalanching = void;
|
||||
auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> size_t {
|
||||
auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> uint64_t {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
|
||||
return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
|
||||
return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -250,27 +250,27 @@ struct hash<std::shared_ptr<T>> {
|
||||
using is_avalanching = void;
|
||||
// Hashes the address returned by get() (converted to uintptr_t), never the pointee.
auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> uint64_t {
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
    return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
}
|
||||
};
|
||||
|
||||
template <typename Enum>
|
||||
struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
|
||||
using is_avalanching = void;
|
||||
auto operator()(Enum e) const noexcept -> size_t {
|
||||
auto operator()(Enum e) const noexcept -> uint64_t {
|
||||
using Underlying = typename std::underlying_type_t<Enum>;
|
||||
return static_cast<size_t>(detail::wyhash::hash(static_cast<Underlying>(e)));
|
||||
return detail::wyhash::hash(static_cast<Underlying>(e));
|
||||
}
|
||||
};
|
||||
|
||||
// Defines an explicit specialization hash<T> whose call operator casts the value to
// uint64_t and passes it to detail::wyhash::hash. The expansion ends at the closing
// brace of the struct, so the use site supplies the trailing semicolon.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#    define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T)                    \
        template <>                                                      \
        struct hash<T> {                                                 \
            using is_avalanching = void;                                 \
            auto operator()(T const& obj) const noexcept -> uint64_t {   \
                return detail::wyhash::hash(static_cast<uint64_t>(obj)); \
            }                                                            \
        }
|
||||
|
||||
# if defined(__GNUC__) && !defined(__clang__)
|
||||
@@ -432,16 +432,20 @@ private:
|
||||
return static_cast<dist_and_fingerprint_type>(x - Bucket::DIST_INC);
|
||||
}
|
||||
|
||||
// The goal of mixed_hash is to always produce a high quality 64bit hash.
|
||||
template <typename K>
|
||||
[[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
|
||||
if constexpr (is_detected_v<detect_avalanching, Hash>) {
|
||||
# if SIZE_MAX == UINT32_MAX
|
||||
// On 32bit systems we still want 64bit hashes
|
||||
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
|
||||
# else
|
||||
return m_hash(key);
|
||||
# endif
|
||||
// we know that the hash is good because is_avalanching.
|
||||
if constexpr (sizeof(decltype(m_hash(key))) < sizeof(uint64_t)) {
|
||||
// 32bit hash and is_avalanching => multiply with a constant to avalanche bits upwards
|
||||
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
|
||||
} else {
|
||||
// 64bit and is_avalanching => only use the hash itself.
|
||||
return m_hash(key);
|
||||
}
|
||||
} else {
|
||||
// not is_avalanching => apply wyhash
|
||||
return wyhash::hash(m_hash(key));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,15 +10,15 @@ namespace checksum {
|
||||
|
||||
// final step from MurmurHash3
//
// Scrambles a 64-bit value with three xor-shift steps (by 33 bits) interleaved with
// two multiplications. An input of 0 yields 0.
[[nodiscard]] static inline auto mix(uint64_t k) -> uint64_t {
    k ^= k >> 33U;
    k *= 0xff51afd7ed558ccdULL;
    k ^= k >> 33U;
    k *= 0xc4ceb9fe1a85ec53ULL;
    k ^= k >> 33U;
    return k;
}
|
||||
|
||||
[[nodiscard]] static inline auto mix(std::string_view data) -> uint64_t {
|
||||
[[maybe_unused]] [[nodiscard]] static inline auto mix(std::string_view data) -> uint64_t {
|
||||
constexpr uint64_t FNV_offset_basis = UINT64_C(14695981039346656037);
|
||||
constexpr uint64_t FNV_prime = UINT64_C(1099511628211);
|
||||
|
||||
@@ -30,12 +30,12 @@ namespace checksum {
|
||||
return val;
|
||||
}
|
||||
|
||||
// Checksum contribution of a Counter::Obj: forwards the value returned by its get()
// to another mix() overload.
[[maybe_unused]] [[nodiscard]] static inline auto mix(Counter::Obj const& cdv) -> uint64_t {
    return mix(cdv.get());
}
|
||||
|
||||
// from boost::hash_combine, with additional fmix64 of value
//
// Folds `value` into `seed` and returns the new seed; neither argument is modified.
// NOTE(review): this function does not itself apply fmix64 to `value` - presumably
// callers pass an already mixed value (e.g. the result of mix()); confirm at call sites.
[[maybe_unused]] [[nodiscard]] static inline auto combine(uint64_t seed, uint64_t value) -> uint64_t {
    return seed ^ (value + 0x9e3779b9 + (seed << 6U) + (seed >> 2U));
}
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ test_sources = [
|
||||
'unit/reserve_and_assign.cpp',
|
||||
'unit/reserve.cpp',
|
||||
'unit/set.cpp',
|
||||
'unit/std_hash.cpp',
|
||||
'unit/swap.cpp',
|
||||
'unit/transparent.cpp',
|
||||
'unit/try_emplace.cpp',
|
||||
|
||||
@@ -19,12 +19,12 @@ TEST_CASE("iterators_erase") {
|
||||
map[Counter::Obj(i * 101, counts)] = Counter::Obj(i * 101, counts);
|
||||
}
|
||||
|
||||
auto it = map.find(Counter::Obj(20 * 101, counts));
|
||||
auto it = map.find(Counter::Obj(size_t{20} * 101, counts));
|
||||
REQUIRE(map.size() == 100);
|
||||
REQUIRE(map.end() != map.find(Counter::Obj(20 * 101, counts)));
|
||||
REQUIRE(map.end() != map.find(Counter::Obj(size_t{20} * 101, counts)));
|
||||
it = map.erase(it);
|
||||
REQUIRE(map.size() == 99);
|
||||
REQUIRE(map.end() == map.find(Counter::Obj(20 * 101, counts)));
|
||||
REQUIRE(map.end() == map.find(Counter::Obj(size_t{20} * 101, counts)));
|
||||
|
||||
it = map.begin();
|
||||
size_t currentSize = map.size();
|
||||
|
||||
27
test/unit/std_hash.cpp
Normal file
27
test/unit/std_hash.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include <doctest.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <string> // for allocator, string, operator==
|
||||
#include <utility> // for pair, move
|
||||
#include <vector> // for vector
|
||||
|
||||
// Minimal key type for the test: wraps a single 64-bit integer.
struct Foo {
    uint64_t i;
};

// std::hash specialization for Foo with a deliberately trivial result (i + 1,
// converted to size_t), so the test can tell the raw std::hash value apart from
// what ankerl::unordered_dense::hash makes of it.
template <>
struct std::hash<Foo> {
    auto operator()(Foo const& foo) const noexcept -> size_t {
        return static_cast<size_t>(foo.i + 1);
    }
};
|
||||
|
||||
// Checks that ankerl::unordered_dense::hash<Foo> falls back to std::hash<Foo> and
// then mixes the result: hashing Foo{12345} must give the same 64-bit value as
// hashing the plain integer 12346 (what std::hash<Foo> returns for it).
TEST_CASE("std_hash") {
    constexpr auto expected = UINT64_C(0x3F645BE4CE24110C);

    auto f = Foo{12345};
    REQUIRE(std::hash<Foo>{}(f) == 12346U);
    // unordered_dense::hash blows that up to 64bit!
    REQUIRE(ankerl::unordered_dense::hash<Foo>{}(f) == expected);
    REQUIRE(ankerl::unordered_dense::hash<uint64_t>{}(12346U) == expected);
}
|
||||
@@ -23,17 +23,17 @@ public:
|
||||
using hash_type = ankerl::unordered_dense::hash<std::string_view>;
|
||||
using is_transparent = void;
|
||||
|
||||
[[nodiscard]] auto operator()(const char* str) const -> size_t {
|
||||
[[nodiscard]] auto operator()(const char* str) const -> uint64_t {
|
||||
++m_num_charstar;
|
||||
return hash_type{}(str);
|
||||
}
|
||||
|
||||
[[nodiscard]] auto operator()(std::string_view str) const -> size_t {
|
||||
[[nodiscard]] auto operator()(std::string_view str) const -> uint64_t {
|
||||
++m_num_stringview;
|
||||
return hash_type{}(str);
|
||||
}
|
||||
|
||||
[[nodiscard]] auto operator()(std::string const& str) const -> size_t {
|
||||
[[nodiscard]] auto operator()(std::string const& str) const -> uint64_t {
|
||||
++m_num_string;
|
||||
return hash_type{}(str);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user