Commit 7339447a by Abseil Team, committed by Copybara-Service

Optimize raw_hash_set destructor.

There are three optimizations here:
1. Early exit once all full slots have been destroyed, which is especially useful for empty tables.
2. `MatchFull` is used in order to iterate over all full slots (a conceptual sketch of this loop follows the commit metadata below).
3. A portable group is used for `MatchFull` in the case of a small table.

PiperOrigin-RevId: 603434899
Change-Id: I40bc90d17331d579cfbb1b4e0693f0913e5c38e4
parent a3ee6ce2
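Before reading the diff, here is the shape that optimizations 1 and 2 take: instead of probing every control byte up to `capacity()` one at a time, destruction walks the table group by group, visits only the slot indices reported by a full-slot mask, and stops once `size()` slots have been handled, so an empty table does no per-slot work at all. The snippet below is only a sketch with illustrative names, not the Abseil source; `Group` stands for whichever group type provides `MaskFull()`.

```cpp
#include <cstddef>
#include <cstdint>

// Sketch: group-wise walk over full slots with an early exit once all `size`
// full slots have been visited (optimization 1), driven by a full-slot mask
// rather than a per-byte check (optimization 2).
template <class Group, class Slot, class Destroy>
void DestroyFullSlotsSketch(const int8_t* ctrl, Slot* slot, size_t size,
                            Destroy destroy) {
  size_t remaining = size;
  while (remaining != 0) {
    for (uint32_t i : Group(ctrl).MaskFull()) {  // one index per full slot
      destroy(slot + i);
      --remaining;
    }
    ctrl += Group::kWidth;
    slot += Group::kWidth;
  }
}
```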
@@ -822,21 +822,21 @@ struct GroupPortableImpl {
#ifdef ABSL_INTERNAL_HAVE_SSE2
using Group = GroupSse2Impl;
-using GroupEmptyOrDeleted = GroupSse2Impl;
+using GroupFullEmptyOrDeleted = GroupSse2Impl;
#elif defined(ABSL_INTERNAL_HAVE_ARM_NEON) && defined(ABSL_IS_LITTLE_ENDIAN)
using Group = GroupAArch64Impl;
// For Aarch64, we use the portable implementation for counting and masking
-// empty or deleted group elements. This is to avoid the latency of moving
+// full, empty or deleted group elements. This is to avoid the latency of moving
// between data GPRs and Neon registers when it does not provide a benefit.
// Using Neon is profitable when we call Match(), but is not when we don't,
-// which is the case when we do *EmptyOrDeleted operations. It is difficult to
-// make a similar approach beneficial on other architectures such as x86 since
-// they have much lower GPR <-> vector register transfer latency and 16-wide
-// Groups.
-using GroupEmptyOrDeleted = GroupPortableImpl;
+// which is the case when we do *EmptyOrDeleted and MaskFull operations.
+// It is difficult to make a similar approach beneficial on other architectures
+// such as x86 since they have much lower GPR <-> vector register transfer
+// latency and 16-wide Groups.
+using GroupFullEmptyOrDeleted = GroupPortableImpl;
#else
using Group = GroupPortableImpl;
-using GroupEmptyOrDeleted = GroupPortableImpl;
+using GroupFullEmptyOrDeleted = GroupPortableImpl;
#endif

// When there is an insertion with no reserved growth, we rehash with
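A note on why the portable group is attractive for this: full control bytes are exactly the ones with the high bit clear (empty, deleted and the sentinel all have it set), so a portable `MaskFull` needs only plain integer arithmetic on the eight control bytes loaded as one little-endian word, with no GPR-to-vector transfer. Below is a standalone sketch of that bit trick with illustrative names; the real version lives in `GroupPortableImpl`.

```cpp
#include <bit>
#include <cstdint>

// Eight control bytes packed into one uint64_t. Flipping the high bit of each
// byte and masking the high bits back out leaves one set bit per full byte.
constexpr uint64_t kMsbs = 0x8080808080808080ULL;

inline uint64_t MaskFullSketch(uint64_t ctrl_bytes) {
  return (ctrl_bytes ^ kMsbs) & kMsbs;
}

// Each surviving bit sits in bit 7 of its byte, so dividing the bit index by 8
// recovers the slot position within the group.
template <class Callback>
void ForEachFullSketch(uint64_t ctrl_bytes, Callback cb) {
  for (uint64_t m = MaskFullSketch(ctrl_bytes); m != 0; m &= m - 1) {
    cb(std::countr_zero(m) / 8);
  }
}
```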
@@ -1463,7 +1463,7 @@ inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) {
  auto seq = probe(common, hash);
  const ctrl_t* ctrl = common.control();
  while (true) {
-    GroupEmptyOrDeleted g{ctrl + seq.offset()};
+    GroupFullEmptyOrDeleted g{ctrl + seq.offset()};
    auto mask = g.MaskEmptyOrDeleted();
    if (mask) {
#if !defined(NDEBUG)
@@ -1545,6 +1545,44 @@ inline void* SlotAddress(void* slot_array, size_t slot, size_t slot_size) {
                                 (slot * slot_size));
}

+// Iterates over all full slots and calls `cb(SlotType*)`.
+// NOTE: erasure from this table is not allowed during the callback call.
+template <class SlotType, class Callback>
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void IterateOverFullSlots(
+    const CommonFields& c, SlotType* slot, Callback cb) {
+  const size_t cap = c.capacity();
+  const ctrl_t* ctrl = c.control();
+  if (is_small(cap)) {
+    // Mirrored/cloned control bytes in a small table are also located in the
+    // first group (starting from position 0). We take the group starting at
+    // position `capacity` in order to avoid duplicates.
+    // A small table's capacity fits into a portable group, and
+    // GroupPortableImpl::MaskFull is more efficient when
+    // capacity <= GroupPortableImpl::kWidth.
+    assert(cap <= GroupPortableImpl::kWidth &&
+           "unexpectedly large small capacity");
+    static_assert(Group::kWidth >= GroupPortableImpl::kWidth,
+                  "unexpected group width");
+    // The group starts at the kSentinel slot, so indices in the mask are
+    // shifted up by 1.
+    --slot;
+    for (uint32_t i : GroupPortableImpl(ctrl + cap).MaskFull()) {
+      cb(slot + i);
+    }
+    return;
+  }
+  size_t remaining = c.size();
+  while (remaining != 0) {
+    for (uint32_t i : GroupFullEmptyOrDeleted(ctrl).MaskFull()) {
+      cb(slot + i);
+      --remaining;
+    }
+    slot += Group::kWidth;
+    ctrl += Group::kWidth;
+  }
+}
+
// Helper class to perform resize of the hash set.
//
// It contains special optimizations for small group resizes.
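To make the small-table branch of `IterateOverFullSlots` above concrete: for a small capacity every real control byte is mirrored after the sentinel, so the group read at `ctrl + capacity` begins at the sentinel and sees the mirrored bytes at positions 1..capacity, which is why the loop decrements `slot` once and then uses `slot + i`. The following is a self-contained, hand-built illustration for capacity 3 with slots 0 and 2 full; the byte values are hypothetical H2 hashes, and the mask is computed with the same high-bit trick sketched earlier, not with the Abseil classes themselves.

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  constexpr uint8_t kEmpty = 0x80, kSentinel = 0xFF;
  // Capacity 3: real control bytes, the sentinel, then mirrored copies of the
  // real bytes; the rest of the cloned region stays empty. 0x11 and 0x22 stand
  // in for 7-bit H2 values, i.e. full slots 0 and 2.
  const uint8_t ctrl[] = {0x11, kEmpty, 0x22, kSentinel,
                          0x11, kEmpty, 0x22, kEmpty, kEmpty, kEmpty, kEmpty};
  // Load the 8-byte group starting at ctrl + capacity (the sentinel byte).
  uint64_t group = 0;
  for (int i = 0; i < 8; ++i) group |= uint64_t{ctrl[3 + i]} << (8 * i);
  constexpr uint64_t kMsbs = 0x8080808080808080ULL;
  uint64_t full = (group ^ kMsbs) & kMsbs;
  int slots_seen[2], n = 0;
  for (uint64_t m = full; m != 0; m &= m - 1) {
    int i = std::countr_zero(m) / 8;  // position within the group
    slots_seen[n++] = i - 1;          // the `--slot` adjustment: slot index
  }
  assert(n == 2 && slots_seen[0] == 0 && slots_seen[1] == 2);
  return 0;
}
```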
@@ -2028,7 +2066,7 @@ class raw_hash_set {
    void skip_empty_or_deleted() {
      while (IsEmptyOrDeleted(*ctrl_)) {
        uint32_t shift =
-            GroupEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted();
+            GroupFullEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted();
        ctrl_ += shift;
        slot_ += shift;
      }
@@ -2889,14 +2927,10 @@ class raw_hash_set {
  inline void destroy_slots() {
    if (PolicyTraits::template destroy_is_trivial<Alloc>()) return;
-    const size_t cap = capacity();
-    const ctrl_t* ctrl = control();
-    slot_type* slot = slot_array();
-    for (size_t i = 0; i != cap; ++i) {
-      if (IsFull(ctrl[i])) {
-        destroy(slot + i);
-      }
-    }
+    IterateOverFullSlots(common(), slot_array(),
+                         [&](slot_type* slot) ABSL_ATTRIBUTE_ALWAYS_INLINE {
+                           this->destroy(slot);
+                         });
  }

  inline void dealloc() {
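Two things are worth noting about the new `destroy_slots()`. The `destroy_is_trivial` guard on its first line is pre-existing context, so tables of trivially destructible elements still skip the walk entirely; for everything else the walk now touches only `size()` full slots instead of every control byte up to `capacity()`. The same callback pattern is reusable for other per-slot passes; a hypothetical member written in the same style (not part of the commit, and assuming an integer-keyed table such as the IntTable used in the tests below) might look like this:

```cpp
// Hypothetical sketch, relying on the same raw_hash_set member context as
// destroy_slots() above: sum the stored values with an always-inlined lambda.
inline int64_t sum_of_slots_for_illustration() {
  int64_t sum = 0;
  IterateOverFullSlots(common(), slot_array(),
                       [&](slot_type* slot) ABSL_ATTRIBUTE_ALWAYS_INLINE {
                         sum += *slot;
                       });
  return sum;
}
```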
@@ -64,6 +64,10 @@ namespace container_internal {
struct RawHashSetTestOnlyAccess {
  template <typename C>
+  static auto GetCommon(const C& c) -> decltype(c.common()) {
+    return c.common();
+  }
+  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slot_array()) {
    return c.slot_array();
  }
@@ -2741,6 +2745,37 @@ TEST(Table, CountedHash) {
  }
}

+TEST(Table, IterateOverFullSlotsEmpty) {
+  IntTable t;
+  auto fail_if_any = [](int64_t* i) { FAIL() << "expected no slots " << i; };
+  container_internal::IterateOverFullSlots(
+      RawHashSetTestOnlyAccess::GetCommon(t),
+      RawHashSetTestOnlyAccess::GetSlots(t), fail_if_any);
+  for (size_t i = 0; i < 256; ++i) {
+    t.reserve(i);
+    container_internal::IterateOverFullSlots(
+        RawHashSetTestOnlyAccess::GetCommon(t),
+        RawHashSetTestOnlyAccess::GetSlots(t), fail_if_any);
+  }
+}
+
+TEST(Table, IterateOverFullSlotsFull) {
+  IntTable t;
+  std::vector<int64_t> expected_slots;
+  for (int64_t i = 0; i < 128; ++i) {
+    t.insert(i);
+    expected_slots.push_back(i);
+
+    std::vector<int64_t> slots;
+    container_internal::IterateOverFullSlots(
+        RawHashSetTestOnlyAccess::GetCommon(t),
+        RawHashSetTestOnlyAccess::GetSlots(t),
+        [&slots](int64_t* i) { slots.push_back(*i); });
+    EXPECT_THAT(slots, testing::UnorderedElementsAreArray(expected_slots));
+  }
+}
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END