Commit 6ab5b0aa by Vitaly Goldshteyn Committed by Copybara-Service

Move `prepare_insert` out of the line as type erased `PrepareInsertNonSoo`.

This significantly reduces the binary size of large binaries and creates a single hot function instead of many cold ones, which decreases cache misses during code execution.

We also avoid size-related computations for tables with no deleted slots when a resize is necessary.

PiperOrigin-RevId: 635527119
Change-Id: I763b135f1f6089051e62e348a07b33536af265ab
parent 01283057
...@@ -15,8 +15,11 @@ ...@@ -15,8 +15,11 @@
#include "absl/container/internal/compressed_tuple.h" #include "absl/container/internal/compressed_tuple.h"
#include <memory> #include <memory>
#include <set>
#include <string> #include <string>
#include <type_traits>
#include <utility> #include <utility>
#include <vector>
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
...@@ -55,6 +58,7 @@ namespace { ...@@ -55,6 +58,7 @@ namespace {
using absl::test_internal::CopyableMovableInstance; using absl::test_internal::CopyableMovableInstance;
using absl::test_internal::InstanceTracker; using absl::test_internal::InstanceTracker;
using ::testing::Each;
TEST(CompressedTupleTest, Sizeof) { TEST(CompressedTupleTest, Sizeof) {
EXPECT_EQ(sizeof(int), sizeof(CompressedTuple<int>)); EXPECT_EQ(sizeof(int), sizeof(CompressedTuple<int>));
...@@ -71,6 +75,30 @@ TEST(CompressedTupleTest, Sizeof) { ...@@ -71,6 +75,30 @@ TEST(CompressedTupleTest, Sizeof) {
sizeof(CompressedTuple<int, Empty<0>, NotEmpty<double>, Empty<1>>)); sizeof(CompressedTuple<int, Empty<0>, NotEmpty<double>, Empty<1>>));
} }
TEST(CompressedTupleTest, PointerToEmpty) {
  // Each empty element must be collapsed by the empty-base optimization:
  // it occupies no storage of its own, so its address coincides with the
  // tuple object itself, and the whole tuple is no larger than its single
  // non-empty member.
  auto addresses = [](const auto&... members) {
    return std::vector<const void*>{static_cast<const void*>(&members)...};
  };
  {
    using Tuple = CompressedTuple<int, Empty<0>>;
    EXPECT_EQ(sizeof(int), sizeof(Tuple));
    Tuple t;
    EXPECT_THAT(addresses(t.get<1>()), Each(&t));
  }
  {
    using Tuple = CompressedTuple<int, Empty<0>, Empty<1>>;
    EXPECT_EQ(sizeof(int), sizeof(Tuple));
    Tuple t;
    EXPECT_THAT(addresses(t.get<1>(), t.get<2>()), Each(&t));
  }
  {
    using Tuple = CompressedTuple<int, Empty<0>, Empty<1>, Empty<2>>;
    EXPECT_EQ(sizeof(int), sizeof(Tuple));
    Tuple t;
    EXPECT_THAT(addresses(t.get<1>(), t.get<2>(), t.get<3>()), Each(&t));
  }
}
TEST(CompressedTupleTest, OneMoveOnRValueConstructionTemp) { TEST(CompressedTupleTest, OneMoveOnRValueConstructionTemp) {
InstanceTracker tracker; InstanceTracker tracker;
CompressedTuple<CopyableMovableInstance> x1(CopyableMovableInstance(1)); CompressedTuple<CopyableMovableInstance> x1(CopyableMovableInstance(1));
......
...@@ -23,7 +23,9 @@ ...@@ -23,7 +23,9 @@
#include "absl/base/attributes.h" #include "absl/base/attributes.h"
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h" #include "absl/base/dynamic_annotations.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/container_memory.h" #include "absl/container/internal/container_memory.h"
#include "absl/container/internal/hashtablez_sampler.h"
#include "absl/hash/hash.h" #include "absl/hash/hash.h"
namespace absl { namespace absl {
...@@ -157,6 +159,8 @@ FindInfo find_first_non_full_outofline(const CommonFields& common, ...@@ -157,6 +159,8 @@ FindInfo find_first_non_full_outofline(const CommonFields& common,
return find_first_non_full(common, hash); return find_first_non_full(common, hash);
} }
namespace {
// Returns the address of the slot just after slot assuming each slot has the // Returns the address of the slot just after slot assuming each slot has the
// specified size. // specified size.
static inline void* NextSlot(void* slot, size_t slot_size) { static inline void* NextSlot(void* slot, size_t slot_size) {
...@@ -169,8 +173,22 @@ static inline void* PrevSlot(void* slot, size_t slot_size) { ...@@ -169,8 +173,22 @@ static inline void* PrevSlot(void* slot, size_t slot_size) {
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size); return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
} }
void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, // Finds guaranteed to exists empty slot from the given position.
const PolicyFunctions& policy, void* tmp_space) { // NOTE: this function is almost never triggered inside of the
// DropDeletesWithoutResize, so we keep it simple.
// The table is rather sparse, so empty slot will be found very quickly.
// Returns the index of an empty slot in ctrl[start, end).
// The caller guarantees that such a slot exists, and since this is only
// reached for sparse tables, the scan terminates quickly in practice.
size_t FindEmptySlot(size_t start, size_t end, const ctrl_t* ctrl) {
  size_t i = start;
  while (i < end) {
    if (IsEmpty(ctrl[i])) return i;
    ++i;
  }
  // Unreachable per the caller's contract; return a sentinel in release
  // builds so the function still returns a value.
  assert(false && "no empty slot");
  return ~size_t{};
}
void DropDeletesWithoutResize(CommonFields& common,
const PolicyFunctions& policy) {
void* set = &common; void* set = &common;
void* slot_array = common.slot_array(); void* slot_array = common.slot_array();
const size_t capacity = common.capacity(); const size_t capacity = common.capacity();
...@@ -194,15 +212,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, ...@@ -194,15 +212,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn,
// repeat procedure for current slot with moved from element (target) // repeat procedure for current slot with moved from element (target)
ctrl_t* ctrl = common.control(); ctrl_t* ctrl = common.control();
ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity); ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
const void* hash_fn = policy.hash_fn(common);
auto hasher = policy.hash_slot; auto hasher = policy.hash_slot;
auto transfer = policy.transfer; auto transfer = policy.transfer;
const size_t slot_size = policy.slot_size; const size_t slot_size = policy.slot_size;
size_t total_probe_length = 0; size_t total_probe_length = 0;
void* slot_ptr = SlotAddress(slot_array, 0, slot_size); void* slot_ptr = SlotAddress(slot_array, 0, slot_size);
// The index of an empty slot that can be used as temporary memory for
// the swap operation.
constexpr size_t kUnknownId = ~size_t{};
size_t tmp_space_id = kUnknownId;
for (size_t i = 0; i != capacity; for (size_t i = 0; i != capacity;
++i, slot_ptr = NextSlot(slot_ptr, slot_size)) { ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
assert(slot_ptr == SlotAddress(slot_array, i, slot_size)); assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
if (IsEmpty(ctrl[i])) {
tmp_space_id = i;
continue;
}
if (!IsDeleted(ctrl[i])) continue; if (!IsDeleted(ctrl[i])) continue;
const size_t hash = (*hasher)(hash_fn, slot_ptr); const size_t hash = (*hasher)(hash_fn, slot_ptr);
const FindInfo target = find_first_non_full(common, hash); const FindInfo target = find_first_non_full(common, hash);
...@@ -231,16 +260,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, ...@@ -231,16 +260,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn,
SetCtrl(common, new_i, H2(hash), slot_size); SetCtrl(common, new_i, H2(hash), slot_size);
(*transfer)(set, new_slot_ptr, slot_ptr); (*transfer)(set, new_slot_ptr, slot_ptr);
SetCtrl(common, i, ctrl_t::kEmpty, slot_size); SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
// Initialize or change empty space id.
tmp_space_id = i;
} else { } else {
assert(IsDeleted(ctrl[new_i])); assert(IsDeleted(ctrl[new_i]));
SetCtrl(common, new_i, H2(hash), slot_size); SetCtrl(common, new_i, H2(hash), slot_size);
// Until we are done rehashing, DELETED marks previously FULL slots. // Until we are done rehashing, DELETED marks previously FULL slots.
if (tmp_space_id == kUnknownId) {
tmp_space_id = FindEmptySlot(i + 1, capacity, ctrl);
}
void* tmp_space = SlotAddress(slot_array, tmp_space_id, slot_size);
SanitizerUnpoisonMemoryRegion(tmp_space, slot_size);
// Swap i and new_i elements. // Swap i and new_i elements.
(*transfer)(set, tmp_space, new_slot_ptr); (*transfer)(set, tmp_space, new_slot_ptr);
(*transfer)(set, new_slot_ptr, slot_ptr); (*transfer)(set, new_slot_ptr, slot_ptr);
(*transfer)(set, slot_ptr, tmp_space); (*transfer)(set, slot_ptr, tmp_space);
SanitizerPoisonMemoryRegion(tmp_space, slot_size);
// repeat the processing of the ith slot // repeat the processing of the ith slot
--i; --i;
slot_ptr = PrevSlot(slot_ptr, slot_size); slot_ptr = PrevSlot(slot_ptr, slot_size);
...@@ -267,6 +306,8 @@ static bool WasNeverFull(CommonFields& c, size_t index) { ...@@ -267,6 +306,8 @@ static bool WasNeverFull(CommonFields& c, size_t index) {
Group::kWidth; Group::kWidth;
} }
} // namespace
void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) {
assert(IsFull(c.control()[index]) && "erasing a dangling iterator"); assert(IsFull(c.control()[index]) && "erasing a dangling iterator");
c.decrement_size(); c.decrement_size();
...@@ -424,6 +465,129 @@ void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c, ...@@ -424,6 +465,129 @@ void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c,
PoisonSingleGroupEmptySlots(c, slot_size); PoisonSingleGroupEmptySlots(c, slot_size);
} }
namespace {
// Called whenever the table needs to vacate empty slots either by removing
// tombstones via rehash or growth.
ABSL_ATTRIBUTE_NOINLINE
FindInfo FindInsertPositionWithGrowthOrRehash(CommonFields& common, size_t hash,
                                              const PolicyFunctions& policy) {
  const size_t cap = common.capacity();
  // Squash DELETED without growing if there is enough capacity: rehash in
  // place when the table is past the single-group size and the current size
  // is <= 25/32 of capacity. The comparison is done in 64-bit to avoid
  // overflow.
  //
  // Rationale for such a high factor: 1) DropDeletesWithoutResize() is
  // faster than resize, and 2) it takes quite a bit of work to add
  // tombstones. In the worst case, seems to take approximately 4
  // insert/erase pairs to create a single tombstone and so if we are
  // rehashing because of tombstones, we can afford to rehash-in-place as
  // long as we are reclaiming at least 1/8 the capacity without doing more
  // than 2X the work. (Where "work" is defined to be size() for rehashing
  // or rehashing in place, and 1 for an insert or erase.) But rehashing in
  // place is faster per operation than inserting or even doubling the size
  // of the table, so we actually afford to reclaim even less space from a
  // resize-in-place. The decision is to rehash in place if we can reclaim
  // at about 1/8th of the usable capacity (specifically 3/28 of the
  // capacity) which means that the total cost of rehashing will be a small
  // fraction of the total work.
  //
  // Here is output of an experiment using the BM_CacheInSteadyState
  // benchmark running the old case (where we rehash-in-place only if we can
  // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
  // if we can recover 3/32*capacity).
  //
  // Note that although in the worst-case number of rehashes jumped up from
  // 15 to 190, but the number of operations per second is almost the same.
  //
  // Abridged output of running BM_CacheInSteadyState benchmark from
  // raw_hash_set_benchmark. N is the number of insert/erase operations.
  //
  //      | OLD (recover >= 7/16        | NEW (recover >= 3/32)
  // size |    N/s LoadFactor NRehashes |    N/s LoadFactor NRehashes
  //  448 | 145284       0.44        18 | 140118       0.44        19
  //  493 | 152546       0.24        11 | 151417       0.48        28
  //  538 | 151439       0.26        11 | 151152       0.53        38
  //  583 | 151765       0.28        11 | 150572       0.57        50
  //  628 | 150241       0.31        11 | 150853       0.61        66
  //  672 | 149602       0.33        12 | 150110       0.66        90
  //  717 | 149998       0.35        12 | 149531       0.70       129
  //  762 | 149836       0.37        13 | 148559       0.74       190
  //  807 | 149736       0.39        14 | 151107       0.39        14
  //  852 | 150204       0.42        15 | 151019       0.42        15
  const bool rehash_in_place =
      cap > Group::kWidth &&
      common.size() * uint64_t{32} <= cap * uint64_t{25};
  if (rehash_in_place) {
    DropDeletesWithoutResize(common, policy);
  } else {
    // Not enough reclaimable tombstones: grow the container instead.
    policy.resize(common, NextCapacity(cap), HashtablezInfoHandle{});
  }
  // This function is typically called with tables containing deleted slots.
  // The table will be big and `FindFirstNonFullAfterResize` will always
  // fallback to `find_first_non_full`. So using `find_first_non_full`
  // directly.
  return find_first_non_full(common, hash);
}
} // namespace
const void* GetHashRefForEmptyHasher(const CommonFields& common) {
  // A stateless hasher carries no data, so any stable non-null pointer is a
  // valid "reference" to it. Empty-base optimization typically makes an
  // empty base share the derived object's address anyway, so handing back
  // the address of the CommonFields is equally acceptable.
  return static_cast<const void*>(&common);
}
size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
                           const PolicyFunctions& policy) {
  // If the table has no deleted slots and growth is still available, the
  // first empty slot of the probe sequence (`target`) can be used directly.
  // This shortcut is disabled when generations are enabled, because in that
  // mode even sparse tables must be rehashed randomly.
  const bool insert_at_target =
      !SwisstableGenerationsEnabled() &&
      common.growth_info().HasNoDeletedAndGrowthLeft();
  if (ABSL_PREDICT_FALSE(!insert_at_target)) {
    // With generations disabled, the cases that reach this branch, in
    // decreasing order of frequency, are:
    //   1. A table without deleted slots (>95% of cases) that must grow.
    //   2. A table with deleted slots that still has room for the element.
    //   3. A table with deleted slots that must be rehashed or resized.
    if (ABSL_PREDICT_TRUE(common.growth_info().HasNoGrowthLeftAndNoDeleted())) {
      // No tombstones and no room left: plain growth to the next capacity.
      const size_t old_capacity = common.capacity();
      policy.resize(common, NextCapacity(old_capacity), HashtablezInfoHandle{});
      target = HashSetResizeHelper::FindFirstNonFullAfterResize(
          common, old_capacity, hash);
    } else {
      // Note: when generations are enabled the table may have no deleted
      // slots here.
      if (common.should_rehash_for_bug_detection_on_insert()) {
        // Move to a different heap allocation in order to detect bugs.
        const size_t cap = common.capacity();
        policy.resize(common,
                      common.growth_left() > 0 ? cap : NextCapacity(cap),
                      HashtablezInfoHandle{});
      }
      target = ABSL_PREDICT_TRUE(common.growth_left() > 0)
                   ? find_first_non_full(common, hash)
                   : FindInsertPositionWithGrowthOrRehash(common, hash, policy);
    }
  }
  // Commit the insertion: bump the size, update growth accounting for the
  // control byte being overwritten, and publish H2(hash) for the chosen
  // slot. Returns the slot offset for the caller to construct into.
  PrepareInsertCommon(common);
  common.growth_info().OverwriteControlAsFull(common.control()[target.offset]);
  SetCtrl(common, target.offset, H2(hash), policy.slot_size);
  common.infoz().RecordInsert(hash, target.probe_length);
  return target.offset;
}
} // namespace container_internal } // namespace container_internal
ABSL_NAMESPACE_END ABSL_NAMESPACE_END
} // namespace absl } // namespace absl
...@@ -1126,6 +1126,13 @@ class GrowthInfo { ...@@ -1126,6 +1126,13 @@ class GrowthInfo {
return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0; return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0;
} }
// Returns true if the table satisfies two properties:
// 1. Guaranteed to have no kDeleted slots.
// 2. There is no growth left.
// Encoding note: the sibling accessors treat the sign bit of
// growth_left_info_ as the "has deleted" flag and the remaining bits as the
// growth count, so the whole word being zero implies both properties.
bool HasNoGrowthLeftAndNoDeleted() const {
return growth_left_info_ == 0;
}
// Returns true if table guaranteed to have no k // Returns true if table guaranteed to have no k
bool HasNoDeleted() const { bool HasNoDeleted() const {
return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0; return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0;
...@@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo { ...@@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo {
// This is stored in the heap allocation before the control bytes. // This is stored in the heap allocation before the control bytes.
// TODO(b/289225379): experiment with moving growth_info back inline to // TODO(b/289225379): experiment with moving growth_info back inline to
// increase room for SOO. // increase room for SOO.
// Convenience accessor: the number of insertions allowed before the next
// growth, read from the GrowthInfo stored just before the control bytes.
size_t growth_left() const { return growth_info().GetGrowthLeft(); }
GrowthInfo& growth_info() { GrowthInfo& growth_info() {
auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1; auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1;
assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0); assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0);
...@@ -1933,8 +1942,7 @@ class HashSetResizeHelper { ...@@ -1933,8 +1942,7 @@ class HashSetResizeHelper {
// will be no performance benefit. // will be no performance benefit.
// It has implicit assumption that `resize` will call // It has implicit assumption that `resize` will call
// `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`. // `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`.
// Falls back to `find_first_non_full` in case of big groups, so it is // Falls back to `find_first_non_full` in case of big groups.
// safe to use after `rehash_and_grow_if_necessary`.
static FindInfo FindFirstNonFullAfterResize(const CommonFields& c, static FindInfo FindFirstNonFullAfterResize(const CommonFields& c,
size_t old_capacity, size_t old_capacity,
size_t hash) { size_t hash) {
...@@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size, ...@@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
struct PolicyFunctions { struct PolicyFunctions {
size_t slot_size; size_t slot_size;
// Returns the pointer to the hash function stored in the set.
const void* (*hash_fn)(const CommonFields& common);
// Returns the hash of the pointed-to slot. // Returns the hash of the pointed-to slot.
size_t (*hash_slot)(const void* hash_fn, void* slot); size_t (*hash_slot)(const void* hash_fn, void* slot);
// Transfer the contents of src_slot to dst_slot. // Transfers the contents of src_slot to dst_slot.
void (*transfer)(void* set, void* dst_slot, void* src_slot); void (*transfer)(void* set, void* dst_slot, void* src_slot);
// Deallocate the backing store from common. // Deallocates the backing store from common.
void (*dealloc)(CommonFields& common, const PolicyFunctions& policy); void (*dealloc)(CommonFields& common, const PolicyFunctions& policy);
// Resizes set to the new capacity.
// Arguments are used as in raw_hash_set::resize_impl.
void (*resize)(CommonFields& common, size_t new_capacity,
HashtablezInfoHandle forced_infoz);
}; };
// ClearBackingArray clears the backing array, either modifying it in place, // ClearBackingArray clears the backing array, either modifying it in place,
...@@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) { ...@@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) {
memcpy(dst, src, SizeOfSlot); memcpy(dst, src, SizeOfSlot);
} }
// Type-erased version of raw_hash_set::drop_deletes_without_resize. // Type erased raw_hash_set::get_hash_ref_fn for the empty hash function case.
void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, const void* GetHashRefForEmptyHasher(const CommonFields& common);
const PolicyFunctions& policy, void* tmp_space);
// Given the hash of a value not currently in the table and the first empty
// slot in the probe sequence, finds a viable slot index to insert it at.
//
// In case there's no space left, the table can be resized or rehashed
// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
//
// In the case of absence of deleted slots and positive growth_left, the element
// can be inserted in the provided `target` position.
//
// When the table has deleted slots (according to GrowthInfo), the target
// position will be searched one more time using `find_first_non_full`.
//
// REQUIRES: Table is not SOO.
// REQUIRES: At least one non-full slot available.
// REQUIRES: `target` is a valid empty position to insert.
size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
const PolicyFunctions& policy);
// A SwissTable. // A SwissTable.
// //
...@@ -3533,7 +3566,7 @@ class raw_hash_set { ...@@ -3533,7 +3566,7 @@ class raw_hash_set {
// common(), old_capacity, hash) // common(), old_capacity, hash)
// can be called right after `resize`. // can be called right after `resize`.
void resize(size_t new_capacity) { void resize(size_t new_capacity) {
resize_impl(new_capacity, HashtablezInfoHandle{}); raw_hash_set::resize_impl(common(), new_capacity, HashtablezInfoHandle{});
} }
// As above, except that we also accept a pre-sampled, forced infoz for // As above, except that we also accept a pre-sampled, forced infoz for
...@@ -3541,19 +3574,24 @@ class raw_hash_set { ...@@ -3541,19 +3574,24 @@ class raw_hash_set {
// store the infoz. // store the infoz.
void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) { void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) {
assert(forced_infoz.IsSampled()); assert(forced_infoz.IsSampled());
resize_impl(NextCapacity(SooCapacity()), forced_infoz); raw_hash_set::resize_impl(common(), NextCapacity(SooCapacity()),
forced_infoz);
} }
ABSL_ATTRIBUTE_NOINLINE void resize_impl( // Resizes set to the new capacity.
size_t new_capacity, HashtablezInfoHandle forced_infoz) { // It is a static function in order to use its pointer in GetPolicyFunctions.
ABSL_ATTRIBUTE_NOINLINE static void resize_impl(
CommonFields& common, size_t new_capacity,
HashtablezInfoHandle forced_infoz) {
raw_hash_set* set = reinterpret_cast<raw_hash_set*>(&common);
assert(IsValidCapacity(new_capacity)); assert(IsValidCapacity(new_capacity));
assert(!fits_in_soo(new_capacity)); assert(!set->fits_in_soo(new_capacity));
const bool was_soo = is_soo(); const bool was_soo = set->is_soo();
const bool had_soo_slot = was_soo && !empty(); const bool had_soo_slot = was_soo && !set->empty();
const ctrl_t soo_slot_h2 = const ctrl_t soo_slot_h2 =
had_soo_slot ? static_cast<ctrl_t>(H2(hash_of(soo_slot()))) had_soo_slot ? static_cast<ctrl_t>(H2(set->hash_of(set->soo_slot())))
: ctrl_t::kEmpty; : ctrl_t::kEmpty;
HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot, HashSetResizeHelper resize_helper(common, was_soo, had_soo_slot,
forced_infoz); forced_infoz);
// Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in // Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in
// HashSetResizeHelper constructor because it can't transfer slots when // HashSetResizeHelper constructor because it can't transfer slots when
...@@ -3561,11 +3599,12 @@ class raw_hash_set { ...@@ -3561,11 +3599,12 @@ class raw_hash_set {
// TODO(b/289225379): try to handle more of the SOO cases inside // TODO(b/289225379): try to handle more of the SOO cases inside
// InitializeSlots. See comment on cl/555990034 snapshot #63. // InitializeSlots. See comment on cl/555990034 snapshot #63.
if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) { if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) {
resize_helper.old_heap_or_soo() = common().heap_or_soo(); resize_helper.old_heap_or_soo() = common.heap_or_soo();
} else { } else {
transfer(to_slot(resize_helper.old_soo_data()), soo_slot()); set->transfer(set->to_slot(resize_helper.old_soo_data()),
set->soo_slot());
} }
common().set_capacity(new_capacity); common.set_capacity(new_capacity);
// Note that `InitializeSlots` does different number initialization steps // Note that `InitializeSlots` does different number initialization steps
// depending on the values of `transfer_uses_memcpy` and capacities. // depending on the values of `transfer_uses_memcpy` and capacities.
// Refer to the comment in `InitializeSlots` for more details. // Refer to the comment in `InitializeSlots` for more details.
...@@ -3573,7 +3612,7 @@ class raw_hash_set { ...@@ -3573,7 +3612,7 @@ class raw_hash_set {
resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type), resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
PolicyTraits::transfer_uses_memcpy(), PolicyTraits::transfer_uses_memcpy(),
SooEnabled(), alignof(slot_type)>( SooEnabled(), alignof(slot_type)>(
common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type), common, CharAlloc(set->alloc_ref()), soo_slot_h2, sizeof(key_type),
sizeof(value_type)); sizeof(value_type));
// In the SooEnabled() case, capacity is never 0 so we don't check. // In the SooEnabled() case, capacity is never 0 so we don't check.
...@@ -3585,30 +3624,30 @@ class raw_hash_set { ...@@ -3585,30 +3624,30 @@ class raw_hash_set {
// Nothing more to do in this case. // Nothing more to do in this case.
if (was_soo && !had_soo_slot) return; if (was_soo && !had_soo_slot) return;
slot_type* new_slots = slot_array(); slot_type* new_slots = set->slot_array();
if (grow_single_group) { if (grow_single_group) {
if (PolicyTraits::transfer_uses_memcpy()) { if (PolicyTraits::transfer_uses_memcpy()) {
// InitializeSlots did all the work. // InitializeSlots did all the work.
return; return;
} }
if (was_soo) { if (was_soo) {
transfer(new_slots + resize_helper.SooSlotIndex(), set->transfer(new_slots + resize_helper.SooSlotIndex(),
to_slot(resize_helper.old_soo_data())); to_slot(resize_helper.old_soo_data()));
return; return;
} else { } else {
// We want GrowSizeIntoSingleGroup to be called here in order to make // We want GrowSizeIntoSingleGroup to be called here in order to make
// InitializeSlots not depend on PolicyTraits. // InitializeSlots not depend on PolicyTraits.
resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(), resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common,
alloc_ref()); set->alloc_ref());
} }
} else { } else {
// InitializeSlots prepares control bytes to correspond to empty table. // InitializeSlots prepares control bytes to correspond to empty table.
const auto insert_slot = [&](slot_type* slot) { const auto insert_slot = [&](slot_type* slot) {
size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
PolicyTraits::element(slot)); PolicyTraits::element(slot));
auto target = find_first_non_full(common(), hash); auto target = find_first_non_full(common, hash);
SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type)); SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
transfer(new_slots + target.offset, slot); set->transfer(new_slots + target.offset, slot);
return target.probe_length; return target.probe_length;
}; };
if (was_soo) { if (was_soo) {
...@@ -3623,80 +3662,13 @@ class raw_hash_set { ...@@ -3623,80 +3662,13 @@ class raw_hash_set {
total_probe_length += insert_slot(old_slots + i); total_probe_length += insert_slot(old_slots + i);
} }
} }
infoz().RecordRehash(total_probe_length); common.infoz().RecordRehash(total_probe_length);
} }
} }
resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(alloc_ref()), resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(set->alloc_ref()),
sizeof(slot_type)); sizeof(slot_type));
} }
// Prunes control bytes to remove as many tombstones as possible without
// changing the table's capacity.
//
// See the comment on `rehash_and_grow_if_necessary()`.
inline void drop_deletes_without_resize() {
// Stack-allocate space for swapping elements. The type-erased helper
// cannot know slot_type, so the properly sized and aligned scratch buffer
// is created here and passed in.
alignas(slot_type) unsigned char tmp[sizeof(slot_type)];
DropDeletesWithoutResize(common(), &hash_ref(), GetPolicyFunctions(), tmp);
}
// Called whenever the table *might* need to conditionally grow.
//
// This function is an optimization opportunity to perform a rehash even when
// growth is unnecessary, because vacating tombstones is beneficial for
// performance in the long-run.
void rehash_and_grow_if_necessary() {
  const size_t cap = capacity();
  // Squash DELETED without growing if there is enough capacity: rehash in
  // place when the table is past the single-group size and the current size
  // is <= 25/32 of capacity (64-bit math avoids overflow).
  //
  // Rationale for such a high factor: 1) drop_deletes_without_resize() is
  // faster than resize, and 2) it takes quite a bit of work to add
  // tombstones. In the worst case, seems to take approximately 4
  // insert/erase pairs to create a single tombstone and so if we are
  // rehashing because of tombstones, we can afford to rehash-in-place as
  // long as we are reclaiming at least 1/8 the capacity without doing more
  // than 2X the work. (Where "work" is defined to be size() for rehashing
  // or rehashing in place, and 1 for an insert or erase.) But rehashing in
  // place is faster per operation than inserting or even doubling the size
  // of the table, so we actually afford to reclaim even less space from a
  // resize-in-place. The decision is to rehash in place if we can reclaim
  // at about 1/8th of the usable capacity (specifically 3/28 of the
  // capacity) which means that the total cost of rehashing will be a small
  // fraction of the total work.
  //
  // Here is output of an experiment using the BM_CacheInSteadyState
  // benchmark running the old case (where we rehash-in-place only if we can
  // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
  // if we can recover 3/32*capacity).
  //
  // Note that although in the worst-case number of rehashes jumped up from
  // 15 to 190, but the number of operations per second is almost the same.
  //
  // Abridged output of running BM_CacheInSteadyState benchmark from
  // raw_hash_set_benchmark. N is the number of insert/erase operations.
  //
  //      | OLD (recover >= 7/16        | NEW (recover >= 3/32)
  // size |    N/s LoadFactor NRehashes |    N/s LoadFactor NRehashes
  //  448 | 145284       0.44        18 | 140118       0.44        19
  //  493 | 152546       0.24        11 | 151417       0.48        28
  //  538 | 151439       0.26        11 | 151152       0.53        38
  //  583 | 151765       0.28        11 | 150572       0.57        50
  //  628 | 150241       0.31        11 | 150853       0.61        66
  //  672 | 149602       0.33        12 | 150110       0.66        90
  //  717 | 149998       0.35        12 | 149531       0.70       129
  //  762 | 149836       0.37        13 | 148559       0.74       190
  //  807 | 149736       0.39        14 | 151107       0.39        14
  //  852 | 150204       0.42        15 | 151019       0.42        15
  const bool squash_deleted_in_place =
      cap > Group::kWidth && size() * uint64_t{32} <= cap * uint64_t{25};
  if (squash_deleted_in_place) {
    drop_deletes_without_resize();
  } else {
    // Otherwise grow the container.
    resize(NextCapacity(cap));
  }
}
// Casting directly from e.g. char* to slot_type* can cause compilation errors // Casting directly from e.g. char* to slot_type* can cause compilation errors
// on objective-C. This function converts to void* first, avoiding the issue. // on objective-C. This function converts to void* first, avoiding the issue.
static slot_type* to_slot(void* buf) { static slot_type* to_slot(void* buf) {
...@@ -3817,6 +3789,7 @@ class raw_hash_set { ...@@ -3817,6 +3789,7 @@ class raw_hash_set {
template <class K> template <class K>
std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) { std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) {
assert(!is_soo());
prefetch_heap_block(); prefetch_heap_block();
auto hash = hash_ref()(key); auto hash = hash_ref()(key);
auto seq = probe(common(), hash); auto seq = probe(common(), hash);
...@@ -3833,8 +3806,9 @@ class raw_hash_set { ...@@ -3833,8 +3806,9 @@ class raw_hash_set {
if (ABSL_PREDICT_TRUE(mask_empty)) { if (ABSL_PREDICT_TRUE(mask_empty)) {
size_t target = seq.offset( size_t target = seq.offset(
GetInsertionOffset(mask_empty, capacity(), hash, control())); GetInsertionOffset(mask_empty, capacity(), hash, control()));
return { return {iterator_at(PrepareInsertNonSoo(common(), hash,
iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})), FindInfo{target, seq.index()},
GetPolicyFunctions())),
true}; true};
} }
seq.next(); seq.next();
...@@ -3852,63 +3826,6 @@ class raw_hash_set { ...@@ -3852,63 +3826,6 @@ class raw_hash_set {
return find_or_prepare_insert_non_soo(key); return find_or_prepare_insert_non_soo(key);
} }
// Given the hash of a value not currently in the table and the first empty
// slot in the probe sequence, finds the viable slot index to insert it at.
//
// In case there's no space left, the table can be resized or rehashed
// (see rehash_and_grow_if_necessary).
//
// In case of absence of deleted slots and positive growth_left, element can
// be inserted in the provided `target` position.
//
// When table has deleted slots (according to GrowthInfo), target position
// will be searched one more time using `find_first_non_full`.
//
// REQUIRES: At least one non-full slot available.
// REQUIRES: `target` is a valid empty position to insert.
size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE {
assert(!is_soo());
// When there are no deleted slots in the table
// and growth_left is positive, we can insert at the first
// empty slot in the probe sequence (target).
bool use_target_hint = false;
// Optimization is disabled on enabled generations.
// We have to rehash even sparse tables randomly in such mode.
#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS
use_target_hint = growth_info().HasNoDeletedAndGrowthLeft();
#endif
if (ABSL_PREDICT_FALSE(!use_target_hint)) {
const bool rehash_for_bug_detection =
common().should_rehash_for_bug_detection_on_insert();
if (rehash_for_bug_detection) {
// Move to a different heap allocation in order to detect bugs.
const size_t cap = capacity();
resize(growth_left() > 0 ? cap : NextCapacity(cap));
}
if (!rehash_for_bug_detection &&
ABSL_PREDICT_FALSE(growth_left() == 0)) {
const size_t old_capacity = capacity();
rehash_and_grow_if_necessary();
// NOTE: It is safe to use `FindFirstNonFullAfterResize` after
// `rehash_and_grow_if_necessary`, whether capacity changes or not.
// `rehash_and_grow_if_necessary` may *not* call `resize`
// and perform `drop_deletes_without_resize` instead. But this
// could happen only on big tables and will not change capacity.
// For big tables `FindFirstNonFullAfterResize` will always
// fallback to normal `find_first_non_full`.
target = HashSetResizeHelper::FindFirstNonFullAfterResize(
common(), old_capacity, hash);
} else {
target = find_first_non_full(common(), hash);
}
}
PrepareInsertCommon(common());
growth_info().OverwriteControlAsFull(control()[target.offset]);
SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
infoz().RecordInsert(hash, target.probe_length);
return target.offset;
}
// Constructs the value in the space pointed by the iterator. This only works // Constructs the value in the space pointed by the iterator. This only works
// after an unsuccessful find_or_prepare_insert() and before any other // after an unsuccessful find_or_prepare_insert() and before any other
// modifications happen in the raw_hash_set. // modifications happen in the raw_hash_set.
...@@ -3949,7 +3866,7 @@ class raw_hash_set { ...@@ -3949,7 +3866,7 @@ class raw_hash_set {
// See `CapacityToGrowth()`. // See `CapacityToGrowth()`.
size_t growth_left() const { size_t growth_left() const {
assert(!is_soo()); assert(!is_soo());
return growth_info().GetGrowthLeft(); return common().growth_left();
} }
GrowthInfo& growth_info() { GrowthInfo& growth_info() {
...@@ -4009,6 +3926,10 @@ class raw_hash_set { ...@@ -4009,6 +3926,10 @@ class raw_hash_set {
return settings_.template get<3>(); return settings_.template get<3>();
} }
static const void* get_hash_ref_fn(const CommonFields& common) {
auto* h = reinterpret_cast<const raw_hash_set*>(&common);
return &h->hash_ref();
}
static void transfer_slot_fn(void* set, void* dst, void* src) { static void transfer_slot_fn(void* set, void* dst, void* src) {
auto* h = static_cast<raw_hash_set*>(set); auto* h = static_cast<raw_hash_set*>(set);
h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src)); h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src));
...@@ -4030,6 +3951,10 @@ class raw_hash_set { ...@@ -4030,6 +3951,10 @@ class raw_hash_set {
static const PolicyFunctions& GetPolicyFunctions() { static const PolicyFunctions& GetPolicyFunctions() {
static constexpr PolicyFunctions value = { static constexpr PolicyFunctions value = {
sizeof(slot_type), sizeof(slot_type),
// TODO(b/328722020): try to type erase
// for standard layout and alignof(Hash) <= alignof(CommonFields).
std::is_empty<hasher>::value ? &GetHashRefForEmptyHasher
: &raw_hash_set::get_hash_ref_fn,
PolicyTraits::template get_hash_slot_fn<hasher>(), PolicyTraits::template get_hash_slot_fn<hasher>(),
PolicyTraits::transfer_uses_memcpy() PolicyTraits::transfer_uses_memcpy()
? TransferRelocatable<sizeof(slot_type)> ? TransferRelocatable<sizeof(slot_type)>
...@@ -4037,6 +3962,7 @@ class raw_hash_set { ...@@ -4037,6 +3962,7 @@ class raw_hash_set {
(std::is_same<SlotAlloc, std::allocator<slot_type>>::value (std::is_same<SlotAlloc, std::allocator<slot_type>>::value
? &DeallocateStandard<alignof(slot_type)> ? &DeallocateStandard<alignof(slot_type)>
: &raw_hash_set::dealloc_fn), : &raw_hash_set::dealloc_fn),
&raw_hash_set::resize_impl,
}; };
return value; return value;
} }
......
...@@ -126,6 +126,22 @@ TEST(GrowthInfoTest, HasNoDeletedAndGrowthLeft) { ...@@ -126,6 +126,22 @@ TEST(GrowthInfoTest, HasNoDeletedAndGrowthLeft) {
EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft());
} }
TEST(GrowthInfoTest, HasNoGrowthLeftAndNoDeleted) {
GrowthInfo gi;
gi.InitGrowthLeftNoDeleted(1);
EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted());
gi.OverwriteEmptyAsFull();
EXPECT_TRUE(gi.HasNoGrowthLeftAndNoDeleted());
gi.OverwriteFullAsDeleted();
EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted());
gi.OverwriteFullAsEmpty();
EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted());
gi.InitGrowthLeftNoDeleted(0);
EXPECT_TRUE(gi.HasNoGrowthLeftAndNoDeleted());
gi.OverwriteFullAsEmpty();
EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted());
}
TEST(GrowthInfoTest, OverwriteFullAsEmpty) { TEST(GrowthInfoTest, OverwriteFullAsEmpty) {
GrowthInfo gi; GrowthInfo gi;
gi.InitGrowthLeftNoDeleted(5); gi.InitGrowthLeftNoDeleted(5);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment