Commit 1449c9a1 by Evan Brown Committed by Copybara-Service

Implement small object optimization (SOO) in swisstable; it is disabled for now.

Details:
- We use the space for control/slots pointers as the inline buffer.
- We use a max inline capacity of 1 to make the implementation much simpler and to avoid having to randomize the iteration order for inline tables.
- For iteration of inline tables, we introduce the kSooControl buffer, which starts with 1 full control byte followed by 1 sentinel control byte (the remaining bytes are padding), so that incrementing an iterator to the single inline element yields an end() iterator. We don't access kSooControl during lookups, only during iteration.
PiperOrigin-RevId: 613253492
Change-Id: Id98ff11842f8bef27ac7ed88138dc03b46ce4fa6
parent 6bf3c73f
...@@ -181,15 +181,13 @@ TEST(FlatHashSet, EraseIf) { ...@@ -181,15 +181,13 @@ TEST(FlatHashSet, EraseIf) {
} }
} }
class PoisonInline { class PoisonSoo {
int64_t data_; int64_t data_;
public: public:
explicit PoisonInline(int64_t d) : data_(d) { explicit PoisonSoo(int64_t d) : data_(d) { SanitizerPoisonObject(&data_); }
SanitizerPoisonObject(&data_); PoisonSoo(const PoisonSoo& that) : PoisonSoo(*that) {}
} ~PoisonSoo() { SanitizerUnpoisonObject(&data_); }
PoisonInline(const PoisonInline& that) : PoisonInline(*that) {}
~PoisonInline() { SanitizerUnpoisonObject(&data_); }
int64_t operator*() const { int64_t operator*() const {
SanitizerUnpoisonObject(&data_); SanitizerUnpoisonObject(&data_);
...@@ -198,45 +196,56 @@ class PoisonInline { ...@@ -198,45 +196,56 @@ class PoisonInline {
return ret; return ret;
} }
template <typename H> template <typename H>
friend H AbslHashValue(H h, const PoisonInline& pi) { friend H AbslHashValue(H h, const PoisonSoo& pi) {
return H::combine(std::move(h), *pi); return H::combine(std::move(h), *pi);
} }
bool operator==(const PoisonInline& rhs) const { return **this == *rhs; } bool operator==(const PoisonSoo& rhs) const { return **this == *rhs; }
}; };
// Tests that we don't touch the poison_ member of PoisonInline. TEST(FlatHashSet, PoisonSooBasic) {
TEST(FlatHashSet, PoisonInline) { PoisonSoo a(0), b(1);
PoisonInline a(0), b(1); flat_hash_set<PoisonSoo> set;
{ // basic usage set.insert(a);
flat_hash_set<PoisonInline> set; EXPECT_THAT(set, UnorderedElementsAre(a));
set.insert(a); set.insert(b);
EXPECT_THAT(set, UnorderedElementsAre(a)); EXPECT_THAT(set, UnorderedElementsAre(a, b));
set.insert(b); set.erase(a);
EXPECT_THAT(set, UnorderedElementsAre(a, b)); EXPECT_THAT(set, UnorderedElementsAre(b));
set.erase(a); set.rehash(0); // Shrink to SOO.
EXPECT_THAT(set, UnorderedElementsAre(b)); EXPECT_THAT(set, UnorderedElementsAre(b));
set.rehash(0); // shrink to inline }
EXPECT_THAT(set, UnorderedElementsAre(b));
} TEST(FlatHashSet, PoisonSooMoveConstructSooToSoo) {
{ // test move constructor from inline to inline PoisonSoo a(0);
flat_hash_set<PoisonInline> set; flat_hash_set<PoisonSoo> set;
set.insert(a); set.insert(a);
flat_hash_set<PoisonInline> set2(std::move(set)); flat_hash_set<PoisonSoo> set2(std::move(set));
EXPECT_THAT(set2, UnorderedElementsAre(a)); EXPECT_THAT(set2, UnorderedElementsAre(a));
} }
{ // test move assignment from inline to inline
flat_hash_set<PoisonInline> set, set2; TEST(FlatHashSet, PoisonSooAllocMoveConstructSooToSoo) {
set.insert(a); PoisonSoo a(0);
set2 = std::move(set); flat_hash_set<PoisonSoo> set;
EXPECT_THAT(set2, UnorderedElementsAre(a)); set.insert(a);
} flat_hash_set<PoisonSoo> set2(std::move(set), std::allocator<PoisonSoo>());
{ // test alloc move constructor from inline to inline EXPECT_THAT(set2, UnorderedElementsAre(a));
flat_hash_set<PoisonInline> set; }
set.insert(a);
flat_hash_set<PoisonInline> set2(std::move(set), TEST(FlatHashSet, PoisonSooMoveAssignFullSooToEmptySoo) {
std::allocator<PoisonInline>()); PoisonSoo a(0);
EXPECT_THAT(set2, UnorderedElementsAre(a)); flat_hash_set<PoisonSoo> set, set2;
} set.insert(a);
set2 = std::move(set);
EXPECT_THAT(set2, UnorderedElementsAre(a));
}
TEST(FlatHashSet, PoisonSooMoveAssignFullSooToFullSoo) {
PoisonSoo a(0), b(1);
flat_hash_set<PoisonSoo> set, set2;
set.insert(a);
set2.insert(b);
set2 = std::move(set);
EXPECT_THAT(set2, UnorderedElementsAre(a));
} }
TEST(FlatHashSet, FlatHashSetPolicyDestroyReturnsTrue) { TEST(FlatHashSet, FlatHashSetPolicyDestroyReturnsTrue) {
......
...@@ -168,6 +168,9 @@ struct hash_policy_traits : common_policy_traits<Policy> { ...@@ -168,6 +168,9 @@ struct hash_policy_traits : common_policy_traits<Policy> {
#endif #endif
} }
// Whether small object optimization is enabled. False by default.
// The answer is obtained by calling soo_enabled_impl with a Rank1 tag.
static constexpr bool soo_enabled() { return soo_enabled_impl(Rank1{}); }
private: private:
template <class Hash> template <class Hash>
struct HashElement { struct HashElement {
...@@ -183,6 +186,18 @@ struct hash_policy_traits : common_policy_traits<Policy> { ...@@ -183,6 +186,18 @@ struct hash_policy_traits : common_policy_traits<Policy> {
return Policy::apply(HashElement<Hash>{*static_cast<const Hash*>(hash_fn)}, return Policy::apply(HashElement<Hash>{*static_cast<const Hash*>(hash_fn)},
Policy::element(static_cast<slot_type*>(slot))); Policy::element(static_cast<slot_type*>(slot)));
} }
// Use go/ranked-overloads for dispatching. Rank1 is preferred.
// Rank1 derives from Rank0, so a Rank1 argument matches the Rank1 overload
// exactly and the Rank0 overload only through a derived-to-base conversion.
struct Rank0 {};
struct Rank1 : Rank0 {};
// Use auto -> decltype as an enabler.
// This overload is only viable when the expression P::soo_enabled() is
// well-formed; it then returns whatever the policy reports.
template <class P = Policy>
static constexpr auto soo_enabled_impl(Rank1) -> decltype(P::soo_enabled()) {
return P::soo_enabled();
}
// Fallback overload taking the base tag: reports that SOO is disabled.
static constexpr bool soo_enabled_impl(Rank0) { return false; }
}; };
} // namespace container_internal } // namespace container_internal
......
...@@ -30,12 +30,14 @@ namespace absl { ...@@ -30,12 +30,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN ABSL_NAMESPACE_BEGIN
namespace container_internal { namespace container_internal {
// Represents a control byte corresponding to a full slot with arbitrary hash.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
// We have space for `growth_left` before a single block of control bytes. A // We have space for `growth_left` before a single block of control bytes. A
// single block of empty control bytes for tables without any slots allocated. // single block of empty control bytes for tables without any slots allocated.
// This enables removing a branch in the hot path of find(). In order to ensure // This enables removing a branch in the hot path of find(). In order to ensure
// that the control bytes are aligned to 16, we have 16 bytes before the control // that the control bytes are aligned to 16, we have 16 bytes before the control
// bytes even though growth_left only needs 8. // bytes even though growth_left only needs 8.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = { alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
...@@ -46,6 +48,18 @@ alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = { ...@@ -46,6 +48,18 @@ alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty};
// We need one full byte followed by a sentinel byte for iterator::operator++ to
// work. We have a full group after kSentinel to be safe (in case operator++ is
// changed to read a full group).
// Layout of the 17 entries: index 0 is ZeroCtrlT() (a full byte), index 1 is
// kSentinel, index 2 is another ZeroCtrlT(), and indices 3..16 are kEmpty.
ABSL_CONST_INIT ABSL_DLL const ctrl_t kSooControl[17] = {
ZeroCtrlT(), ctrl_t::kSentinel, ZeroCtrlT(), ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty};
// Compile-time check that the 17-entry table is at least as large as the
// number of control bytes computed for a table of SOO capacity.
static_assert(NumControlBytes(SooCapacity()) <= 17,
"kSooControl capacity too small");
#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL #ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t Group::kWidth; constexpr size_t Group::kWidth;
#endif #endif
...@@ -111,6 +125,20 @@ bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash, ...@@ -111,6 +125,20 @@ bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash,
return !is_small(capacity) && (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6; return !is_small(capacity) && (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6;
} }
// Chooses the slot for an insertion into a table that has just grown out of
// the small-object-optimized (SOO) state, updates the bookkeeping in `common`
// and returns the index of the chosen slot.
//
// `hash` is the hash of the element being inserted and `slot_size` is the size
// of one slot in bytes.
size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
                             CommonFields& common) {
  // Precondition: the capacity is exactly one growth step above SOO capacity.
  assert(common.capacity() == NextCapacity(SooCapacity()));
  // Growing from capacity 1 to 3 leaves only index 1 occupied, which means the
  // new element has to land on index 0 or on index 2.
  assert(HashSetResizeHelper::SooSlotIndex() == 1);
  PrepareInsertCommon(common);

  // Masking with 2 can only produce 0 or 2, i.e. one of the two free indices.
  const size_t target_index = H1(hash, common.control()) & 2;

  // One unit of growth budget is consumed by this insertion.
  const auto remaining_growth = common.growth_left() - 1;
  common.set_growth_left(remaining_growth);

  SetCtrlInSingleGroupTable(common, target_index, H2(hash), slot_size);
  common.infoz().RecordInsert(hash, /*distance_from_desired=*/0);
  return target_index;
}
void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) { void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
assert(ctrl[capacity] == ctrl_t::kSentinel); assert(ctrl[capacity] == ctrl_t::kSentinel);
assert(IsValidCapacity(capacity)); assert(IsValidCapacity(capacity));
...@@ -254,9 +282,10 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { ...@@ -254,9 +282,10 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) {
} }
void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy, void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
bool reuse) { bool reuse, bool soo_enabled) {
c.set_size(0); c.set_size(0);
if (reuse) { if (reuse) {
assert(!soo_enabled || c.capacity() > SooCapacity());
ResetCtrl(c, policy.slot_size); ResetCtrl(c, policy.slot_size);
ResetGrowthLeft(c); ResetGrowthLeft(c);
c.infoz().RecordStorageChanged(0, c.capacity()); c.infoz().RecordStorageChanged(0, c.capacity());
...@@ -264,12 +293,9 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy, ...@@ -264,12 +293,9 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
// We need to record infoz before calling dealloc, which will unregister // We need to record infoz before calling dealloc, which will unregister
// infoz. // infoz.
c.infoz().RecordClearedReservation(); c.infoz().RecordClearedReservation();
c.infoz().RecordStorageChanged(0, 0); c.infoz().RecordStorageChanged(0, soo_enabled ? SooCapacity() : 0);
(*policy.dealloc)(c, policy); (*policy.dealloc)(c, policy);
c.set_control(EmptyGroup()); c = soo_enabled ? CommonFields{soo_tag_t{}} : CommonFields{};
c.set_generation_ptr(EmptyGeneration());
c.set_slots(nullptr);
c.set_capacity(0);
} }
} }
...@@ -286,7 +312,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes( ...@@ -286,7 +312,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
// Copy second half of bytes to the beginning. // Copy second half of bytes to the beginning.
// We potentially copy more bytes in order to have compile time known size. // We potentially copy more bytes in order to have compile time known size.
// Mirrored bytes from the old_ctrl_ will also be copied. // Mirrored bytes from the old_ctrl() will also be copied.
// In case of old_capacity_ == 3, we will copy 1st element twice. // In case of old_capacity_ == 3, we will copy 1st element twice.
// Examples: // Examples:
// old_ctrl = 0S0EEEEEEE... // old_ctrl = 0S0EEEEEEE...
...@@ -297,7 +323,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes( ...@@ -297,7 +323,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
// //
// old_ctrl = 0123456S0123456EE... // old_ctrl = 0123456S0123456EE...
// new_ctrl = 456S0123?????????... // new_ctrl = 456S0123?????????...
std::memcpy(new_ctrl, old_ctrl_ + half_old_capacity + 1, kHalfWidth); std::memcpy(new_ctrl, old_ctrl() + half_old_capacity + 1, kHalfWidth);
// Clean up copied kSentinel from old_ctrl. // Clean up copied kSentinel from old_ctrl.
new_ctrl[half_old_capacity] = ctrl_t::kEmpty; new_ctrl[half_old_capacity] = ctrl_t::kEmpty;
...@@ -348,34 +374,55 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes( ...@@ -348,34 +374,55 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
new_ctrl[new_capacity] = ctrl_t::kSentinel; new_ctrl[new_capacity] = ctrl_t::kSentinel;
} }
// Fills the control bytes of a freshly allocated single-group table of
// `new_capacity` that replaces an SOO table: every byte becomes kEmpty, then
// `h2` is stored at index 1 and at index `new_capacity + 2`, and the sentinel
// is placed at index `new_capacity`.
void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
                                                   size_t new_capacity) {
  assert(is_single_group(new_capacity));

  const size_t ctrl_byte_count = NumControlBytes(new_capacity);
  std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty), ctrl_byte_count);

  assert(HashSetResizeHelper::SooSlotIndex() == 1);
  // When there was no SOO element, `h2` must be kEmpty; writing it
  // unconditionally then spares a branch on had_soo_slot_.
  assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);

  // Second copy of the byte, two positions past the sentinel index
  // (presumably the mirrored control byte for slot 1).
  new_ctrl[new_capacity + 2] = h2;
  new_ctrl[1] = h2;
  new_ctrl[new_capacity] = ctrl_t::kSentinel;
}
void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots( void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
void* old_slots, void* new_slots, size_t slot_size) const { void* new_slots, size_t slot_size) const {
assert(old_capacity_ > 0); assert(old_capacity_ > 0);
const size_t half_old_capacity = old_capacity_ / 2; const size_t half_old_capacity = old_capacity_ / 2;
SanitizerUnpoisonMemoryRegion(old_slots, slot_size * old_capacity_); SanitizerUnpoisonMemoryRegion(old_slots(), slot_size * old_capacity_);
std::memcpy(new_slots, std::memcpy(new_slots,
SlotAddress(old_slots, half_old_capacity + 1, slot_size), SlotAddress(old_slots(), half_old_capacity + 1, slot_size),
slot_size * half_old_capacity); slot_size * half_old_capacity);
std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size), std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size),
old_slots, slot_size * (half_old_capacity + 1)); old_slots(), slot_size * (half_old_capacity + 1));
} }
void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable( void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable(
CommonFields& c, void* old_slots, size_t slot_size) { CommonFields& c, size_t slot_size) {
assert(old_capacity_ < Group::kWidth / 2); assert(old_capacity_ < Group::kWidth / 2);
assert(is_single_group(c.capacity())); assert(is_single_group(c.capacity()));
assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity())); assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity()));
GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity()); GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity());
GrowIntoSingleGroupShuffleTransferableSlots(old_slots, c.slot_array(), GrowIntoSingleGroupShuffleTransferableSlots(c.slot_array(), slot_size);
slot_size);
// We poison since GrowIntoSingleGroupShuffleTransferableSlots // We poison since GrowIntoSingleGroupShuffleTransferableSlots
// may leave empty slots unpoisoned. // may leave empty slots unpoisoned.
PoisonSingleGroupEmptySlots(c, slot_size); PoisonSingleGroupEmptySlots(c, slot_size);
} }
// Copies the single element held by the old SOO table into the new slot array
// of `c`, at the index reported by SooSlotIndex(), and then poisons the empty
// slots of the single-group table. `slot_size` is the slot size in bytes.
void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c,
                                               size_t slot_size) {
  // Only meaningful when the old table was in SOO mode and held an element.
  assert(was_soo_);
  assert(had_soo_slot_);
  assert(is_single_group(c.capacity()));

  // Raw byte copy of the old element into its new slot.
  auto* const destination =
      SlotAddress(c.slot_array(), SooSlotIndex(), slot_size);
  std::memcpy(destination, old_soo_data(), slot_size);

  PoisonSingleGroupEmptySlots(c, slot_size);
}
} // namespace container_internal } // namespace container_internal
ABSL_NAMESPACE_END ABSL_NAMESPACE_END
} // namespace absl } // namespace absl
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <cstddef>
#include <unordered_set>
#include <utility>
#include <vector>
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_map.h"
...@@ -38,15 +43,16 @@ void TestInlineElementSize( ...@@ -38,15 +43,16 @@ void TestInlineElementSize(
// set cannot be flat_hash_set, however, since that would introduce a mutex // set cannot be flat_hash_set, however, since that would introduce a mutex
// deadlock. // deadlock.
std::unordered_set<const HashtablezInfo*>& preexisting_info, // NOLINT std::unordered_set<const HashtablezInfo*>& preexisting_info, // NOLINT
std::vector<Table>& tables, const typename Table::value_type& elt, std::vector<Table>& tables,
const std::vector<typename Table::value_type>& values,
size_t expected_element_size) { size_t expected_element_size) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
// We create a new table and must store it somewhere so that when we store // We create a new table and must store it somewhere so that when we store
// a pointer to the resulting `HashtablezInfo` into `preexisting_info` // a pointer to the resulting `HashtablezInfo` into `preexisting_info`
// that we aren't storing a dangling pointer. // that we aren't storing a dangling pointer.
tables.emplace_back(); tables.emplace_back();
// We must insert an element to get a hashtablez to instantiate. // We must insert elements to get a hashtablez to instantiate.
tables.back().insert(elt); tables.back().insert(values.begin(), values.end());
} }
size_t new_count = 0; size_t new_count = 0;
sampler.Iterate([&](const HashtablezInfo& info) { sampler.Iterate([&](const HashtablezInfo& info) {
...@@ -82,6 +88,9 @@ TEST(FlatHashMap, SampleElementSize) { ...@@ -82,6 +88,9 @@ TEST(FlatHashMap, SampleElementSize) {
std::vector<flat_hash_set<bigstruct>> flat_set_tables; std::vector<flat_hash_set<bigstruct>> flat_set_tables;
std::vector<node_hash_map<int, bigstruct>> node_map_tables; std::vector<node_hash_map<int, bigstruct>> node_map_tables;
std::vector<node_hash_set<bigstruct>> node_set_tables; std::vector<node_hash_set<bigstruct>> node_set_tables;
std::vector<bigstruct> set_values = {bigstruct{{0}}, bigstruct{{1}}};
std::vector<std::pair<const int, bigstruct>> map_values = {{0, bigstruct{}},
{1, bigstruct{}}};
// It takes thousands of new tables after changing the sampling parameters // It takes thousands of new tables after changing the sampling parameters
// before you actually get some instrumentation. And if you must actually // before you actually get some instrumentation. And if you must actually
...@@ -97,14 +106,14 @@ TEST(FlatHashMap, SampleElementSize) { ...@@ -97,14 +106,14 @@ TEST(FlatHashMap, SampleElementSize) {
std::unordered_set<const HashtablezInfo*> preexisting_info; // NOLINT std::unordered_set<const HashtablezInfo*> preexisting_info; // NOLINT
sampler.Iterate( sampler.Iterate(
[&](const HashtablezInfo& info) { preexisting_info.insert(&info); }); [&](const HashtablezInfo& info) { preexisting_info.insert(&info); });
TestInlineElementSize(sampler, preexisting_info, flat_map_tables, TestInlineElementSize(sampler, preexisting_info, flat_map_tables, map_values,
{0, bigstruct{}}, sizeof(int) + sizeof(bigstruct)); sizeof(int) + sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_map_tables, TestInlineElementSize(sampler, preexisting_info, node_map_tables, map_values,
{0, bigstruct{}}, sizeof(void*)); sizeof(void*));
TestInlineElementSize(sampler, preexisting_info, flat_set_tables, // TestInlineElementSize(sampler, preexisting_info, flat_set_tables, set_values,
bigstruct{}, sizeof(bigstruct)); sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_set_tables, // TestInlineElementSize(sampler, preexisting_info, node_set_tables, set_values,
bigstruct{}, sizeof(void*)); sizeof(void*));
#endif #endif
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment