Commit 1449c9a1 by Evan Brown Committed by Copybara-Service

Implement small object optimization in swisstable - disabled for now.

Details:
- We use the space for control/slots pointers as the inline buffer.
- We use a max inline capacity of 1 to make the implementation much simpler and to avoid having to randomize the iteration order for inline tables.
- For iteration of inline tables, we introduce the kSooControl buffer which just has 1 full control byte followed by 1 sentinel control byte so that incrementing yields an end() iterator. We don't access kSooControl during lookups - only iteration.
PiperOrigin-RevId: 613253492
Change-Id: Id98ff11842f8bef27ac7ed88138dc03b46ce4fa6
parent 6bf3c73f
......@@ -181,15 +181,13 @@ TEST(FlatHashSet, EraseIf) {
}
}
class PoisonInline {
class PoisonSoo {
int64_t data_;
public:
explicit PoisonInline(int64_t d) : data_(d) {
SanitizerPoisonObject(&data_);
}
PoisonInline(const PoisonInline& that) : PoisonInline(*that) {}
~PoisonInline() { SanitizerUnpoisonObject(&data_); }
explicit PoisonSoo(int64_t d) : data_(d) { SanitizerPoisonObject(&data_); }
PoisonSoo(const PoisonSoo& that) : PoisonSoo(*that) {}
~PoisonSoo() { SanitizerUnpoisonObject(&data_); }
int64_t operator*() const {
SanitizerUnpoisonObject(&data_);
......@@ -198,45 +196,56 @@ class PoisonInline {
return ret;
}
template <typename H>
friend H AbslHashValue(H h, const PoisonInline& pi) {
friend H AbslHashValue(H h, const PoisonSoo& pi) {
return H::combine(std::move(h), *pi);
}
bool operator==(const PoisonInline& rhs) const { return **this == *rhs; }
bool operator==(const PoisonSoo& rhs) const { return **this == *rhs; }
};
// Tests that we don't touch the poisoned data_ member of PoisonInline.
TEST(FlatHashSet, PoisonInline) {
PoisonInline a(0), b(1);
{ // basic usage
flat_hash_set<PoisonInline> set;
set.insert(a);
EXPECT_THAT(set, UnorderedElementsAre(a));
set.insert(b);
EXPECT_THAT(set, UnorderedElementsAre(a, b));
set.erase(a);
EXPECT_THAT(set, UnorderedElementsAre(b));
set.rehash(0); // shrink to inline
EXPECT_THAT(set, UnorderedElementsAre(b));
}
{ // test move constructor from inline to inline
flat_hash_set<PoisonInline> set;
set.insert(a);
flat_hash_set<PoisonInline> set2(std::move(set));
EXPECT_THAT(set2, UnorderedElementsAre(a));
}
{ // test move assignment from inline to inline
flat_hash_set<PoisonInline> set, set2;
set.insert(a);
set2 = std::move(set);
EXPECT_THAT(set2, UnorderedElementsAre(a));
}
{ // test alloc move constructor from inline to inline
flat_hash_set<PoisonInline> set;
set.insert(a);
flat_hash_set<PoisonInline> set2(std::move(set),
std::allocator<PoisonInline>());
EXPECT_THAT(set2, UnorderedElementsAre(a));
}
// Walks a set of sanitizer-poisoned elements through insert, erase and
// rehash(0), checking the visible contents after every step.
TEST(FlatHashSet, PoisonSooBasic) {
  PoisonSoo first(0);
  PoisonSoo second(1);
  flat_hash_set<PoisonSoo> table;

  // One element.
  table.insert(first);
  EXPECT_THAT(table, UnorderedElementsAre(first));

  // Two elements.
  table.insert(second);
  EXPECT_THAT(table, UnorderedElementsAre(first, second));

  // Back down to one element.
  table.erase(first);
  EXPECT_THAT(table, UnorderedElementsAre(second));

  table.rehash(0);  // Shrink to SOO.
  EXPECT_THAT(table, UnorderedElementsAre(second));
}
// Move-constructs a set from a single-element set and checks that the element
// is observable in the newly constructed set.
TEST(FlatHashSet, PoisonSooMoveConstructSooToSoo) {
  PoisonSoo elem(0);

  flat_hash_set<PoisonSoo> source;
  source.insert(elem);

  flat_hash_set<PoisonSoo> target(std::move(source));
  EXPECT_THAT(target, UnorderedElementsAre(elem));
}
// Same as the plain move-construction test, but goes through the
// allocator-extended move constructor.
TEST(FlatHashSet, PoisonSooAllocMoveConstructSooToSoo) {
  PoisonSoo elem(0);

  flat_hash_set<PoisonSoo> source;
  source.insert(elem);

  flat_hash_set<PoisonSoo> target(std::move(source),
                                  std::allocator<PoisonSoo>());
  EXPECT_THAT(target, UnorderedElementsAre(elem));
}
// Move-assigns a single-element set onto an empty set and checks that the
// destination ends up holding that element.
TEST(FlatHashSet, PoisonSooMoveAssignFullSooToEmptySoo) {
  PoisonSoo elem(0);

  flat_hash_set<PoisonSoo> source, target;
  source.insert(elem);

  target = std::move(source);
  EXPECT_THAT(target, UnorderedElementsAre(elem));
}
// Move-assigns a single-element set onto another single-element set and checks
// that the destination holds only the source's element afterwards.
TEST(FlatHashSet, PoisonSooMoveAssignFullSooToFullSoo) {
  PoisonSoo incoming(0);
  PoisonSoo replaced(1);

  flat_hash_set<PoisonSoo> source, target;
  source.insert(incoming);
  target.insert(replaced);

  target = std::move(source);
  EXPECT_THAT(target, UnorderedElementsAre(incoming));
}
TEST(FlatHashSet, FlatHashSetPolicyDestroyReturnsTrue) {
......
......@@ -168,6 +168,9 @@ struct hash_policy_traits : common_policy_traits<Policy> {
#endif
}
// Whether small object optimization (SOO) is enabled for `Policy`.
// Dispatches through the ranked `soo_enabled_impl` overloads: the result is
// `Policy::soo_enabled()` when that expression is well-formed, and false
// otherwise, so a policy that does not mention SOO keeps it disabled.
static constexpr bool soo_enabled() { return soo_enabled_impl(Rank1{}); }
private:
template <class Hash>
struct HashElement {
......@@ -183,6 +186,18 @@ struct hash_policy_traits : common_policy_traits<Policy> {
return Policy::apply(HashElement<Hash>{*static_cast<const Hash*>(hash_fn)},
Policy::element(static_cast<slot_type*>(slot)));
}
// Use go/ranked-overloads for dispatching. Rank1 is preferred.
// Rank1 derives from Rank0, so a call made with `Rank1{}` matches the Rank1
// overload exactly and only falls back to the Rank0 overload (via the
// derived-to-base conversion) when the Rank1 overload is not viable.
struct Rank0 {};
struct Rank1 : Rank0 {};
// Use auto -> decltype as an enabler.
// Preferred overload: participates in overload resolution only when
// `P::soo_enabled()` is a valid expression, and forwards its result.
// `P` defaults to `Policy`; it is a template parameter so that the check
// happens at substitution time instead of being a hard error.
template <class P = Policy>
static constexpr auto soo_enabled_impl(Rank1) -> decltype(P::soo_enabled()) {
return P::soo_enabled();
}
// Fallback overload: chosen when the policy has no usable `soo_enabled()`.
static constexpr bool soo_enabled_impl(Rank0) { return false; }
};
} // namespace container_internal
......
......@@ -30,12 +30,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {
// Represents a control byte corresponding to a full slot with arbitrary hash.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
// We have space for `growth_left` before a single block of control bytes: a
// single block of empty control bytes for tables without any slots allocated.
// This enables removing a branch in the hot path of find(). In order to ensure
// that the control bytes are aligned to 16, we have 16 bytes before the control
// bytes even though growth_left only needs 8.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
......@@ -46,6 +48,18 @@ alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty};
// We need one full byte followed by a sentinel byte for iterator::operator++ to
// work. We have a full group after kSentinel to be safe (in case operator++ is
// changed to read a full group).
//
// Layout: index 0 is ZeroCtrlT() (a full control byte), index 1 is kSentinel,
// and the remaining 15 entries follow the sentinel.
// NOTE(review): index 2 is ZeroCtrlT() rather than kEmpty, presumably as the
// cloned copy of control byte 0 - confirm against the cloned-bytes layout.
ABSL_CONST_INIT ABSL_DLL const ctrl_t kSooControl[17] = {
ZeroCtrlT(), ctrl_t::kSentinel, ZeroCtrlT(), ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
ctrl_t::kEmpty};
// Fails the build if the number of control bytes needed for an SOO-capacity
// table ever exceeds the 17 entries provided above.
static_assert(NumControlBytes(SooCapacity()) <= 17,
"kSooControl capacity too small");
#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t Group::kWidth;
#endif
......@@ -111,6 +125,20 @@ bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash,
return !is_small(capacity) && (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6;
}
// Prepares `common` for inserting an element right after the table has been
// resized out of SOO, and returns the slot index (0 or 2) chosen for it.
//
// `hash` is the hash of the element being inserted. `slot_size` is forwarded
// to SetCtrlInSingleGroupTable. The table must already have capacity
// NextCapacity(SooCapacity()), which the first assert checks.
size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
CommonFields& common) {
assert(common.capacity() == NextCapacity(SooCapacity()));
// After resize from capacity 1 to 3, we always have exactly the slot with
// index 1 occupied, so we need to insert either at index 0 or index 2.
assert(HashSetResizeHelper::SooSlotIndex() == 1);
PrepareInsertCommon(common);
// Masking with 2 can only produce 0 or 2, i.e. one of the two free indices
// on either side of the occupied slot 1.
const size_t offset = H1(hash, common.control()) & 2;
// Account for the slot that is about to be consumed.
common.set_growth_left(common.growth_left() - 1);
// NOTE(review): presumably marks index `offset` as full with H2(hash) as its
// control byte - confirm against SetCtrlInSingleGroupTable.
SetCtrlInSingleGroupTable(common, offset, H2(hash), slot_size);
// The distance from the desired position is reported as 0 to the sampler.
common.infoz().RecordInsert(hash, /*distance_from_desired=*/0);
return offset;
}
void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
assert(ctrl[capacity] == ctrl_t::kSentinel);
assert(IsValidCapacity(capacity));
......@@ -254,9 +282,10 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) {
}
void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
bool reuse) {
bool reuse, bool soo_enabled) {
c.set_size(0);
if (reuse) {
assert(!soo_enabled || c.capacity() > SooCapacity());
ResetCtrl(c, policy.slot_size);
ResetGrowthLeft(c);
c.infoz().RecordStorageChanged(0, c.capacity());
......@@ -264,12 +293,9 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
// We need to record infoz before calling dealloc, which will unregister
// infoz.
c.infoz().RecordClearedReservation();
c.infoz().RecordStorageChanged(0, 0);
c.infoz().RecordStorageChanged(0, soo_enabled ? SooCapacity() : 0);
(*policy.dealloc)(c, policy);
c.set_control(EmptyGroup());
c.set_generation_ptr(EmptyGeneration());
c.set_slots(nullptr);
c.set_capacity(0);
c = soo_enabled ? CommonFields{soo_tag_t{}} : CommonFields{};
}
}
......@@ -286,7 +312,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
// Copy second half of bytes to the beginning.
// We potentially copy more bytes in order to have compile time known size.
// Mirrored bytes from the old_ctrl_ will also be copied.
// Mirrored bytes from the old_ctrl() will also be copied.
// In case of old_capacity_ == 3, we will copy 1st element twice.
// Examples:
// old_ctrl = 0S0EEEEEEE...
......@@ -297,7 +323,7 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
//
// old_ctrl = 0123456S0123456EE...
// new_ctrl = 456S0123?????????...
std::memcpy(new_ctrl, old_ctrl_ + half_old_capacity + 1, kHalfWidth);
std::memcpy(new_ctrl, old_ctrl() + half_old_capacity + 1, kHalfWidth);
// Clean up copied kSentinel from old_ctrl.
new_ctrl[half_old_capacity] = ctrl_t::kEmpty;
......@@ -348,34 +374,55 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
new_ctrl[new_capacity] = ctrl_t::kSentinel;
}
// Initializes the control bytes of a freshly allocated single-group table that
// replaces an SOO table.
//
// `new_ctrl` points at the new control array, `new_capacity` is the new
// table's capacity (must satisfy is_single_group), and `h2` is the control
// byte to store for the element carried over from SOO. When there was no SOO
// element, `h2` must be ctrl_t::kEmpty (see the assert below).
void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
size_t new_capacity) {
assert(is_single_group(new_capacity));
// Start with every control byte marked empty.
std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
NumControlBytes(new_capacity));
// The hard-coded index 1 below relies on the SOO element landing in slot 1.
assert(HashSetResizeHelper::SooSlotIndex() == 1);
// This allows us to avoid branching on had_soo_slot_.
// (With no SOO slot, h2 is kEmpty, so the unconditional stores below simply
// rewrite kEmpty.)
assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
// NOTE(review): index new_capacity + 2 is presumably the cloned copy of
// control byte 1 that lives after the sentinel - confirm.
new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
new_ctrl[new_capacity] = ctrl_t::kSentinel;
}
void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
void* old_slots, void* new_slots, size_t slot_size) const {
void* new_slots, size_t slot_size) const {
assert(old_capacity_ > 0);
const size_t half_old_capacity = old_capacity_ / 2;
SanitizerUnpoisonMemoryRegion(old_slots, slot_size * old_capacity_);
SanitizerUnpoisonMemoryRegion(old_slots(), slot_size * old_capacity_);
std::memcpy(new_slots,
SlotAddress(old_slots, half_old_capacity + 1, slot_size),
SlotAddress(old_slots(), half_old_capacity + 1, slot_size),
slot_size * half_old_capacity);
std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size),
old_slots, slot_size * (half_old_capacity + 1));
old_slots(), slot_size * (half_old_capacity + 1));
}
void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable(
CommonFields& c, void* old_slots, size_t slot_size) {
CommonFields& c, size_t slot_size) {
assert(old_capacity_ < Group::kWidth / 2);
assert(is_single_group(c.capacity()));
assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity()));
GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity());
GrowIntoSingleGroupShuffleTransferableSlots(old_slots, c.slot_array(),
slot_size);
GrowIntoSingleGroupShuffleTransferableSlots(c.slot_array(), slot_size);
// We poison since GrowIntoSingleGroupShuffleTransferableSlots
// may leave empty slots unpoisoned.
PoisonSingleGroupEmptySlots(c, slot_size);
}
// Copies the single element held by the old SOO table into the new backing
// array of `c`.
//
// Requires that the old table was in SOO mode and actually held an element,
// and that `c` now has a single-group capacity (all checked by asserts).
// `slot_size` is the number of bytes copied for the slot.
void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c,
size_t slot_size) {
assert(was_soo_);
assert(had_soo_slot_);
assert(is_single_group(c.capacity()));
// Bytewise copy of the old SOO slot into slot index SooSlotIndex() of the new
// slot array.
std::memcpy(SlotAddress(c.slot_array(), SooSlotIndex(), slot_size),
old_soo_data(), slot_size);
// NOTE(review): presumably re-poisons the slots that remain empty for the
// sanitizers - confirm against PoisonSingleGroupEmptySlots.
PoisonSingleGroupEmptySlots(c, slot_size);
}
} // namespace container_internal
ABSL_NAMESPACE_END
} // namespace absl
......@@ -12,6 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstddef>
#include <unordered_set>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_map.h"
......@@ -38,15 +43,16 @@ void TestInlineElementSize(
// set cannot be flat_hash_set, however, since that would introduce a mutex
// deadlock.
std::unordered_set<const HashtablezInfo*>& preexisting_info, // NOLINT
std::vector<Table>& tables, const typename Table::value_type& elt,
std::vector<Table>& tables,
const std::vector<typename Table::value_type>& values,
size_t expected_element_size) {
for (int i = 0; i < 10; ++i) {
// We create a new table and must store it somewhere so that when we store
// a pointer to the resulting `HashtablezInfo` into `preexisting_info`
// that we aren't storing a dangling pointer.
tables.emplace_back();
// We must insert an element to get a hashtablez to instantiate.
tables.back().insert(elt);
// We must insert elements to get a hashtablez to instantiate.
tables.back().insert(values.begin(), values.end());
}
size_t new_count = 0;
sampler.Iterate([&](const HashtablezInfo& info) {
......@@ -82,6 +88,9 @@ TEST(FlatHashMap, SampleElementSize) {
std::vector<flat_hash_set<bigstruct>> flat_set_tables;
std::vector<node_hash_map<int, bigstruct>> node_map_tables;
std::vector<node_hash_set<bigstruct>> node_set_tables;
std::vector<bigstruct> set_values = {bigstruct{{0}}, bigstruct{{1}}};
std::vector<std::pair<const int, bigstruct>> map_values = {{0, bigstruct{}},
{1, bigstruct{}}};
// It takes thousands of new tables after changing the sampling parameters
// before you actually get some instrumentation. And if you must actually
......@@ -97,14 +106,14 @@ TEST(FlatHashMap, SampleElementSize) {
std::unordered_set<const HashtablezInfo*> preexisting_info; // NOLINT
sampler.Iterate(
[&](const HashtablezInfo& info) { preexisting_info.insert(&info); });
TestInlineElementSize(sampler, preexisting_info, flat_map_tables,
{0, bigstruct{}}, sizeof(int) + sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_map_tables,
{0, bigstruct{}}, sizeof(void*));
TestInlineElementSize(sampler, preexisting_info, flat_set_tables, //
bigstruct{}, sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_set_tables, //
bigstruct{}, sizeof(void*));
TestInlineElementSize(sampler, preexisting_info, flat_map_tables, map_values,
sizeof(int) + sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_map_tables, map_values,
sizeof(void*));
TestInlineElementSize(sampler, preexisting_info, flat_set_tables, set_values,
sizeof(bigstruct));
TestInlineElementSize(sampler, preexisting_info, node_set_tables, set_values,
sizeof(void*));
#endif
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment