Commit 8a3caf7d by Abseil Team Committed by Copybara-Service

Introduce `Group::MaskNonFull` without usage.

It can be used instead of `MaskEmptyOrDeleted` in case of inserting to empty table.
`MaskNonFull` requires less operations, in particular it eliminates `_mm_set1_epi8` and `_mm_cmpgt_epi8` operations.

PiperOrigin-RevId: 607587394
Change-Id: Ia48f922d1ca6de38cc91e7ab0d608c45f8f2c446
parent 4580d86d
...@@ -657,6 +657,14 @@ struct GroupSse2Impl { ...@@ -657,6 +657,14 @@ struct GroupSse2Impl {
static_cast<uint16_t>(_mm_movemask_epi8(ctrl) ^ 0xffff)); static_cast<uint16_t>(_mm_movemask_epi8(ctrl) ^ 0xffff));
} }
// Returns a bitmask representing the positions of non full slots.
// Note: this includes: kEmpty, kDeleted, kSentinel.
// It is useful in contexts when kSentinel is not present.
auto MaskNonFull() const {
return BitMask<uint16_t, kWidth>(
static_cast<uint16_t>(_mm_movemask_epi8(ctrl)));
}
// Returns a bitmask representing the positions of empty or deleted slots. // Returns a bitmask representing the positions of empty or deleted slots.
NonIterableBitMask<uint16_t, kWidth> MaskEmptyOrDeleted() const { NonIterableBitMask<uint16_t, kWidth> MaskEmptyOrDeleted() const {
auto special = _mm_set1_epi8(static_cast<char>(ctrl_t::kSentinel)); auto special = _mm_set1_epi8(static_cast<char>(ctrl_t::kSentinel));
...@@ -725,6 +733,18 @@ struct GroupAArch64Impl { ...@@ -725,6 +733,18 @@ struct GroupAArch64Impl {
/*NullifyBitsOnIteration=*/true>(mask); /*NullifyBitsOnIteration=*/true>(mask);
} }
// Returns a bitmask representing the positions of non full slots.
// Note: this includes: kEmpty, kDeleted, kSentinel.
// It is useful in contexts when kSentinel is not present.
auto MaskNonFull() const {
uint64_t mask = vget_lane_u64(
vreinterpret_u64_u8(vclt_s8(vreinterpret_s8_u8(ctrl),
vdup_n_s8(static_cast<int8_t>(0)))),
0);
return BitMask<uint64_t, kWidth, /*Shift=*/3,
/*NullifyBitsOnIteration=*/true>(mask);
}
NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const {
uint64_t mask = uint64_t mask =
vget_lane_u64(vreinterpret_u64_u8(vcgt_s8( vget_lane_u64(vreinterpret_u64_u8(vcgt_s8(
...@@ -797,6 +817,13 @@ struct GroupPortableImpl { ...@@ -797,6 +817,13 @@ struct GroupPortableImpl {
return BitMask<uint64_t, kWidth, 3>((ctrl ^ kMsbs8Bytes) & kMsbs8Bytes); return BitMask<uint64_t, kWidth, 3>((ctrl ^ kMsbs8Bytes) & kMsbs8Bytes);
} }
// Returns a bitmask representing the positions of non full slots.
// Note: this includes: kEmpty, kDeleted, kSentinel.
// It is useful in contexts when kSentinel is not present.
auto MaskNonFull() const {
return BitMask<uint64_t, kWidth, 3>(ctrl & kMsbs8Bytes);
}
NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const {
return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & ~(ctrl << 7)) & return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & ~(ctrl << 7)) &
kMsbs8Bytes); kMsbs8Bytes);
......
...@@ -479,6 +479,17 @@ void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) { ...@@ -479,6 +479,17 @@ void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
} }
BENCHMARK(BM_Group_MaskEmptyOrDeleted); BENCHMARK(BM_Group_MaskEmptyOrDeleted);
void BM_Group_MaskNonFull(benchmark::State& state) {
std::array<ctrl_t, Group::kWidth> group;
Iota(group.begin(), group.end(), -4);
Group g{group.data()};
for (auto _ : state) {
::benchmark::DoNotOptimize(g);
::benchmark::DoNotOptimize(g.MaskNonFull());
}
}
BENCHMARK(BM_Group_MaskNonFull);
void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) { void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
std::array<ctrl_t, Group::kWidth> group; std::array<ctrl_t, Group::kWidth> group;
Iota(group.begin(), group.end(), -2); Iota(group.begin(), group.end(), -2);
...@@ -501,6 +512,17 @@ void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) { ...@@ -501,6 +512,17 @@ void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
} }
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted); BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);
void BM_Group_MatchFirstNonFull(benchmark::State& state) {
std::array<ctrl_t, Group::kWidth> group;
Iota(group.begin(), group.end(), -2);
Group g{group.data()};
for (auto _ : state) {
::benchmark::DoNotOptimize(g);
::benchmark::DoNotOptimize(g.MaskNonFull().LowestBitSet());
}
}
BENCHMARK(BM_Group_MatchFirstNonFull);
void BM_DropDeletes(benchmark::State& state) { void BM_DropDeletes(benchmark::State& state) {
constexpr size_t capacity = (1 << 20) - 1; constexpr size_t capacity = (1 << 20) - 1;
std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth); std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
......
...@@ -307,6 +307,25 @@ TEST(Group, MaskFull) { ...@@ -307,6 +307,25 @@ TEST(Group, MaskFull) {
} }
} }
TEST(Group, MaskNonFull) {
if (Group::kWidth == 16) {
ctrl_t group[] = {
ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3),
ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7),
CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1),
CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)};
EXPECT_THAT(Group{group}.MaskNonFull(),
ElementsAre(0, 2, 4, 6, 10, 13, 14));
} else if (Group::kWidth == 8) {
ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty,
ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel,
ctrl_t::kSentinel, CtrlT(1)};
EXPECT_THAT(Group{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6));
} else {
FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth;
}
}
TEST(Group, MaskEmptyOrDeleted) { TEST(Group, MaskEmptyOrDeleted) {
if (Group::kWidth == 16) { if (Group::kWidth == 16) {
ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3), ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment