Commit 0f357650, authored by Connal de Souza and committed by Copybara-Service

Optimize ConvertSpecialToEmptyAndFullToDeleted on Arm

BM_DropDeletes      73.4µs ± 0%    68.9µs ± 1%   -6.22%   (p=0.008 n=5+5)

PiperOrigin-RevId: 511813266
Change-Id: Id28cece454d583e2dfe060e27cfc4720f987f009
parent 0ecfe235
@@ -675,9 +675,10 @@ struct GroupAArch64Impl {
   void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
     uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0);
     constexpr uint64_t msbs = 0x8080808080808080ULL;
-    constexpr uint64_t lsbs = 0x0101010101010101ULL;
-    auto x = mask & msbs;
-    auto res = (~x + (x >> 7)) & ~lsbs;
+    constexpr uint64_t slsbs = 0x0202020202020202ULL;
+    constexpr uint64_t midbs = 0x7e7e7e7e7e7e7e7eULL;
+    auto x = slsbs & (mask >> 6);
+    auto res = (x + midbs) | msbs;
     little_endian::Store64(dst, res);
   }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment