Commit 794352a9 by Abseil Team Committed by Copybara-Service

Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()

PiperOrigin-RevId: 592664369
Change-Id: I7aa7b045c2b3c0f25cff7b82eb9d9cc13e9fc49f
parent 72d7a159
...@@ -15,8 +15,10 @@ ...@@ -15,8 +15,10 @@
#include "absl/strings/ascii.h" #include "absl/strings/ascii.h"
#include <climits> #include <climits>
#include <cstdint>
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <type_traits>
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/nullability.h" #include "absl/base/nullability.h"
...@@ -160,6 +162,19 @@ ABSL_DLL const char kToUpper[256] = { ...@@ -160,6 +162,19 @@ ABSL_DLL const char kToUpper[256] = {
}; };
// clang-format on // clang-format on
// Returns a value of type `T` in which every byte is a copy of `value`.
//
// `T` must be an unsigned integral type no wider than 64 bits. The pattern is
// built by repeatedly doubling the number of populated low-order bits until
// the whole width of `T` is covered.
template <class T>
static constexpr T BroadcastByte(unsigned char value) {
  static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) &&
                    std::is_unsigned<T>::value,
                "only unsigned integers up to 64-bit allowed");
  constexpr size_t total_bits = sizeof(T) * CHAR_BIT;
  T pattern = value;
  // After each step the low `2 * filled_bits` bits hold copies of `value`.
  // The loop never shifts by the full width of `T`, so every shift is defined.
  for (size_t filled_bits = CHAR_BIT; filled_bits < total_bits;
       filled_bits *= 2) {
    pattern |= pattern << filled_bits;
  }
  return pattern;
}
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`). // Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`).
// Implemented by: // Implemented by:
// 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26). // 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
...@@ -175,7 +190,45 @@ constexpr bool AsciiInAZRange(unsigned char c) { ...@@ -175,7 +190,45 @@ constexpr bool AsciiInAZRange(unsigned char c) {
} }
// Case-folds the leading whole-word portion of [p, end) in place, processing
// sizeof(size_t) bytes per iteration, and returns a pointer to the first byte
// that was NOT processed. Up to sizeof(size_t) - 1 trailing bytes are left
// untouched for the caller to handle.
//
// When `ToUpper` is true, bytes in 'a'..'z' have their case bit flipped;
// otherwise bytes in 'A'..'Z' do. Bytes whose top bit is set (non-ASCII) and
// all other bytes are written back unchanged.
//
// NOTE(review): the next two lines contain both columns of a side-by-side
// diff rendering (the old `AsciiStrCaseFold` signature followed by the new
// `PartialAsciiStrCaseFold` signature); they are reproduced here as found.
template <bool ToUpper> template <bool ToUpper>
constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull<char*> p,
absl::Nonnull<char*> end) {
// Word type used to process several bytes at once.
using vec_t = size_t;
// Number of bytes in the input range.
const size_t n = static_cast<size_t>(end - p);
// SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
// Inclusive bounds of the byte range whose case must be flipped: lowercase
// letters when folding to upper case, uppercase letters otherwise.
constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z';
// End of the last complete word; n is rounded down to a multiple of
// sizeof(vec_t).
char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t);
while (p < swar_end) {
vec_t v = vec_t();
// memcpy the vector, but constexpr
// p[0] lands in the lowest-order byte of v, p[1] in the next one, and so on.
for (size_t i = 0; i < sizeof(vec_t); ++i) {
v |= static_cast<vec_t>(static_cast<unsigned char>(p[i]))
<< (i * CHAR_BIT);
}
// Top bit of a single byte.
constexpr unsigned int msb = 1u << (CHAR_BIT - 1);
// Keeps only the top bit of every byte; set for non-ASCII bytes.
const vec_t v_msb = v & BroadcastByte<vec_t>(msb);
// Widens each set top bit into an all-ones byte: the bit moved to the bottom
// of the next byte minus the bit moved to the bottom of its own byte leaves
// every bit of that byte set (unsigned wrap-around covers the highest byte).
const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1));
// Split the word: non-ASCII bytes are preserved verbatim, ASCII bytes are
// the only ones that take part in the range test below.
const vec_t v_nonascii = v & v_nonascii_mask;
const vec_t v_ascii = v & ~v_nonascii_mask;
// Per-byte range test via the top bit: after adding (msb - ch_a) the top bit
// is set for bytes >= ch_a; after adding (msb - ch_z - 1) it is set for bytes
// > ch_z. Every byte of v_ascii is below msb, so the additions cannot carry
// into the neighbouring byte.
const vec_t a = v_ascii + BroadcastByte<vec_t>(msb - ch_a - 0),
z = v_ascii + BroadcastByte<vec_t>(msb - ch_z - 1);
// (a ^ z) has the top bit set exactly for bytes in [ch_a, ch_z]. Shifting
// that flag right by 2 (0x20 when CHAR_BIT is 8, the ASCII case bit) and
// XOR-ing flips the case of those bytes only; `>>` binds tighter than `^`.
// The untouched non-ASCII bytes are then merged back in.
v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t>(msb)) >> 2);
// memcpy the vector, but constexpr
// Bytes are written back in the same order they were read.
for (size_t i = 0; i < sizeof(vec_t); ++i) {
p[i] = static_cast<char>(v >> (i * CHAR_BIT));
}
// Advance to the next word.
p += sizeof(v);
}
// First byte not covered by a whole word (equals swar_end).
return p;
}
template <bool ToUpper>
static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
absl::Nonnull<char*> end) { absl::Nonnull<char*> end) {
// The upper- and lowercase versions of ASCII characters differ by only 1 bit. // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
// When we need to flip the case, we can xor with this bit to achieve the // When we need to flip the case, we can xor with this bit to achieve the
...@@ -184,10 +237,17 @@ constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, ...@@ -184,10 +237,17 @@ constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
// have the same single bit difference. // have the same single bit difference.
constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
for (; p < end; ++p) { using vec_t = size_t;
// TODO(b/316380338): When FDO becomes able to vectorize these,
// revert this manual optimization and just leave the naive loop.
if (static_cast<size_t>(end - p) >= sizeof(vec_t)) {
p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end);
}
while (p < end) {
unsigned char v = static_cast<unsigned char>(*p); unsigned char v = static_cast<unsigned char>(*p);
v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0; v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
*p = static_cast<char>(v); *p = static_cast<char>(v);
++p;
} }
} }
......
...@@ -113,7 +113,7 @@ static void BM_StrToLower(benchmark::State& state) { ...@@ -113,7 +113,7 @@ static void BM_StrToLower(benchmark::State& state) {
BENCHMARK(BM_StrToLower) BENCHMARK(BM_StrToLower)
->DenseRange(0, 32) ->DenseRange(0, 32)
->RangeMultiplier(2) ->RangeMultiplier(2)
->Range(64, 1 << 20); ->Range(64, 1 << 26);
static void BM_StrToUpper(benchmark::State& state) { static void BM_StrToUpper(benchmark::State& state) {
const int size = state.range(0); const int size = state.range(0);
...@@ -127,6 +127,6 @@ static void BM_StrToUpper(benchmark::State& state) { ...@@ -127,6 +127,6 @@ static void BM_StrToUpper(benchmark::State& state) {
BENCHMARK(BM_StrToUpper) BENCHMARK(BM_StrToUpper)
->DenseRange(0, 32) ->DenseRange(0, 32)
->RangeMultiplier(2) ->RangeMultiplier(2)
->Range(64, 1 << 20); ->Range(64, 1 << 26);
} // namespace } // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment