Commit ac8afe6c by Abseil Team Committed by Copybara-Service

Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()

PiperOrigin-RevId: 516275043
Change-Id: I906ef0d96dddf12e3738490bd26cb05753ec008c
parent 34e29aae
......@@ -14,6 +14,10 @@
#include "absl/strings/ascii.h"
#include <climits>
#include <cstring>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
......@@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
// Folds the case of every ASCII letter in the range [p, end), in place.
//
// With `ToUpper == true` the bytes 'a'..'z' become 'A'..'Z'; with
// `ToUpper == false` the bytes 'A'..'Z' become 'a'..'z'. Every other byte
// value is written back unchanged.
template <bool ToUpper>
constexpr void AsciiStrCaseFold(char* p, char* end) {
  // An ASCII letter and its other-case counterpart differ in exactly one bit,
  // so xor-ing with that bit switches the case. 'a' and 'A' are used to derive
  // the bit, but any matching pair of letters would yield the same value.
  constexpr unsigned char kCaseBit = 'a' ^ 'A';
  // Inclusive bounds of the letters whose case has to be changed.
  constexpr unsigned char kFirst = ToUpper ? 'a' : 'A';
  constexpr unsigned char kLast = ToUpper ? 'z' : 'Z';
  while (p < end) {
    const unsigned char c = static_cast<unsigned char>(*p);
    // Both comparisons are combined with a bitwise & (not &&) so the range
    // test stays branchless; they are widened to int first to keep
    // -Wbitwise-instead-of-logical quiet.
    const int needs_flip =
        static_cast<int>(kFirst <= c) & static_cast<int>(c <= kLast);
    const unsigned char mask =
        needs_flip != 0 ? kCaseBit : static_cast<unsigned char>(0);
    *p = static_cast<char>(static_cast<unsigned char>(c ^ mask));
    ++p;
  }
}
// Compile-time self-check of AsciiStrCaseFold.
//
// Runs both the lowering and the uppering fold over one entry per `char`
// value and compares each result with a straightforward range-based
// reference. Returns 0 when every entry matches; otherwise returns the index
// of the first mismatching entry, with index 0 reported as the table size so
// that it cannot be mistaken for success.
static constexpr size_t ValidateAsciiCasefold() {
  constexpr size_t kCharCount = 1 + CHAR_MAX - CHAR_MIN;
  char folded_lower[kCharCount] = {};
  char folded_upper[kCharCount] = {};
  // Seed both tables with every representable char value.
  for (unsigned int code = 0; code < kCharCount; ++code) {
    const char c = static_cast<char>(code);
    folded_lower[code] = c;
    folded_upper[code] = c;
  }
  AsciiStrCaseFold<false>(folded_lower, folded_lower + kCharCount);
  AsciiStrCaseFold<true>(folded_upper, folded_upper + kCharCount);
  for (size_t code = 0; code < kCharCount; ++code) {
    const char c = static_cast<char>(code);
    // Reference results, computed without any bit tricks.
    const char want_upper = ('a' <= c && c <= 'z' ? 'A' + (c - 'a') : c);
    const char want_lower = ('A' <= c && c <= 'Z' ? 'a' + (c - 'A') : c);
    if (folded_upper[code] != want_upper || folded_lower[code] != want_lower) {
      // 0 is the "all good" value, so a failure at index 0 is mapped to
      // kCharCount instead.
      return code > 0 ? code : kCharCount;
    }
  }
  return 0;
}
static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
} // namespace ascii_internal
// Converts every ASCII uppercase letter ('A'..'Z') in `*s` to lowercase, in
// place. All other bytes are left as they are.
//
// `s` must not be null.
void AsciiStrToLower(std::string* s) {
  // A single pass through AsciiStrCaseFold lowers every 'A'..'Z'; running a
  // separate per-character loop beforehand would only walk the string twice
  // for the same result.
  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
  ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
}
// Converts every ASCII lowercase letter ('a'..'z') in `*s` to uppercase, in
// place. All other bytes are left as they are.
//
// `s` must not be null.
void AsciiStrToUpper(std::string* s) {
  // A single pass through AsciiStrCaseFold uppers every 'a'..'z'; running a
  // separate per-character loop beforehand would only walk the string twice
  // for the same result.
  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
  ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
}
void RemoveExtraAsciiWhitespace(std::string* str) {
......
......@@ -14,6 +14,7 @@
#include "absl/strings/ascii.h"
#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstring>
......@@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) {
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
std::string mutable_str("STUVWX");
std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
absl::AsciiStrToLower(&mutable_str);
EXPECT_EQ("stuvwx", mutable_str);
EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
......@@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) {
TEST(AsciiStrTo, Upper) {
const char buf[] = "abcdef";
const std::string str("ghijkl");
const std::string str2("mnopqr");
const std::string str2("_`?@[{amnopqrstuvwxyz");
const absl::string_view sp(str2);
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment