Commit 91b861c5 by Abseil Team Committed by Copybara-Service

Add absl::CharSet.

PiperOrigin-RevId: 559415517
Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb
parent 7aef7808
......@@ -263,6 +263,7 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/ascii.h"
"strings/charconv.cc"
"strings/charconv.h"
"strings/charset.h"
"strings/cord.cc"
"strings/cord.h"
"strings/cord_analysis.cc"
......@@ -327,7 +328,6 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/strip.h"
"strings/substitute.cc"
"strings/substitute.h"
"strings/internal/char_map.h"
"strings/internal/escaping.h"
"strings/internal/escaping.cc"
"strings/internal/memutil.cc"
......
......@@ -92,6 +92,7 @@ cc_library(
"string_view.h",
],
deps = [
":charset",
":internal",
":string_view",
"//absl/base",
......@@ -115,7 +116,6 @@ cc_library(
"internal/utf8.cc",
],
hdrs = [
"internal/char_map.h",
"internal/escaping.h",
"internal/ostringstream.h",
"internal/resize_uninitialized.h",
......@@ -307,6 +307,50 @@ cc_test(
],
)
# Benchmark target built from charset_benchmark.cc; tagged "benchmark" and
# private to this package.
cc_test(
name = "charset_benchmark",
size = "small",
srcs = [
"charset_benchmark.cc",
],
copts = ABSL_TEST_COPTS,
tags = [
"benchmark",
],
visibility = ["//visibility:private"],
deps = [
":charset",
"//absl/log:check",
"@com_github_google_benchmark//:benchmark_main",
],
)
# Header-only library exposing charset.h (absl::CharSet).
cc_library(
name = "charset",
hdrs = [
"charset.h",
],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
":string_view",
"//absl/base:core_headers",
],
)
# Unit tests for charset.h, built from charset_test.cc; private to this
# package.
cc_test(
name = "charset_test",
size = "small",
srcs = ["charset_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":charset",
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "cord_internal",
srcs = [
......@@ -1087,27 +1131,6 @@ cc_test(
)
cc_test(
name = "char_map_test",
srcs = ["internal/char_map_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "char_map_benchmark",
srcs = ["internal/char_map_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
deps = [
":internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "charconv_test",
srcs = ["charconv_test.cc"],
copts = ABSL_TEST_COPTS,
......
......@@ -78,6 +78,7 @@ absl_cc_library(
absl::strings_internal
absl::base
absl::bits
absl::charset
absl::config
absl::core_headers
absl::endian
......@@ -89,12 +90,24 @@ absl_cc_library(
PUBLIC
)
# Public header-only target exposing charset.h (absl::CharSet).
absl_cc_library(
NAME
charset
HDRS
charset.h
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::core_headers
absl::string_view
PUBLIC
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
strings_internal
HDRS
"internal/char_map.h"
"internal/escaping.cc"
"internal/escaping.h"
"internal/ostringstream.h"
......@@ -357,13 +370,13 @@ absl_cc_test(
absl_cc_test(
NAME
char_map_test
charset_test
SRCS
"internal/char_map_test.cc"
"charset_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings_internal
absl::strings
GTest::gmock_main
)
......
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
// constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined, and the result is still a constexpr
// expression. For example:
//
// constexpr absl::CharSet kLettersAndNumbers =
// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
// constexpr absl::CharSet kLettersAndWhitespace =
// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the .contains method, e.g.
//
// absl::CharSet hex_letters("abcdef");
// hex_letters.contains('a'); // true
// hex_letters.contains('g'); // false
#ifndef ABSL_STRINGS_CHARSET_H_
#define ABSL_STRINGS_CHARSET_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/strings/string_view.h"
namespace absl {
// A set of 8-bit characters stored as a 256-bit vector in four 64-bit words.
// Bit `c % 64` of word `c / 64` is set when the character whose unsigned
// value is `c` belongs to the set. Every member function is constexpr.
class CharSet {
 public:
  // Creates an empty set.
  constexpr CharSet() : m_() {}

  // Creates a set holding every character of `str`.
  constexpr explicit CharSet(absl::string_view str) : m_() {
    for (const char ch : str) {
      SetChar(static_cast<unsigned char>(ch));
    }
  }

  // Returns true if `c` is a member of this set.
  constexpr bool contains(char c) const {
    const unsigned char uc = static_cast<unsigned char>(c);
    return ((m_[uc / 64] >> (uc % 64)) & 0x1) != 0;
  }

  // Returns true if the set has no members, i.e. all four words are zero.
  constexpr bool empty() const {
    return (m_[0] | m_[1] | m_[2] | m_[3]) == 0;
  }

  // Returns the set whose only member is `x`.
  static constexpr CharSet Char(char x) {
    return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Returns the set of all chars in the closed interval [lo, hi], with both
  // bounds compared by their unsigned char value.
  static constexpr CharSet Range(char lo, char hi) {
    return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection.
  friend constexpr CharSet operator&(const CharSet& lhs, const CharSet& rhs) {
    return CharSet(lhs.m_[0] & rhs.m_[0], lhs.m_[1] & rhs.m_[1],
                   lhs.m_[2] & rhs.m_[2], lhs.m_[3] & rhs.m_[3]);
  }

  // Set union.
  friend constexpr CharSet operator|(const CharSet& lhs, const CharSet& rhs) {
    return CharSet(lhs.m_[0] | rhs.m_[0], lhs.m_[1] | rhs.m_[1],
                   lhs.m_[2] | rhs.m_[2], lhs.m_[3] | rhs.m_[3]);
  }

  // Set complement over all 256 character values.
  friend constexpr CharSet operator~(const CharSet& set) {
    return CharSet(~set.m_[0], ~set.m_[1], ~set.m_[2], ~set.m_[3]);
  }

  // Predefined classes mirroring the char-classifying predicates in <cctype>.
  static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
  static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
  static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
  static constexpr CharSet AsciiAlphabet() {
    return AsciiLowercase() | AsciiUppercase();
  }
  static constexpr CharSet AsciiAlphanumerics() {
    return AsciiDigits() | AsciiAlphabet();
  }
  static constexpr CharSet AsciiHexDigits() {
    return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
  }
  static constexpr CharSet AsciiPrintable() {
    return CharSet::Range(0x20, 0x7e);
  }
  static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
  // Printable characters that are neither whitespace nor alphanumeric.
  static constexpr CharSet AsciiPunctuation() {
    return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
  }

 private:
  // Builds a set directly from its four 64-bit words, lowest chars first.
  constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Bits of word `word` covering [lo, hi]: the chars below hi + 1 with the
  // chars below lo masked out.
  static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
    return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
           ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
  }

  // All the chars in the specified word of the range [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    // The range ends at or before this word starts: no bits.
    if (upper <= 64 * word) return 0;
    // The range covers this word entirely: all bits.
    if (upper >= 64 * (word + 1)) return ~static_cast<uint64_t>(0);
    // The range ends inside this word: keep the low `upper % 64` bits.
    return ~static_cast<uint64_t>(0) >> (64 - upper % 64);
  }

  // The single bit for `x` if it falls in word `word`, otherwise zero.
  static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
    const unsigned char ux = static_cast<unsigned char>(x);
    if (ux / 64 != word) return 0;
    return static_cast<uint64_t>(1) << (ux % 64);
  }

  // Adds `c` to the set.
  constexpr void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  uint64_t m_[4];
};
} // namespace absl
#endif // ABSL_STRINGS_CHARSET_H_
// Copyright 2017 The Abseil Authors.
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,30 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cstdint>
#include "benchmark/benchmark.h"
#include "absl/log/check.h"
#include "absl/strings/charset.h"
namespace {
absl::strings_internal::Charmap MakeBenchmarkMap() {
absl::strings_internal::Charmap m;
absl::CharSet MakeBenchmarkMap() {
absl::CharSet m;
uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL);
for (uint32_t i = 0; i < 256; ++i) {
if ((x[i / 32] >> (i % 32)) & 1)
m = m | absl::strings_internal::Charmap::Char(i);
if ((x[i / 32] >> (i % 32)) & 1) m = m | absl::CharSet::Char(i);
}
return m;
}
// Micro-benchmark for CharSet::contains.
void BM_Contains(benchmark::State& state) {
static void BM_Contains(benchmark::State& state) {
// Loop-body replicated 10 times to increase time per iteration.
// Argument continuously changed to avoid generating common subexpressions.
const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap();
// Final CHECK used to discourage unwanted optimization.
const absl::CharSet benchmark_map = MakeBenchmarkMap();
unsigned char c = 0;
int ops = 0;
for (auto _ : state) {
......@@ -50,12 +50,8 @@ void BM_Contains(benchmark::State& state) {
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
}
benchmark::DoNotOptimize(ops);
CHECK_NE(ops, -1);
}
BENCHMARK(BM_Contains);
// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
// their running time is data-dependent and it is not worth characterizing
// "typical" data.
} // namespace
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charset.h"
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
#include "absl/strings/string_view.h"
namespace {
// Complement of the empty set.
constexpr absl::CharSet everything_map = ~absl::CharSet();
// Default-constructed set.
constexpr absl::CharSet nothing_map = absl::CharSet();
// Exercises contains(), operator& and empty() on sets built from an empty
// string, from length-limited string_views, and from a literal with an
// embedded NUL.
TEST(Charmap, AllTests) {
const absl::CharSet also_nothing_map("");
EXPECT_TRUE(everything_map.contains('\0'));
EXPECT_FALSE(nothing_map.contains('\0'));
EXPECT_FALSE(also_nothing_map.contains('\0'));
// ch starts at 1 and the loop stops when it wraps back to 0, so every
// non-NUL value is visited once.
for (unsigned char ch = 1; ch != 0; ++ch) {
SCOPED_TRACE(ch);
EXPECT_TRUE(everything_map.contains(ch));
EXPECT_FALSE(nothing_map.contains(ch));
EXPECT_FALSE(also_nothing_map.contains(ch));
}
// Only the first 5 chars ("&@#@^") are used; '@' repeats, so 4 distinct
// members are expected.
const absl::CharSet symbols(absl::string_view("&@#@^!@?", 5));
EXPECT_TRUE(symbols.contains('&'));
EXPECT_TRUE(symbols.contains('@'));
EXPECT_TRUE(symbols.contains('#'));
EXPECT_TRUE(symbols.contains('^'));
EXPECT_FALSE(symbols.contains('!'));
EXPECT_FALSE(symbols.contains('?'));
int cnt = 0;
for (unsigned char ch = 1; ch != 0; ++ch) cnt += symbols.contains(ch);
EXPECT_EQ(cnt, 4);
// lets uses "^ab"; lets2 uses the first 10 bytes, which include the embedded
// NUL and the 'k' after it.
const absl::CharSet lets(absl::string_view("^abcde", 3));
const absl::CharSet lets2(absl::string_view("fghij\0klmnop", 10));
// No explicit length here; the expectation below is that 'k' is absent
// (presumably the literal converts as a NUL-terminated string).
const absl::CharSet lets3("fghij\0klmnop");
EXPECT_TRUE(lets2.contains('k'));
EXPECT_FALSE(lets3.contains('k'));
// symbols and lets share '^'; lets and lets2 share nothing.
EXPECT_FALSE((symbols & lets).empty());
EXPECT_TRUE((lets2 & lets).empty());
EXPECT_FALSE((lets & symbols).empty());
EXPECT_TRUE((lets & lets2).empty());
EXPECT_TRUE(nothing_map.empty());
EXPECT_FALSE(lets.empty());
}
// Returns the members of `m` as a string, ordered by ascending unsigned
// character value.
std::string Members(const absl::CharSet& m) {
  std::string members;
  for (int value = 0; value < 256; ++value) {
    const char ch = static_cast<char>(value);
    if (m.contains(ch)) {
      members.push_back(ch);
    }
  }
  return members;
}
// Returns the chars from `lo` up to and including `hi`, stepping by one.
// Does not assume lo <= hi: the walk stops only when `hi` is reached, so it
// wraps past 255 back to 0 if needed.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string result;
  unsigned char cur = lo;
  do {
    result.push_back(static_cast<char>(cur));
  } while (cur++ != hi);  // Compare before stepping so `hi` itself is kept.
  return result;
}
// Checks that construction, Char/Range, the |, & and ~ operators, contains()
// and empty() can all be evaluated in constexpr context and give the expected
// members.
TEST(Charmap, Constexpr) {
constexpr absl::CharSet kEmpty = absl::CharSet();
EXPECT_EQ(Members(kEmpty), "");
constexpr absl::CharSet kA = absl::CharSet::Char('A');
EXPECT_EQ(Members(kA), "A");
constexpr absl::CharSet kAZ = absl::CharSet::Range('A', 'Z');
EXPECT_EQ(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
constexpr absl::CharSet kIdentifier =
absl::CharSet::Range('0', '9') | absl::CharSet::Range('A', 'Z') |
absl::CharSet::Range('a', 'z') | absl::CharSet::Char('_');
// Members() lists chars in ascending value order, hence '_' between the
// uppercase and lowercase letters.
EXPECT_EQ(Members(kIdentifier),
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"_"
"abcdefghijklmnopqrstuvwxyz");
constexpr absl::CharSet kAll = ~absl::CharSet();
for (size_t i = 0; i < 256; ++i) {
SCOPED_TRACE(i);
EXPECT_TRUE(kAll.contains(i));
}
// Duplicate letters collapse; the result is sorted by char value.
constexpr absl::CharSet kHello = absl::CharSet("Hello, world!");
EXPECT_EQ(Members(kHello), " !,Hdelorw");
// test negation and intersection
constexpr absl::CharSet kABC =
absl::CharSet::Range('A', 'Z') & ~absl::CharSet::Range('D', 'Z');
EXPECT_EQ(Members(kABC), "ABC");
// contains
constexpr bool kContainsA = absl::CharSet("abc").contains('a');
EXPECT_TRUE(kContainsA);
constexpr bool kContainsD = absl::CharSet("abc").contains('d');
EXPECT_FALSE(kContainsD);
// empty
constexpr bool kEmptyIsEmpty = absl::CharSet().empty();
EXPECT_TRUE(kEmptyIsEmpty);
constexpr bool kNotEmptyIsEmpty = absl::CharSet("abc").empty();
EXPECT_FALSE(kNotEmptyIsEmpty);
}
// Compares CharSet::Range(lo, hi) against ClosedRangeString(lo, hi) for every
// pair lo <= hi drawn from a list of boundary values.
TEST(Charmap, Range) {
// Exhaustive testing takes too long, so test some of the boundaries that
// are perhaps going to cause trouble.
std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
16, 17, 30, 31, 32, 33, 63, 64, 65,
127, 128, 129, 223, 224, 225, 254, 255};
for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
SCOPED_TRACE(*lo);
// hi starts at lo, so single-char ranges are covered too.
for (auto hi = lo; hi != poi.end(); ++hi) {
SCOPED_TRACE(*hi);
EXPECT_EQ(Members(absl::CharSet::Range(*lo, *hi)),
ClosedRangeString(*lo, *hi));
}
}
}
// A string_view with an explicit length carries its embedded NUL into the
// set, along with the chars that follow it.
TEST(Charmap, NullByteWithStringView) {
char characters[5] = {'a', 'b', '\0', 'd', 'x'};
absl::string_view view(characters, 5);
absl::CharSet tester(view);
EXPECT_TRUE(tester.contains('a'));
EXPECT_TRUE(tester.contains('b'));
EXPECT_TRUE(tester.contains('\0'));
EXPECT_TRUE(tester.contains('d'));
EXPECT_TRUE(tester.contains('x'));
EXPECT_FALSE(tester.contains('c'));
}
// For every value in [0, 256), checks that each predefined CharSet class
// agrees with the matching absl::ascii_is* predicate.
TEST(CharmapCtype, Match) {
for (int c = 0; c < 256; ++c) {
// Trace both the numeric value and the char form for failure messages.
SCOPED_TRACE(c);
SCOPED_TRACE(static_cast<char>(c));
EXPECT_EQ(absl::ascii_isupper(c),
absl::CharSet::AsciiUppercase().contains(c));
EXPECT_EQ(absl::ascii_islower(c),
absl::CharSet::AsciiLowercase().contains(c));
EXPECT_EQ(absl::ascii_isdigit(c), absl::CharSet::AsciiDigits().contains(c));
EXPECT_EQ(absl::ascii_isalpha(c),
absl::CharSet::AsciiAlphabet().contains(c));
EXPECT_EQ(absl::ascii_isalnum(c),
absl::CharSet::AsciiAlphanumerics().contains(c));
EXPECT_EQ(absl::ascii_isxdigit(c),
absl::CharSet::AsciiHexDigits().contains(c));
EXPECT_EQ(absl::ascii_isprint(c),
absl::CharSet::AsciiPrintable().contains(c));
EXPECT_EQ(absl::ascii_isspace(c),
absl::CharSet::AsciiWhitespace().contains(c));
EXPECT_EQ(absl::ascii_ispunct(c),
absl::CharSet::AsciiPunctuation().contains(c));
}
}
} // namespace
......@@ -26,7 +26,7 @@
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/char_map.h"
#include "absl/strings/charset.h"
#include "absl/strings/internal/escaping.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/utf8.h"
......
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Character Map Class
//
// A fast, bit-vector map for 8-bit unsigned characters.
// This class is useful for non-character purposes as well.
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// A bit-vector map over all 256 8-bit character values, stored as four 64-bit
// words: bit `c % 64` of word `c / 64` is set when the char whose unsigned
// value is `c` is in the map.
class Charmap {
 public:
  // Creates an empty map.
  constexpr Charmap() : m_() {}

  // Initializes with the first `len` chars of `str`. NUL is not treated as a
  // terminator, but as an ordinary char to be added to the map. A `len` that
  // is zero or negative yields an empty map and `str` is not read.
  Charmap(const char* str, int len) : m_() {
    // A bounded loop (rather than `while (len--)`) keeps a negative `len`
    // from walking far past the end of the buffer.
    for (int i = 0; i < len; ++i) SetChar(str[i]);
  }

  // Initializes with a given char*. NUL is treated as a terminator
  // and will not be in the charmap.
  explicit Charmap(const char* str) : m_() {
    while (*str) SetChar(*str++);
  }

  // Returns true if `c` is in the map.
  constexpr bool contains(unsigned char c) const {
    return (m_[c / 64] >> (c % 64)) & 0x1;
  }

  // Returns true if and only if a character exists in both maps.
  bool IntersectsWith(const Charmap& c) const {
    return !(*this & c).IsZero();
  }

  // Returns true if the map contains no characters.
  bool IsZero() const {
    for (uint64_t c : m_) {
      if (c != 0) return false;
    }
    return true;
  }

  // Containing only a single specified char.
  static constexpr Charmap Char(char x) {
    return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Containing all the chars in the C-string 's'.
  static constexpr Charmap FromString(const char* s) {
    Charmap ret;
    while (*s) ret = ret | Char(*s++);
    return ret;
  }

  // Containing all the chars in the closed interval [lo,hi].
  static constexpr Charmap Range(char lo, char hi) {
    return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Intersection of two maps.
  friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
                   a.m_[3] & b.m_[3]);
  }

  // Union of two maps.
  friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
                   a.m_[3] | b.m_[3]);
  }

  // Complement over all 256 character values.
  friend constexpr Charmap operator~(const Charmap& a) {
    return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

 private:
  // Builds a map directly from its four 64-bit words, lowest chars first.
  constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Bits of word `word` covering [lo, hi]: the chars below hi + 1 with the
  // chars below lo masked out.
  static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
    return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
           ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
  }

  // All the chars in the specified word of the range [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    return (upper <= 64 * word)
               ? 0
               : (upper >= 64 * (word + 1))
                     ? ~static_cast<uint64_t>(0)
                     : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
  }

  // The single bit for `x` if it falls in word `word`, otherwise zero.
  static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
    const auto unsigned_x = static_cast<unsigned char>(x);
    return (unsigned_x / 64 == word)
               ? (static_cast<uint64_t>(1) << (unsigned_x % 64))
               : 0;
  }

  // Adds `c` to the map.
  void SetChar(char c) {
    const auto unsigned_c = static_cast<unsigned char>(c);
    m_[unsigned_c / 64] |= static_cast<uint64_t>(1) << (unsigned_c % 64);
  }

  uint64_t m_[4];
};
// Counterparts of the char-classifying predicates in <cctype>, expressed as
// constexpr Charmap values.

// 'A' through 'Z'.
constexpr Charmap UpperCharmap() {
  return Charmap::Range('A', 'Z');
}

// 'a' through 'z'.
constexpr Charmap LowerCharmap() {
  return Charmap::Range('a', 'z');
}

// '0' through '9'.
constexpr Charmap DigitCharmap() {
  return Charmap::Range('0', '9');
}

// Uppercase and lowercase letters.
constexpr Charmap AlphaCharmap() {
  return UpperCharmap() | LowerCharmap();
}

// Letters and digits.
constexpr Charmap AlnumCharmap() {
  return AlphaCharmap() | DigitCharmap();
}

// Digits plus 'A'-'F' and 'a'-'f'.
constexpr Charmap XDigitCharmap() {
  return Charmap::Range('a', 'f') | Charmap::Range('A', 'F') | DigitCharmap();
}

// 0x20 through 0x7e.
constexpr Charmap PrintCharmap() {
  return Charmap::Range(0x20, 0x7e);
}

// Tab, newline, vertical tab, form feed, carriage return and space.
constexpr Charmap SpaceCharmap() {
  return Charmap::FromString("\t\n\v\f\r ");
}

// Everything in 0 through 0x7f that is not printable.
constexpr Charmap CntrlCharmap() {
  return ~PrintCharmap() & Charmap::Range(0, 0x7f);
}

// Tab and space.
constexpr Charmap BlankCharmap() {
  return Charmap::FromString("\t ");
}

// Printable characters that are not whitespace.
constexpr Charmap GraphCharmap() {
  return ~SpaceCharmap() & PrintCharmap();
}

// Graphic characters that are not alphanumeric.
constexpr Charmap PunctCharmap() {
  return ~AlnumCharmap() & GraphCharmap();
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cctype>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
// Complement of the empty map.
constexpr absl::strings_internal::Charmap everything_map =
~absl::strings_internal::Charmap();
// Default-constructed map.
constexpr absl::strings_internal::Charmap nothing_map{};
// Exercises contains(), IntersectsWith() and IsZero() on maps built with and
// without an explicit length.
TEST(Charmap, AllTests) {
const absl::strings_internal::Charmap also_nothing_map("", 0);
ASSERT_TRUE(everything_map.contains('\0'));
ASSERT_TRUE(!nothing_map.contains('\0'));
ASSERT_TRUE(!also_nothing_map.contains('\0'));
// ch starts at 1 and the loop stops when it wraps back to 0, so every
// non-NUL value is visited once.
for (unsigned char ch = 1; ch != 0; ++ch) {
ASSERT_TRUE(everything_map.contains(ch));
ASSERT_TRUE(!nothing_map.contains(ch));
ASSERT_TRUE(!also_nothing_map.contains(ch));
}
// Only the first 5 chars ("&@#@^") are used; '@' repeats, so 4 distinct
// members are expected.
const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);
ASSERT_TRUE(symbols.contains('&'));
ASSERT_TRUE(symbols.contains('@'));
ASSERT_TRUE(symbols.contains('#'));
ASSERT_TRUE(symbols.contains('^'));
ASSERT_TRUE(!symbols.contains('!'));
ASSERT_TRUE(!symbols.contains('?'));
int cnt = 0;
for (unsigned char ch = 1; ch != 0; ++ch)
cnt += symbols.contains(ch);
ASSERT_EQ(cnt, 4);
// lets uses "^ab"; lets2 uses the first 10 bytes, including the embedded NUL
// and the 'k' after it; lets3 stops at the embedded NUL.
const absl::strings_internal::Charmap lets("^abcde", 3);
const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);
const absl::strings_internal::Charmap lets3("fghij\0klmnop");
ASSERT_TRUE(lets2.contains('k'));
ASSERT_TRUE(!lets3.contains('k'));
// symbols and lets share '^'; lets and lets2 share nothing.
ASSERT_TRUE(symbols.IntersectsWith(lets));
ASSERT_TRUE(!lets2.IntersectsWith(lets));
ASSERT_TRUE(lets.IntersectsWith(symbols));
ASSERT_TRUE(!lets.IntersectsWith(lets2));
ASSERT_TRUE(nothing_map.IsZero());
ASSERT_TRUE(!lets.IsZero());
}
namespace {
// Returns the members of `m` as a string, ordered by ascending unsigned
// character value.
std::string Members(const absl::strings_internal::Charmap& m) {
  std::string members;
  for (int value = 0; value < 256; ++value) {
    const unsigned char uc = static_cast<unsigned char>(value);
    if (m.contains(uc)) {
      members.push_back(static_cast<char>(uc));
    }
  }
  return members;
}
// Returns the chars from `lo` up to and including `hi`, stepping by one.
// Does not assume lo <= hi: the walk stops only when `hi` is reached, so it
// wraps past 255 back to 0 if needed.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string out(1, static_cast<char>(lo));
  for (unsigned char cur = lo; cur != hi;) {
    ++cur;
    out.push_back(static_cast<char>(cur));
  }
  return out;
}
} // namespace
// Checks that Char, Range, FromString and the |, & and ~ operators can be
// evaluated in constexpr context and give the expected members.
TEST(Charmap, Constexpr) {
constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
EXPECT_THAT(Members(kEmpty), "");
constexpr absl::strings_internal::Charmap kA =
absl::strings_internal::Charmap::Char('A');
EXPECT_THAT(Members(kA), "A");
constexpr absl::strings_internal::Charmap kAZ =
absl::strings_internal::Charmap::Range('A', 'Z');
EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
constexpr absl::strings_internal::Charmap kIdentifier =
absl::strings_internal::Charmap::Range('0', '9') |
absl::strings_internal::Charmap::Range('A', 'Z') |
absl::strings_internal::Charmap::Range('a', 'z') |
absl::strings_internal::Charmap::Char('_');
// Members() lists chars in ascending value order, hence '_' between the
// uppercase and lowercase letters.
EXPECT_THAT(Members(kIdentifier),
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"_"
"abcdefghijklmnopqrstuvwxyz");
constexpr absl::strings_internal::Charmap kAll = everything_map;
for (size_t i = 0; i < 256; ++i) {
EXPECT_TRUE(kAll.contains(i)) << i;
}
// Duplicate letters collapse; the result is sorted by char value.
constexpr absl::strings_internal::Charmap kHello =
absl::strings_internal::Charmap::FromString("Hello, world!");
EXPECT_THAT(Members(kHello), " !,Hdelorw");
// test negation and intersection
constexpr absl::strings_internal::Charmap kABC =
absl::strings_internal::Charmap::Range('A', 'Z') &
~absl::strings_internal::Charmap::Range('D', 'Z');
EXPECT_THAT(Members(kABC), "ABC");
}
// Compares Charmap::Range(lo, hi) against ClosedRangeString(lo, hi) for every
// pair lo <= hi drawn from a list of boundary values.
TEST(Charmap, Range) {
// Exhaustive testing takes too long, so test some of the boundaries that
// are perhaps going to cause trouble.
std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
16, 17, 30, 31, 32, 33, 63, 64, 65,
127, 128, 129, 223, 224, 225, 254, 255};
for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
SCOPED_TRACE(*lo);
// hi starts at lo, so single-char ranges are covered too.
for (auto hi = lo; hi != poi.end(); ++hi) {
SCOPED_TRACE(*hi);
EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
ClosedRangeString(*lo, *hi));
}
}
}
// Collapses an int result to a bool: any nonzero value becomes true.
bool AsBool(int x) {
  return x != 0;
}
// For every value in [0, 256), checks that each *Charmap() helper agrees with
// the matching <cctype> predicate. AsBool() converts the predicates' int
// results to bool before comparison.
TEST(CharmapCtype, Match) {
for (int c = 0; c < 256; ++c) {
// Trace both the numeric value and the char form for failure messages.
SCOPED_TRACE(c);
SCOPED_TRACE(static_cast<char>(c));
EXPECT_EQ(AsBool(std::isupper(c)),
absl::strings_internal::UpperCharmap().contains(c));
EXPECT_EQ(AsBool(std::islower(c)),
absl::strings_internal::LowerCharmap().contains(c));
EXPECT_EQ(AsBool(std::isdigit(c)),
absl::strings_internal::DigitCharmap().contains(c));
EXPECT_EQ(AsBool(std::isalpha(c)),
absl::strings_internal::AlphaCharmap().contains(c));
EXPECT_EQ(AsBool(std::isalnum(c)),
absl::strings_internal::AlnumCharmap().contains(c));
EXPECT_EQ(AsBool(std::isxdigit(c)),
absl::strings_internal::XDigitCharmap().contains(c));
EXPECT_EQ(AsBool(std::isprint(c)),
absl::strings_internal::PrintCharmap().contains(c));
EXPECT_EQ(AsBool(std::isspace(c)),
absl::strings_internal::SpaceCharmap().contains(c));
EXPECT_EQ(AsBool(std::iscntrl(c)),
absl::strings_internal::CntrlCharmap().contains(c));
EXPECT_EQ(AsBool(std::isblank(c)),
absl::strings_internal::BlankCharmap().contains(c));
EXPECT_EQ(AsBool(std::isgraph(c)),
absl::strings_internal::GraphCharmap().contains(c));
EXPECT_EQ(AsBool(std::ispunct(c)),
absl::strings_internal::PunctCharmap().contains(c));
}
}
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment