Commit 77111e3d by Tsige Solomon Committed by Copybara-Service

Functions added: FindLongestCommonSuffix, FindLongestCommonPrefix.

PiperOrigin-RevId: 539784770
Change-Id: Ie224afa04af023bbddc89b967e8c8440f9e8a887
parent dfc7f46d
...@@ -13,8 +13,13 @@ ...@@ -13,8 +13,13 @@
// limitations under the License. // limitations under the License.
#include "absl/strings/match.h" #include "absl/strings/match.h"
#include "absl/strings/ascii.h"
#include <algorithm>
#include <cstdint>
#include "absl/base/internal/endian.h"
#include "absl/numeric/bits.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/memutil.h" #include "absl/strings/internal/memutil.h"
namespace absl { namespace absl {
...@@ -61,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text, ...@@ -61,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text,
EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix);
} }
// Returns the longest common prefix of `a` and `b`, compared byte by byte.
// The returned view always starts at `a.data()`.
//
// Strategy: when the shorter input has fewer than 8 bytes, the inputs are
// compared two bytes at a time plus an optional trailing byte. Otherwise they
// are compared eight bytes at a time, finishing with one 8-byte comparison
// aligned to the end of the shorter input (which may overlap bytes that were
// already compared).
absl::string_view FindLongestCommonPrefix(absl::string_view a,
absl::string_view b) {
// The common prefix cannot be longer than the shorter input.
const absl::string_view::size_type limit = std::min(a.size(), b.size());
const char* const pa = a.data();
const char* const pb = b.data();
// Number of leading bytes known to be equal so far.
absl::string_view::size_type count = (unsigned) 0;
if (ABSL_PREDICT_FALSE(limit < 8)) {
// Short inputs: compare 16-bit chunks while at least two bytes remain.
while (ABSL_PREDICT_TRUE(count + 2 <= limit)) {
uint16_t xor_bytes = absl::little_endian::Load16(pa + count) ^
absl::little_endian::Load16(pb + count);
if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
// The chunks differ. With a little-endian load the low 8 bits of the XOR
// belong to the first byte of the chunk; if they are zero that byte matched
// and only the second byte differs.
if (ABSL_PREDICT_TRUE((xor_bytes & 0xff) == 0)) ++count;
return absl::string_view(pa, count);
}
count += 2;
}
// At most one byte is left over (odd `limit`); compare it individually.
if (ABSL_PREDICT_TRUE(count != limit)) {
if (ABSL_PREDICT_TRUE(pa[count] == pb[count])) ++count;
}
return absl::string_view(pa, count);
}
// limit >= 8 here, so the first 8-byte load is always within bounds.
do {
uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
absl::little_endian::Load64(pb + count);
if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
// Trailing zero bits of the XOR cover the matching leading bytes of this
// chunk; dividing by 8 (>> 3) converts the bit count to a byte count.
count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
return absl::string_view(pa, count);
}
count += 8;
// Keep going only while strictly more than 8 bytes remain; the final 8 (or
// fewer) bytes are handled by the end-aligned load below.
} while (ABSL_PREDICT_TRUE(count + 8 < limit));
// Re-position at the last 8 bytes of the compared range. This may move
// `count` backwards over bytes already verified equal; those bytes XOR to
// zero, so the trailing-zero computation below still yields the offset of
// the first mismatch.
count = limit - 8;
uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
absl::little_endian::Load64(pb + count);
if (ABSL_PREDICT_TRUE(xor_bytes != 0)) {
count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
return absl::string_view(pa, count);
}
// Every byte up to `limit` matched.
return absl::string_view(pa, limit);
}
// Returns the longest common suffix of `a` and `b`, compared byte by byte.
//
// When both inputs are non-empty, the result is a view into `a`'s buffer that
// ends at `a.data() + a.size()` (it is empty if the last bytes differ). When
// either input is empty, a default-constructed string_view is returned.
absl::string_view FindLongestCommonSuffix(absl::string_view a,
                                          absl::string_view b) {
  // The common suffix cannot be longer than the shorter input.
  const absl::string_view::size_type limit = std::min(a.size(), b.size());
  if (limit == 0) return absl::string_view();

  // Index backwards from the one-past-the-end pointers instead of walking a
  // cursor downwards. Because `count < limit <= size()`, every pointer formed
  // below stays inside [data(), data() + size()], even when the shorter input
  // matches in full. A decrementing cursor would end up one element before
  // the start of the buffer in that case, which is undefined behaviour.
  const char* const a_end = a.data() + a.size();
  const char* const b_end = b.data() + b.size();
  absl::string_view::size_type count = 0;
  while (count < limit && *(a_end - count - 1) == *(b_end - count - 1)) {
    ++count;
  }
  return absl::string_view(a_end - count, count);
}
ABSL_NAMESPACE_END ABSL_NAMESPACE_END
} // namespace absl } // namespace absl
...@@ -103,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text, ...@@ -103,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text,
bool EndsWithIgnoreCase(absl::string_view text, bool EndsWithIgnoreCase(absl::string_view text,
absl::string_view suffix) noexcept; absl::string_view suffix) noexcept;
// Yields the longest prefix in common between both input strings. The
// comparison is byte-wise; the result is empty when the inputs share no
// leading bytes.
// Pointer-wise, the returned result is a subset of input "a": it starts at
// `a.data()`.
absl::string_view FindLongestCommonPrefix(absl::string_view a,
absl::string_view b);
// Yields the longest suffix in common between both input strings. The
// comparison is byte-wise; the result is empty when the inputs share no
// trailing bytes.
// Pointer-wise, the returned result is a subset of input "a" whenever both
// inputs are non-empty. If either input is empty, a default-constructed
// string_view is returned instead.
absl::string_view FindLongestCommonSuffix(absl::string_view a,
absl::string_view b);
ABSL_NAMESPACE_END ABSL_NAMESPACE_END
} // namespace absl } // namespace absl
......
...@@ -168,4 +168,121 @@ TEST(MatchTest, ContainsCharIgnoreCase) { ...@@ -168,4 +168,121 @@ TEST(MatchTest, ContainsCharIgnoreCase) {
EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0')); EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0'));
} }
// Basic behaviour: empty inputs, one input being a prefix of the other, a
// mismatch in the final position, identical inputs, and inputs with nothing in
// common.
TEST(MatchTest, FindLongestCommonPrefix) {
  using absl::FindLongestCommonPrefix;

  EXPECT_EQ(FindLongestCommonPrefix("", ""), "");
  EXPECT_EQ(FindLongestCommonPrefix("", "abc"), "");
  EXPECT_EQ(FindLongestCommonPrefix("abc", ""), "");
  EXPECT_EQ(FindLongestCommonPrefix("ab", "abc"), "ab");
  EXPECT_EQ(FindLongestCommonPrefix("abc", "ab"), "ab");
  EXPECT_EQ(FindLongestCommonPrefix("abc", "abd"), "ab");
  EXPECT_EQ(FindLongestCommonPrefix("abc", "abcd"), "abc");
  EXPECT_EQ(FindLongestCommonPrefix("abcd", "abcd"), "abcd");
  EXPECT_EQ(FindLongestCommonPrefix("abcd", "efgh"), "");

  // "abcde" v. "abc" but in the middle of other data
  const absl::string_view embedded_long =
      absl::string_view("1234 abcdef").substr(5, 5);
  const absl::string_view embedded_short =
      absl::string_view("5678 abcdef").substr(5, 3);
  EXPECT_EQ(FindLongestCommonPrefix(embedded_long, embedded_short), "abc");
}
// Since the little-endian implementation involves a bit of if-else and various
// return paths, the following tests aim to provide full test coverage of the
// implementation.
// Shorter input has 6 bytes (below 8): the mismatch sits in the second byte
// of the last two-byte chunk ("ef" v. "e_").
TEST(MatchTest, FindLongestCommonPrefixLoad16Mismatch) {
  const std::string longer("abcdefgh");
  const std::string shorter("abcde_");
  const absl::string_view expected("abcde");
  EXPECT_EQ(absl::FindLongestCommonPrefix(longer, shorter), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(shorter, longer), expected);
}
// Both inputs have 6 bytes (below 8, even length): every two-byte chunk
// matches and no single trailing byte is left to compare.
TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesNoLast) {
  const std::string first("abcdef");
  const std::string second("abcdef");
  const absl::string_view expected("abcdef");
  EXPECT_EQ(absl::FindLongestCommonPrefix(first, second), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(second, first), expected);
}
// Shorter input has 7 bytes (below 8, odd length): all two-byte chunks match,
// but the single trailing byte differs ('g' v. '_').
TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastCharMismatches) {
  const std::string shorter("abcdefg");
  const std::string longer("abcdef_h");
  const absl::string_view expected("abcdef");
  EXPECT_EQ(absl::FindLongestCommonPrefix(shorter, longer), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(longer, shorter), expected);
}
// Shorter input has 5 bytes (below 8, odd length): all two-byte chunks match
// and the single trailing byte matches as well.
TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastMatches) {
  const std::string shorter("abcde");
  const std::string longer("abcdefgh");
  const absl::string_view expected("abcde");
  EXPECT_EQ(absl::FindLongestCommonPrefix(shorter, longer), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(longer, shorter), expected);
}
// Shorter input has exactly 8 bytes: the very first eight-byte comparison
// finds a mismatch (at index 5).
TEST(MatchTest, FindLongestCommonPrefixSize8Load64Mismatches) {
  const std::string longer("abcdefghijk");
  const std::string shorter("abcde_g_");
  const absl::string_view expected("abcde");
  EXPECT_EQ(absl::FindLongestCommonPrefix(longer, shorter), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(shorter, longer), expected);
}
// Both inputs have exactly 8 identical bytes: the eight-byte comparisons find
// no difference and the whole input is returned.
TEST(MatchTest, FindLongestCommonPrefixSize8Load64Matches) {
  const std::string first("abcdefgh");
  const std::string second("abcdefgh");
  const absl::string_view expected("abcdefgh");
  EXPECT_EQ(absl::FindLongestCommonPrefix(first, second), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(second, first), expected);
}
// Both inputs have 15 bytes: the first eight bytes match and the mismatch (at
// index 10) is found by the final, end-aligned eight-byte comparison.
TEST(MatchTest, FindLongestCommonPrefixSize15Load64Mismatches) {
  const std::string intact("012345670123456");
  const std::string altered("0123456701_34_6");
  const absl::string_view expected("0123456701");
  EXPECT_EQ(absl::FindLongestCommonPrefix(intact, altered), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(altered, intact), expected);
}
// Shorter input has 15 bytes and is a prefix of the longer one: both the
// first and the end-aligned eight-byte comparisons match.
TEST(MatchTest, FindLongestCommonPrefixSize15Load64Matches) {
  const std::string shorter("012345670123456");
  const std::string longer("0123456701234567");
  const absl::string_view expected("012345670123456");
  EXPECT_EQ(absl::FindLongestCommonPrefix(shorter, longer), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(longer, shorter), expected);
}
// Both inputs have 24 bytes: the first sixteen bytes match and the mismatch
// is the very first byte of the final eight-byte comparison (index 16).
TEST(MatchTest, FindLongestCommonPrefixSizeFirstByteOfLast8BytesMismatch) {
  const std::string intact("012345670123456701234567");
  const std::string altered("0123456701234567_1234567");
  const absl::string_view expected("0123456701234567");
  EXPECT_EQ(absl::FindLongestCommonPrefix(intact, altered), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(altered, intact), expected);
}
// Two 300-byte inputs that differ only in their last byte: the common prefix
// is the first 299 bytes.
TEST(MatchTest, FindLongestCommonPrefixLargeLastCharMismatches) {
  const std::string uniform(300, 'x');
  std::string last_changed(uniform);
  last_changed.back() = '#';
  const std::string expected(299, 'x');
  EXPECT_EQ(absl::FindLongestCommonPrefix(uniform, last_changed), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(last_changed, uniform), expected);
}
// Two identical 300-byte inputs: the common prefix is the entire input.
TEST(MatchTest, FindLongestCommonPrefixLargeFullMatch) {
  const std::string original(300, 'x');
  const std::string duplicate(original);
  const std::string expected(300, 'x');
  EXPECT_EQ(absl::FindLongestCommonPrefix(original, duplicate), expected);
  EXPECT_EQ(absl::FindLongestCommonPrefix(duplicate, original), expected);
}
// Basic behaviour: empty inputs, one input being a suffix of the other, a
// mismatch in the first position, identical inputs, and inputs with nothing in
// common.
TEST(MatchTest, FindLongestCommonSuffix) {
  using absl::FindLongestCommonSuffix;

  EXPECT_EQ(FindLongestCommonSuffix("", ""), "");
  EXPECT_EQ(FindLongestCommonSuffix("", "abc"), "");
  EXPECT_EQ(FindLongestCommonSuffix("abc", ""), "");
  EXPECT_EQ(FindLongestCommonSuffix("bc", "abc"), "bc");
  EXPECT_EQ(FindLongestCommonSuffix("abc", "bc"), "bc");
  EXPECT_EQ(FindLongestCommonSuffix("abc", "dbc"), "bc");
  EXPECT_EQ(FindLongestCommonSuffix("bcd", "abcd"), "bcd");
  EXPECT_EQ(FindLongestCommonSuffix("abcd", "abcd"), "abcd");
  EXPECT_EQ(FindLongestCommonSuffix("abcd", "efgh"), "");

  // "abcde" v. "cde" but in the middle of other data
  const absl::string_view embedded_long =
      absl::string_view("1234 abcdef").substr(5, 5);
  const absl::string_view embedded_short =
      absl::string_view("5678 abcdef").substr(7, 3);
  EXPECT_EQ(FindLongestCommonSuffix(embedded_long, embedded_short), "cde");
}
} // namespace } // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment