Commit d61843e5 by Abseil Team Committed by vslashg

Export of internal Abseil changes

--
b20c539b8e21fee7d4d908a8a26a317a3de9d993 by Martijn Vels <mvels@google.com>:

Add CordRepBtree implementation

PiperOrigin-RevId: 385679196

--
96f7753b7af5fd964537d5794dd597bb6e698071 by Derek Mauro <dmauro@google.com>:

Update Abseil dependencies

PiperOrigin-RevId: 385643956

--
67bdae4c686f0df09cc7155633c03218bf23d177 by Abseil Team <absl-team@google.com>:

Fix up some small typos in error messages.

PiperOrigin-RevId: 385625107
GitOrigin-RevId: b20c539b8e21fee7d4d908a8a26a317a3de9d993
Change-Id: I8f602cfe9f7878b0558359ab15efb048caefb3a5
parent 33541e75
......@@ -205,6 +205,8 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/internal/cord_internal.h"
"strings/internal/cord_rep_consume.h"
"strings/internal/cord_rep_consume.cc"
"strings/internal/cord_rep_btree.cc"
"strings/internal/cord_rep_btree.h"
"strings/internal/cord_rep_flat.h"
"strings/internal/cord_rep_ring.cc"
"strings/internal/cord_rep_ring.h"
......
......@@ -15,29 +15,30 @@
#
workspace(name = "com_google_absl")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
name = "com_google_googletest", # 2021-07-09T13:28:13Z
sha256 = "12ef65654dc01ab40f6f33f9d02c04f2097d2cd9fbe48dc6001b29543583b0ad",
strip_prefix = "googletest-8d51ffdfab10b3fba636ae69bc03da4b54f8c235",
# Keep this URL in sync with ABSL_GOOGLETEST_COMMIT in ci/cmake_common.sh.
urls = ["https://github.com/google/googletest/archive/5bcd8e3bb929714e031a542d303f818e5a5af45d.zip"], # 2021-06-08T22:36:38Z
strip_prefix = "googletest-5bcd8e3bb929714e031a542d303f818e5a5af45d",
sha256 = "3adecb6686ac7367452561dca518fad5a990fb09c5a961bfa1836f15eb774348",
urls = ["https://github.com/google/googletest/archive/8d51ffdfab10b3fba636ae69bc03da4b54f8c235.zip"],
)
# Google benchmark.
http_archive(
name = "com_github_google_benchmark",
urls = ["https://github.com/google/benchmark/archive/7d0d9061d83b663ce05d9de5da3d5865a3845b79.zip"], # 2021-05-11T11:56:00Z
strip_prefix = "benchmark-7d0d9061d83b663ce05d9de5da3d5865a3845b79",
sha256 = "a07789754963e3ea3a1e13fed3a4d48fac0c5f7f749c5065f6c30cd2c1529661",
name = "com_github_google_benchmark", # 2021-07-01T09:02:54Z
sha256 = "1cb4b97a90aa1fd9c8e412a6bc29fc13fc140162a4a0db3811af40befd8c9ea5",
strip_prefix = "benchmark-e451e50e9b8af453f076dec10bd6890847f1624e",
urls = ["https://github.com/google/benchmark/archive/e451e50e9b8af453f076dec10bd6890847f1624e.zip"],
)
# C++ rules for Bazel.
http_archive(
name = "rules_cc",
urls = ["https://github.com/bazelbuild/rules_cc/archive/ab5395627c80e025e824bd005d41f96b20618b9d.zip"], # 2021-05-10T15:35:04Z
strip_prefix = "rules_cc-ab5395627c80e025e824bd005d41f96b20618b9d",
sha256 = "f3908cb40a6577ab0d1ef9e00052739bf69b4313fa7d20b4d61d4521ea67abf3",
name = "rules_cc", # 2021-06-07T16:41:49Z
sha256 = "b295cad8c5899e371dde175079c0a2cdc0151f5127acc92366a8c986beb95c76",
strip_prefix = "rules_cc-daf6ace7cfeacd6a83e9ff2ed659f416537b6c74",
urls = ["https://github.com/bazelbuild/rules_cc/archive/daf6ace7cfeacd6a83e9ff2ed659f416537b6c74.zip"],
)
......@@ -188,25 +188,25 @@ void ClearCurrentThreadIdentity();
// May be chosen at compile time via: -DABSL_FORCE_THREAD_IDENTITY_MODE=<mode
// index>
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC
#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC 0
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_TLS
#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_TLS 1
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_CPP11
#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_CPP11 2
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE
#error ABSL_THREAD_IDENTITY_MODE cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE cannot be directly set
#elif defined(ABSL_FORCE_THREAD_IDENTITY_MODE)
#define ABSL_THREAD_IDENTITY_MODE ABSL_FORCE_THREAD_IDENTITY_MODE
#elif defined(_WIN32) && !defined(__MINGW32__)
......
......@@ -269,11 +269,13 @@ cc_library(
name = "cord_internal",
srcs = [
"internal/cord_internal.cc",
"internal/cord_rep_btree.cc",
"internal/cord_rep_consume.cc",
"internal/cord_rep_ring.cc",
],
hdrs = [
"internal/cord_internal.h",
"internal/cord_rep_btree.h",
"internal/cord_rep_consume.h",
"internal/cord_rep_flat.h",
"internal/cord_rep_ring.h",
......@@ -296,6 +298,23 @@ cc_library(
"//absl/container:layout",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/types:span",
],
)
cc_test(
name = "cord_rep_btree_test",
size = "medium",
srcs = ["internal/cord_rep_btree_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord_internal",
":cord_rep_test_util",
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
"@com_google_googletest//:gtest_main",
],
)
......@@ -578,6 +597,19 @@ cc_library(
)
cc_library(
name = "cord_rep_test_util",
testonly = 1,
hdrs = ["internal/cord_rep_test_util.h"],
copts = ABSL_DEFAULT_COPTS,
deps = [
":cord_internal",
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
],
)
cc_library(
name = "cordz_test_helpers",
testonly = 1,
hdrs = ["cordz_test_helpers.h"],
......
......@@ -555,12 +555,14 @@ absl_cc_library(
cord_internal
HDRS
"internal/cord_internal.h"
"internal/cord_rep_btree.h"
"internal/cord_rep_consume.h"
"internal/cord_rep_flat.h"
"internal/cord_rep_ring.h"
"internal/cord_rep_ring_reader.h"
SRCS
"internal/cord_internal.cc"
"internal/cord_rep_btree.cc"
"internal/cord_rep_consume.cc"
"internal/cord_rep_ring.cc"
COPTS
......@@ -849,6 +851,21 @@ absl_cc_library(
absl_cc_library(
NAME
cord_rep_test_util
HDRS
"internal/cord_rep_test_util.h"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::config
absl::cord_internal
absl::raw_logging_internal
absl::strings
TESTONLY
)
absl_cc_library(
NAME
cord_test_helpers
HDRS
"cord_test_helpers.h"
......@@ -924,6 +941,24 @@ absl_cc_test(
absl_cc_test(
NAME
cord_rep_btree_test
SRCS
"internal/cord_rep_btree_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::base
absl::config
absl::cord_internal
absl::cord_rep_test_util
absl::core_headers
absl::raw_logging_internal
absl::strings
GTest::gmock_main
)
absl_cc_test(
NAME
cord_ring_test
SRCS
"cord_ring_test.cc"
......
......@@ -18,6 +18,7 @@
#include <memory>
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
......@@ -50,6 +51,9 @@ void CordRep::Destroy(CordRep* rep) {
rep = left;
continue;
}
} else if (rep->tag == BTREE) {
CordRepBtree::Destroy(rep->btree());
rep = nullptr;
} else if (rep->tag == RING) {
CordRepRing::Destroy(rep->ring());
rep = nullptr;
......
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
using EdgeType = CordRepBtree::EdgeType;
using OpResult = CordRepBtree::OpResult;
using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
// not specify 'depth' and leave it to its default (0) value.
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
int depth = 0) {
// Allow for full height trees + substring -> flat / external nodes.
assert(depth <= CordRepBtree::kMaxDepth + 2);
std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
? std::string("Private")
: absl::StrCat("Shared(", rep->refcount.Get(), ")");
std::string sptr = absl::StrCat("0x", absl::Hex(rep));
// Dumps the data contents of `rep` if `include_contents` is true.
// Always emits a new line character.
auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
if (include_contents) {
// Allow for up to 60 wide display of content data, which with some
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
<< CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
};
// For each level, we print the 'shared/private' state and the rep pointer,
// indented by two spaces per recursive depth.
stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
if (rep->tag == BTREE) {
const CordRepBtree* node = rep->btree();
std::string label =
node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
stream << label << ", len = " << node->length
<< ", begin = " << node->begin() << ", end = " << node->end()
<< "\n";
for (CordRep* edge : node->Edges()) {
DumpAll(edge, include_contents, stream, depth + 1);
}
} else if (rep->tag == SUBSTRING) {
const CordRepSubstring* substring = rep->substring();
stream << "Substring, len = " << rep->length
<< ", start = " << substring->start;
maybe_dump_data(rep);
DumpAll(substring->child, include_contents, stream, depth + 1);
} else if (rep->tag >= FLAT) {
stream << "Flat, len = " << rep->length;
maybe_dump_data(rep);
} else if (rep->tag == EXTERNAL) {
stream << "Extn, len = " << rep->length;
maybe_dump_data(rep);
}
}
// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
// small data out of large reps, and general efficiency of 'always copy small
// data'. Consider making this a cord rep internal library function.
CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
assert(n != 0);
assert(offset + n <= rep->length);
assert(offset != 0 || n != rep->length);
if (rep->tag == SUBSTRING) {
CordRepSubstring* substring = rep->substring();
offset += substring->start;
rep = CordRep::Ref(substring->child);
CordRep::Unref(substring);
}
CordRepSubstring* substring = new CordRepSubstring();
substring->length = n;
substring->tag = SUBSTRING;
substring->start = offset;
substring->child = rep;
return substring;
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
if (n == rep->length) return rep;
if (n == 0) return CordRep::Unref(rep), nullptr;
return CreateSubstring(rep, offset, n);
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
if (offset == 0) return rep;
return CreateSubstring(rep, offset, rep->length - offset);
}
template <EdgeType edge_type>
inline absl::string_view Consume(absl::string_view s, size_t n) {
return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
}
template <EdgeType edge_type>
inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
if (edge_type == kBack) {
memcpy(dst, s.data(), n);
return s.substr(n);
} else {
const size_t offset = s.size() - n;
memcpy(dst, s.data() + offset, n);
return s.substr(0, offset);
}
}
// Known issue / optimization weirdness: the store associated with the
// decrement introduces traffic between cpus (even if the result of that
// traffic does nothing), making this faster than a single call to
// refcount.Decrement() checking the zero refcount condition.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
if (r->refcount.IsOne()) {
fn(r);
} else if (!r->refcount.DecrementExpectHighRefcount()) {
fn(r);
}
}
// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
// node, or a SUBSTRING of an EXTERNAL or FLAT node.
void DeleteLeafEdge(CordRep* rep) {
for (;;) {
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
return;
}
if (rep->tag == EXTERNAL) {
CordRepExternal::Delete(rep->external());
return;
}
assert(rep->tag == SUBSTRING);
CordRepSubstring* substring = rep->substring();
rep = substring->child;
assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
delete substring;
if (rep->refcount.Decrement()) return;
}
}
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
// propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
// Returns true if the node at 'depth' is not shared, i.e. has a refcount
// of one and all of its parent nodes have a refcount of one.
inline bool owned(int depth) const { return depth < share_depth; }
// Returns the node at 'depth'.
inline CordRepBtree* node(int depth) const { return stack[depth]; }
// Builds a `depth` levels deep stack starting at `tree` recording which nodes
// are private in the form of the 'share depth' where nodes are shared.
inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
assert(depth <= tree->height());
int current_depth = 0;
while (current_depth < depth && tree->refcount.IsOne()) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
while (current_depth < depth) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
return tree;
}
// Builds a stack with the invariant that all nodes are private owned / not
// shared. This is used in iterative updates where a previous propagation
// guaranteed all nodes are owned / private.
inline void BuildOwnedStack(CordRepBtree* tree, int height) {
assert(height <= CordRepBtree::kMaxHeight);
int depth = 0;
while (depth < height) {
assert(tree->refcount.IsOne());
stack[depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
assert(tree->refcount.IsOne());
share_depth = depth + 1;
}
// Processes the final 'top level' result action for the tree.
// See the 'Action' enum for the various action implications.
static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
switch (result.action) {
case CordRepBtree::kPopped:
if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
ABSL_RAW_LOG(FATAL, "Max height exceeded");
}
return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
: CordRepBtree::New(result.tree, tree);
case CordRepBtree::kCopied:
CordRep::Unref(tree);
ABSL_FALLTHROUGH_INTENDED;
case CordRepBtree::kSelf:
return result.tree;
}
ABSL_INTERNAL_UNREACHABLE;
return result.tree;
}
// Propagate the action result in 'result' up into all nodes of the stack
// starting at depth 'depth'. 'length' contains the extra length of data that
// was added at the lowest level, and is updated into all nodes of the stack.
// See the 'Action' enum for the various action implications.
// If 'propagate' is true, then any copied node values are updated into the
// stack, which is used for iterative processing on the same stack.
template <bool propagate = false>
inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
// TODO(mvels): revisit the below code to check if 3 loops with 3
// (incremental) conditions is faster than 1 loop with a switch.
// Benchmarking and perf recordings indicate the loop with switch is
// fastest, likely because of indirect jumps on the tight case values and
// dense branches. But it's worth considering 3 loops, as the `action`
// transitions are mono directional. E.g.:
// while (action == kPopped) {
// ...
// }
// while (action == kCopied) {
// ...
// }
// ...
// We also found that an "if () do {}" loop here seems faster, possibly
// because it allows the branch predictor more granular heuristics on
// 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
// which appear to be the most common use cases.
if (depth != 0) {
do {
CordRepBtree* node = stack[--depth];
const bool owned = depth < share_depth;
switch (result.action) {
case CordRepBtree::kPopped:
assert(!propagate);
result = node->AddEdge<edge_type>(owned, result.tree, length);
break;
case CordRepBtree::kCopied:
result = node->SetEdge<edge_type>(owned, result.tree, length);
if (propagate) stack[depth] = result.tree;
break;
case CordRepBtree::kSelf:
node->length += length;
while (depth > 0) {
node = stack[--depth];
node->length += length;
}
return node;
}
} while (depth > 0);
}
return Finalize(tree, result);
}
// Invokes `Unwind` with `propagate=true` to update the stack node values.
inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
return Unwind</*propagate=*/true>(tree, depth, length, result);
}
// `share_depth` contains the depth at which the nodes in the stack become
// shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
// then `share_depth` is 0. If the 2nd node is shared (and implicitly all
// nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
// than the depth of the stack indicates that none of the nodes in the stack
// are shared.
int share_depth;
NodeStack stack;
};
} // namespace
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream) {
stream << "===================================\n";
if (!label.empty()) {
stream << label << '\n';
stream << "-----------------------------------\n";
}
if (rep) {
DumpAll(rep, include_contents, stream);
} else {
stream << "NULL\n";
}
}
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream) {
Dump(rep, label, false, stream);
}
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
Dump(rep, absl::string_view(), false, stream);
}
void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
for (CordRep* edge : tree->Edges(begin, end)) {
FastUnref(edge, DeleteLeafEdge);
}
Delete(tree);
}
void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
size_t end) {
for (CordRep* edge : tree->Edges(begin, end)) {
FastUnref(edge->btree(), Destroy);
}
Delete(tree);
}
bool CordRepBtree::IsValid(const CordRepBtree* tree) {
#define NODE_CHECK_VALID(x) \
if (!(x)) { \
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
return false; \
}
#define NODE_CHECK_EQ(x, y) \
if ((x) != (y)) { \
ABSL_RAW_LOG(ERROR, \
"CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
#y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
return false; \
}
NODE_CHECK_VALID(tree != nullptr);
NODE_CHECK_EQ(tree->tag, BTREE);
NODE_CHECK_VALID(tree->height() <= kMaxHeight);
NODE_CHECK_VALID(tree->begin() < tree->capacity());
NODE_CHECK_VALID(tree->end() <= tree->capacity());
NODE_CHECK_VALID(tree->begin() <= tree->end());
size_t child_length = 0;
for (CordRep* edge : tree->Edges()) {
NODE_CHECK_VALID(edge != nullptr);
if (tree->height() > 0) {
NODE_CHECK_VALID(edge->tag == BTREE);
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
} else {
NODE_CHECK_VALID(IsDataEdge(edge));
}
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
if (tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree())) return false;
}
}
return true;
#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
#ifndef NDEBUG
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
if (!IsValid(tree)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
if (!IsValid(tree)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
#endif // NDEBUG
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
if (size() >= kMaxCapacity) return {New(edge), kPopped};
OpResult result = ToOpResult(owned);
result.tree->Add<edge_type>(edge);
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
OpResult result;
const size_t idx = index(edge_type);
if (owned) {
result = {this, kSelf};
CordRep::Unref(edges_[idx]);
} else {
// Create a copy containing all unchanged edges. Unchanged edges are the
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
// We conveniently cover both case using a constexpr `shift` being 0 or 1
// as `end :== back + 1`.
result = {CopyRaw(), kCopied};
constexpr int shift = edge_type == kFront ? 1 : 0;
for (CordRep* r : Edges(begin() + shift, back() + shift)) {
CordRep::Ref(r);
}
}
result.tree->edges_[idx] = edge;
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
const int depth = tree->height();
const size_t length = rep->length;
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
const OpResult result =
leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
return ops.Unwind(tree, depth, length, result);
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t end = 0;
const size_t cap = leaf->capacity();
while (!data.empty() && end != cap) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[end++] = flat;
data = Consume<kBack>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_end(end);
return leaf;
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t begin = leaf->capacity();
leaf->set_end(leaf->capacity());
while (!data.empty() && begin != 0) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[--begin] = flat;
data = Consume<kFront>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_begin(begin);
return leaf;
}
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignBegin();
const size_t cap = capacity();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[fetch_add_end(1)] = flat;
data = Consume<kBack>(flat->Data(), data, n);
} while (!data.empty() && end() != cap);
return data;
}
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignEnd();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[sub_fetch_begin(1)] = flat;
data = Consume<kFront>(flat->Data(), data, n);
} while (!data.empty() && begin() != 0);
return data;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
size_t extra) {
if (ABSL_PREDICT_FALSE(data.empty())) return tree;
const size_t original_data_size = data.size();
int depth = tree->height();
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
// If there is capacity in the last edge, append as much data
// as possible into this last edge.
if (leaf->size() < leaf->capacity()) {
OpResult result = leaf->ToOpResult(ops.owned(depth));
data = result.tree->AddData<edge_type>(data, extra);
if (data.empty()) {
result.tree->length += original_data_size;
return ops.Unwind(tree, depth, original_data_size, result);
}
// We added some data into this leaf, but not all. Propagate the added
// length to the top most node, and rebuild the stack with any newly copied
// or updated nodes. From this point on, the path (leg) from the top most
// node to the right-most node towards the leaf node is privately owned.
size_t delta = original_data_size - data.size();
assert(delta > 0);
result.tree->length += delta;
tree = ops.Propagate(tree, depth, delta, result);
ops.share_depth = depth + 1;
}
// We were unable to append all data into the existing right-most leaf node.
// This means all remaining data must be put into (a) new leaf node(s) which
// we append to the tree. To make this efficient, we iteratively build full
// leaf nodes from `data` until the created leaf contains all remaining data.
// We utilize the `Unwind` method to merge the created leaf into the first
// level towards root that has capacity. On each iteration with remaining
// data, we rebuild the stack in the knowledge that right-most nodes are
// privately owned after the first `Unwind` completes.
for (;;) {
OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
if (result.tree->length == data.size()) {
return ops.Unwind(tree, depth, result.tree->length, result);
}
data = Consume<edge_type>(data, result.tree->length);
tree = ops.Unwind(tree, depth, result.tree->length, result);
depth = tree->height();
ops.BuildOwnedStack(tree, depth);
}
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
assert(dst->height() >= src->height());
// Capture source length as we may consume / destroy `src`.
const size_t length = src->length;
// We attempt to merge `src` at its corresponding height in `dst`.
const int depth = dst->height() - src->height();
StackOperations<edge_type> ops;
CordRepBtree* merge_node = ops.BuildStack(dst, depth);
// If there is enough space in `merge_node` for all edges from `src`, add all
// edges to this node, making a fresh copy as needed if not privately owned.
// If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
// `Finalize` to merge `src` into the first level towards `root` where there
// is capacity for another edge, or create a new top level node.
OpResult result;
if (merge_node->size() + src->size() <= kMaxCapacity) {
result = merge_node->ToOpResult(ops.owned(depth));
result.tree->Add<edge_type>(src->Edges());
result.tree->length += src->length;
if (src->refcount.IsOne()) {
Delete(src);
} else {
for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
CordRepBtree::Unref(src);
}
} else {
result = {src, kPopped};
}
// Unless we merged at the top level (i.e.: src and dst are equal height),
// unwind the result towards the top level, and finalize the result.
if (depth) {
return ops.Unwind(dst, depth, length, result);
}
return ops.Finalize(dst, result);
}
CopyResult CordRepBtree::CopySuffix(size_t offset) {
assert(offset < this->length);
// As long as `offset` starts inside the last edge, we can 'drop' the current
// depth. For the most extreme example: if offset references the last data
// edge in the tree, there is only a single edge / path from the top of the
// tree to that last edge, so we can drop all the nodes except that edge.
// The fast path check for this is `back->length >= length - offset`.
int height = this->height();
CordRepBtree* node = this;
size_t len = node->length - offset;
CordRep* back = node->Edge(kBack);
while (back->length >= len) {
offset = back->length - len;
if (--height < 0) {
return {MakeSubstring(CordRep::Ref(back), offset), height};
}
node = back->btree();
back = node->Edge(kBack);
}
if (offset == 0) return {CordRep::Ref(node), height};
// Offset does not point into the last edge, so we span at least two edges.
// Find the index of offset with `IndexBeyond` which provides us the edge
// 'beyond' the offset if offset is not a clean starting point of an edge.
Position pos = node->IndexBeyond(offset);
CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
const CopyResult result = {sub, height};
// `pos.n` contains a non zero value if the offset is not an exact starting
// point of an edge. In this case, `pos.n` contains the 'trailing' amount of
// bytes of the edge preceding that in `pos.index`. We need to iteratively
// adjust the preceding edge with the 'broken' offset until we have a perfect
// start of the edge.
while (pos.n != 0) {
assert(pos.index >= 1);
const size_t begin = pos.index - 1;
sub->set_begin(begin);
CordRep* const edge = node->Edge(begin);
len = pos.n;
offset = edge->length - len;
if (--height < 0) {
sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
return result;
}
node = edge->btree();
pos = node->IndexBeyond(offset);
CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
sub->edges_[begin] = nsub;
sub = nsub;
}
sub->set_begin(pos.index);
return result;
}
CopyResult CordRepBtree::CopyPrefix(size_t n) {
assert(n > 0);
assert(n <= this->length);
// As long as `n` does not exceed the length of the first edge, we can 'drop'
// the current depth. For the most extreme example: if we'd copy a 1 byte
// prefix from a tree, there is only a single edge / path from the top of the
// tree to the single data edge containing this byte, so we can drop all the
// nodes except the data node.
int height = this->height();
CordRepBtree* node = this;
CordRep* front = node->Edge(kFront);
while (front->length >= n) {
if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
node = front->btree();
front = node->Edge(kFront);
}
if (node->length == n) return {CordRep::Ref(node), height};
// `n` spans at least two nodes, find the end point of the span.
Position pos = node->IndexOf(n);
// Create a partial copy of the node up to `pos.index`, with a defined length
// of `n`. Any 'partial last edge' is added further below as needed.
CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
const CopyResult result = {sub, height};
// `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
// this is not zero, we don't have a 'clean cut', so we need to make a
// (partial) copy of that last edge, and repeat this until pos.n is zero.
while (pos.n != 0) {
size_t end = pos.index;
n = pos.n;
CordRep* edge = node->Edge(pos.index);
if (--height < 0) {
sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
sub->set_end(end);
AssertValid(result.edge->btree());
return result;
}
node = edge->btree();
pos = node->IndexOf(n);
CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
sub->edges_[end++] = nsub;
sub->set_end(end);
sub = nsub;
}
sub->set_end(pos.index);
AssertValid(result.edge->btree());
return result;
}
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
CordRepBtree* node = this;
int height = node->height();
Position front = node->IndexOf(offset);
CordRep* left = node->edges_[front.index];
while (front.n + n <= left->length) {
if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
node = left->btree();
front = node->IndexOf(front.n);
left = node->edges_[front.index];
}
const Position back = node->IndexBefore(front, n);
CordRep* const right = node->edges_[back.index];
assert(back.index > front.index);
// Get partial suffix and prefix entries.
CopyResult prefix;
CopyResult suffix;
if (height > 0) {
// Copy prefix and suffix of the boundary nodes.
prefix = left->btree()->CopySuffix(front.n);
suffix = right->btree()->CopyPrefix(back.n);
// If there is an edge between the prefix and suffix edges, then the tree
// must remain at its previous (full) height. If we have no edges between
// prefix and suffix edges, then the tree must be as high as either the
// suffix or prefix edges (which are collapsed to their minimum heights).
if (front.index + 1 == back.index) {
height = (std::max)(prefix.height, suffix.height) + 1;
}
// Raise prefix and suffixes to the new tree height.
for (int h = prefix.height + 1; h < height; ++h) {
prefix.edge = CordRepBtree::New(prefix.edge);
}
for (int h = suffix.height + 1; h < height; ++h) {
suffix.edge = CordRepBtree::New(suffix.edge);
}
} else {
// Leaf node, simply take substrings for prefix and suffix.
prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
}
// Compose resulting tree.
CordRepBtree* sub = CordRepBtree::New(height);
size_t end = 0;
sub->edges_[end++] = prefix.edge;
for (CordRep* r : node->Edges(front.index + 1, back.index)) {
sub->edges_[end++] = CordRep::Ref(r);
}
sub->edges_[end++] = suffix.edge;
sub->set_end(end);
sub->length = n;
return AssertValid(sub);
}
CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
CordRepBtree* right) {
return left->height() >= right->height() ? Merge<kBack>(left, right)
: Merge<kFront>(right, left);
}
bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
if (height() == 0 && size() == 1) {
if (fragment) *fragment = Data(begin());
return true;
}
return false;
}
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
absl::string_view* fragment) const {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return false;
int height = this->height();
const CordRepBtree* node = this;
for (;;) {
const Position front = node->IndexOf(offset);
const CordRep* edge = node->Edge(front.index);
if (edge->length < front.n + n) return false;
if (--height < 0) {
if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
return true;
}
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
char CordRepBtree::GetCharacter(size_t offset) const {
assert(offset < length);
const CordRepBtree* node = this;
int height = node->height();
for (;;) {
Position front = node->IndexOf(offset);
if (--height < 0) return node->Data(front.index)[front.n];
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
// The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
assert(height() >= 4);
assert(refcount.IsOne());
// Build a stack of nodes we may potentially need to update if we find a
// non-shared FLAT with capacity at the leaf level.
const int depth = height();
CordRepBtree* node = this;
CordRepBtree* stack[kMaxDepth];
for (int i = 0; i < depth; ++i) {
node = node->Edge(kBack)->btree();
if (!node->refcount.IsOne()) return {};
stack[i] = node;
}
// Must be a privately owned flat.
CordRep* const edge = node->Edge(kBack);
if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
// Must have capacity.
const size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
// Build span on remaining capacity.
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
this->length += delta;
for (int i = 0; i < depth; ++i) {
stack[i]->length += delta;
}
return span;
}
CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
if (rep->tag == BTREE) return rep->btree();
CordRepBtree* node = nullptr;
auto consume = [&node](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
if (node == nullptr) {
node = New(r);
} else {
node = CordRepBtree::AddCordRep<kBack>(node, r);
}
};
Consume(rep, consume);
return node;
}
CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
return MergeTrees(tree, rep->btree());
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kBack>(tree, r);
};
Consume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
return MergeTrees(rep->btree(), tree);
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kFront>(tree, r);
};
ReverseConsume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kBack>(tree, data, extra);
}
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kFront>(tree, data, extra);
}
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#include <cassert>
#include <cstdint>
#include <iosfwd>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
// Data is stored at the leaf level only, non leaf nodes contain down pointers
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
// or EXTERNAL nodes. The implementation allows for data to be added to either
// end of the tree only, it does not provide any 'insert' logic. This has the
// benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50% as in a
// similar fashion, one side of the merged tree will typically have a 100% fill
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
// better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call affecting
// `tree` reference count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and trust that all methods DTRT at all times.
class CordRepBtree : public CordRep {
public:
// EdgeType identifies `front` and `back` enum values.
// Various implementations in CordRepBtree such as `Add` and `Edge` are
// generic and templated on operating on either of the boundary edges.
// For more information on the possible edges contained in a CordRepBtree
// instance see the documentation for `edges_`.
enum class EdgeType { kFront, kBack };
// Convenience constants into `EdgeType`
static constexpr EdgeType kFront = EdgeType::kFront;
static constexpr EdgeType kBack = EdgeType::kBack;
// Maximum number of edges: based on experiments and performance data, we can
// pick suitable values resulting in optimum cacheline aligned values. The
// preferred values are based on 64-bit systems where we aim to align this
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
// TODO(b/192061034): experiment with alternative sizes.
static constexpr size_t kMaxCapacity = 6;
// Reasonable maximum height of the btree. We can expect a fill ratio of at
// least 50%: trees are always expanded at the front or back. Concatenating
// trees will then typically fold at the top most node, where the lower nodes
// are at least at capacity on one side of joined inputs. At a lower fill
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
// We will fail / abort if an application ever exceeds this height, which
// should be extremely rare (near impossible) and be an indication of an
// application error: we do not assume it reasonable for any application to
// operate correctly with such monster trees.
// Another compelling reason for the number `12` is that any contextual stack
// required for navigation or insertion requires 12 words and 12 bytes, which
// fits inside 2 cache lines with some room to spare, and is reasonable as a
// local stack variable compared to Cord's current near 400 bytes stack use.
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
// values start with a value of 0 for leaf nodes.
static constexpr int kMaxDepth = 12;
static constexpr int kMaxHeight = kMaxDepth - 1;
// `Action` defines the action for unwinding changes done at the btree's leaf
// level that need to be propagated up to the parent node(s). Each operation
// on a node has an effect / action defined as follows:
// - kSelf
// The operation (add / update, etc) was performed directly on the node as
// the node is private to the current thread (i.e.: not shared directly or
// indirectly through a refcount > 1). Changes can be propagated directly to
// all parent nodes as all parent nodes are also then private to the current
// thread.
// - kCopied
// The operation (add / update, etc) was performed on a copy of the original
// node, as the node is (potentially) directly or indirectly shared with
// other threads. Changes need to be propagated into the parent nodes where
// the old down pointer must be unreffed and replaced with this new copy.
// Such changes to parent nodes may themselves require a copy if the parent
// node is also shared. A kCopied action can propagate all the way to the
// top node where we then must unref the `tree` input provided by the
// caller, and return the new copy.
// - kPopped
// The operation (typically add) could not be satisfied due to insufficient
// capacity in the targeted node, and a new 'leg' was created that needs to
// be added into the parent node. For example, adding a FLAT inside a leaf
// node that is at capacity will create a new leaf node containing that
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
// action propagating all the way to the top of the tree will result in
// the tree becoming one level higher than the current tree through a final
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
// referencing the old tree and the new (fully popped upwards) 'leg'.
enum Action { kSelf, kCopied, kPopped };
// Result of an operation on a node. See the `Action` enum for details.
struct OpResult {
CordRepBtree* tree;
Action action;
};
// Return value of the CopyPrefix and CopySuffix methods which can
// return a node or data edge at any height inside the tree.
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
struct CopyResult {
CordRep* edge;
int height;
};
// Logical position inside a node:
// - index: index of the edge.
// - n: size or offset value depending on context.
struct Position {
size_t index;
size_t n;
};
// Creates a btree from the given input. Adopts a ref of `rep`.
// If the input `rep` is itself a btree, i.e., `tag == BTREE`, then this
// function immediately returns `rep->btree()`. If the input is a valid data
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
// tree, and the input is consumed and transformed into a btree().
static CordRepBtree* Create(CordRep* rep);
// Destroys the provided tree. Should only be called by cord internal API's,
// typically after a ref_count.Decrement() on the last reference count.
static void Destroy(CordRepBtree* tree);
// Appends / Prepends an existing CordRep instance to this tree.
// The below methods accept three types of input:
// 1) `rep` is a data node (See `IsDataNode` for valid data edges).
// `rep` is appended or prepended to this tree 'as is'.
// 2) `rep` is a BTREE.
// `rep` is merged into `tree` respecting the Append/Prepend order.
// 3) `rep` is some other (legacy) type.
// `rep` is converted in place and added to `tree`
// Requires `tree` and `rep` to be not null.
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
// Append/Prepend the data in `data` to this tree.
// The `extra` parameter defines how much extra capacity should be allocated
// for any additional FLAT being allocated. This is an optimization hint from
// the caller. For example, a caller may need to add 2 string_views of data
// "abc" and "defghi" which are not consecutive. The caller can in this case
// invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
// where possible with at least 6 bytes of extra capacity beyond `length`.
// This helps avoiding data getting fragmented over multiple flats.
// There is no limit on the size of `data`. If `data` can not be stored inside
// a single flat, then the function will iteratively add flats until all data
// has been consumed and appended or prepended to the tree.
static CordRepBtree* Append(CordRepBtree* tree, string_view data,
size_t extra = 0);
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
size_t extra = 0);
// Returns a new tree, containing `n` bytes of data from this instance
// starting at offset `offset`. Where possible, the returned tree shares
// (re-uses) data edges and nodes with this instance to minimize the
// combined memory footprint of both trees.
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
CordRep* SubTree(size_t offset, size_t n);
// Returns the character at the given offset.
char GetCharacter(size_t offset) const;
// Returns true if this node holds a single data edge, and if so, sets
// `fragment` to reference the contained data. `fragment` is an optional
// output parameter and allowed to be null.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data of `n` bytes starting at offset `offset`
// is contained in a single data edge, and if so, sets fragment to reference
// the contained data. `fragment` is an optional output parameter and allowed
// to be null.
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
// Returns a span (mutable range of bytes) of up to `size` bytes into the
// last FLAT data edge inside this tree under the following conditions:
// - none of the nodes down into the FLAT node are shared.
// - the last data edge in this tree is a non-shared FLAT.
// - the referenced FLAT has additional capacity available.
// If all these conditions are met, a non-empty span is returned, and the
// length of the flat node and involved tree nodes have been increased by
// `span.length()`. The caller is responsible for immediately assigning values
// to all uninitialized data reference by the returned span.
// Requires `this->refcount.IsOne()`: this function forces the caller to do
// this fast path check on the top level node, as this is the most commonly
// shared node of a cord tree.
Span<char> GetAppendBuffer(size_t size);
// Returns the `height` of the tree. The height of a tree is limited to
// kMaxHeight. `height` is implemented as an `int` as in some places we
// use negative (-1) values for 'data edges'.
int height() const { return static_cast<int>(storage[0]); }
// Properties: begin, back, end, front/back boundary indexes.
size_t begin() const { return static_cast<size_t>(storage[1]); }
size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
size_t end() const { return static_cast<size_t>(storage[2]); }
size_t index(EdgeType edge) const {
return edge == kFront ? begin() : back();
}
// Properties: size and capacity.
// `capacity` contains the current capacity of this instance, where
// `kMaxCapacity` contains the maximum capacity of a btree node.
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
// change in the future if we see benefit in dynamically sizing 'small' nodes
// to 'large' nodes for large data trees.
size_t size() const { return end() - begin(); }
size_t capacity() const { return kMaxCapacity; }
// Edge access
inline CordRep* Edge(size_t index) const;
inline CordRep* Edge(EdgeType edge_type) const;
inline absl::Span<CordRep* const> Edges() const;
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
// Returns reference to the data edge at `index`.
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
static const char* EdgeDataPtr(const CordRep* r);
static absl::string_view EdgeData(const CordRep* r);
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep.
static bool IsDataEdge(const CordRep* rep);
// Diagnostics
static bool IsValid(const CordRepBtree* tree);
static CordRepBtree* AssertValid(CordRepBtree* tree);
static const CordRepBtree* AssertValid(const CordRepBtree* tree);
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream);
// Adds the edge `edge` to this node if possible. `owned` indicates if the
// current node is potentially shared or not with other threads. Returns:
// - {kSelf, <this>}
// The edge was directly added to this node.
// - {kCopied, <node>}
// The edge was added to a copy of this node.
// - {kPopped, New(edge, height())}
// A new leg with the edge was created as this node has no extra capacity.
template <EdgeType edge_type>
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
// Replaces the front or back edge with the provided new edge. Returns:
// - {kSelf, <this>}
// The edge was directly set in this node. The old edge is unreffed.
// - {kCopied, <node>}
// A copy of this node was created with the new edge value.
// In both cases, the function adopts a reference on `edge`.
template <EdgeType edge_type>
OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
// Creates a new empty node at the specified height.
static CordRepBtree* New(int height = 0);
// Creates a new node containing `rep`, with the height being computed
// automatically based on the type of `rep`.
static CordRepBtree* New(CordRep* rep);
// Creates a new node containing both `front` and `back` at height
// `front.height() + 1`. Requires `back.height() == front.height()`.
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
private:
CordRepBtree() = default;
~CordRepBtree() = default;
// Initializes the main properties `tag`, `begin`, `end`, `height`.
inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
// Direct property access begin / end
void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
// Decreases the value of `begin` by `n`, and returns the new value. Notice
// how this returns the new value unlike atomic::fetch_add which returns the
// old value. This is because this is used to prepend edges at 'begin - 1'.
size_t sub_fetch_begin(size_t n) {
storage[1] -= static_cast<uint8_t>(n);
return storage[1];
}
// Increases the value of `end` by `n`, and returns the previous value. This
// function is typically used to append edges at 'end'.
size_t fetch_add_end(size_t n) {
const uint8_t current = storage[2];
storage[2] = static_cast<uint8_t>(current + n);
return current;
}
// Returns the index of the last edge starting on, or before `offset`, with
// `n` containing the relative offset of `offset` inside that edge.
// Requires `offset` < length.
Position IndexOf(size_t offset) const;
// Returns the index of the last edge starting before `offset`, with `n`
// containing the relative offset of `offset` inside that edge.
// This function is useful to find the edges for some span of bytes ending at
// `offset` (i.e., `n` bytes). For example:
//
// Position pos = IndexBefore(n)
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
//
// Requires 0 < `offset` <= length.
Position IndexBefore(size_t offset) const;
// Identical to the above function except starting from the position `front`.
// This function is equivalent to `IndexBefore(front.n + offset)`, with
// the difference that this function is optimized to start at `front.index`.
Position IndexBefore(Position front, size_t offset) const;
// Returns the index of the edge directly beyond the edge containing offset
// `offset`, with `n` containing the distance of that edge from `offset`.
// This function is useful for iteratively finding suffix nodes and remaining
// partial bytes in left-most suffix nodes as for example in CopySuffix.
// Requires `offset` < length.
Position IndexBeyond(size_t offset) const;
// Destruction
static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
static void Delete(CordRepBtree* tree) { delete tree; }
// Creates a new leaf node containing as much data as possible from `data`.
// The data is added either forwards or reversed depending on `edge_type`.
// Callers must check the length of the returned node to determine if all data
// was copied or not.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
// Creates a raw copy of this Btree node, copying all properties, but
// without adding any references to existing edges.
CordRepBtree* CopyRaw() const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
// Creates a partial copy of this Btree node, copying all edges up to `end`,
// adding a reference on each copied edge, and sets the length of the newly
// created copy to `new_length`.
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
// Creates a partial copy of this Btree node, copying all edges starting at
// `begin`, adding a reference on each copied edge, and sets the length of
// the newly created copy to `new_length`.
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
// Returns a tree containing the result of appending `right` to `left`.
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
// deal with legacy / non conforming input, i.e.: CONCAT trees.
static CordRepBtree* CreateSlow(CordRep* rep);
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
// Aligns existing edges to start at index 0, to allow for a new edge to be
// added to the back of the current edges.
inline void AlignBegin();
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
// added in front of the current edges.
inline void AlignEnd();
// Adds the provided edge to this node.
// Requires this node to have capacity for the edge. Realigns / moves
// existing edges as needed to prepend or append the new edge.
template <EdgeType edge_type>
inline void Add(CordRep* rep);
// Adds the provided edges to this node.
// Requires this node to have capacity for the edges. Realigns / moves
// existing edges as needed to prepend or append the new edges.
template <EdgeType edge_type>
inline void Add(absl::Span<CordRep* const>);
// Adds data from `data` to this node until either all data has been consumed,
// or there is no more capacity for additional flat nodes inside this node.
// Requires the current node to be a leaf node, data to be non empty, and the
// current node to have capacity for at least one more data edge.
// Returns any remaining data from `data` that was not added, which is
// depending on the edge type (front / back) either the remaining prefix of
// suffix of the input.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
absl::string_view AddData(absl::string_view data, size_t extra);
// Replace the front or back edge with the provided value.
// Adopts a reference on `edge` and unrefs the old edge.
template <EdgeType edge_type>
inline void SetEdge(CordRep* edge);
// Returns a partial copy of the current tree containing the first `n` bytes
// of data. `CopyResult` contains both the resulting edge and its height. The
// resulting tree may be less high than the current tree, or even be a single
// matching data edge. For example, if `n == 1`, then the result will be the
// single data edge, and height will be set to -1 (one below the owning leaf
// node). If n == 0, this function returns null.
// Requires `n <= length`
CopyResult CopyPrefix(size_t n);
// Returns a partial copy of the current tree containing all data starting
// after `offset`. `CopyResult` contains both the resulting edge and its
// height. The resulting tree may be less high than the current tree, or even
// be a single matching data edge. For example, if `n == length - 1`, then the
// result will be a single data edge, and height will be set to -1 (one below
// the owning leaf node).
// Requires `offset < length`
CopyResult CopySuffix(size_t offset);
// Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
// depending on the value of `owned`.
inline OpResult ToOpResult(bool owned);
// Adds `rep` to the specified tree, returning the modified tree.
template <EdgeType edge_type>
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
// Adds `data` to the specified tree, returning the modified tree.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
size_t extra = 0);
// Merges `src` into `dst` with `src` being added either before (kFront) or
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
// equal to the height of `src`.
template <EdgeType edge_type>
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
// implements an inlined version for trees of limited height (3 levels),
// GetAppendBufferSlow implements the logic for large trees.
Span<char> GetAppendBufferSlow(size_t size);
// `edges_` contains all edges starting from this instance.
// These are explicitly `child` edges only, a cord btree (or any cord tree in
// that respect) does not store `parent` pointers anywhere: multiple trees /
// parents can reference the same shared child edge. The type of these edges
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
// edges` (external or flat nodes, or sub-strings thereof). All other nodes
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
CordRep* edges_[kMaxCapacity];
friend class CordRepBtreeTestPeer;
friend class CordRepBtreeNavigator;
};
inline CordRepBtree* CordRep::btree() {
assert(tag == BTREE);
return static_cast<CordRepBtree*>(this);
}
inline const CordRepBtree* CordRep::btree() const {
assert(tag == BTREE);
return static_cast<const CordRepBtree*>(this);
}
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
tag = BTREE;
storage[0] = height;
storage[1] = begin;
storage[2] = end;
}
inline CordRep* CordRepBtree::Edge(size_t index) const {
assert(index >= begin());
assert(index < end());
return edges_[index];
}
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
return edges_[edge_type == kFront ? begin() : back()];
}
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
return {edges_ + begin(), size()};
}
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
size_t end) const {
assert(begin <= end);
assert(begin >= this->begin());
assert(end <= this->end());
return {edges_ + begin, static_cast<size_t>(end - begin)};
}
inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
assert(IsDataEdge(r));
size_t offset = 0;
if (r->tag == SUBSTRING) {
offset = r->substring()->start;
r = r->substring()->child;
}
return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
}
inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
return absl::string_view(EdgeDataPtr(r), r->length);
}
inline absl::string_view CordRepBtree::Data(size_t index) const {
assert(height() == 0);
return EdgeData(Edge(index));
}
inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path the SUBSTRING check to optimize for the hot path.
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
if (rep->tag == SUBSTRING) rep = rep->substring()->child;
return rep->tag == EXTERNAL || rep->tag >= FLAT;
}
inline CordRepBtree* CordRepBtree::New(int height) {
CordRepBtree* tree = new CordRepBtree;
tree->length = 0;
tree->InitInstance(height);
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
CordRepBtree* tree = new CordRepBtree;
int height = rep->tag == BTREE ? rep->btree()->height() + 1 : 0;
tree->length = rep->length;
tree->InitInstance(height, /*begin=*/0, /*end=*/1);
tree->edges_[0] = rep;
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
CordRepBtree* back) {
assert(front->height() == back->height());
CordRepBtree* tree = new CordRepBtree;
tree->length = front->length + back->length;
tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
tree->edges_[0] = front;
tree->edges_[1] = back;
return tree;
}
inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
size_t end) {
if (tree->height() == 0) {
DestroyLeaf(tree, begin, end);
} else {
DestroyNonLeaf(tree, begin, end);
}
}
inline void CordRepBtree::Destroy(CordRepBtree* tree) {
DestroyTree(tree, tree->begin(), tree->end());
}
inline CordRepBtree* CordRepBtree::CopyRaw() const {
auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
new (&tree->refcount) Refcount;
return tree;
}
inline CordRepBtree* CordRepBtree::Copy() const {
CordRepBtree* tree = CopyRaw();
for (CordRep* rep : Edges()) CordRep::Ref(rep);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
size_t new_length) const {
assert(begin >= this->begin());
assert(begin <= this->end());
CordRepBtree* tree = CopyRaw();
tree->length = new_length;
tree->set_begin(begin);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
size_t new_length) const {
assert(end <= capacity());
assert(end >= this->begin());
CordRepBtree* tree = CopyRaw();
tree->length = new_length;
tree->set_end(end);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline void CordRepBtree::AlignBegin() {
// The below code itself does not need to be fast as typically we have
// mono-directional append/prepend calls, and `begin` / `end` are typically
// adjusted no more than once. But we want to avoid potential register clobber
// effects, making the compiler emit register save/store/spills, and minimize
// the size of code.
const size_t delta = begin();
if (ABSL_PREDICT_FALSE(delta != 0)) {
const size_t new_end = end() - delta;
set_begin(0);
set_end(new_end);
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
// size, and then do overlapping load/store of up to 4 pointers (inlined as
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
// compact and b) not clobbering any registers.
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = 0; i < new_end; ++i) {
edges_[i] = edges_[i + delta];
}
}
}
inline void CordRepBtree::AlignEnd() {
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
const size_t delta = capacity() - end();
if (delta != 0) {
const size_t new_begin = begin() + delta;
const size_t new_end = end() + delta;
set_begin(new_begin);
set_end(new_end);
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = new_end - 1; i >= new_begin; --i) {
edges_[i] = edges_[i - delta];
}
}
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
AlignBegin();
edges_[fetch_add_end(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
absl::Span<CordRep* const> edges) {
AlignBegin();
size_t new_end = end();
for (CordRep* edge : edges) edges_[new_end++] = edge;
set_end(new_end);
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
AlignEnd();
edges_[sub_fetch_begin(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
absl::Span<CordRep* const> edges) {
AlignEnd();
size_t new_begin = begin() - edges.size();
set_begin(new_begin);
for (CordRep* edge : edges) edges_[new_begin++] = edge;
}
template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
const int idx = edge_type == kFront ? begin() : back();
CordRep::Unref(edges_[idx]);
edges_[idx] = edge;
}
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
}
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
assert(offset < length);
size_t index = begin();
while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
assert(offset > 0);
assert(offset <= length);
size_t index = begin();
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
size_t offset) const {
size_t index = front.index;
offset = offset + front.n;
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
const size_t offset) const {
// We need to find the edge which `starting offset` is beyond (>=)`offset`.
// For this we can't use the `offset -= length` logic of IndexOf. Instead, we
// track the offset of the `current edge` in `off`, which we increase as we
// iterate over the edges until we find the matching edge.
size_t off = 0;
size_t index = begin();
while (offset > off) off += edges_[index++]->length;
return {index, off - offset};
}
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
if (IsDataEdge(rep)) return New(rep);
return CreateSlow(rep);
}
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
assert(refcount.IsOne());
CordRepBtree* tree = this;
const int height = this->height();
CordRepBtree* n1 = tree;
CordRepBtree* n2 = tree;
CordRepBtree* n3 = tree;
switch (height) {
case 3:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n2 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 2:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n1 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 1:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
ABSL_FALLTHROUGH_INTENDED;
case 0:
CordRep* edge = tree->Edge(kBack);
if (!edge->refcount.IsOne()) return {};
if (edge->tag < FLAT) return {};
size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
switch (height) {
case 3:
n3->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 2:
n2->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 1:
n1->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 0:
tree->length += delta;
return span;
}
break;
}
return GetAppendBufferSlow(size);
}
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
CordRepBtree* tree, CordRep* rep);
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
CordRepBtree* tree, CordRep* rep);
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kBack>(tree, rep);
}
return AppendSlow(tree, rep);
}
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kFront>(tree, rep);
}
return PrependSlow(tree, rep);
}
#ifdef NDEBUG
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
return tree;
}
#endif
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree.h"
#include <cmath>
#include <deque>
#include <iostream>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordRepBtreeTestPeer {
public:
static void SetEdge(CordRepBtree* node, size_t idx, CordRep* edge) {
node->edges_[idx] = edge;
}
static void AddEdge(CordRepBtree* node, CordRep* edge) {
node->edges_[node->fetch_add_end(1)] = edge;
}
};
namespace {
using ::absl::cordrep_testing::AutoUnref;
using ::absl::cordrep_testing::CordToString;
using ::absl::cordrep_testing::CreateFlatsFromString;
using ::absl::cordrep_testing::CreateRandomString;
using ::absl::cordrep_testing::MakeConcat;
using ::absl::cordrep_testing::MakeExternal;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::MakeSubstring;
using ::testing::_;
using ::testing::AllOf;
using ::testing::AnyOf;
using ::testing::Conditional;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::Ne;
using ::testing::Not;
using ::testing::SizeIs;
using ::testing::TypedEq;
MATCHER_P(EqFlatHolding, data, "Equals flat holding data") {
if (arg->tag < FLAT) {
*result_listener << "Expected FLAT, got tag " << static_cast<int>(arg->tag);
return false;
}
std::string actual = CordToString(arg);
if (actual != data) {
*result_listener << "Expected flat holding \"" << data
<< "\", got flat holding \"" << actual << "\"";
return false;
}
return true;
}
MATCHER_P(IsNode, height, absl::StrCat("Is a valid node of height ", height)) {
if (arg == nullptr) {
*result_listener << "Expected NODE, got nullptr";
return false;
}
if (arg->tag != BTREE) {
*result_listener << "Expected NODE, got " << static_cast<int>(arg->tag);
return false;
}
if (!CordRepBtree::IsValid(arg->btree())) {
CordRepBtree::Dump(arg->btree(), "Expected valid NODE, got:", false,
*result_listener->stream());
return false;
}
if (arg->btree()->height() != height) {
*result_listener << "Expected NODE of height " << height << ", got "
<< arg->btree()->height();
return false;
}
return true;
}
MATCHER_P2(IsSubstring, start, length,
absl::StrCat("Is a substring(start = ", start, ", length = ", length,
")")) {
if (arg == nullptr) {
*result_listener << "Expected substring, got nullptr";
return false;
}
if (arg->tag != SUBSTRING) {
*result_listener << "Expected SUBSTRING, got "
<< static_cast<int>(arg->tag);
return false;
}
const CordRepSubstring* const substr = arg->substring();
if (substr->start != start || substr->length != length) {
*result_listener << "Expected substring(" << start << ", " << length
<< "), got substring(" << substr->start << ", "
<< substr->length << ")";
return false;
}
return true;
}
// DataConsumer is a simple helper class used by tests to 'consume' string
// fragments from the provided input in forward or backward direction.
class DataConsumer {
public:
// Starts consumption of `data`. Caller must make sure `data` outlives this
// instance. Consumes data starting at the front if `forward` is true, else
// consumes data from the back.
DataConsumer(absl::string_view data, bool forward)
: data_(data), forward_(forward) {}
// Return the next `n` bytes from referenced data.
absl::string_view Next(size_t n) {
assert(n <= data_.size() - consumed_);
consumed_ += n;
return data_.substr(forward_ ? consumed_ - n : data_.size() - consumed_, n);
}
// Returns all data consumed so far.
absl::string_view Consumed() const {
return forward_ ? data_.substr(0, consumed_)
: data_.substr(data_.size() - consumed_);
}
private:
absl::string_view data_;
size_t consumed_ = 0;
bool forward_;
};
// BtreeAdd returns either CordRepBtree::Append or CordRepBtree::Prepend.
CordRepBtree* BtreeAdd(CordRepBtree* node, bool append,
absl::string_view data) {
return append ? CordRepBtree::Append(node, data)
: CordRepBtree::Prepend(node, data);
}
// Recursively collects all leaf edges from `tree` and appends them to `edges`.
void GetLeafEdges(const CordRepBtree* tree, std::vector<CordRep*>& edges) {
if (tree->height() == 0) {
for (CordRep* edge : tree->Edges()) {
edges.push_back(edge);
}
} else {
for (CordRep* edge : tree->Edges()) {
GetLeafEdges(edge->btree(), edges);
}
}
}
// Recursively collects and returns all leaf edges from `tree`.
std::vector<CordRep*> GetLeafEdges(const CordRepBtree* tree) {
std::vector<CordRep*> edges;
GetLeafEdges(tree, edges);
return edges;
}
// Creates a flat containing the hexadecimal value of `i` zero padded
// to at least 4 digits prefixed with "0x", e.g.: "0x04AC".
CordRepFlat* MakeHexFlat(size_t i) {
return MakeFlat(absl::StrCat("0x", absl::Hex(i, absl::kZeroPad4)));
}
CordRepBtree* MakeLeaf(size_t size = CordRepBtree::kMaxCapacity) {
assert(size <= CordRepBtree::kMaxCapacity);
CordRepBtree* leaf = CordRepBtree::Create(MakeHexFlat(0));
for (size_t i = 1; i < size; ++i) {
leaf = CordRepBtree::Append(leaf, MakeHexFlat(i));
}
return leaf;
}
CordRepBtree* MakeTree(size_t size, bool append = true) {
CordRepBtree* tree = CordRepBtree::Create(MakeHexFlat(0));
for (size_t i = 1; i < size; ++i) {
tree = append ? CordRepBtree::Append(tree, MakeHexFlat(i))
: CordRepBtree::Prepend(tree, MakeHexFlat(i));
}
return tree;
}
CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) {
std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size);
auto it = flats.begin();
CordRepBtree* tree = CordRepBtree::Create(*it);
while (++it != flats.end()) tree = CordRepBtree::Append(tree, *it);
return tree;
}
CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) {
std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size);
auto rit = flats.rbegin();
CordRepBtree* tree = CordRepBtree::Create(*rit);
while (++rit != flats.rend()) tree = CordRepBtree::Prepend(tree, *rit);
return tree;
}
class CordRepBtreeTest : public testing::TestWithParam<bool> {
public:
bool shared() const { return GetParam(); }
static std::string ToString(testing::TestParamInfo<bool> param) {
return param.param ? "Shared" : "Private";
}
};
INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeTest, testing::Bool(),
CordRepBtreeTest::ToString);
class CordRepBtreeHeightTest : public testing::TestWithParam<int> {
public:
int height() const { return GetParam(); }
static std::string ToString(testing::TestParamInfo<int> param) {
return absl::StrCat(param.param);
}
};
INSTANTIATE_TEST_SUITE_P(WithHeights, CordRepBtreeHeightTest,
testing::Range(0, CordRepBtree::kMaxHeight),
CordRepBtreeHeightTest::ToString);
using TwoBools = testing::tuple<bool, bool>;
class CordRepBtreeDualTest : public testing::TestWithParam<TwoBools> {
public:
bool first_shared() const { return std::get<0>(GetParam()); }
bool second_shared() const { return std::get<1>(GetParam()); }
static std::string ToString(testing::TestParamInfo<TwoBools> param) {
if (std::get<0>(param.param)) {
return std::get<1>(param.param) ? "BothShared" : "FirstShared";
}
return std::get<1>(param.param) ? "SecondShared" : "Private";
}
};
INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeDualTest,
testing::Combine(testing::Bool(), testing::Bool()),
CordRepBtreeDualTest::ToString);
TEST(CordRepBtreeTest, SizeIsMultipleOf64) {
// Only enforce for fully 64-bit platforms.
if (sizeof(size_t) == 8 && sizeof(void*) == 8) {
EXPECT_THAT(sizeof(CordRepBtree) % 64, Eq(0)) << "Should be multiple of 64";
}
}
TEST(CordRepBtreeTest, NewDestroyEmptyTree) {
auto* tree = CordRepBtree::New();
EXPECT_THAT(tree->size(), Eq(0));
EXPECT_THAT(tree->height(), Eq(0));
EXPECT_THAT(tree->Edges(), ElementsAre());
CordRepBtree::Destroy(tree);
}
TEST(CordRepBtreeTest, NewDestroyEmptyTreeAtHeight) {
auto* tree = CordRepBtree::New(3);
EXPECT_THAT(tree->size(), Eq(0));
EXPECT_THAT(tree->height(), Eq(3));
EXPECT_THAT(tree->Edges(), ElementsAre());
CordRepBtree::Destroy(tree);
}
TEST(CordRepBtreeTest, Btree) {
CordRep* rep = CordRepBtree::New();
EXPECT_THAT(rep->btree(), Eq(rep));
EXPECT_THAT(static_cast<const CordRep*>(rep)->btree(), Eq(rep));
CordRep::Unref(rep);
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
rep = MakeFlat("Hello world");
EXPECT_DEATH(rep->btree(), ".*");
EXPECT_DEATH(static_cast<const CordRep*>(rep)->btree(), ".*");
CordRep::Unref(rep);
#endif
}
TEST(CordRepBtreeTest, EdgeData) {
CordRepFlat* flat = MakeFlat("Hello world");
CordRepExternal* external = MakeExternal("Hello external");
CordRep* substr1 = MakeSubstring(1, 6, CordRep::Ref(flat));
CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external));
CordRep* concat = MakeConcat(CordRep::Ref(flat), CordRep::Ref(external));
CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1));
EXPECT_TRUE(CordRepBtree::IsDataEdge(flat));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat),
TypedEq<const void*>(flat->Data()));
EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(external));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(external),
TypedEq<const void*>(external->base));
EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1),
TypedEq<const void*>(flat->Data() + 1));
EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2),
TypedEq<const void*>(external->base + 1));
EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e"));
EXPECT_FALSE(CordRepBtree::IsDataEdge(concat));
EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr));
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
EXPECT_DEATH(CordRepBtree::EdgeData(concat), ".*");
EXPECT_DEATH(CordRepBtree::EdgeDataPtr(concat), ".*");
EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*");
EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*");
#endif
CordRep::Unref(bad_substr);
CordRep::Unref(concat);
CordRep::Unref(substr2);
CordRep::Unref(substr1);
CordRep::Unref(external);
CordRep::Unref(flat);
}
TEST(CordRepBtreeTest, CreateUnrefLeaf) {
auto* flat = MakeFlat("a");
auto* leaf = CordRepBtree::Create(flat);
EXPECT_THAT(leaf->size(), Eq(1));
EXPECT_THAT(leaf->height(), Eq(0));
EXPECT_THAT(leaf->Edges(), ElementsAre(flat));
CordRepBtree::Unref(leaf);
}
TEST(CordRepBtreeTest, NewUnrefNode) {
auto* leaf = CordRepBtree::Create(MakeFlat("a"));
CordRepBtree* tree = CordRepBtree::New(leaf);
EXPECT_THAT(tree->size(), Eq(1));
EXPECT_THAT(tree->height(), Eq(1));
EXPECT_THAT(tree->Edges(), ElementsAre(leaf));
CordRepBtree::Unref(tree);
}
TEST_P(CordRepBtreeTest, AppendToLeafToCapacity) {
AutoUnref refs;
std::vector<CordRep*> flats;
flats.push_back(MakeHexFlat(0));
auto* leaf = CordRepBtree::Create(flats.back());
for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
refs.RefIf(shared(), leaf);
flats.push_back(MakeHexFlat(i));
auto* result = CordRepBtree::Append(leaf, flats.back());
EXPECT_THAT(result->height(), Eq(0));
EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
leaf = result;
}
CordRep::Unref(leaf);
}
TEST_P(CordRepBtreeTest, PrependToLeafToCapacity) {
AutoUnref refs;
std::deque<CordRep*> flats;
flats.push_front(MakeHexFlat(0));
auto* leaf = CordRepBtree::Create(flats.front());
for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
refs.RefIf(shared(), leaf);
flats.push_front(MakeHexFlat(i));
auto* result = CordRepBtree::Prepend(leaf, flats.front());
EXPECT_THAT(result->height(), Eq(0));
EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
leaf = result;
}
CordRep::Unref(leaf);
}
// This test specifically aims at code aligning data at either the front or the
// back of the contained `edges[]` array, alternating Append and Prepend will
// move `begin()` and `end()` values as needed for each added value.
TEST_P(CordRepBtreeTest, AppendPrependToLeafToCapacity) {
AutoUnref refs;
std::deque<CordRep*> flats;
flats.push_front(MakeHexFlat(0));
auto* leaf = CordRepBtree::Create(flats.front());
for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
refs.RefIf(shared(), leaf);
CordRepBtree* result;
if (i % 2 != 0) {
flats.push_front(MakeHexFlat(i));
result = CordRepBtree::Prepend(leaf, flats.front());
} else {
flats.push_back(MakeHexFlat(i));
result = CordRepBtree::Append(leaf, flats.back());
}
EXPECT_THAT(result->height(), Eq(0));
EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
leaf = result;
}
CordRep::Unref(leaf);
}
TEST_P(CordRepBtreeTest, AppendToLeafBeyondCapacity) {
AutoUnref refs;
auto* leaf = MakeLeaf();
refs.RefIf(shared(), leaf);
CordRep* flat = MakeFlat("abc");
auto* result = CordRepBtree::Append(leaf, flat);
ASSERT_THAT(result, IsNode(1));
EXPECT_THAT(result, Ne(leaf));
absl::Span<CordRep* const> edges = result->Edges();
ASSERT_THAT(edges, ElementsAre(leaf, IsNode(0)));
EXPECT_THAT(edges[1]->btree()->Edges(), ElementsAre(flat));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, PrependToLeafBeyondCapacity) {
AutoUnref refs;
auto* leaf = MakeLeaf();
refs.RefIf(shared(), leaf);
CordRep* flat = MakeFlat("abc");
auto* result = CordRepBtree::Prepend(leaf, flat);
ASSERT_THAT(result, IsNode(1));
EXPECT_THAT(result, Ne(leaf));
absl::Span<CordRep* const> edges = result->Edges();
ASSERT_THAT(edges, ElementsAre(IsNode(0), leaf));
EXPECT_THAT(edges[0]->btree()->Edges(), ElementsAre(flat));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, AppendToTreeOneDeep) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
AutoUnref refs;
std::vector<CordRep*> flats;
flats.push_back(MakeHexFlat(0));
CordRepBtree* tree = CordRepBtree::Create(flats.back());
for (size_t i = 1; i <= max_cap; ++i) {
flats.push_back(MakeHexFlat(i));
tree = CordRepBtree::Append(tree, flats.back());
}
ASSERT_THAT(tree, IsNode(1));
for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref leaf node once every 4 iterations, which should not have an
// observable effect other than that the leaf itself is copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 4 == 0, tree->Edges().back());
flats.push_back(MakeHexFlat(i));
CordRepBtree* result = CordRepBtree::Append(tree, flats.back());
ASSERT_THAT(result, IsNode(1));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
std::vector<CordRep*> edges = GetLeafEdges(result);
ASSERT_THAT(edges, ElementsAreArray(flats));
tree = result;
}
CordRep::Unref(tree);
}
TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
AutoUnref refs;
std::vector<CordRep*> flats;
flats.push_back(MakeHexFlat(0));
CordRepBtree* tree = CordRepBtree::Create(flats.back());
for (size_t i = 1; i <= max_cap * max_cap; ++i) {
flats.push_back(MakeHexFlat(i));
tree = CordRepBtree::Append(tree, flats.back());
}
ASSERT_THAT(tree, IsNode(2));
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
// iterrations which which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back());
flats.push_back(MakeHexFlat(i));
CordRepBtree* result = CordRepBtree::Append(tree, flats.back());
ASSERT_THAT(result, IsNode(2));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
std::vector<CordRep*> edges = GetLeafEdges(result);
ASSERT_THAT(edges, ElementsAreArray(flats));
tree = result;
}
CordRep::Unref(tree);
}
TEST_P(CordRepBtreeTest, PrependToTreeOneDeep) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
AutoUnref refs;
std::deque<CordRep*> flats;
flats.push_back(MakeHexFlat(0));
CordRepBtree* tree = CordRepBtree::Create(flats.back());
for (size_t i = 1; i <= max_cap; ++i) {
flats.push_front(MakeHexFlat(i));
tree = CordRepBtree::Prepend(tree, flats.front());
}
ASSERT_THAT(tree, IsNode(1));
for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref leaf node once every 4 iterations which should not have an observable
// effect other than than the leaf itself is copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 4 == 0, tree->Edges().back());
flats.push_front(MakeHexFlat(i));
CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front());
ASSERT_THAT(result, IsNode(1));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
std::vector<CordRep*> edges = GetLeafEdges(result);
ASSERT_THAT(edges, ElementsAreArray(flats));
tree = result;
}
CordRep::Unref(tree);
}
TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
AutoUnref refs;
std::deque<CordRep*> flats;
flats.push_back(MakeHexFlat(0));
CordRepBtree* tree = CordRepBtree::Create(flats.back());
for (size_t i = 1; i <= max_cap * max_cap; ++i) {
flats.push_front(MakeHexFlat(i));
tree = CordRepBtree::Prepend(tree, flats.front());
}
ASSERT_THAT(tree, IsNode(2));
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
// iterrations which which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back());
flats.push_front(MakeHexFlat(i));
CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front());
ASSERT_THAT(result, IsNode(2));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
std::vector<CordRep*> edges = GetLeafEdges(result);
ASSERT_THAT(edges, ElementsAreArray(flats));
tree = result;
}
CordRep::Unref(tree);
}
TEST_P(CordRepBtreeDualTest, MergeLeafsNotExceedingCapacity) {
for (bool use_append : {false, true}) {
SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
AutoUnref refs;
std::vector<CordRep*> flats;
// Build `left` side leaf appending all contained flats to `flats`
CordRepBtree* left = MakeLeaf(3);
GetLeafEdges(left, flats);
refs.RefIf(first_shared(), left);
// Build `right` side leaf appending all contained flats to `flats`
CordRepBtree* right = MakeLeaf(2);
GetLeafEdges(right, flats);
refs.RefIf(second_shared(), right);
CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
: CordRepBtree::Prepend(right, left);
EXPECT_THAT(tree, IsNode(0));
// `tree` contains all flats originally belonging to `left` and `right`.
EXPECT_THAT(tree->Edges(), ElementsAreArray(flats));
CordRepBtree::Unref(tree);
}
}
TEST_P(CordRepBtreeDualTest, MergeLeafsExceedingCapacity) {
for (bool use_append : {false, true}) {
SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
AutoUnref refs;
// Build `left` side tree appending all contained flats to `flats`
CordRepBtree* left = MakeLeaf(CordRepBtree::kMaxCapacity - 2);
refs.RefIf(first_shared(), left);
// Build `right` side tree appending all contained flats to `flats`
CordRepBtree* right = MakeLeaf(CordRepBtree::kMaxCapacity - 1);
refs.RefIf(second_shared(), right);
CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
: CordRepBtree::Prepend(right, left);
EXPECT_THAT(tree, IsNode(1));
EXPECT_THAT(tree->Edges(), ElementsAre(left, right));
CordRepBtree::Unref(tree);
}
}
TEST_P(CordRepBtreeDualTest, MergeEqualHeightTrees) {
for (bool use_append : {false, true}) {
SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
AutoUnref refs;
std::vector<CordRep*> flats;
// Build `left` side tree appending all contained flats to `flats`
CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3);
GetLeafEdges(left, flats);
refs.RefIf(first_shared(), left);
// Build `right` side tree appending all contained flats to `flats`
CordRepBtree* right = MakeTree(CordRepBtree::kMaxCapacity * 2);
GetLeafEdges(right, flats);
refs.RefIf(second_shared(), right);
CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
: CordRepBtree::Prepend(right, left);
EXPECT_THAT(tree, IsNode(1));
EXPECT_THAT(tree->Edges(), SizeIs(5));
// `tree` contains all flats originally belonging to `left` and `right`.
EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
CordRepBtree::Unref(tree);
}
}
TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeNotExceedingLeafCapacity) {
for (bool use_append : {false, true}) {
SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
AutoUnref refs;
std::vector<CordRep*> flats;
// Build `left` side tree appending all added flats to `flats`
CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 2 + 2);
GetLeafEdges(left, flats);
refs.RefIf(first_shared(), left);
// Build `right` side tree appending all added flats to `flats`
CordRepBtree* right = MakeTree(3);
GetLeafEdges(right, flats);
refs.RefIf(second_shared(), right);
CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
: CordRepBtree::Prepend(right, left);
EXPECT_THAT(tree, IsNode(1));
EXPECT_THAT(tree->Edges(), SizeIs(3));
// `tree` contains all flats originally belonging to `left` and `right`.
EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
CordRepBtree::Unref(tree);
}
}
TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeExceedingLeafCapacity) {
for (bool use_append : {false, true}) {
SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
AutoUnref refs;
std::vector<CordRep*> flats;
// Build `left` side tree appending all added flats to `flats`
CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3 - 2);
GetLeafEdges(left, flats);
refs.RefIf(first_shared(), left);
// Build `right` side tree appending all added flats to `flats`
CordRepBtree* right = MakeTree(3);
GetLeafEdges(right, flats);
refs.RefIf(second_shared(), right);
CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
: CordRepBtree::Prepend(right, left);
EXPECT_THAT(tree, IsNode(1));
EXPECT_THAT(tree->Edges(), SizeIs(4));
// `tree` contains all flats originally belonging to `left` and `right`.
EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
CordRepBtree::Unref(tree);
}
}
void RefEdgesAt(size_t depth, AutoUnref& refs, CordRepBtree* tree) {
absl::Span<CordRep* const> edges = tree->Edges();
if (depth == 0) {
refs.Ref(edges.front());
refs.Ref(edges.back());
} else {
assert(tree->height() > 0);
RefEdgesAt(depth - 1, refs, edges.front()->btree());
RefEdgesAt(depth - 1, refs, edges.back()->btree());
}
}
TEST(CordRepBtreeTest, MergeFuzzTest) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
std::minstd_rand rnd;
std::uniform_int_distribution<int> coin_flip(0, 1);
std::uniform_int_distribution<int> dice_throw(1, 6);
auto random_leaf_count = [&]() {
std::uniform_int_distribution<int> dist_height(0, 3);
std::uniform_int_distribution<int> dist_leaf(0, max_cap - 1);
const size_t height = dist_height(rnd);
return (height ? pow(max_cap, height) : 0) + dist_leaf(rnd);
};
for (int i = 0; i < 10000; ++i) {
AutoUnref refs;
std::vector<CordRep*> flats;
CordRepBtree* left = MakeTree(random_leaf_count(), coin_flip(rnd));
GetLeafEdges(left, flats);
if (dice_throw(rnd) == 1) {
std::uniform_int_distribution<int> dist(0, left->height());
RefEdgesAt(dist(rnd), refs, left);
}
CordRepBtree* right = MakeTree(random_leaf_count(), coin_flip(rnd));
GetLeafEdges(right, flats);
if (dice_throw(rnd) == 1) {
std::uniform_int_distribution<int> dist(0, right->height());
RefEdgesAt(dist(rnd), refs, right);
}
CordRepBtree* tree = CordRepBtree::Append(left, right);
EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
CordRepBtree::Unref(tree);
}
}
TEST(CordRepBtreeTest, SubTree) {
// Create tree of at least 2 levels high
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
const size_t n = max_cap * max_cap * 2;
const std::string data = CreateRandomString(n * 3);
std::vector<CordRep*> flats;
for (absl::string_view s = data; !s.empty(); s.remove_prefix(3)) {
flats.push_back(MakeFlat(s.substr(0, 3)));
}
CordRepBtree* node = CordRepBtree::Create(CordRep::Ref(flats[0]));
for (size_t i = 1; i < flats.size(); ++i) {
node = CordRepBtree::Append(node, CordRep::Ref(flats[i]));
}
for (int offset = 0; offset < data.length(); ++offset) {
for (int length = 1; length <= data.length() - offset; ++length) {
CordRep* rep = node->SubTree(offset, length);
EXPECT_THAT(CordToString(rep), Eq(data.substr(offset, length)));
CordRep::Unref(rep);
}
}
CordRepBtree::Unref(node);
for (CordRep* rep : flats) {
CordRep::Unref(rep);
}
}
TEST(CordRepBtreeTest, SubTreeOnExistingSubstring) {
// This test verifies that a SubTree call on a pre-existing (large) substring
// adjusts the existing substring if not shared, and else rewrites the
// existing substring.
AutoUnref refs;
std::string data = CreateRandomString(1000);
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
CordRep* flat = MakeFlat(data);
leaf = CordRepBtree::Append(leaf, flat);
// Setup tree containing substring.
CordRep* result = leaf->SubTree(0, 3 + 990);
ASSERT_THAT(result->tag, Eq(BTREE));
CordRep::Unref(leaf);
leaf = result->btree();
ASSERT_THAT(leaf->Edges(), ElementsAre(_, IsSubstring(0, 990)));
EXPECT_THAT(leaf->Edges()[1]->substring()->child, Eq(flat));
// Verify substring of substring.
result = leaf->SubTree(3 + 5, 970);
ASSERT_THAT(result, IsSubstring(5, 970));
EXPECT_THAT(result->substring()->child, Eq(flat));
CordRep::Unref(result);
CordRep::Unref(leaf);
}
TEST_P(CordRepBtreeTest, AddDataToLeaf) {
const size_t n = CordRepBtree::kMaxCapacity;
const std::string data = CreateRandomString(n * 3);
for (bool append : {true, false}) {
AutoUnref refs;
DataConsumer consumer(data, append);
SCOPED_TRACE(append ? "Append" : "Prepend");
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
for (size_t i = 1; i < n; ++i) {
refs.RefIf(shared(), leaf);
CordRepBtree* result = BtreeAdd(leaf, append, consumer.Next(3));
EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
EXPECT_THAT(CordToString(result), Eq(consumer.Consumed()));
leaf = result;
}
CordRep::Unref(leaf);
}
}
TEST_P(CordRepBtreeTest, AppendDataToTree) {
AutoUnref refs;
size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
std::string data = CreateRandomString(n * 3);
CordRepBtree* tree = refs.RefIf(shared(), CreateTree(data, 3));
CordRepBtree* leaf0 = tree->Edges()[0]->btree();
CordRepBtree* leaf1 = tree->Edges()[1]->btree();
CordRepBtree* result = CordRepBtree::Append(tree, "123456789");
EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
EXPECT_THAT(result->Edges(),
ElementsAre(leaf0, Conditional(shared(), Ne(leaf1), Eq(leaf1))));
EXPECT_THAT(CordToString(result), Eq(data + "123456789"));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, PrependDataToTree) {
AutoUnref refs;
size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
std::string data = CreateRandomString(n * 3);
CordRepBtree* tree = refs.RefIf(shared(), CreateTreeReverse(data, 3));
CordRepBtree* leaf0 = tree->Edges()[0]->btree();
CordRepBtree* leaf1 = tree->Edges()[1]->btree();
CordRepBtree* result = CordRepBtree::Prepend(tree, "123456789");
EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
EXPECT_THAT(result->Edges(),
ElementsAre(Conditional(shared(), Ne(leaf0), Eq(leaf0)), leaf1));
EXPECT_THAT(CordToString(result), Eq("123456789" + data));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, AddDataToTreeThreeLevelsDeep) {
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
const size_t n = max_cap * max_cap * max_cap;
const std::string data = CreateRandomString(n * 3);
for (bool append : {true, false}) {
AutoUnref refs;
DataConsumer consumer(data, append);
SCOPED_TRACE(append ? "Append" : "Prepend");
// Fill leaf
CordRepBtree* tree = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
for (size_t i = 1; i < max_cap; ++i) {
tree = BtreeAdd(tree, append, consumer.Next(3));
}
ASSERT_THAT(CordToString(tree), Eq(consumer.Consumed()));
// Fill to maximum at one deep
refs.RefIf(shared(), tree);
CordRepBtree* result = BtreeAdd(tree, append, consumer.Next(3));
ASSERT_THAT(result, IsNode(1));
ASSERT_THAT(result, Ne(tree));
ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
tree = result;
for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
refs.RefIf(shared(), tree);
result = BtreeAdd(tree, append, consumer.Next(3));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
tree = result;
}
// Fill to maximum at two deep
refs.RefIf(shared(), tree);
result = BtreeAdd(tree, append, consumer.Next(3));
ASSERT_THAT(result, IsNode(2));
ASSERT_THAT(result, Ne(tree));
ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
tree = result;
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap;
++i) {
refs.RefIf(shared(), tree);
result = BtreeAdd(tree, append, consumer.Next(3));
ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
tree = result;
}
CordRep::Unref(tree);
}
}
TEST_P(CordRepBtreeTest, AddLargeDataToLeaf) {
const size_t max_cap = CordRepBtree::kMaxCapacity;
const size_t n = max_cap * max_cap * max_cap * 3 + 2;
const std::string data = CreateRandomString(n * kMaxFlatLength);
for (bool append : {true, false}) {
AutoUnref refs;
SCOPED_TRACE(append ? "Append" : "Prepend");
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
refs.RefIf(shared(), leaf);
CordRepBtree* result = BtreeAdd(leaf, append, data);
EXPECT_THAT(CordToString(result), Eq(append ? "abc" + data : data + "abc"));
CordRep::Unref(result);
}
}
TEST_P(CordRepBtreeDualTest, CreateFromConcat) {
AutoUnref refs;
CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
auto* left = MakeConcat(flats[0], flats[1]);
auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
CordRepBtree* result = CordRepBtree::Create(concat);
ASSERT_TRUE(CordRepBtree::IsValid(result));
EXPECT_THAT(result->length, Eq(26));
EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeDualTest, AppendConcat) {
AutoUnref refs;
CordRep* flats[] = {MakeFlat("defgh"), MakeFlat("ijklm"),
MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
auto* left = MakeConcat(flats[0], flats[1]);
auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
CordRepBtree* result = CordRepBtree::Create(MakeFlat("abc"));
result = CordRepBtree::Append(result, concat);
ASSERT_TRUE(CordRepBtree::IsValid(result));
EXPECT_THAT(result->length, Eq(26));
EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeDualTest, PrependConcat) {
AutoUnref refs;
CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
MakeFlat("nopqrstuv"), MakeFlat("wx")};
auto* left = MakeConcat(flats[0], flats[1]);
auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
CordRepBtree* result = CordRepBtree::Create(MakeFlat("yz"));
result = CordRepBtree::Prepend(result, concat);
ASSERT_TRUE(CordRepBtree::IsValid(result));
EXPECT_THAT(result->length, Eq(26));
EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) {
AutoUnref refs;
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
refs.RefIf(shared(), leaf);
CordRepBtree* result = CordRepBtree::Create(leaf);
EXPECT_THAT(result, Eq(leaf));
CordRep::Unref(result);
}
TEST_P(CordRepBtreeTest, ExceedMaxHeight) {
AutoUnref refs;
CordRepBtree* tree = MakeLeaf();
for (int h = 1; h <= CordRepBtree::kMaxHeight; ++h) {
CordRepBtree* newtree = CordRepBtree::New(tree);
for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
newtree = CordRepBtree::Append(newtree, CordRep::Ref(tree));
}
tree = newtree;
}
refs.RefIf(shared(), tree);
#if defined(GTEST_HAS_DEATH_TEST)
EXPECT_DEATH(tree = CordRepBtree::Append(tree, MakeFlat("Boom")), ".*");
#endif
CordRep::Unref(tree);
}
TEST(CordRepBtreeTest, GetCharacter) {
size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
std::string data = CreateRandomString(n * 3);
CordRepBtree* tree = CreateTree(data, 3);
// Add a substring node for good measure.
tree = tree->Append(tree, MakeSubstring(4, 5, MakeFlat("abcdefghijklm")));
data += "efghi";
for (size_t i = 0; i < data.length(); ++i) {
ASSERT_THAT(tree->GetCharacter(i), Eq(data[i]));
}
CordRep::Unref(tree);
}
TEST_P(CordRepBtreeTest, IsFlatSingleFlat) {
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
absl::string_view fragment;
EXPECT_TRUE(leaf->IsFlat(nullptr));
EXPECT_TRUE(leaf->IsFlat(&fragment));
EXPECT_THAT(fragment, Eq("Hello world"));
fragment = "";
EXPECT_TRUE(leaf->IsFlat(0, 11, nullptr));
EXPECT_TRUE(leaf->IsFlat(0, 11, &fragment));
EXPECT_THAT(fragment, Eq("Hello world"));
// Arbitrary ranges must check true as well.
EXPECT_TRUE(leaf->IsFlat(1, 4, &fragment));
EXPECT_THAT(fragment, Eq("ello"));
EXPECT_TRUE(leaf->IsFlat(6, 5, &fragment));
EXPECT_THAT(fragment, Eq("world"));
CordRep::Unref(leaf);
}
TEST(CordRepBtreeTest, IsFlatMultiFlat) {
size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
std::string data = CreateRandomString(n * 3);
CordRepBtree* tree = CreateTree(data, 3);
// Add substring nodes for good measure.
tree = tree->Append(tree, MakeSubstring(4, 3, MakeFlat("abcdefghijklm")));
tree = tree->Append(tree, MakeSubstring(8, 3, MakeFlat("abcdefghijklm")));
data += "efgijk";
EXPECT_FALSE(tree->IsFlat(nullptr));
absl::string_view fragment = "Can't touch this";
EXPECT_FALSE(tree->IsFlat(&fragment));
EXPECT_THAT(fragment, Eq("Can't touch this"));
for (size_t offset = 0; offset < data.size(); offset += 3) {
EXPECT_TRUE(tree->IsFlat(offset, 3, nullptr));
EXPECT_TRUE(tree->IsFlat(offset, 3, &fragment));
EXPECT_THAT(fragment, Eq(data.substr(offset, 3)));
fragment = "Can't touch this";
if (offset > 0) {
EXPECT_FALSE(tree->IsFlat(offset - 1, 4, nullptr));
EXPECT_FALSE(tree->IsFlat(offset - 1, 4, &fragment));
EXPECT_THAT(fragment, Eq("Can't touch this"));
}
if (offset < data.size() - 4) {
EXPECT_FALSE(tree->IsFlat(offset, 4, nullptr));
EXPECT_FALSE(tree->IsFlat(offset, 4, &fragment));
EXPECT_THAT(fragment, Eq("Can't touch this"));
}
}
CordRep::Unref(tree);
}
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotPrivate) {
CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
CordRepBtree::Ref(tree);
EXPECT_DEATH(tree->GetAppendBuffer(1), ".*");
CordRepBtree::Unref(tree);
CordRepBtree::Unref(tree);
}
#endif // defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotFlat) {
CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
for (int i = 1; i <= height(); ++i) {
tree = CordRepBtree::New(tree);
}
EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
CordRepBtree::Unref(tree);
}
TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNotPrivate) {
CordRepFlat* flat = MakeFlat("abc");
CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat));
for (int i = 1; i <= height(); ++i) {
tree = CordRepBtree::New(tree);
}
EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
CordRepBtree::Unref(tree);
CordRep::Unref(flat);
}
TEST_P(CordRepBtreeHeightTest, GetAppendBufferTreeNotPrivate) {
if (height() == 0) return;
AutoUnref refs;
CordRepFlat* flat = MakeFlat("abc");
CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat));
for (int i = 1; i <= height(); ++i) {
if (i == (height() + 1) / 2) refs.Ref(tree);
tree = CordRepBtree::New(tree);
}
EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
CordRepBtree::Unref(tree);
CordRep::Unref(flat);
}
TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNoCapacity) {
CordRepFlat* flat = MakeFlat("abc");
flat->length = flat->Capacity();
CordRepBtree* tree = CordRepBtree::Create(flat);
for (int i = 1; i <= height(); ++i) {
tree = CordRepBtree::New(tree);
}
EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
CordRepBtree::Unref(tree);
}
TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatWithCapacity) {
CordRepFlat* flat = MakeFlat("abc");
CordRepBtree* tree = CordRepBtree::Create(flat);
for (int i = 1; i <= height(); ++i) {
tree = CordRepBtree::New(tree);
}
absl::Span<char> span = tree->GetAppendBuffer(2);
EXPECT_THAT(span, SizeIs(2));
EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 3));
EXPECT_THAT(tree->length, Eq(5));
size_t avail = flat->Capacity() - 5;
span = tree->GetAppendBuffer(avail + 100);
EXPECT_THAT(span, SizeIs(avail));
EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 5));
EXPECT_THAT(tree->length, Eq(5 + avail));
CordRepBtree::Unref(tree);
}
TEST(CordRepBtreeTest, Dump) {
// Handles nullptr
std::stringstream ss;
CordRepBtree::Dump(nullptr, ss);
CordRepBtree::Dump(nullptr, "Once upon a label", ss);
CordRepBtree::Dump(nullptr, "Once upon a label", false, ss);
CordRepBtree::Dump(nullptr, "Once upon a label", true, ss);
// Cover legal edges
CordRepFlat* flat = MakeFlat("Hello world");
CordRepExternal* external = MakeExternal("Hello external");
CordRep* substr_flat = MakeSubstring(1, 6, CordRep::Ref(flat));
CordRep* substr_external = MakeSubstring(2, 7, CordRep::Ref(external));
// Build tree
CordRepBtree* tree = CordRepBtree::Create(flat);
tree = CordRepBtree::Append(tree, external);
tree = CordRepBtree::Append(tree, substr_flat);
tree = CordRepBtree::Append(tree, substr_external);
// Repeat until we have a tree
while (tree->height() == 0) {
tree = CordRepBtree::Append(tree, CordRep::Ref(flat));
tree = CordRepBtree::Append(tree, CordRep::Ref(external));
tree = CordRepBtree::Append(tree, CordRep::Ref(substr_flat));
tree = CordRepBtree::Append(tree, CordRep::Ref(substr_external));
}
for (int api = 0; api <= 3; ++api) {
absl::string_view api_scope;
std::stringstream ss;
switch (api) {
case 0:
api_scope = "Bare";
CordRepBtree::Dump(tree, ss);
break;
case 1:
api_scope = "Label only";
CordRepBtree::Dump(tree, "Once upon a label", ss);
break;
case 2:
api_scope = "Label no content";
CordRepBtree::Dump(tree, "Once upon a label", false, ss);
break;
default:
api_scope = "Label and content";
CordRepBtree::Dump(tree, "Once upon a label", true, ss);
break;
}
SCOPED_TRACE(api_scope);
std::string str = ss.str();
// Contains Node(depth) / Leaf and private / shared indicators
EXPECT_THAT(str, AllOf(HasSubstr("Node(1)"), HasSubstr("Leaf"),
HasSubstr("Private"), HasSubstr("Shared")));
// Contains length and start offset of all data edges
EXPECT_THAT(str, AllOf(HasSubstr("len = 11"), HasSubstr("len = 14"),
HasSubstr("len = 6"), HasSubstr("len = 7"),
HasSubstr("start = 1"), HasSubstr("start = 2")));
// Contains address of all data edges
EXPECT_THAT(
str, AllOf(HasSubstr(absl::StrCat("0x", absl::Hex(flat))),
HasSubstr(absl::StrCat("0x", absl::Hex(external))),
HasSubstr(absl::StrCat("0x", absl::Hex(substr_flat))),
HasSubstr(absl::StrCat("0x", absl::Hex(substr_external)))));
if (api != 0) {
// Contains label
EXPECT_THAT(str, HasSubstr("Once upon a label"));
}
if (api != 3) {
// Does not contain contents
EXPECT_THAT(str, Not(AnyOf((HasSubstr("data = \"Hello world\""),
HasSubstr("data = \"Hello external\""),
HasSubstr("data = \"ello w\""),
HasSubstr("data = \"llo ext\"")))));
} else {
// Contains contents
EXPECT_THAT(str, AllOf((HasSubstr("data = \"Hello world\""),
HasSubstr("data = \"Hello external\""),
HasSubstr("data = \"ello w\""),
HasSubstr("data = \"llo ext\""))));
}
}
CordRep::Unref(tree);
}
TEST(CordRepBtreeTest, IsValid) {
EXPECT_FALSE(CordRepBtree::IsValid(nullptr));
CordRepBtree* empty = CordRepBtree::New(0);
EXPECT_TRUE(CordRepBtree::IsValid(empty));
CordRep::Unref(empty);
for (bool as_tree : {false, true}) {
CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
CordRepBtree* tree = as_tree ? CordRepBtree::New(leaf) : nullptr;
CordRepBtree* check = as_tree ? tree : leaf;
ASSERT_TRUE(CordRepBtree::IsValid(check));
leaf->length--;
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->length++;
ASSERT_TRUE(CordRepBtree::IsValid(check));
leaf->tag--;
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->tag++;
// Height
ASSERT_TRUE(CordRepBtree::IsValid(check));
leaf->storage[0] = static_cast<uint8_t>(CordRepBtree::kMaxHeight + 1);
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->storage[0] = 1;
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->storage[0] = 0;
// Begin
ASSERT_TRUE(CordRepBtree::IsValid(check));
const uint8_t begin = leaf->storage[1];
leaf->storage[1] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity);
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->storage[1] = 2;
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->storage[1] = begin;
// End
ASSERT_TRUE(CordRepBtree::IsValid(check));
const uint8_t end = leaf->storage[2];
leaf->storage[2] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity + 1);
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->storage[2] = end;
// DataEdge tag and value
ASSERT_TRUE(CordRepBtree::IsValid(check));
CordRep* const edge = leaf->Edges()[0];
const uint8_t tag = edge->tag;
CordRepBtreeTestPeer::SetEdge(leaf, begin, nullptr);
EXPECT_FALSE(CordRepBtree::IsValid(check));
CordRepBtreeTestPeer::SetEdge(leaf, begin, edge);
edge->tag = BTREE;
EXPECT_FALSE(CordRepBtree::IsValid(check));
edge->tag = tag;
if (as_tree) {
ASSERT_TRUE(CordRepBtree::IsValid(check));
leaf->length--;
EXPECT_FALSE(CordRepBtree::IsValid(check));
leaf->length++;
// Height
ASSERT_TRUE(CordRepBtree::IsValid(check));
tree->storage[0] = static_cast<uint8_t>(2);
EXPECT_FALSE(CordRepBtree::IsValid(check));
tree->storage[0] = 1;
// Btree edge
ASSERT_TRUE(CordRepBtree::IsValid(check));
CordRep* const edge = tree->Edges()[0];
const uint8_t tag = edge->tag;
edge->tag = FLAT;
EXPECT_FALSE(CordRepBtree::IsValid(check));
edge->tag = tag;
}
ASSERT_TRUE(CordRepBtree::IsValid(check));
CordRep::Unref(check);
}
}
TEST(CordRepBtreeTest, AssertValid) {
CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
const CordRepBtree* ctree = tree;
EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree));
EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree));
#if defined(GTEST_HAS_DEATH_TEST)
CordRepBtree* nulltree = nullptr;
const CordRepBtree* cnulltree = nullptr;
EXPECT_DEBUG_DEATH(
EXPECT_THAT(CordRepBtree::AssertValid(nulltree), Eq(nulltree)), ".*");
EXPECT_DEBUG_DEATH(
EXPECT_THAT(CordRepBtree::AssertValid(cnulltree), Eq(cnulltree)), ".*");
tree->length--;
EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)),
".*");
EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)),
".*");
tree->length++;
#endif
CordRep::Unref(tree);
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#include <cassert>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cordrep_testing {
inline cord_internal::CordRepSubstring* MakeSubstring(
size_t start, size_t len, cord_internal::CordRep* rep) {
auto* sub = new cord_internal::CordRepSubstring;
sub->tag = cord_internal::SUBSTRING;
sub->start = start;
sub->length = len <= 0 ? rep->length - start + len : len;
sub->child = rep;
return sub;
}
inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left,
cord_internal::CordRep* right,
int depth = 0) {
auto* concat = new cord_internal::CordRepConcat;
concat->tag = cord_internal::CONCAT;
concat->length = left->length + right->length;
concat->left = left;
concat->right = right;
concat->set_depth(depth);
return concat;
}
inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
assert(value.length() <= cord_internal::kMaxFlatLength);
auto* flat = cord_internal::CordRepFlat::New(value.length());
flat->length = value.length();
memcpy(flat->Data(), value.data(), value.length());
return flat;
}
// Creates an external node for testing
inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) {
struct Rep : public cord_internal::CordRepExternal {
std::string s;
explicit Rep(absl::string_view sv) : s(sv) {
this->tag = cord_internal::EXTERNAL;
this->base = s.data();
this->length = s.length();
this->releaser_invoker = [](cord_internal::CordRepExternal* self) {
delete static_cast<Rep*>(self);
};
}
};
return new Rep(s);
}
inline std::string CreateRandomString(size_t n) {
absl::string_view data =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"0123456789~!@#$%^&*()_+=-<>?:\"{}[]|";
std::minstd_rand rnd;
std::uniform_int_distribution<size_t> dist(0, data.size() - 1);
std::string s(n, ' ');
for (size_t i = 0; i < n; ++i) {
s[i] = data[dist(rnd)];
}
return s;
}
// Creates an array of flats from the provided string, chopping
// the provided string up into flats of size `chunk_size` characters
// resulting in roughly `data.size() / chunk_size` total flats.
inline std::vector<cord_internal::CordRep*> CreateFlatsFromString(
absl::string_view data, size_t chunk_size) {
assert(chunk_size > 0);
std::vector<cord_internal::CordRep*> flats;
for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) {
flats.push_back(MakeFlat(s.substr(0, chunk_size)));
}
return flats;
}
inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
absl::Span<cord_internal::CordRep* const> flats) {
assert(!flats.empty());
auto* node = cord_internal::CordRepBtree::Create(flats[0]);
for (size_t i = 1; i < flats.size(); ++i) {
node = cord_internal::CordRepBtree::Append(node, flats[i]);
}
return node;
}
inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
size_t offset = 0;
size_t length = rep->length;
while (rep->tag == cord_internal::SUBSTRING) {
offset += rep->substring()->start;
rep = rep->substring()->child;
}
if (rep->tag == cord_internal::BTREE) {
for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
CordToString(edge, s);
}
} else if (rep->tag >= cord_internal::FLAT) {
s.append(rep->flat()->Data() + offset, length);
} else if (rep->tag == cord_internal::EXTERNAL) {
s.append(rep->external()->base + offset, length);
} else {
ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
}
}
inline std::string CordToString(cord_internal::CordRep* rep) {
std::string s;
s.reserve(rep->length);
CordToString(rep, s);
return s;
}
// RAII Helper class to automatically unref reps on destruction.
class AutoUnref {
public:
~AutoUnref() {
for (CordRep* rep : unrefs_) CordRep::Unref(rep);
}
// Adds `rep` to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Add(CordRepType* rep) {
unrefs_.push_back(rep);
return rep;
}
// Increments the reference count of `rep` by one, and adds it to
// the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Ref(CordRepType* rep) {
unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
// Increments the reference count of `rep` by one if `condition` is true,
// and adds it to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* RefIf(bool condition, CordRepType* rep) {
if (condition) unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
private:
using CordRep = absl::cord_internal::CordRep;
std::vector<CordRep*> unrefs_;
};
} // namespace cordrep_testing
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
......@@ -14,7 +14,7 @@
# The commit of GoogleTest to be used in the CMake tests in this directory.
# Keep this in sync with the commit in the WORKSPACE file.
readonly ABSL_GOOGLETEST_COMMIT="5bcd8e3bb929714e031a542d303f818e5a5af45d"
readonly ABSL_GOOGLETEST_COMMIT="8d51ffdfab10b3fba636ae69bc03da4b54f8c235"
# Avoid depending on GitHub by looking for a cached copy of the commit first.
if [[ -r "${KOKORO_GFILE_DIR:-}/distdir/${ABSL_GOOGLETEST_COMMIT}.zip" ]]; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment