Commit 53fbcb88 by Abseil Team Committed by Copybara-Service

Introduce a kTotalMorePrecise accounting mode for Cord::EstimatedMemoryUsage().…

Introduce a kTotalMorePrecise accounting mode for Cord::EstimatedMemoryUsage(). This mode avoids double-counting blocks that a Cord references more than once; otherwise it is similar to the existing kTotal mode.

There's no change to the existing kTotal or kFairShare accounting modes.

PiperOrigin-RevId: 544378591
Change-Id: I7b4ae55cd93d631194e59a9cd0ff07f47611219e
parent bde85071
......@@ -110,9 +110,30 @@ enum class CordMemoryAccounting {
// Counts the *approximate* number of bytes held in full or in part by this
// Cord (which may not remain the same between invocations). Cords that share
// memory could each be "charged" independently for the same shared memory.
// See also comment on `kTotalMorePrecise` on internally shared memory.
kTotal,
// Counts the *approximate* number of bytes held in full or in part by this
// Cord for the distinct memory held by this cord. This option is similar
// to `kTotal`, except that if the cord has multiple references to the same
// memory, that memory is only counted once.
//
// For example:
// absl::Cord cord;
// cord.Append(some_other_cord);
// cord.Append(some_other_cord);
// // Counts `some_other_cord` twice:
// cord.EstimatedMemoryUsage(kTotal);
// // Counts `some_other_cord` once:
// cord.EstimatedMemoryUsage(kTotalMorePrecise);
//
// The `kTotalMorePrecise` number is more expensive to compute as it requires
// deduplicating all memory references. Applications should prefer to use
// `kFairShare` or `kTotal` unless they really need a more precise estimate
// on "how much memory is potentially held / kept alive by this cord?"
kTotalMorePrecise,
// Counts the *approximate* number of bytes held in full or in part by this
// Cord weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
......@@ -1273,10 +1294,16 @@ inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
size_t result = sizeof(Cord);
if (const absl::cord_internal::CordRep* rep = contents_.tree()) {
if (accounting_method == CordMemoryAccounting::kFairShare) {
result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
} else {
result += cord_internal::GetEstimatedMemoryUsage(rep);
switch (accounting_method) {
case CordMemoryAccounting::kFairShare:
result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
break;
case CordMemoryAccounting::kTotalMorePrecise:
result += cord_internal::GetMorePreciseMemoryUsage(rep);
break;
case CordMemoryAccounting::kTotal:
result += cord_internal::GetEstimatedMemoryUsage(rep);
break;
}
}
return result;
......
......@@ -16,6 +16,7 @@
#include <cstddef>
#include <cstdint>
#include <unordered_set>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
......@@ -37,7 +38,7 @@ namespace cord_internal {
namespace {
// Accounting mode for analyzing memory usage.
enum class Mode { kTotal, kFairShare };
enum class Mode { kFairShare, kTotal, kTotalMorePrecise };
// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
// holds a 'fraction' representing a cumulative inverse refcount weight.
......@@ -62,6 +63,23 @@ struct RawUsage {
void Add(size_t size, CordRepRef<mode>) { total += size; }
};
// Specialization of RawUsage for `Mode::kTotalMorePrecise`. Besides the
// running total it tracks the set of objects already counted, and so avoids
// double-counting objects referenced more than once by the same Cord.
template <>
struct RawUsage<Mode::kTotalMorePrecise> {
  // Sum of the sizes passed to `Add()` for reps that were not seen before.
  size_t total = 0;
  // Reps that have already contributed to `total`.
  // TODO(b/289250880): Replace this with a flat_hash_set.
  std::unordered_set<const CordRep*> counted;

  // Adds `size` to `total` unless `repref.rep` has been counted previously.
  void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) {
    // `insert()` reports via `.second` whether the key was newly inserted, so
    // the membership test and the insertion share a single hash lookup.
    if (counted.insert(repref.rep).second) {
      total += size;
    }
  }
};
// Returns n / refcount avoiding a div for the common refcount == 1.
template <typename refcount_t>
double MaybeDiv(double d, refcount_t refcount) {
......@@ -183,6 +201,10 @@ size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) {
return GetEstimatedUsage<Mode::kFairShare>(rep);
}
// Returns the memory usage estimate for `rep` computed in
// `Mode::kTotalMorePrecise`.
size_t GetMorePreciseMemoryUsage(const CordRep* rep) {
  const size_t more_precise_bytes =
      GetEstimatedUsage<Mode::kTotalMorePrecise>(rep);
  return more_precise_bytes;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
......@@ -31,6 +31,24 @@ namespace cord_internal {
size_t GetEstimatedMemoryUsage(const CordRep* rep);
// Returns the *approximate* number of bytes held in full or in part by this
// Cord for the distinct memory held by this cord. This is similar to
// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references
// to the same memory, that memory is only counted once.
//
// For example:
//   absl::Cord cord;
//   cord.Append(some_other_cord);
//   cord.Append(some_other_cord);
//   // Calls GetEstimatedMemoryUsage() and counts `some_other_cord` twice:
//   cord.EstimatedMemoryUsage(CordMemoryAccounting::kTotal);
//   // Calls GetMorePreciseMemoryUsage() and counts `some_other_cord` once:
//   cord.EstimatedMemoryUsage(CordMemoryAccounting::kTotalMorePrecise);
//
// This is more expensive than `GetEstimatedMemoryUsage()` as it requires
// deduplicating all memory references.
size_t GetMorePreciseMemoryUsage(const CordRep* rep);
// Returns the *approximate* number of bytes held in full or in part by this
// CordRep weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
......
......@@ -1765,6 +1765,8 @@ TEST_P(CordTest, ExternalMemoryGet) {
// of empty and inlined cords, and flat nodes.
constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare;
constexpr auto kTotalMorePrecise =
absl::CordMemoryAccounting::kTotalMorePrecise;
// Creates a cord of `n` `c` values, making sure no string stealing occurs.
absl::Cord MakeCord(size_t n, char c) {
......@@ -1776,12 +1778,14 @@ TEST(CordTest, CordMemoryUsageEmpty) {
absl::Cord cord;
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage());
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare));
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kTotalMorePrecise));
}
// A cord built from a short literal is expected to report only the size of
// the Cord object itself, whichever accounting mode is requested.
TEST(CordTest, CordMemoryUsageInlined) {
  absl::Cord a("hello");
  const size_t cord_object_size = sizeof(absl::Cord);
  EXPECT_EQ(a.EstimatedMemoryUsage(), cord_object_size);
  EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), cord_object_size);
  EXPECT_EQ(a.EstimatedMemoryUsage(kTotalMorePrecise), cord_object_size);
}
TEST(CordTest, CordMemoryUsageExternalMemory) {
......@@ -1791,6 +1795,7 @@ TEST(CordTest, CordMemoryUsageExternalMemory) {
sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t);
EXPECT_EQ(cord.EstimatedMemoryUsage(), expected);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected);
EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), expected);
}
TEST(CordTest, CordMemoryUsageFlat) {
......@@ -1800,6 +1805,8 @@ TEST(CordTest, CordMemoryUsageFlat) {
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + flat_size);
}
TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
......@@ -1809,6 +1816,8 @@ TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
absl::Cord cord = flat.Subcord(500, 1000);
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2);
}
......@@ -1819,6 +1828,8 @@ TEST(CordTest, CordMemoryUsageFlatShared) {
const size_t flat_size =
absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize();
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size / 2);
}
......@@ -1837,6 +1848,8 @@ TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) {
absl::Cord cord2(cord);
EXPECT_EQ(cord2.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2);
}
......@@ -1863,6 +1876,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2;
EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size);
EXPECT_EQ(cord1.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + rep1_size);
EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep1_shared_size);
......@@ -1877,6 +1892,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep2_size = sizeof(CordRepBtree) + flats2_size;
EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + rep2_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep2_size);
......@@ -1885,6 +1902,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 +
rep2_size);
......@@ -1903,6 +1922,66 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) {
EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage());
}
// Verifies that `kTotalMorePrecise` charges a buffer that one cord references
// twice only once, while the default accounting charges it per reference.
TEST_P(CordTest, CordMemoryUsageTotalMorePreciseMode) {
  constexpr size_t kChunkSize = 2000;
  std::string tmp_str(kChunkSize, 'x');
  const absl::Cord flat(std::move(tmp_str));

  // `fragmented` ends up with two references into the same underlying
  // buffer that it shares with `flat`.
  absl::Cord fragmented(flat);
  fragmented.Append(flat);

  // What `flat` accounts for beyond its own top-level Cord object.
  const size_t flat_internal_usage =
      flat.EstimatedMemoryUsage() - sizeof(absl::Cord);

  // Common to both estimates: the Cord object and the CordRepBtree that
  // points at the two copies of flat's internals.
  const size_t tree_overhead = sizeof(absl::Cord) + sizeof(CordRepBtree);

  // The two copies are expected to be deduplicated in this mode.
  EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
            tree_overhead + flat_internal_usage);

  // The default (kTotal) mode overestimates by counting both copies.
  EXPECT_EQ(fragmented.EstimatedMemoryUsage(),
            tree_overhead + 2 * flat_internal_usage);
}
// Same scenario as the plain `kTotalMorePrecise` test, except that both
// references to the shared buffer are made through `Subcord()`.
TEST_P(CordTest, CordMemoryUsageTotalMorePreciseModeWithSubstring) {
  constexpr size_t kChunkSize = 2000;
  std::string tmp_str(kChunkSize, 'x');
  const absl::Cord flat(std::move(tmp_str));

  // Two appended subcords, each referring into the buffer shared with `flat`.
  absl::Cord fragmented;
  fragmented.Append(flat.Subcord(1, kChunkSize - 2));
  fragmented.Append(flat.Subcord(1, kChunkSize - 2));

  // What `flat` accounts for beyond its own top-level Cord object.
  const size_t flat_internal_usage =
      flat.EstimatedMemoryUsage() - sizeof(absl::Cord);

  // Common to both estimates: the Cord object, the CordRepBtree, and the two
  // CordRepSubstring nodes that each point at flat's internals.
  const size_t tree_overhead = sizeof(absl::Cord) + sizeof(CordRepBtree) +
                               2 * sizeof(CordRepSubstring);

  // Flat's internals are expected to be counted a single time in this mode.
  EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
            tree_overhead + flat_internal_usage);

  // The default (kTotal) mode overestimates by counting them twice.
  EXPECT_EQ(fragmented.EstimatedMemoryUsage(),
            tree_overhead + 2 * flat_internal_usage);
}
} // namespace
// Regtest for 7510292 (fix a bug introduced by 7465150)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment