Commit d5a2cec0 by Abseil Team Committed by Copybara-Service

Optimize integer-to-string conversions

The updated code is designed to:
- Be branch-predictor-friendly
- Be cache-friendly
- Minimize the lengths of critical paths
- Minimize slow operations (particularly multiplications)
- Minimize binary/codegen bloat

The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output.

This precomputation of the exact length enables 2 further performance benefits:
- It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well.
- It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits.

There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past.

The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient.

PiperOrigin-RevId: 595785531
Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1
parent ccf0c773
...@@ -138,16 +138,4 @@ ABSL_NAMESPACE_END ...@@ -138,16 +138,4 @@ ABSL_NAMESPACE_END
#define ABSL_INTERNAL_RETHROW do {} while (false) #define ABSL_INTERNAL_RETHROW do {} while (false)
#endif // ABSL_HAVE_EXCEPTIONS #endif // ABSL_HAVE_EXCEPTIONS
// Requires the compiler to prove that the size of the given object is at least
// the expected amount.
//
// `Obj` is the object whose size is inspected with
// `__builtin_object_size(Obj, 0)` and `N` is the minimum size it must have.
// When the size is provably smaller than `N`, the `diagnose_if` attribute
// reports the message below with severity "error".
#if ABSL_HAVE_ATTRIBUTE(diagnose_if) && ABSL_HAVE_BUILTIN(__builtin_object_size)
#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N) \
__attribute__((diagnose_if(__builtin_object_size(Obj, 0) < N, \
"object size provably too small " \
"(this would corrupt memory)", \
"error")))
#else
// Either `diagnose_if` or `__builtin_object_size` is unavailable: the macro
// expands to nothing and no check is performed.
#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N)
#endif
#endif // ABSL_BASE_MACROS_H_ #endif // ABSL_BASE_MACROS_H_
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#endif #endif
#include <cstddef> #include <cstddef>
#include <cstdint>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <ctime> #include <ctime>
...@@ -39,10 +40,12 @@ ...@@ -39,10 +40,12 @@
#include <string> #include <string>
#include <type_traits> #include <type_traits>
#include "absl/base/attributes.h"
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/internal/endian.h" #include "absl/base/internal/endian.h"
#include "absl/base/macros.h" #include "absl/base/macros.h"
#include "absl/base/nullability.h" #include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "absl/base/port.h" #include "absl/base/port.h"
#include "absl/numeric/bits.h" #include "absl/numeric/bits.h"
#include "absl/numeric/int128.h" #include "absl/numeric/int128.h"
...@@ -158,6 +161,96 @@ bool safe_strtou128_base(absl::string_view text, ...@@ -158,6 +161,96 @@ bool safe_strtou128_base(absl::string_view text,
static const int kFastToBufferSize = 32; static const int kFastToBufferSize = 32;
static const int kSixDigitsToBufferSize = 16; static const int kSixDigitsToBufferSize = 16;
// Reports whether `v` is strictly less than a value-initialized `T`.
// This overload participates only when `T` is not an unsigned type.
template <class T>
std::enable_if_t<!std::is_unsigned<T>::value, bool> IsNegative(const T& v) {
  const T zero = T();
  return v < zero;
}
// Unsigned overload: the argument is ignored because an unsigned value can
// never be negative. The result is carried in the return *type*
// (`std::false_type`), i.e. it is a compile-time constant, so the optimizer
// does not have to prove a runtime bool is false later on.
template <class T>
std::enable_if_t<std::is_unsigned<T>::value, std::false_type> IsNegative(
    const T& /*unused*/) {
  return {};
}
// Unsigned overload: an unsigned value is already its own absolute value, so
// the argument is handed straight back with its value category preserved
// (the return type is `T&&`).
template <class T>
std::enable_if_t<std::is_unsigned<std::decay_t<T>>::value, T&&>
UnsignedAbsoluteValue(T&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) {
  return static_cast<T&&>(v);
}
// Signed overload: returns |v| as the unsigned counterpart of `T`.
// The negation is carried out on the unsigned representation (`0 - x`), so it
// never performs signed arithmetic on `v` itself.
template <class T>
ABSL_ATTRIBUTE_CONST_FUNCTION
    std::enable_if_t<!std::is_unsigned<T>::value, std::make_unsigned_t<T>>
    UnsignedAbsoluteValue(T v) {
  using Unsigned = std::make_unsigned_t<T>;
  const Unsigned as_unsigned = static_cast<Unsigned>(v);
  if (IsNegative(v)) {
    return static_cast<Unsigned>(Unsigned() - as_unsigned);
  }
  return as_unsigned;
}
// Counts the base-10 digits of an unsigned value `v`.
// Only digits are counted; a sign is never included.
// `initial_digits` seeds the count. It defaults to 1 because even 0 occupies
// one digit; a caller may pass e.g. 2 to reserve room for a sign character.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, uint32_t> Base10Digits(
    T v, const uint32_t initial_digits = 1) {
  uint32_t count = initial_digits;
  // Each pass classifies up to six low-order digits with three range checks;
  // larger values are divided by 10^6 and examined again.
  // If code size becomes an issue, the 'if' stages can be removed for a minor
  // performance loss.
  while (true) {
    if (ABSL_PREDICT_TRUE(v < 100)) {
      // One or two digits remain.
      count += (v >= 10);
      return count;
    }
    if (ABSL_PREDICT_TRUE(v < 10000)) {
      // Three or four digits remain.
      count += (v >= 1000) + 2;
      return count;
    }
    if (ABSL_PREDICT_TRUE(v < 1000000)) {
      // Five or six digits remain.
      count += (v >= 100000) + 4;
      return count;
    }
    // At least seven digits: account for six of them and drop them.
    count += 6;
    v = static_cast<T>(v / 1000000);
  }
}
// Signed overload: counts the digits of |v| and adds one more for the minus
// sign when `v` is negative. `r` is the starting count (1 by default).
template <typename T>
std::enable_if_t<std::is_signed<T>::value, uint32_t> Base10Digits(
    T v, uint32_t r = 1) {
  // Converting the sign test to an integer adds the extra slot without a
  // branch.
  const uint32_t sign_slot = static_cast<uint32_t>(IsNegative(v));
  return Base10Digits(UnsignedAbsoluteValue(v), r + sign_slot);
}
// These functions return the number of base-10 digits, but multiplied by -1 if
// the input itself is negative. This is handy and efficient for later usage,
// since the bitwise complement of the result becomes equal to the number of
// characters required.
//
// One overload is declared for each built-in integer type from `signed char`
// through `unsigned long long`; the definitions are not part of this header.
//
// NOTE(review): the variadic `StrAppend` fast path treats a negative result
// `n` as "`-n` characters in total, `~n` digits", which corresponds to an
// encoding of `~digits` rather than `-digits` for negative inputs. The
// definitions are not visible here -- confirm the exact encoding against the
// implementation before relying on the wording above.
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
signed char v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
unsigned char v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
short v); // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
unsigned short v); // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(int v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
unsigned int v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
long v); // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
unsigned long v); // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
long long v); // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
unsigned long long v); // NOLINT
// Helper function for fast formatting of floating-point values. // Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six // The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers // significant digits are returned, trailing zeros are removed, and numbers
...@@ -166,24 +259,18 @@ static const int kSixDigitsToBufferSize = 16; ...@@ -166,24 +259,18 @@ static const int kSixDigitsToBufferSize = 16;
// Required buffer size is `kSixDigitsToBufferSize`. // Required buffer size is `kSixDigitsToBufferSize`.
size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer); size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer);
// WARNING: These functions may write more characters than necessary, because // All of these functions take an output buffer
// they are intended for speed. All functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the // as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written. // terminating '\0'. At most `kFastToBufferSize` bytes are written.
absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer) absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer);
ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer);
absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer) absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer);
ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer);
absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer)
ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer)
ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
// For enums and integer types that are not an exact match for the types above, // For enums and integer types that are not an exact match for the types above,
// use templates to call the appropriate one of the four overloads above. // use templates to call the appropriate one of the four overloads above.
template <typename int_type> template <typename int_type>
absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) {
ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) {
static_assert(sizeof(i) <= 64 / 8, static_assert(sizeof(i) <= 64 / 8,
"FastIntToBuffer works only with 64-bit-or-less integers."); "FastIntToBuffer works only with 64-bit-or-less integers.");
// TODO(jorg): This signed-ness check is used because it works correctly // TODO(jorg): This signed-ness check is used because it works correctly
...@@ -207,6 +294,58 @@ absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) ...@@ -207,6 +294,58 @@ absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer)
} }
} }
// These functions do NOT add any null-terminator.
// They return a pointer to the beginning of the written string.
// The digit counts provided must *exactly* match the number of base-10 digits
// in the number, or the behavior is undefined.
// (i.e. do NOT count the minus sign, or over- or under-count the digits.)
//
// `buffer_end` marks where the output ends; callers in this change pass the
// position one past the last character to be written (e.g.
// `&str[str.size()]`), so the text is laid out immediately before it.
absl::Nonnull<char*> FastIntToBufferBackward(int32_t i,
absl::Nonnull<char*> buffer_end,
uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(uint32_t i,
absl::Nonnull<char*> buffer_end,
uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(int64_t i,
absl::Nonnull<char*> buffer_end,
uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(uint64_t i,
absl::Nonnull<char*> buffer_end,
uint32_t exact_digit_count);
// Catch-all for enums and for integer types that do not exactly match one of
// the four fixed-width overloads: the value is cast to the fixed-width type
// with the same signedness and a sufficient width, then forwarded.
template <typename int_type>
absl::Nonnull<char*> FastIntToBufferBackward(int_type i,
                                             absl::Nonnull<char*> buffer_end,
                                             uint32_t exact_digit_count) {
  static_assert(
      sizeof(i) <= 64 / 8,
      "FastIntToBufferBackward works only with 64-bit-or-less integers.");
  // Signedness is detected arithmetically because that also works for enums
  // and rejects pointer types. If something like std::is_signed<enum E> ever
  // works, switch to it.
  // All selectors are constexpr bools to suppress MSVC warning C4127.
  constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0;
  constexpr bool kUse64Bit = sizeof(i) > 32 / 8;
  constexpr bool kSigned64 = kIsSigned && kUse64Bit;
  constexpr bool kSigned32 = kIsSigned && !kUse64Bit;
  constexpr bool kUnsigned64 = !kIsSigned && kUse64Bit;
  if (kSigned64) {
    return FastIntToBufferBackward(static_cast<int64_t>(i), buffer_end,
                                   exact_digit_count);
  }
  if (kSigned32) {
    return FastIntToBufferBackward(static_cast<int32_t>(i), buffer_end,
                                   exact_digit_count);
  }
  if (kUnsigned64) {
    return FastIntToBufferBackward(static_cast<uint64_t>(i), buffer_end,
                                   exact_digit_count);
  }
  return FastIntToBufferBackward(static_cast<uint32_t>(i), buffer_end,
                                 exact_digit_count);
}
// Implementation of SimpleAtoi, generalized to support arbitrary base (used // Implementation of SimpleAtoi, generalized to support arbitrary base (used
// with base different from 10 elsewhere in Abseil implementation). // with base different from 10 elsewhere in Abseil implementation).
template <typename int_type> template <typename int_type>
......
...@@ -231,10 +231,15 @@ TEST(Numbers, TestFastPrints) { ...@@ -231,10 +231,15 @@ TEST(Numbers, TestFastPrints) {
CheckInt32(INT_MIN); CheckInt32(INT_MIN);
CheckInt32(INT_MAX); CheckInt32(INT_MAX);
CheckInt64(LONG_MIN); CheckInt64(LONG_MIN);
CheckInt64(uint64_t{10000000});
CheckInt64(uint64_t{100000000});
CheckInt64(uint64_t{1000000000}); CheckInt64(uint64_t{1000000000});
CheckInt64(uint64_t{9999999999}); CheckInt64(uint64_t{9999999999});
CheckInt64(uint64_t{100000000000000}); CheckInt64(uint64_t{100000000000000});
CheckInt64(uint64_t{999999999999999}); CheckInt64(uint64_t{999999999999999});
CheckInt64(uint64_t{1000000000000000});
CheckInt64(uint64_t{10000000000000000});
CheckInt64(uint64_t{100000000000000000});
CheckInt64(uint64_t{1000000000000000000}); CheckInt64(uint64_t{1000000000000000000});
CheckInt64(uint64_t{1199999999999999999}); CheckInt64(uint64_t{1199999999999999999});
CheckInt64(int64_t{-700000000000000000}); CheckInt64(int64_t{-700000000000000000});
...@@ -246,6 +251,8 @@ TEST(Numbers, TestFastPrints) { ...@@ -246,6 +251,8 @@ TEST(Numbers, TestFastPrints) {
CheckUInt64(uint64_t{999999999999999}); CheckUInt64(uint64_t{999999999999999});
CheckUInt64(uint64_t{1000000000000000000}); CheckUInt64(uint64_t{1000000000000000000});
CheckUInt64(uint64_t{1199999999999999999}); CheckUInt64(uint64_t{1199999999999999999});
CheckUInt64(uint64_t{10000000000000000000u});
CheckUInt64(uint64_t{10200300040000500006u});
CheckUInt64(std::numeric_limits<uint64_t>::max()); CheckUInt64(std::numeric_limits<uint64_t>::max());
for (int i = 0; i < 10000; i++) { for (int i = 0; i < 10000; i++) {
......
...@@ -21,10 +21,12 @@ ...@@ -21,10 +21,12 @@
#include <cstring> #include <cstring>
#include <initializer_list> #include <initializer_list>
#include <string> #include <string>
#include <type_traits>
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/nullability.h" #include "absl/base/nullability.h"
#include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
namespace absl { namespace absl {
...@@ -41,8 +43,7 @@ ABSL_NAMESPACE_BEGIN ...@@ -41,8 +43,7 @@ ABSL_NAMESPACE_BEGIN
namespace { namespace {
// Append is merely a version of memcpy that returns the address of the byte // Append is merely a version of memcpy that returns the address of the byte
// after the area just overwritten. // after the area just overwritten.
inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out, absl::Nonnull<char*> Append(absl::Nonnull<char*> out, const AlphaNum& x) {
const AlphaNum& x) {
// memcpy is allowed to overwrite arbitrary memory, so doing this after the // memcpy is allowed to overwrite arbitrary memory, so doing this after the
// call would force an extra fetch of x.size(). // call would force an extra fetch of x.size().
char* after = out + x.size(); char* after = out + x.size();
...@@ -52,11 +53,6 @@ inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out, ...@@ -52,11 +53,6 @@ inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out,
return after; return after;
} }
inline void STLStringAppendUninitializedAmortized(std::string* dest,
size_t to_append) {
strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
to_append);
}
} // namespace } // namespace
std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
...@@ -102,6 +98,130 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, ...@@ -102,6 +98,130 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
namespace strings_internal { namespace strings_internal {
// Do not call directly - these are not part of the public API. // Do not call directly - these are not part of the public API.
void STLStringAppendUninitializedAmortized(std::string* dest,
size_t to_append) {
strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
to_append);
}
template <typename Integer>
std::enable_if_t<std::is_integral<Integer>::value, std::string> IntegerToString(
Integer i) {
std::string str;
const auto /* either bool or std::false_type */ is_negative =
absl::numbers_internal::IsNegative(i);
const uint32_t digits = absl::numbers_internal::Base10Digits(
absl::numbers_internal::UnsignedAbsoluteValue(i));
absl::strings_internal::STLStringResizeUninitialized(
&str, digits + static_cast<uint32_t>(is_negative));
absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
return str;
}
// `long` specialization: forwards to the `int` instantiation when `long` is no
// wider than `int`, and to the `long long` instantiation otherwise.
template <>
std::string IntegerToString(long i) {  // NOLINT
  const bool fits_in_int = sizeof(i) <= sizeof(int);
  if (!fits_in_int) {
    return IntegerToString(static_cast<long long>(i));  // NOLINT
  }
  return IntegerToString(static_cast<int>(i));
}
// `unsigned long` specialization: forwards to the `unsigned int` instantiation
// when the widths allow it, and to `unsigned long long` otherwise.
template <>
std::string IntegerToString(unsigned long i) {  // NOLINT
  const bool fits_in_unsigned_int = sizeof(i) <= sizeof(unsigned int);
  if (!fits_in_unsigned_int) {
    return IntegerToString(static_cast<unsigned long long>(i));  // NOLINT
  }
  return IntegerToString(static_cast<unsigned int>(i));
}
// Formats `f` with `SixDigitsToBuffer`. The string is first sized to
// `kSixDigitsToBufferSize` and then cut down to the length the formatter
// reports.
template <typename Float>
std::enable_if_t<std::is_floating_point<Float>::value, std::string>
FloatToString(Float f) {
  std::string out;
  strings_internal::STLStringResizeUninitialized(
      &out, numbers_internal::kSixDigitsToBufferSize);
  char* const buffer = &out[0];
  const size_t written = numbers_internal::SixDigitsToBuffer(f, buffer);
  out.erase(written);
  return out;
}
// Out-of-line `SingleArgStrCat` overloads. Integer arguments are routed
// through `IntegerToString`, floating-point arguments through `FloatToString`.
std::string SingleArgStrCat(int x) {
  return IntegerToString(x);
}
std::string SingleArgStrCat(unsigned int x) {
  return IntegerToString(x);
}
// NOLINTNEXTLINE
std::string SingleArgStrCat(long x) {
  return IntegerToString(x);
}
// NOLINTNEXTLINE
std::string SingleArgStrCat(unsigned long x) {
  return IntegerToString(x);
}
// NOLINTNEXTLINE
std::string SingleArgStrCat(long long x) {
  return IntegerToString(x);
}
// NOLINTNEXTLINE
std::string SingleArgStrCat(unsigned long long x) {
  return IntegerToString(x);
}
std::string SingleArgStrCat(float x) {
  return FloatToString(x);
}
std::string SingleArgStrCat(double x) {
  return FloatToString(x);
}
template <class Integer>
std::enable_if_t<std::is_integral<Integer>::value, void> AppendIntegerToString(
std::string& str, Integer i) {
const auto /* either bool or std::false_type */ is_negative =
absl::numbers_internal::IsNegative(i);
const uint32_t digits = absl::numbers_internal::Base10Digits(
absl::numbers_internal::UnsignedAbsoluteValue(i));
absl::strings_internal::STLStringAppendUninitializedAmortized(
&str, digits + static_cast<uint32_t>(is_negative));
absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
}
// `long` specialization: forwards to the `int` instantiation when `long` is no
// wider than `int`, and to the `long long` instantiation otherwise.
template <>
void AppendIntegerToString(std::string& str, long i) {  // NOLINT
  const bool fits_in_int = sizeof(i) <= sizeof(int);
  if (fits_in_int) {
    AppendIntegerToString(str, static_cast<int>(i));
    return;
  }
  AppendIntegerToString(str, static_cast<long long>(i));  // NOLINT
}
// `unsigned long` specialization: forwards to the `unsigned int` instantiation
// when the widths allow it, and to `unsigned long long` otherwise.
template <>
void AppendIntegerToString(std::string& str,
                           unsigned long i) {  // NOLINT
  const bool fits_in_unsigned_int = sizeof(i) <= sizeof(unsigned int);
  if (fits_in_unsigned_int) {
    AppendIntegerToString(str, static_cast<unsigned int>(i));
    return;
  }
  AppendIntegerToString(str,
                        static_cast<unsigned long long>(i));  // NOLINT
}
// `SingleArgStrAppend` overloads are defined here for the same reasons as with
// `SingleArgStrCat` above. Each one appends its integer argument to `str` via
// `AppendIntegerToString`.
void SingleArgStrAppend(std::string& str, int x) {
  AppendIntegerToString(str, x);
}
void SingleArgStrAppend(std::string& str, unsigned int x) {
  AppendIntegerToString(str, x);
}
// NOLINTNEXTLINE
void SingleArgStrAppend(std::string& str, long x) {
  AppendIntegerToString(str, x);
}
// NOLINTNEXTLINE
void SingleArgStrAppend(std::string& str, unsigned long x) {
  AppendIntegerToString(str, x);
}
// NOLINTNEXTLINE
void SingleArgStrAppend(std::string& str, long long x) {
  AppendIntegerToString(str, x);
}
// NOLINTNEXTLINE
void SingleArgStrAppend(std::string& str, unsigned long long x) {
  AppendIntegerToString(str, x);
}
std::string CatPieces(std::initializer_list<absl::string_view> pieces) { std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
std::string result; std::string result;
size_t total_size = 0; size_t total_size = 0;
...@@ -138,7 +258,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest, ...@@ -138,7 +258,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
ASSERT_NO_OVERLAP(*dest, piece); ASSERT_NO_OVERLAP(*dest, piece);
to_append += piece.size(); to_append += piece.size();
} }
STLStringAppendUninitializedAmortized(dest, to_append); strings_internal::STLStringAppendUninitializedAmortized(dest, to_append);
char* const begin = &(*dest)[0]; char* const begin = &(*dest)[0];
char* out = begin + old_size; char* out = begin + old_size;
...@@ -157,7 +277,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest, ...@@ -157,7 +277,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a) { void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a) {
ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, a);
std::string::size_type old_size = dest->size(); std::string::size_type old_size = dest->size();
STLStringAppendUninitializedAmortized(dest, a.size()); strings_internal::STLStringAppendUninitializedAmortized(dest, a.size());
char* const begin = &(*dest)[0]; char* const begin = &(*dest)[0];
char* out = begin + old_size; char* out = begin + old_size;
out = Append(out, a); out = Append(out, a);
...@@ -169,7 +289,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ...@@ -169,7 +289,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, a);
ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, b);
std::string::size_type old_size = dest->size(); std::string::size_type old_size = dest->size();
STLStringAppendUninitializedAmortized(dest, a.size() + b.size()); strings_internal::STLStringAppendUninitializedAmortized(dest,
a.size() + b.size());
char* const begin = &(*dest)[0]; char* const begin = &(*dest)[0];
char* out = begin + old_size; char* out = begin + old_size;
out = Append(out, a); out = Append(out, a);
...@@ -183,7 +304,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ...@@ -183,7 +304,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, b);
ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, c);
std::string::size_type old_size = dest->size(); std::string::size_type old_size = dest->size();
STLStringAppendUninitializedAmortized(dest, a.size() + b.size() + c.size()); strings_internal::STLStringAppendUninitializedAmortized(
dest, a.size() + b.size() + c.size());
char* const begin = &(*dest)[0]; char* const begin = &(*dest)[0];
char* out = begin + old_size; char* out = begin + old_size;
out = Append(out, a); out = Append(out, a);
...@@ -199,7 +321,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ...@@ -199,7 +321,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, c);
ASSERT_NO_OVERLAP(*dest, d); ASSERT_NO_OVERLAP(*dest, d);
std::string::size_type old_size = dest->size(); std::string::size_type old_size = dest->size();
STLStringAppendUninitializedAmortized( strings_internal::STLStringAppendUninitializedAmortized(
dest, a.size() + b.size() + c.size() + d.size()); dest, a.size() + b.size() + c.size() + d.size());
char* const begin = &(*dest)[0]; char* const begin = &(*dest)[0];
char* out = begin + old_size; char* out = begin + old_size;
......
...@@ -93,7 +93,6 @@ ...@@ -93,7 +93,6 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <limits>
#include <string> #include <string>
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
...@@ -259,10 +258,9 @@ struct Dec { ...@@ -259,10 +258,9 @@ struct Dec {
typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr) typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
: value(v >= 0 ? static_cast<uint64_t>(v) : value(v >= 0 ? static_cast<uint64_t>(v)
: uint64_t{0} - static_cast<uint64_t>(v)), : uint64_t{0} - static_cast<uint64_t>(v)),
width(spec == absl::kNoPad width(spec == absl::kNoPad ? 1
? 1 : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
: spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2 : spec - absl::kZeroPad2 + 2),
: spec - absl::kZeroPad2 + 2),
fill(spec >= absl::kSpacePad2 ? ' ' : '0'), fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
neg(v < 0) {} neg(v < 0) {}
...@@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces); ...@@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces);
void AppendPieces(absl::Nonnull<std::string*> dest, void AppendPieces(absl::Nonnull<std::string*> dest,
std::initializer_list<absl::string_view> pieces); std::initializer_list<absl::string_view> pieces);
template <typename Integer> void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append);
std::string IntegerToString(Integer i) {
// Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer
// with 22 bytes (including NULL at the end).
constexpr size_t kMaxDigits10 = 22;
std::string result;
strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10);
char* start = &result[0];
// note: this can be optimized to not write last zero.
char* end = numbers_internal::FastIntToBuffer(i, start);
auto size = static_cast<size_t>(end - start);
assert((size < result.size()) &&
"StrCat(Integer) does not fit into kMaxDigits10");
result.erase(size);
return result;
}
template <typename Float>
std::string FloatToString(Float f) {
std::string result;
strings_internal::STLStringResizeUninitialized(
&result, numbers_internal::kSixDigitsToBufferSize);
char* start = &result[0];
result.erase(numbers_internal::SixDigitsToBuffer(f, start));
return result;
}
// `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types
// (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t
// and int64_t, then at least one of the three (`int` / `long` / `long long`) // and int64_t, then at least one of the three (`int` / `long` / `long long`)
// would have been ambiguous when passed to `SingleArgStrCat`. // would have been ambiguous when passed to `SingleArgStrCat`.
inline std::string SingleArgStrCat(int x) { return IntegerToString(x); } std::string SingleArgStrCat(int x);
inline std::string SingleArgStrCat(unsigned int x) { std::string SingleArgStrCat(unsigned int x);
return IntegerToString(x); std::string SingleArgStrCat(long x); // NOLINT
} std::string SingleArgStrCat(unsigned long x); // NOLINT
// NOLINTNEXTLINE std::string SingleArgStrCat(long long x); // NOLINT
inline std::string SingleArgStrCat(long x) { return IntegerToString(x); } std::string SingleArgStrCat(unsigned long long x); // NOLINT
// NOLINTNEXTLINE std::string SingleArgStrCat(float x);
inline std::string SingleArgStrCat(unsigned long x) { std::string SingleArgStrCat(double x);
return IntegerToString(x);
} // `SingleArgStrAppend` overloads are defined here for the same reasons as with
// NOLINTNEXTLINE // `SingleArgStrCat` above.
inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); } void SingleArgStrAppend(std::string& str, int x);
// NOLINTNEXTLINE void SingleArgStrAppend(std::string& str, unsigned int x);
inline std::string SingleArgStrCat(unsigned long long x) { void SingleArgStrAppend(std::string& str, long x); // NOLINT
return IntegerToString(x); void SingleArgStrAppend(std::string& str, unsigned long x); // NOLINT
} void SingleArgStrAppend(std::string& str, long long x); // NOLINT
inline std::string SingleArgStrCat(float x) { return FloatToString(x); } void SingleArgStrAppend(std::string& str, unsigned long long x); // NOLINT
inline std::string SingleArgStrCat(double x) { return FloatToString(x); }
template <typename T,
// As of September 2023, the SingleArgStrCat() optimization is only enabled for typename = std::enable_if_t<std::is_arithmetic<T>::value &&
// libc++. The reasons for this are: !std::is_same<T, char>::value &&
// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size !std::is_same<T, bool>::value>>
// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes,
// this causes both libstdc++ and MSSTL to allocate.
// 2) strings_internal::STLStringResizeUninitialized() only has an
// implementation that avoids initialization when using libc++. This isn't as
// relevant as (1), and the cost should be benchmarked if (1) ever changes on
// libstdc++ or MSSTL.
#ifdef _LIBCPP_VERSION
#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true
#else
#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false
#endif
template <typename T, typename = std::enable_if_t<
ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE &&
std::is_arithmetic<T>{} && !std::is_same<T, char>{}>>
using EnableIfFastCase = T; using EnableIfFastCase = T;
#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE
} // namespace strings_internal } // namespace strings_internal
ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); } ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
...@@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ...@@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
static_cast<const AlphaNum&>(args).Piece()...}); static_cast<const AlphaNum&>(args).Piece()...});
} }
// Fast path for appending exactly one integer: selected when `T` passes
// `EnableIfFastCase` and is an integral type. The work is delegated to
// `SingleArgStrAppend`.
template <class String, class T>
std::enable_if_t<
    std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void>
StrAppend(absl::Nonnull<String*> result, T i) {
  absl::strings_internal::SingleArgStrAppend(*result, i);
}
// This overload is only selected if all the parameters are numbers that can be
// handled quickly.
// Later we can look into how we can extend this to more general argument
// mixtures without bloating codegen too much, or copying unnecessarily.
//
// The return type always evaluates to `void`; the `conditional_t` wrapper
// exists only so that `EnableIfFastCase<T>` must be well-formed for every
// argument type, which removes this overload from consideration otherwise.
// It also requires at least two arguments (`sizeof...(T) > 1`).
template <typename String, typename... T>
std::enable_if_t<
(sizeof...(T) > 1),
std::common_type_t<std::conditional_t<
true, void, absl::strings_internal::EnableIfFastCase<T>>...>>
StrAppend(absl::Nonnull<String*> str, T... args) {
// Do not add unnecessary variables, logic, or even "free" lambdas here.
// They can add overhead for the compiler and/or at run time.
// Furthermore, assume this function will be inlined.
// This function is carefully tailored to be able to be largely optimized away
// so that it becomes near-equivalent to the caller handling each argument
// individually while minimizing register pressure, so that the compiler
// can inline it with minimal overhead.
// First, calculate the total length, so we can perform just a single resize.
// Save all the lengths for later.
size_t total_length = 0;
const ptrdiff_t lengths[] = {
absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...};
for (const ptrdiff_t possibly_negative_length : lengths) {
// Lengths are negative for negative numbers. Keep them for later use, but
// take their absolute values when calculating the total length.
total_length += possibly_negative_length < 0
? static_cast<size_t>(-possibly_negative_length)
: static_cast<size_t>(possibly_negative_length);
}
// Now reserve space for all the arguments.
const size_t old_size = str->size();
absl::strings_internal::STLStringAppendUninitializedAmortized(str,
total_length);
// Finally, output each argument one-by-one, from left to right.
size_t i = 0; // The current argument we're processing
ptrdiff_t n; // The length of the current argument
typename String::pointer pos = &(*str)[old_size];
using SomeTrivialEmptyType = std::false_type;
// The awkward construct below is forced on us by the lack of fold expressions
// in C++14: the parameter pack is expanded inside a braced initializer list,
// whose elements are evaluated strictly left to right.
// For each argument: load its saved length into `n`; if `n` is negative, emit
// '-' and replace `n` with `~n`; then advance `pos` by `n` and write the
// digits of the absolute value backwards so that they end at the new `pos`.
const SomeTrivialEmptyType dummy1;
for (const SomeTrivialEmptyType& dummy2 :
{(/* Comma expressions are poor man's C++17 fold expression for C++14 */
(void)(n = lengths[i]),
(void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0),
(void)absl::numbers_internal::FastIntToBufferBackward(
absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)),
pos += n, static_cast<uint32_t>(n)),
(void)++i, dummy1)...}) {
(void)dummy2; // Remove & migrate to fold expressions in C++17
}
}
// Helper function for the future StrCat default floating-point format, %.6g // Helper function for the future StrCat default floating-point format, %.6g
// This is fast. // This is fast.
inline strings_internal::AlphaNumBuffer< inline strings_internal::AlphaNumBuffer<
......
...@@ -39,6 +39,24 @@ ...@@ -39,6 +39,24 @@
namespace { namespace {
// Checks that `absl::StrCat(value)` matches `std::to_string(value)`, and that
// `absl::StrAppend` onto both a short and a long existing string yields the
// prefix followed by that same text.
template <typename Integer>
void VerifyInteger(Integer value) {
  const std::string want = std::to_string(value);

  EXPECT_EQ(absl::StrCat(value), want);

  const std::string short_prefix = "x";
  std::string short_result = short_prefix;
  absl::StrAppend(&short_result, value);
  EXPECT_EQ(short_result, short_prefix + want);

  const std::string long_prefix = "2;k.msabxiuow2[09i;o3k21-93-9=29]";
  std::string long_result = long_prefix;
  absl::StrAppend(&long_result, value);
  EXPECT_EQ(long_result, long_prefix + want);
}
// Test absl::StrCat of ints and longs of various sizes and signedness. // Test absl::StrCat of ints and longs of various sizes and signedness.
TEST(StrCat, Ints) { TEST(StrCat, Ints) {
const short s = -1; // NOLINT(runtime/int) const short s = -1; // NOLINT(runtime/int)
...@@ -68,6 +86,34 @@ TEST(StrCat, Ints) { ...@@ -68,6 +86,34 @@ TEST(StrCat, Ints) {
EXPECT_EQ(answer, "-9-12"); EXPECT_EQ(answer, "-9-12");
answer = absl::StrCat(uintptr, 0); answer = absl::StrCat(uintptr, 0);
EXPECT_EQ(answer, "130"); EXPECT_EQ(answer, "130");
for (const uint32_t base : {2u, 10u}) {
for (const int extra_shift : {0, 12}) {
for (uint64_t i = 0; i < (1 << 8); ++i) {
uint64_t j = i;
while (true) {
uint64_t v = j ^ (extra_shift != 0 ? (j << extra_shift) * base : 0);
VerifyInteger(static_cast<bool>(v));
VerifyInteger(static_cast<wchar_t>(v));
VerifyInteger(static_cast<signed char>(v));
VerifyInteger(static_cast<unsigned char>(v));
VerifyInteger(static_cast<short>(v)); // NOLINT
VerifyInteger(static_cast<unsigned short>(v)); // NOLINT
VerifyInteger(static_cast<int>(v)); // NOLINT
VerifyInteger(static_cast<unsigned int>(v)); // NOLINT
VerifyInteger(static_cast<long>(v)); // NOLINT
VerifyInteger(static_cast<unsigned long>(v)); // NOLINT
VerifyInteger(static_cast<long long>(v)); // NOLINT
VerifyInteger(static_cast<unsigned long long>(v)); // NOLINT
const uint64_t next = j == 0 ? 1 : j * base;
if (next <= j) {
break;
}
j = next;
}
}
}
}
} }
TEST(StrCat, Enums) { TEST(StrCat, Enums) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment