Optimize integer-to-string conversions

The updated code is designed to: - Be branch-predictor-friendly - Be cache-friendly - Minimize the lengths of critical paths - Minimize slow operations (particularly multiplications) - Minimize binary/codegen bloat The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output. This precomputation of the exact length enables 2 further performance benefits: - It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well. - It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits. There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past. The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient. PiperOrigin-RevId: 595785531 Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1

Optimize integer-to-string conversions
The updated code is designed to: - Be branch-predictor-friendly - Be cache-friendly - Minimize the lengths of critical paths - Minimize slow operations (particularly multiplications) - Minimize binary/codegen bloat The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output. This precomputation of the exact length enables 2 further performance benefits: - It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well. - It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits. There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past. The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient. PiperOrigin-RevId: 595785531 Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1
d5a2cec0 · Abseil Team · Copybara-Service · ccf0c773 · d5a2cec0 · d5a2cec0
Commit d5a2cec0 authored Jan 04, 2024 by Abseil Team Committed by Copybara-Service Jan 04, 2024
Showing with 426 additions and 105 deletions

absl/base/macros.h
+0 -12

absl/strings/numbers.cc
+0 -0

absl/strings/numbers.h
+151 -12

absl/strings/numbers_test.cc
+7 -0

absl/strings/str_cat.cc
+134 -12

absl/strings/str_cat.h
+88 -69

absl/strings/str_cat_test.cc
+46 -0

No files found.
--- a/absl/base/macros.h
+++ b/absl/base/macros.h
@@ -138,16 +138,4 @@ ABSL_NAMESPACE_END
 #define ABSL_INTERNAL_RETHROW do {} while (false)
 #endif  // ABSL_HAVE_EXCEPTIONS
-// Requires the compiler to prove that the size of the given object is at least
-// the expected amount.
-#if ABSL_HAVE_ATTRIBUTE(diagnose_if) && ABSL_HAVE_BUILTIN(__builtin_object_size)
-#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N)                     \
-  __attribute__((diagnose_if(__builtin_object_size(Obj, 0) < N, \
-                             "object size provably too small "  \
-                             "(this would corrupt memory)",     \
-                             "error")))
-#else
-#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N)
-#endif
 #endif  // ABSL_BASE_MACROS_H_
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
--- a/absl/strings/numbers.h
+++ b/absl/strings/numbers.h
@@ -32,6 +32,7 @@
 #endif
 #include <cstddef>
+#include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <ctime>
@@ -39,10 +40,12 @@
 #include <string>
 #include <type_traits>
+#include "absl/base/attributes.h"
 #include "absl/base/config.h"
 #include "absl/base/internal/endian.h"
 #include "absl/base/macros.h"
 #include "absl/base/nullability.h"
+#include "absl/base/optimization.h"
 #include "absl/base/port.h"
 #include "absl/numeric/bits.h"
 #include "absl/numeric/int128.h"
@@ -158,6 +161,96 @@ bool safe_strtou128_base(absl::string_view text,
 static const int kFastToBufferSize = 32;
 static const int kSixDigitsToBufferSize = 16;
+template <class T>
// Overload for every `T` that is not an unsigned type: reports at run time
// whether `v` compares less than a value-initialized `T` (zero for arithmetic
// types).
std::enable_if_t<!std::is_unsigned<T>::value, bool> IsNegative(const T& v) {
  return v < T();
}
// Overload for unsigned types. An unsigned value can never be negative, so the
// answer is encoded in the return type (`std::false_type`) rather than in a
// run-time `bool`; the optimizer then never has to prove the flag is false.
template <class T>
std::enable_if_t<std::is_unsigned<T>::value, std::false_type> IsNegative(
    const T& /*unused*/) {
  return {};
}
+template <class T>
+std::enable_if_t<std::is_unsigned<std::decay_t<T>>::value, T&&>
+UnsignedAbsoluteValue(T&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) {
+  // The value is unsigned; just return the original.
+  return std::forward<T>(v);
+}
+template <class T>
+ABSL_ATTRIBUTE_CONST_FUNCTION
+    std::enable_if_t<!std::is_unsigned<T>::value, std::make_unsigned_t<T>>
+    UnsignedAbsoluteValue(T v) {
+  using U = std::make_unsigned_t<T>;
+  return IsNegative(v) ? U() - static_cast<U>(v) : static_cast<U>(v);
+}
// Returns the number of base-10 digits in the given number.
// Note that this strictly counts digits. It does not count the sign.
// The `initial_digits` parameter is the starting point, which is normally equal
// to 1 because the number of digits in 0 is 1 (a special case).
// However, callers may e.g. wish to change it to 2 to account for the sign.
//
// Equivalently, the result is `initial_digits - 1` plus the number of decimal
// digits needed to print `v`.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, uint32_t> Base10Digits(
    T v, const uint32_t initial_digits = 1) {
  uint32_t r = initial_digits;
  // Each pass through the loop resolves values of up to six digits with at
  // most three comparisons; larger values are reduced by six digits at a time.
  // If code size becomes an issue, the 'if' stage can be removed for a minor
  // performance loss.
  for (;;) {
    if (ABSL_PREDICT_TRUE(v < 10 * 10)) {
      // One or two digits; the starting value of `r` already covers the first.
      r += (v >= 10);
      break;
    }
    if (ABSL_PREDICT_TRUE(v < 1000 * 10)) {
      // Three or four digits.
      r += (v >= 1000) + 2;
      break;
    }
    if (ABSL_PREDICT_TRUE(v < 100000 * 10)) {
      // Five or six digits.
      r += (v >= 100000) + 4;
      break;
    }
    // Seven or more digits: account for the six lowest digits and continue
    // with what remains.
    r += 6;
    v = static_cast<T>(v / 1000000);
  }
  return r;
}
// Signed overload. Unlike the unsigned overload, the result here DOES include
// one extra position for the minus sign when `v` is negative: the sign is
// folded into the starting count `r`, and the digits of the magnitude are then
// counted by the unsigned overload.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, uint32_t> Base10Digits(
    T v, uint32_t r = 1) {
  // Converting the flag to an integer adds the sign without a branch.
  const uint32_t sign_positions = static_cast<uint32_t>(IsNegative(v));
  return Base10Digits(UnsignedAbsoluteValue(v), r + sign_positions);
}
// These functions return the number of base-10 digits, but multiplied by -1 if
// the input itself is negative. This is handy and efficient for later usage,
// since the bitwise complement of the result becomes equal to the number of
// characters required.
//
// One overload is declared for each built-in integer type from `signed char`
// through `unsigned long long`; the definitions are not in this header.
//
// NOTE(review): the variadic `StrAppend` in str_cat.h uses the absolute value
// of a negative result as the total character count (sign included) and its
// bitwise complement as the digit count. That matches a negative result of
// `~digits` (i.e. `-(digits + 1)`) rather than `-digits` -- confirm against
// the definitions and reword the description above if so.
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    signed char v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    unsigned char v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    short v);  // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    unsigned short v);  // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(int v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    unsigned int v);
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    long v);  // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    unsigned long v);  // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    long long v);  // NOLINT
ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
    unsigned long long v);  // NOLINT
 // Helper function for fast formatting of floating-point values.
 // The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
 // significant digits are returned, trailing zeros are removed, and numbers
@@ -166,24 +259,18 @@ static const int kSixDigitsToBufferSize = 16;
 // Required buffer size is `kSixDigitsToBufferSize`.
 size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer);
-// WARNING: These functions may write more characters than necessary, because
+// All of these functions take an output buffer
-// they are intended for speed. All functions take an output buffer
 // as an argument and return a pointer to the last byte they wrote, which is the
 // terminating '\0'. At most `kFastToBufferSize` bytes are written.
-absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer)
+absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer);
-    ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
+absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer);
-absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer)
+absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer);
-    ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
+absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer);
-absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer)
-    ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
-absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer)
-    ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
 // For enums and integer types that are not an exact match for the types above,
 // use templates to call the appropriate one of the four overloads above.
 template <typename int_type>
-absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer)
+absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) {
-    ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): This signed-ness check is used because it works correctly
@@ -207,6 +294,58 @@ absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer)
  }
 }
+// These functions do NOT add any null-terminator.
// They return a pointer to the beginning of the written string.
// The digit counts provided must *exactly* match the number of base-10 digits
// in the number, or the behavior is undefined.
// (i.e. do NOT count the minus sign, or over- or under-count the digits.)
//
// `buffer_end` is the position just past the last digit: the callers in
// str_cat.cc pass `&str[str.size()]`, and the variadic `StrAppend` passes a
// cursor that has already been advanced past the digits.
// NOTE(review): for negative signed values the callers in str_cat.cc reserve
// one extra character in front of the digits; presumably the signed overloads
// write the '-' there themselves -- confirm against the definitions.
absl::Nonnull<char*> FastIntToBufferBackward(int32_t i,
                                             absl::Nonnull<char*> buffer_end,
                                             uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(uint32_t i,
                                             absl::Nonnull<char*> buffer_end,
                                             uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(int64_t i,
                                             absl::Nonnull<char*> buffer_end,
                                             uint32_t exact_digit_count);
absl::Nonnull<char*> FastIntToBufferBackward(uint64_t i,
                                             absl::Nonnull<char*> buffer_end,
                                             uint32_t exact_digit_count);
+// For enums and integer types that are not an exact match for the types above,
+// use templates to call the appropriate one of the four overloads above.
+template <typename int_type>
+absl::Nonnull<char*> FastIntToBufferBackward(int_type i,
+                                             absl::Nonnull<char*> buffer_end,
+                                             uint32_t exact_digit_count) {
+  static_assert(
+      sizeof(i) <= 64 / 8,
+      "FastIntToBufferBackward works only with 64-bit-or-less integers.");
+  // This signed-ness check is used because it works correctly
+  // with enums, and it also serves to check that int_type is not a pointer.
+  // If one day something like std::is_signed<enum E> works, switch to it.
+  // These conditions are constexpr bools to suppress MSVC warning C4127.
+  constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0;
+  constexpr bool kUse64Bit = sizeof(i) > 32 / 8;
+  if (kIsSigned) {
+    if (kUse64Bit) {
+      return FastIntToBufferBackward(static_cast<int64_t>(i), buffer_end,
+                                     exact_digit_count);
+    } else {
+      return FastIntToBufferBackward(static_cast<int32_t>(i), buffer_end,
+                                     exact_digit_count);
+    }
+  } else {
+    if (kUse64Bit) {
+      return FastIntToBufferBackward(static_cast<uint64_t>(i), buffer_end,
+                                     exact_digit_count);
+    } else {
+      return FastIntToBufferBackward(static_cast<uint32_t>(i), buffer_end,
+                                     exact_digit_count);
+    }
+  }
+}
 // Implementation of SimpleAtoi, generalized to support arbitrary base (used
 // with base different from 10 elsewhere in Abseil implementation).
 template <typename int_type>

--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -231,10 +231,15 @@ TEST(Numbers, TestFastPrints) {
  CheckInt32(INT_MIN);
  CheckInt32(INT_MAX);
  CheckInt64(LONG_MIN);
+  CheckInt64(uint64_t{10000000});
+  CheckInt64(uint64_t{100000000});
  CheckInt64(uint64_t{1000000000});
  CheckInt64(uint64_t{9999999999});
  CheckInt64(uint64_t{100000000000000});
  CheckInt64(uint64_t{999999999999999});
+  CheckInt64(uint64_t{1000000000000000});
+  CheckInt64(uint64_t{10000000000000000});
+  CheckInt64(uint64_t{100000000000000000});
  CheckInt64(uint64_t{1000000000000000000});
  CheckInt64(uint64_t{1199999999999999999});
  CheckInt64(int64_t{-700000000000000000});
@@ -246,6 +251,8 @@ TEST(Numbers, TestFastPrints) {
  CheckUInt64(uint64_t{999999999999999});
  CheckUInt64(uint64_t{1000000000000000000});
  CheckUInt64(uint64_t{1199999999999999999});
+  CheckUInt64(uint64_t{10000000000000000000u});
+  CheckUInt64(uint64_t{10200300040000500006u});
  CheckUInt64(std::numeric_limits<uint64_t>::max());
  for (int i = 0; i < 10000; i++) {

--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -21,10 +21,12 @@
 #include <cstring>
 #include <initializer_list>
 #include <string>
+#include <type_traits>
 #include "absl/base/config.h"
 #include "absl/base/nullability.h"
 #include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/numbers.h"
 #include "absl/strings/string_view.h"
 namespace absl {
@@ -41,8 +43,7 @@ ABSL_NAMESPACE_BEGIN
 namespace {
 // Append is merely a version of memcpy that returns the address of the byte
 // after the area just overwritten.
-inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out,
+absl::Nonnull<char*> Append(absl::Nonnull<char*> out, const AlphaNum& x) {
-                                   const AlphaNum& x) {
  // memcpy is allowed to overwrite arbitrary memory, so doing this after the
  // call would force an extra fetch of x.size().
  char* after = out + x.size();
@@ -52,11 +53,6 @@ inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out,
  return after;
 }
-inline void STLStringAppendUninitializedAmortized(std::string* dest,
-                                                  size_t to_append) {
-  strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
-                                                                   to_append);
-}
 }  // namespace
 std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
@@ -102,6 +98,130 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
 namespace strings_internal {
 // Do not call directly - these are not part of the public API.
+void STLStringAppendUninitializedAmortized(std::string* dest,
                                           size_t to_append) {
  // Grows `*dest` by `to_append` characters by delegating to
  // `AppendUninitializedTraits<std::string>::Append`. Callers in this file
  // overwrite the new characters immediately afterwards.
  // NOTE(review): per the names, the new characters are presumably left
  // uninitialized and capacity growth is amortized -- confirm in
  // resize_uninitialized.h.
  strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
                                                                   to_append);
}
+template <typename Integer>
+std::enable_if_t<std::is_integral<Integer>::value, std::string> IntegerToString(
+    Integer i) {
+  std::string str;
+  const auto /* either bool or std::false_type */ is_negative =
+      absl::numbers_internal::IsNegative(i);
+  const uint32_t digits = absl::numbers_internal::Base10Digits(
+      absl::numbers_internal::UnsignedAbsoluteValue(i));
+  absl::strings_internal::STLStringResizeUninitialized(
+      &str, digits + static_cast<uint32_t>(is_negative));
+  absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
+  return str;
+}
+template <>
+std::string IntegerToString(long i) {  // NOLINT
+  if (sizeof(i) <= sizeof(int)) {
+    return IntegerToString(static_cast<int>(i));
+  } else {
+    return IntegerToString(static_cast<long long>(i));  // NOLINT
+  }
+}
+template <>
+std::string IntegerToString(unsigned long i) {  // NOLINT
+  if (sizeof(i) <= sizeof(unsigned int)) {
+    return IntegerToString(static_cast<unsigned int>(i));
+  } else {
+    return IntegerToString(static_cast<unsigned long long>(i));  // NOLINT
+  }
+}
+template <typename Float>
+std::enable_if_t<std::is_floating_point<Float>::value, std::string>
+FloatToString(Float f) {
+  std::string result;
+  strings_internal::STLStringResizeUninitialized(
+      &result, numbers_internal::kSixDigitsToBufferSize);
+  char* start = &result[0];
+  result.erase(numbers_internal::SixDigitsToBuffer(f, start));
+  return result;
+}
+std::string SingleArgStrCat(int x) { return IntegerToString(x); }
+std::string SingleArgStrCat(unsigned int x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(unsigned long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(long long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(unsigned long long x) { return IntegerToString(x); }
+std::string SingleArgStrCat(float x) { return FloatToString(x); }
+std::string SingleArgStrCat(double x) { return FloatToString(x); }
+template <class Integer>
+std::enable_if_t<std::is_integral<Integer>::value, void> AppendIntegerToString(
+    std::string& str, Integer i) {
+  const auto /* either bool or std::false_type */ is_negative =
+      absl::numbers_internal::IsNegative(i);
+  const uint32_t digits = absl::numbers_internal::Base10Digits(
+      absl::numbers_internal::UnsignedAbsoluteValue(i));
+  absl::strings_internal::STLStringAppendUninitializedAmortized(
+      &str, digits + static_cast<uint32_t>(is_negative));
+  absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
+}
+template <>
+void AppendIntegerToString(std::string& str, long i) {  // NOLINT
+  if (sizeof(i) <= sizeof(int)) {
+    return AppendIntegerToString(str, static_cast<int>(i));
+  } else {
+    return AppendIntegerToString(str, static_cast<long long>(i));  // NOLINT
+  }
+}
+template <>
+void AppendIntegerToString(std::string& str,
+                           unsigned long i) {  // NOLINT
+  if (sizeof(i) <= sizeof(unsigned int)) {
+    return AppendIntegerToString(str, static_cast<unsigned int>(i));
+  } else {
+    return AppendIntegerToString(str,
+                                 static_cast<unsigned long long>(i));  // NOLINT
+  }
+}
+// `SingleArgStrAppend` overloads are defined here for the same reasons as with
+// `SingleArgStrCat` above.
+void SingleArgStrAppend(std::string& str, int x) {
+  return AppendIntegerToString(str, x);
+}
+void SingleArgStrAppend(std::string& str, unsigned int x) {
+  return AppendIntegerToString(str, x);
+}
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, long x) {
+  return AppendIntegerToString(str, x);
+}
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, unsigned long x) {
+  return AppendIntegerToString(str, x);
+}
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, long long x) {
+  return AppendIntegerToString(str, x);
+}
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, unsigned long long x) {
+  return AppendIntegerToString(str, x);
+}
 std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
  std::string result;
  size_t total_size = 0;
@@ -138,7 +258,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
    ASSERT_NO_OVERLAP(*dest, piece);
    to_append += piece.size();
  }
-  STLStringAppendUninitializedAmortized(dest, to_append);
+  strings_internal::STLStringAppendUninitializedAmortized(dest, to_append);
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
@@ -157,7 +277,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
 void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a) {
  ASSERT_NO_OVERLAP(*dest, a);
  std::string::size_type old_size = dest->size();
-  STLStringAppendUninitializedAmortized(dest, a.size());
+  strings_internal::STLStringAppendUninitializedAmortized(dest, a.size());
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
@@ -169,7 +289,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  std::string::size_type old_size = dest->size();
-  STLStringAppendUninitializedAmortized(dest, a.size() + b.size());
+  strings_internal::STLStringAppendUninitializedAmortized(dest,
+                                                          a.size() + b.size());
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
@@ -183,7 +304,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
  ASSERT_NO_OVERLAP(*dest, b);
  ASSERT_NO_OVERLAP(*dest, c);
  std::string::size_type old_size = dest->size();
-  STLStringAppendUninitializedAmortized(dest, a.size() + b.size() + c.size());
+  strings_internal::STLStringAppendUninitializedAmortized(
+      dest, a.size() + b.size() + c.size());
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
@@ -199,7 +321,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
  ASSERT_NO_OVERLAP(*dest, c);
  ASSERT_NO_OVERLAP(*dest, d);
  std::string::size_type old_size = dest->size();
-  STLStringAppendUninitializedAmortized(
+  strings_internal::STLStringAppendUninitializedAmortized(
      dest, a.size() + b.size() + c.size() + d.size());
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;

--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -93,7 +93,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <limits>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -259,10 +258,9 @@ struct Dec {
               typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
      : value(v >= 0 ? static_cast<uint64_t>(v)
                     : uint64_t{0} - static_cast<uint64_t>(v)),
-        width(spec == absl::kNoPad
+        width(spec == absl::kNoPad       ? 1
-                  ? 1
+              : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
-                  : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+                                         : spec - absl::kZeroPad2 + 2),
-                                             : spec - absl::kZeroPad2 + 2),
        fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
        neg(v < 0) {}
@@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces);
 void AppendPieces(absl::Nonnull<std::string*> dest,
                  std::initializer_list<absl::string_view> pieces);
-template <typename Integer>
+void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append);
-std::string IntegerToString(Integer i) {
-  // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer
-  // with 22 bytes (including NULL at the end).
-  constexpr size_t kMaxDigits10 = 22;
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10);
-  char* start = &result[0];
-  // note: this can be optimized to not write last zero.
-  char* end = numbers_internal::FastIntToBuffer(i, start);
-  auto size = static_cast<size_t>(end - start);
-  assert((size < result.size()) &&
-         "StrCat(Integer) does not fit into kMaxDigits10");
-  result.erase(size);
-  return result;
-}
-template <typename Float>
-std::string FloatToString(Float f) {
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(
-      &result, numbers_internal::kSixDigitsToBufferSize);
-  char* start = &result[0];
-  result.erase(numbers_internal::SixDigitsToBuffer(f, start));
-  return result;
-}
 // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types
 // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t
 // and int64_t, then at least one of the three (`int` / `long` / `long long`)
 // would have been ambiguous when passed to `SingleArgStrCat`.
-inline std::string SingleArgStrCat(int x) { return IntegerToString(x); }
+std::string SingleArgStrCat(int x);
-inline std::string SingleArgStrCat(unsigned int x) {
+std::string SingleArgStrCat(unsigned int x);
-  return IntegerToString(x);
+std::string SingleArgStrCat(long x);                // NOLINT
-}
+std::string SingleArgStrCat(unsigned long x);       // NOLINT
-// NOLINTNEXTLINE
+std::string SingleArgStrCat(long long x);           // NOLINT
-inline std::string SingleArgStrCat(long x) { return IntegerToString(x); }
+std::string SingleArgStrCat(unsigned long long x);  // NOLINT
-// NOLINTNEXTLINE
+std::string SingleArgStrCat(float x);
-inline std::string SingleArgStrCat(unsigned long x) {
+std::string SingleArgStrCat(double x);
-  return IntegerToString(x);
-}
+// `SingleArgStrAppend` overloads are defined here for the same reasons as with
-// NOLINTNEXTLINE
+// `SingleArgStrCat` above.
-inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); }
+void SingleArgStrAppend(std::string& str, int x);
-// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, unsigned int x);
-inline std::string SingleArgStrCat(unsigned long long x) {
+void SingleArgStrAppend(std::string& str, long x);                // NOLINT
-  return IntegerToString(x);
+void SingleArgStrAppend(std::string& str, unsigned long x);       // NOLINT
-}
+void SingleArgStrAppend(std::string& str, long long x);           // NOLINT
-inline std::string SingleArgStrCat(float x) { return FloatToString(x); }
+void SingleArgStrAppend(std::string& str, unsigned long long x);  // NOLINT
-inline std::string SingleArgStrCat(double x) { return FloatToString(x); }
+template <typename T,
-// As of September 2023, the SingleArgStrCat() optimization is only enabled for
+          typename = std::enable_if_t<std::is_arithmetic<T>::value &&
-// libc++. The reasons for this are:
+                                      !std::is_same<T, char>::value &&
-// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size
+                                      !std::is_same<T, bool>::value>>
-// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes,
-// this causes both libstdc++ and MSSTL to allocate.
-// 2) strings_internal::STLStringResizeUninitialized() only has an
-// implementation that avoids initialization when using libc++. This isn't as
-// relevant as (1), and the cost should be benchmarked if (1) ever changes on
-// libstc++ or MSSTL.
-#ifdef _LIBCPP_VERSION
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true
-#else
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false
-#endif
-template <typename T, typename = std::enable_if_t<
-                          ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE &&
-                          std::is_arithmetic<T>{} && !std::is_same<T, char>{}>>
 using EnableIfFastCase = T;
-#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE
 }  // namespace strings_internal
 ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
@@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
             static_cast<const AlphaNum&>(args).Piece()...});
 }
+template <class String, class T>
// Fast path for appending exactly one integer: participates in overload
// resolution only when `T` passes `EnableIfFastCase` and is an integral type,
// and forwards to the `SingleArgStrAppend` overload for that type.
std::enable_if_t<
    std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void>
StrAppend(absl::Nonnull<String*> result, T i) {
  return absl::strings_internal::SingleArgStrAppend(*result, i);
}
+// This overload is only selected if all the parameters are numbers that can be
+// handled quickly.
+// Later we can look into how we can extend this to more general argument
+// mixtures without bloating codegen too much, or copying unnecessarily.
+template <typename String, typename... T>
+std::enable_if_t<
+    (sizeof...(T) > 1),
+    std::common_type_t<std::conditional_t<
+        true, void, absl::strings_internal::EnableIfFastCase<T>>...>>
+StrAppend(absl::Nonnull<String*> str, T... args) {
+  // Do not add unnecessary variables, logic, or even "free" lambdas here.
+  // They can add overhead for the compiler and/or at run time.
+  // Furthermore, assume this function will be inlined.
+  // This function is carefully tailored to be able to be largely optimized away
+  // so that it becomes near-equivalent to the caller handling each argument
+  // individually while minimizing register pressure, so that the compiler
+  // can inline it with minimal overhead.
+  // First, calculate the total length, so we can perform just a single resize.
+  // Save all the lengths for later.
+  size_t total_length = 0;
+  const ptrdiff_t lengths[] = {
+      absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...};
+  for (const ptrdiff_t possibly_negative_length : lengths) {
+    // Lengths are negative for negative numbers. Keep them for later use, but
+    // take their absolute values for calculating total lengths;
+    total_length += possibly_negative_length < 0
+                        ? static_cast<size_t>(-possibly_negative_length)
+                        : static_cast<size_t>(possibly_negative_length);
+  }
+  // Now reserve space for all the arguments.
+  const size_t old_size = str->size();
+  absl::strings_internal::STLStringAppendUninitializedAmortized(str,
+                                                                total_length);
+  // Finally, output each argument one-by-one, from left to right.
+  size_t i = 0;  // The current argument we're processing
+  ptrdiff_t n;   // The length of the current argument
+  typename String::pointer pos = &(*str)[old_size];
+  using SomeTrivialEmptyType = std::false_type;
+  // Ugly code due to the lack of C++14 fold expression makes us.
+  const SomeTrivialEmptyType dummy1;
+  for (const SomeTrivialEmptyType& dummy2 :
+       {(/* Comma expressions are poor man's C++17 fold expression for C++14 */
+         (void)(n = lengths[i]),
+         (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0),
+         (void)absl::numbers_internal::FastIntToBufferBackward(
+             absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)),
+             pos += n, static_cast<uint32_t>(n)),
+         (void)++i, dummy1)...}) {
+    (void)dummy2;  // Remove & migrate to fold expressions in C++17
+  }
+}
 // Helper function for the future StrCat default floating-point format, %.6g
 // This is fast.
 inline strings_internal::AlphaNumBuffer<

--- a/absl/strings/str_cat_test.cc
+++ b/absl/strings/str_cat_test.cc
@@ -39,6 +39,24 @@
 namespace {
+template <typename Integer>
// Checks that `absl::StrCat(value)` and single-argument `absl::StrAppend`
// produce the same text as `std::to_string(value)`. `StrAppend` is exercised
// against a 1-character and a 33-character existing string.
// NOTE(review): the two prefix lengths were presumably chosen so that one
// destination stays within small-string storage and the other does not --
// confirm.
void VerifyInteger(Integer value) {
  const std::string expected = std::to_string(value);
  EXPECT_EQ(absl::StrCat(value), expected);
  const char* short_prefix = "x";
  const char* long_prefix = "2;k.msabxiuow2[09i;o3k21-93-9=29]";
  std::string short_str = short_prefix;
  absl::StrAppend(&short_str, value);
  EXPECT_EQ(short_str, short_prefix + expected);
  std::string long_str = long_prefix;
  absl::StrAppend(&long_str, value);
  EXPECT_EQ(long_str, long_prefix + expected);
}
 // Test absl::StrCat of ints and longs of various sizes and signedness.
 TEST(StrCat, Ints) {
  const short s = -1;  // NOLINT(runtime/int)
@@ -68,6 +86,34 @@ TEST(StrCat, Ints) {
  EXPECT_EQ(answer, "-9-12");
  answer = absl::StrCat(uintptr, 0);
  EXPECT_EQ(answer, "130");
+  for (const uint32_t base : {2u, 10u}) {
+    for (const int extra_shift : {0, 12}) {
+      for (uint64_t i = 0; i < (1 << 8); ++i) {
+        uint64_t j = i;
+        while (true) {
+          uint64_t v = j ^ (extra_shift != 0 ? (j << extra_shift) * base : 0);
+          VerifyInteger(static_cast<bool>(v));
+          VerifyInteger(static_cast<wchar_t>(v));
+          VerifyInteger(static_cast<signed char>(v));
+          VerifyInteger(static_cast<unsigned char>(v));
+          VerifyInteger(static_cast<short>(v));               // NOLINT
+          VerifyInteger(static_cast<unsigned short>(v));      // NOLINT
+          VerifyInteger(static_cast<int>(v));                 // NOLINT
+          VerifyInteger(static_cast<unsigned int>(v));        // NOLINT
+          VerifyInteger(static_cast<long>(v));                // NOLINT
+          VerifyInteger(static_cast<unsigned long>(v));       // NOLINT
+          VerifyInteger(static_cast<long long>(v));           // NOLINT
+          VerifyInteger(static_cast<unsigned long long>(v));  // NOLINT
+          const uint64_t next = j == 0 ? 1 : j * base;
+          if (next <= j) {
+            break;
+          }
+          j = next;
+        }
+      }
+    }
+  }
 }
 TEST(StrCat, Enums) {