Commit ba5fd097 by Shahriar Rouf Committed by Copybara-Service

Optimize `CEscape` and `CEscapeAndAppend` by up to 40%.

- Compute the escape character values at compile time.
- Use `little_endian::Store32` invariably to write all escaped characters.
- Reserve 3 slop bytes at the end of the destination so that `little_endian::Store32` can also be called safely for the final escaped character.

PiperOrigin-RevId: 677995014
Change-Id: I9d710fff48d0ce0b013e64d726960364c77ea1d7
parent 29fdacd2
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "absl/strings/escaping.h" #include "absl/strings/escaping.h"
#include <algorithm> #include <algorithm>
#include <array>
#include <cassert> #include <cassert>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
...@@ -24,6 +25,7 @@ ...@@ -24,6 +25,7 @@
#include <utility> #include <utility>
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h" #include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h" #include "absl/base/internal/unaligned_access.h"
#include "absl/base/nullability.h" #include "absl/base/nullability.h"
...@@ -388,6 +390,40 @@ constexpr unsigned char kCEscapedLen[256] = { ...@@ -388,6 +390,40 @@ constexpr unsigned char kCEscapedLen[256] = {
}; };
/* clang-format on */ /* clang-format on */
constexpr uint32_t MakeCEscapedLittleEndianUint32(size_t c) {
size_t char_len = kCEscapedLen[c];
if (char_len == 1) {
return static_cast<uint32_t>(c);
}
if (char_len == 2) {
switch (c) {
case '\n':
return '\\' | (static_cast<uint32_t>('n') << 8);
case '\r':
return '\\' | (static_cast<uint32_t>('r') << 8);
case '\t':
return '\\' | (static_cast<uint32_t>('t') << 8);
case '\"':
return '\\' | (static_cast<uint32_t>('\"') << 8);
case '\'':
return '\\' | (static_cast<uint32_t>('\'') << 8);
case '\\':
return '\\' | (static_cast<uint32_t>('\\') << 8);
}
}
return static_cast<uint32_t>('\\' | (('0' + (c / 64)) << 8) |
(('0' + ((c % 64) / 8)) << 16) |
(('0' + (c % 8)) << 24));
}
// Builds a std::array whose elements are MakeCEscapedLittleEndianUint32(i) for
// each index i of the supplied std::index_sequence, in sequence order.
template <size_t... Is>
inline constexpr std::array<uint32_t, sizeof...(Is)>
MakeCEscapedLittleEndianUint32Array(std::index_sequence<Is...>) {
  using Table = std::array<uint32_t, sizeof...(Is)>;
  return Table{{MakeCEscapedLittleEndianUint32(Is)...}};
}
// Compile-time lookup table with 256 entries: entry i is
// MakeCEscapedLittleEndianUint32(i), i.e. the packed escaped form of byte
// value i.
constexpr std::array<uint32_t, 256> kCEscapedLittleEndianUint32Array =
MakeCEscapedLittleEndianUint32Array(std::make_index_sequence<256>());
// Calculates the length of the C-style escaped version of 'src'. // Calculates the length of the C-style escaped version of 'src'.
// Assumes that non-printable characters are escaped using octal sequences, and // Assumes that non-printable characters are escaped using octal sequences, and
// that UTF-8 bytes are not handled specially. // that UTF-8 bytes are not handled specially.
...@@ -421,52 +457,24 @@ void CEscapeAndAppendInternal(absl::string_view src, ...@@ -421,52 +457,24 @@ void CEscapeAndAppendInternal(absl::string_view src,
return; return;
} }
// We keep 3 slop bytes so that we can call `little_endian::Store32`
// invariably regardless of the length of the escaped character.
constexpr size_t slop_bytes = 3;
size_t cur_dest_len = dest->size(); size_t cur_dest_len = dest->size();
ABSL_INTERNAL_CHECK( size_t new_dest_len = cur_dest_len + escaped_len + slop_bytes;
cur_dest_len <= std::numeric_limits<size_t>::max() - escaped_len, ABSL_INTERNAL_CHECK(new_dest_len > cur_dest_len, "std::string size overflow");
"std::string size overflow"); strings_internal::AppendUninitializedTraits<std::string>::Append(
strings_internal::STLStringResizeUninitialized(dest, dest, escaped_len + slop_bytes);
cur_dest_len + escaped_len);
char* append_ptr = &(*dest)[cur_dest_len]; char* append_ptr = &(*dest)[cur_dest_len];
for (char c : src) { for (char c : src) {
size_t char_len = kCEscapedLen[static_cast<unsigned char>(c)]; unsigned char uc = static_cast<unsigned char>(c);
if (char_len == 1) { size_t char_len = kCEscapedLen[uc];
*append_ptr++ = c; uint32_t little_endian_uint32 = kCEscapedLittleEndianUint32Array[uc];
} else if (char_len == 2) { little_endian::Store32(append_ptr, little_endian_uint32);
switch (c) { append_ptr += char_len;
case '\n':
*append_ptr++ = '\\';
*append_ptr++ = 'n';
break;
case '\r':
*append_ptr++ = '\\';
*append_ptr++ = 'r';
break;
case '\t':
*append_ptr++ = '\\';
*append_ptr++ = 't';
break;
case '\"':
*append_ptr++ = '\\';
*append_ptr++ = '\"';
break;
case '\'':
*append_ptr++ = '\\';
*append_ptr++ = '\'';
break;
case '\\':
*append_ptr++ = '\\';
*append_ptr++ = '\\';
break;
}
} else {
*append_ptr++ = '\\';
*append_ptr++ = '0' + static_cast<unsigned char>(c) / 64;
*append_ptr++ = '0' + (static_cast<unsigned char>(c) % 64) / 8;
*append_ptr++ = '0' + static_cast<unsigned char>(c) % 8;
}
} }
dest->resize(new_dest_len - slop_bytes);
} }
// Reverses the mapping in Base64EscapeInternal; see that method's // Reverses the mapping in Base64EscapeInternal; see that method's
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment