Commit aa146013 by Abseil Team Committed by Copybara-Service

Speed up absl::Symbolize by ~6x via faster file reads.

absl::Symbolize does tons of tiny reads. Speed this up by switching
from lseek+read to a pread, and by reading more data than requested
into a buffer.

A faster absl::Symbolize will be helpful in tests and when printing
stack traces on /threadz etc.

Results for the absl::Symbolize benchmark, which exercises the uncached
behavior of absl::Symbolize:

```
name          old time/op  new time/op  delta
BM_Symbolize  16.4ms ±12%   2.6ms ± 0%  -84.06%  (p=0.001 n=5+9)
```

PiperOrigin-RevId: 582687566
Change-Id: I44caf189d81867f3fd8c050a3100a4b9a8e744d7
parent 61ea5d25
...@@ -289,6 +289,30 @@ ObjFile *AddrMap::Add() { ...@@ -289,6 +289,30 @@ ObjFile *AddrMap::Add() {
return new (&obj_[size_++]) ObjFile; return new (&obj_[size_++]) ObjFile;
} }
// Positional reader for a file descriptor that serves reads through a
// caller-supplied buffer holding one contiguous window of the file.
class CachingFile {
 public:
  // Binds the reader to "fd" and uses buf[0, buf_size-1] as cache storage.
  // The cached window starts out empty.
  CachingFile(int fd, char *buf, size_t buf_size)
      : fd_{fd}, cache_{buf}, cache_size_{buf_size} {}

  // Returns the file descriptor this reader was constructed with.
  int fd() const { return fd_; }

  // Reads up to "count" bytes starting at "offset" into "buf". Returns the
  // number of bytes read, or -1 on failure.
  ssize_t ReadFromOffset(void *buf, size_t count, off_t offset);

  // Returns true only if exactly "count" bytes starting at "offset" could be
  // read into "buf".
  bool ReadFromOffsetExact(void *buf, size_t count, off_t offset);

 private:
  int fd_;             // File being read.
  char *cache_;        // Cache storage supplied by the caller.
  size_t cache_size_;  // Capacity of cache_ in bytes.

  // File bytes [cache_start_, cache_limit_-1] are held in a prefix of
  // cache_[0, cache_size_-1]. Equal values mean nothing is cached.
  off_t cache_start_ = 0;
  off_t cache_limit_ = 0;
};
// --------------------------------------------------------------- // ---------------------------------------------------------------
enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND }; enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND };
...@@ -330,6 +354,7 @@ class Symbolizer { ...@@ -330,6 +354,7 @@ class Symbolizer {
SYMBOL_BUF_SIZE = 3072, SYMBOL_BUF_SIZE = 3072,
TMP_BUF_SIZE = 1024, TMP_BUF_SIZE = 1024,
SYMBOL_CACHE_LINES = 128, SYMBOL_CACHE_LINES = 128,
FILE_CACHE_SIZE = 8192,
}; };
AddrMap addr_map_; AddrMap addr_map_;
...@@ -338,6 +363,7 @@ class Symbolizer { ...@@ -338,6 +363,7 @@ class Symbolizer {
bool addr_map_read_; bool addr_map_read_;
char symbol_buf_[SYMBOL_BUF_SIZE]; char symbol_buf_[SYMBOL_BUF_SIZE];
char file_cache_[FILE_CACHE_SIZE];
// tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym) // tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym)
// so we ensure that tmp_buf_ is properly aligned to store either. // so we ensure that tmp_buf_ is properly aligned to store either.
...@@ -436,34 +462,58 @@ static ssize_t ReadPersistent(int fd, void *buf, size_t count) { ...@@ -436,34 +462,58 @@ static ssize_t ReadPersistent(int fd, void *buf, size_t count) {
return static_cast<ssize_t>(num_bytes); return static_cast<ssize_t>(num_bytes);
} }
// Read up to "count" bytes from "offset" in the file pointed by file // Read up to "count" bytes from "offset" into the buffer starting at "buf",
// descriptor "fd" into the buffer starting at "buf". On success, // while handling short reads and EINTR. On success, return the number of bytes
// return the number of bytes read. Otherwise, return -1. // read. Otherwise, return -1.
static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count, ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) {
const off_t offset) { char *dst = static_cast<char *>(buf);
off_t off = lseek(fd, offset, SEEK_SET); size_t read = 0;
if (off == (off_t)-1) { while (read < count) {
ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd, // Look in cache first.
static_cast<intmax_t>(offset), errno); if (offset >= cache_start_ && offset < cache_limit_) {
return -1; const char *hit_start = &cache_[offset - cache_start_];
const size_t n =
std::min(count - read, static_cast<size_t>(cache_limit_ - offset));
memcpy(dst, hit_start, n);
dst += n;
read += static_cast<size_t>(n);
offset += static_cast<off_t>(n);
continue;
}
cache_start_ = 0;
cache_limit_ = 0;
ssize_t n = pread(fd_, cache_, cache_size_, offset);
if (n < 0) {
if (errno == EINTR) {
continue;
}
ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno);
return -1;
}
if (n == 0) { // Reached EOF.
break;
}
cache_start_ = offset;
cache_limit_ = offset + static_cast<off_t>(n);
// Next iteration will copy from cache into dst.
} }
return ReadPersistent(fd, buf, count); return static_cast<ssize_t>(read);
} }
// Try reading exactly "count" bytes from "offset" bytes in a file // Try reading exactly "count" bytes from "offset" bytes into the buffer
// pointed by "fd" into the buffer starting at "buf" while handling // starting at "buf" while handling short reads and EINTR. On success, return
// short reads and EINTR. On success, return true. Otherwise, return // true. Otherwise, return false.
// false. bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) {
static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count, ssize_t len = ReadFromOffset(buf, count, offset);
const off_t offset) {
ssize_t len = ReadFromOffset(fd, buf, count, offset);
return len >= 0 && static_cast<size_t>(len) == count; return len >= 0 && static_cast<size_t>(len) == count;
} }
// Returns elf_header.e_type if the file pointed by fd is an ELF binary. // Returns elf_header.e_type if the file pointed by fd is an ELF binary.
static int FileGetElfType(const int fd) { static int FileGetElfType(CachingFile *file) {
ElfW(Ehdr) elf_header; ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { if (!file->ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return -1; return -1;
} }
if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
...@@ -478,8 +528,8 @@ static int FileGetElfType(const int fd) { ...@@ -478,8 +528,8 @@ static int FileGetElfType(const int fd) {
// To keep stack consumption low, we would like this function to not get // To keep stack consumption low, we would like this function to not get
// inlined. // inlined.
static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type, CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset,
ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) {
ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf); ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf);
const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); const size_t buf_entries = tmp_buf_size / sizeof(buf[0]);
const size_t buf_bytes = buf_entries * sizeof(buf[0]); const size_t buf_bytes = buf_entries * sizeof(buf[0]);
...@@ -490,7 +540,7 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( ...@@ -490,7 +540,7 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
const size_t num_bytes_to_read = const size_t num_bytes_to_read =
(buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes; (buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes;
const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0])); const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0]));
const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset); const ssize_t len = file->ReadFromOffset(buf, num_bytes_to_read, offset);
if (len < 0) { if (len < 0) {
ABSL_RAW_LOG( ABSL_RAW_LOG(
WARNING, WARNING,
...@@ -524,11 +574,17 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( ...@@ -524,11 +574,17 @@ static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType(
// but there has (as yet) been no need for anything longer either. // but there has (as yet) been no need for anything longer either.
const int kMaxSectionNameLen = 64; const int kMaxSectionNameLen = 64;
// Small cache to use for miscellaneous file reads.
const int kSmallFileCacheSize = 100;
bool ForEachSection(int fd, bool ForEachSection(int fd,
const std::function<bool(absl::string_view name, const std::function<bool(absl::string_view name,
const ElfW(Shdr) &)> &callback) { const ElfW(Shdr) &)> &callback) {
char buf[kSmallFileCacheSize];
CachingFile file(fd, buf, sizeof(buf));
ElfW(Ehdr) elf_header; ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false; return false;
} }
...@@ -540,7 +596,7 @@ bool ForEachSection(int fd, ...@@ -540,7 +596,7 @@ bool ForEachSection(int fd,
ElfW(Shdr) shstrtab; ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx; elf_header.e_shentsize * elf_header.e_shstrndx;
if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false; return false;
} }
...@@ -548,13 +604,13 @@ bool ForEachSection(int fd, ...@@ -548,13 +604,13 @@ bool ForEachSection(int fd,
ElfW(Shdr) out; ElfW(Shdr) out;
off_t section_header_offset = off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) { if (!file.ReadFromOffsetExact(&out, sizeof(out), section_header_offset)) {
return false; return false;
} }
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name; off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name;
char header_name[kMaxSectionNameLen]; char header_name[kMaxSectionNameLen];
ssize_t n_read = ssize_t n_read =
ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset); file.ReadFromOffset(&header_name, kMaxSectionNameLen, name_offset);
if (n_read < 0) { if (n_read < 0) {
return false; return false;
} else if (n_read > kMaxSectionNameLen) { } else if (n_read > kMaxSectionNameLen) {
...@@ -584,8 +640,10 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, ...@@ -584,8 +640,10 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
return false; return false;
} }
char buf[kSmallFileCacheSize];
CachingFile file(fd, buf, sizeof(buf));
ElfW(Ehdr) elf_header; ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) {
return false; return false;
} }
...@@ -597,18 +655,18 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, ...@@ -597,18 +655,18 @@ bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
ElfW(Shdr) shstrtab; ElfW(Shdr) shstrtab;
off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) +
elf_header.e_shentsize * elf_header.e_shstrndx; elf_header.e_shentsize * elf_header.e_shstrndx;
if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) {
return false; return false;
} }
for (int i = 0; i < elf_header.e_shnum; ++i) { for (int i = 0; i < elf_header.e_shnum; ++i) {
off_t section_header_offset = off_t section_header_offset =
static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i;
if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { if (!file.ReadFromOffsetExact(out, sizeof(*out), section_header_offset)) {
return false; return false;
} }
off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name; off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name;
ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); ssize_t n_read = file.ReadFromOffset(&header_name, name_len, name_offset);
if (n_read < 0) { if (n_read < 0) {
return false; return false;
} else if (static_cast<size_t>(n_read) != name_len) { } else if (static_cast<size_t>(n_read) != name_len) {
...@@ -683,7 +741,7 @@ static const char *ComputeOffset(const char *base, ptrdiff_t offset) { ...@@ -683,7 +741,7 @@ static const char *ComputeOffset(const char *base, ptrdiff_t offset) {
// To keep stack consumption low, we would like this function to not get // To keep stack consumption low, we would like this function to not get
// inlined. // inlined.
static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
const void *const pc, const int fd, char *out, size_t out_size, const void *const pc, CachingFile *file, char *out, size_t out_size,
ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab,
const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) {
if (symtab == nullptr) { if (symtab == nullptr) {
...@@ -716,7 +774,7 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( ...@@ -716,7 +774,7 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
const size_t entries_in_chunk = const size_t entries_in_chunk =
std::min(num_remaining_symbols, buf_entries); std::min(num_remaining_symbols, buf_entries);
const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]); const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]);
const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset); const ssize_t len = file->ReadFromOffset(buf, bytes_in_chunk, offset);
SAFE_ASSERT(len >= 0); SAFE_ASSERT(len >= 0);
SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0); SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0);
const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]);
...@@ -772,12 +830,12 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( ...@@ -772,12 +830,12 @@ static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol(
if (found_match) { if (found_match) {
const off_t off = const off_t off =
static_cast<off_t>(strtab->sh_offset) + best_match.st_name; static_cast<off_t>(strtab->sh_offset) + best_match.st_name;
const ssize_t n_read = ReadFromOffset(fd, out, out_size, off); const ssize_t n_read = file->ReadFromOffset(out, out_size, off);
if (n_read <= 0) { if (n_read <= 0) {
// This should never happen. // This should never happen.
ABSL_RAW_LOG(WARNING, ABSL_RAW_LOG(WARNING,
"Unable to read from fd %d at offset %lld: n_read = %zd", fd, "Unable to read from fd %d at offset %lld: n_read = %zd",
static_cast<long long>(off), n_read); file->fd(), static_cast<long long>(off), n_read);
return SYMBOL_NOT_FOUND; return SYMBOL_NOT_FOUND;
} }
ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size, ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size,
...@@ -827,22 +885,24 @@ FindSymbolResult Symbolizer::GetSymbolFromObjectFile( ...@@ -827,22 +885,24 @@ FindSymbolResult Symbolizer::GetSymbolFromObjectFile(
} }
} }
CachingFile file(obj.fd, file_cache_, sizeof(file_cache_));
// Consult a regular symbol table, then fall back to the dynamic symbol table. // Consult a regular symbol table, then fall back to the dynamic symbol table.
for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) { for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) {
if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum, if (!GetSectionHeaderByType(&file, obj.elf_header.e_shnum,
static_cast<off_t>(obj.elf_header.e_shoff), static_cast<off_t>(obj.elf_header.e_shoff),
static_cast<ElfW(Word)>(symbol_table_type), static_cast<ElfW(Word)>(symbol_table_type),
&symtab, tmp_buf, tmp_buf_size)) { &symtab, tmp_buf, tmp_buf_size)) {
continue; continue;
} }
if (!ReadFromOffsetExact( if (!file.ReadFromOffsetExact(
obj.fd, &strtab, sizeof(strtab), &strtab, sizeof(strtab),
static_cast<off_t>(obj.elf_header.e_shoff + static_cast<off_t>(obj.elf_header.e_shoff +
symtab.sh_link * sizeof(symtab)))) { symtab.sh_link * sizeof(symtab)))) {
continue; continue;
} }
const FindSymbolResult rc = const FindSymbolResult rc =
FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab, FindSymbol(pc, &file, out, out_size, relocation, &strtab, &symtab,
opd_ptr, tmp_buf, tmp_buf_size); opd_ptr, tmp_buf, tmp_buf_size);
if (rc != SYMBOL_NOT_FOUND) { if (rc != SYMBOL_NOT_FOUND) {
return rc; return rc;
...@@ -1323,15 +1383,19 @@ static bool MaybeInitializeObjFile(ObjFile *obj) { ...@@ -1323,15 +1383,19 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno); ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno);
return false; return false;
} }
obj->elf_type = FileGetElfType(obj->fd);
char buf[kSmallFileCacheSize];
CachingFile file(obj->fd, buf, sizeof(buf));
obj->elf_type = FileGetElfType(&file);
if (obj->elf_type < 0) { if (obj->elf_type < 0) {
ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename, ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename,
obj->elf_type); obj->elf_type);
return false; return false;
} }
if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header), if (!file.ReadFromOffsetExact(&obj->elf_header, sizeof(obj->elf_header),
0)) { 0)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename); ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename);
return false; return false;
} }
...@@ -1341,7 +1405,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) { ...@@ -1341,7 +1405,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
size_t num_interesting_load_segments = 0; size_t num_interesting_load_segments = 0;
for (int j = 0; j < phnum; j++) { for (int j = 0; j < phnum; j++) {
ElfW(Phdr) phdr; ElfW(Phdr) phdr;
if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) { if (!file.ReadFromOffsetExact(&phdr, sizeof(phdr), phoff)) {
ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d", ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d",
obj->filename, j); obj->filename, j);
return false; return false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment