Commit 75d25251 by Martijn Vels Committed by Copybara-Service

Replace absl::base_internal::Prefetch* calls with absl::Prefetch* calls

PiperOrigin-RevId: 505184961
Change-Id: I64482558a76abda6896bec4b2d323833b6cd7edf
parent 8a0693b2
...@@ -738,7 +738,10 @@ cc_library( ...@@ -738,7 +738,10 @@ cc_library(
], ],
copts = ABSL_DEFAULT_COPTS, copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS, linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [":config"], deps = [
":config",
":core_headers", # TODO(b/265984188): remove
],
) )
cc_test( cc_test(
......
...@@ -657,6 +657,7 @@ absl_cc_library( ...@@ -657,6 +657,7 @@ absl_cc_library(
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::config absl::config
absl::core_headers # TODO(b/265984188): remove
) )
absl_cc_test( absl_cc_test(
......
...@@ -12,10 +12,14 @@ ...@@ -12,10 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// TODO(b/265984188): remove all uses and delete this header.
#ifndef ABSL_BASE_INTERNAL_PREFETCH_H_ #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_
#define ABSL_BASE_INTERNAL_PREFETCH_H_ #define ABSL_BASE_INTERNAL_PREFETCH_H_
#include "absl/base/attributes.h"
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/prefetch.h"
#ifdef __SSE__ #ifdef __SSE__
#include <xmmintrin.h> #include <xmmintrin.h>
...@@ -72,10 +76,21 @@ namespace absl { ...@@ -72,10 +76,21 @@ namespace absl {
ABSL_NAMESPACE_BEGIN ABSL_NAMESPACE_BEGIN
namespace base_internal { namespace base_internal {
void PrefetchT0(const void* addr); ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead")
inline void PrefetchT0(const void* address) {
absl::PrefetchToLocalCache(address);
}
// Deprecated forwarding shim kept so existing callers continue to compile.
// It delegates to absl::PrefetchToLocalCacheNta(), so the deprecation message
// names that function (not absl::PrefetchToLocalCache()) to keep migrating
// callers on the same prefetch variant this shim actually invokes.
ABSL_DEPRECATED("Use absl::PrefetchToLocalCacheNta() instead")
inline void PrefetchNta(const void* address) {
  absl::PrefetchToLocalCacheNta(address);
}
ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
void PrefetchT1(const void* addr); void PrefetchT1(const void* addr);
ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
void PrefetchT2(const void* addr); void PrefetchT2(const void* addr);
void PrefetchNta(const void* addr);
// Implementation details follow. // Implementation details follow.
...@@ -90,10 +105,6 @@ void PrefetchNta(const void* addr); ...@@ -90,10 +105,6 @@ void PrefetchNta(const void* addr);
// safe for all currently supported platforms. However, prefetch for // safe for all currently supported platforms. However, prefetch for
// store may have problems depending on the target platform. // store may have problems depending on the target platform.
// //
inline void PrefetchT0(const void* addr) {
// Note: this uses prefetcht0 on Intel.
__builtin_prefetch(addr, 0, 3);
}
inline void PrefetchT1(const void* addr) { inline void PrefetchT1(const void* addr) {
// Note: this uses prefetcht1 on Intel. // Note: this uses prefetcht1 on Intel.
__builtin_prefetch(addr, 0, 2); __builtin_prefetch(addr, 0, 2);
...@@ -102,33 +113,21 @@ inline void PrefetchT2(const void* addr) { ...@@ -102,33 +113,21 @@ inline void PrefetchT2(const void* addr) {
// Note: this uses prefetcht2 on Intel. // Note: this uses prefetcht2 on Intel.
__builtin_prefetch(addr, 0, 1); __builtin_prefetch(addr, 0, 1);
} }
inline void PrefetchNta(const void* addr) {
// Note: this uses prefetchnta on Intel.
__builtin_prefetch(addr, 0, 0);
}
#elif defined(ABSL_INTERNAL_HAVE_SSE) #elif defined(ABSL_INTERNAL_HAVE_SSE)
#define ABSL_INTERNAL_HAVE_PREFETCH 1 #define ABSL_INTERNAL_HAVE_PREFETCH 1
inline void PrefetchT0(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
}
inline void PrefetchT1(const void* addr) { inline void PrefetchT1(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1); _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1);
} }
inline void PrefetchT2(const void* addr) { inline void PrefetchT2(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2); _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2);
} }
inline void PrefetchNta(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
}
#else #else
inline void PrefetchT0(const void*) {}
inline void PrefetchT1(const void*) {} inline void PrefetchT1(const void*) {}
inline void PrefetchT2(const void*) {} inline void PrefetchT2(const void*) {}
inline void PrefetchNta(const void*) {}
#endif #endif
} // namespace base_internal } // namespace base_internal
......
...@@ -30,9 +30,11 @@ ...@@ -30,9 +30,11 @@
#include <xmmintrin.h> #include <xmmintrin.h>
#endif #endif
#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) #if defined(_MSC_VER) && _MSC_VER >= 1900 && \
(defined(_M_X64) || defined(_M_IX86))
#include <intrin.h> #include <intrin.h>
#pragma intrinsic(_mm_prefetch) #pragma intrinsic(_mm_prefetch)
#pragma intrinsic(_m_prefetchw)
#endif #endif
namespace absl { namespace absl {
...@@ -174,10 +176,15 @@ inline void PrefetchToLocalCacheNta(const void* addr) { ...@@ -174,10 +176,15 @@ inline void PrefetchToLocalCacheNta(const void* addr) {
inline void PrefetchToLocalCacheForWrite(const void* addr) { inline void PrefetchToLocalCacheForWrite(const void* addr) {
#if defined(_MM_HINT_ET0) #if defined(_MM_HINT_ET0)
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
#elif defined(__x86_64__) #elif defined(_MSC_VER) && _MSC_VER >= 1900 && \
(defined(_M_X64) || defined(_M_IX86))
// MSVC 2015 and up on x86/x64 supports prefetchw (feature listed as 3DNOW)
_m_prefetchw(const_cast<void*>(addr));
#elif !defined(_MSC_VER) && defined(__x86_64__)
// _MM_HINT_ET0 is not universally supported. As we commented further // _MM_HINT_ET0 is not universally supported. As we commented further
// up, PREFETCHW is recognized as a no-op on older Intel processors // up, PREFETCHW is recognized as a no-op on older Intel processors
// and has been present on AMD processors since the K6-2 // and has been present on AMD processors since the K6-2. We have this
// disabled for MSVC compilers as this miscompiles on older MSVC compilers.
asm("prefetchw (%0)" : : "r"(addr)); asm("prefetchw (%0)" : : "r"(addr));
#endif #endif
} }
......
...@@ -185,10 +185,10 @@ ...@@ -185,10 +185,10 @@
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/internal/endian.h" #include "absl/base/internal/endian.h"
#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h" #include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h" #include "absl/base/optimization.h"
#include "absl/base/port.h" #include "absl/base/port.h"
#include "absl/base/prefetch.h"
#include "absl/container/internal/common.h" #include "absl/container/internal/common.h"
#include "absl/container/internal/compressed_tuple.h" #include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h" #include "absl/container/internal/container_memory.h"
...@@ -2117,12 +2117,12 @@ class raw_hash_set { ...@@ -2117,12 +2117,12 @@ class raw_hash_set {
void prefetch(const key_arg<K>& key) const { void prefetch(const key_arg<K>& key) const {
(void)key; (void)key;
// Avoid probing if we won't be able to prefetch the addresses received. // Avoid probing if we won't be able to prefetch the addresses received.
#ifdef ABSL_INTERNAL_HAVE_PREFETCH #ifdef ABSL_HAVE_PREFETCH
prefetch_heap_block(); prefetch_heap_block();
auto seq = probe(common(), hash_ref()(key)); auto seq = probe(common(), hash_ref()(key));
base_internal::PrefetchT0(control() + seq.offset()); PrefetchToLocalCache(control() + seq.offset());
base_internal::PrefetchT0(slot_array() + seq.offset()); PrefetchToLocalCache(slot_array() + seq.offset());
#endif // ABSL_INTERNAL_HAVE_PREFETCH #endif // ABSL_HAVE_PREFETCH
} }
// The API of find() has two extensions. // The API of find() has two extensions.
...@@ -2529,10 +2529,14 @@ class raw_hash_set { ...@@ -2529,10 +2529,14 @@ class raw_hash_set {
// See `CapacityToGrowth()`. // See `CapacityToGrowth()`.
size_t& growth_left() { return common().growth_left(); } size_t& growth_left() { return common().growth_left(); }
// Prefetch the heap-allocated memory region to resolve potential TLB misses. // Prefetch the heap-allocated memory region to resolve potential TLB and
// This is intended to overlap with execution of calculating the hash for a // cache misses. This is intended to overlap with execution of calculating the
// key. // hash for a key.
void prefetch_heap_block() const { base_internal::PrefetchT2(control()); } void prefetch_heap_block() const {
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
__builtin_prefetch(control(), 0, 1);
#endif
}
CommonFields& common() { return settings_.template get<0>(); } CommonFields& common() { return settings_.template get<0>(); }
const CommonFields& common() const { return settings_.template get<0>(); } const CommonFields& common() const { return settings_.template get<0>(); }
......
...@@ -40,8 +40,8 @@ ...@@ -40,8 +40,8 @@
#include "absl/base/attributes.h" #include "absl/base/attributes.h"
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/internal/cycleclock.h" #include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h" #include "absl/base/internal/raw_logging.h"
#include "absl/base/prefetch.h"
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h" #include "absl/container/flat_hash_set.h"
#include "absl/container/internal/container_memory.h" #include "absl/container/internal/container_memory.h"
......
...@@ -44,8 +44,8 @@ ...@@ -44,8 +44,8 @@
#include <cstdint> #include <cstdint>
#include "absl/base/internal/endian.h" #include "absl/base/internal/endian.h"
#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h" #include "absl/base/internal/raw_logging.h"
#include "absl/base/prefetch.h"
#include "absl/crc/internal/crc_internal.h" #include "absl/crc/internal/crc_internal.h"
namespace absl { namespace absl {
...@@ -309,7 +309,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const { ...@@ -309,7 +309,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
// Process kStride interleaved swaths through the data in parallel. // Process kStride interleaved swaths through the data in parallel.
while ((e - p) > kPrefetchHorizon) { while ((e - p) > kPrefetchHorizon) {
base_internal::PrefetchNta( PrefetchToLocalCacheNta(
reinterpret_cast<const void*>(p + kPrefetchHorizon)); reinterpret_cast<const void*>(p + kPrefetchHorizon));
// Process 64 bytes at a time // Process 64 bytes at a time
step_stride(); step_stride();
......
...@@ -52,8 +52,8 @@ ...@@ -52,8 +52,8 @@
#include <type_traits> #include <type_traits>
#include "absl/base/dynamic_annotations.h" #include "absl/base/dynamic_annotations.h"
#include "absl/base/internal/prefetch.h"
#include "absl/base/optimization.h" #include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/crc/crc32c.h" #include "absl/crc/crc32c.h"
#include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc_memcpy.h" #include "absl/crc/internal/crc_memcpy.h"
...@@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( ...@@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
while (copy_rounds > kBlocksPerCacheLine) { while (copy_rounds > kBlocksPerCacheLine) {
// Prefetch kPrefetchAhead bytes ahead of each pointer. // Prefetch kPrefetchAhead bytes ahead of each pointer.
for (size_t i = 0; i < kRegions; i++) { for (size_t i = 0; i < kRegions; i++) {
absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead + absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i);
region_size * i); absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i);
absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead +
region_size * i);
} }
// Load and store data, computing CRC on the way. // Load and store data, computing CRC on the way.
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "absl/base/config.h" #include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h" #include "absl/base/dynamic_annotations.h"
#include "absl/base/internal/endian.h" #include "absl/base/internal/endian.h"
#include "absl/base/internal/prefetch.h" #include "absl/base/prefetch.h"
#include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc.h" #include "absl/crc/internal/crc.h"
#include "absl/crc/internal/crc32_x86_arm_combined_simd.h" #include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
...@@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams ...@@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
base_internal::PrefetchT0( PrefetchToLocalCache(
reinterpret_cast<const char*>(p + kPrefetchHorizonMedium)); reinterpret_cast<const char*>(p + kPrefetchHorizonMedium));
base_internal::PrefetchT0( PrefetchToLocalCache(
reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium)); reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium));
base_internal::PrefetchT0( PrefetchToLocalCache(
reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium)); reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium));
} }
// Don't run crc on last 8 bytes. // Don't run crc on last 8 bytes.
...@@ -517,12 +517,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams ...@@ -517,12 +517,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
for (size_t i = 1; i < bs; i++) { for (size_t i = 1; i < bs; i++) {
// Prefetch data for next iterations. // Prefetch data for next iterations.
for (size_t j = 0; j < num_crc_streams; j++) { for (size_t j = 0; j < num_crc_streams; j++) {
base_internal::PrefetchT0( PrefetchToLocalCache(
reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon)); reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon));
} }
for (size_t j = 0; j < num_pclmul_streams; j++) { for (size_t j = 0; j < num_pclmul_streams; j++) {
base_internal::PrefetchT0(reinterpret_cast<const char*>( PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] +
pclmul_streams[j] + kPrefetchHorizon)); kPrefetchHorizon));
} }
// We process each stream in 64 byte blocks. This can be written as // We process each stream in 64 byte blocks. This can be written as
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment