Commit 29fdacd2 by Derek Mauro Committed by Copybara-Service

Fix the conditional compilation of non_temporal_store_memcpy_avx
to verify that AVX can be forced via `gnu::target`.

Fixes #1759

PiperOrigin-RevId: 677853230
Change-Id: Ic69045c71ddf8230fd7b0210ba4aef8693053232
parent c0b9bd08
...@@ -111,20 +111,31 @@ inline void *non_temporal_store_memcpy(void *__restrict dst, ...@@ -111,20 +111,31 @@ inline void *non_temporal_store_memcpy(void *__restrict dst,
#endif // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__) #endif // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__)
} }
// We try to force non_temporal_store_memcpy_avx to use AVX instructions
// so that we can select it at runtime when AVX is available.
// Clang on Windows has gnu::target but does not make AVX types like __m256i
// available when trying to force specific functions to be compiled with AVX,
// so builds that define _MSC_VER are excluded here.
//
// ABSL_INTERNAL_CAN_FORCE_AVX is defined only when the gnu::target attribute
// is supported, _MSC_VER is not defined, and the target is x86 (__x86_64__ or
// __i386__). It is #undef'd again after non_temporal_store_memcpy_avx.
#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && !defined(_MSC_VER) && \
(defined(__x86_64__) || defined(__i386__))
#define ABSL_INTERNAL_CAN_FORCE_AVX 1
#endif
// If the objects overlap, the behavior is undefined. Uses regular memcpy // If the objects overlap, the behavior is undefined. Uses regular memcpy
// instead of non-temporal memcpy if the required CPU intrinsics are unavailable // instead of non-temporal memcpy if the required CPU intrinsics are unavailable
// at compile time. // at compile time.
#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && \ #ifdef ABSL_INTERNAL_CAN_FORCE_AVX
(defined(__x86_64__) || defined(__i386__))
[[gnu::target("avx")]] [[gnu::target("avx")]]
#endif #endif
inline void *non_temporal_store_memcpy_avx(void *__restrict dst, inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
const void *__restrict src, const void *__restrict src,
size_t len) { size_t len) {
// This function requires AVX. For clang and gcc we compile it with AVX even // This function requires AVX. If possible we compile it with AVX even if the
// if the translation unit isn't built with AVX support. This works because we // translation unit isn't built with AVX support. This works because we only
// only select this implementation at runtime if the CPU supports AVX. // select this implementation at runtime if the CPU supports AVX.
#if defined(__SSE3__) || (defined(_MSC_VER) && defined(__AVX__)) // MSVC AVX support implies SSE3 support.
#if ((defined(__AVX__) || defined(ABSL_INTERNAL_CAN_FORCE_AVX)) && \
defined(__SSE3__)) || \
(defined(_MSC_VER) && defined(__AVX__))
uint8_t *d = reinterpret_cast<uint8_t *>(dst); uint8_t *d = reinterpret_cast<uint8_t *>(dst);
const uint8_t *s = reinterpret_cast<const uint8_t *>(src); const uint8_t *s = reinterpret_cast<const uint8_t *>(src);
...@@ -170,10 +181,13 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst, ...@@ -170,10 +181,13 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
} }
return dst; return dst;
#else #else
// Fallback to regular memcpy so that this function compiles.
return memcpy(dst, src, len); return memcpy(dst, src, len);
#endif // __SSE3__ || (_MSC_VER && __AVX__) #endif
} }
#undef ABSL_INTERNAL_CAN_FORCE_AVX
} // namespace crc_internal } // namespace crc_internal
ABSL_NAMESPACE_END ABSL_NAMESPACE_END
} // namespace absl } // namespace absl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment