Commit 77224c28, authored by Pavel P, committed by Copybara-Service

PR #1662: Replace shift with addition in crc multiply

Imported from GitHub PR https://github.com/abseil/abseil-cpp/pull/1662

Merge 4b2c6c909b573d31a1cccba7cb72d4d8badeef8b into cba31a95

Merging this change closes #1662

COPYBARA_INTEGRATE_REVIEW=https://github.com/abseil/abseil-cpp/pull/1662 from pps83:crc-add 4b2c6c909b573d31a1cccba7cb72d4d8badeef8b
PiperOrigin-RevId: 631470883
Change-Id: I4a72be643ed341ddf0e0007418ab4a613a03db4b
parent e0df4a72
......@@ -123,8 +123,8 @@ uint64_t V128_Extract64(const V128 l);
// Extracts the low 64 bits from V128, returned as a signed 64-bit integer.
int64_t V128_Low64(const V128 l);
// Left-shifts the packed 64-bit integers in |l| by the shift count(s) in |r|.
// NOTE(review): the x86 (_mm_sll_epi64) and ARM (vshlq_u64) implementations
// may interpret |r| differently (one shared count vs. per-lane counts) --
// confirm before relying on anything beyond what existing callers do.
V128 V128_ShiftLeft64(const V128 l, const V128 r);
// Adds the packed 64-bit integers in |l| and |r|.
// V128_Add64(x, x) doubles each 64-bit integer, which is equivalent to
// shifting it left by one bit.
V128 V128_Add64(const V128 l, const V128 r);
#endif
......@@ -193,8 +193,8 @@ inline uint64_t V128_Extract64(const V128 l) {
// x86 implementation: returns the low 64 bits of |l| via _mm_cvtsi128_si64.
inline int64_t V128_Low64(const V128 l) {
  const int64_t low_bits = _mm_cvtsi128_si64(l);
  return low_bits;
}
// x86 implementation: left-shifts the packed 64-bit integers in |l| by the
// count held in |r| (thin wrapper over _mm_sll_epi64).
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
  return _mm_sll_epi64(l, r);
}
// x86 implementation: adds the packed 64-bit integers in |l| and |r|
// (thin wrapper over _mm_add_epi64).
inline V128 V128_Add64(const V128 l, const V128 r) {
  return _mm_add_epi64(l, r);
}
#elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
......@@ -289,9 +289,7 @@ inline int64_t V128_Low64(const V128 l) {
return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0);
}
// ARM implementation: left-shifts the packed 64-bit integers in |l| by the
// counts in |r|. vshlq_u64 takes its counts as signed lanes, so |r| is
// reinterpreted (bit pattern unchanged) before the shift.
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
  const auto signed_counts = vreinterpretq_s64_u64(r);
  return vshlq_u64(l, signed_counts);
}
// ARM implementation: adds the packed 64-bit integers in |l| and |r|
// (thin wrapper over vaddq_u64).
inline V128 V128_Add64(const V128 l, const V128 r) {
  const auto sum = vaddq_u64(l, r);
  return sum;
}
#endif
......
......@@ -101,13 +101,17 @@ constexpr size_t kMediumCutoff = 2048;
namespace {
uint32_t multiply(uint32_t a, uint32_t b) {
V128 shifts = V128_From64WithZeroFill(1);
V128 power = V128_From64WithZeroFill(a);
V128 crc = V128_From64WithZeroFill(b);
V128 res = V128_PMulLow(power, crc);
// Combine crc values
res = V128_ShiftLeft64(res, shifts);
// Combine crc values.
//
// Adding res to itself is equivalent to multiplying by 2,
// or shifting left by 1. Addition is used as not all compilers
// are able to generate optimal code without this hint.
// https://godbolt.org/z/rr3fMnf39
res = V128_Add64(res, res);
return static_cast<uint32_t>(V128_Extract32<1>(res)) ^
CRC32_u32(0, static_cast<uint32_t>(V128_Low64(res)));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment