Commit 1687dbf8 by Derek Mauro Committed by Copybara-Service

Release the CRC library

This implementation can take advantage of hardware acceleration available
on common CPUs when using GCC and Clang. A future update may enable
this on MSVC as well.

PiperOrigin-RevId: 487327024
Change-Id: I99a8f1bcbdf25297e776537e23bd0a902e0818a1
parent 8cfc1500
...@@ -91,6 +91,24 @@ set(ABSL_INTERNAL_DLL_FILES ...@@ -91,6 +91,24 @@ set(ABSL_INTERNAL_DLL_FILES
"container/internal/tracked.h" "container/internal/tracked.h"
"container/node_hash_map.h" "container/node_hash_map.h"
"container/node_hash_set.h" "container/node_hash_set.h"
"crc/crc32c.cc"
"crc/crc32c.h"
"crc/internal/cpu_detect.cc"
"crc/internal/cpu_detect.h"
"crc/internal/crc32c.h"
"crc/internal/crc32c_inline.h"
"crc/internal/crc32_x86_arm_combined_simd.h"
"crc/internal/crc.cc"
"crc/internal/crc.h"
"crc/internal/crc_internal.h"
"crc/internal/crc_x86_arm_combined.cc"
"crc/internal/crc_memcpy_fallback.cc"
"crc/internal/crc_memcpy.h"
"crc/internal/crc_memcpy_x86_64.cc"
"crc/internal/crc_non_temporal_memcpy.cc"
"crc/internal/crc_x86_arm_combined.cc"
"crc/internal/non_temporal_arm_intrinsics.h"
"crc/internal/non_temporal_memcpy.h"
"debugging/failure_signal_handler.cc" "debugging/failure_signal_handler.cc"
"debugging/failure_signal_handler.h" "debugging/failure_signal_handler.h"
"debugging/leak_check.h" "debugging/leak_check.h"
...@@ -386,6 +404,9 @@ set(ABSL_INTERNAL_DLL_TARGETS ...@@ -386,6 +404,9 @@ set(ABSL_INTERNAL_DLL_TARGETS
"cord" "cord"
"core_headers" "core_headers"
"counting_allocator" "counting_allocator"
"crc_cpu_detect",
"crc_internal",
"crc32c",
"debugging" "debugging"
"debugging_internal" "debugging_internal"
"demangle_internal" "demangle_internal"
...@@ -418,6 +439,8 @@ set(ABSL_INTERNAL_DLL_TARGETS ...@@ -418,6 +439,8 @@ set(ABSL_INTERNAL_DLL_TARGETS
"node_hash_map" "node_hash_map"
"node_hash_set" "node_hash_set"
"node_slot_policy" "node_slot_policy"
"non_temporal_arm_intrinsics",
"non_temporal_memcpy",
"numeric" "numeric"
"optional" "optional"
"periodic_sampler" "periodic_sampler"
......
...@@ -80,6 +80,8 @@ Abseil contains the following C++ library components: ...@@ -80,6 +80,8 @@ Abseil contains the following C++ library components:
* [`container`](absl/container/) * [`container`](absl/container/)
<br /> The `container` library contains additional STL-style containers, <br /> The `container` library contains additional STL-style containers,
including Abseil's unordered "Swiss table" containers. including Abseil's unordered "Swiss table" containers.
* [`crc`](absl/crc/)
<br /> The `crc` library contains code for
computing error-detecting cyclic redundancy checks on data.
* [`debugging`](absl/debugging/) * [`debugging`](absl/debugging/)
<br /> The `debugging` library contains code useful for enabling leak <br /> The `debugging` library contains code useful for enabling leak
checks, and stacktrace and symbolization utilities. checks, and stacktrace and symbolization utilities.
......
...@@ -18,6 +18,7 @@ add_subdirectory(base) ...@@ -18,6 +18,7 @@ add_subdirectory(base)
add_subdirectory(algorithm) add_subdirectory(algorithm)
add_subdirectory(cleanup) add_subdirectory(cleanup)
add_subdirectory(container) add_subdirectory(container)
add_subdirectory(crc)
add_subdirectory(debugging) add_subdirectory(debugging)
add_subdirectory(flags) add_subdirectory(flags)
add_subdirectory(functional) add_subdirectory(functional)
......
# Copyright 2022 The Abseil Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//absl:copts/configure_copts.bzl",
"ABSL_DEFAULT_COPTS",
"ABSL_DEFAULT_LINKOPTS",
"ABSL_TEST_COPTS",
)
package(default_visibility = ["//visibility:private"])
licenses(["notice"])
# Internal library built from internal/cpu_detect.{h,cc}.
# Visibility is private, so only targets in this package may depend on it.
cc_library(
name = "cpu_detect",
srcs = [
"internal/cpu_detect.cc",
],
hdrs = ["internal/cpu_detect.h"],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
"//absl/base",
"//absl/base:config",
],
)
# Internal CRC implementation library (internal/crc.cc and
# internal/crc_x86_arm_combined.cc). Package-private.
# internal/crc_internal.h is listed under `srcs`, not `hdrs`, so it is not one
# of this target's exported headers.
cc_library(
name = "crc_internal",
srcs = [
"internal/crc.cc",
"internal/crc_internal.h",
"internal/crc_x86_arm_combined.cc",
],
hdrs = [
"internal/crc.h",
"internal/crc32_x86_arm_combined_simd.h",
],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
":cpu_detect",
"//absl/base",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
"//absl/base:endian",
"//absl/base:prefetch",
"//absl/base:raw_logging_internal",
"//absl/memory",
"//absl/numeric:bits",
],
)
# The CRC32C library. This is the only rule in this file with public
# visibility; every other target here is package-private.
# NOTE(review): crc32c.h includes internal/crc32c_inline.h, which is listed
# under `srcs` rather than `hdrs` -- confirm that is intended.
cc_library(
name = "crc32c",
srcs = [
"crc32c.cc",
"internal/crc32c_inline.h",
"internal/crc_memcpy_fallback.cc",
"internal/crc_memcpy_x86_64.cc",
"internal/crc_non_temporal_memcpy.cc",
],
hdrs = [
"crc32c.h",
"internal/crc32c.h",
"internal/crc_memcpy.h",
],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:public"],
deps = [
":cpu_detect",
":crc_internal",
":non_temporal_memcpy",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
"//absl/base:endian",
"//absl/base:prefetch",
"//absl/strings",
],
)
# Unit test for the :crc32c library (crc32c_test.cc), built with the test copts.
cc_test(
name = "crc32c_test",
srcs = ["crc32c_test.cc"],
copts = ABSL_TEST_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
":crc32c",
"//absl/strings",
"@com_google_googletest//:gtest_main",
],
)
# Header-only internal target exposing internal/non_temporal_arm_intrinsics.h.
# It has no srcs and no deps; package-private.
cc_library(
name = "non_temporal_arm_intrinsics",
hdrs = ["internal/non_temporal_arm_intrinsics.h"],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
)
# Header-only internal target exposing internal/non_temporal_memcpy.h.
# Depends on :non_temporal_arm_intrinsics; package-private.
cc_library(
name = "non_temporal_memcpy",
hdrs = ["internal/non_temporal_memcpy.h"],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
":non_temporal_arm_intrinsics",
"//absl/base:config",
"//absl/base:core_headers",
],
)
# Test for the CRC-computing memcpy code (internal/crc_memcpy_test.cc).
# Declared as a "large" test and split across 3 shards.
# NOTE(review): unlike crc32c_test, this rule sets neither `copts` nor
# `linkopts` -- confirm whether that is intentional.
cc_test(
name = "crc_memcpy_test",
size = "large",
srcs = ["internal/crc_memcpy_test.cc"],
shard_count = 3,
visibility = ["//visibility:private"],
deps = [
":crc32c",
"//absl/memory",
"//absl/random",
"//absl/random:distributions",
"//absl/strings",
"@com_google_googletest//:gtest_main",
],
)
# Test for :non_temporal_memcpy (internal/non_temporal_memcpy_test.cc).
# NOTE(review): sets neither `copts` nor `linkopts`, unlike crc32c_test.
cc_test(
name = "non_temporal_memcpy_test",
srcs = ["internal/non_temporal_memcpy_test.cc"],
visibility = ["//visibility:private"],
deps = [
":non_temporal_memcpy",
"@com_google_googletest//:gtest_main",
],
)
# Benchmark binary for :crc32c (crc32c_benchmark.cc). It is test-only, tagged
# "benchmark", and linked against the benchmark library's main.
cc_binary(
name = "crc32c_benchmark",
testonly = 1,
srcs = ["crc32c_benchmark.cc"],
copts = ABSL_TEST_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
tags = [
"benchmark",
],
visibility = ["//visibility:private"],
deps = [
":crc32c",
"//absl/memory",
"@com_github_google_benchmark//:benchmark_main",
],
)
# Copyright 2022 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Internal-only target, do not depend on directly.
# CPU detection helper built from internal/cpu_detect.{h,cc}.
absl_cc_library(
NAME
crc_cpu_detect
HDRS
"internal/cpu_detect.h"
SRCS
"internal/cpu_detect.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::base
absl::config
)
# Internal-only target, do not depend on directly.
# CRC implementation library; internal/crc_internal.h is listed under SRCS
# rather than HDRS.
absl_cc_library(
NAME
crc_internal
HDRS
"internal/crc.h"
"internal/crc32_x86_arm_combined_simd.h"
SRCS
"internal/crc.cc"
"internal/crc_internal.h"
"internal/crc_x86_arm_combined.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::crc_cpu_detect
absl::base
absl::config
absl::core_headers
absl::dynamic_annotations
absl::endian
absl::prefetch
absl::raw_logging_internal
absl::memory
absl::bits
)
# The CRC32C library: crc32c.h plus the internal CRC32C and CRC-memcpy
# headers, built from crc32c.cc and the three crc_memcpy source files.
# This is the only library target in this file that is not marked
# "Internal-only".
absl_cc_library(
NAME
crc32c
HDRS
"crc32c.h"
"internal/crc32c.h"
"internal/crc_memcpy.h"
SRCS
"crc32c.cc"
"internal/crc32c_inline.h"
"internal/crc_memcpy_fallback.cc"
"internal/crc_memcpy_x86_64.cc"
"internal/crc_non_temporal_memcpy.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::crc_cpu_detect
absl::crc_internal
absl::non_temporal_memcpy
absl::config
absl::core_headers
absl::dynamic_annotations
absl::endian
absl::prefetch
absl::strings
)
# Unit test for absl::crc32c (crc32c_test.cc).
# NOTE(review): compiled with ABSL_DEFAULT_COPTS here, while the Bazel rule for
# the same test uses ABSL_TEST_COPTS -- confirm which is intended.
absl_cc_test(
NAME
crc32c_test
SRCS
"crc32c_test.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::crc32c
absl::strings
GTest::gtest_main
)
# Internal-only target, do not depend on directly.
# Header-only: exposes internal/non_temporal_arm_intrinsics.h and has no
# SRCS or DEPS.
absl_cc_library(
NAME
non_temporal_arm_intrinsics
HDRS
"internal/non_temporal_arm_intrinsics.h"
COPTS
${ABSL_DEFAULT_COPTS}
)
# Internal-only target, do not depend on directly.
# Header-only: exposes internal/non_temporal_memcpy.h, which depends on the
# non_temporal_arm_intrinsics target.
absl_cc_library(
NAME
non_temporal_memcpy
HDRS
"internal/non_temporal_memcpy.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::non_temporal_arm_intrinsics
absl::config
absl::core_headers
)
# Test for the CRC-computing memcpy code (internal/crc_memcpy_test.cc).
absl_cc_test(
NAME
crc_memcpy_test
SRCS
"internal/crc_memcpy_test.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::crc32c
absl::memory
absl::random_random
absl::random_distributions
absl::strings
GTest::gtest_main
)
# Test for absl::non_temporal_memcpy (internal/non_temporal_memcpy_test.cc).
absl_cc_test(
NAME
non_temporal_memcpy_test
SRCS
"internal/non_temporal_memcpy_test.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::non_temporal_memcpy
GTest::gtest_main
)
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/crc/crc32c.h"
#include <cstdint>
#include "absl/crc/internal/crc.h"
#include "absl/crc/internal/crc32c.h"
#include "absl/crc/internal/crc_memcpy.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {

// Mask XORed into a CRC value immediately before and immediately after each
// call into the CRC engine made by the conversion functions in this file.
constexpr uint32_t kCRC32Xor = 0xffffffffU;

// Returns the CRC32C engine. The pointer is fetched from
// `crc_internal::CRC::Crc32c()` the first time this function runs and is
// cached in a function-local static for every later call.
const crc_internal::CRC* CrcEngine() {
  static const crc_internal::CRC* cached_engine =
      crc_internal::CRC::Crc32c();
  return cached_engine;
}

}  // namespace
namespace crc_internal {
// Applies the engine's `UnextendByZeroes()` to `initial_crc` for `length`
// bytes. The value is XORed with `kCRC32Xor` on the way into the engine and
// again on the way out.
crc32c_t UnextendCrc32cByZeroes(crc32c_t initial_crc, size_t length) {
  uint32_t engine_state = kCRC32Xor ^ static_cast<uint32_t>(initial_crc);
  CrcEngine()->UnextendByZeroes(&engine_state, length);
  engine_state ^= kCRC32Xor;
  return static_cast<crc32c_t>(engine_state);
}
// Out-of-line implementation behind `absl::ExtendCrc32c()`, which calls it for
// strings with size > 64 or when hardware CRC32C support is missing.
//
// Feeds all bytes of `buf_to_add` to the CRC engine, XORing the value with
// `kCRC32Xor` before and after the engine call.
crc32c_t ExtendCrc32cInternal(crc32c_t initial_crc,
                              absl::string_view buf_to_add) {
  uint32_t engine_state = kCRC32Xor ^ static_cast<uint32_t>(initial_crc);
  CrcEngine()->Extend(&engine_state, buf_to_add.data(), buf_to_add.size());
  engine_state ^= kCRC32Xor;
  return static_cast<crc32c_t>(engine_state);
}
} // namespace crc_internal
// Computes the CRC32C of `buf` by extending a CRC of 0 with the whole buffer.
crc32c_t ComputeCrc32c(absl::string_view buf) {
  const crc32c_t starting_crc = ToCrc32c(0);
  return ExtendCrc32c(starting_crc, buf);
}
// Applies the engine's `ExtendByZeroes()` to `initial_crc` for `length` bytes.
// The value is XORed with `kCRC32Xor` before and after the engine call.
crc32c_t ExtendCrc32cByZeroes(crc32c_t initial_crc, size_t length) {
  uint32_t engine_state = kCRC32Xor ^ static_cast<uint32_t>(initial_crc);
  CrcEngine()->ExtendByZeroes(&engine_state, length);
  engine_state ^= kCRC32Xor;
  return static_cast<crc32c_t>(engine_state);
}
// Combines the CRCs of two adjacent buffers: `lhs_crc` is passed through the
// engine's `ExtendByZeroes()` for `rhs_len` bytes (without any `kCRC32Xor`
// masking) and the result is XORed with `rhs_crc`.
crc32c_t ConcatCrc32c(crc32c_t lhs_crc, crc32c_t rhs_crc, size_t rhs_len) {
  uint32_t extended_lhs = static_cast<uint32_t>(lhs_crc);
  CrcEngine()->ExtendByZeroes(&extended_lhs, rhs_len);
  const crc32c_t extended_lhs_crc = static_cast<crc32c_t>(extended_lhs);
  return extended_lhs_crc ^ rhs_crc;
}
// Removes a prefix from a CRC. Implemented purely in terms of
// `ConcatCrc32c()`: the three arguments are forwarded unchanged and in the
// same order.
crc32c_t RemoveCrc32cPrefix(crc32c_t crc_a, crc32c_t crc_ab, size_t length_b) {
  const crc32c_t crc_without_prefix = ConcatCrc32c(crc_a, crc_ab, length_b);
  return crc_without_prefix;
}
// Copies `count` bytes from `src` to `dest` and returns a CRC32C, delegating
// all of the work to `crc_internal::Crc32CAndCopy()` (called with `false` as
// its final argument) and converting the result to `crc32c_t`.
crc32c_t MemcpyCrc32c(void* dest, const void* src, size_t count,
                      crc32c_t initial_crc) {
  const auto copied_crc =
      crc_internal::Crc32CAndCopy(dest, src, count, initial_crc, false);
  return static_cast<crc32c_t>(copied_crc);
}
// Removes a suffix of the given size from a buffer's CRC.
//
// Inputs: `full_string_crc`, the CRC32C of the whole buffer; `suffix_crc`,
// the CRC32C of the suffix being removed; and `suffix_len`, the suffix's
// length. Returns the CRC32C of the buffer with the suffix removed.
//
// This operation has a runtime cost of O(log(`suffix_len`))
crc32c_t RemoveCrc32cSuffix(crc32c_t full_string_crc, crc32c_t suffix_crc,
                            size_t suffix_len) {
  // CRC of `suffix_len` zero bytes, starting from a CRC of 0.
  const crc32c_t zero_run_crc = ExtendCrc32cByZeroes(ToCrc32c(0), suffix_len);
  // XOR the three CRCs together, then strip the trailing zero bytes again.
  const crc32c_t crc_with_suffix_zeroed =
      suffix_crc ^ full_string_crc ^ zero_run_crc;
  return crc_internal::UnextendCrc32cByZeroes(crc_with_suffix_zeroed,
                                              suffix_len);
}
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: crc32c.h
// -----------------------------------------------------------------------------
//
// This header file defines the API for computing CRC32C values as checksums
// for arbitrary sequences of bytes provided as a string buffer.
//
// The API includes the basic functions for computing such CRC32C values and
// some utility functions for performing more efficient mathematical
// computations using an existing checksum.
#ifndef ABSL_CRC_CRC32C_H_
#define ABSL_CRC_CRC32C_H_
#include <cstdint>
#include <iostream>
#include <ostream>
#include "absl/crc/internal/crc32c_inline.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
//-----------------------------------------------------------------------------
// crc32c_t
//-----------------------------------------------------------------------------
// `crc32c_t` is a strongly typed 32-bit integer that holds a CRC32C value.
enum class crc32c_t : uint32_t {};

// ToCrc32c()
//
// Wraps the raw 32-bit value `n` in a `crc32c_t`. This helper is required when
// targeting C++14 and earlier; C++17-or-later code can write `crc32c_t{n}`.
inline crc32c_t ToCrc32c(uint32_t n) {
  const crc32c_t wrapped = static_cast<crc32c_t>(n);
  return wrapped;
}

// operator^
//
// Returns the bitwise XOR of the two CRC32C values' underlying integers.
inline crc32c_t operator^(crc32c_t lhs, crc32c_t rhs) {
  return ToCrc32c(static_cast<uint32_t>(lhs) ^ static_cast<uint32_t>(rhs));
}
namespace crc_internal {
// Non-inline code path for `absl::ExtendCrc32c()`. Do not call directly.
// Call `absl::ExtendCrc32c()` (defined below) instead.
//
// `absl::ExtendCrc32c()` falls through to this function when `buf_to_add` is
// longer than 64 bytes, or when its inline path reports that it did not
// handle the buffer.
crc32c_t ExtendCrc32cInternal(crc32c_t initial_crc,
absl::string_view buf_to_add);
} // namespace crc_internal
// -----------------------------------------------------------------------------
// CRC32C Computation Functions
// -----------------------------------------------------------------------------
// ComputeCrc32c()
//
// Returns the CRC32C value of the provided string.
//
// Equivalent to `ExtendCrc32c(ToCrc32c(0), buf)`.
crc32c_t ComputeCrc32c(absl::string_view buf);
// ExtendCrc32c()
//
// Returns the CRC32C obtained by continuing `initial_crc` over the bytes of
// `buf_to_add`. This is more efficient than recomputing the CRC32C of the
// combined buffer from scratch.
//
// Note: passing an `initial_crc` of 0 makes this equivalent to
// `ComputeCrc32c`.
//
// This operation has a runtime cost of O(`buf_to_add.size()`)
inline crc32c_t ExtendCrc32c(crc32c_t initial_crc,
                             absl::string_view buf_to_add) {
  // Long buffers always take the out-of-line path. Approximately 75% of calls
  // have size <= 64 and are attempted inline below.
  if (buf_to_add.size() > 64) {
    return crc_internal::ExtendCrc32cInternal(initial_crc, buf_to_add);
  }

  uint32_t running_crc = static_cast<uint32_t>(initial_crc);
  if (!crc_internal::ExtendCrc32cInline(&running_crc, buf_to_add.data(),
                                        buf_to_add.size())) {
    // The inline path declined the buffer; use the out-of-line path instead.
    return crc_internal::ExtendCrc32cInternal(initial_crc, buf_to_add);
  }
  return ToCrc32c(running_crc);
}
// ExtendCrc32cByZeroes()
//
// Computes a CRC32C value for a buffer with an `initial_crc` CRC32C value,
// where `length` bytes with a value of 0 are appended to the buffer. Using this
// function is more efficient than computing a CRC32C value for the combined
// buffer from scratch.
//
// `initial_crc` is the CRC32C of the buffer before the zero bytes are
// appended; `length` is the number of zero bytes.
//
// This operation has a runtime cost of O(log(`length`))
crc32c_t ExtendCrc32cByZeroes(crc32c_t initial_crc, size_t length);
// MemcpyCrc32c()
//
// Copies `src` to `dest` using `memcpy()` semantics, returning the CRC32C
// value of the copied buffer.
//
// `count` is the number of bytes to copy. `initial_crc` defaults to
// `ToCrc32c(0)`; it is forwarded to the underlying copy-and-checksum routine,
// presumably as the CRC to extend (confirm against
// `crc_internal::Crc32CAndCopy()`).
//
// Using `MemcpyCrc32c()` is potentially faster than performing the `memcpy()`
// and `ComputeCrc32c()` operations separately.
crc32c_t MemcpyCrc32c(void* dest, const void* src, size_t count,
crc32c_t initial_crc = ToCrc32c(0));
// -----------------------------------------------------------------------------
// CRC32C Arithmetic Functions
// -----------------------------------------------------------------------------
// The following functions perform arithmetic on CRC32C values, which are
// generally more efficient than recalculating any given result's CRC32C value.
// ConcatCrc32c()
//
// Calculates the CRC32C value of two buffers with known CRC32C values
// concatenated together.
//
// Given a first buffer with CRC32C value `crc1`, and a second buffer with
// CRC32C value `crc2` and length `crc2_length`, returns the CRC32C value of
// the concatenation of these two buffers (first buffer followed by second).
//
// This operation has a runtime cost of O(log(`crc2_length`)).
crc32c_t ConcatCrc32c(crc32c_t crc1, crc32c_t crc2, size_t crc2_length);
// RemoveCrc32cPrefix()
//
// Calculates the CRC32C value of an existing buffer with a series of bytes
// (the prefix) removed from the beginning of that buffer.
//
// Given the CRC32C value of a prefix of an existing buffer, `prefix_crc`; the
// CRC32C value of the whole buffer, `full_string_crc`; and the length of the
// buffer with the prefix removed, `remaining_string_length`, returns the
// CRC32C value of the buffer with the prefix removed.
//
// This operation has a runtime cost of O(log(`remaining_string_length`)).
crc32c_t RemoveCrc32cPrefix(crc32c_t prefix_crc, crc32c_t full_string_crc,
size_t remaining_string_length);
// RemoveCrc32cSuffix()
//
// Calculates the CRC32C value of an existing buffer with a series of bytes
// (the suffix) removed from the end of that buffer.
//
// Given a CRC32C value of an existing buffer `full_string_crc`, the CRC32C
// value of the suffix to remove `suffix_crc`, and the length of that suffix
// `suffix_length`, returns the CRC32C value of the buffer with suffix removed.
//
// This operation has a runtime cost of O(log(`suffix_length`))
crc32c_t RemoveCrc32cSuffix(crc32c_t full_string_crc, crc32c_t suffix_crc,
size_t suffix_length);
// operator<<
//
// Writes `crc` to `os` as its underlying `uint32_t` value and returns `os`.
inline std::ostream& operator<<(std::ostream& os, crc32c_t crc) {
  const uint32_t numeric_value = static_cast<uint32_t>(crc);
  os << numeric_value;
  return os;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_CRC32C_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/crc32c.h"
#include "absl/memory/memory.h"
#include "benchmark/benchmark.h"
namespace {
// Builds a deterministic string of `len` bytes in which the byte at index `i`
// has the value `i % 256`.
std::string TestString(size_t len) {
  std::string bytes(len, '\0');
  for (size_t pos = 0; pos < len; ++pos) {
    bytes[pos] = static_cast<char>(pos % 256);
  }
  return bytes;
}
// Benchmarks `absl::ComputeCrc32c()` on a `TestString()` whose length is the
// benchmark argument.
void BM_Calculate(benchmark::State& state) {
  const int input_size = state.range(0);
  std::string input = TestString(input_size);
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    absl::crc32c_t computed = absl::ComputeCrc32c(input);
    benchmark::DoNotOptimize(computed);
  }
}
BENCHMARK(BM_Calculate)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000);
// Benchmarks `absl::ExtendCrc32c()`: a fixed starting CRC is extended with a
// `TestString()` whose length is the benchmark argument.
void BM_Extend(benchmark::State& state) {
  const int extension_size = state.range(0);
  std::string appended = TestString(extension_size);
  // 0xC99465AA is the CRC32C of "hello world".
  absl::crc32c_t starting_crc = absl::ToCrc32c(0xC99465AA);
  for (auto _ : state) {
    benchmark::DoNotOptimize(starting_crc);
    benchmark::DoNotOptimize(appended);
    absl::crc32c_t extended = absl::ExtendCrc32c(starting_crc, appended);
    benchmark::DoNotOptimize(extended);
  }
}
BENCHMARK(BM_Extend)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000);
// Benchmarks `absl::ExtendCrc32cByZeroes()` on a fixed starting CRC, with the
// number of zero bytes taken from the benchmark argument.
void BM_ExtendByZeroes(benchmark::State& state) {
  // 0xC99465AA is the CRC32C of "hello world".
  absl::crc32c_t starting_crc = absl::ToCrc32c(0xC99465AA);
  const int zero_count = state.range(0);
  for (auto _ : state) {
    benchmark::DoNotOptimize(starting_crc);
    absl::crc32c_t extended =
        absl::ExtendCrc32cByZeroes(starting_crc, zero_count);
    benchmark::DoNotOptimize(extended);
  }
}
BENCHMARK(BM_ExtendByZeroes)
    ->RangeMultiplier(10)
    ->Range(1, 1000000)
    ->RangeMultiplier(32)
    ->Range(1, 1 << 20);
// Benchmarks `absl::crc_internal::UnextendCrc32cByZeroes()` on the fixed value
// 0xdeadbeef, with the number of zero bytes taken from the benchmark argument.
void BM_UnextendByZeroes(benchmark::State& state) {
  absl::crc32c_t starting_crc = absl::ToCrc32c(0xdeadbeef);
  const int zero_count = state.range(0);
  for (auto _ : state) {
    benchmark::DoNotOptimize(starting_crc);
    absl::crc32c_t unextended =
        absl::crc_internal::UnextendCrc32cByZeroes(starting_crc, zero_count);
    benchmark::DoNotOptimize(unextended);
  }
}
BENCHMARK(BM_UnextendByZeroes)
    ->RangeMultiplier(10)
    ->Range(1, 1000000)
    ->RangeMultiplier(32)
    ->Range(1, 1 << 20);
// Benchmarks `absl::ConcatCrc32c()`: a fixed left-hand CRC is combined with
// the CRC of a `TestString()` whose length is the benchmark argument.
void BM_Concat(benchmark::State& state) {
  int rhs_size = state.range(0);
  const std::string rhs = TestString(rhs_size);

  // 0xC99465AA is the CRC32C of "hello world".
  absl::crc32c_t lhs_crc = absl::ToCrc32c(0xC99465AA);
  absl::crc32c_t rhs_crc = absl::ComputeCrc32c(rhs);

  for (auto _ : state) {
    benchmark::DoNotOptimize(lhs_crc);
    benchmark::DoNotOptimize(rhs_crc);
    benchmark::DoNotOptimize(rhs_size);
    absl::crc32c_t combined = absl::ConcatCrc32c(lhs_crc, rhs_crc, rhs_size);
    benchmark::DoNotOptimize(combined);
  }
}
BENCHMARK(BM_Concat)
    ->RangeMultiplier(10)
    ->Range(1, 1000000)
    ->RangeMultiplier(32)
    ->Range(1, 1 << 20);
// Benchmarks `absl::MemcpyCrc32c()` copying a `TestString()` of the requested
// length into a freshly allocated buffer. Reports bytes processed as
// iterations times the benchmark argument.
void BM_Memcpy(benchmark::State& state) {
  const int copy_size = state.range(0);
  std::string source = TestString(copy_size);
  auto dest = absl::make_unique<char[]>(copy_size);

  for (auto _ : state) {
    benchmark::DoNotOptimize(source);
    absl::crc32c_t copied_crc =
        absl::MemcpyCrc32c(dest.get(), source.data(), source.size());
    benchmark::DoNotOptimize(copied_crc);
    benchmark::DoNotOptimize(dest);
    benchmark::DoNotOptimize(dest.get());
    benchmark::DoNotOptimize(dest[0]);
  }

  const int64_t iteration_count = static_cast<int64_t>(state.iterations());
  state.SetBytesProcessed(iteration_count * state.range(0));
}
BENCHMARK(BM_Memcpy)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000);
// Benchmarks `absl::RemoveCrc32cSuffix()`. The first benchmark argument is the
// length of the full string and the second is the length of the suffix whose
// CRC is removed.
void BM_RemoveSuffix(benchmark::State& state) {
  const int full_string_len = state.range(0);
  int suffix_len = state.range(1);

  const std::string full_string = TestString(full_string_len);
  // The suffix starts `suffix_len` bytes before the end of the full string.
  const std::string suffix =
      full_string.substr(full_string_len - suffix_len, full_string_len);

  absl::crc32c_t full_string_crc = absl::ComputeCrc32c(full_string);
  absl::crc32c_t suffix_crc = absl::ComputeCrc32c(suffix);

  for (auto _ : state) {
    benchmark::DoNotOptimize(full_string_crc);
    benchmark::DoNotOptimize(suffix_crc);
    benchmark::DoNotOptimize(suffix_len);
    absl::crc32c_t trimmed_crc =
        absl::RemoveCrc32cSuffix(full_string_crc, suffix_crc, suffix_len);
    benchmark::DoNotOptimize(trimmed_crc);
  }
}
BENCHMARK(BM_RemoveSuffix)
    ->ArgPair(1, 1)
    ->ArgPair(100, 10)
    ->ArgPair(100, 100)
    ->ArgPair(10000, 1)
    ->ArgPair(10000, 100)
    ->ArgPair(10000, 10000)
    ->ArgPair(500000, 1)
    ->ArgPair(500000, 100)
    ->ArgPair(500000, 10000)
    ->ArgPair(500000, 500000);
} // namespace
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/crc/crc32c.h"
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include "gtest/gtest.h"
#include "absl/crc/internal/crc32c.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace {
// Checks `absl::ComputeCrc32c()` against five fixed inputs and their expected
// CRC32C values.
TEST(CRC32C, RFC3720) {
// Test the results of the vectors from
// https://www.rfc-editor.org/rfc/rfc3720#appendix-B.4
char data[32];
// 32 bytes of zeroes.
memset(data, 0, sizeof(data));
EXPECT_EQ(absl::ComputeCrc32c(absl::string_view(data, sizeof(data))),
absl::ToCrc32c(0x8a9136aa));
// 32 bytes of ones.
memset(data, 0xff, sizeof(data));
EXPECT_EQ(absl::ComputeCrc32c(absl::string_view(data, sizeof(data))),
absl::ToCrc32c(0x62a8ab43));
// 32 incrementing bytes.
for (int i = 0; i < 32; ++i) data[i] = static_cast<char>(i);
EXPECT_EQ(absl::ComputeCrc32c(absl::string_view(data, sizeof(data))),
absl::ToCrc32c(0x46dd794e));
// 32 decrementing bytes.
for (int i = 0; i < 32; ++i) data[i] = static_cast<char>(31 - i);
EXPECT_EQ(absl::ComputeCrc32c(absl::string_view(data, sizeof(data))),
absl::ToCrc32c(0x113fdb5c));
// An iSCSI - SCSI Read (10) Command PDU.
constexpr uint8_t cmd[48] = {
0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
EXPECT_EQ(absl::ComputeCrc32c(absl::string_view(
reinterpret_cast<const char*>(cmd), sizeof(cmd))),
absl::ToCrc32c(0xd9963a56));
}
// Returns a `len`-byte string whose bytes count 0, 1, ..., 255 and then wrap
// around to 0 again (byte `i` equals `i % 256`).
std::string TestString(size_t len) {
  std::string pattern;
  pattern.resize(len);
  unsigned char next_byte = 0;  // wraps to 0 after 255
  for (char& slot : pattern) {
    slot = static_cast<char>(next_byte);
    ++next_byte;
  }
  return pattern;
}
// Checks `ComputeCrc32c()` against two fixed values: the empty string, whose
// CRC is 0, and "hello world".
TEST(CRC32C, Compute) {
  const absl::crc32c_t empty_crc = absl::ComputeCrc32c("");
  EXPECT_EQ(empty_crc, absl::ToCrc32c(0));

  const absl::crc32c_t hello_crc = absl::ComputeCrc32c("hello world");
  EXPECT_EQ(hello_crc, absl::ToCrc32c(0xc99465aa));
}
// Checks that extending a known CRC with "Extension String" yields the
// expected fixed value.
TEST(CRC32C, Extend) {
  // 0xC99465AA is the CRC32C of "hello world".
  const uint32_t starting_value = 0xC99465AA;
  const std::string appended = "Extension String";

  const absl::crc32c_t starting_crc = absl::ToCrc32c(starting_value);
  // Expected CRC32C of the starting string followed by "Extension String".
  EXPECT_EQ(absl::ExtendCrc32c(starting_crc, appended),
            absl::ToCrc32c(0xD2F65090));
}
// Checks that `ExtendCrc32cByZeroes()` matches computing the CRC of
// "hello world" followed by the same number of literal zero bytes.
TEST(CRC32C, ExtendByZeroes) {
  const std::string prefix = "hello world";
  const absl::crc32c_t prefix_crc = absl::ToCrc32c(0xc99465aa);

  for (const size_t extend_by : {100, 10000, 100000}) {
    SCOPED_TRACE(extend_by);
    const std::string padded = prefix + std::string(extend_by, '\0');
    absl::crc32c_t crc2 = absl::ExtendCrc32cByZeroes(prefix_crc, extend_by);
    EXPECT_EQ(crc2, absl::ComputeCrc32c(padded));
  }
}
// Checks `UnextendCrc32cByZeroes()` in two ways: first against
// `ExtendCrc32cByZeroes()` for several size pairs and two seed CRCs, then
// against CRCs computed from real strings with trailing zero bytes.
TEST(CRC32C, UnextendByZeroes) {
for (auto seed_crc : {absl::ToCrc32c(0), absl::ToCrc32c(0xc99465aa)}) {
SCOPED_TRACE(seed_crc);
for (const size_t size_1 : {2, 200, 20000, 200000, 20000000}) {
for (const size_t size_2 : {0, 100, 10000, 100000, 10000000}) {
// The larger of the two sizes is always the extension, so the
// subtraction below cannot underflow.
size_t extend_size = std::max(size_1, size_2);
size_t unextend_size = std::min(size_1, size_2);
SCOPED_TRACE(extend_size);
SCOPED_TRACE(unextend_size);
// Extending by A zeroes and unextending by B<A zeroes should be identical
// to extending by A-B zeroes.
absl::crc32c_t crc1 = seed_crc;
crc1 = absl::ExtendCrc32cByZeroes(crc1, extend_size);
crc1 = absl::crc_internal::UnextendCrc32cByZeroes(crc1, unextend_size);
absl::crc32c_t crc2 = seed_crc;
crc2 = absl::ExtendCrc32cByZeroes(crc2, extend_size - unextend_size);
EXPECT_EQ(crc1, crc2);
}
}
}
// Unextending the CRC of (string + `size` zero bytes) by `size` must give
// back the CRC of the original string.
for (const size_t size : {0, 1, 100, 10000}) {
SCOPED_TRACE(size);
std::string string_before = TestString(size);
std::string string_after = string_before + std::string(size, '\0');
absl::crc32c_t crc_before = absl::ComputeCrc32c(string_before);
absl::crc32c_t crc_after = absl::ComputeCrc32c(string_after);
EXPECT_EQ(crc_before,
absl::crc_internal::UnextendCrc32cByZeroes(crc_after, size));
}
}
// Checks that `ConcatCrc32c()` applied to the CRCs of "Hello, " and "world!"
// equals the CRC computed directly over the joined string.
TEST(CRC32C, Concat) {
  const std::string hello = "Hello, ";
  const std::string world = "world!";
  const std::string hello_world = absl::StrCat(hello, world);

  const absl::crc32c_t left_crc = absl::ComputeCrc32c(hello);
  const absl::crc32c_t right_crc = absl::ComputeCrc32c(world);
  const absl::crc32c_t joined_crc = absl::ComputeCrc32c(hello_world);

  EXPECT_EQ(absl::ConcatCrc32c(left_crc, right_crc, world.size()), joined_crc);
}
// Checks, for several sizes, that `MemcpyCrc32c()` both returns the same CRC
// as `ComputeCrc32c()` on the source and leaves the destination equal to the
// source.
TEST(CRC32C, Memcpy) {
  for (size_t bytes : {0, 1, 20, 500, 100000}) {
    SCOPED_TRACE(bytes);
    const std::string source = TestString(bytes);
    std::string destination(bytes, '\0');

    const absl::crc32c_t memcpy_crc =
        absl::MemcpyCrc32c(&(destination[0]), source.data(), bytes);
    const absl::crc32c_t compute_crc = absl::ComputeCrc32c(source);

    EXPECT_EQ(memcpy_crc, compute_crc);
    EXPECT_EQ(source, destination);
  }
}
// Checks that removing the CRC of the prefix "Hello, " from the CRC of
// "Hello, world!" yields the CRC of "world!".
TEST(CRC32C, RemovePrefix) {
  const std::string hello = "Hello, ";
  const std::string world = "world!";
  const std::string hello_world = absl::StrCat(hello, world);

  const absl::crc32c_t prefix_crc = absl::ComputeCrc32c(hello);
  const absl::crc32c_t remainder_crc = absl::ComputeCrc32c(world);
  const absl::crc32c_t full_crc = absl::ComputeCrc32c(hello_world);

  EXPECT_EQ(absl::RemoveCrc32cPrefix(prefix_crc, full_crc, world.size()),
            remainder_crc);
}
// Checks that removing the CRC of the suffix "world!" from the CRC of
// "Hello, world!" yields the CRC of "Hello, ".
TEST(CRC32C, RemoveSuffix) {
  const std::string hello = "Hello, ";
  const std::string world = "world!";
  const std::string hello_world = absl::StrCat(hello, world);

  const absl::crc32c_t remainder_crc = absl::ComputeCrc32c(hello);
  const absl::crc32c_t suffix_crc = absl::ComputeCrc32c(world);
  const absl::crc32c_t full_crc = absl::ComputeCrc32c(hello_world);

  EXPECT_EQ(absl::RemoveCrc32cSuffix(full_crc, suffix_crc, world.size()),
            remainder_crc);
}
} // namespace
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/crc/internal/cpu_detect.h"
#include <cstdint>
#include <string>
#include "absl/base/config.h"
#if defined(__aarch64__) && defined(__linux__)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
#if defined(__x86_64__)
// Inline cpuid instruction. %rbx is occasionally used to address stack
// variables in presence of dynamic allocas. Preserve the %rbx register via
// %rdi to work around a clang bug https://bugs.llvm.org/show_bug.cgi?id=17907
// (%rbx in an output constraint is not considered a clobbered register).
//
// a_inp and c_inp are the input parameters eax and ecx of the CPUID
// instruction.
// a, b, c, and d contain the contents of eax, ebx, ecx, and edx as returned by
// the CPUID instruction
//
// Operand notes:
//   * `b` is bound to %rdi ("=D"), not %rbx: the asm copies %rbx into %rdi
//     before `cpuid`, and the final `xchg` puts the saved value back into
//     %rbx while leaving cpuid's ebx result in %rdi.
//   * The "2" input constraint ties `c_inp` to output operand 2, i.e. the same
//     register as `c` (ecx).
#define ABSL_INTERNAL_GETCPUID(a, b, c, d, a_inp, c_inp) \
asm("mov %%rbx, %%rdi\n" \
"cpuid\n" \
"xchg %%rdi, %%rbx\n" \
: "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
: "a"(a_inp), "2"(c_inp))
namespace {
// CPU vendors distinguished by `GetVendor()`. Any vendor string other than
// the Intel and AMD ones maps to `kUnknown`.
enum class Vendor {
kUnknown,
kIntel,
kAmd,
};
// Identifies the CPU vendor by reading the 12-character vendor string from
// CPUID (eax = 0).
//
// Returns `Vendor::kIntel` for "GenuineIntel", `Vendor::kAmd` for
// "AuthenticAMD", and `Vendor::kUnknown` for anything else.
Vendor GetVendor() {
  uint32_t eax, ebx, ecx, edx;

  // Get vendor string (issue CPUID with eax = 0)
  ABSL_INTERNAL_GETCPUID(eax, ebx, ecx, edx, 0, 0);

  // The vendor string is assembled from ebx, edx, ecx, in that order, four
  // bytes each.
  std::string vendor;
  vendor.append(reinterpret_cast<char*>(&ebx), 4);
  vendor.append(reinterpret_cast<char*>(&edx), 4);
  vendor.append(reinterpret_cast<char*>(&ecx), 4);

  if (vendor == "GenuineIntel") {
    return Vendor::kIntel;
  }
  // AMD processors report "AuthenticAMD" with an upper-case "AMD". The
  // comparison is case-sensitive, so the literal must match exactly.
  if (vendor == "AuthenticAMD") {
    return Vendor::kAmd;
  }
  return Vendor::kUnknown;
}
// Maps the CPUID (eax = 1) signature of an Intel processor to a `CpuType`.
//
// Only processors that report brand ID 0 and family 6 are classified; every
// other combination, and every unlisted model, yields `CpuType::kUnknown`.
CpuType GetIntelCpuType() {
  uint32_t eax, ebx, ecx, edx;
  // General information and extended features are requested with eax = 1 and
  // ecx = 0; the response comes back in eax, ebx, ecx and edx.
  // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  // Volume 2A: Instruction Set Reference, A-M CPUID).
  // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2a-manual.html
  ABSL_INTERNAL_GETCPUID(eax, ebx, ecx, edx, 1, 0);

  // Bit layout of eax:
  //   0-3   stepping id
  //   4-7   model number
  //   8-11  family code
  //   12-13 processor type
  //   16-19 extended model
  //   20-27 extended family
  const int stepping = eax & 0x0f;
  int model_num = (eax >> 4) & 0x0f;
  int family = (eax >> 8) & 0x0f;
  const int ext_model_num = (eax >> 16) & 0x0f;
  const int ext_family = (eax >> 20) & 0xff;

  const int brand_id = ebx & 0xff;

  // Fold in the extended family and model fields where they apply.
  if (family == 0x0f) {
    family += ext_family;
  }
  if (family == 0x0f || family == 0x6) {
    model_num += (ext_model_num << 4);
  }

  // Only parts with no brand ID and family 6 are parsed by model number.
  if (brand_id != 0 || family != 6) {
    return CpuType::kUnknown;
  }

  switch (model_num) {
    case 0x2c:  // Westmere: Gulftown
      return CpuType::kIntelWestmere;
    case 0x2d:  // Sandybridge
      return CpuType::kIntelSandybridge;
    case 0x3e:  // Ivybridge
      return CpuType::kIntelIvybridge;
    case 0x3c:  // Haswell (client)
    case 0x3f:  // Haswell
      return CpuType::kIntelHaswell;
    case 0x4f:  // Broadwell
    case 0x56:  // BroadwellDE
      return CpuType::kIntelBroadwell;
    case 0x55:  // Skylake Xeon: stepping < 5 is skylake, >= 5 is cascadelake
      return stepping < 5 ? CpuType::kIntelSkylakeXeon
                          : CpuType::kIntelCascadelakeXeon;
    case 0x5e:  // Skylake (client)
      return CpuType::kIntelSkylake;
    default:
      return CpuType::kUnknown;
  }
}
// Maps the family/model reported by CPUID leaf 1 to one of the AMD CpuType
// values. Anything not explicitly listed yields CpuType::kUnknown.
CpuType GetAmdCpuType() {
  uint32_t eax, ebx, ecx, edx;
  // General information and extended features are requested with eax = 1 and
  // ecx = 0; the answer comes back in eax, ebx, ecx and edx.
  // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  // Volume 2A: Instruction Set Reference, A-M CPUID).
  ABSL_INTERNAL_GETCPUID(eax, ebx, ecx, edx, 1, 0);

  // Layout of eax:
  //   bits 0-3    stepping id
  //   bits 4-7    model number
  //   bits 8-11   family code
  //   bits 12-13  processor type
  //   bits 16-19  extended model
  //   bits 20-27  extended family
  const int ext_model_num = (eax >> 16) & 0x0f;
  const int ext_family = (eax >> 20) & 0xff;
  int model_num = (eax >> 4) & 0x0f;
  int family = (eax >> 8) & 0x0f;

  // The extended fields only contribute when the base family is 0xf.
  if (family == 0x0f) {
    family += ext_family;
    model_num += (ext_model_num << 4);
  }

  if (family == 0x17) {
    if (model_num == 0x0 || model_num == 0x1) {  // Stepping Ax / Bx
      return CpuType::kAmdNaples;
    }
    if (model_num == 0x30 || model_num == 0x31) {  // Stepping Ax / Bx
      return CpuType::kAmdRome;
    }
    return CpuType::kUnknown;
  }

  if (family == 0x19) {
    // Model 0x1 is stepping B0.
    return model_num == 0x1 ? CpuType::kAmdMilan : CpuType::kUnknown;
  }

  return CpuType::kUnknown;
}
} // namespace
// x86-64 implementation: determines the vendor first and then delegates to the
// vendor-specific classifier. Unrecognized vendors yield CpuType::kUnknown.
CpuType GetCpuType() {
  const Vendor vendor = GetVendor();
  if (vendor == Vendor::kIntel) {
    return GetIntelCpuType();
  }
  if (vendor == Vendor::kAmd) {
    return GetAmdCpuType();
  }
  return CpuType::kUnknown;
}
#elif defined(__aarch64__) && defined(__linux__)
// Reads the AArch64 system register `id` into `val` with an `mrs` instruction.
// `id` is stringified into the assembly text, so it must be a bare register
// name (for example MIDR_EL1); `val` receives the value through a
// general-purpose register output.
#define ABSL_INTERNAL_AARCH64_ID_REG_READ(id, val) \
asm("mrs %0, " #id : "=r"(val))
// AArch64 Linux implementation: classifies the CPU from MIDR_EL1 when the
// kernel advertises HWCAP_CPUID, and reports CpuType::kUnknown otherwise.
CpuType GetCpuType() {
  // MIDR_EL1 is not visible to EL0, however the access will be emulated by
  // linux if AT_HWCAP has HWCAP_CPUID set.
  //
  // This method will be unreliable on heterogeneous computing systems (ex:
  // big.LITTLE) since the value of MIDR_EL1 will change based on the calling
  // thread.
  const uint64_t hwcaps = getauxval(AT_HWCAP);
  if ((hwcaps & HWCAP_CPUID) == 0) {
    return CpuType::kUnknown;
  }

  uint64_t midr = 0;
  ABSL_INTERNAL_AARCH64_ID_REG_READ(MIDR_EL1, midr);

  // Implementer code lives in bits 24-31, part number in bits 4-15.
  const uint32_t implementer = (midr >> 24) & 0xff;
  const uint32_t part_number = (midr >> 4) & 0xfff;

  // Implementer 0x41 with part number 0xd0c is the only combination that is
  // recognized here.
  const bool is_neoverse_n1 = implementer == 0x41 && part_number == 0xd0c;
  return is_neoverse_n1 ? CpuType::kArmNeoverseN1 : CpuType::kUnknown;
}
// Reports whether the kernel-provided hardware capability bits (AT_HWCAP)
// advertise both the CRC32 and the PMULL features.
bool SupportsArmCRC32PMULL() {
  const uint64_t hwcaps = getauxval(AT_HWCAP);
  const bool has_crc32 = (hwcaps & HWCAP_CRC32) != 0;
  const bool has_pmull = (hwcaps & HWCAP_PMULL) != 0;
  return has_crc32 && has_pmull;
}
#else
// Generic fallback: no CPU detection is implemented for this target, so the
// CPU type is always reported as unknown.
CpuType GetCpuType() { return CpuType::kUnknown; }
#if defined(__aarch64__)
// cpu_detect.h declares SupportsArmCRC32PMULL() for every AArch64 target, but
// the hwcap-based definition is only compiled for AArch64 Linux. Without a
// definition in this branch, any caller on another AArch64 platform fails to
// link. Runtime detection is not available here, so answer conservatively.
bool SupportsArmCRC32PMULL() { return false; }
#endif
#endif
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CPU_DETECT_H_
#define ABSL_CRC_INTERNAL_CPU_DETECT_H_
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Enumeration of architectures that we have special-case tuning parameters for.
// This set may change over time.
//
// kUnknown is the value reported when the host CPU matches none of the other
// entries or when detection is not possible.
enum class CpuType {
kUnknown,
kIntelHaswell,
kAmdRome,
kAmdNaples,
kAmdMilan,
kIntelCascadelakeXeon,
kIntelSkylakeXeon,
kIntelBroadwell,
kIntelSkylake,
kIntelIvybridge,
kIntelSandybridge,
kIntelWestmere,
kArmNeoverseN1,
};
// Returns the type of host CPU this code is running on. Returns kUnknown if
// the host CPU is of unknown type, or if detection otherwise fails.
//
// Takes no arguments; the result is always one of the CpuType enumerators.
CpuType GetCpuType();
#if defined(__aarch64__)
// Returns whether the host CPU supports the CPU features needed for our
// accelerated implementations. The CpuTypes enumerated above apart from
// kUnknown support the required features. On unknown CPUs, we can use
// this to see if it's safe to use hardware acceleration, though without any
// tuning.
//
// Only declared when compiling for AArch64 (see the enclosing #if).
bool SupportsArmCRC32PMULL();
#endif
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CPU_DETECT_H_
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC_H_
#define ABSL_CRC_INTERNAL_CRC_H_
#include <cstdint>
#include "absl/base/config.h"
// This class implements CRCs (aka Rabin Fingerprints).
// Treats the input as a polynomial with coefficients in Z(2),
// and finds the remainder when divided by a primitive polynomial
// of the appropriate length.
// A polynomial is represented by the bit pattern formed by its coefficients,
// but with the highest order bit not stored.
// The highest degree coefficient is stored in the lowest numbered bit
// in the lowest addressed byte. Thus, in what follows, the highest degree
// coefficient that is stored is in the low order bit of "lo" or "*lo".
// Hardware acceleration is used when available.
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Abstract interface for CRC computation. All operations work on a 32-bit CRC
// value passed by pointer and updated in place. Instances are not copyable and
// cannot be constructed directly by clients; use Crc32c() to obtain one.
class CRC {
public:
virtual ~CRC();
// Place the CRC of the empty string in "*crc"
virtual void Empty(uint32_t* crc) const = 0;
// If "*crc" is the CRC of bytestring A, place the CRC of
// the bytestring formed from the concatenation of A and the "length"
// bytes at "bytes" into "*crc".
virtual void Extend(uint32_t* crc, const void* bytes,
size_t length) const = 0;
// Equivalent to Extend(crc, bytes, length) where "bytes"
// points to an array of "length" zero bytes.
virtual void ExtendByZeroes(uint32_t* crc, size_t length) const = 0;
// Inverse operation of ExtendByZeroes. If `*crc` is the CRC value of a
// string ending in `length` zero bytes, this replaces `*crc` with the CRC
// value of that string with those zero bytes removed.
virtual void UnextendByZeroes(uint32_t* crc, size_t length) const = 0;
// If *px is the CRC (as defined by *crc) of some string X,
// and y is the CRC of some string Y that is ylen bytes long, set
// *px to the CRC of the concatenation of X followed by Y.
// Unlike the other operations this one is neither pure virtual nor const.
virtual void Concat(uint32_t* px, uint32_t y, size_t ylen);
// Apply a non-linear transformation to "*crc" so that
// it is safe to CRC the result with the same polynomial without
// any reduction of error-detection ability in the outer CRC.
// Unscramble() performs the inverse transformation.
// It is strongly recommended that CRCs be scrambled before storage or
// transmission, and unscrambled at the other end before further manipulation.
virtual void Scramble(uint32_t* crc) const = 0;
virtual void Unscramble(uint32_t* crc) const = 0;
// Crc32c() returns the singleton implementation of CRC for the CRC32C
// polynomial. Returns a handle that MUST NOT be destroyed with delete.
static CRC* Crc32c();
protected:
CRC(); // Clients may not call constructor; use Crc32c() instead.
private:
// Copying is disabled.
CRC(const CRC&) = delete;
CRC& operator=(const CRC&) = delete;
};
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC_H_
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC32_X86_ARM_COMBINED_SIMD_H_
#define ABSL_CRC_INTERNAL_CRC32_X86_ARM_COMBINED_SIMD_H_
#include <cstdint>
#include "absl/base/config.h"
// -------------------------------------------------------------------------
// Many x86 and ARM machines have CRC acceleration hardware.
// We can do a faster version of Extend() on such machines.
// We define a translation layer for both x86 and ARM for the ease of use and
// most performance gains.
// We need CRC (part of sse4.2) and PCLMULQDQ instructions.
#if defined(__SSE4_2__) && defined(__PCLMUL__)
#include <x86intrin.h>
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON)
#include <arm_acle.h>
#include <arm_neon.h>
#define ABSL_CRC_INTERNAL_HAVE_ARM_SIMD
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
#if defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD) || \
defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
// V128 is the 128-bit vector type of the selected backend: uint64x2_t when the
// ARM path is enabled, __m128i otherwise (the x86 path, given the enclosing
// #if).
#if defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
using V128 = uint64x2_t;
#else
using V128 = __m128i;
#endif
// Common interface of the translation layer. Every function declared here has
// one definition per backend further down in this header.

// Starting with the initial value in |crc|, accumulates a CRC32 value for
// unsigned integers of different sizes.
uint32_t CRC32_u8(uint32_t crc, uint8_t v);
uint32_t CRC32_u16(uint32_t crc, uint16_t v);
uint32_t CRC32_u32(uint32_t crc, uint32_t v);
uint32_t CRC32_u64(uint32_t crc, uint64_t v);
// Loads 128 bits of integer data. |src| must be 16-byte aligned.
V128 V128_Load(const V128* src);
// Loads 128 bits of integer data. |src| does not need to be aligned.
V128 V128_LoadU(const V128* src);
// Polynomially multiplies the high 64 bits of |l| and |r|.
V128 V128_PMulHi(const V128 l, const V128 r);
// Polynomially multiplies the low 64 bits of |l| and |r|.
V128 V128_PMulLow(const V128 l, const V128 r);
// Polynomially multiplies the low 64 bits of |r| and high 64 bits of |l|.
V128 V128_PMul01(const V128 l, const V128 r);
// Polynomially multiplies the low 64 bits of |l| and high 64 bits of |r|.
V128 V128_PMul10(const V128 l, const V128 r);
// Produces a XOR operation of |l| and |r|.
V128 V128_Xor(const V128 l, const V128 r);
// Produces an AND operation of |l| and |r|.
V128 V128_And(const V128 l, const V128 r);
// Packs two 64-bit integers into one 128-bit vector. Note that the first
// argument ends up in the high half:
// dst[63:0] := |r|
// dst[127:64] := |l|
V128 V128_From2x64(const uint64_t l, const uint64_t r);
// Shift |l| right by |imm| bytes while shifting in zeros.
template <int imm>
V128 V128_ShiftRight(const V128 l);
// Extracts a 32-bit integer from |l|, selected with |imm|.
template <int imm>
int V128_Extract32(const V128 l);
// Extracts the low 64 bits from V128.
int64_t V128_Low64(const V128 l);
// Left-shifts packed 64-bit integers in l by r.
V128 V128_ShiftLeft64(const V128 l, const V128 r);
#endif
#if defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
// x86 backend: each wrapper forwards to the SSE4.2 CRC32 intrinsic of the
// matching operand width.
inline uint32_t CRC32_u8(uint32_t crc, uint8_t v) {
return _mm_crc32_u8(crc, v);
}
inline uint32_t CRC32_u16(uint32_t crc, uint16_t v) {
return _mm_crc32_u16(crc, v);
}
inline uint32_t CRC32_u32(uint32_t crc, uint32_t v) {
return _mm_crc32_u32(crc, v);
}
// The 64-bit intrinsic produces a 64-bit result; the return type narrows it
// back to 32 bits.
inline uint32_t CRC32_u64(uint32_t crc, uint64_t v) {
return _mm_crc32_u64(crc, v);
}
// x86 backend: aligned load via _mm_load_si128, unaligned load via
// _mm_loadu_si128.
inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
// x86 backend: carry-less multiplies built on _mm_clmulepi64_si128. The
// immediate selects which 64-bit half of each operand is used: bit 0 picks the
// half of |l| and bit 4 the half of |r| (0 = low, 1 = high).
inline V128 V128_PMulHi(const V128 l, const V128 r) {
return _mm_clmulepi64_si128(l, r, 0x11);
}
inline V128 V128_PMulLow(const V128 l, const V128 r) {
return _mm_clmulepi64_si128(l, r, 0x00);
}
// 0x01: high half of |l| times low half of |r|.
inline V128 V128_PMul01(const V128 l, const V128 r) {
return _mm_clmulepi64_si128(l, r, 0x01);
}
// 0x10: low half of |l| times high half of |r|.
inline V128 V128_PMul10(const V128 l, const V128 r) {
return _mm_clmulepi64_si128(l, r, 0x10);
}
// x86 backend: bitwise XOR and AND over the full 128 bits.
inline V128 V128_Xor(const V128 l, const V128 r) { return _mm_xor_si128(l, r); }
inline V128 V128_And(const V128 l, const V128 r) { return _mm_and_si128(l, r); }
// x86 backend: builds a vector with |l| in bits 127:64 and |r| in bits 63:0.
inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
  // _mm_set_epi64x takes signed 64-bit arguments. Convert explicitly so the
  // unsigned-to-signed change is visible and does not trip sign-conversion
  // warnings; the bit patterns are unchanged.
  return _mm_set_epi64x(static_cast<long long>(l), static_cast<long long>(r));
}
// x86 backend: byte-wise right shift of the whole vector by |imm| bytes.
template <int imm>
inline V128 V128_ShiftRight(const V128 l) {
return _mm_srli_si128(l, imm);
}
// x86 backend: returns the 32-bit element of |l| selected by |imm|.
template <int imm>
inline int V128_Extract32(const V128 l) {
return _mm_extract_epi32(l, imm);
}
// x86 backend: returns the low 64 bits of |l|.
inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
// x86 backend: left-shifts the packed 64-bit integers of |l| by the count
// held in |r|.
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
return _mm_sll_epi64(l, r);
}
#elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
// ARM backend: each wrapper forwards to the ACLE __crc32c* intrinsic of the
// matching operand width (b = 8, h = 16, w = 32, d = 64 bits).
inline uint32_t CRC32_u8(uint32_t crc, uint8_t v) { return __crc32cb(crc, v); }
inline uint32_t CRC32_u16(uint32_t crc, uint16_t v) {
return __crc32ch(crc, v);
}
inline uint32_t CRC32_u32(uint32_t crc, uint32_t v) {
return __crc32cw(crc, v);
}
inline uint32_t CRC32_u64(uint32_t crc, uint64_t v) {
return __crc32cd(crc, v);
}
// ARM backend: the aligned and the unaligned load are implemented identically,
// both through vld1q_u64.
inline V128 V128_Load(const V128* src) {
return vld1q_u64(reinterpret_cast<const uint64_t*>(src));
}
inline V128 V128_LoadU(const V128* src) {
return vld1q_u64(reinterpret_cast<const uint64_t*>(src));
}
// ARM backend: polynomial multiply of the high 64-bit halves of |l| and |r|.
//
// Using inline assembly as clang does not generate the pmull2 instruction and
// performance drops by 15-20%.
// TODO(b/193678732): Investigate why the compiler decides not to generate
// such instructions and why it becomes so much worse.
inline V128 V128_PMulHi(const V128 l, const V128 r) {
uint64x2_t res;
// Both inputs and the result live in SIMD registers ("w" constraints).
__asm__ __volatile__("pmull2 %0.1q, %1.2d, %2.2d \n\t"
: "=w"(res)
: "w"(l), "w"(r));
return res;
}
// ARM backend: polynomial multiplies built on vmull_p64. Each operand is
// viewed as a pair of poly64 lanes and the requested half is picked with
// vget_low_p64 / vget_high_p64.
inline V128 V128_PMulLow(const V128 l, const V128 r) {
return reinterpret_cast<V128>(vmull_p64(
reinterpret_cast<poly64_t>(vget_low_p64(vreinterpretq_p64_u64(l))),
reinterpret_cast<poly64_t>(vget_low_p64(vreinterpretq_p64_u64(r)))));
}
// High half of |l| times low half of |r|.
inline V128 V128_PMul01(const V128 l, const V128 r) {
return reinterpret_cast<V128>(vmull_p64(
reinterpret_cast<poly64_t>(vget_high_p64(vreinterpretq_p64_u64(l))),
reinterpret_cast<poly64_t>(vget_low_p64(vreinterpretq_p64_u64(r)))));
}
// Low half of |l| times high half of |r|.
inline V128 V128_PMul10(const V128 l, const V128 r) {
return reinterpret_cast<V128>(vmull_p64(
reinterpret_cast<poly64_t>(vget_low_p64(vreinterpretq_p64_u64(l))),
reinterpret_cast<poly64_t>(vget_high_p64(vreinterpretq_p64_u64(r)))));
}
// ARM backend: bitwise XOR and AND over both 64-bit lanes.
inline V128 V128_Xor(const V128 l, const V128 r) { return veorq_u64(l, r); }
inline V128 V128_And(const V128 l, const V128 r) { return vandq_u64(l, r); }
// ARM backend: vcombine_u64 takes the low half first, so |r| is passed first
// to land in bits 63:0 and |l| second to land in bits 127:64.
inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
return vcombine_u64(vcreate_u64(r), vcreate_u64(l));
}
// ARM backend: byte-wise right shift implemented with vextq_s8, which
// extracts 16 bytes starting at byte |imm| from |l| followed by an all-zero
// vector, so zeros are shifted in from the top.
template <int imm>
inline V128 V128_ShiftRight(const V128 l) {
return vreinterpretq_u64_s8(
vextq_s8(vreinterpretq_s8_u64(l), vdupq_n_s8(0), imm));
}
// ARM backend: returns 32-bit lane |imm| of |l| as a signed value.
template <int imm>
inline int V128_Extract32(const V128 l) {
return vgetq_lane_s32(vreinterpretq_s32_u64(l), imm);
}
// ARM backend: returns 64-bit lane 0 of |l| as a signed value.
inline int64_t V128_Low64(const V128 l) {
return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0);
}
// ARM backend: left-shifts the packed 64-bit integers of |l| by the counts
// held in |r|.
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
  // vshlq_u64 expects its shift operand as a signed vector (int64x2_t).
  // Reinterpret |r| explicitly instead of relying on lax implicit vector
  // conversions, which not every compiler configuration accepts.
  return vshlq_u64(l, vreinterpretq_s64_u64(r));
}
#endif
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC32_X86_ARM_COMBINED_SIMD_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC32C_H_
#define ABSL_CRC_INTERNAL_CRC32C_H_
#include "absl/base/config.h"
#include "absl/crc/crc32c.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Modifies a CRC32 value by removing `length` bytes with a value of 0 from
// the end of the string.
//
// This is the inverse operation of ExtendCrc32cByZeroes().
//
// `initial_crc` is the CRC of a string that ends in `length` zero bytes; the
// result is the CRC of that string without those trailing zero bytes.
//
// This operation has a runtime cost of O(log(`length`))
//
// Internal implementation detail, exposed for testing only.
crc32c_t UnextendCrc32cByZeroes(crc32c_t initial_crc, size_t length);
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC32C_H_
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC32C_INLINE_H_
#define ABSL_CRC_INTERNAL_CRC32C_INLINE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// CRC32C extension tuned for small inputs.
//
// When a hardware-accelerated backend is compiled in, extends `*crc` by the
// `n` bytes starting at `p` and returns true. Without such a backend the
// arguments are left untouched and false is returned, telling the caller to
// use another implementation.
inline bool ExtendCrc32cInline(uint32_t* crc, const char* p, size_t n) {
#if defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD) || \
    defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
  constexpr uint32_t kCrc32Xor = 0xffffffffU;
  uint32_t& state = *crc;
  state ^= kCrc32Xor;

  // Consume a 1-, 2- and 4-byte piece as dictated by the low bits of n, which
  // leaves a multiple of 8 bytes for the main loop.
  if ((n & 1) != 0) {
    state = CRC32_u8(state, *p);
    p += 1;
    n -= 1;
  }
  if ((n & 2) != 0) {
    state = CRC32_u16(state, absl::little_endian::Load16(p));
    p += 2;
    n -= 2;
  }
  if ((n & 4) != 0) {
    state = CRC32_u32(state, absl::little_endian::Load32(p));
    p += 4;
    n -= 4;
  }
  for (; n != 0; n -= 8, p += 8) {
    state = CRC32_u64(state, absl::little_endian::Load64(p));
  }

  state ^= kCrc32Xor;
  return true;
#else
  // No hardware support: mark the parameters as used and signal the need to
  // fall back.
  static_cast<void>(n);
  static_cast<void>(p);
  static_cast<void>(crc);
  return false;
#endif  // defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD) ||
        // defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
}
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC32C_INLINE_H_
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
#define ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
#include <cstdint>
#include <memory>
#include <vector>
#include "absl/base/internal/raw_logging.h"
#include "absl/crc/internal/crc.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Prefetch constants used in some Extend() implementations
// kPrefetchHorizon is four times ABSL_CACHELINE_SIZE. The static_assert below
// rejects configurations where it would be smaller than 64, which its message
// describes as the loop length.
constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far
static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len");
// We require the Scramble() function:
// - to be reversible (Unscramble() must exist)
// - to be non-linear in the polynomial's Galois field (so the CRC of a
// scrambled CRC is not linearly affected by the scrambled CRC, even if
// using the same polynomial)
// - not to be its own inverse. Preferably, if X=Scramble^N(X) and N!=0, then
// N is large.
// - to be fast.
// - not to change once defined.
// We introduce non-linearity in two ways:
// Addition of a constant.
// - The carries introduce non-linearity; we use bits of an irrational
// (phi) to make it unlikely that we introduce no carries.
// Rotate by a constant number of bits.
// - We use floor(degree/2)+1, which does not divide the degree, and
// splits the bits nearly evenly, which makes it less likely the
// halves will be the same or one will be all zeroes.
// We do both things to improve the chances of non-linearity in the face of
// bit patterns with low numbers of bits set, while still being fast.
// Below is the constant that we add. The bits are the first 128 bits of the
// fractional part of phi, with a 1 ored into the bottom bit to maximize the
// cycle length of repeated adds.
// Each constant is assembled from two 32-bit literals: the first one forms
// the upper 32 bits, the second one the lower 32 bits of the 64-bit value.
constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) |
static_cast<uint64_t>(0xbfa53e0aU);
constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) |
static_cast<uint64_t>(0x2e76e41bU);
// Base class for concrete CRC implementations. It overrides Empty(), offers
// static helpers for building lookup tables, and requires subclasses to
// provide InitTables(). Instances are not copyable.
class CRCImpl : public CRC { // Implementation of the abstract class CRC
public:
// A table with one 32-bit entry per possible byte value.
using Uint32By256 = uint32_t[256];
CRCImpl() {}
~CRCImpl() override = default;
// The internal version of CRC::New().
// NOTE(review): the CRC class visible in crc.h declares Crc32c() rather than
// New() -- confirm which factory this comment refers to.
static CRCImpl* NewInternal();
void Empty(uint32_t* crc) const override;
// Fill in a table for updating a CRC by one word of 'word_size' bytes
// [last_lo, last_hi] contains the answer if the last bit in the word
// is set.
// NOTE(review): the signature has a single 'last' parameter, not a
// last_lo/last_hi pair -- presumably 'last' plays that role; confirm.
static void FillWordTable(uint32_t poly, uint32_t last, int word_size,
Uint32By256* t);
// Build the table for extending by zeroes, returning the number of entries.
// For a in {1, 2, ..., ZEROES_BASE-1}, b in {0, 1, 2, 3, ...},
// entry j=a-1+(ZEROES_BASE-1)*b
// contains a polynomial Pi such that multiplying
// a CRC by Pi mod P, where P is the CRC polynomial, is equivalent to
// appending a*2**(ZEROES_BASE_LG*b) zero bytes to the original string.
static int FillZeroesTable(uint32_t poly, Uint32By256* t);
// Implemented by subclasses to populate their lookup tables.
virtual void InitTables() = 0;
private:
// Copying is disabled.
CRCImpl(const CRCImpl&) = delete;
CRCImpl& operator=(const CRCImpl&) = delete;
};
// This is the 32-bit implementation. It handles all sizes from 8 to 32.
// It overrides the remaining CRC operations and owns the lookup tables they
// use. Instances are not copyable.
class CRC32 : public CRCImpl {
public:
CRC32() {}
~CRC32() override {}
void Extend(uint32_t* crc, const void* bytes, size_t length) const override;
void ExtendByZeroes(uint32_t* crc, size_t length) const override;
void Scramble(uint32_t* crc) const override;
void Unscramble(uint32_t* crc) const override;
void UnextendByZeroes(uint32_t* crc, size_t length) const override;
void InitTables() override;
private:
// Common implementation guts for ExtendByZeroes and UnextendByZeroes().
//
// zeroes_table is a table as returned by FillZeroesTable(), containing
// polynomials representing CRCs of strings-of-zeros of various lengths,
// and which can be combined by polynomial multiplication. poly_table is
// a table of CRC byte extension values. These tables are determined by
// the generator polynomial.
//
// These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and
// CRC32::zeroes_ and CRC32::table0_ for Extend.
void ExtendByZeroesImpl(uint32_t* crc, size_t length,
const uint32_t zeroes_table[256],
const uint32_t poly_table[256]) const;
uint32_t table0_[256]; // table of byte extensions
uint32_t zeroes_[256]; // table of zero extensions
// table of 4-byte extensions shifted by 12 bytes of zeroes
uint32_t table_[4][256];
// Reverse lookup tables, using the alternate polynomial used by
// UnextendByZeroes().
uint32_t reverse_table0_[256]; // table of reverse byte extensions
uint32_t reverse_zeroes_[256]; // table of reverse zero extensions
// Copying is disabled.
CRC32(const CRC32&) = delete;
CRC32& operator=(const CRC32&) = delete;
};
// Helpers

// Returns a value of type T whose `len` lowest bits are set.
// `len` is a bit count and must satisfy 0 < len <= bit width of T.
template <typename T>
T MaskOfLength(int len) {
  // Shifting 2 by len-1 (instead of 1 by len) keeps the shift amount below
  // the word size even when len equals the full width; a shift by the word
  // size itself would be undefined.
  const int shift = len - 1;
  const T two = static_cast<T>(2);
  return (two << shift) - 1;
}
// Rotates the low-order `width` bits of `in` right by `r` bits. Bits above
// the low `width` bits of the result hold arbitrary values.
// NOTE(review): the shift amounts below assume 0 < r < width -- confirm
// against callers.
template <typename T>
T RotateRight(T in, int width, int r) {
  const int kept = width - r;
  // The r lowest bits wrap around to the top of the width-bit field ...
  const T wrapped = in << kept;
  // ... while the remaining bits move down and are masked to `kept` bits.
  const T shifted = (in >> r) & MaskOfLength<T>(kept);
  return wrapped | shifted;
}
// RoundUp<N>(p) returns the lowest address >= p aligned to an N-byte
// boundary. Requires that N is a positive power of 2.
template <int alignment>
const uint8_t* RoundUp(const uint8_t* p) {
  // The bit trick alone also accepts 0, which would turn the mask into all
  // ones and make the function return a null pointer; reject it explicitly.
  static_assert(alignment > 0 && (alignment & (alignment - 1)) == 0,
                "alignment is not 2^n");
  constexpr uintptr_t mask = alignment - 1;
  const uintptr_t as_uintptr = reinterpret_cast<uintptr_t>(p);
  // Adding the mask and clearing the low bits rounds up to the next multiple.
  return reinterpret_cast<const uint8_t*>((as_uintptr + mask) & ~mask);
}
// Return a newly created CRC32AcceleratedX86ARMCombined if we can use Intel's
// or ARM's CRC acceleration for a given polynomial. Return nullptr otherwise.
// The result is a raw pointer; presumably the caller takes ownership --
// confirm against the definition.
CRCImpl* TryNewCRC32AcceleratedX86ARMCombined();
// Return all possible hardware accelerated implementations. For testing only.
// Each returned element is owned by its unique_ptr.
std::vector<std::unique_ptr<CRCImpl>> NewCRC32AcceleratedX86ARMCombinedAll();
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_CRC_MEMCPY_H_
#define ABSL_CRC_INTERNAL_CRC_MEMCPY_H_
#include <cstddef>
#include <memory>
#include "absl/base/config.h"
#include "absl/crc/crc32c.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Abstract interface of an engine that copies memory and computes a CRC32C in
// one operation. Only subclasses can construct it.
class CrcMemcpyEngine {
public:
virtual ~CrcMemcpyEngine() = default;
// Takes a destination buffer, a source buffer, the number of bytes and a
// starting CRC, and returns a crc32c_t. Both pointers are declared
// __restrict, so the two buffers must not overlap.
virtual crc32c_t Compute(void* __restrict dst, const void* __restrict src,
std::size_t length, crc32c_t initial_crc) const = 0;
protected:
CrcMemcpyEngine() = default;
};
// Static entry points that dispatch a combined copy-and-CRC request to an
// architecture-specific engine.
class CrcMemcpy {
public:
// Forwards the request to one of two engines and returns that engine's
// result. `non_temporal` selects the non-temporal engine; by default the
// temporal one is used. `initial_crc` defaults to ToCrc32c(0).
static crc32c_t CrcAndCopy(void* __restrict dst, const void* __restrict src,
std::size_t length,
crc32c_t initial_crc = ToCrc32c(0),
bool non_temporal = false) {
// The engine pair is a function-local static, so GetArchSpecificEngines()
// runs once, on the first call.
static const ArchSpecificEngines engines = GetArchSpecificEngines();
auto* engine = non_temporal ? engines.non_temporal : engines.temporal;
return engine->Compute(dst, src, length, initial_crc);
}
// For testing only: get an architecture-specific engine for tests.
static std::unique_ptr<CrcMemcpyEngine> GetTestEngine(int vector,
int integer);
private:
// The two engines CrcAndCopy() chooses between, held as raw pointers.
struct ArchSpecificEngines {
CrcMemcpyEngine* temporal;
CrcMemcpyEngine* non_temporal;
};
// Supplies the engine pair used by CrcAndCopy().
static ArchSpecificEngines GetArchSpecificEngines();
};
// Fallback CRC-memcpy engine. Not copyable.
class FallbackCrcMemcpyEngine : public CrcMemcpyEngine {
 public:
  FallbackCrcMemcpyEngine() = default;
  FallbackCrcMemcpyEngine(const FallbackCrcMemcpyEngine&) = delete;
  // Declared with the canonical signature (returning a reference) even though
  // it is deleted.
  FallbackCrcMemcpyEngine& operator=(const FallbackCrcMemcpyEngine&) = delete;

  // Takes non-overlapping destination and source buffers, the byte count and
  // a starting CRC, and returns a crc32c_t.
  crc32c_t Compute(void* __restrict dst, const void* __restrict src,
                   std::size_t length, crc32c_t initial_crc) const override;
};
// CRC Non-Temporal-Memcpy engine. Not copyable.
class CrcNonTemporalMemcpyEngine : public CrcMemcpyEngine {
 public:
  CrcNonTemporalMemcpyEngine() = default;
  CrcNonTemporalMemcpyEngine(const CrcNonTemporalMemcpyEngine&) = delete;
  // Declared with the canonical signature (returning a reference) even though
  // it is deleted.
  CrcNonTemporalMemcpyEngine& operator=(const CrcNonTemporalMemcpyEngine&) =
      delete;

  // Takes non-overlapping destination and source buffers, the byte count and
  // a starting CRC, and returns a crc32c_t.
  crc32c_t Compute(void* __restrict dst, const void* __restrict src,
                   std::size_t length, crc32c_t initial_crc) const override;
};
// CRC Non-Temporal-Memcpy AVX engine. Not copyable.
class CrcNonTemporalMemcpyAVXEngine : public CrcMemcpyEngine {
 public:
  CrcNonTemporalMemcpyAVXEngine() = default;
  CrcNonTemporalMemcpyAVXEngine(const CrcNonTemporalMemcpyAVXEngine&) = delete;
  // Declared with the canonical signature (returning a reference) even though
  // it is deleted.
  CrcNonTemporalMemcpyAVXEngine& operator=(
      const CrcNonTemporalMemcpyAVXEngine&) = delete;

  // Takes non-overlapping destination and source buffers, the byte count and
  // a starting CRC, and returns a crc32c_t.
  crc32c_t Compute(void* __restrict dst, const void* __restrict src,
                   std::size_t length, crc32c_t initial_crc) const override;
};
// Copy source to destination and return the CRC32C of the data copied. If an
// accelerated version is available, use the accelerated version, otherwise use
// the generic fallback version.
//
// This is a thin convenience wrapper: all arguments, including the defaults
// for `initial_crc` and `non_temporal`, are forwarded unchanged to
// CrcMemcpy::CrcAndCopy().
inline crc32c_t Crc32CAndCopy(void* __restrict dst, const void* __restrict src,
std::size_t length,
crc32c_t initial_crc = ToCrc32c(0),
bool non_temporal = false) {
return CrcMemcpy::CrcAndCopy(dst, src, length, initial_crc, non_temporal);
}
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_CRC_MEMCPY_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include "absl/base/config.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/crc_memcpy.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Copies `length` bytes from `src` to `dst` and returns `initial_crc` extended
// by the copied bytes. The work is done in blocks of at most 8 KiB, each of
// which is first fed to the CRC and then copied.
absl::crc32c_t FallbackCrcMemcpyEngine::Compute(void* __restrict dst,
                                                const void* __restrict src,
                                                std::size_t length,
                                                crc32c_t initial_crc) const {
  constexpr std::size_t kBlockSize = 8192;
  absl::crc32c_t crc = initial_crc;

  const char* src_bytes = static_cast<const char*>(src);
  char* dst_bytes = static_cast<char*>(dst);

  // Copy + CRC loop - run 8k chunks until we are out of full chunks. CRC
  // then copy was found to be slightly more efficient in our test cases.
  // `offset` never exceeds `length`, so the subtraction cannot wrap around,
  // unlike a bound written as `offset + kBlockSize`.
  std::size_t offset = 0;
  for (; length - offset > kBlockSize; offset += kBlockSize) {
    crc = absl::ExtendCrc32c(crc,
                             absl::string_view(src_bytes + offset, kBlockSize));
    std::memcpy(dst_bytes + offset, src_bytes + offset, kBlockSize);
  }

  // Handle the remaining bytes (at most one block). Save some work if length
  // is 0.
  if (offset < length) {
    const std::size_t final_copy_size = length - offset;
    crc = absl::ExtendCrc32c(
        crc, absl::string_view(src_bytes + offset, final_copy_size));
    std::memcpy(dst_bytes + offset, src_bytes + offset, final_copy_size);
  }

  return crc;
}
// Compile the following only if SSE4.2 is not enabled at compile time (that
// is, __SSE4_2__ is not defined).
#ifndef __SSE4_2__
// Fallback variant: fills both the temporal and the non-temporal slot with
// its own FallbackCrcMemcpyEngine. The engines are allocated with `new` and
// are not freed by this function.
CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
  CrcMemcpy::ArchSpecificEngines fallback_engines;
  fallback_engines.temporal = new FallbackCrcMemcpyEngine();
  fallback_engines.non_temporal = new FallbackCrcMemcpyEngine();
  return fallback_engines;
}
// Fallback variant: both lane-count arguments are ignored and a fresh
// FallbackCrcMemcpyEngine is returned, owned by the caller.
std::unique_ptr<CrcMemcpyEngine> CrcMemcpy::GetTestEngine(int /*vector*/,
                                                          int /*integer*/) {
  std::unique_ptr<CrcMemcpyEngine> engine =
      std::make_unique<FallbackCrcMemcpyEngine>();
  return engine;
}
#endif
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/crc/internal/crc_memcpy.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include "gtest/gtest.h"
#include "absl/crc/crc32c.h"
#include "absl/memory/memory.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace {
// Selects which CrcMemcpyEngine implementation a parameterized test builds.
enum CrcEngine {
  X86 = 0,      // Engine obtained from CrcMemcpy::GetTestEngine().
  NONTEMPORAL,  // == 1: CrcNonTemporalMemcpyEngine.
  FALLBACK,     // == 2: FallbackCrcMemcpyEngine.
};
// Correctness tests:
// - Every source/destination byte alignment 0-15, every size 0-511 bytes
// - Arbitrarily aligned source, large size
// Fixture owning a source and a destination buffer of kSize bytes each, i.e.
// the largest copy (`max_size`) plus kAlignment bytes of slack so that either
// pointer can be shifted by up to kAlignment - 1 bytes. Also owns the random
// generator used to fill the buffers.
template <size_t max_size>
class CrcMemcpyTest : public testing::Test {
 protected:
  static constexpr size_t kAlignment = 16;
  static constexpr size_t kMaxCopySize = max_size;
  static constexpr size_t kSize = kAlignment + kMaxCopySize;

  CrcMemcpyTest()
      : source_(std::make_unique<char[]>(kSize)),
        destination_(std::make_unique<char[]>(kSize)) {}

  std::unique_ptr<char[]> source_;
  std::unique_ptr<char[]> destination_;
  absl::BitGen gen_;
};
// The small test is slightly larger than 4096 bytes to allow coverage of the
// "large" copy function. The minimum size to exercise all code paths in that
// function would be around 256 consecutive tests (getting every possible tail
// value and 0-2 small copy loops after the main block), so testing from
// 4096-4500 will cover all of those code paths multiple times.
using CrcSmallTest = CrcMemcpyTest<4500>;
// Large variant: copies of up to 16 MiB (1 << 24 bytes).
using CrcLargeTest = CrcMemcpyTest<(1 << 24)>;
// Parametrizes the small test so that it can be run with all configurations.
// ParamsT must expose `crc_engine_selector`, `vector_lanes` and
// `integer_lanes`; the constructor uses them to pick the engine under test.
template <typename ParamsT>
class x86ParamTestTemplate : public CrcSmallTest,
                             public ::testing::WithParamInterface<ParamsT> {
 protected:
  x86ParamTestTemplate() {
    const ParamsT params = GetParam();
    switch (params.crc_engine_selector) {
      case FALLBACK:
        engine_ =
            std::make_unique<absl::crc_internal::FallbackCrcMemcpyEngine>();
        break;
      case NONTEMPORAL:
        engine_ =
            std::make_unique<absl::crc_internal::CrcNonTemporalMemcpyEngine>();
        break;
      default:
        // Any other selector (i.e. X86) asks the library for a test engine
        // with the requested lane counts.
        engine_ = absl::crc_internal::CrcMemcpy::GetTestEngine(
            params.vector_lanes, params.integer_lanes);
        break;
    }
  }

  // Convenience method: forwards to the gtest parameter accessor.
  ParamsT GetParam() const {
    return ::testing::WithParamInterface<ParamsT>::GetParam();
  }

  // Engine exercised by the tests.
  std::unique_ptr<absl::crc_internal::CrcMemcpyEngine> engine_;
};
// Parameters for one engine configuration under test.
struct TestParams {
  // Which engine implementation to construct.
  CrcEngine crc_engine_selector{X86};
  // Lane counts forwarded to CrcMemcpy::GetTestEngine(); only read when the
  // selector is X86.
  int vector_lanes{0};
  int integer_lanes{0};
};
using x86ParamTest = x86ParamTestTemplate<TestParams>;
// SmallCorrectness is designed to exercise every possible set of code paths
// in the memcpy code, not including the loop. This variant shifts the source
// pointer by 0..kAlignment-1 bytes; the destination stays at its base address.
TEST_P(x86ParamTest, SmallCorrectnessCheckSourceAlignment) {
  constexpr size_t kTestSizes[] = {0, 100, 255, 512, 1024, 4000, kMaxCopySize};

  for (size_t source_alignment = 0; source_alignment < kAlignment;
       ++source_alignment) {
    char* const src = source_.get() + source_alignment;
    char* const dst = destination_.get();

    for (const size_t size : kTestSizes) {
      SCOPED_TRACE(absl::StrCat("Error in memcpy of size: ", size,
                                " with source alignment: ", source_alignment));

      // Fresh random payload and random starting CRC for every case.
      for (size_t i = 0; i < size; ++i) {
        src[i] = static_cast<char>(absl::Uniform<unsigned char>(gen_));
      }
      const absl::crc32c_t initial_crc =
          absl::ToCrc32c(absl::Uniform<uint32_t>(gen_));

      const absl::crc32c_t experiment_crc =
          engine_->Compute(dst, src, size, initial_crc);

      // The destination must be a byte-for-byte copy of the source.
      const int mem_comparison = memcmp(dst, src, size);
      ASSERT_EQ(mem_comparison, 0);

      // The returned CRC must equal a plain ExtendCrc32c over the source.
      const absl::crc32c_t baseline_crc =
          absl::ExtendCrc32c(initial_crc, absl::string_view(src, size));
      ASSERT_EQ(baseline_crc, experiment_crc);
    }
  }
}
// Copies from the base of the source buffer into a destination pointer that
// is shifted by 0..kAlignment-1 bytes, checking both the copied bytes and the
// returned CRC for each size.
TEST_P(x86ParamTest, SmallCorrectnessCheckDestAlignment) {
  constexpr size_t kTestSizes[] = {0, 100, 255, 512, 1024, 4000, kMaxCopySize};

  for (size_t dest_alignment = 0; dest_alignment < kAlignment;
       ++dest_alignment) {
    char* const src = source_.get();
    char* const dst = destination_.get() + dest_alignment;

    for (const size_t size : kTestSizes) {
      SCOPED_TRACE(absl::StrCat("Error in memcpy of size: ", size,
                                " with dest alignment: ", dest_alignment));

      // Fresh random payload and random starting CRC for every case.
      for (size_t i = 0; i < size; ++i) {
        src[i] = static_cast<char>(absl::Uniform<unsigned char>(gen_));
      }
      const absl::crc32c_t initial_crc =
          absl::ToCrc32c(absl::Uniform<uint32_t>(gen_));

      const absl::crc32c_t experiment_crc =
          engine_->Compute(dst, src, size, initial_crc);

      // The destination must be a byte-for-byte copy of the source.
      const int mem_comparison = memcmp(dst, src, size);
      ASSERT_EQ(mem_comparison, 0);

      // The returned CRC must equal a plain ExtendCrc32c over the source.
      const absl::crc32c_t baseline_crc =
          absl::ExtendCrc32c(initial_crc, absl::string_view(src, size));
      ASSERT_EQ(baseline_crc, experiment_crc);
    }
  }
}
// Runs every x86ParamTest case once per engine configuration listed below.
// Each entry is TestParams{crc_engine_selector, vector_lanes, integer_lanes};
// the fixture only reads the lane counts when the selector is X86.
INSTANTIATE_TEST_SUITE_P(x86ParamTest, x86ParamTest,
::testing::Values(
// Tests for configurations that may occur in prod.
TestParams{X86, 3, 0}, TestParams{X86, 1, 2},
// Fallback test.
TestParams{FALLBACK, 0, 0},
// Non-temporal test.
TestParams{NONTEMPORAL, 0, 0}));
} // namespace
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include "absl/base/config.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/crc_memcpy.h"
#include "absl/crc/internal/non_temporal_memcpy.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// Copies `length` bytes from `src` to `dst` via non_temporal_store_memcpy()
// while extending `initial_crc` over the source bytes; returns the extended
// CRC. The data is processed in pieces of at most kBlockSize bytes, and each
// piece is checksummed before it is copied.
crc32c_t CrcNonTemporalMemcpyEngine::Compute(void* __restrict dst,
                                             const void* __restrict src,
                                             std::size_t length,
                                             crc32c_t initial_crc) const {
  constexpr size_t kBlockSize = 8192;
  const char* in = static_cast<const char*>(src);
  char* out = static_cast<char*>(dst);
  crc32c_t running_crc = initial_crc;
  std::size_t remaining = length;

  // Block phase: runs only while strictly more than one block is left, so the
  // final piece (which may itself be a full block) is handled by the tail.
  while (remaining > kBlockSize) {
    running_crc =
        absl::ExtendCrc32c(running_crc, absl::string_view(in, kBlockSize));
    non_temporal_store_memcpy(out, in, kBlockSize);
    in += kBlockSize;
    out += kBlockSize;
    remaining -= kBlockSize;
  }

  // Tail phase: skipped only when length is 0.
  if (remaining != 0) {
    running_crc = ExtendCrc32c(running_crc, absl::string_view(in, remaining));
    non_temporal_store_memcpy(out, in, remaining);
  }
  return running_crc;
}
// Copies `length` bytes from `src` to `dst` via
// non_temporal_store_memcpy_avx() while extending `initial_crc` over the
// source bytes; returns the extended CRC. The data is processed in pieces of
// at most kBlockSize bytes, and each piece is checksummed before it is copied.
crc32c_t CrcNonTemporalMemcpyAVXEngine::Compute(void* __restrict dst,
                                                const void* __restrict src,
                                                std::size_t length,
                                                crc32c_t initial_crc) const {
  constexpr size_t kBlockSize = 8192;
  const char* in = static_cast<const char*>(src);
  char* out = static_cast<char*>(dst);
  crc32c_t running_crc = initial_crc;
  std::size_t remaining = length;

  // Block phase: runs only while strictly more than one block is left, so the
  // final piece (which may itself be a full block) is handled by the tail.
  while (remaining > kBlockSize) {
    running_crc = ExtendCrc32c(running_crc, absl::string_view(in, kBlockSize));
    non_temporal_store_memcpy_avx(out, in, kBlockSize);
    in += kBlockSize;
    out += kBlockSize;
    remaining -= kBlockSize;
  }

  // Tail phase: skipped only when length is 0.
  if (remaining != 0) {
    running_crc = ExtendCrc32c(running_crc, absl::string_view(in, remaining));
    non_temporal_store_memcpy_avx(out, in, remaining);
  }
  return running_crc;
}
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
#define ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
#ifdef __aarch64__
#include <arm_neon.h>
// Map the SSE 128-bit integer vector type name onto NEON's int64x2_t so that
// code written against the SSE intrinsic names can be compiled on aarch64.
typedef int64x2_t __m128i; /* 128-bit vector containing integers */
// Reinterprets an int32x4_t as __m128i (i.e. as int64x2_t).
#define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x)
// __m128i already is int64x2_t here, so this conversion is the identity.
#define vreinterpretq_s64_m128i(x) (x)
// aarch64 stand-in for the SSE store fence: guarantees that every preceding
// store is globally visible before any subsequent store. Implemented with the
// compiler's __sync_synchronize() builtin.
// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx
static inline __attribute__((always_inline)) void _mm_sfence() {
  __sync_synchronize();
}
// Load 128-bits of integer data from unaligned memory into dst. On x86 this
// intrinsic may perform better than _mm_loadu_si128 when the data crosses a
// cache line boundary; in this aarch64 shim it is simply an alias for
// _mm_loadu_si128.
//
// dst[127:0] := MEM[mem_addr+127:mem_addr]
//
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128
#define _mm_lddqu_si128 _mm_loadu_si128
// Loads a 128-bit value from `p` using the NEON vld1q_s32 load and returns it
// as an __m128i.
// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx
static inline __attribute__((always_inline)) __m128i _mm_loadu_si128(
    const __m128i *p) {
  const int32_t *words = reinterpret_cast<const int32_t *>(p);
  return vreinterpretq_m128i_s32(vld1q_s32(words));
}
// Stores the data in a to the address p without polluting the caches. If the
// cache line containing address p is already in the cache, the cache will be
// updated.
//
// Uses __builtin_nontemporal_store when the compiler reports it; otherwise
// falls back to an ordinary NEON store (vst1q_s64), which is functionally
// equivalent but carries no non-temporal hint.
// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
static inline __attribute__((always_inline)) void _mm_stream_si128(__m128i *p,
                                                                   __m128i a) {
// `#if __has_builtin(...)` on its own is a preprocessing error on compilers
// that do not provide __has_builtin at all (e.g. GCC before version 10), so
// probe for the operator first.
#ifdef __has_builtin
#if __has_builtin(__builtin_nontemporal_store)
  __builtin_nontemporal_store(a, p);
#else
  vst1q_s64(reinterpret_cast<int64_t *>(p), vreinterpretq_s64_m128i(a));
#endif
#else
  vst1q_s64(reinterpret_cast<int64_t *>(p), vreinterpretq_s64_m128i(a));
#endif  // __has_builtin
}
// Sets the 16 signed 8-bit integer values. Arguments are given from the
// highest lane (b15) down to the lowest (b0); b0 ends up at the lowest
// address of the 16-byte staging array that is then loaded as one vector.
// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx
static inline __attribute__((always_inline)) __m128i _mm_set_epi8(
    signed char b15, signed char b14, signed char b13, signed char b12,
    signed char b11, signed char b10, signed char b9, signed char b8,
    signed char b7, signed char b6, signed char b5, signed char b4,
    signed char b3, signed char b2, signed char b1, signed char b0) {
  alignas(16) int8_t lanes[16] = {
      static_cast<int8_t>(b0),  static_cast<int8_t>(b1),
      static_cast<int8_t>(b2),  static_cast<int8_t>(b3),
      static_cast<int8_t>(b4),  static_cast<int8_t>(b5),
      static_cast<int8_t>(b6),  static_cast<int8_t>(b7),
      static_cast<int8_t>(b8),  static_cast<int8_t>(b9),
      static_cast<int8_t>(b10), static_cast<int8_t>(b11),
      static_cast<int8_t>(b12), static_cast<int8_t>(b13),
      static_cast<int8_t>(b14), static_cast<int8_t>(b15)};
  // Reinterpret the int8x16_t bit pattern as __m128i (int64x2_t).
  return vreinterpretq_s64_s8(vld1q_s8(lanes));
}
#endif // __aarch64__
#endif // ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_MEMCPY_H_
#define ABSL_CRC_INTERNAL_NON_TEMPORAL_MEMCPY_H_
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#ifdef __SSE__
// Only include if we're running on a CPU that supports SSE ISA, needed for
// sfence
#include <immintrin.h> // IWYU pragma: keep
#endif
#ifdef __SSE2__
// Only include if we're running on a CPU that supports SSE2 ISA, needed for
// movdqa, movdqu, movntdq
#include <emmintrin.h> // IWYU pragma: keep
#endif
#ifdef __aarch64__
// Only include if we're running on a CPU that supports ARM NEON ISA, needed for
// sfence, movdqa, movdqu, movntdq
#include "absl/crc/internal/non_temporal_arm_intrinsics.h"
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {
// The non-temporal memcpy routines in this header do regular loads and
// non-temporal stores. They are compatible with both 16-byte aligned and
// unaligned addresses. If data at the destination is not immediately
// accessed, using non-temporal memcpy can save 1 DRAM load of the destination
// cacheline.
//
// Cache line size in bytes, taken from ABSL_CACHELINE_SIZE.
constexpr int kCacheLineSize = ABSL_CACHELINE_SIZE;
// Copies `len` bytes from `src` to `dst` and returns `dst`.
//
// When SSE3 or aarch64 is available, the misaligned head of the destination
// and the sub-cache-line tail are copied with memcpy(); everything in between
// is copied one cache line at a time using regular loads and non-temporal
// (streaming) stores. Otherwise this is a plain memcpy().
//
// If the objects overlap, the behavior is undefined.
// MSVC does not have proper header support for some of these intrinsics,
// so it should go to fallback.
inline void *non_temporal_store_memcpy(void *__restrict dst,
                                       const void *__restrict src, size_t len) {
#if (defined(__SSE3__) || defined(__aarch64__)) && !defined(_MSC_VER)
  constexpr size_t kLineBytes = kCacheLineSize;
  // The streaming loop moves exactly four 16-byte vectors per iteration while
  // advancing by one cache line, so it is only correct for 64-byte lines.
  static_assert(kLineBytes == 4 * sizeof(__m128i),
                "non_temporal_store_memcpy assumes a 64-byte cache line");

  uint8_t *d = reinterpret_cast<uint8_t *>(dst);
  const uint8_t *s = reinterpret_cast<const uint8_t *>(src);

  // memcpy() the misaligned header. At the end of this if block, <d> is
  // aligned to a 64-byte cacheline boundary or <len> == 0.
  const size_t misalignment =
      static_cast<size_t>(reinterpret_cast<uintptr_t>(d) & (kLineBytes - 1));
  if (misalignment != 0) {
    const size_t bytes_before_alignment_boundary = kLineBytes - misalignment;
    // Keep everything in size_t: no narrowing and no mixed-type std::min.
    const size_t header_len = (std::min)(bytes_before_alignment_boundary, len);
    assert(bytes_before_alignment_boundary < kLineBytes);
    memcpy(d, s, header_len);
    d += header_len;
    s += header_len;
    len -= header_len;
  }

  if (len >= kLineBytes) {
    _mm_sfence();
    __m128i *dst_cacheline = reinterpret_cast<__m128i *>(d);
    const __m128i *src_cacheline = reinterpret_cast<const __m128i *>(s);
    constexpr size_t kOpsPerCacheLine = kLineBytes / sizeof(__m128i);

    while (len >= kLineBytes) {
      // Load the whole line first, then stream it out.
      const __m128i temp1 = _mm_lddqu_si128(src_cacheline + 0);
      const __m128i temp2 = _mm_lddqu_si128(src_cacheline + 1);
      const __m128i temp3 = _mm_lddqu_si128(src_cacheline + 2);
      const __m128i temp4 = _mm_lddqu_si128(src_cacheline + 3);
      _mm_stream_si128(dst_cacheline + 0, temp1);
      _mm_stream_si128(dst_cacheline + 1, temp2);
      _mm_stream_si128(dst_cacheline + 2, temp3);
      _mm_stream_si128(dst_cacheline + 3, temp4);
      src_cacheline += kOpsPerCacheLine;
      dst_cacheline += kOpsPerCacheLine;
      len -= kLineBytes;
    }
    // The vector cursors already point just past the last streamed line;
    // reuse them instead of recomputing the advance from a loop count.
    d = reinterpret_cast<uint8_t *>(dst_cacheline);
    s = reinterpret_cast<const uint8_t *>(src_cacheline);
    _mm_sfence();
  }

  // memcpy the tail.
  if (len) {
    memcpy(d, s, len);
  }
  return dst;
#else
  // Fallback to regular memcpy when SSE2/3 & aarch64 is not available.
  return memcpy(dst, src, len);
#endif  // (__SSE3__ || __aarch64__) && !_MSC_VER
}
// Copies `len` bytes from `src` to `dst` and returns `dst`.
//
// When AVX is available, the misaligned head of the destination and the
// sub-cache-line tail are copied with memcpy(); everything in between is
// copied one cache line at a time using 256-bit loads and non-temporal
// (streaming) stores. Otherwise this is a plain memcpy().
//
// If the objects overlap, the behavior is undefined.
// MSVC does not have proper header support for some of these intrinsics,
// so it should go to fallback.
inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
                                           const void *__restrict src,
                                           size_t len) {
#if defined(__AVX__) && !defined(_MSC_VER)
  constexpr size_t kLineBytes = kCacheLineSize;
  // The streaming loop moves exactly two 32-byte vectors per iteration while
  // advancing by one cache line, so it is only correct for 64-byte lines.
  static_assert(kLineBytes == 2 * sizeof(__m256i),
                "non_temporal_store_memcpy_avx assumes a 64-byte cache line");

  uint8_t *d = reinterpret_cast<uint8_t *>(dst);
  const uint8_t *s = reinterpret_cast<const uint8_t *>(src);

  // memcpy() the misaligned header. At the end of this if block, <d> is
  // aligned to a 64-byte cacheline boundary or <len> == 0.
  const size_t misalignment =
      static_cast<size_t>(reinterpret_cast<uintptr_t>(d) & (kLineBytes - 1));
  if (misalignment != 0) {
    const size_t bytes_before_alignment_boundary = kLineBytes - misalignment;
    // Keep everything in size_t: no narrowing and no mixed-type std::min.
    const size_t header_len = (std::min)(bytes_before_alignment_boundary, len);
    assert(bytes_before_alignment_boundary < kLineBytes);
    memcpy(d, s, header_len);
    d += header_len;
    s += header_len;
    len -= header_len;
  }

  if (len >= kLineBytes) {
    _mm_sfence();
    __m256i *dst_cacheline = reinterpret_cast<__m256i *>(d);
    const __m256i *src_cacheline = reinterpret_cast<const __m256i *>(s);
    constexpr size_t kOpsPerCacheLine = kLineBytes / sizeof(__m256i);

    while (len >= kLineBytes) {
      // Load the whole line first, then stream it out.
      const __m256i temp1 = _mm256_lddqu_si256(src_cacheline + 0);
      const __m256i temp2 = _mm256_lddqu_si256(src_cacheline + 1);
      _mm256_stream_si256(dst_cacheline + 0, temp1);
      _mm256_stream_si256(dst_cacheline + 1, temp2);
      src_cacheline += kOpsPerCacheLine;
      dst_cacheline += kOpsPerCacheLine;
      len -= kLineBytes;
    }
    // Resume byte-wise copying where the vector cursors stopped. Advancing by
    // an `int` loop count times the line size would overflow `int` for copies
    // of 2 GiB or more and leave the tail copy pointing at the wrong bytes.
    d = reinterpret_cast<uint8_t *>(dst_cacheline);
    s = reinterpret_cast<const uint8_t *>(src_cacheline);
    _mm_sfence();
  }

  // memcpy the tail.
  if (len) {
    memcpy(d, s, len);
  }
  return dst;
#else
  // Fallback to regular memcpy when AVX is not available.
  return memcpy(dst, src, len);
#endif  // __AVX__ && !_MSC_VER
}
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_CRC_INTERNAL_NON_TEMPORAL_MEMCPY_H_
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/crc/internal/non_temporal_memcpy.h"
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>
#include "gtest/gtest.h"
namespace {
// One copy scenario for the non-temporal memcpy tests.
struct TestParam {
// Number of bytes to copy.
size_t copy_size;
// Byte offset of the source pointer from the start of the source buffer.
uint32_t src_offset;
// Byte offset of the destination pointer from the start of the destination
// buffer.
uint32_t dst_offset;
};
// Fixture that prepares two equally sized byte buffers for one TestParam:
// `a_` holds the pattern i % 256 and `b_` holds its bitwise complement, so
// the two buffers differ at every index before a copy is made.
class NonTemporalMemcpyTest : public testing::TestWithParam<TestParam> {
 protected:
  void SetUp() override {
    const TestParam& param = GetParam();
    // Room for the copy starting at the larger of the two offsets...
    const size_t needed =
        std::max(param.src_offset, param.dst_offset) + param.copy_size;
    // ...rounded up to a multiple of 16 bytes.
    const size_t buf_size = (needed + 15) / 16 * 16;

    a_.resize(buf_size);
    b_.resize(buf_size);
    for (size_t i = 0; i < buf_size; ++i) {
      a_[i] = static_cast<uint8_t>(i % 256);
      b_[i] = static_cast<uint8_t>(~a_[i]);
    }
  }

  std::vector<uint8_t> a_, b_;
};
// Copies with non_temporal_store_memcpy() and checks every copied byte.
TEST_P(NonTemporalMemcpyTest, SSEEquality) {
  const TestParam& param = GetParam();
  uint8_t* const src = a_.data() + param.src_offset;
  uint8_t* const dst = b_.data() + param.dst_offset;

  absl::crc_internal::non_temporal_store_memcpy(dst, src, param.copy_size);

  for (size_t i = 0; i < param.copy_size; ++i) {
    EXPECT_EQ(src[i], dst[i]);
  }
}
// Copies with non_temporal_store_memcpy_avx() and checks every copied byte.
TEST_P(NonTemporalMemcpyTest, AVXEquality) {
  const TestParam& param = GetParam();
  uint8_t* const src = a_.data() + param.src_offset;
  uint8_t* const dst = b_.data() + param.dst_offset;

  absl::crc_internal::non_temporal_store_memcpy_avx(dst, src, param.copy_size);

  for (size_t i = 0; i < param.copy_size; ++i) {
    EXPECT_EQ(src[i], dst[i]);
  }
}
// Each entry is {copy_size, src_offset, dst_offset}.
// 63B is smaller than one cacheline operation thus the non-temporal routine
// will not be called.
// The fixture's SetUp() sizes its buffers from each entry, so every copy fits
// together with its offsets.
constexpr TestParam params[] = {
{63, 0, 0}, {58, 5, 5}, {61, 2, 0}, {61, 0, 2},
{58, 5, 2}, {4096, 0, 0}, {4096, 0, 1}, {4096, 0, 2},
{4096, 0, 3}, {4096, 0, 4}, {4096, 0, 5}, {4096, 0, 6},
{4096, 0, 7}, {4096, 0, 8}, {4096, 0, 9}, {4096, 0, 10},
{4096, 0, 11}, {4096, 0, 12}, {4096, 0, 13}, {4096, 0, 14},
{4096, 0, 15}, {4096, 7, 7}, {4096, 3, 0}, {4096, 1, 0},
{4096, 9, 3}, {4096, 9, 11}, {8192, 0, 0}, {8192, 5, 2},
{1024768, 7, 11}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0},
{1, 1, 1}};
// Runs every NonTemporalMemcpyTest case once per entry of `params`.
INSTANTIATE_TEST_SUITE_P(ParameterizedNonTemporalMemcpyTest,
NonTemporalMemcpyTest, testing::ValuesIn(params));
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment