Commit 5d8fc919 by Abseil Team Committed by Gennadiy Rozental

Export of internal Abseil changes

--
fcedaa5714efab8738446fa21620b827a40a3458 by Derek Mauro <dmauro@google.com>:

Uses Wyhash in the implementation of absl::Hash for hashing sequences
of more than 16 bytes on some platforms.

Due to the per-process randomization of the seed used by absl::Hash, users
should not notice this change, other than possibly getting better performance.

This change only affects platforms where absl::uint128 is implemented
with an intrinsic (and where sizeof(size_t)==8) since Wyhash relies on
fast 128-bit multiplication for speed.

PiperOrigin-RevId: 343956735

--
085e108c760084f19caa21dbeb2118de2be3f8f0 by Abseil Team <absl-team@google.com>:

Internal change for cord ring

PiperOrigin-RevId: 343919274

--
4c333278ad14d6692f203074b902506008ad624a by Jorg Brown <jorg@google.com>:

Minimize strings_internal::StringConstant further, by removing the need for the compiler to instantiate a compile-time class function.

PiperOrigin-RevId: 343878568

--
71c3c8c7b7821b67997e3d5345aaec67f93f266f by Abseil Team <absl-team@google.com>:

Internal change

PiperOrigin-RevId: 343838259
GitOrigin-RevId: fcedaa5714efab8738446fa21620b827a40a3458
Change-Id: Ifb91895a82d11e743acd42fe97ab7fb70712b7df
parent e19260fd
......@@ -122,6 +122,8 @@ set(ABSL_INTERNAL_DLL_FILES
"hash/internal/hash.h"
"hash/internal/hash.cc"
"hash/internal/spy_hash_state.h"
"hash/internal/wyhash.h"
"hash/internal/wyhash.cc"
"memory/memory.h"
"meta/type_traits.h"
"numeric/int128.cc"
......
......@@ -37,6 +37,7 @@ cc_library(
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
":city",
":wyhash",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/container:fixed_array",
......@@ -120,3 +121,30 @@ cc_test(
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "wyhash",
srcs = ["internal/wyhash.cc"],
hdrs = ["internal/wyhash.h"],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
"//absl/base:config",
"//absl/base:endian",
"//absl/numeric:int128",
],
)
cc_test(
name = "wyhash_test",
srcs = ["internal/wyhash_test.cc"],
copts = ABSL_TEST_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
":wyhash",
"//absl/strings",
"@com_google_googletest//:gtest_main",
],
)
......@@ -25,6 +25,7 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::city
absl::core_headers
absl::endian
absl::fixed_array
......@@ -34,7 +35,7 @@ absl_cc_library(
absl::optional
absl::variant
absl::utility
absl::city
absl::wyhash
PUBLIC
)
......@@ -114,3 +115,30 @@ absl_cc_test(
gmock_main
)
absl_cc_library(
NAME
wyhash
HDRS
"internal/wyhash.h"
SRCS
"internal/wyhash.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::endian
absl::int128
)
absl_cc_test(
NAME
wyhash_test
SRCS
"internal/wyhash_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::wyhash
absl::strings
gmock_main
)
......@@ -18,7 +18,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
uint64_t CityHashState::CombineLargeContiguousImpl32(uint64_t state,
uint64_t HashState::CombineLargeContiguousImpl32(uint64_t state,
const unsigned char* first,
size_t len) {
while (len >= PiecewiseChunkSize()) {
......@@ -33,13 +33,11 @@ uint64_t CityHashState::CombineLargeContiguousImpl32(uint64_t state,
std::integral_constant<int, 4>{});
}
uint64_t CityHashState::CombineLargeContiguousImpl64(uint64_t state,
uint64_t HashState::CombineLargeContiguousImpl64(uint64_t state,
const unsigned char* first,
size_t len) {
while (len >= PiecewiseChunkSize()) {
state =
Mix(state, absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first),
PiecewiseChunkSize()));
state = Mix(state, Hash64(first, PiecewiseChunkSize()));
len -= PiecewiseChunkSize();
first += PiecewiseChunkSize();
}
......@@ -48,7 +46,24 @@ uint64_t CityHashState::CombineLargeContiguousImpl64(uint64_t state,
std::integral_constant<int, 8>{});
}
ABSL_CONST_INIT const void* const CityHashState::kSeed = &kSeed;
ABSL_CONST_INIT const void* const HashState::kSeed = &kSeed;
// The salt array used by Wyhash. This array is NOT the mechanism used to make
// absl::Hash non-deterministic between program invocations. See `Seed()` for
// that mechanism.
//
// Any random values are fine. These values are just digits from the decimal
// part of pi.
// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number
constexpr uint64_t kWyhashSalt[5] = {
uint64_t{0x243F6A8885A308D3}, uint64_t{0x13198A2E03707344},
uint64_t{0xA4093822299F31D0}, uint64_t{0x082EFA98EC4E6C89},
uint64_t{0x452821E638D01377},
};
uint64_t HashState::WyhashImpl(const unsigned char* data, size_t len) {
return Wyhash(data, len, Seed(), kWyhashSalt);
}
} // namespace hash_internal
ABSL_NAMESPACE_END
......
......@@ -41,6 +41,7 @@
#include "absl/base/internal/endian.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/hash/internal/wyhash.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
......@@ -712,9 +713,8 @@ template <typename T>
struct is_hashable
: std::integral_constant<bool, HashSelect::template Apply<T>::value> {};
// CityHashState
class ABSL_DLL CityHashState
: public HashStateBase<CityHashState> {
// HashState
class ABSL_DLL HashState : public HashStateBase<HashState> {
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
......@@ -733,23 +733,22 @@ class ABSL_DLL CityHashState
public:
// Move only
CityHashState(CityHashState&&) = default;
CityHashState& operator=(CityHashState&&) = default;
HashState(HashState&&) = default;
HashState& operator=(HashState&&) = default;
// CityHashState::combine_contiguous()
// HashState::combine_contiguous()
//
// Fundamental base case for hash recursion: mixes the given range of bytes
// into the hash state.
static CityHashState combine_contiguous(CityHashState hash_state,
const unsigned char* first,
size_t size) {
return CityHashState(
static HashState combine_contiguous(HashState hash_state,
const unsigned char* first, size_t size) {
return HashState(
CombineContiguousImpl(hash_state.state_, first, size,
std::integral_constant<int, sizeof(size_t)>{}));
}
using CityHashState::HashStateBase::combine_contiguous;
using HashState::HashStateBase::combine_contiguous;
// CityHashState::hash()
// HashState::hash()
//
// For performance reasons in non-opt mode, we specialize this for
// integral types.
......@@ -761,24 +760,24 @@ class ABSL_DLL CityHashState
return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value)));
}
// Overload of CityHashState::hash()
// Overload of HashState::hash()
template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0>
static size_t hash(const T& value) {
return static_cast<size_t>(combine(CityHashState{}, value).state_);
return static_cast<size_t>(combine(HashState{}, value).state_);
}
private:
// Invoked only once for a given argument; that plus the fact that this is
// move-only ensures that there is only one non-moved-from object.
CityHashState() : state_(Seed()) {}
HashState() : state_(Seed()) {}
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
// never actually copy it. Keep it private to not affect the public API of the
// type.
CityHashState(const CityHashState&) = default;
HashState(const HashState&) = default;
explicit CityHashState(uint64_t state) : state_(state) {}
explicit HashState(uint64_t state) : state_(state) {}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
......@@ -791,7 +790,8 @@ class ABSL_DLL CityHashState
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 8>
/* sizeof_size_t*/);
/* sizeof_size_t */);
// Slow dispatch path for calls to CombineContiguousImpl with a size argument
// larger than PiecewiseChunkSize(). Has the same effect as calling
......@@ -838,6 +838,19 @@ class ABSL_DLL CityHashState
return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2)));
}
// An extern to avoid bloat on a direct call to Wyhash() with fixed values for
// both the seed and salt parameters.
static uint64_t WyhashImpl(const unsigned char* data, size_t len);
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data,
size_t len) {
#ifdef ABSL_HAVE_INTRINSIC_INT128
return WyhashImpl(data, len);
#else
return absl::hash_internal::CityHash64(reinterpret_cast<const char*>(data), len);
#endif
}
// Seed()
//
// A non-deterministic seed.
......@@ -869,8 +882,8 @@ class ABSL_DLL CityHashState
uint64_t state_;
};
// CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
// HashState::CombineContiguousImpl()
inline uint64_t HashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 4> /* sizeof_size_t */) {
// For large values we use CityHash, for small ones we just use a
......@@ -892,18 +905,18 @@ inline uint64_t CityHashState::CombineContiguousImpl(
return Mix(state, v);
}
// Overload of CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
// Overload of HashState::CombineContiguousImpl()
inline uint64_t HashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 8> /* sizeof_size_t */) {
// For large values we use CityHash, for small ones we just use a
// multiplicative hash.
// For large values we use Wyhash or CityHash depending on the platform, for
// small ones we just use a multiplicative hash.
uint64_t v;
if (len > 16) {
if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) {
return CombineLargeContiguousImpl64(state, first, len);
}
v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len);
v = Hash64(first, len);
} else if (len > 8) {
auto p = Read9To16(first, len);
state = Mix(state, p.first);
......@@ -934,7 +947,7 @@ struct PoisonedHash : private AggregateBarrier {
template <typename T>
struct HashImpl {
size_t operator()(const T& value) const { return CityHashState::hash(value); }
size_t operator()(const T& value) const { return HashState::hash(value); }
};
template <typename T>
......
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/wyhash.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/numeric/int128.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
absl::uint128 p = v0;
p *= v1;
return absl::Uint128Low64(p) ^ absl::Uint128High64(p);
}
uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
const uint64_t salt[]) {
const uint8_t* ptr = static_cast<const uint8_t*>(data);
uint64_t starting_length = static_cast<uint64_t>(len);
uint64_t current_state = seed ^ salt[0];
if (len > 64) {
// If we have more than 64 bytes, we're going to handle chunks of 64
// bytes at a time. We're going to build up two separate hash states
// which we will then hash together.
uint64_t duplicated_state = current_state;
do {
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);
uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
current_state = (cs0 ^ cs1);
uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
duplicated_state = (ds0 ^ ds1);
ptr += 64;
len -= 64;
} while (len > 64);
current_state = current_state ^ duplicated_state;
}
// We now have a data `ptr` with at most 64 bytes and the current state
// of the hashing state machine stored in current_state.
while (len > 16) {
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
current_state = WyhashMix(a ^ salt[1], b ^ current_state);
ptr += 16;
len -= 16;
}
// We now have a data `ptr` with at most 16 bytes.
uint64_t a = 0;
uint64_t b = 0;
if (len > 8) {
// When we have at least 9 and at most 16 bytes, set A to the first 64
// bits of the input and B to the last 64 bits of the input. Yes, they will
// overlap in the middle if we are working with less than the full 16
// bytes.
a = absl::base_internal::UnalignedLoad64(ptr);
b = absl::base_internal::UnalignedLoad64(ptr + len - 8);
} else if (len > 3) {
// If we have at least 4 and at most 8 bytes, set A to the first 32
// bits and B to the last 32 bits.
a = absl::base_internal::UnalignedLoad32(ptr);
b = absl::base_internal::UnalignedLoad32(ptr + len - 4);
} else if (len > 0) {
// If we have at least 1 and at most 3 bytes, read all of the provided
// bits into A, with some adjustments.
a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
b = 0;
} else {
a = 0;
b = 0;
}
uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t z = salt[1] ^ starting_length;
return WyhashMix(w, z);
}
} // namespace hash_internal
ABSL_NAMESPACE_END
} // namespace absl
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides the Google-internal implementation of the Wyhash
// algorithm.
//
// Wyhash is a fast hash function for hash tables, the fastest we've currently
// (late 2020) found that passes the SMHasher tests. The algorithm relies on
// intrinsic 128-bit multiplication for speed. This is not meant to be secure -
// just fast.
#ifndef ABSL_HASH_INTERNAL_WYHASH_H_
#define ABSL_HASH_INTERNAL_WYHASH_H_
#include <stdint.h>
#include <stdlib.h>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
// Hash function for a byte array. A 64-bit seed and a set of five 64-bit
// integers are hashed into the result.
//
// To allow all hashable types (including string_view and Span) to depend on
// this algoritm, we keep the API low-level, with as few dependencies as
// possible.
uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
const uint64_t salt[5]);
} // namespace hash_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HASH_INTERNAL_WYHASH_H_
......@@ -17,7 +17,7 @@
#include "absl/base/internal/raw_logging.h"
#include "absl/random/internal/randen_detect.h"
// RANDen = RANDom generator or beetroots in Swiss German.
// RANDen = RANDom generator or beetroots in Swiss High German.
// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random
// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32.
//
......
......@@ -26,7 +26,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace random_internal {
// RANDen = RANDom generator or beetroots in Swiss German.
// RANDen = RANDom generator or beetroots in Swiss High German.
// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random
// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32.
//
......
......@@ -26,7 +26,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace random_internal {
// RANDen = RANDom generator or beetroots in Swiss German.
// RANDen = RANDom generator or beetroots in Swiss High German.
// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random
// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32.
//
......
......@@ -23,7 +23,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace random_internal {
// RANDen = RANDom generator or beetroots in Swiss German.
// RANDen = RANDom generator or beetroots in Swiss High German.
// RandenSlow implements the basic state manipulation methods for
// architectures lacking AES hardware acceleration intrinsics.
class RandenSlow {
......
......@@ -28,7 +28,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace random_internal {
// RANDen = RANDom generator or beetroots in Swiss German.
// RANDen = RANDom generator or beetroots in Swiss High German.
// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random
// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32.
//
......
......@@ -53,6 +53,7 @@ using ::absl::cord_internal::CordRepSubstring;
using ::absl::cord_internal::CONCAT;
using ::absl::cord_internal::EXTERNAL;
using ::absl::cord_internal::FLAT;
using ::absl::cord_internal::MAX_FLAT_TAG;
using ::absl::cord_internal::SUBSTRING;
namespace cord_internal {
......@@ -95,9 +96,21 @@ static const size_t kFlatOverhead = offsetof(CordRep, data);
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and
// ideally a 'nice' size aligning with allocation and cacheline sizes like 32.
// kMaxFlatSize is bounded by the size resulting in a computed tag no greater
// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values.
static constexpr size_t kMinFlatSize = 32;
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;
static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead;
static constexpr size_t AllocatedSizeToTagUnchecked(size_t size) {
return (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32;
}
static_assert(kMinFlatSize / 8 >= FLAT, "");
static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, "");
// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;
......@@ -117,7 +130,7 @@ static size_t RoundUpForTag(size_t size) {
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32;
const size_t tag = AllocatedSizeToTagUnchecked(size);
assert(tag <= std::numeric_limits<uint8_t>::max());
return tag;
}
......
......@@ -116,10 +116,16 @@ enum CordRepKind {
CONCAT = 0,
EXTERNAL = 1,
SUBSTRING = 2,
RING = 3,
// We have different tags for different sized flat arrays,
// starting with FLAT
FLAT = 3,
// starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on
// the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed
// in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well
// as the Tag <---> Size logic so that FLAT stil represents the minimum flat
// allocation size. (32 bytes as of now).
FLAT = 4,
MAX_FLAT_TAG = 224,
};
struct CordRep {
......
......@@ -35,18 +35,12 @@ namespace strings_internal {
// below.
template <typename T>
struct StringConstant {
private:
// Returns true if `view` points to constant data.
// Otherwise, it can't be constant evaluated.
static constexpr bool ValidateConstant(absl::string_view view) {
return view.empty() || 2 * view[0] != 1;
}
public:
static constexpr absl::string_view value = T{}();
constexpr absl::string_view operator()() const { return value; }
static_assert(ValidateConstant(value),
// Check to be sure `view` points to constant data.
// Otherwise, it can't be constant evaluated.
static_assert(value.empty() || 2 * value[0] != 1,
"The input string_view must point to constant data.");
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment