mod sparse_array_usize;
use super::entry::Entry;
use crate::list;
use crate::List;
use sparse_array_usize::SparseArrayUsize;
use std::borrow::Borrow;
use std::collections::hash_map::RandomState;
use std::fmt::Display;
use std::hash::BuildHasher;
use std::hash::Hash;
use std::iter::FromIterator;
use std::iter::Peekable;
use std::mem::size_of;
use std::ops::Index;
use std::slice;
use std::sync::Arc;
use std::vec::Vec;
type HashValue = u64;
pub type Iter<'a, K, V> =
::std::iter::Map<IterArc<'a, K, V>, fn(&'a Arc<Entry<K, V>>) -> (&'a K, &'a V)>;
pub type IterKeys<'a, K, V> = ::std::iter::Map<Iter<'a, K, V>, fn((&'a K, &V)) -> &'a K>;
pub type IterValues<'a, K, V> = ::std::iter::Map<Iter<'a, K, V>, fn((&K, &'a V)) -> &'a V>;
const DEFAULT_DEGREE: u8 = 8 * size_of::<usize>() as u8;
#[macro_export]
macro_rules! ht_map {
($($k:expr => $v:expr),*) => {
{
#[allow(unused_mut)]
let mut m = $crate::HashTrieMap::new();
$(
m.insert_mut($k, $v);
)*
m
}
};
}
#[derive(Debug)]
pub struct HashTrieMap<K, V, H: BuildHasher = RandomState> {
root: Arc<Node<K, V>>,
size: usize,
degree: u8,
hasher_builder: H,
}
#[derive(Debug, PartialEq, Eq)]
enum Node<K, V> {
Branch(SparseArrayUsize<Arc<Node<K, V>>>),
Leaf(Bucket<K, V>),
}
#[derive(Debug, PartialEq, Eq)]
enum Bucket<K, V> {
Single(EntryWithHash<K, V>),
Collision(List<EntryWithHash<K, V>>),
}
#[derive(Debug, PartialEq, Eq)]
struct EntryWithHash<K, V> {
entry: Arc<Entry<K, V>>,
key_hash: HashValue,
}
mod node_utils {
use super::HashValue;
use std::hash::BuildHasher;
use std::hash::Hash;
use std::hash::Hasher;
use std::mem::size_of_val;
#[inline]
pub fn index_from_hash(hash: HashValue, depth: usize, degree: u8) -> Option<usize> {
debug_assert!(degree.is_power_of_two());
let shift = depth as u32 * degree.trailing_zeros();
if (shift as usize) < 8 * size_of_val(&hash) {
let mask = degree as HashValue - 1;
Some(((hash >> shift) & mask) as usize)
} else {
None
}
}
pub fn hash<T: ?Sized + Hash, H: BuildHasher>(v: &T, hasher_builder: &H) -> HashValue {
let mut hasher = hasher_builder.build_hasher();
v.hash(&mut hasher);
hasher.finish()
}
}
impl<K, V> Node<K, V>
where
K: Eq + Hash,
{
fn new_empty_branch() -> Node<K, V> {
Node::Branch(SparseArrayUsize::new())
}
fn get<Q: ?Sized>(
&self,
key: &Q,
key_hash: HashValue,
depth: usize,
degree: u8,
) -> Option<&EntryWithHash<K, V>>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
match self {
Node::Branch(subtrees) => {
let index: usize = node_utils::index_from_hash(key_hash, depth, degree)
.expect("hash cannot be exhausted if we are on a branch");
subtrees
.get(index)
.and_then(|subtree| subtree.get(key, key_hash, depth + 1, degree))
}
Node::Leaf(bucket) => bucket.get(key, key_hash),
}
}
fn insert(&mut self, entry: EntryWithHash<K, V>, depth: usize, degree: u8) -> bool {
match self {
Node::Branch(subtrees) => {
let index: usize = node_utils::index_from_hash(entry.key_hash, depth, degree)
.expect("hash cannot be exhausted if we are on a branch");
match subtrees.get_mut(index) {
Some(subtree) => Arc::make_mut(subtree).insert(entry, depth + 1, degree),
None => {
let new_subtree = Node::Leaf(Bucket::Single(entry));
subtrees.set(index, Arc::new(new_subtree));
true
}
}
}
Node::Leaf(bucket) => {
let maximum_depth =
node_utils::index_from_hash(entry.key_hash, depth, degree).is_none();
let bucket_contains_key: bool = bucket.contains_key(entry.key(), entry.key_hash);
match maximum_depth {
false if bucket_contains_key => bucket.insert(entry),
false => {
let old_entry: EntryWithHash<K, V> = match bucket {
Bucket::Single(e) => e.clone(),
Bucket::Collision(_) => unreachable!(
"hash is not exhausted, so there cannot be a collision here"
),
};
*self = Node::new_empty_branch();
self.insert(old_entry, depth, degree);
self.insert(entry, depth, degree);
true
}
true => bucket.insert(entry),
}
}
}
}
fn compress(&mut self) {
let new_node = match self {
Node::Branch(subtrees) => {
match subtrees.size() {
1 => {
let compress: bool = {
let subtree = subtrees.first().unwrap();
match subtree.borrow() {
Node::Leaf(Bucket::Single(_)) => true,
_ => false,
}
};
match compress {
true => subtrees.pop(),
false => None,
}
}
_ => None,
}
}
Node::Leaf(_) => None,
};
if let Some(node) = new_node {
crate::utils::replace(self, node);
}
}
fn remove<Q: ?Sized>(&mut self, key: &Q, key_hash: HashValue, depth: usize, degree: u8) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
match self {
Node::Branch(subtrees) => {
let index: usize = node_utils::index_from_hash(key_hash, depth, degree)
.expect("hash cannot be exhausted if we are on a branch");
match subtrees.get_mut(index) {
Some(subtree) => {
let subtree = Arc::make_mut(subtree);
let removed = subtree.remove(key, key_hash, depth + 1, degree);
match (subtree.is_empty(), removed) {
(_, false) => (),
(false, true) => {
self.compress();
}
(true, true) => {
subtrees.remove(index);
self.compress();
}
};
removed
}
None => false,
}
}
Node::Leaf(bucket) => {
let mut bucket_ref = Some(bucket);
let removed = Bucket::remove(&mut bucket_ref, key, key_hash);
if bucket_ref.is_none() {
*self = Node::new_empty_branch();
}
removed
}
}
}
fn is_empty(&self) -> bool {
match self {
Node::Branch(subtrees) => subtrees.size() == 0,
Node::Leaf(Bucket::Single(_)) => false,
Node::Leaf(Bucket::Collision(entries)) => {
debug_assert!(
entries.len() >= 2,
"collisions must have at least two entries"
);
false
}
}
}
}
impl<K, V> Clone for Node<K, V>
where
K: Eq + Hash,
{
fn clone(&self) -> Node<K, V> {
match self {
Node::Branch(subtrees) => Node::Branch(subtrees.clone()),
Node::Leaf(bucket) => Node::Leaf(bucket.clone()),
}
}
}
mod bucket_utils {
use super::*;
pub fn list_remove_first<T: Clone, F: Fn(&T) -> bool>(
list: &mut List<T>,
predicate: F,
) -> bool {
let mut before_needle: Vec<T> = Vec::with_capacity(list.len());
let remaining: &mut List<T> = list;
let mut removed = false;
while !remaining.is_empty() {
let e: T = remaining.first().unwrap().clone();
remaining.drop_first_mut();
if predicate(&e) {
removed = true;
break;
}
before_needle.push(e);
}
let new_entries = remaining;
while let Some(e) = before_needle.pop() {
new_entries.push_front_mut(e);
}
removed
}
}
impl<K, V> Bucket<K, V>
where
K: Eq + Hash,
{
fn get<Q: ?Sized>(&self, key: &Q, key_hash: HashValue) -> Option<&EntryWithHash<K, V>>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
match self {
Bucket::Single(entry) if entry.matches(key, key_hash) => Some(entry.borrow()),
Bucket::Single(_) => None,
Bucket::Collision(entries) => entries
.iter()
.find(|e| e.matches(key, key_hash))
.map(|e| e.borrow()),
}
}
#[inline]
fn contains_key<Q: ?Sized>(&self, key: &Q, key_hash: HashValue) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.get(key, key_hash).is_some()
}
fn insert(&mut self, entry: EntryWithHash<K, V>) -> bool {
match self {
Bucket::Single(existing_entry)
if existing_entry.matches(entry.key(), entry.key_hash) =>
{
*existing_entry = entry;
false
}
Bucket::Single(existing_entry) => {
let entries = list!(entry, existing_entry.clone());
*self = Bucket::Collision(entries);
true
}
Bucket::Collision(entries) => {
let key_existed = bucket_utils::list_remove_first(entries, |e| {
e.matches(entry.key(), entry.key_hash)
});
entries.push_front_mut(entry);
!key_existed
}
}
}
fn remove<Q: ?Sized>(
bucket: &mut Option<&mut Bucket<K, V>>,
key: &Q,
key_hash: HashValue,
) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
match bucket.take() {
Some(b) => {
match b {
Bucket::Single(existing_entry) if existing_entry.matches(key, key_hash) => {
true
}
Bucket::Single(_) => {
*bucket = Some(b);
false
}
Bucket::Collision(entries) => {
let removed =
bucket_utils::list_remove_first(entries, |e| e.matches(key, key_hash));
match entries.len() {
0 => unreachable!(
"impossible to have collision with a single or no entry"
),
1 => {
let entry = entries.first().unwrap().clone();
*b = Bucket::Single(entry);
}
_ => (),
};
*bucket = Some(b);
removed
}
}
}
None => false,
}
}
}
impl<K, V> Clone for Bucket<K, V>
where
K: Eq + Hash,
{
fn clone(&self) -> Bucket<K, V> {
match self {
Bucket::Single(entry) => Bucket::Single(EntryWithHash::clone(entry)),
Bucket::Collision(entries) => Bucket::Collision(List::clone(entries)),
}
}
}
impl<K, V> EntryWithHash<K, V>
where
K: Eq + Hash,
{
fn new<H: BuildHasher>(key: K, value: V, hash_builder: &H) -> EntryWithHash<K, V> {
let key_hash = node_utils::hash(&key, hash_builder);
EntryWithHash {
entry: Arc::new(Entry::new(key, value)),
key_hash,
}
}
fn key(&self) -> &K {
&self.entry.key
}
fn value(&self) -> &V {
&self.entry.value
}
#[inline]
fn matches<Q: ?Sized>(&self, key: &Q, key_hash: HashValue) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.key_hash == key_hash && self.key().borrow() == key
}
}
impl<K, V> Clone for EntryWithHash<K, V>
where
K: Eq + Hash,
{
fn clone(&self) -> EntryWithHash<K, V> {
EntryWithHash {
entry: Arc::clone(&self.entry),
key_hash: self.key_hash,
}
}
}
impl<K, V> HashTrieMap<K, V, RandomState>
where
K: Eq + Hash,
{
#[must_use]
pub fn new() -> HashTrieMap<K, V> {
HashTrieMap::new_with_degree(DEFAULT_DEGREE)
}
#[must_use]
pub fn new_with_degree(degree: u8) -> HashTrieMap<K, V> {
HashTrieMap::new_with_hasher_and_degree(RandomState::new(), degree)
}
}
impl<K, V, H: BuildHasher> HashTrieMap<K, V, H>
where
K: Eq + Hash,
H: Clone,
{
#[must_use]
pub fn new_with_hasher(hasher_builder: H) -> HashTrieMap<K, V, H> {
HashTrieMap::new_with_hasher_and_degree(hasher_builder, DEFAULT_DEGREE)
}
#[must_use]
pub fn new_with_hasher_and_degree(hasher_builder: H, degree: u8) -> HashTrieMap<K, V, H> {
assert!(degree.is_power_of_two(), "degree must be a power of two");
assert!(
degree <= DEFAULT_DEGREE,
format!("degree must not exceed {}", DEFAULT_DEGREE)
);
HashTrieMap {
root: Arc::new(Node::new_empty_branch()),
size: 0,
degree,
hasher_builder,
}
}
#[must_use]
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&V>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
let key_hash = node_utils::hash(key, &self.hasher_builder);
self.root
.get(key, key_hash, 0, self.degree)
.map(|e| e.value())
}
#[must_use]
pub fn insert(&self, key: K, value: V) -> HashTrieMap<K, V, H> {
let mut new_map = self.clone();
new_map.insert_mut(key, value);
new_map
}
pub fn insert_mut(&mut self, key: K, value: V) {
let entry = EntryWithHash::new(key, value, &self.hasher_builder);
let is_new_key = Arc::make_mut(&mut self.root).insert(entry, 0, self.degree);
if is_new_key {
self.size += 1;
}
}
#[must_use]
pub fn remove<Q: ?Sized>(&self, key: &Q) -> HashTrieMap<K, V, H>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
let mut new_map = self.clone();
if new_map.remove_mut(key) {
new_map
} else {
self.clone()
}
}
pub fn remove_mut<Q: ?Sized>(&mut self, key: &Q) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
let key_hash = node_utils::hash(key, &self.hasher_builder);
let removed = Arc::make_mut(&mut self.root).remove(key, key_hash, 0, self.degree);
if removed {
self.size -= 1;
}
removed
}
#[must_use]
pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.get(key).is_some()
}
#[must_use]
#[inline]
pub fn size(&self) -> usize {
self.size
}
#[must_use]
#[inline]
pub fn is_empty(&self) -> bool {
self.size() == 0
}
#[must_use]
pub fn iter(&self) -> Iter<'_, K, V> {
self.iter_arc().map(|e| (&e.key, &e.value))
}
fn iter_arc(&self) -> IterArc<'_, K, V> {
IterArc::new(self)
}
#[must_use]
pub fn keys(&self) -> IterKeys<'_, K, V> {
self.iter().map(|(k, _)| k)
}
#[must_use]
pub fn values(&self) -> IterValues<'_, K, V> {
self.iter().map(|(_, v)| v)
}
}
impl<'a, K, Q: ?Sized, V, H: BuildHasher> Index<&'a Q> for HashTrieMap<K, V, H>
where
K: Eq + Hash + Borrow<Q>,
Q: Hash + Eq,
H: Clone,
{
type Output = V;
fn index(&self, key: &Q) -> &V {
self.get(key).expect("no entry found for key")
}
}
impl<K, V, H: BuildHasher> Clone for HashTrieMap<K, V, H>
where
K: Eq + Hash,
H: Clone,
{
fn clone(&self) -> HashTrieMap<K, V, H> {
HashTrieMap {
root: Arc::clone(&self.root),
size: self.size,
degree: self.degree,
hasher_builder: self.hasher_builder.clone(),
}
}
}
impl<K, V, H: BuildHasher> Default for HashTrieMap<K, V, H>
where
K: Eq + Hash,
H: Default + Clone,
{
fn default() -> HashTrieMap<K, V, H> {
HashTrieMap::new_with_hasher(H::default())
}
}
impl<K: Eq, V: PartialEq, H: BuildHasher> PartialEq for HashTrieMap<K, V, H>
where
K: Hash,
H: Clone,
{
fn eq(&self, other: &HashTrieMap<K, V, H>) -> bool {
self.size() == other.size()
&& self
.iter()
.all(|(key, value)| other.get(key).map_or(false, |v| *value == *v))
}
}
impl<K: Eq, V: Eq, H: BuildHasher> Eq for HashTrieMap<K, V, H>
where
K: Hash,
H: Clone,
{
}
impl<K, V, H: BuildHasher> Display for HashTrieMap<K, V, H>
where
K: Eq + Hash + Display,
V: Display,
H: Clone,
{
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
let mut first = true;
fmt.write_str("{")?;
for (k, v) in self.iter() {
if !first {
fmt.write_str(", ")?;
}
k.fmt(fmt)?;
fmt.write_str(": ")?;
v.fmt(fmt)?;
first = false;
}
fmt.write_str("}")
}
}
impl<'a, K, V, H: BuildHasher> IntoIterator for &'a HashTrieMap<K, V, H>
where
K: Eq + Hash,
H: Default + Clone,
{
type Item = (&'a K, &'a V);
type IntoIter = Iter<'a, K, V>;
fn into_iter(self) -> Iter<'a, K, V> {
self.iter()
}
}
impl<K, V, H> FromIterator<(K, V)> for HashTrieMap<K, V, H>
where
K: Eq + Hash,
H: BuildHasher + Clone + Default,
{
fn from_iter<I: IntoIterator<Item = (K, V)>>(into_iter: I) -> HashTrieMap<K, V, H> {
let mut map = HashTrieMap::new_with_hasher(Default::default());
for (k, v) in into_iter {
map.insert_mut(k, v);
}
map
}
}
#[derive(Debug)]
pub struct IterArc<'a, K, V> {
stack: Vec<IterStackElement<'a, K, V>>,
size: usize,
}
#[derive(Debug)]
enum IterStackElement<'a, K, V> {
Branch(Peekable<slice::Iter<'a, Arc<Node<K, V>>>>),
LeafSingle(&'a EntryWithHash<K, V>),
LeafCollision(Peekable<list::Iter<'a, EntryWithHash<K, V>>>),
}
impl<'a, K, V> IterStackElement<'a, K, V>
where
K: Eq + Hash,
{
fn new(node: &Node<K, V>) -> IterStackElement<'_, K, V> {
match node {
Node::Branch(children) => IterStackElement::Branch(children.iter().peekable()),
Node::Leaf(Bucket::Single(entry)) => IterStackElement::LeafSingle(entry),
Node::Leaf(Bucket::Collision(entries)) => {
IterStackElement::LeafCollision(entries.iter().peekable())
}
}
}
fn current_elem(&mut self) -> &'a Arc<Entry<K, V>> {
match self {
IterStackElement::Branch(_) => panic!("called current element of a branch"),
IterStackElement::LeafSingle(entry) => &entry.entry,
IterStackElement::LeafCollision(iter) => &iter.peek().unwrap().entry,
}
}
#[inline]
fn advance(&mut self) -> bool {
match self {
IterStackElement::Branch(iter) => {
iter.next();
iter.peek().is_none()
}
IterStackElement::LeafSingle(_) => true,
IterStackElement::LeafCollision(iter) => {
iter.next();
iter.peek().is_none()
}
}
}
}
mod iter_utils {
use super::HashValue;
use std::mem::size_of;
pub fn trie_max_height(degree: u8) -> usize {
let bits_per_level = (degree - 1).count_ones() as usize;
let hash_bits = 8 * size_of::<HashValue>();
(hash_bits / bits_per_level) + if hash_bits % bits_per_level > 0 { 1 } else { 0 }
}
}
impl<'a, K, V> IterArc<'a, K, V>
where
K: Eq + Hash,
{
fn new<H: BuildHasher + Clone>(map: &HashTrieMap<K, V, H>) -> IterArc<'_, K, V> {
let mut stack: Vec<IterStackElement<'_, K, V>> =
Vec::with_capacity(iter_utils::trie_max_height(map.degree) + 1);
if map.size() > 0 {
stack.push(IterStackElement::new(map.root.borrow()));
}
let mut iter = IterArc {
stack,
size: map.size(),
};
iter.dig();
iter
}
fn dig(&mut self) {
let next_stack_elem: Option<IterStackElement<'_, K, V>> =
self.stack.last_mut().and_then(|stack_top| match stack_top {
IterStackElement::Branch(iter) => {
iter.peek().map(|node| IterStackElement::new(node))
}
_ => None,
});
if let Some(e) = next_stack_elem {
self.stack.push(e);
self.dig();
}
}
fn advance(&mut self) {
if let Some(mut stack_element) = self.stack.pop() {
let finished = stack_element.advance();
if finished {
self.advance();
} else {
self.stack.push(stack_element);
self.dig();
}
}
}
fn current(&mut self) -> Option<&'a Arc<Entry<K, V>>> {
self.stack.last_mut().map(|e| e.current_elem())
}
}
impl<'a, K, V> Iterator for IterArc<'a, K, V>
where
K: Eq + Hash,
{
type Item = &'a Arc<Entry<K, V>>;
fn next(&mut self) -> Option<&'a Arc<Entry<K, V>>> {
let current = self.current();
self.advance();
if current.is_some() {
self.size -= 1;
}
current
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.size, Some(self.size))
}
}
impl<'a, K: Eq + Hash, V> ExactSizeIterator for IterArc<'a, K, V> {}
#[cfg(feature = "serde")]
pub mod serde {
use super::*;
use ::serde::de::{Deserialize, Deserializer, MapAccess, Visitor};
use ::serde::ser::{Serialize, Serializer};
use std::fmt;
use std::marker::PhantomData;
impl<K, V, H> Serialize for HashTrieMap<K, V, H>
where
K: Eq + Hash + Serialize,
V: Serialize,
H: BuildHasher + Clone + Default,
{
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
serializer.collect_map(self)
}
}
impl<'de, K, V, H> Deserialize<'de> for HashTrieMap<K, V, H>
where
K: Eq + Hash + Deserialize<'de>,
V: Deserialize<'de>,
H: BuildHasher + Clone + Default,
{
fn deserialize<D: Deserializer<'de>>(
deserializer: D,
) -> Result<HashTrieMap<K, V, H>, D::Error> {
deserializer.deserialize_map(HashTrieMapVisitor {
phantom: PhantomData,
})
}
}
struct HashTrieMapVisitor<K, V, H> {
phantom: PhantomData<(K, V, H)>,
}
impl<'de, K, V, H> Visitor<'de> for HashTrieMapVisitor<K, V, H>
where
K: Eq + Hash + Deserialize<'de>,
V: Deserialize<'de>,
H: BuildHasher + Clone + Default,
{
type Value = HashTrieMap<K, V, H>;
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("a map")
}
fn visit_map<A>(self, mut map: A) -> Result<HashTrieMap<K, V, H>, A::Error>
where
A: MapAccess<'de>,
{
let mut hash_trie_map = HashTrieMap::new_with_hasher(Default::default());
while let Some((k, v)) = map.next_entry()? {
hash_trie_map.insert_mut(k, v);
}
Ok(hash_trie_map)
}
}
}
#[cfg(test)]
mod test;