extern crate crossbeam_epoch;
extern crate scopeguard;
pub mod array;
pub mod cell;
pub mod link;
use array::{Array, MAX_ENLARGE_FACTOR};
use cell::{CellLocker, CellReader};
use crossbeam_epoch::{Atomic, Owned, Shared};
use link::EntryArrayLink;
use std::collections::hash_map::RandomState;
use std::convert::TryInto;
use std::fmt;
use std::hash::{BuildHasher, Hash, Hasher};
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::{Acquire, Relaxed, Release};
const DEFAULT_CAPACITY: usize = (cell::ARRAY_SIZE as usize) * (cell::ARRAY_SIZE as usize);
pub struct HashMap<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
array: Atomic<Array<K, V>>,
minimum_capacity: usize,
resize_mutex: AtomicBool,
build_hasher: H,
}
impl<K: Eq + Hash + Sync, V: Sync> Default for HashMap<K, V, RandomState> {
fn default() -> Self {
HashMap {
array: Atomic::new(Array::<K, V>::new(DEFAULT_CAPACITY, Atomic::null())),
minimum_capacity: DEFAULT_CAPACITY,
resize_mutex: AtomicBool::new(false),
build_hasher: RandomState::new(),
}
}
}
impl<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> HashMap<K, V, H> {
pub fn new(capacity: usize, build_hasher: H) -> HashMap<K, V, H> {
let initial_capacity = capacity.max(DEFAULT_CAPACITY);
HashMap {
array: Atomic::new(Array::<K, V>::new(initial_capacity, Atomic::null())),
minimum_capacity: initial_capacity,
resize_mutex: AtomicBool::new(false),
build_hasher: build_hasher,
}
}
pub fn insert<'a>(
&'a self,
key: K,
value: V,
) -> Result<Accessor<'a, K, V, H>, (Accessor<'a, K, V, H>, V)> {
let (hash, partial_hash) = self.hash(&key);
let mut resize_triggered = false;
loop {
let mut accessor = self.acquire(&key, hash, partial_hash);
if !accessor.entry_ptr.is_null() {
return Err((accessor, value));
}
if !resize_triggered
&& accessor.cell_index < (cell::ARRAY_SIZE as usize)
&& accessor.cell_locker.full()
{
drop(accessor);
resize_triggered = true;
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
if current_array_ref.old_array(&guard).is_null() {
let sample_size = current_array_ref.num_sample_size();
let threshold = sample_size * (cell::ARRAY_SIZE as usize - 1);
let mut num_entries = 0;
for i in 0..sample_size {
num_entries +=
current_array_ref.cell(i).size().0 + current_array_ref.cell(i).size().1;
if num_entries > threshold {
self.resize();
break;
}
}
}
continue;
}
let (sub_index, entry_array_link_ptr, entry_ptr) =
accessor.cell_locker.insert(key, partial_hash, value);
accessor.sub_index = sub_index;
accessor.entry_array_link_ptr = entry_array_link_ptr;
accessor.entry_ptr = entry_ptr;
return Ok(accessor);
}
}
pub fn upsert<'a>(&'a self, key: K, value: V) -> Accessor<'a, K, V, H> {
match self.insert(key, value) {
Ok(result) => result,
Err((result, value)) => {
let pair_mut_ptr = result.entry_ptr as *mut (K, V);
unsafe { (*pair_mut_ptr).1 = value };
result
}
}
}
pub fn get<'a>(&'a self, key: &K) -> Option<Accessor<'a, K, V, H>> {
let (hash, partial_hash) = self.hash(key);
let accessor = self.acquire(key, hash, partial_hash);
if accessor.entry_ptr.is_null() {
return None;
}
Some(accessor)
}
pub fn remove(&self, key: &K) -> Option<V> {
self.get(key)
.map_or_else(|| None, |accessor| accessor.erase())
}
pub fn read<U, F: FnOnce(&K, &V) -> U>(&self, key: &K, f: F) -> Option<U> {
let (hash, partial_hash) = self.hash(key);
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
for array_ptr in [old_array.as_raw(), current_array.as_raw()].iter() {
if array_ptr.is_null() {
continue;
}
if *array_ptr == old_array.as_raw() {
if current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
continue;
}
}
let array_ref = unsafe { &(**array_ptr) };
let cell_index = array_ref.calculate_cell_index(hash);
let reader = CellReader::lock(
array_ref.cell(cell_index),
array_ref.entry_array(cell_index),
);
if let Some(entry_ptr) = reader.search(&key, partial_hash) {
let entry_ref = unsafe { &(*entry_ptr) };
return Some(f(&entry_ref.0, &entry_ref.1));
}
}
None
}
pub fn retain<F: Fn(&K, &mut V) -> bool>(&self, f: F) -> (usize, usize) {
let mut retained_entries = 0;
let mut removed_entries = 0;
let mut scanner = self.iter();
while let Some((key, value)) = scanner.next() {
if !f(key, value) {
scanner.erase_on_next = true;
removed_entries += 1;
} else {
retained_entries += 1;
}
}
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
if retained_entries <= current_array_ref.capacity() / 8 {
self.resize();
}
(retained_entries, removed_entries)
}
pub fn clear(&self) -> usize {
self.retain(|_, _| false).1
}
pub fn len<F: FnOnce(usize) -> usize>(&self, f: F) -> usize {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let capacity = current_array_ref.capacity();
let num_samples = std::cmp::min(f(capacity), capacity).next_power_of_two();
let num_cells_to_sample = (num_samples / cell::ARRAY_SIZE as usize).max(1);
if !current_array_ref.old_array(&guard).is_null() {
for _ in 0..num_cells_to_sample {
if current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
break;
}
}
}
self.estimate(current_array_ref, num_cells_to_sample)
}
pub fn capacity(&self) -> usize {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
if !current_array_ref.old_array(&guard).is_null() {
current_array_ref.partial_rehash(&guard, |key| self.hash(key));
}
current_array_ref.capacity()
}
pub fn statistics(&self) -> Statistics {
let mut statistics = Statistics {
capacity: 0,
effective_capacity: 0,
cells: 0,
killed_entries: 0,
empty_cells: 0,
max_consecutive_empty_cells: 0,
entries: 0,
linked_entries: 0,
cells_having_link: 0,
max_link_length: 0,
};
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let old_array = unsafe { current_array.deref().old_array(&guard) };
for array_ptr in [old_array.as_raw(), current_array.as_raw()].iter() {
if array_ptr.is_null() {
continue;
}
let array_ref = unsafe { &(**array_ptr) };
let num_cells = array_ref.num_cells();
let mut consecutive_empty_cells = 0;
statistics.capacity += num_cells * cell::ARRAY_SIZE as usize;
if *array_ptr == current_array.as_raw() {
statistics.effective_capacity = num_cells * cell::ARRAY_SIZE as usize;
}
statistics.cells += num_cells;
for i in 0..num_cells {
let (size, linked_entries) = array_ref.cell(i).size();
statistics.entries += size + linked_entries;
if size == 0 {
statistics.empty_cells += 1;
consecutive_empty_cells += 1;
} else {
if statistics.max_consecutive_empty_cells < consecutive_empty_cells {
statistics.max_consecutive_empty_cells = consecutive_empty_cells;
}
consecutive_empty_cells = 0;
}
if linked_entries > 0 {
statistics.linked_entries += linked_entries;
statistics.cells_having_link += 1;
if statistics.max_link_length < linked_entries {
statistics.max_link_length = linked_entries;
}
}
if array_ref.cell(i).killed() {
statistics.killed_entries += 1;
}
}
}
statistics
}
pub fn iter<'a>(&'a self) -> Scanner<'a, K, V, H> {
let (cell_locker, array_ptr, cell_index) = self.first();
if let Some(cell_locker) = cell_locker {
if let Some(scanner) = self.pick(cell_locker, array_ptr, cell_index) {
return scanner;
}
}
Scanner {
accessor: None,
array_ptr: std::ptr::null(),
activated: false,
erase_on_next: false,
}
}
fn hash(&self, key: &K) -> (u64, u16) {
let mut h = self.build_hasher.build_hasher();
key.hash(&mut h);
let mut hash = h.finish();
hash = hash ^ (hash.rotate_right(25) ^ hash.rotate_right(50));
hash = hash.overflowing_mul(0xA24BAED4963EE407u64).0;
hash = hash ^ (hash.rotate_right(24) ^ hash.rotate_right(49));
hash = hash.overflowing_mul(0x9FB21C651E98DF25u64).0;
hash = hash ^ (hash >> 28);
(hash, (hash & ((1 << 16) - 1)).try_into().unwrap())
}
fn acquire<'a>(&'a self, key: &K, hash: u64, partial_hash: u16) -> Accessor<'a, K, V, H> {
let guard = crossbeam_epoch::pin();
loop {
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
if !old_array.is_null() {
if current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
continue;
}
let (mut cell_locker, cell_index, sub_index, entry_array_link_ptr, entry_ptr) =
self.search(&key, hash, partial_hash, old_array.as_raw());
if !entry_ptr.is_null() {
return Accessor {
hash_map: &self,
cell_locker: cell_locker,
cell_index: cell_index,
sub_index: sub_index,
entry_array_link_ptr: entry_array_link_ptr,
entry_ptr: entry_ptr,
};
} else if !cell_locker.killed() {
let old_array_ref = unsafe { old_array.deref() };
current_array_ref.kill_cell(
&mut cell_locker,
old_array_ref,
cell_index,
&|key| self.hash(key),
);
}
}
let (cell_locker, cell_index, sub_index, entry_array_link_ptr, entry_ptr) =
self.search(&key, hash, partial_hash, current_array.as_raw());
if !cell_locker.killed() {
return Accessor {
hash_map: &self,
cell_locker: cell_locker,
cell_index: cell_index,
sub_index: sub_index,
entry_array_link_ptr: entry_array_link_ptr,
entry_ptr: entry_ptr,
};
}
}
}
fn erase<'a>(&'a self, mut accessor: Accessor<'a, K, V, H>) -> V {
let value = Array::<K, V>::extract_key_value(accessor.entry_ptr).1;
accessor.cell_locker.remove(
false,
accessor.sub_index,
accessor.entry_array_link_ptr,
accessor.entry_ptr,
);
if accessor.cell_locker.empty() && accessor.cell_index < (cell::ARRAY_SIZE as usize) {
drop(accessor);
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
if current_array_ref.old_array(&guard).is_null()
&& current_array_ref.capacity() > self.minimum_capacity
{
let sample_size = current_array_ref.num_sample_size();
let mut num_entries = 0;
for i in 0..sample_size {
num_entries +=
current_array_ref.cell(i).size().0 + current_array_ref.cell(i).size().1;
if num_entries >= sample_size * (cell::ARRAY_SIZE as usize) / 16 {
return value;
}
}
self.resize();
}
}
value
}
fn search<'a>(
&self,
key: &K,
hash: u64,
partial_hash: u16,
array_ptr: *const Array<K, V>,
) -> (
CellLocker<'a, K, V>,
usize,
u8,
*const EntryArrayLink<K, V>,
*const (K, V),
) {
let array_ref = unsafe { &(*array_ptr) };
let cell_index = array_ref.calculate_cell_index(hash);
let cell_locker = CellLocker::lock(
array_ref.cell(cell_index),
array_ref.entry_array(cell_index),
);
if !cell_locker.killed() && !cell_locker.empty() {
if let Some((sub_index, entry_array_link_ptr, entry_ptr)) =
cell_locker.search(key, partial_hash)
{
return (
cell_locker,
cell_index,
sub_index,
entry_array_link_ptr,
entry_ptr,
);
}
}
(
cell_locker,
cell_index,
0,
std::ptr::null(),
std::ptr::null(),
)
}
fn first<'a>(&'a self) -> (Option<CellLocker<'a, K, V>>, *const Array<K, V>, usize) {
let guard = crossbeam_epoch::pin();
let mut current_array = self.array.load(Acquire, &guard);
loop {
let old_array = unsafe { current_array.deref().old_array(&guard) };
for array_ptr in [old_array.as_raw(), current_array.as_raw()].iter() {
if array_ptr.is_null() {
continue;
}
let array_ref = unsafe { &(**array_ptr) };
let num_cells = array_ref.num_cells();
for cell_index in 0..num_cells {
let cell_locker = CellLocker::lock(
array_ref.cell(cell_index),
array_ref.entry_array(cell_index),
);
if !cell_locker.empty() {
return (Some(cell_locker), *array_ptr, cell_index);
}
}
}
let current_array_new = self.array.load(Acquire, &guard);
if current_array == current_array_new {
break;
}
current_array = current_array_new;
}
(None, std::ptr::null(), 0)
}
fn next<'a>(
&'a self,
array_ptr: *const Array<K, V>,
current_index: usize,
) -> Option<Scanner<'a, K, V, H>> {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { &(*current_array.as_raw()) };
let old_array = current_array_ref.old_array(&guard);
debug_assert!(array_ptr == current_array.as_raw() || array_ptr == old_array.as_raw());
if old_array.as_raw() == array_ptr {
let old_array_ref = unsafe { &(*old_array.as_raw()) };
let num_cells = old_array_ref.num_cells();
for cell_index in (current_index + 1)..num_cells {
let cell_locker = CellLocker::lock(
old_array_ref.cell(cell_index),
old_array_ref.entry_array(cell_index),
);
if !cell_locker.killed() && !cell_locker.empty() {
if let Some(scanner) = self.pick(cell_locker, old_array.as_raw(), cell_index) {
return Some(scanner);
}
}
}
}
let mut new_array = Shared::<Array<K, V>>::null();
let num_cells = current_array_ref.num_cells();
let start_index = if old_array.as_raw() == array_ptr {
0
} else {
current_index + 1
};
for cell_index in (start_index)..num_cells {
let cell_locker = CellLocker::lock(
current_array_ref.cell(cell_index),
current_array_ref.entry_array(cell_index),
);
if !cell_locker.killed() && !cell_locker.empty() {
if let Some(scanner) = self.pick(cell_locker, current_array.as_raw(), cell_index) {
return Some(scanner);
}
} else if cell_locker.killed() && new_array.is_null() {
new_array = self.array.load(Acquire, &guard);
}
}
if !new_array.is_null() {
let new_array_ref = unsafe { &(*new_array.as_raw()) };
let num_cells = new_array_ref.num_cells();
for cell_index in 0..num_cells {
let cell_locker = CellLocker::lock(
new_array_ref.cell(cell_index),
new_array_ref.entry_array(cell_index),
);
if !cell_locker.killed() && !cell_locker.empty() {
if let Some(scanner) = self.pick(cell_locker, new_array.as_raw(), cell_index) {
return Some(scanner);
}
}
}
}
None
}
fn pick<'a>(
&'a self,
cell_locker: CellLocker<'a, K, V>,
array_ptr: *const Array<K, V>,
cell_index: usize,
) -> Option<Scanner<'a, K, V, H>> {
if let Some((sub_index, entry_array_link_ptr, entry_ptr)) = cell_locker.first() {
return Some(Scanner {
accessor: Some(Accessor {
hash_map: &self,
cell_locker: cell_locker,
cell_index: cell_index,
sub_index: sub_index,
entry_array_link_ptr: entry_array_link_ptr,
entry_ptr: entry_ptr,
}),
array_ptr: array_ptr,
activated: false,
erase_on_next: false,
});
}
None
}
fn resize(&self) {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
if !old_array.is_null() {
if !current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
return;
}
}
if !self.resize_mutex.swap(true, Acquire) {
let memory_ordering = Relaxed;
let mut mutex_guard = scopeguard::guard(memory_ordering, |memory_ordering| {
self.resize_mutex.store(false, memory_ordering);
});
if current_array != self.array.load(Acquire, &guard) {
return;
}
let capacity = current_array_ref.capacity();
let num_cells = current_array_ref.num_cells();
let num_cells_to_sample = (num_cells / 16).max(4).min(4096);
let estimated_num_entries = self.estimate(current_array_ref, num_cells_to_sample);
let new_capacity = if estimated_num_entries >= (capacity / 8) * 7 {
let max_capacity = 1usize << (std::mem::size_of::<usize>() * 8 - 1);
if capacity == max_capacity {
capacity
} else if estimated_num_entries <= (capacity / 8) * 9 {
capacity * 2
} else {
let new_capacity_candidate = estimated_num_entries
.next_power_of_two()
.min(max_capacity / 2)
* 2;
if new_capacity_candidate / capacity > (1 << MAX_ENLARGE_FACTOR as usize) {
capacity * (1 << MAX_ENLARGE_FACTOR as usize)
} else {
new_capacity_candidate
}
}
} else if estimated_num_entries <= capacity / 8 {
estimated_num_entries
.next_power_of_two()
.max(self.minimum_capacity)
} else {
capacity
};
if new_capacity != capacity {
self.array.store(
Owned::new(Array::<K, V>::new(
new_capacity,
Atomic::from(current_array),
)),
Release,
);
*mutex_guard = Release;
}
}
}
fn estimate(&self, current_array_ref: &Array<K, V>, num_cells_to_sample: usize) -> usize {
let mut num_entries = 0;
for i in 0..num_cells_to_sample {
let (size, linked_entries) = current_array_ref.cell(i).size();
num_entries += size + linked_entries;
}
num_entries * (current_array_ref.num_cells() / num_cells_to_sample)
}
}
impl<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Drop for HashMap<K, V, H> {
fn drop(&mut self) {
self.clear();
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
current_array_ref.drop_old_array(&guard);
let array = self.array.swap(Shared::null(), Relaxed, &guard);
if !array.is_null() {
unsafe { guard.defer_destroy(array) };
}
}
}
pub struct Accessor<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
hash_map: &'a HashMap<K, V, H>,
cell_locker: CellLocker<'a, K, V>,
cell_index: usize,
sub_index: u8,
entry_array_link_ptr: *const EntryArrayLink<K, V>,
entry_ptr: *const (K, V),
}
impl<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Accessor<'a, K, V, H> {
pub fn get(&'a self) -> (&'a K, &'a mut V) {
unsafe {
let key_ptr = &(*self.entry_ptr).0 as *const K;
let value_ptr = &(*self.entry_ptr).1 as *const V;
let value_mut_ptr = value_ptr as *mut V;
(&(*key_ptr), &mut (*value_mut_ptr))
}
}
pub fn erase(self) -> Option<V> {
if self.entry_ptr.is_null() {
return None;
}
Some(self.hash_map.erase(self))
}
}
pub struct Scanner<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
accessor: Option<Accessor<'a, K, V, H>>,
array_ptr: *const Array<K, V>,
activated: bool,
erase_on_next: bool,
}
impl<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Iterator for Scanner<'a, K, V, H> {
type Item = (&'a K, &'a mut V);
fn next(&mut self) -> Option<Self::Item> {
if !self.activated {
self.activated = true;
} else if self.accessor.is_some() {
let erase = self.erase_on_next;
if erase {
self.erase_on_next = false;
}
if let Some((next_sub_index, next_entry_array_link_ptr, next_entry_ptr)) =
self.accessor.as_mut().map_or_else(
|| None,
|accessor| {
accessor.cell_locker.next(
erase,
true,
accessor.sub_index,
accessor.entry_array_link_ptr,
accessor.entry_ptr,
)
},
)
{
self.accessor.as_mut().map_or_else(
|| (),
|accessor| {
accessor.sub_index = next_sub_index;
accessor.entry_array_link_ptr = next_entry_array_link_ptr;
accessor.entry_ptr = next_entry_ptr;
},
);
} else {
let current_array_ptr = self.array_ptr;
let scanner = self.accessor.as_ref().map_or_else(
|| None,
|accessor| {
accessor
.hash_map
.next(current_array_ptr, accessor.cell_index)
},
);
self.accessor.take();
if let Some(mut scanner) = scanner {
self.accessor = scanner.accessor.take();
self.array_ptr = scanner.array_ptr;
}
}
}
if let Some(accessor) = &self.accessor {
unsafe {
let key_ptr = &(*accessor.entry_ptr).0 as *const K;
let value_ptr = &(*accessor.entry_ptr).1 as *const V;
let value_mut_ptr = value_ptr as *mut V;
return Some((&(*key_ptr), &mut (*value_mut_ptr)));
}
}
None
}
}
pub struct Statistics {
capacity: usize,
effective_capacity: usize,
entries: usize,
killed_entries: usize,
cells: usize,
empty_cells: usize,
max_consecutive_empty_cells: usize,
linked_entries: usize,
cells_having_link: usize,
max_link_length: usize,
}
impl Statistics {
pub fn capacity(&self) -> usize {
self.capacity
}
pub fn effective_capacity(&self) -> usize {
self.effective_capacity
}
pub fn num_entries(&self) -> usize {
self.entries
}
pub fn num_killed_entries(&self) -> usize {
self.killed_entries
}
pub fn num_cells(&self) -> usize {
self.cells
}
pub fn num_empty_cells(&self) -> usize {
self.empty_cells
}
pub fn max_consecutive_empty_cells(&self) -> usize {
self.max_consecutive_empty_cells
}
pub fn num_linked_entries(&self) -> usize {
self.linked_entries
}
pub fn num_cells_having_link(&self) -> usize {
self.cells_having_link
}
pub fn max_link_length(&self) -> usize {
self.max_link_length
}
}
impl fmt::Display for Statistics {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"capacity: {}, effective_capacity: {}, cells: {}, killed_entries: {}, empty_cells: {}, max_consecutive_empty_cells: {}, entries: {}, linked_entries: {}, cells_having_link: {}, max_link_length: {}",
self.capacity,
self.effective_capacity,
self.cells,
self.killed_entries,
self.empty_cells,
self.max_consecutive_empty_cells,
self.entries,
self.linked_entries,
self.cells_having_link,
self.max_link_length
)
}
}