extern crate crossbeam_epoch;
pub mod array;
pub mod cell;
pub mod link;
use array::{Array, MAX_ENLARGE_FACTOR};
use cell::{CellLocker, CellReader};
use crossbeam_epoch::{Atomic, Owned, Shared};
use link::EntryArrayLink;
use std::convert::TryInto;
use std::fmt;
use std::hash::{BuildHasher, Hash, Hasher};
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::{Acquire, Release};
/// A concurrent hash map.
///
/// Entries live in an epoch-managed array of cells; on resize a new array is
/// installed while the previous one stays reachable through it until
/// rehashing completes.
pub struct HashMap<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
// Current cell array; an old array may hang off it during a resize.
array: Atomic<Array<K, V>>,
// Lower bound for shrinking; set from the capacity chosen in `new`.
minimum_capacity: usize,
// Flag serializing resize attempts (swap-acquired in `resize`).
resize_mutex: AtomicBool,
// Factory for per-key hashers.
hasher: H,
}
impl<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> HashMap<K, V, H> {
/// Creates a `HashMap` with the given hasher factory.
///
/// `minimum_capacity` is a lower-bound hint: the map always starts with at
/// least 256 slots, and shrinking never goes below the chosen initial
/// capacity.
pub fn new(hasher: H, minimum_capacity: Option<usize>) -> HashMap<K, V, H> {
    // Clamp the requested capacity to the hard floor of 256 slots.
    let initial_capacity = minimum_capacity.map_or(256, |capacity| capacity.max(256));
    HashMap {
        array: Atomic::new(Array::<K, V>::new(initial_capacity, Atomic::null())),
        minimum_capacity: initial_capacity,
        resize_mutex: AtomicBool::new(false),
        hasher,
    }
}
/// Inserts a key-value pair into the map.
///
/// On success returns an `Accessor` that holds the cell lock and points at
/// the newly inserted entry. If the key already exists, returns
/// `Err((accessor, value))` with an accessor on the existing entry and the
/// value that was not inserted.
pub fn insert<'a>(
&'a self,
key: K,
value: V,
) -> Result<Accessor<'a, K, V, H>, (Accessor<'a, K, V, H>, V)> {
let (hash, partial_hash) = self.hash(&key);
// Attempt at most one growth per call.
let mut resize_triggered = false;
loop {
let (mut accessor, cell_index) = self.acquire(&key, hash, partial_hash);
// A non-null entry pointer means the key is already present.
if !accessor.entry_ptr.is_null() {
return Err((accessor, value));
}
// If the locked cell is full and lies in the sampled range, release
// the lock, try to enlarge the map, and reacquire.
if !resize_triggered
&& accessor.cell_locker.full()
&& cell_index < cell::ARRAY_SIZE as usize
{
drop(accessor);
self.resize(false);
resize_triggered = true;
continue;
}
// Insert into the locked cell and record the entry's location in the
// accessor so the caller can read or erase it.
let (sub_index, entry_array_link_ptr, entry_ptr) =
accessor.cell_locker.insert(key, partial_hash, value);
accessor.sub_index = sub_index;
accessor.entry_array_link_ptr = entry_array_link_ptr;
accessor.entry_ptr = entry_ptr;
return Ok(accessor);
}
}
/// Inserts the key-value pair, or overwrites the existing value when the
/// key is already present. Always returns an `Accessor` on the entry.
pub fn upsert<'a>(&'a self, key: K, value: V) -> Accessor<'a, K, V, H> {
match self.insert(key, value) {
Ok(result) => result,
Err((result, value)) => {
// Key exists: overwrite its value in place.
// SAFETY(review): assumes the cell lock held by `result` gives
// exclusive access to the entry — confirm all readers synchronize
// through `CellReader::lock`.
let pair_mut_ptr = result.entry_ptr as *mut (K, V);
unsafe { (*pair_mut_ptr).1 = value };
result
}
}
}
/// Returns an `Accessor` on the entry for `key`, or `None` when absent.
///
/// The accessor keeps the entry's cell locked until it is dropped.
pub fn get<'a>(&'a self, key: K) -> Option<Accessor<'a, K, V, H>> {
    let (hash, partial_hash) = self.hash(&key);
    let (accessor, _cell_index) = self.acquire(&key, hash, partial_hash);
    // `acquire` leaves the entry pointer null when the key was not found.
    if accessor.entry_ptr.is_null() {
        None
    } else {
        Some(accessor)
    }
}
/// Removes the entry for `key`, returning `true` when an entry existed and
/// was erased.
pub fn remove(&self, key: K) -> bool {
    // `get` locks the cell; `erase` consumes the accessor, removing the
    // entry and releasing the lock. `false` is a plain literal, so the
    // eager `map_or` form is preferred over `map_or_else`.
    self.get(key).map_or(false, |accessor| accessor.erase())
}
/// Reads the entry for `key` under a shared cell lock, applying `f` to the
/// key and value. Returns `None` when the key is absent.
///
/// The old array (when a resize is in flight) is probed before the current
/// one, and the call helps drive the pending rehash forward.
pub fn read<U, F: FnOnce(&K, &V) -> U>(&self, key: K, f: F) -> Option<U> {
    let (hash, partial_hash) = self.hash(&key);
    let guard = crossbeam_epoch::pin();
    let current_array = self.array.load(Acquire, &guard);
    let current_array_ref = unsafe { current_array.deref() };
    let old_array = current_array_ref.old_array(&guard);
    // Iterate the two candidate arrays via a stack slice: the original
    // `vec![…]` heap-allocated on every lookup (clippy `useless_vec`).
    for &array_ptr in &[old_array.as_raw(), current_array.as_raw()] {
        if array_ptr.is_null() {
            continue;
        }
        // If helping finished the rehash, the old array holds nothing:
        // skip straight to the current array.
        if array_ptr == old_array.as_raw()
            && current_array_ref.partial_rehash(&guard, |key| self.hash(key))
        {
            continue;
        }
        let array_ref = unsafe { &(*array_ptr) };
        let cell_index = array_ref.calculate_cell_index(hash);
        let reader = CellReader::lock(
            array_ref.cell(cell_index),
            array_ref.entry_array(cell_index),
        );
        if let Some(entry_ptr) = reader.search(&key, partial_hash) {
            let entry_ref = unsafe { &(*entry_ptr) };
            return Some(f(&entry_ref.0, &entry_ref.1));
        }
    }
    None
}
/// Applies `f` to every entry; entries for which `f` returns `false` are
/// removed. Returns the `(retained, removed)` counts.
pub fn retain<F: Fn(&K, &mut V) -> bool>(&self, f: F) -> (usize, usize) {
let mut retained_entries = 0;
let mut removed_entries = 0;
let mut scanner = self.iter();
while let Some((key, value)) = scanner.next() {
if !f(key, value) {
// The scanner erases the current entry when it next advances.
scanner.erase_on_next = true;
removed_entries += 1;
} else {
retained_entries += 1;
}
}
// When most entries were removed and occupancy fell to <= 1/8 of the
// capacity, attempt to shrink the array.
if removed_entries > retained_entries {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
if retained_entries <= current_array_ref.capacity() / 8 {
self.resize(true);
}
}
(retained_entries, removed_entries)
}
/// Removes every entry from the map, returning how many were removed.
pub fn clear(&self) -> usize {
    // Retain nothing; the second element of the pair is the removal count.
    let (_retained, removed) = self.retain(|_, _| false);
    removed
}
/// Estimates the number of entries by sampling cells.
///
/// `f` receives the current capacity and returns the desired sample size;
/// the value is clamped to the capacity and rounded up to a power of two.
/// Sampled cell counts are extrapolated to the whole array, so the result
/// is an estimate rather than an exact count.
pub fn len<F: FnOnce(usize) -> usize>(&self, f: F) -> usize {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
let capacity = current_array_ref.capacity();
let num_samples = std::cmp::min(f(capacity), capacity).next_power_of_two();
let num_cells_to_sample = (num_samples / cell::ARRAY_SIZE as usize).max(1);
// Help drive an in-flight rehash so the sampled cells of the current
// array are as populated as possible.
if !old_array.is_null() {
for _ in 0..num_cells_to_sample {
if current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
break;
}
}
}
let mut num_entries = 0;
for i in 0..num_cells_to_sample {
let (size, linked_entries) = current_array_ref.cell(i).size();
num_entries += size + linked_entries;
}
// Scale the sampled count up to the full array.
num_entries * (current_array_ref.num_cells() / num_cells_to_sample)
}
/// Collects capacity and occupancy statistics by scanning every cell of the
/// current array and, when a resize is in flight, the old array as well.
pub fn statistics(&self) -> Statistics {
    let mut statistics = Statistics {
        capacity: 0,
        effective_capacity: 0,
        cells: 0,
        killed_entries: 0,
        empty_cells: 0,
        max_consecutive_empty_cells: 0,
        entries: 0,
        linked_entries: 0,
        cells_having_link: 0,
        max_link_length: 0,
    };
    let guard = crossbeam_epoch::pin();
    let current_array = self.array.load(Acquire, &guard);
    let old_array = unsafe { current_array.deref().old_array(&guard) };
    // Stack slice instead of the original `vec![…]` (clippy `useless_vec`).
    for &array_ptr in &[old_array.as_raw(), current_array.as_raw()] {
        if array_ptr.is_null() {
            continue;
        }
        let array_ref = unsafe { &(*array_ptr) };
        let num_cells = array_ref.num_cells();
        // Length of the run of empty cells currently being tracked.
        let mut consecutive_empty_cells = 0;
        statistics.capacity += num_cells * cell::ARRAY_SIZE as usize;
        if array_ptr == current_array.as_raw() {
            statistics.effective_capacity = num_cells * cell::ARRAY_SIZE as usize;
        }
        statistics.cells += num_cells;
        for i in 0..num_cells {
            let (size, linked_entries) = array_ref.cell(i).size();
            statistics.entries += size + linked_entries;
            if size == 0 {
                statistics.empty_cells += 1;
                consecutive_empty_cells += 1;
            } else {
                if statistics.max_consecutive_empty_cells < consecutive_empty_cells {
                    statistics.max_consecutive_empty_cells = consecutive_empty_cells;
                }
                consecutive_empty_cells = 0;
            }
            if linked_entries > 0 {
                statistics.linked_entries += linked_entries;
                statistics.cells_having_link += 1;
                if statistics.max_link_length < linked_entries {
                    statistics.max_link_length = linked_entries;
                }
            }
            if array_ref.cell(i).killed() {
                statistics.killed_entries += 1;
            }
        }
        // Bug fix: a run of empty cells that reaches the end of the array
        // was never folded into the maximum, because the update above only
        // fires when a non-empty cell follows the run.
        if statistics.max_consecutive_empty_cells < consecutive_empty_cells {
            statistics.max_consecutive_empty_cells = consecutive_empty_cells;
        }
    }
    statistics
}
/// Returns a scanner positioned on the first entry of the map.
///
/// When the map is empty the scanner yields nothing.
pub fn iter<'a>(&'a self) -> Scanner<'a, K, V, H> {
    // Try to position a scanner on the first populated cell; otherwise
    // construct an inert scanner that immediately reports exhaustion.
    let (locker, array_ptr, cell_index) = self.first();
    locker
        .and_then(|locker| self.pick(locker, array_ptr, cell_index))
        .unwrap_or_else(|| Scanner {
            accessor: None,
            array_ptr: std::ptr::null(),
            cell_index: 0,
            activated: false,
            erase_on_next: false,
        })
}
/// Hashes `key` with the map's `BuildHasher` and returns the full 64-bit
/// hash together with its low 16 bits as the partial hash.
fn hash(&self, key: &K) -> (u64, u16) {
    let mut hasher = self.hasher.build_hasher();
    key.hash(&mut hasher);
    // Run xor-rotate / multiply avalanche rounds over the raw hash so the
    // partial hash taken from the low bits is well mixed.
    let mut bits = hasher.finish();
    bits ^= bits.rotate_right(25) ^ bits.rotate_right(50);
    bits = bits.wrapping_mul(0xA24BAED4963EE407u64);
    bits ^= bits.rotate_right(24) ^ bits.rotate_right(49);
    bits = bits.wrapping_mul(0x9FB21C651E98DF25u64);
    bits ^= bits >> 28;
    (bits, (bits & 0xFFFF).try_into().unwrap())
}
/// Locks the cell responsible for `key` and locates the entry if present.
///
/// Returns the accessor (its entry pointer is null when the key is absent)
/// and the index of the locked cell within its array.
fn acquire<'a>(
&'a self,
key: &K,
hash: u64,
partial_hash: u16,
) -> (Accessor<'a, K, V, H>, usize) {
let guard = crossbeam_epoch::pin();
loop {
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
if !old_array.is_null() {
// Help with rehashing; restart when it reports completion so the
// old array is no longer consulted.
if current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
continue;
}
let (mut locker, entry_array_link_ptr, entry_ptr, cell_index, sub_index) =
self.search(&key, hash, partial_hash, old_array.as_raw());
if !entry_ptr.is_null() {
// The key still lives in the old array.
return (
Accessor {
hash_map: &self,
cell_locker: locker,
cell_in_sampling_range: cell_index < cell::ARRAY_SIZE as usize,
sub_index: sub_index,
entry_array_link_ptr: entry_array_link_ptr,
entry_ptr: entry_ptr,
},
cell_index,
);
} else if !locker.killed() {
// Not found in the old array: migrate (kill) its cell so later
// lookups only need the current array.
let old_array_ref = unsafe { old_array.deref() };
current_array_ref.kill_cell(&mut locker, old_array_ref, cell_index, &|key| {
self.hash(key)
});
}
}
let (locker, entry_array_link_ptr, entry_ptr, cell_index, sub_index) =
self.search(&key, hash, partial_hash, current_array.as_raw());
// A killed cell means the array was replaced under us; retry the loop.
if !locker.killed() {
return (
Accessor {
hash_map: &self,
cell_locker: locker,
cell_in_sampling_range: cell_index < cell::ARRAY_SIZE as usize,
sub_index: sub_index,
entry_array_link_ptr: entry_array_link_ptr,
entry_ptr: entry_ptr,
},
cell_index,
);
}
}
}
/// Removes the entry the accessor points at, then considers shrinking.
fn erase<'a>(&'a self, mut accessor: Accessor<'a, K, V, H>) {
accessor.cell_locker.remove(
true,
accessor.sub_index,
accessor.entry_array_link_ptr,
accessor.entry_ptr,
);
// If a cell in the sampled range became empty the map may be sparse:
// release the cell lock first, then attempt to shrink.
if accessor.cell_in_sampling_range && accessor.cell_locker.empty() {
drop(accessor);
self.resize(true);
}
}
/// Locks the cell for `hash` in the given array and searches it for `key`.
///
/// Always returns the cell locker — even for killed or empty cells — along
/// with the entry's location, or null pointers when the key is not found.
/// NOTE(review): the returned lifetime `'a` is not tied to `&self`; it is
/// materialized through the raw-pointer deref of `array_ptr`.
fn search<'a>(
&self,
key: &K,
hash: u64,
partial_hash: u16,
array_ptr: *const Array<K, V>,
) -> (
CellLocker<'a, K, V>,
*const EntryArrayLink<K, V>,
*const (K, V),
usize,
u8,
) {
let array_ref = unsafe { &(*array_ptr) };
let cell_index = array_ref.calculate_cell_index(hash);
let locker = CellLocker::lock(
array_ref.cell(cell_index),
array_ref.entry_array(cell_index),
);
// Only probe cells that can hold live entries.
if !locker.killed() && !locker.empty() {
if let Some((sub_index, entry_array_link_ptr, entry_ptr)) =
locker.search(key, partial_hash)
{
return (
locker,
entry_array_link_ptr,
entry_ptr,
cell_index,
sub_index,
);
}
}
(locker, std::ptr::null(), std::ptr::null(), cell_index, 0)
}
/// Finds the first non-empty cell across the old and current arrays,
/// returning its locker, owning array, and index; `(None, null, 0)` when
/// the map has no populated cell.
fn first<'a>(&'a self) -> (Option<CellLocker<'a, K, V>>, *const Array<K, V>, usize) {
    let guard = crossbeam_epoch::pin();
    let mut current_array = self.array.load(Acquire, &guard);
    loop {
        let old_array = unsafe { current_array.deref().old_array(&guard) };
        // Stack slice instead of the original `vec![…]`, which heap-
        // allocated on every call (clippy `useless_vec`).
        for &array_ptr in &[old_array.as_raw(), current_array.as_raw()] {
            if array_ptr.is_null() {
                continue;
            }
            let array_ref = unsafe { &(*array_ptr) };
            let num_cells = array_ref.num_cells();
            for cell_index in 0..num_cells {
                let locker = CellLocker::lock(
                    array_ref.cell(cell_index),
                    array_ref.entry_array(cell_index),
                );
                if !locker.empty() {
                    return (Some(locker), array_ptr, cell_index);
                }
            }
        }
        // Rescan when the array was replaced while we were scanning.
        let current_array_new = self.array.load(Acquire, &guard);
        if current_array == current_array_new {
            break;
        }
        current_array = current_array_new;
    }
    (None, std::ptr::null(), 0)
}
/// Advances a scan past `current_index` in `array_ptr`, returning a scanner
/// on the next populated cell, or `None` when the scan is exhausted.
///
/// Order: remainder of the old array (if scanning it), then the current
/// array, then — if a killed cell revealed a concurrent resize — the newly
/// installed array.
fn next<'a>(
&'a self,
array_ptr: *const Array<K, V>,
current_index: usize,
) -> Option<Scanner<'a, K, V, H>> {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { &(*current_array.as_raw()) };
let old_array = current_array_ref.old_array(&guard);
debug_assert!(array_ptr == current_array.as_raw() || array_ptr == old_array.as_raw());
// Finish scanning the old array when that is where the caller stopped.
if old_array.as_raw() == array_ptr {
let old_array_ref = unsafe { &(*old_array.as_raw()) };
let num_cells = old_array_ref.num_cells();
for cell_index in (current_index + 1)..num_cells {
let locker = CellLocker::lock(
old_array_ref.cell(cell_index),
old_array_ref.entry_array(cell_index),
);
if !locker.killed() && !locker.empty() {
if let Some(scanner) = self.pick(locker, old_array.as_raw(), cell_index) {
return Some(scanner);
}
}
}
}
// Records the freshly installed array if a killed cell is observed below.
let mut new_array = Shared::<Array<K, V>>::null();
let num_cells = current_array_ref.num_cells();
// Start from the beginning of the current array after finishing the old
// one; otherwise continue after the caller's position.
let start_index = if old_array.as_raw() == array_ptr {
0
} else {
current_index + 1
};
for cell_index in (start_index)..num_cells {
let locker = CellLocker::lock(
current_array_ref.cell(cell_index),
current_array_ref.entry_array(cell_index),
);
if !locker.killed() && !locker.empty() {
if let Some(scanner) = self.pick(locker, current_array.as_raw(), cell_index) {
return Some(scanner);
}
} else if locker.killed() && new_array.is_null() {
// A killed cell means a resize moved entries; pick up the new array.
new_array = self.array.load(Acquire, &guard);
}
}
// Scan the replacement array discovered above, if any.
if !new_array.is_null() {
let new_array_ref = unsafe { &(*new_array.as_raw()) };
let num_cells = new_array_ref.num_cells();
for cell_index in 0..num_cells {
let locker = CellLocker::lock(
new_array_ref.cell(cell_index),
new_array_ref.entry_array(cell_index),
);
if !locker.killed() && !locker.empty() {
if let Some(scanner) = self.pick(locker, new_array.as_raw(), cell_index) {
return Some(scanner);
}
}
}
}
None
}
/// Builds a `Scanner` positioned on the first entry of the locked cell,
/// or returns `None` when the cell holds no entry (dropping the locker and
/// releasing the cell).
fn pick<'a>(
    &'a self,
    cell_locker: CellLocker<'a, K, V>,
    array_ptr: *const Array<K, V>,
    cell_index: usize,
) -> Option<Scanner<'a, K, V, H>> {
    if let Some((sub_index, entry_array_link_ptr, entry_ptr)) = cell_locker.first() {
        return Some(Scanner {
            accessor: Some(Accessor {
                // `self` is already `&'a HashMap`; the original `&self`
                // only compiled through deref coercion (needless borrow).
                hash_map: self,
                cell_locker,
                cell_in_sampling_range: cell_index < cell::ARRAY_SIZE as usize,
                sub_index,
                entry_array_link_ptr,
                entry_ptr,
            }),
            array_ptr,
            cell_index,
            activated: false,
            erase_on_next: false,
        });
    }
    None
}
/// Grows or shrinks the cell array when occupancy heuristics warrant it.
///
/// Best-effort: bails out if a rehash is still in progress, if a sample of
/// the leading cells suggests no resize is needed, or if another thread
/// holds the resize flag.
fn resize(&self, shrink: bool) {
let guard = crossbeam_epoch::pin();
let current_array = self.array.load(Acquire, &guard);
let current_array_ref = unsafe { current_array.deref() };
let old_array = current_array_ref.old_array(&guard);
if !old_array.is_null() {
// A previous resize is still draining; help it, and only continue
// once the old array has been fully rehashed.
if !current_array_ref.partial_rehash(&guard, |key| self.hash(key)) {
return;
}
} else if shrink && current_array_ref.capacity() == self.minimum_capacity {
// Already at the configured floor.
return;
}
// Sample the leading cells to decide cheaply whether a resize could help.
let mut num_entries = 0;
for i in 0..current_array_ref.num_cells().min(cell::ARRAY_SIZE as usize) {
num_entries += current_array_ref.cell(i).size().0;
if shrink {
// Enough entries seen: the map is not sparse, do not shrink.
if num_entries >= cell::ARRAY_SIZE as usize {
return;
}
} else {
// Enough free slots seen: the map is not dense, do not grow.
if ((i + 1) * cell::ARRAY_SIZE as usize) - num_entries >= cell::ARRAY_SIZE as usize
{
return;
}
}
}
// Single-writer resize section, guarded by the resize flag.
if !self.resize_mutex.swap(true, Acquire) {
if current_array != self.array.load(Acquire, &guard) {
// Someone already swapped the array; nothing to do.
self.resize_mutex.store(false, Release);
return;
}
let capacity = current_array_ref.capacity();
// Estimate the entry count by sampling up to 1/16 of the capacity.
let estimated_num_entries = self.len(|capacity| (capacity / 16).min(16384));
let new_capacity = if estimated_num_entries >= (capacity / 8) * 7 {
// >= 7/8 full: grow, by at most 2^MAX_ENLARGE_FACTOR, while
// guarding the shift amounts against usize overflow.
if capacity >= (1usize << (std::mem::size_of::<usize>() * 8 - 1)) {
capacity
} else {
(capacity.min(
1usize
<< (std::mem::size_of::<usize>() * 8
- (MAX_ENLARGE_FACTOR as usize + 1)),
) * (1 << MAX_ENLARGE_FACTOR as usize))
.min(estimated_num_entries.next_power_of_two() * 2)
}
} else if estimated_num_entries <= capacity / 8 {
// <= 1/8 full: shrink toward the estimate, never below the floor.
estimated_num_entries
.next_power_of_two()
.max(self.minimum_capacity)
} else {
capacity
};
if new_capacity != capacity {
// Install the new array with the current one linked as its old
// array; entries migrate lazily via partial_rehash.
let new_array = Array::<K, V>::new(new_capacity, Atomic::from(current_array));
if (!shrink && new_array.capacity() > capacity)
|| (shrink && new_array.capacity() == new_capacity)
{
self.array.store(Owned::new(new_array), Release);
}
}
self.resize_mutex.store(false, Release);
}
}
}
impl<K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Drop for HashMap<K, V, H> {
// Remove every remaining entry so their destructors run via `clear`.
fn drop(&mut self) {
self.clear();
}
}
/// A handle to a single entry that holds the lock on the entry's cell.
///
/// Dropping the accessor releases the cell lock.
pub struct Accessor<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
hash_map: &'a HashMap<K, V, H>,
// Exclusive lock on the cell containing the entry.
cell_locker: CellLocker<'a, K, V>,
// Whether the cell index is below cell::ARRAY_SIZE — the range sampled
// by the resize heuristics (see `HashMap::erase`).
cell_in_sampling_range: bool,
// Position of the entry within the cell.
sub_index: u8,
// Link node holding the entry when it overflowed the inline array.
entry_array_link_ptr: *const EntryArrayLink<K, V>,
// Null when the accessor does not point at an entry.
entry_ptr: *const (K, V),
}
impl<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Accessor<'a, K, V, H> {
/// Returns references to the entry's key and value.
///
/// NOTE(review): hands out `&mut V` derived from `&self`; soundness relies
/// on the held cell lock providing exclusive access to the entry — confirm
/// that readers also synchronize through the cell.
pub fn get(&'a self) -> (&'a K, &'a mut V) {
unsafe {
let key_ptr = &(*self.entry_ptr).0 as *const K;
let value_ptr = &(*self.entry_ptr).1 as *const V;
let value_mut_ptr = value_ptr as *mut V;
(&(*key_ptr), &mut (*value_mut_ptr))
}
}
/// Erases the entry the accessor points at, consuming the accessor.
/// Returns `false` when there is no entry to erase.
pub fn erase(self) -> bool {
if self.entry_ptr.is_null() {
return false;
}
self.hash_map.erase(self);
true
}
}
/// An iterator over all entries, locking one cell at a time.
pub struct Scanner<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> {
// Current position; `None` once the scan is exhausted.
accessor: Option<Accessor<'a, K, V, H>>,
// Array the current cell belongs to.
array_ptr: *const Array<K, V>,
cell_index: usize,
// `false` until the first `next` call; the scanner starts positioned on
// the first entry, so the first call must not advance.
activated: bool,
// When set, the current entry is erased on the following `next` call
// (used by `HashMap::retain`).
erase_on_next: bool,
}
impl<'a, K: Eq + Hash + Sync, V: Sync, H: BuildHasher> Iterator for Scanner<'a, K, V, H> {
type Item = (&'a K, &'a mut V);
/// Yields the current entry, advancing on every call after the first.
fn next(&mut self) -> Option<Self::Item> {
if !self.activated {
// First call: the scanner is already positioned on an entry.
self.activated = true;
} else if self.accessor.is_some() {
// Consume the erase request, if any, while advancing.
let erase = self.erase_on_next;
if erase {
self.erase_on_next = false;
}
// Try to step to the next entry within the current cell.
if let Some((next_sub_index, next_entry_array_link_ptr, next_entry_ptr)) =
self.accessor.as_mut().map_or_else(
|| None,
|accessor| {
accessor.cell_locker.next(
erase,
true,
accessor.sub_index,
accessor.entry_array_link_ptr,
accessor.entry_ptr,
)
},
)
{
self.accessor.as_mut().map_or_else(
|| (),
|accessor| {
accessor.sub_index = next_sub_index;
accessor.entry_array_link_ptr = next_entry_array_link_ptr;
accessor.entry_ptr = next_entry_ptr;
},
);
} else {
// Cell exhausted: move to the next populated cell via the map.
let current_array_ptr = self.array_ptr;
let current_cell_index = self.cell_index;
let scanner = self.accessor.as_ref().map_or_else(
|| None,
|accessor| {
accessor
.hash_map
.next(current_array_ptr, current_cell_index)
},
);
// Drop the old accessor (and its cell lock) before adopting the
// replacement scanner's position.
self.accessor.take();
if let Some(mut scanner) = scanner {
self.accessor = scanner.accessor.take();
self.array_ptr = scanner.array_ptr;
self.cell_index = scanner.cell_index;
}
}
}
if let Some(accessor) = &self.accessor {
// NOTE(review): like `Accessor::get`, the `&mut V` relies on the
// held cell lock for exclusivity.
unsafe {
let key_ptr = &(*accessor.entry_ptr).0 as *const K;
let value_ptr = &(*accessor.entry_ptr).1 as *const V;
let value_mut_ptr = value_ptr as *mut V;
return Some((&(*key_ptr), &mut (*value_mut_ptr)));
}
}
None
}
}
/// A snapshot of map occupancy figures produced by `HashMap::statistics`.
pub struct Statistics {
// Total slots across the current and (if any) old arrays.
capacity: usize,
// Slots in the current array only.
effective_capacity: usize,
// Entries counted, including linked (overflow) entries.
entries: usize,
// Number of cells observed in the killed (migrated) state.
killed_entries: usize,
cells: usize,
empty_cells: usize,
max_consecutive_empty_cells: usize,
// Entries stored in overflow links rather than inline cell slots.
linked_entries: usize,
cells_having_link: usize,
// Largest number of linked entries observed in a single cell.
max_link_length: usize,
}
impl Statistics {
/// Total slots across the scanned arrays.
pub fn capacity(&self) -> usize {
self.capacity
}
/// Slots in the current array only.
pub fn effective_capacity(&self) -> usize {
self.effective_capacity
}
/// Entries counted, including linked entries.
pub fn num_entries(&self) -> usize {
self.entries
}
/// Cells observed in the killed (migrated) state.
pub fn num_killed_entries(&self) -> usize {
self.killed_entries
}
/// Cells scanned across the arrays.
pub fn num_cells(&self) -> usize {
self.cells
}
/// Cells with no inline entries.
pub fn num_empty_cells(&self) -> usize {
self.empty_cells
}
/// Longest observed run of empty cells.
pub fn max_consecutive_empty_cells(&self) -> usize {
self.max_consecutive_empty_cells
}
/// Entries stored in overflow links.
pub fn num_linked_entries(&self) -> usize {
self.linked_entries
}
/// Cells that have at least one overflow link.
pub fn num_cells_having_link(&self) -> usize {
self.cells_having_link
}
/// Largest linked-entry count in a single cell.
pub fn max_link_length(&self) -> usize {
self.max_link_length
}
}
impl fmt::Display for Statistics {
    /// Renders every statistic on a single line for quick logging.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "capacity: {}, effective_capacity: {}, cells: {}, killed_entries: {}, empty_cells: {}, max_consecutive_empty_cells: {}, entries: {}, linked_entries: {}, cells_having_link: {}, max_link_length: {}",
            self.capacity(),
            self.effective_capacity(),
            self.num_cells(),
            self.num_killed_entries(),
            self.num_empty_cells(),
            self.max_consecutive_empty_cells(),
            self.num_entries(),
            self.num_linked_entries(),
            self.num_cells_having_link(),
            self.max_link_length()
        )
    }
}