use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash};
const DEFAULT_INITIAL_CAPACITY: usize = 16;
const DEFAULT_MAX_LOAD_FACTOR: f64 = 0.75;
const DEFAULT_TOMBSTONE_THRESHOLD: f64 = 0.2;
#[derive(Debug, Clone, Default)]
enum Slot<K, V> {
#[default]
Empty,
Tombstone,
Occupied(K, V),
}
#[derive(Debug, Clone, Copy)]
pub enum ProbingStrategy {
Linear,
Quadratic,
DoubleHash,
}
#[derive(Debug)]
pub struct OpenAddressingBuilder<S> {
capacity: usize,
max_load_factor: f64,
tombstone_threshold: f64,
strategy: ProbingStrategy,
hasher: S, hasher2: Option<S>, }
impl Default for OpenAddressingBuilder<RandomState> {
fn default() -> Self {
Self {
capacity: DEFAULT_INITIAL_CAPACITY,
max_load_factor: DEFAULT_MAX_LOAD_FACTOR,
tombstone_threshold: DEFAULT_TOMBSTONE_THRESHOLD,
strategy: ProbingStrategy::Linear,
hasher: RandomState::new(),
hasher2: None,
}
}
}
impl OpenAddressingBuilder<RandomState> {
pub fn new() -> Self {
Self::default()
}
}
impl<S: BuildHasher + Clone> OpenAddressingBuilder<S> {
pub fn with_capacity(mut self, cap: usize) -> Self {
self.capacity = cap.max(1);
self
}
pub fn with_max_load_factor(mut self, lf: f64) -> Self {
assert!(
(0.0..1.0).contains(&lf),
"Load factor must be between 0 and 1"
);
self.max_load_factor = lf;
self
}
pub fn with_tombstone_threshold(mut self, ratio: f64) -> Self {
assert!(
(0.0..1.0).contains(&ratio),
"Tombstone threshold must be between 0 and 1"
);
self.tombstone_threshold = ratio;
self
}
pub fn with_strategy(mut self, strategy: ProbingStrategy) -> Self {
self.strategy = strategy;
self
}
pub fn with_double_hasher(mut self, secondary: S) -> Self {
self.hasher2 = Some(secondary);
self
}
pub fn build<K: Hash + Eq + Clone, V: Clone>(self) -> OpenAddressingMap<K, V, S> {
let capacity = self.capacity.next_power_of_two();
let mut slots = Vec::with_capacity(capacity);
slots.resize_with(capacity, Default::default);
if let ProbingStrategy::DoubleHash = self.strategy {
if self.hasher2.is_none() {
panic!("DoubleHash strategy requires a second hasher. Use `.with_double_hasher(...)` first.");
}
}
OpenAddressingMap {
slots,
capacity,
len: 0,
tombstones: 0,
strategy: self.strategy,
buildhasher1: self.hasher,
buildhasher2: self.hasher2.clone(),
max_load_factor: self.max_load_factor,
tombstone_threshold: self.tombstone_threshold,
}
}
}
#[derive(Debug)]
pub struct OpenAddressingMap<K, V, S> {
slots: Vec<Slot<K, V>>,
capacity: usize,
len: usize,
tombstones: usize,
strategy: ProbingStrategy,
buildhasher1: S,
buildhasher2: Option<S>,
max_load_factor: f64,
tombstone_threshold: f64,
}
impl<K: Hash + Eq + Clone, V: Clone> Default for OpenAddressingMap<K, V, RandomState> {
fn default() -> Self {
Self::new()
}
}
impl<K: Hash + Eq + Clone, V: Clone> OpenAddressingMap<K, V, RandomState> {
pub fn new() -> Self {
OpenAddressingBuilder::new().build()
}
}
impl<K: Hash + Eq + Clone, V: Clone, S: BuildHasher + Clone> OpenAddressingMap<K, V, S> {
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
if (self.len + 1) as f64 / self.capacity as f64 > self.max_load_factor
|| self.tombstones as f64 / self.capacity as f64 > self.tombstone_threshold
{
self.grow();
}
let mut idx = self.primary_index(&key);
let second_hash = self.secondary_hash(&key);
let capacity_mask = self.capacity - 1;
let mut probe_i = 0;
loop {
let slot = &mut self.slots[idx];
match slot {
Slot::Empty => {
*slot = Slot::Occupied(key, value);
self.len += 1;
return None;
}
Slot::Occupied(k, v) => {
if k == &key {
let oldv = std::mem::replace(v, value);
return Some(oldv);
}
}
Slot::Tombstone => {
let tomb_idx = idx;
let free_spot = tomb_idx;
let replaced =
self.probe_find_or_place(key, value, second_hash, probe_i, free_spot)?;
return replaced;
}
}
probe_i += 1;
if probe_i > self.capacity {
self.grow();
return self.insert(key, value);
}
let next_idx = match self.strategy {
ProbingStrategy::Linear => (idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => (idx + probe_i * probe_i) & capacity_mask,
ProbingStrategy::DoubleHash => (idx + probe_i * second_hash) & capacity_mask,
};
idx = next_idx;
}
}
pub fn get(&self, key: &K) -> Option<&V> {
let mut idx = self.primary_index(key);
let second_hash = self.secondary_hash(key);
let capacity_mask = self.capacity - 1;
let mut probe_i = 0;
loop {
if let Some(v) = match &self.slots[idx] {
Slot::Empty => None,
Slot::Occupied(k, v) if k == key => Some(v),
_ => None,
} {
return Some(v);
}
probe_i += 1;
if probe_i > self.capacity {
return None;
}
let next_idx = match self.strategy {
ProbingStrategy::Linear => (idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => (idx + probe_i * probe_i) & capacity_mask,
ProbingStrategy::DoubleHash => (idx + probe_i * second_hash) & capacity_mask,
};
idx = next_idx;
}
}
pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
let mut idx = self.primary_index(key);
let second_hash = self.secondary_hash(key);
let capacity_mask = self.capacity - 1;
let mut found_idx = None;
let mut probe_i = 0;
while probe_i <= self.capacity {
if let Slot::Occupied(k, _) = &self.slots[idx] {
if k == key {
found_idx = Some(idx);
break;
}
} else if let Slot::Empty = &self.slots[idx] {
return None;
}
probe_i += 1;
idx = match self.strategy {
ProbingStrategy::Linear => (idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => (idx + probe_i * probe_i) & capacity_mask,
ProbingStrategy::DoubleHash => (idx + probe_i * second_hash) & capacity_mask,
};
}
if let Some(idx) = found_idx {
if let Slot::Occupied(_, v) = &mut self.slots[idx] {
return Some(v);
}
}
None
}
pub fn remove(&mut self, key: &K) -> Option<V> {
let mut idx = self.primary_index(key);
let second_hash = self.secondary_hash(key);
let capacity_mask = self.capacity - 1;
let mut probe_i = 0;
loop {
let slot = &mut self.slots[idx];
match slot {
Slot::Empty => {
return None;
}
Slot::Occupied(k, _v) => {
if k == key {
let old = match std::mem::replace(slot, Slot::Tombstone) {
Slot::Occupied(_, v) => v,
_ => unreachable!(),
};
self.len -= 1;
self.tombstones += 1;
return Some(old);
}
}
Slot::Tombstone => {
}
}
probe_i += 1;
if probe_i > self.capacity {
return None;
}
let next_idx = match self.strategy {
ProbingStrategy::Linear => (idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => (idx + probe_i * probe_i) & capacity_mask,
ProbingStrategy::DoubleHash => (idx + probe_i * second_hash) & capacity_mask,
};
idx = next_idx;
}
}
pub fn clear(&mut self) {
for slot in self.slots.iter_mut() {
*slot = Slot::Empty;
}
self.len = 0;
self.tombstones = 0;
}
pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
self.slots.iter().filter_map(|slot| match slot {
Slot::Occupied(k, v) => Some((k, v)),
_ => None,
})
}
fn grow(&mut self) {
let new_cap = (self.capacity * 2).max(2);
self.rehash(new_cap);
}
fn rehash(&mut self, new_cap: usize) {
let old_slots = std::mem::replace(&mut self.slots, vec![Slot::Empty; new_cap]);
self.capacity = new_cap;
self.len = 0;
self.tombstones = 0;
for slot in old_slots {
if let Slot::Occupied(k, v) = slot {
let _ = self.insert(k, v);
}
}
}
fn primary_index(&self, key: &K) -> usize {
(self.buildhasher1.hash_one(key) as usize) & (self.capacity - 1)
}
fn secondary_hash(&self, key: &K) -> usize {
match self.strategy {
ProbingStrategy::DoubleHash => {
let bh2 = self.buildhasher2.as_ref().unwrap();
let h2 = bh2.hash_one(key) as usize;
(h2 & (self.capacity - 1)) | 1
}
_ => 0,
}
}
fn probe_find_or_place(
&mut self,
key: K,
value: V,
second_hash: usize,
start_i: usize,
tomb_idx: usize,
) -> Option<Option<V>> {
let capacity_mask = self.capacity - 1;
let mut idx = match self.strategy {
ProbingStrategy::Linear => (tomb_idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => {
(tomb_idx + (start_i + 1) * (start_i + 1)) & capacity_mask
}
ProbingStrategy::DoubleHash => (tomb_idx + (start_i + 1) * second_hash) & capacity_mask,
};
for probe_i in (start_i + 1)..self.capacity {
let (left, right) = if idx > tomb_idx {
let (left, right) = self.slots.split_at_mut(idx);
(&mut left[tomb_idx], &mut right[0])
} else {
let (left, right) = self.slots.split_at_mut(tomb_idx);
(&mut right[0], &mut left[idx])
};
match right {
Slot::Empty => {
*left = Slot::Occupied(key, value);
self.len += 1;
self.tombstones -= 1;
return None;
}
Slot::Occupied(k, v) if k == &key => {
let old = v.clone();
*right = Slot::Occupied(key, value);
return Some(Some(old));
}
_ => {}
}
idx = match self.strategy {
ProbingStrategy::Linear => (idx + 1) & capacity_mask,
ProbingStrategy::Quadratic => {
(tomb_idx + (probe_i + 1) * (probe_i + 1)) & capacity_mask
}
ProbingStrategy::DoubleHash => {
(tomb_idx + (probe_i + 1) * second_hash) & capacity_mask
}
};
}
self.slots[tomb_idx] = Slot::Occupied(key, value);
self.len += 1;
self.tombstones -= 1;
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn basic_ops() {
let mut map = OpenAddressingBuilder::new()
.with_capacity(8)
.with_strategy(ProbingStrategy::Linear)
.build::<String, i32>();
assert_eq!(map.len(), 0);
assert!(map.is_empty());
map.insert("hello".to_string(), 1);
map.insert("world".to_string(), 2);
map.insert("rust".to_string(), 3);
assert_eq!(map.len(), 3);
assert_eq!(map.get(&"hello".to_string()), Some(&1));
assert_eq!(map.get(&"missing".to_string()), None);
assert_eq!(map.remove(&"world".to_string()), Some(2));
assert_eq!(map.get(&"world".to_string()), None);
assert_eq!(map.len(), 2);
let old = map.insert("hello".to_string(), 10);
assert_eq!(old, Some(1));
assert_eq!(map.get(&"hello".to_string()), Some(&10));
for i in 0..20 {
map.insert(format!("key{}", i), i);
}
for i in 0..20 {
assert_eq!(map.get(&format!("key{}", i)), Some(&i));
}
}
#[test]
fn double_hashing() {
let mut map = OpenAddressingBuilder::new()
.with_capacity(8)
.with_strategy(ProbingStrategy::DoubleHash)
.with_double_hasher(RandomState::new())
.build::<i32, i32>();
map.insert(1, 10);
map.insert(2, 20);
map.insert(3, 30);
assert_eq!(map.get(&1), Some(&10));
assert_eq!(map.get(&2), Some(&20));
assert_eq!(map.get(&99), None);
let rm = map.remove(&2);
assert_eq!(rm, Some(20));
assert_eq!(map.get(&2), None);
}
}