use crate::containers::FastVec;
use crate::error::{Result, ZiporaError};
use crate::hash_map::cache_locality::{CacheMetrics, Prefetcher};
use crate::hash_map::simd_string_ops::SimdStringOps;
use crate::memory::cache_layout::{CacheOptimizedAllocator, PrefetchHint};
use std::borrow::Borrow;
use std::hash::{BuildHasher, Hash, Hasher};
/// Pluggable collision-resolution policy for an open-addressing hash table.
///
/// The table owns the bucket array; the strategy decides how probing,
/// displacement, and deletion behave. Every method receives a precomputed
/// 64-bit `hash`, so keys are never re-hashed inside the strategy.
pub trait CollisionResolutionStrategy<K, V> {
    /// Tuning parameters for the strategy (e.g. probe-length limits).
    type Config: Clone;
    /// Mutable bookkeeping carried across operations (probe/collision counters).
    type Context: Default;
    /// Inserts `key`/`value` into `buckets`; returns the previous value if
    /// the key was already present, or an error if the strategy cannot place
    /// the entry (the caller is expected to resize and retry).
    fn insert(
        &self,
        context: &mut Self::Context,
        buckets: &mut [HashBucket<K, V>],
        key: K,
        value: V,
        hash: u64,
        config: &Self::Config,
    ) -> Result<Option<V>>;
    /// Looks up `key` by its borrowed form `Q`, returning a reference into
    /// `buckets` when found.
    fn get<'a, Q>(
        &self,
        context: &Self::Context,
        buckets: &'a [HashBucket<K, V>],
        key: &Q,
        hash: u64,
        config: &Self::Config,
    ) -> Option<&'a V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized;
    /// Removes `key`, returning its value if it was present.
    fn remove<Q>(
        &self,
        context: &mut Self::Context,
        buckets: &mut [HashBucket<K, V>],
        key: &Q,
        hash: u64,
        config: &Self::Config,
    ) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized;
    /// Whether the table should grow given current occupancy (`len` of
    /// `capacity` slots) and the probe behavior recorded in `context`.
    fn needs_resize(&self, context: &Self::Context, capacity: usize, len: usize) -> bool;
    /// Snapshot of the probe statistics accumulated in `context`.
    fn probe_stats(&self, context: &Self::Context) -> ProbeStats;
}
/// Pluggable storage layout for the bucket array: allocation, resizing, and
/// memory accounting. All methods are associated functions — the layout is
/// stateless apart from `Storage` itself.
pub trait StorageLayoutStrategy<K, V> {
    /// Layout tuning parameters (growth policy, alignment, ...).
    type Config: Clone;
    /// Concrete backing container that holds the buckets.
    type Storage;
    /// Allocates storage for (about) `capacity` buckets. Implementations may
    /// return smaller storage under allocation pressure.
    fn create_storage(capacity: usize, config: &Self::Config) -> Self::Storage;
    /// Grows or shrinks the storage to `new_capacity` bucket slots.
    fn resize_storage(
        storage: &mut Self::Storage,
        new_capacity: usize,
        config: &Self::Config,
    ) -> Result<()>;
    /// Mutable view of the bucket slots.
    fn get_buckets_mut(storage: &mut Self::Storage) -> &mut [HashBucket<K, V>];
    /// Shared view of the bucket slots.
    fn get_buckets(storage: &Self::Storage) -> &[HashBucket<K, V>];
    /// Number of usable (initialized) bucket slots.
    fn capacity(storage: &Self::Storage) -> usize;
    /// Approximate number of bytes owned by the storage.
    fn memory_usage(storage: &Self::Storage) -> usize;
    /// Optional layout-optimization pass; may be a no-op.
    fn optimize_layout(storage: &mut Self::Storage, config: &Self::Config) -> Result<()>;
}
/// Optional optimization hooks invoked around hash-map operations
/// (prefetching, SIMD batching, bulk tuning). Implementations should be
/// cheap — these hooks run on every insert and lookup.
pub trait HashOptimizationStrategy<K, V> {
    /// Strategy tuning parameters.
    type Config: Clone;
    /// Mutable per-map state (counters, prefetch queues, ...).
    type Context: Default;
    /// Called before `key` is inserted (e.g. to prefetch target buckets).
    fn pre_insert(
        &self,
        context: &mut Self::Context,
        key: &K,
        hash: u64,
        buckets: &[HashBucket<K, V>],
        config: &Self::Config,
    );
    /// Called after an insert attempt with whether an insertion took place.
    /// NOTE(review): the exact semantics of `inserted` (new entry vs.
    /// replacement) are not visible in this file — confirm at call sites.
    fn post_insert(
        &self,
        context: &mut Self::Context,
        key: &K,
        inserted: bool,
        config: &Self::Config,
    );
    /// Called before a lookup of `key` (borrowed form `Q`).
    fn pre_lookup<Q>(
        &self,
        context: &mut Self::Context,
        key: &Q,
        hash: u64,
        buckets: &[HashBucket<K, V>],
        config: &Self::Config,
    ) where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized;
    /// Applies a batch of access-pattern hints before bulk work.
    fn bulk_optimize(
        &self,
        context: &mut Self::Context,
        operations: &[OptimizationHint],
        config: &Self::Config,
    );
    /// Exports the counters gathered in `context`.
    fn metrics(&self, context: &Self::Context) -> OptimizationMetrics;
}
/// A single hash-table slot, aligned to a 64-byte cache line so that probing
/// one bucket touches exactly one line.
#[repr(align(64))]
#[derive(Clone)]
pub struct HashBucket<K, V> {
    /// Cached 64-bit hash of `key` (0 for never-used slots).
    pub hash: u64,
    /// `None` when the slot holds no live entry.
    pub key: Option<K>,
    /// Value paired with `key`.
    pub value: Option<V>,
    /// Distance from the key's home slot (Robin Hood bookkeeping).
    pub probe_distance: u16,
    /// Bit `0x8000` marks a tombstone (deleted entry); the remaining bits
    /// are strategy-defined.
    pub flags: u16,
    /// Optional chain link for chaining-style strategies.
    pub next: Option<u32>,
    /// Opaque per-bucket scratch space for strategy implementations.
    pub strategy_data: u64,
}

impl<K, V> Default for HashBucket<K, V> {
    fn default() -> Self {
        Self {
            hash: 0,
            key: None,
            value: None,
            probe_distance: 0,
            flags: 0,
            next: None,
            strategy_data: 0,
        }
    }
}

impl<K, V> HashBucket<K, V> {
    /// True only for slots that never held an entry (or were fully cleared).
    ///
    /// BUG FIX: tombstones (see `mark_deleted`) previously reported
    /// `is_empty() == true` because only `key.is_none()` was checked. Probe
    /// loops that stop at the first empty slot would then terminate at a
    /// tombstone and miss live keys stored further along the probe sequence.
    /// A deleted slot is now "not empty"; use `is_deleted()` to distinguish.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.key.is_none() && !self.is_deleted()
    }
    /// Whether the slot is a tombstone left behind by a deletion.
    #[inline]
    pub fn is_deleted(&self) -> bool {
        self.flags & 0x8000 != 0
    }
    /// Turns the slot into a tombstone, dropping its key and value while
    /// keeping `probe_distance` so probe-chain invariants remain intact.
    pub fn mark_deleted(&mut self) {
        self.flags |= 0x8000;
        self.key = None;
        self.value = None;
    }
    /// Resets the slot to the pristine empty state (also clears the
    /// tombstone bit, since `default()` zeroes `flags`).
    pub fn clear(&mut self) {
        *self = Self::default();
    }
}
/// Aggregate probing statistics reported by a collision-resolution strategy.
#[derive(Debug, Default, Clone)]
pub struct ProbeStats {
    /// Mean probe distance over the recorded operations.
    pub average_probe_distance: f64,
    /// Longest probe sequence observed.
    pub max_probe_distance: u16,
    /// Total probe steps accumulated.
    pub total_probes: u64,
    /// Number of collision events recorded by the strategy.
    pub collision_count: u64,
    /// Variance of probe distances (0.0 when the strategy does not track it).
    pub variance: f64,
}
/// Counters a [`HashOptimizationStrategy`] exposes via `metrics()`. Fields a
/// given strategy does not track are left at their zero defaults.
#[derive(Debug, Default, Clone)]
pub struct OptimizationMetrics {
    /// Cache hits observed (if tracked).
    pub cache_hits: u64,
    /// Cache misses observed (if tracked).
    pub cache_misses: u64,
    /// Prefetches that proved useful (if tracked).
    pub prefetch_hits: u64,
    /// Number of SIMD-assisted operations performed.
    pub simd_operations: u64,
    /// Number of bulk-optimization requests processed.
    pub bulk_operations: u64,
}
/// Access-pattern hints a caller can pass to `bulk_optimize` so a strategy
/// can pre-arrange caching/prefetching for upcoming work.
#[derive(Debug, Clone)]
pub enum OptimizationHint {
    /// Upcoming accesses hash sequentially starting at `start_hash`.
    Sequential { start_hash: u64, count: usize },
    /// Upcoming accesses use the given unordered hashes.
    Random { hashes: Vec<u64> },
    /// A batch of `count` inserts is imminent.
    BulkInsert { count: usize },
    /// A batch of `count` lookups is imminent.
    BulkLookup { count: usize },
    /// Warm the cache for the buckets in `bucket_range`.
    CacheWarm { bucket_range: std::ops::Range<usize> },
}
/// Robin Hood hashing collision-resolution strategy.
///
/// Keeps probe-length variance low by letting an inserting element displace
/// any resident entry that sits closer to its own home slot.
pub struct RobinHoodStrategy {
    max_probe_distance: u16,
    variance_reduction: bool,
    backward_shift: bool,
}

impl RobinHoodStrategy {
    /// Builds a strategy with the given probe-length cap and feature toggles.
    pub fn new(max_probe_distance: u16, variance_reduction: bool, backward_shift: bool) -> Self {
        RobinHoodStrategy {
            backward_shift,
            variance_reduction,
            max_probe_distance,
        }
    }
}
/// Tuning parameters for [`RobinHoodStrategy`].
#[derive(Debug, Clone)]
pub struct RobinHoodConfig {
    /// Hard cap on probe-sequence length before insert reports an error.
    pub max_probe_distance: u16,
    /// Variance-reduction toggle.
    /// NOTE(review): not consulted by the code visible in this file —
    /// confirm intended behavior at the call sites.
    pub variance_reduction: bool,
    /// Use backward-shift deletion instead of tombstones on remove.
    pub backward_shift: bool,
}
/// Running statistics maintained by [`RobinHoodStrategy`] across operations.
#[derive(Debug, Default)]
pub struct RobinHoodContext {
    /// Sum of the final probe distances of inserted entries.
    pub total_probe_distance: u64,
    /// Longest final probe distance observed.
    pub max_probe_distance: u16,
    /// Collision events recorded during inserts.
    pub collision_count: u64,
    /// Number of Robin Hood displacements ("evictions") performed.
    pub eviction_count: u64,
}
impl<K, V> CollisionResolutionStrategy<K, V> for RobinHoodStrategy
where
    K: Hash + Eq + Clone,
    V: Clone,
{
    type Config = RobinHoodConfig;
    type Context = RobinHoodContext;

    /// Robin Hood insertion: probe linearly from the home slot and displace
    /// any resident that is closer to its own home than the element being
    /// placed, which bounds probe-length variance.
    ///
    /// `buckets.len()` must be a power of two (indices are masked, not
    /// reduced modulo). Returns the previous value when `key` was already
    /// present, or an error when no slot is found within
    /// `config.max_probe_distance` (the caller should resize and retry).
    fn insert(
        &self,
        context: &mut Self::Context,
        buckets: &mut [HashBucket<K, V>],
        key: K,
        value: V,
        hash: u64,
        config: &Self::Config,
    ) -> Result<Option<V>> {
        // BUG FIX: `len - 1` underflows (and indexing panics) on an empty
        // bucket array; report a proper error instead.
        if buckets.is_empty() {
            return Err(ZiporaError::invalid_state("insert into empty bucket array"));
        }
        let mask = buckets.len() - 1;
        let mut pos = (hash as usize) & mask;
        let mut probe_distance = 0;
        // The element currently being carried: starts as the caller's entry
        // and becomes the evicted entry after each Robin Hood swap.
        let mut inserting_key = Some(key);
        let mut inserting_value = Some(value);
        let mut inserting_hash = hash;
        let mut inserting_distance = 0;
        loop {
            if probe_distance > config.max_probe_distance {
                return Err(ZiporaError::invalid_state("Exceeded maximum probe distance"));
            }
            let bucket = &mut buckets[pos];
            // Only a genuinely empty slot can be claimed. Tombstones are
            // probed through rather than reused: reusing one without first
            // scanning the rest of the probe chain could create a duplicate
            // of a key stored further along.
            if bucket.is_empty() && !bucket.is_deleted() {
                bucket.hash = inserting_hash;
                bucket.key = inserting_key;
                bucket.value = inserting_value;
                bucket.probe_distance = inserting_distance;
                context.total_probe_distance += inserting_distance as u64;
                context.max_probe_distance = context.max_probe_distance.max(inserting_distance);
                return Ok(None);
            }
            // Same key already present: replace the value in place.
            if bucket.hash == inserting_hash && bucket.key.as_ref() == inserting_key.as_ref() {
                let old_value = bucket.value.take();
                bucket.value = inserting_value;
                return Ok(old_value);
            }
            // Robin Hood displacement: steal the slot from a "richer"
            // resident (one closer to its home) and keep probing with the
            // evictee. Tombstones hold no live entry and are never swapped.
            if !bucket.is_deleted() && inserting_distance > bucket.probe_distance {
                std::mem::swap(&mut bucket.hash, &mut inserting_hash);
                std::mem::swap(&mut bucket.key, &mut inserting_key);
                std::mem::swap(&mut bucket.value, &mut inserting_value);
                std::mem::swap(&mut bucket.probe_distance, &mut inserting_distance);
                context.eviction_count += 1;
            }
            // BUG FIX: count each insert that missed its home slot exactly
            // once. The original incremented on every probe step, inflating
            // `collision_count` by the whole probe length.
            if probe_distance == 0 {
                context.collision_count += 1;
            }
            pos = (pos + 1) & mask;
            probe_distance += 1;
            inserting_distance += 1;
        }
    }

    /// Lookup following the same probe order as `insert`. Terminates on a
    /// truly empty slot, or early when the Robin Hood invariant proves the
    /// key cannot appear further along the chain.
    fn get<'a, Q>(
        &self,
        context: &Self::Context,
        buckets: &'a [HashBucket<K, V>],
        key: &Q,
        hash: u64,
        config: &Self::Config,
    ) -> Option<&'a V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        if buckets.is_empty() {
            return None;
        }
        let mask = buckets.len() - 1;
        let mut pos = (hash as usize) & mask;
        let mut probe_distance = 0;
        while probe_distance <= config.max_probe_distance {
            let bucket = &buckets[pos];
            // BUG FIX: probe *through* tombstones. Previously any slot with
            // `key == None` ended the search, making entries past a deleted
            // slot unreachable when tombstone deletion is in use.
            if bucket.is_empty() && !bucket.is_deleted() {
                return None;
            }
            if bucket.hash == hash && bucket.key.as_ref().map(|k| k.borrow()) == Some(key) {
                return bucket.value.as_ref();
            }
            // Robin Hood invariant: a resident closer to its home than our
            // current probe distance means the key is absent.
            if probe_distance > bucket.probe_distance {
                return None;
            }
            pos = (pos + 1) & mask;
            probe_distance += 1;
        }
        None
    }

    /// Removal using either backward-shift compaction or tombstoning,
    /// selected by `config.backward_shift`.
    fn remove<Q>(
        &self,
        context: &mut Self::Context,
        buckets: &mut [HashBucket<K, V>],
        key: &Q,
        hash: u64,
        config: &Self::Config,
    ) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        if buckets.is_empty() {
            return None;
        }
        let mask = buckets.len() - 1;
        let mut pos = (hash as usize) & mask;
        let mut probe_distance = 0;
        while probe_distance <= config.max_probe_distance {
            let bucket = &mut buckets[pos];
            // See `get`: only a truly empty slot ends the search.
            if bucket.is_empty() && !bucket.is_deleted() {
                return None;
            }
            if bucket.hash == hash && bucket.key.as_ref().map(|k| k.borrow()) == Some(key) {
                let value = bucket.value.take();
                if config.backward_shift {
                    // Compact the chain so no tombstone is needed.
                    self.backward_shift_delete(buckets, pos);
                } else {
                    bucket.mark_deleted();
                }
                return value;
            }
            if probe_distance > bucket.probe_distance {
                return None;
            }
            pos = (pos + 1) & mask;
            probe_distance += 1;
        }
        None
    }

    /// Grow once the load factor exceeds 0.7 or the observed probe length
    /// passes this strategy's own cap.
    fn needs_resize(&self, context: &Self::Context, capacity: usize, len: usize) -> bool {
        let load_factor = len as f64 / capacity as f64;
        load_factor > 0.7 || context.max_probe_distance > self.max_probe_distance
    }

    /// Snapshot of accumulated probe statistics. `average_probe_distance` is
    /// the mean final probe distance per *colliding* insert; variance is not
    /// tracked by this strategy and is reported as 0.0.
    fn probe_stats(&self, context: &Self::Context) -> ProbeStats {
        let avg_probe = if context.collision_count > 0 {
            context.total_probe_distance as f64 / context.collision_count as f64
        } else {
            0.0
        };
        ProbeStats {
            average_probe_distance: avg_probe,
            max_probe_distance: context.max_probe_distance,
            total_probes: context.total_probe_distance,
            collision_count: context.collision_count,
            variance: 0.0,
        }
    }
}
impl RobinHoodStrategy {
    /// Backward-shift deletion for linear probing: after clearing `pos`,
    /// slide each successor whose probe distance is non-zero one slot back
    /// toward its home, removing the need for tombstones.
    ///
    /// IMPROVEMENT: buckets are moved with `std::mem::take` (HashBucket
    /// implements `Default` for all `K`, `V`) instead of being cloned, which
    /// avoids a deep copy per shifted slot and lets the previous
    /// `K: Clone, V: Clone` bounds be dropped — a strict relaxation that
    /// remains compatible with all existing callers.
    fn backward_shift_delete<K, V>(&self, buckets: &mut [HashBucket<K, V>], mut pos: usize) {
        let mask = buckets.len() - 1;
        buckets[pos].clear();
        loop {
            let next_pos = (pos + 1) & mask;
            // Stop at a truly empty slot or an entry already in its home slot.
            if buckets[next_pos].is_empty() || buckets[next_pos].probe_distance == 0 {
                break;
            }
            // Move (not clone) the successor one slot closer to its home,
            // leaving a default (empty) bucket behind.
            buckets[pos] = std::mem::take(&mut buckets[next_pos]);
            buckets[pos].probe_distance -= 1;
            pos = next_pos;
        }
    }
}
/// Baseline bucket storage backed by a plain `FastVec`.
pub struct StandardStorageStrategy;
/// Parameters for [`StandardStorageStrategy`].
#[derive(Debug, Clone)]
pub struct StandardStorageConfig {
    /// Requested starting bucket count.
    /// NOTE(review): not read by the code in this file — confirm callers use it.
    pub initial_capacity: usize,
    /// Growth multiplier applied on resize (not consulted here; verify at
    /// the call sites).
    pub growth_factor: f64,
}
impl<K, V> StorageLayoutStrategy<K, V> for StandardStorageStrategy {
    type Config = StandardStorageConfig;
    type Storage = FastVec<HashBucket<K, V>>;

    /// Allocates bucket storage, degrading gracefully under allocation
    /// pressure: full capacity, then half, then an empty vector. All
    /// allocated slots are initialized to empty buckets so `len` equals the
    /// usable bucket count.
    fn create_storage(capacity: usize, config: &Self::Config) -> Self::Storage {
        let mut storage = FastVec::with_capacity(capacity)
            .or_else(|_| FastVec::with_capacity(capacity / 2))
            .unwrap_or_else(|_| FastVec::new());
        if storage.capacity() > 0 {
            let actual_capacity = storage.capacity();
            // Best-effort fill; zero-length storage is still a valid state.
            let _ = storage.resize_with(actual_capacity, Default::default);
        }
        storage
    }

    /// Resizes to `new_capacity` slots, default-initializing new buckets.
    fn resize_storage(
        storage: &mut Self::Storage,
        new_capacity: usize,
        config: &Self::Config,
    ) -> Result<()> {
        // BUG FIX: the `Result` of `resize_with` was silently discarded, so
        // an allocation failure reported `Ok(())` while the table kept its
        // old size. Propagate it, consistent with the cache-optimized
        // strategy's implementation.
        storage.resize_with(new_capacity, Default::default)?;
        Ok(())
    }

    fn get_buckets_mut(storage: &mut Self::Storage) -> &mut [HashBucket<K, V>] {
        storage.as_mut_slice()
    }

    fn get_buckets(storage: &Self::Storage) -> &[HashBucket<K, V>] {
        storage.as_slice()
    }

    /// Usable bucket count — the initialized length, not the raw allocation.
    fn capacity(storage: &Self::Storage) -> usize {
        storage.len()
    }

    /// Bytes reserved for buckets (allocation size × bucket size).
    fn memory_usage(storage: &Self::Storage) -> usize {
        storage.capacity() * std::mem::size_of::<HashBucket<K, V>>()
    }

    /// No layout optimization is performed for the standard strategy.
    fn optimize_layout(storage: &mut Self::Storage, config: &Self::Config) -> Result<()> {
        Ok(())
    }
}
/// Bucket storage intended for cache-aware allocation.
/// NOTE(review): `allocator` is stored but not used by the trait impl in
/// this file — the storage path currently matches the standard strategy.
/// Confirm whether the allocator is consumed elsewhere.
pub struct CacheOptimizedStorageStrategy {
    // Cache-optimized allocator retained for layout work.
    allocator: CacheOptimizedAllocator,
}
impl CacheOptimizedStorageStrategy {
    /// Wraps the given cache-optimized allocator.
    pub fn new(allocator: CacheOptimizedAllocator) -> Self {
        Self { allocator }
    }
}
/// Parameters for [`CacheOptimizedStorageStrategy`].
/// NOTE(review): none of these fields are consulted by the code visible in
/// this file — verify their intended use at the call sites.
#[derive(Debug, Clone)]
pub struct CacheOptimizedStorageConfig {
    /// Target cache-line size in bytes.
    pub cache_line_size: usize,
    /// Whether NUMA-aware placement is requested.
    pub numa_aware: bool,
    /// Whether prefetching should be enabled.
    pub prefetch_enabled: bool,
}
impl<K, V> StorageLayoutStrategy<K, V> for CacheOptimizedStorageStrategy {
    type Config = CacheOptimizedStorageConfig;
    type Storage = FastVec<HashBucket<K, V>>;

    /// Allocates and default-initializes bucket storage, falling back to
    /// half the requested capacity (and finally an empty vector) when the
    /// allocation fails.
    fn create_storage(capacity: usize, config: &Self::Config) -> Self::Storage {
        let mut buckets = match FastVec::with_capacity(capacity) {
            Ok(v) => v,
            Err(_) => FastVec::with_capacity(capacity / 2).unwrap_or_else(|_| FastVec::new()),
        };
        let usable = buckets.capacity();
        if usable > 0 {
            // Best-effort initialization; zero-length storage is still valid.
            let _ = buckets.resize_with(usable, Default::default);
        }
        buckets
    }

    /// Resizes to `new_capacity` slots, default-initializing new buckets and
    /// propagating any allocation failure.
    fn resize_storage(
        storage: &mut Self::Storage,
        new_capacity: usize,
        config: &Self::Config,
    ) -> Result<()> {
        storage.resize_with(new_capacity, Default::default)?;
        Ok(())
    }

    /// Mutable view of the bucket slots.
    fn get_buckets_mut(storage: &mut Self::Storage) -> &mut [HashBucket<K, V>] {
        storage.as_mut_slice()
    }

    /// Shared view of the bucket slots.
    fn get_buckets(storage: &Self::Storage) -> &[HashBucket<K, V>] {
        storage.as_slice()
    }

    /// Usable bucket count — the initialized length, not the raw allocation.
    fn capacity(storage: &Self::Storage) -> usize {
        storage.len()
    }

    /// Bytes reserved for buckets (allocation size × bucket size).
    fn memory_usage(storage: &Self::Storage) -> usize {
        std::mem::size_of::<HashBucket<K, V>>() * storage.capacity()
    }

    /// No layout rework is performed for this strategy yet.
    fn optimize_layout(storage: &mut Self::Storage, config: &Self::Config) -> Result<()> {
        Ok(())
    }
}
/// Hash-table optimization hooks backed by the process-wide SIMD string
/// operation table.
pub struct SimdOptimizationStrategy {
    // Shared table of SIMD string routines (see `simd_string_ops`).
    simd_ops: &'static SimdStringOps,
}
impl SimdOptimizationStrategy {
    /// Creates a strategy bound to the global SIMD operation table.
    pub fn new() -> Self {
        Self {
            simd_ops: crate::hash_map::simd_string_ops::get_global_simd_ops(),
        }
    }
}
/// Feature switches for SIMD-assisted operations.
/// NOTE(review): these flags are not consulted by the code visible in this
/// file — confirm the intended gating at the call sites.
#[derive(Debug, Clone)]
pub struct SimdOptimizationConfig {
    /// Enable SIMD string comparison/hash helpers.
    pub enable_string_ops: bool,
    /// Enable SIMD bulk operations.
    pub enable_bulk_ops: bool,
    /// Enable SIMD hash computation.
    pub enable_hash_computation: bool,
}
/// Counters accumulated by [`SimdOptimizationStrategy`].
#[derive(Debug, Default)]
pub struct SimdOptimizationContext {
    /// Operations routed through the SIMD hooks.
    pub simd_operations: u64,
    /// Bulk-optimization hints processed.
    pub bulk_operations: u64,
    /// String comparisons performed (not updated in this file).
    pub string_comparisons: u64,
}
impl<K, V> HashOptimizationStrategy<K, V> for SimdOptimizationStrategy {
type Config = SimdOptimizationConfig;
type Context = SimdOptimizationContext;
fn pre_insert(
&self,
context: &mut Self::Context,
key: &K,
hash: u64,
buckets: &[HashBucket<K, V>],
config: &Self::Config,
) {
context.simd_operations += 1;
}
fn post_insert(
&self,
context: &mut Self::Context,
key: &K,
inserted: bool,
config: &Self::Config,
) {
}
fn pre_lookup<Q>(
&self,
context: &mut Self::Context,
key: &Q,
hash: u64,
buckets: &[HashBucket<K, V>],
config: &Self::Config,
) where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
context.simd_operations += 1;
}
fn bulk_optimize(
&self,
context: &mut Self::Context,
operations: &[OptimizationHint],
config: &Self::Config,
) {
context.bulk_operations += operations.len() as u64;
}
fn metrics(&self, context: &Self::Context) -> OptimizationMetrics {
OptimizationMetrics {
simd_operations: context.simd_operations,
bulk_operations: context.bulk_operations,
..Default::default()
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check of Robin Hood insert + lookup on a small table.
    /// The bucket count (16) is a power of two, as the index masking in the
    /// strategy requires.
    #[test]
    fn test_robin_hood_strategy() {
        let strategy = RobinHoodStrategy::new(64, true, true);
        let config = RobinHoodConfig {
            max_probe_distance: 64,
            variance_reduction: true,
            backward_shift: true,
        };
        let mut context = RobinHoodContext::default();
        let mut buckets = vec![HashBucket::default(); 16];
        // A fresh insert must succeed and report no previous value.
        let result = strategy.insert(
            &mut context,
            &mut buckets,
            "key1".to_string(),
            "value1".to_string(),
            123456,
            &config,
        );
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        // The inserted value must be retrievable via the borrowed key form.
        let found = strategy.get(&context, &buckets, "key1", 123456, &config);
        assert!(found.is_some());
        assert_eq!(found.unwrap(), "value1");
    }

    /// Storage creation should yield `capacity` initialized bucket slots.
    #[test]
    fn test_standard_storage_strategy() {
        let strategy = StandardStorageStrategy;
        let config = StandardStorageConfig {
            initial_capacity: 16,
            growth_factor: 2.0,
        };
        let mut storage: <StandardStorageStrategy as StorageLayoutStrategy<String, i32>>::Storage =
            StandardStorageStrategy::create_storage(16, &config);
        // `capacity` reports the initialized length (see the impl).
        assert_eq!(StandardStorageStrategy::capacity(&storage), 16);
        let buckets: &mut [HashBucket<String, i32>] = StandardStorageStrategy::get_buckets_mut(&mut storage);
        assert_eq!(buckets.len(), 16);
    }
}