use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
use std::fmt;
use std::marker::PhantomData;
use std::mem::MaybeUninit;
use std::ptr::{self, NonNull};
use kevy_hash::KevyHash;
use crate::iter::{Iter, Keys, Values};
pub(crate) const GROUP_WIDTH: usize = 16;
pub(crate) const EMPTY: u8 = 0xFF;
pub(crate) const DELETED: u8 = 0x80;
pub(crate) const MIN_CAP: usize = 16;
#[inline]
pub(crate) fn h2(hash: u64) -> u8 {
((hash >> 57) & 0x7F) as u8
}
#[inline(always)]
fn prefetch_t0(ptr: *const u8) {
#[cfg(all(target_arch = "x86_64", not(miri)))]
{
unsafe {
core::arch::x86_64::_mm_prefetch(
ptr as *const i8,
core::arch::x86_64::_MM_HINT_T0,
);
}
}
#[cfg(all(target_arch = "aarch64", not(miri)))]
{
unsafe {
core::arch::asm!(
"prfm pldl1keep, [{p}]",
p = in(reg) ptr,
options(nostack, preserves_flags, readonly),
);
}
}
#[cfg(any(miri, not(any(target_arch = "x86_64", target_arch = "aarch64"))))]
{
let _ = ptr;
}
}
#[inline]
pub(crate) fn table_layout<KV>(cap: usize) -> (Layout, usize) {
let slots = Layout::array::<MaybeUninit<KV>>(cap).expect("slots layout overflow");
let meta = Layout::array::<u8>(cap + GROUP_WIDTH).expect("metadata layout overflow");
let (combined, meta_offset) = slots.extend(meta).expect("layout extend overflow");
(combined.pad_to_align(), meta_offset)
}
pub struct KevyMap<K, V> {
pub(crate) slots_ptr: NonNull<MaybeUninit<(K, V)>>,
pub(crate) metadata_ptr: NonNull<u8>,
pub(crate) cap: usize,
pub(crate) mask: usize,
pub(crate) occupied: usize,
pub(crate) deleted: usize,
_marker: PhantomData<(K, V)>,
}
unsafe impl<K: Send, V: Send> Send for KevyMap<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for KevyMap<K, V> {}
type SlotSlices<'a, K, V> = (&'a [u8], &'a [MaybeUninit<(K, V)>]);
pub(crate) enum ProbeOutcome {
Found(usize),
NotFound {
insert_at: usize,
via_tombstone: bool,
},
}
impl<K, V> KevyMap<K, V> {
pub fn new() -> Self {
Self {
slots_ptr: NonNull::dangling(),
metadata_ptr: NonNull::dangling(),
cap: 0,
mask: 0,
occupied: 0,
deleted: 0,
_marker: PhantomData,
}
}
pub fn with_capacity(cap_hint: usize) -> Self {
if cap_hint == 0 {
return Self::new();
}
let needed = cap_hint.saturating_mul(8).div_ceil(7);
let cap = needed.next_power_of_two().max(MIN_CAP);
Self::alloc_table(cap)
}
pub(crate) fn alloc_table(cap: usize) -> Self {
debug_assert!(cap.is_power_of_two());
debug_assert!(cap >= MIN_CAP);
let (layout, meta_offset) = table_layout::<(K, V)>(cap);
let base = unsafe { alloc(layout) };
if base.is_null() {
handle_alloc_error(layout);
}
let meta_byte_ptr = unsafe { base.add(meta_offset) };
unsafe { ptr::write_bytes(meta_byte_ptr, EMPTY, cap + GROUP_WIDTH) };
let slots_ptr = base as *mut MaybeUninit<(K, V)>;
let metadata_ptr = meta_byte_ptr;
kevy_madvise::advise_hugepage(base as *const u8, layout.size());
Self {
slots_ptr: unsafe { NonNull::new_unchecked(slots_ptr) },
metadata_ptr: unsafe { NonNull::new_unchecked(metadata_ptr) },
cap,
mask: cap - 1,
occupied: 0,
deleted: 0,
_marker: PhantomData,
}
}
#[inline]
pub(crate) fn set_meta(&mut self, i: usize, v: u8) {
debug_assert!(i < self.cap);
let i2 = (i.wrapping_sub(GROUP_WIDTH) & self.mask) + GROUP_WIDTH;
unsafe {
*self.metadata_ptr.as_ptr().add(i) = v;
*self.metadata_ptr.as_ptr().add(i2) = v;
}
}
#[inline]
pub fn len(&self) -> usize {
self.occupied
}
#[inline]
pub fn is_empty(&self) -> bool {
self.occupied == 0
}
#[inline]
pub fn capacity(&self) -> usize {
self.cap
}
pub fn clear(&mut self) {
if self.cap == 0 {
return;
}
if std::mem::needs_drop::<(K, V)>() {
for i in 0..self.cap {
let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
if meta & 0x80 == 0 {
unsafe {
ptr::drop_in_place(self.slots_ptr.as_ptr().add(i) as *mut (K, V));
}
}
}
}
unsafe {
ptr::write_bytes(self.metadata_ptr.as_ptr(), EMPTY, self.cap + GROUP_WIDTH);
}
self.occupied = 0;
self.deleted = 0;
}
pub fn iter(&self) -> Iter<'_, K, V> {
let (metadata, slots) = self.as_slices();
Iter::new(metadata, slots)
}
pub fn iter_from_bucket(&self, start: usize) -> Iter<'_, K, V> {
let (metadata, slots) = self.as_slices();
Iter::with_start(metadata, slots, start)
}
pub fn keys(&self) -> Keys<'_, K, V> {
Keys::new(self.iter())
}
pub fn values(&self) -> Values<'_, K, V> {
Values::new(self.iter())
}
#[inline]
fn as_slices(&self) -> SlotSlices<'_, K, V> {
if self.cap == 0 {
return (&[], &[]);
}
unsafe {
(
std::slice::from_raw_parts(self.metadata_ptr.as_ptr(), self.cap),
std::slice::from_raw_parts(self.slots_ptr.as_ptr(), self.cap),
)
}
}
#[inline(always)]
pub fn prefetch_for_hash(&self, hash: u64) {
if self.cap == 0 {
return;
}
let idx = (hash as usize) & self.mask;
let ptr = unsafe { self.metadata_ptr.as_ptr().add(idx) };
prefetch_t0(ptr);
}
#[inline]
pub(crate) fn threshold(&self) -> usize {
self.cap - (self.cap / 8)
}
}
impl<K, V> Default for KevyMap<K, V> {
fn default() -> Self {
Self::new()
}
}
impl<K, Q, V> std::ops::Index<&Q> for KevyMap<K, V>
where
K: std::borrow::Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
type Output = V;
fn index(&self, key: &Q) -> &V {
self.get(key).expect("no entry found for key")
}
}
impl<K, V> Drop for KevyMap<K, V> {
fn drop(&mut self) {
if self.cap == 0 {
return;
}
if std::mem::needs_drop::<(K, V)>() {
for i in 0..self.cap {
let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
if meta & 0x80 == 0 {
unsafe {
ptr::drop_in_place(self.slots_ptr.as_ptr().add(i) as *mut (K, V));
}
}
}
}
let (layout, _) = table_layout::<(K, V)>(self.cap);
unsafe {
dealloc(self.slots_ptr.as_ptr() as *mut u8, layout);
}
}
}
impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for KevyMap<K, V> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_map().entries(self.iter()).finish()
}
}
impl<K: KevyHash + Eq, V> FromIterator<(K, V)> for KevyMap<K, V> {
fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
let iter = iter.into_iter();
let mut m = match iter.size_hint() {
(lo, Some(hi)) if hi <= lo.saturating_mul(2) => Self::with_capacity(hi),
(lo, _) => Self::with_capacity(lo),
};
for (k, v) in iter {
m.insert(k, v);
}
m
}
}
impl<K: KevyHash + Eq, V> Extend<(K, V)> for KevyMap<K, V> {
fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
for (k, v) in iter {
self.insert(k, v);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::set::KevySet;
use std::cell::Cell;
#[test]
fn new_is_empty() {
let m: KevyMap<u64, u64> = KevyMap::new();
assert_eq!(m.len(), 0);
assert!(m.is_empty());
assert_eq!(m.capacity(), 0);
assert_eq!(m.get(&5u64), None);
assert!(!m.contains_key(&5u64));
}
#[test]
fn insert_get() {
let mut m = KevyMap::<u64, u64>::new();
assert!(m.insert(1, 10).is_none());
assert_eq!(m.len(), 1);
assert_eq!(m.get(&1u64), Some(&10));
assert_eq!(m.get(&2u64), None);
assert!(m.contains_key(&1u64));
}
#[test]
fn insert_duplicate_replaces_value_returns_old() {
let mut m = KevyMap::<u64, u64>::new();
assert_eq!(m.insert(1, 10), None);
assert_eq!(m.insert(1, 20), Some(10));
assert_eq!(m.len(), 1);
assert_eq!(m.get(&1u64), Some(&20));
}
#[test]
fn remove_returns_value_decreases_len() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
m.insert(2, 20);
assert_eq!(m.remove(&1u64), Some(10));
assert_eq!(m.len(), 1);
assert_eq!(m.remove(&1u64), None);
assert_eq!(m.get(&1u64), None);
assert_eq!(m.get(&2u64), Some(&20));
}
#[test]
fn tombstone_reused_on_reinsert() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
m.remove(&1u64);
assert_eq!(m.len(), 0);
m.insert(1, 30);
assert_eq!(m.len(), 1);
assert_eq!(m.get(&1u64), Some(&30));
}
#[test]
fn grow_preserves_all_entries_10k() {
let mut m = KevyMap::<u64, u64>::new();
for i in 0..10_000u64 {
m.insert(i, i.wrapping_mul(7));
}
assert_eq!(m.len(), 10_000);
for i in 0..10_000u64 {
assert_eq!(m.get(&i), Some(&i.wrapping_mul(7)));
}
}
#[test]
fn byte_string_keys_with_borrow_lookup() {
let mut m = KevyMap::<Vec<u8>, u64>::new();
m.insert(b"foo".to_vec(), 1);
m.insert(b"bar".to_vec(), 2);
assert_eq!(m.get(b"foo".as_slice()), Some(&1));
assert_eq!(m.get(b"missing".as_slice()), None);
assert_eq!(m.remove(b"bar".as_slice()), Some(2));
assert_eq!(m.len(), 1);
assert!(!m.contains_key(b"bar".as_slice()));
}
#[test]
fn iter_yields_all_entries() {
let mut m = KevyMap::<u64, u64>::new();
for i in 0..20u64 {
m.insert(i, i + 100);
}
let mut seen: Vec<(u64, u64)> = m.iter().map(|(&k, &v)| (k, v)).collect();
seen.sort();
let expected: Vec<(u64, u64)> = (0..20).map(|i| (i, i + 100)).collect();
assert_eq!(seen, expected);
}
struct DropCount<'a>(&'a Cell<usize>);
impl Drop for DropCount<'_> {
fn drop(&mut self) {
self.0.set(self.0.get() + 1);
}
}
#[test]
fn clear_drops_entries_and_resets_len() {
let counter = Cell::new(0);
let mut m: KevyMap<u64, DropCount<'_>> = KevyMap::new();
for i in 0..50u64 {
m.insert(i, DropCount(&counter));
}
assert_eq!(m.len(), 50);
m.clear();
assert_eq!(m.len(), 0);
assert_eq!(counter.get(), 50);
assert!(m.capacity() >= 50);
m.insert(0, DropCount(&counter));
assert_eq!(m.len(), 1);
drop(m);
assert_eq!(counter.get(), 51);
}
#[test]
fn drop_runs_for_remaining_entries() {
let counter = Cell::new(0);
{
let mut m: KevyMap<u64, DropCount<'_>> = KevyMap::new();
for i in 0..30u64 {
m.insert(i, DropCount(&counter));
}
m.remove(&5u64);
assert_eq!(counter.get(), 1);
}
assert_eq!(counter.get(), 30);
}
#[test]
fn grow_then_remove_then_grow_again_stays_consistent() {
let mut m = KevyMap::<u64, u64>::new();
for i in 0..2000u64 {
m.insert(i, i);
}
for i in 0..1000u64 {
assert_eq!(m.remove(&i), Some(i));
}
for i in 2000..4000u64 {
m.insert(i, i);
}
assert_eq!(m.len(), 3000);
for i in 1000..4000u64 {
assert_eq!(m.get(&i), Some(&i));
}
for i in 0..1000u64 {
assert_eq!(m.get(&i), None);
}
}
#[test]
fn with_capacity_preallocates() {
let m: KevyMap<u64, u64> = KevyMap::with_capacity(100);
assert_eq!(m.capacity(), 128);
let m: KevyMap<u64, u64> = KevyMap::with_capacity(0);
assert_eq!(m.capacity(), 0);
let m: KevyMap<u64, u64> = KevyMap::with_capacity(1);
assert_eq!(m.capacity(), MIN_CAP);
}
#[test]
fn get_mut_allows_mutation() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
*m.get_mut(&1u64).unwrap() = 20;
assert_eq!(m.get(&1u64), Some(&20));
assert!(m.get_mut(&2u64).is_none());
}
#[test]
fn debug_format_matches_map_shape() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
m.insert(2, 20);
let s = format!("{m:?}");
assert!(s.starts_with('{'));
assert!(s.ends_with('}'));
assert!(s.contains("1: 10") || s.contains("1:10"));
assert!(s.contains("2: 20") || s.contains("2:20"));
}
#[test]
fn into_iter_ref_works() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
m.insert(2, 20);
let mut total = 0u64;
for (k, v) in &m {
total += *k + *v;
}
assert_eq!(total, 1 + 10 + 2 + 20);
}
#[test]
fn many_collisions_via_long_byte_keys() {
let mut m = KevyMap::<Vec<u8>, u64>::new();
let n = 5_000u64;
for i in 0..n {
let k = format!("session:{i:08}:user").into_bytes();
m.insert(k, i);
}
assert_eq!(m.len(), n as usize);
for i in 0..n {
let k = format!("session:{i:08}:user");
assert_eq!(m.get(k.as_bytes()), Some(&i));
}
}
#[test]
fn zst_value_type() {
let mut m = KevyMap::<u64, ()>::new();
assert!(m.insert(1, ()).is_none());
assert!(m.insert(1, ()).is_some());
assert!(m.contains_key(&1u64));
assert_eq!(m.remove(&1u64), Some(()));
}
#[test]
fn set_basic_ops() {
let mut s: KevySet<Vec<u8>> = KevySet::new();
assert!(s.insert(b"a".to_vec()));
assert!(!s.insert(b"a".to_vec())); assert_eq!(s.len(), 1);
assert!(s.contains(b"a".as_slice()));
assert!(!s.contains(b"b".as_slice()));
assert!(s.remove(b"a".as_slice()));
assert!(!s.remove(b"a".as_slice()));
assert!(s.is_empty());
}
#[test]
fn set_iter_yields_members() {
let mut s: KevySet<u64> = KevySet::new();
for i in 0..10u64 {
s.insert(i);
}
let mut got: Vec<u64> = s.iter().copied().collect();
got.sort();
assert_eq!(got, (0..10u64).collect::<Vec<_>>());
}
#[test]
fn prefetch_for_hash_is_safe_on_any_state() {
let m: KevyMap<u64, u64> = KevyMap::new();
m.prefetch_for_hash(0);
m.prefetch_for_hash(u64::MAX);
let mut m = KevyMap::<u64, u64>::new();
for i in 0..50u64 {
m.insert(i, i);
}
for i in 0..50u64 {
m.prefetch_for_hash(i.kevy_hash());
}
}
#[test]
fn capacity_grows_doubling_from_min_cap() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 1);
assert_eq!(m.capacity(), MIN_CAP);
for i in 2..=14u64 {
m.insert(i, i);
}
m.insert(15, 15);
assert_eq!(m.capacity(), MIN_CAP * 2);
}
#[test]
fn map_keys_iter() {
let mut m = KevyMap::<u64, u64>::new();
for i in 0..5u64 {
m.insert(i, i + 10);
}
let mut ks: Vec<u64> = m.keys().copied().collect();
ks.sort();
assert_eq!(ks, vec![0, 1, 2, 3, 4]);
}
#[test]
fn map_values_iter() {
let mut m = KevyMap::<u64, u64>::new();
for i in 0..5u64 {
m.insert(i, i + 10);
}
let mut vs: Vec<u64> = m.values().copied().collect();
vs.sort();
assert_eq!(vs, vec![10, 11, 12, 13, 14]);
}
#[test]
fn map_default_is_empty() {
let m: KevyMap<u64, u64> = KevyMap::default();
assert!(m.is_empty());
assert_eq!(m.capacity(), 0);
}
#[test]
fn map_from_iterator() {
let m: KevyMap<u64, u64> = (0..10u64).map(|i| (i, i * 2)).collect();
assert_eq!(m.len(), 10);
assert_eq!(m.get(&5u64), Some(&10));
}
#[test]
fn map_extend() {
let mut m = KevyMap::<u64, u64>::new();
m.extend((0..5u64).map(|i| (i, i)));
assert_eq!(m.len(), 5);
assert_eq!(m.get(&3u64), Some(&3));
}
#[test]
fn map_index_panics_on_missing() {
let mut m = KevyMap::<u64, u64>::new();
m.insert(1, 10);
assert_eq!(m[&1u64], 10);
let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let _ = m[&99u64];
}));
assert!(r.is_err(), "Index on missing key should panic");
}
#[test]
fn set_with_capacity_capacity_clear() {
let mut s: KevySet<u64> = KevySet::with_capacity(50);
assert!(s.capacity() >= 50);
for i in 0..10u64 {
s.insert(i);
}
assert_eq!(s.len(), 10);
s.clear();
assert!(s.is_empty());
assert!(s.capacity() >= 50);
}
#[test]
fn set_as_map_smoke() {
let mut s: KevySet<u64> = KevySet::new();
s.insert(7);
assert_eq!(s.as_map().len(), 1);
assert!(s.as_map().contains_key(&7u64));
}
#[test]
fn set_default_debug() {
let s: KevySet<u64> = KevySet::default();
assert!(s.is_empty());
let dbg = format!("{s:?}");
assert_eq!(dbg, "{}");
}
#[test]
fn set_into_iter_ref() {
let mut s: KevySet<u64> = KevySet::new();
for i in 0..3u64 {
s.insert(i);
}
let mut sum = 0u64;
for k in &s {
sum += k;
}
assert_eq!(sum, 3);
}
#[test]
fn set_from_iterator() {
let s: KevySet<u64> = (0..5u64).collect();
assert_eq!(s.len(), 5);
assert!(s.contains(&3u64));
}
#[test]
fn set_extend() {
let mut s: KevySet<u64> = KevySet::new();
s.extend(0..5u64);
assert_eq!(s.len(), 5);
}
}