coca 0.3.0

Data structures with constant capacity
Documentation
//! Forgetful map data structures.
//! 
//! Useful for approximate search tasks and caching the results of expensive
//! computations.

#![allow(clippy::cast_possible_truncation)]

use core::{borrow::Borrow, cell::Cell};
use core::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
use core::marker::PhantomData;
use core::mem::MaybeUninit;

use crate::storage::{ArrayLayout, InlineStorage, Storage};

// TODO: wider cache line types!

/// Types that can be used as the cache line type `L` of a [`CacheTable`].
pub trait CacheLine<K: Eq, V> {
    /// The maximum number of entries that can be cached in a line.
    const CAPACITY: usize;
    /// Initialize the pointed to cache line to be empty.
    /// 
    /// # Safety
    /// Implementors may assume the provided pointer to be valid and non-null;
    /// they may *not* assume the referenced memory to be initialized.
    unsafe fn init(this: *mut Self);
    /// Returns a reference to the value corresponding to the key.
    /// 
    /// The key may be any borrowed form of the cache's key type, but [`Eq`] on
    /// the borrowed form *must* match that of the key type.
    fn get<Q: Borrow<K>>(&self, k: &Q) -> Option<&V>;
    /// Returns a mutable reference to the value corresponding to the key.
    /// 
    /// The key may be any borrowed form of the cache's key type, but [`Eq`] on
    /// the borrowed form *must* match that of the key type.
    fn get_mut<Q: Borrow<K>>(&mut self, k: &Q) -> Option<&mut V>;
    /// Inserts a key-value pair into the cache line.
    /// 
    /// If the cache line is already full, another key-value pair must be
    /// evicted from the cache line and returned. Otherwise returns [`None`].
    fn insert(&mut self, k: K, v: V) -> Option<(K, V)>;
    /// Ensures a value corresponding to the provided key is cached by inserting
    /// the result of the default function if none is found, and returns a reference
    /// to the cached value.
    fn get_or_insert_with<F: FnOnce(&K) -> V>(&mut self, k: K, default: F) -> &V;
    /// Clears the cache line, removing all key-value pairs and resetting any
    /// additional state.
    fn clear(&mut self);
}

/// The smallest possible cache, storing only the single most recently accessed key-value pair.
/// 
/// Intended primarily for use as the [`CacheLine`] type `L` of a [`CacheTable`].
/// 
/// # Examples
/// ```
/// use coca::collections::cache::{UnitCache, CacheLine};
/// let mut cache = UnitCache::<&'static str, i32>::default();
/// 
/// assert!(cache.get(&"hello").is_none());
/// assert!(cache.insert("hello", 1).is_none());
/// assert_eq!(cache.get(&"hello"), Some(&1));
/// 
/// assert_eq!(cache.insert("world", 2), Some(("hello", 1)));
/// assert_eq!(cache.get(&"world"), Some(&2));
/// assert!(cache.get(&"hello").is_none());
/// ```
pub struct UnitCache<K, V> {
    key: MaybeUninit<K>,
    value: MaybeUninit<V>,
    occupied: bool,
}

impl<K: Eq, V> Default for UnitCache<K, V> {
    fn default() -> Self {
        let mut result = MaybeUninit::uninit();
        unsafe {
            Self::init(result.as_mut_ptr());
            result.assume_init()
        }
    }
}

impl<K: Eq, V> CacheLine<K, V> for UnitCache<K, V> {
    const CAPACITY: usize = 1;

    unsafe fn init(this: *mut Self) {
        (*this).occupied = false;
    }

    fn get<Q: Borrow<K>>(&self, k: &Q) -> Option<&V> {
        if !self.occupied { return None; }
        let my_key = unsafe { &*self.key.as_ptr() };
        if my_key == k.borrow() {
            Some(unsafe { &*self.value.as_ptr() })
        } else {
            None
        }
    }

    fn get_mut<Q: Borrow<K>>(&mut self, k: &Q) -> Option<&mut V> {
        if !self.occupied { return None; }
        let my_key = unsafe { &*self.key.as_ptr() };
        if my_key == k.borrow() {
            Some(unsafe { &mut *self.value.as_mut_ptr() })
        } else {
            None
        }
    }

    fn insert(&mut self, k: K, v: V) -> Option<(K, V)> {
        let evicted = self.occupied.then(|| {
            unsafe {
                let key = self.key.as_ptr().read();
                let value = self.value.as_ptr().read();
                (key, value)
            }
        });

        self.occupied = true;
        unsafe {
            self.key.as_mut_ptr().write(k);
            self.value.as_mut_ptr().write(v);
        }

        evicted
    }

    fn get_or_insert_with<F: FnOnce(&K) -> V>(&mut self, k: K, default: F) -> &V {
        if !self.occupied {
            self.value = MaybeUninit::new(default(&k));
            self.key = MaybeUninit::new(k);
            self.occupied = true;
            return unsafe { &*self.value.as_ptr() };
        }

        if unsafe { &*self.key.as_ptr() } == &k {
            return unsafe { &*self.value.as_ptr() };
        }

        let key_ptr = self.key.as_mut_ptr();
        let value_ptr = self.value.as_mut_ptr();

        unsafe {
            value_ptr.drop_in_place();
            value_ptr.write(default(&k));

            key_ptr.drop_in_place();
            key_ptr.write(k);

            &*value_ptr
        }
    }

    fn clear(&mut self) {
        if !self.occupied {
            return;
        }

        unsafe {
            self.key.as_mut_ptr().drop_in_place();
            self.value.as_mut_ptr().drop_in_place();
        }

        self.occupied = false;
    }
}

impl<K, V> Drop for UnitCache<K, V> {
    fn drop(&mut self) {
        if !self.occupied {
            return;
        }

        unsafe {
            self.key.as_mut_ptr().drop_in_place();
            self.value.as_mut_ptr().drop_in_place();
        }
    }
}

macro_rules! get_methods {
    () => {
        fn get<Q: Borrow<K>>(&self, k: &Q) -> Option<&V> {
            for i in 0..self.len() {
                let my_key = unsafe { &*self.keys[i].as_ptr() };
                if my_key == k.borrow() {
                    self.mark_used(i);
                    return Some(unsafe { &*self.values[i].as_ptr() });
                }
            }
    
            None
        }
    
        fn get_mut<Q: Borrow<K>>(&mut self, k: &Q) -> Option<&mut V> {
            for i in 0..self.len() {
                let my_key = unsafe { &*self.keys[i].as_ptr() };
                if my_key == k.borrow() {
                    self.mark_used(i);
                    return Some(unsafe { &mut *self.values[i].as_mut_ptr() });
                }
            }
    
            None
        }
    }
}

/// A cache storing the two most recently accessed key-value pairs.
/// 
/// Intended primarily for use as the [`CacheLine`] type `L` of a [`CacheTable`].
/// 
/// # Examples
/// ```
/// use coca::collections::cache::{LruCache2, CacheLine};
/// let mut cache = LruCache2::<i32, &'static str>::default();
/// 
/// assert!(cache.get(&1).is_none());
/// assert!(cache.insert(1, "A").is_none());
/// assert_eq!(cache.get(&1), Some(&"A"));
/// 
/// assert!(cache.get(&2).is_none());
/// assert!(cache.insert(2, "B").is_none());
/// assert_eq!(cache.get(&1), Some(&"A")); // Entry 1 is now most recently used...
/// 
/// assert_eq!(cache.insert(3, "C"), Some((2, "B"))); // ...so entry 2 will be evicted first.
/// assert_eq!(cache.get(&1), Some(&"A"));
/// assert_eq!(cache.get(&3), Some(&"C"));
/// assert!(cache.get(&2).is_none());
/// ```
pub struct LruCache2<K, V> {
    keys: [MaybeUninit<K>; 2],
    values: [MaybeUninit<V>; 2],
    state: Cell<u8>,
}

impl<K: Eq, V> Default for LruCache2<K, V> {
    fn default() -> Self {
        let mut result = MaybeUninit::uninit();
        unsafe {
            Self::init(result.as_mut_ptr());
            result.assume_init()
        }
    }
}

impl<K, V> LruCache2<K, V> {
    #[inline(always)]
    fn len(&self) -> usize {
        (self.state.get() & 0b11) as usize
    }

    #[inline(always)]
    fn least_recently_used(&self) -> usize {
        1 ^ (self.state.get() >> 2) as usize
    }

    #[inline(always)]
    fn mark_used(&self, i: usize) {
        let len = self.len();
        debug_assert!(i < len);
        self.state.set(len as u8 | (i << 2) as u8);
    }
}

impl<K: Eq, V> CacheLine<K, V> for LruCache2<K, V> {
    const CAPACITY: usize = 2;

    unsafe fn init(this: *mut Self) {
        (*this).state = Cell::new(0);
    }

    get_methods!();

    fn insert(&mut self, k: K, v: V) -> Option<(K, V)> {
        let len = self.len();
        for i in 0..len {
            let my_key = unsafe { &*self.keys[i].as_ptr() };
            if my_key == k.borrow() {
                self.mark_used(i);
                
                let evicted = unsafe {(
                    self.keys[i].as_ptr().read(),
                    self.values[i].as_ptr().read()
                )};

                self.keys[i] = MaybeUninit::new(k);
                self.values[i] = MaybeUninit::new(v);

                return Some(evicted);
            }
        }

        if len < 2 {
            self.keys[len] = MaybeUninit::new(k);
            self.values[len] = MaybeUninit::new(v);
            self.state.set((len + 1) as u8 | (len << 2) as u8);
            None
        } else {
            let lru = self.least_recently_used();

            let evicted = unsafe {
                let key = self.keys[lru].as_ptr().read();
                let value = self.values[lru].as_ptr().read();
                (key, value)
            };

            self.keys[lru] = MaybeUninit::new(k);
            self.values[lru] = MaybeUninit::new(v);
            self.state.set((lru << 2) as u8 | 2);

            Some(evicted)
        }
    }

    fn get_or_insert_with<F: FnOnce(&K) -> V>(&mut self, k: K, default: F) -> &V {
        let len = self.len();
        for i in 0..len {
            let my_key = unsafe { &*self.keys[i].as_ptr() };
            if my_key == k.borrow() {
                self.mark_used(i);
                return unsafe { &*self.values[i].as_ptr() };
            }
        }

        let value = default(&k);
        if len < 2 {
            self.keys[len] = MaybeUninit::new(k);
            self.values[len] = MaybeUninit::new(value);
            self.state.set((len + 1) as u8 | (len << 2) as u8);
            unsafe { &*self.values[len].as_ptr() }
        } else {
            let lru = self.least_recently_used();
            self.mark_used(lru);

            unsafe {
                self.keys[lru].as_mut_ptr().drop_in_place();
                self.values[lru].as_mut_ptr().drop_in_place();
            }

            self.keys[lru] = MaybeUninit::new(k);
            self.values[lru] = MaybeUninit::new(value);

            unsafe { &*self.values[lru].as_ptr() }
        }
    }

    fn clear(&mut self) {
        for i in 0..self.len() {
            unsafe {
                self.keys[i].as_mut_ptr().drop_in_place();
                self.values[i].as_mut_ptr().drop_in_place();
            }
        }

        self.state.set(0);
    }
}

impl<K, V> Drop for LruCache2<K, V> {
    fn drop(&mut self) {
        for i in 0..self.len() {
            unsafe {
                self.keys[i].as_mut_ptr().drop_in_place();
                self.values[i].as_mut_ptr().drop_in_place();
            }
        }
    }
}

/// A map implemented with an array of [`CacheLine`]s indexed by the keys' [`Hash`].
/// 
/// The choice of cache line type has several implications for runtime performance,
/// memory overhead, and caching behavior:
/// 
/// * Using [`UnitCache`] requires an additional [`bool`] per cache slot for tracking
///   occupancy, and results in a direct-mapped cache.
/// * Using [`LruCache2`] results in a 2-way set-associative cache with a least
///   recently used eviction policy tracked per cache line with a single [`u8`].
/// 
/// Note that the cache's capacity is always an integer multiple of the cache line's
/// capacity.
/// 
/// For `no_std` compatibility, no default hash builder is provided, but when using
/// [`Hasher`] types implementing [`Default`], the constructors [`new`](CacheTable::new),
/// and [`with_capacity`](CacheTable::with_capacity)
/// are provided. Otherwise, use [`with_hasher`](CacheTable::with_hasher),
/// [`with_capacity_and_hasher`](CacheTable::with_capacity_and_hasher) or
/// [`Arena::try_cache_with_hasher`](crate::arena::Arena::try_cache_with_hasher).
pub struct CacheTable<K: Eq, V, S: Storage<ArrayLayout<L>>, L: CacheLine<K, V>, H> {
    buf: S,
    hash_builder: H,
    lines: PhantomData<L>,
    keys: PhantomData<K>,
    values: PhantomData<V>,
}

impl<K: Eq, V, S: Storage<ArrayLayout<L>>, L: CacheLine<K, V>, H: Default> From<S> for CacheTable<K, V, S, L, H> {
    fn from(buf: S) -> Self {
        let mut result = CacheTable {
            buf, hash_builder: H::default(), lines: PhantomData, keys: PhantomData, values: PhantomData,
        };
        result.init_cache_lines();
        result
    }
}

impl<K: Eq, V, S: Storage<ArrayLayout<L>>, L: CacheLine<K, V>, H> CacheTable<K, V, S, L, H> {
    fn init_cache_lines(&mut self) {
        let line_ptr = self.buf.get_mut_ptr().cast::<L>();
        for i in 0..self.buf.capacity() {
            unsafe { L::init(line_ptr.add(i)); }
        }
    }

    /// Returns the number of key-value pairs the cache can hold.
    #[inline(always)]
    pub fn capacity(&self) -> usize {
        self.buf.capacity() * L::CAPACITY
    }

    fn get_cache_line_for_hash(&self, hash: u64) -> &L {
        let line_index = hash as usize % self.buf.capacity();
        let line_ptr = self.buf.get_ptr().cast::<L>();
        unsafe { &*line_ptr.add(line_index) }
    }

    fn get_cache_line_for_hash_mut(&mut self, hash: u64) -> &mut L {
        let line_index = hash as usize % self.buf.capacity();
        let line_ptr = self.buf.get_mut_ptr().cast::<L>();
        unsafe { &mut *line_ptr.add(line_index) }
    }

    /// Clears the cache, removing all key-value pairs.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// use rustc_hash::FxHasher;
    /// use coca::collections::InlineDirectMappedCache;
    /// use core::hash::BuildHasherDefault;
    /// 
    /// let mut cache = InlineDirectMappedCache::<i32, &'static str, BuildHasherDefault<FxHasher>, 4>::new();
    /// cache.insert(1, "A");
    /// cache.clear();
    /// assert!(cache.get(&1).is_none());
    /// ```
    pub fn clear(&mut self) {
        let num_lines = self.buf.capacity();
        let line_ptr = self.buf.get_mut_ptr().cast::<L>();
        for i in 0..num_lines {
            let line = unsafe { &mut *line_ptr.add(i) };
            line.clear();
        }
    }
}

impl<K: Eq + Hash, V, S: Storage<ArrayLayout<L>>, L: CacheLine<K, V>, H: BuildHasher> CacheTable<K, V, S, L, H> {
    /// Constructs a new cache table using the specified storage and hash builder.
    pub fn from_storage_and_hasher(buf: S, hash_builder: H) -> Self {
        let mut result = CacheTable {
            buf, hash_builder, lines: PhantomData, keys: PhantomData, values: PhantomData,
        };
        result.init_cache_lines();
        result
    }

    fn make_hash(&self, val: &K) -> u64 {
        let mut state = self.hash_builder.build_hasher();
        val.hash(&mut state);
        state.finish()
    }

    /// Returns a reference to the value corresponding to the key.
    /// 
    /// The key may be any borrowed form of the map's key type, but [`Hash`]
    /// and [`Eq`] on the borrowed form *must* match those for the key type.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// use rustc_hash::FxHasher;
    /// use coca::collections::InlineDirectMappedCache;
    /// use core::hash::BuildHasherDefault;
    /// 
    /// let mut cache = InlineDirectMappedCache::<i32, &'static str, BuildHasherDefault<FxHasher>, 4>::new();
    /// cache.insert(1, "A");
    /// assert_eq!(cache.get(&1), Some(&"A"));
    /// assert_eq!(cache.get(&2), None);
    /// ```
    pub fn get<Q: Borrow<K>>(&self, k: &Q) -> Option<&V> {
        let key = k.borrow();
        let hash = self.make_hash(key);
        let cache_line = self.get_cache_line_for_hash(hash);
        cache_line.get(key)
    }

    /// Returns a mutable reference to the value corresponding to the key.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// use rustc_hash::FxHasher;
    /// use coca::collections::InlineDirectMappedCache;
    /// use core::hash::BuildHasherDefault;
    /// 
    /// let mut cache = InlineDirectMappedCache::<i32, &'static str, BuildHasherDefault<FxHasher>, 4>::new();
    /// cache.insert(1, "A");
    /// if let Some(x) = cache.get_mut(&1) {
    ///     *x = "B";
    /// }
    /// assert_eq!(cache.get(&1), Some(&"B"));
    pub fn get_mut<Q: Borrow<K>>(&mut self, k: &Q) -> Option<&mut V> {
        let key = k.borrow();
        let hash = self.make_hash(key);
        let cache_line = self.get_cache_line_for_hash_mut(hash);
        cache_line.get_mut(key)
    }

    /// Inserts a value computed from `f` into the cache if the given key is
    /// not present, then returns a reference to the value in the cache.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// use rustc_hash::FxHasher;
    /// use coca::collections::InlineDirectMappedCache;
    /// use core::hash::BuildHasherDefault;
    /// 
    /// let mut cache = InlineDirectMappedCache::<i32, &'static str, BuildHasherDefault<FxHasher>, 4>::new();
    /// cache.insert(1, "A");
    /// assert_eq!(cache.get_or_insert_with(1, |_| "B"), &"A");
    /// # assert_eq!(cache.get(&1), Some(&"A"));
    /// assert_eq!(cache.get_or_insert_with(2, |_| "B"), &"B");
    /// # assert_eq!(cache.get(&2), Some(&"B"));
    /// ```
    pub fn get_or_insert_with<F: FnOnce(&K) -> V>(&mut self, k: K, f: F) -> &V {
        let hash = self.make_hash(&k);
        let cache_line = self.get_cache_line_for_hash_mut(hash);
        cache_line.get_or_insert_with(k, f)
    }

    /// Inserts a key-value pair into the cache.
    /// 
    /// Returns the evicted key-value pair if the cache line corresponding to
    /// the key is already full, or [`None`] otherwise.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// use rustc_hash::FxHasher;
    /// use coca::collections::InlineDirectMappedCache;
    /// use core::hash::BuildHasherDefault;
    /// 
    /// let mut cache = InlineDirectMappedCache::<i32, &'static str, BuildHasherDefault<FxHasher>, 4>::new();
    /// assert_eq!(cache.insert(37, "a"), None);
    /// assert_eq!(cache.insert(37, "b"), Some((37, "a")));
    /// ```
    pub fn insert(&mut self, k: K, v: V) -> Option<(K, V)> {
        let hash = self.make_hash(&k);
        let cache_line = self.get_cache_line_for_hash_mut(hash);
        cache_line.insert(k, v)
    }
}

impl<K: Eq, V, S: Storage<ArrayLayout<L>>, L: CacheLine<K, V>, H> Drop for CacheTable<K, V, S, L, H> {
    fn drop(&mut self) {
        self.clear();
    }
}

impl<K: Eq + Hash, V, L: CacheLine<K, V>, H: BuildHasher, const N: usize> CacheTable<K, V, InlineStorage<L, N>, L, H> {
    /// Constructs a new, empty `CacheTable` using inline storage and the specified [`BuildHasher`].
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// type HashBuilder = core::hash::BuildHasherDefault<rustc_hash::FxHasher>;
    /// type CacheTable = coca::collections::Inline2WayLruCache<u128, &'static str, HashBuilder, 32>;
    /// let mut cache = CacheTable::with_hasher(HashBuilder::default());
    /// assert_eq!(cache.capacity(), 64);
    /// ```
    pub fn with_hasher(hash_builder: H) -> Self {
        let mut result = CacheTable {
            buf: unsafe {
                MaybeUninit::uninit().assume_init()
            },
            hash_builder,
            lines: PhantomData,
            keys: PhantomData,
            values: PhantomData
        };
        result.init_cache_lines();
        result
    }
}

impl<K: Eq + Hash, V, L: CacheLine<K, V>, H: Hasher + Default, const N: usize> CacheTable<K, V, InlineStorage<L, N>, L, BuildHasherDefault<H>> {
    /// Constructs a new, empty `CacheTable` using inline storage and the default [`BuildHasherDefault`].
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// type HashBuilder = core::hash::BuildHasherDefault<rustc_hash::FxHasher>;
    /// type CacheTable = coca::collections::Inline2WayLruCache<u128, &'static str, HashBuilder, 32>;
    /// let mut cache = CacheTable::new();
    /// assert_eq!(cache.capacity(), 64);
    /// ```
    pub fn new() -> Self {
        let mut result = CacheTable {
            buf: unsafe { MaybeUninit::uninit().assume_init() },
            hash_builder: BuildHasherDefault::default(),
            lines: PhantomData,
            keys: PhantomData,
            values: PhantomData
        };
        result.init_cache_lines();
        result
    }
}

impl<K: Eq + Hash, V, L: CacheLine<K, V>, H: Hasher + Default, const N: usize> Default for CacheTable<K, V, InlineStorage<L, N>, L, BuildHasherDefault<H>> {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(feature = "alloc")]
#[cfg_attr(docs_rs, doc(cfg(feature = "alloc")))]
impl<K: Eq + Hash, V, L: CacheLine<K, V>, H: BuildHasher> CacheTable<K, V, crate::storage::AllocStorage<ArrayLayout<L>>, L, H> {
    /// Constructs a new, empty `CacheTable` with the specified [`BuildHasher`]
    /// and heap-allocated storage of the specified capacity, rounded up to the
    /// next largest multiple of `L::CAPACITY`.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// type HashBuilder = core::hash::BuildHasherDefault<rustc_hash::FxHasher>;
    /// type CacheTable = coca::collections::Alloc2WayLruCache<u128, &'static str, HashBuilder>;
    /// let mut cache = CacheTable::with_capacity_and_hasher(63, HashBuilder::default());
    /// assert_eq!(cache.capacity(), 64);
    /// ```
    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: H) -> Self {
        let capacity = (capacity + L::CAPACITY - 1) / L::CAPACITY;
        let buf = crate::storage::AllocStorage::with_capacity(capacity);
        Self::from_storage_and_hasher(buf, hash_builder)
    }
}

#[cfg(feature = "alloc")]
#[cfg_attr(docs_rs, doc(cfg(feature = "alloc")))]
impl<K: Eq + Hash, V, L: CacheLine<K, V>, H: Hasher + Default> CacheTable<K, V, crate::storage::AllocStorage<ArrayLayout<L>>, L, BuildHasherDefault<H>> {
    /// Constructs a new, empty `CacheTable` with the default [`BuildHasherDefault`]
    /// and heap-allocated storage the specified capacity, rounded up to the next
    /// largest multiple of `L::CAPACITY`.
    /// 
    /// # Examples
    /// ```
    /// # extern crate rustc_hash;
    /// type HashBuilder = core::hash::BuildHasherDefault<rustc_hash::FxHasher>;
    /// type CacheTable = coca::collections::Alloc2WayLruCache<u128, &'static str, HashBuilder>;
    /// let mut cache = CacheTable::with_capacity(63);
    /// assert_eq!(cache.capacity(), 64);
    /// ```
    pub fn with_capacity(capacity: usize, ) -> Self {
        let capacity = (capacity + L::CAPACITY - 1) / L::CAPACITY;
        let buf = crate::storage::AllocStorage::with_capacity(capacity);
        let hash_builder = BuildHasherDefault::default();
        Self::from_storage_and_hasher(buf, hash_builder)
    }
}