dashcache 0.2.1

A simple, highly concurrent cache for hash-consing values
Documentation
/*!
A simple, highly concurrent cache for hash-consing values
*/

use ahash::RandomState;
use dashmap::DashMap;
use elysees::Arc;
use std::borrow::Borrow;
use std::fmt::{self, Debug, Formatter};
use std::hash::{BuildHasher, Hash};

pub mod arr;

/// An object which can be garbage collected
pub trait CanCollect {
    /// Is this handle unique or, if weak references are allowed, destroyed
    #[inline(always)]
    fn can_collect(&self) -> bool {
        false
    }
    /// Are there two *strong* copies or less of this handle?
    #[inline(always)]
    fn can_collect_clone(&self) -> bool {
        self.can_collect()
    }
}

/// A container which can be used to cache values of type `T`
pub trait Caches<T: ?Sized>: Hash + Eq + Clone + CanCollect {
    /// Get a reference to the cached value of type `T`
    fn cached(&self) -> &T;
}

/// A gloabl cache implementation
pub trait GlobalCache {
    /// The cache entry type
    type Entry;
    /**
    Compute how many items are in a given cache.

    # Example
    ```rust
    use dashcache::{GlobalCache, DashCache};
    use elysees::Arc;
    let int_cache = DashCache::<Arc<u64>>::new();
    assert_eq!(int_cache.len(), 0);
    int_cache.cache(10);
    assert_eq!(int_cache.len(), 1);
    int_cache.cache(20);
    assert_eq!(int_cache.len(), 2);
    // Since 10 is already in the cache, this is a no-op:
    int_cache.cache(10);
    assert_eq!(int_cache.len(), 2);
    ```
    */
    fn len(&self) -> usize;
    /**
    Check if a cache is empty

    # Example
    ```rust
    use dashcache::{GlobalCache, DashCache};
    use elysees::Arc;
    let int_cache = DashCache::<Arc<u64>>::new();
    assert!(int_cache.is_empty());
    int_cache.cache(10);
    assert!(!int_cache.is_empty());
    ```
    */
    fn is_empty(&self) -> bool;
    /**
    Attempt to cache a value. If already cached, return the corresponding entry.

    # Example
    ```rust
    use dashcache::{GlobalCache, DashCache};
    use elysees::Arc;
    let int_cache = DashCache::<Arc<u64>>::new();

    let cached_32 = int_cache.cache(32);
    let arc_32 = Arc::new(32);
    // These are different allocations!
    assert!(!Arc::ptr_eq(&arc_32, &cached_32));

    // We can use the cache to de-duplicate allocations
    let dedup_32 = int_cache.cache(arc_32.clone());
    assert!(Arc::ptr_eq(&dedup_32, &cached_32));

    // Similarly, we'll get the same `Arc` if we insert the value again
    let new_32 = int_cache.cache(32);
    assert!(Arc::ptr_eq(&new_32, &cached_32));

    // We can also insert an `Arc` from the get-go:
    let arc_44 = Arc::new(44);
    let cached_44 = int_cache.cache(arc_44.clone());
    assert!(Arc::ptr_eq(&arc_44, &cached_44));
    // New insertions are also deduplicated:
    let dedup_44 = int_cache.cache(44);
    assert!(Arc::ptr_eq(&arc_44, &dedup_44));
    ```
    */
    fn cache<Q>(&self, value: Q) -> Self::Entry
    where
        Self::Entry: Borrow<Q>,
        Q: Into<Self::Entry> + Hash + Eq;
    /**
    Attempt to garbage-collect this cache, returning how many elements were removed.
    # Example
    ```rust
    use dashcache::{GlobalCache, DashCache};
    use elysees::Arc;
    let int_cache = DashCache::<Arc<u64>>::new();

    // Let's stick 2 used values and 3 unused values into the cache:
    let used_1 = int_cache.cache(77);
    let used_2 = int_cache.cache(88);
    int_cache.cache(99);
    int_cache.cache(500);
    int_cache.cache(81);
    // We can see that at this point there are 5 things in the cache:
    assert_eq!(int_cache.len(), 5);
    // Now, let's garbage collect the cache, which should bring us down 3 things:
    assert_eq!(int_cache.gc(), 3);
    // And we have 2 things left:
    assert_eq!(int_cache.len(), 2);
    assert!(Arc::ptr_eq(&used_1, &int_cache.cache(77)));
    assert!(Arc::ptr_eq(&used_2, &int_cache.cache(88)));
    ```

    # Implementation Notes
    Note that this implements conservative GC, and in general it is perfectly well defined for this to be a no-op
    */
    #[inline(always)]
    fn gc(&self) -> usize {
        0
    }
    /**
    Garbage collect a single entry if doing so would not break the hash-consing property due to uniqueness.
    Returns the removed key on success, or Err(key) on failure

    # Implementation notes
    Note that this implements conservative GC, and in general it is perfectly well defined for this to be a no-op
    */
    #[inline(always)]
    fn try_gc(&self, _key: &mut Self::Entry) -> Option<Self::Entry> {
        None
    }
}

/// A global cache built around a `DashMap`
pub struct DashCache<C, S = RandomState> {
    cache: DashMap<C, (), S>,
}

impl<C: Hash + Eq> DashCache<C> {
    /// Create a new, empty `DashCache`
    #[inline]
    pub fn new() -> DashCache<C> {
        DashCache::default()
    }
}

impl<C: Hash + Eq, S: BuildHasher + Clone + Default> Default for DashCache<C, S> {
    #[inline]
    fn default() -> DashCache<C, S> {
        DashCache {
            cache: DashMap::default(),
        }
    }
}

impl<C, S> From<DashMap<C, (), S>> for DashCache<C, S> {
    #[inline]
    fn from(cache: DashMap<C, (), S>) -> DashCache<C, S> {
        DashCache { cache }
    }
}

impl<C, S> Debug for DashCache<C, S>
where
    C: Debug + Hash + Eq,
    S: BuildHasher + Clone,
{
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
        fmt.debug_struct("DashCache")
            .field("cache", &self.cache)
            .finish()
    }
}

impl<C, S> GlobalCache for DashCache<C, S>
where
    S: BuildHasher + Clone,
    C: Hash + Eq + Clone + CanCollect,
{
    type Entry = C;
    #[inline]
    fn cache<Q>(&self, value: Q) -> C
    where
        C: Borrow<Q>,
        Q: Into<C> + Hash + Eq,
    {
        // Read-lock first!
        // TODO: profile and see if this actually even helps efficiency at all
        if let Some(cached) = self.cache.get(&value) {
            return cached.key().clone();
        }
        self.cache.entry(value.into()).or_default().key().clone()
    }
    #[inline]
    fn gc(&self) -> usize {
        let mut collected = 0;
        self.cache.retain(|arc, _| {
            if arc.can_collect() {
                collected += 1;
                false
            } else {
                true
            }
        });
        collected
    }
    #[inline]
    fn try_gc(&self, key: &mut Self::Entry) -> Option<Self::Entry> {
        if key.can_collect_clone() {
            return self.cache.remove(key).map(|(key, _)| key);
        }
        None
    }
    #[inline]
    fn len(&self) -> usize {
        self.cache.len()
    }
    #[inline]
    fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }
}

impl<T: ?Sized> CanCollect for Arc<T> {
    /// Is this handle unique or, if weak references are allowed, destroyed
    #[inline]
    fn can_collect(&self) -> bool {
        self.is_unique()
    }
    /// Are there two *strong* copies or less of this handle?
    #[inline(always)]
    fn can_collect_clone(&self) -> bool {
        Arc::count(self, std::sync::atomic::Ordering::Acquire) <= 2
    }
}

impl<T: Hash + Eq + ?Sized> Caches<T> for Arc<T> {
    #[inline]
    fn cached(&self) -> &T {
        self
    }
}

impl<T: ?Sized> CanCollect for std::sync::Arc<T> {
    /// Is this handle unique or, if weak references are allowed, destroyed
    #[inline]
    fn can_collect(&self) -> bool {
        std::sync::Arc::strong_count(self) == 1
    }
    /// Are there two *strong* copies or less of this handle?
    #[inline(always)]
    fn can_collect_clone(&self) -> bool {
        std::sync::Arc::strong_count(self) <= 2
    }
}

impl<T: Hash + Eq + ?Sized> Caches<T> for std::sync::Arc<T> {
    #[inline]
    fn cached(&self) -> &T {
        self
    }
}

impl<T: ?Sized> CanCollect for std::rc::Rc<T> {
    /// Is this handle unique or, if weak references are allowed, destroyed
    #[inline]
    fn can_collect(&self) -> bool {
        std::rc::Rc::strong_count(self) == 1
    }
    /// Are there two *strong* copies or less of this handle?
    #[inline(always)]
    fn can_collect_clone(&self) -> bool {
        std::rc::Rc::strong_count(self) <= 2
    }
}

impl<T: Hash + Eq + ?Sized> Caches<T> for std::rc::Rc<T> {
    #[inline]
    fn cached(&self) -> &T {
        self
    }
}