waitcache 0.1.3

An ever-growing concurrent hash map with key-level locking granularity.
Documentation
//
// Copyright (C) 2020 Nathan Sharp.
//
// This Source Code Form is subject to the terms of the Mozilla Public License,
// v. 2.0. If a copy of the MPL was not distributed with this file, You can
// obtain one at https://mozilla.org/MPL/2.0/
//
// This Source Code Form is "Incompatible With Secondary Licenses", as defined
// by the Mozilla Public License, v. 2.0.
//

//! The `waitcache` crate provides the [`WaitCache`] type, an ever-growing
//! concurrent hash map with key-level locking granularity. A hash map of this
//! kind is useful for memoizing highly expensive functions, such as those
//! requiring disk or network accesses. Unlike other cache implementations,
//! [`WaitCache`] only ever resolves a value once, preventing expensive
//! computations from duplicating work.
//!
//! # License
//! `waitcache` is licensed under the terms of the
//! [Mozilla Public License, v. 2.0][MPL]. All Source Code Forms are
//! "Incompatible With Secondary Licenses", as described in *ยง3.3* of the
//! license.
//!
//! # Development
//! `waitcache` is developed at [GitLab] and built on the [`dashmap`] concurrent
//! hash map and the [`waitcell`] synchronous future.
//!
//! [`dashmap`]: https://github.com/xacrimon/dashmap
//! [GitLab]: https://gitlab.com/nwsharp/prehash
//! [MPL]: https://mozilla.org/MPL/2.0
//! [`waitcell`]: https://gitlab.com/nwsharp/waitcell

pub mod iter;

use std::borrow::Borrow;
use std::collections::hash_map::RandomState;
use std::fmt::{self, Debug, Formatter};
use std::hash::{BuildHasher, Hash};
use std::iter::FromIterator;

use dashmap::mapref::entry::Entry as DashEntry;
use dashmap::mapref::one::Ref as DashRef;
use dashmap::mapref::one::RefMut as DashMut;
use dashmap::DashMap;
use waitcell::WaitCell;

/// A concurrent, ever-growing hash map with key-level lock granularity.
///
/// This map is suitable for use in situations where the resolution of values
/// can take an extraordinarily long time, such as caching the result of disk
/// access, network requests, or extensive computations. Concurrent resolutions
/// block access to only the resolving values, and not to hash-adjacent values.
///
/// Unlike some other cache implemetations, the function used to resolve a given
/// key is *never* executed multiple times. This is important for avoiding
/// duplication of expensive work.
pub struct WaitCache<K, V, S = RandomState> {
    items: DashMap<K, Box<WaitCell<V>>, S>,
}

impl<K: Eq + Hash, V> WaitCache<K, V, RandomState> {
    /// Constructs a new `WaitCache` with the default hashing algorithm and an
    /// initial capacity of 0.
    #[must_use]
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    /// Constructs a new `WaitCache` with the default hashing algorithm and the
    /// specified initial capacity.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity_and_hasher(capacity, RandomState::new())
    }
}

impl<K: Eq + Hash, V, S: BuildHasher + Clone> WaitCache<K, V, S> {
    /// Constructs a new `WaitCache` with the specified hasher and an initial
    /// capacity of 0.
    #[must_use]
    pub fn with_hasher(hasher: S) -> Self {
        Self::with_capacity_and_hasher(0, hasher)
    }

    /// Constructs a new `WaitCache` with the specified hasher and initial
    /// capacity.
    #[must_use]
    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
        Self { items: DashMap::with_capacity_and_hasher(capacity, hasher) }
    }

    /// Returns `true` if the cache currently contains no items and `false`
    /// otherwise.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Returns the number of items currently in the cache.
    #[must_use]
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns the currently provisioned capacity of the cache.
    #[must_use]
    pub fn capacity(&self) -> usize {
        self.items.capacity()
    }

    /// Shrinks the provisioned capacity to match the current number of items in
    /// the cache.
    pub fn shrink_to_fit(&self) {
        self.items.shrink_to_fit()
    }

    /// Returns the hasher used to construct this cache.
    #[must_use]
    pub fn hasher(&self) -> &S {
        self.items.hasher()
    }

    /// Empties the cache of all items.
    pub fn clear(&mut self) {
        // Safety: `self` is borrowed mutably, so we know that there are no outstanding
        //         references to our WaitCells.
        self.items.clear()
    }

    /// Looks up a value in the cache given a compatible key.
    ///
    /// If the key is present but the value is not fully resolved, the current
    /// thread will block until resolution completes.
    ///
    /// # Returns
    /// * `Some(value)` - The fully-resolved value corresponding to the
    ///   specified key.
    /// * `None` - The specified key was not present.
    #[must_use]
    pub fn get<'a, 'b, Q: Eq + Hash + ?Sized>(&'a self, key: &'b Q) -> Option<&'a V>
    where K: Borrow<Q> {
        // Safety: By construction, it is impossible to invalidate the reference to the
        //         WaitCell while the cache is immutably borrowed.
        self.items.get(key).map(|value| unsafe { self.extend_ref(value).get() })
    }

    /// Retrieves the value with the specified key, or initializes it if it is
    /// not present.
    ///
    /// If the key is present but the value is not fully resolved, the current
    /// thread will block until resolution completes. If the key is not present,
    /// `init` is executed to produce a value. In either case, an immutable
    /// reference to the value is returned.
    ///
    /// # Notes
    /// The resolution closure, `init`, does not provide access to the key being
    /// resolved. You may need to provide a copy of this value to the closure.
    /// This is done to allow for maximum concurrency, as it permits the key
    /// to be accessed by other threads during the resolution process.
    pub fn resolve<F: FnOnce() -> V>(&self, key: K, init: F) -> &V {
        // Safety: By construction, it is impossible to invalidate the reference to the
        //         WaitCell while the cache is immutably borrowed.
        match self.items.entry(key) {
            DashEntry::Occupied(entry) => unsafe {
                self.extend_ref(entry.into_ref().downgrade()).get()
            },
            DashEntry::Vacant(entry) => {
                let cell = unsafe { self.extend_mut(entry.insert(Box::new(WaitCell::new()))) };
                cell.init(init())
            }
        }
    }

    /// Produces an iterator of immutable references to the entries contained in
    /// the cache.
    ///
    /// The values are exposed as immutable references to a [`WaitCell`]
    /// containing the value. The value may still be resolving.
    ///
    /// # Concurrency
    /// This iterator may prevent concurrent updates to certain portions of the
    /// cache.
    ///
    /// [`WaitCell`]: waitcell::WaitCell
    #[must_use]
    pub fn iter(&self) -> iter::Iter<'_, K, V, S> {
        iter::Iter { iter: self.items.iter(), current: None }
    }

    /// Produces an iterator of immutable references to the keyes contained in
    /// the cache.
    ///
    /// # Concurrency
    /// This iterator may prevent concurrent updates to certain portions of the
    /// cache.
    #[must_use]
    pub fn keys(&self) -> iter::Keys<'_, K, V, S> {
        iter::Keys { iter: self.iter() }
    }

    /// Produces an iterator of immutable references to the values contained in
    /// the cache.
    ///
    /// The values are exposed as immutable references to a [`WaitCell`]
    /// containing the value. The value may still be resolving.
    ///
    /// [`WaitCell`]: waitcell::WaitCell
    #[must_use]
    pub fn values(&self) -> iter::Values<'_, K, V, S> {
        iter::Values { iter: self.items.iter() }
    }

    #[must_use]
    unsafe fn extend_ref<'a, 'b>(
        &'a self,
        dref: DashRef<'b, K, Box<WaitCell<V>>, S>,
    ) -> &'a WaitCell<V> {
        &*(dref.value().as_ref() as *const WaitCell<V>)
    }

    #[allow(clippy::mut_from_ref)]
    #[must_use]
    unsafe fn extend_mut<'a, 'b>(
        &'a self,
        mut dref: DashMut<'b, K, Box<WaitCell<V>>, S>,
    ) -> &'a mut WaitCell<V> {
        &mut *(dref.value_mut().as_mut() as *mut WaitCell<V>)
    }

    /// Constructs a `WaitCache` from the specified boxed cell iterator and
    /// hasher.
    ///
    /// # Notes
    /// This method is provided for completeness. Prefer using
    /// [`from_iter_with_hasher`][Self::from_iter_with_hasher] instead.
    #[must_use]
    pub fn from_raw_iter_with_hasher<T: IntoIterator<Item = (K, Box<WaitCell<V>>)>>(
        iter: T,
        hasher: S,
    ) -> Self {
        let iter = iter.into_iter();

        let capacity = match iter.size_hint() {
            (lower, None) => lower,
            (_, Some(upper)) => upper,
        };

        let result = Self::with_capacity_and_hasher(capacity, hasher);
        for (key, boxed) in iter {
            result.items.insert(key, boxed);
        }
        result
    }

    /// Constructs a `WaitCache` from the specified iterator and hasher.
    #[must_use]
    pub fn from_iter_with_hasher<T: IntoIterator<Item = (K, V)>>(iter: T, hasher: S) -> Self {
        Self::from_raw_iter_with_hasher(
            iter.into_iter().map(|(key, value)| (key, Box::new(WaitCell::initialized(value)))),
            hasher,
        )
    }
}

impl<K: Debug + Eq + Hash, V: Debug, S: BuildHasher + Clone> Debug for WaitCache<K, V, S> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        self.items.fmt(f)
    }
}

impl<K: Eq + Hash, V, S: BuildHasher + Clone + Default> Default for WaitCache<K, V, S> {
    fn default() -> Self {
        Self::with_hasher(Default::default())
    }
}

impl<K: Eq + Hash, V> FromIterator<(K, V)> for WaitCache<K, V, RandomState> {
    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
        Self::from_iter_with_hasher(iter, RandomState::new())
    }
}

impl<K: Eq + Hash, V, S: BuildHasher + Clone> IntoIterator for WaitCache<K, V, S> {
    type IntoIter = iter::Owned<K, V, S>;
    type Item = (K, Option<V>);

    fn into_iter(self) -> Self::IntoIter {
        iter::Owned { iter: self.items.into_iter() }
    }
}