sieve_cache/
lib.rs

1#![doc = include_str!("../README.md")]
2
3//! # SIEVE Cache for Rust
4//!
5//! This crate provides implementations of the SIEVE cache replacement algorithm:
6//!
7//! * [`SieveCache`] - The core single-threaded implementation (always available)
8
9#[cfg(feature = "sync")]
10pub mod _docs_sync {}
11
12#[cfg(feature = "sharded")]
13pub mod _docs_sharded {}
14
15pub mod _sieve_algorithm {
16    //! ## The SIEVE Algorithm
17    //!
18    //! SIEVE (Simple, space-efficient, In-memory, EViction mEchanism) is a cache eviction
19    //! algorithm that maintains a single bit per entry to track whether an item has been
20    //! "visited" since it was last considered for eviction. This approach requires less
21    //! state than LRU but achieves excellent performance, especially on skewed workloads.
22}
23
24pub mod _cache_implementation {
25    //! ## Cache Implementation Details
26    //!
27    //! The cache is implemented as a combination of:
28    //!
29    //! 1. A `HashMap` for O(1) key lookups
30    //! 2. A doubly-linked list for managing the eviction order
31    //! 3. A "visited" flag on each entry to track recent access
32    //!
33    //! When the cache is full and a new item is inserted, the eviction algorithm:
34    //! 1. Starts from the "hand" position (or tail if no hand)
35    //! 2. Finds the first non-visited entry, evicting it
36    //! 3. Marks all visited entries as non-visited while searching
37}
38
39pub mod _implementation_choice {
40    //! ## Choosing the Right Implementation
41    //!
42    //! - Use [`SieveCache`] for single-threaded applications
43}
44
45#[cfg(feature = "sync")]
46pub mod _docs_sync_usage {
47    //! - Use [`SyncSieveCache`] for multi-threaded applications with moderate concurrency
48}
49
50#[cfg(feature = "sharded")]
51pub mod _docs_sharded_usage {
52    //! - Use [`ShardedSieveCache`] for applications with high concurrency where operations
53    //!   are distributed across many different keys
54}
55
56use std::borrow::Borrow;
57use std::hash::Hash;
58use std::{collections::HashMap, ptr::NonNull};
59
60#[cfg(feature = "sharded")]
61mod sharded;
62#[cfg(feature = "sync")]
63mod sync;
64
65#[cfg(feature = "sharded")]
66pub use sharded::ShardedSieveCache;
67#[cfg(feature = "sync")]
68pub use sync::SyncSieveCache;
69
70struct Node<K: Eq + Hash + Clone, V> {
71    key: K,
72    value: V,
73    prev: Option<NonNull<Node<K, V>>>,
74    next: Option<NonNull<Node<K, V>>>,
75    visited: bool,
76}
77
78impl<K: Eq + Hash + Clone, V> Node<K, V> {
79    fn new(key: K, value: V) -> Self {
80        Self {
81            key,
82            value,
83            prev: None,
84            next: None,
85            visited: false,
86        }
87    }
88}
89
90/// A cache based on the SIEVE eviction algorithm.
91///
92/// `SieveCache` provides an efficient in-memory cache with a carefully designed eviction
93/// strategy that balances simplicity with good performance characteristics, especially on
94/// skewed workloads common in real-world applications.
95///
96/// This is the single-threaded implementation.
97#[cfg(feature = "sync")]
98/// For thread-safe variants, see [`SyncSieveCache`] (with the `sync` feature)
99#[cfg(feature = "sharded")]
100/// and [`ShardedSieveCache`] (with the `sharded` feature).
101///
102/// # Type Parameters
103///
104/// * `K` - The key type, which must implement `Eq`, `Hash`, and `Clone`
105/// * `V` - The value type, with no constraints
106///
107/// # Example
108///
109/// ```rust
110/// # #[cfg(feature = "doctest")]
111/// # {
112/// use sieve_cache::SieveCache;
113///
114/// // Create a new cache with capacity for 1000 items
115/// let mut cache = SieveCache::new(1000).unwrap();
116///
117/// // Insert some values
118/// cache.insert("key1".to_string(), "value1".to_string());
119/// cache.insert("key2".to_string(), "value2".to_string());
120///
121/// // Retrieve values - returns references to the values
122/// assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
123///
124/// // Check if the cache contains a key
125/// assert!(cache.contains_key("key1"));
126/// assert!(!cache.contains_key("missing_key"));
127///
128/// // Get a mutable reference to modify a value
129/// if let Some(value) = cache.get_mut("key1") {
130///     *value = "modified".to_string();
131/// }
132///
133/// // Verify the modification
134/// assert_eq!(cache.get("key1"), Some(&"modified".to_string()));
135///
136/// // Remove a value
137/// let removed = cache.remove("key2");
138/// assert_eq!(removed, Some("value2".to_string()));
139/// # }
140/// ```
141pub struct SieveCache<K: Eq + Hash + Clone, V> {
142    /// Map of keys to cache entries
143    map: HashMap<K, Box<Node<K, V>>>,
144    /// Pointer to the head (most recently added) node
145    head: Option<NonNull<Node<K, V>>>,
146    /// Pointer to the tail (least recently added) node
147    tail: Option<NonNull<Node<K, V>>>,
148    /// The "hand" pointer used by the SIEVE algorithm for eviction
149    hand: Option<NonNull<Node<K, V>>>,
150    /// Maximum number of entries the cache can hold
151    capacity: usize,
152    /// Current number of entries in the cache
153    len: usize,
154}
155
156unsafe impl<K: Eq + Hash + Clone, V> Send for SieveCache<K, V> {}
157
158impl<K: Eq + Hash + Clone, V> SieveCache<K, V> {
159    /// Creates a new cache with the given capacity.
160    ///
161    /// The capacity represents the maximum number of key-value pairs
162    /// that can be stored in the cache. When this limit is reached,
163    /// the cache will use the SIEVE algorithm to evict entries.
164    ///
165    /// # Errors
166    ///
167    /// Returns an error if the capacity is zero.
168    ///
169    /// # Examples
170    ///
171    /// ```rust
172    /// # #[cfg(feature = "doctest")]
173    /// # {
174    /// use sieve_cache::SieveCache;
175    ///
176    /// // Create a cache with space for 100 entries
177    /// let cache: SieveCache<String, u32> = SieveCache::new(100).unwrap();
178    ///
179    /// // Capacity of zero is invalid
180    /// let invalid = SieveCache::<String, u32>::new(0);
181    /// assert!(invalid.is_err());
182    /// # }
183    /// ```
184    pub fn new(capacity: usize) -> Result<Self, &'static str> {
185        if capacity == 0 {
186            return Err("capacity must be greater than 0");
187        }
188        Ok(Self {
189            map: HashMap::with_capacity(capacity),
190            head: None,
191            tail: None,
192            hand: None,
193            capacity,
194            len: 0,
195        })
196    }
197
198    /// Returns the capacity of the cache.
199    ///
200    /// This is the maximum number of entries that can be stored before
201    /// the cache begins evicting old entries.
202    ///
203    /// # Examples
204    ///
205    /// ```rust
206    /// # #[cfg(feature = "doctest")]
207    /// # {
208    /// use sieve_cache::SieveCache;
209    ///
210    /// let cache = SieveCache::<String, u32>::new(100).unwrap();
211    /// assert_eq!(cache.capacity(), 100);
212    /// # }
213    /// ```
214    #[inline]
215    pub fn capacity(&self) -> usize {
216        self.capacity
217    }
218
219    /// Returns the number of entries currently in the cache.
220    ///
221    /// This value will never exceed the capacity.
222    ///
223    /// # Examples
224    ///
225    /// ```rust
226    /// # #[cfg(feature = "doctest")]
227    /// # {
228    /// use sieve_cache::SieveCache;
229    ///
230    /// let mut cache = SieveCache::new(100).unwrap();
231    /// assert_eq!(cache.len(), 0);
232    ///
233    /// cache.insert("key".to_string(), 42);
234    /// assert_eq!(cache.len(), 1);
235    /// # }
236    /// ```
237    #[inline]
238    pub fn len(&self) -> usize {
239        self.len
240    }
241
242    /// Returns `true` when no values are currently cached.
243    ///
244    /// # Examples
245    ///
246    /// ```rust
247    /// # #[cfg(feature = "doctest")]
248    /// # {
249    /// use sieve_cache::SieveCache;
250    ///
251    /// let mut cache = SieveCache::<String, u32>::new(100).unwrap();
252    /// assert!(cache.is_empty());
253    ///
254    /// cache.insert("key".to_string(), 42);
255    /// assert!(!cache.is_empty());
256    ///
257    /// cache.remove("key");
258    /// assert!(cache.is_empty());
259    /// # }
260    /// ```
261    #[inline]
262    pub fn is_empty(&self) -> bool {
263        self.len == 0
264    }
265
266    /// Returns `true` if there is a value in the cache mapped to by `key`.
267    ///
268    /// This operation does not count as an access for the SIEVE algorithm's
269    /// visited flag.
270    ///
271    /// # Examples
272    ///
273    /// ```rust
274    /// # #[cfg(feature = "doctest")]
275    /// # {
276    /// use sieve_cache::SieveCache;
277    ///
278    /// let mut cache = SieveCache::new(100).unwrap();
279    /// cache.insert("key".to_string(), "value".to_string());
280    ///
281    /// assert!(cache.contains_key("key"));
282    /// assert!(!cache.contains_key("missing"));
283    /// # }
284    /// ```
285    #[inline]
286    pub fn contains_key<Q>(&mut self, key: &Q) -> bool
287    where
288        Q: Hash + Eq + ?Sized,
289        K: Borrow<Q>,
290    {
291        self.map.contains_key(key)
292    }
293
294    /// Gets an immutable reference to the value in the cache mapped to by `key`.
295    ///
296    /// If no value exists for `key`, this returns `None`.
297    ///
298    /// This operation marks the entry as "visited" in the SIEVE algorithm,
299    /// which affects eviction decisions.
300    ///
301    /// # Examples
302    ///
303    /// ```rust
304    /// # #[cfg(feature = "doctest")]
305    /// # {
306    /// use sieve_cache::SieveCache;
307    ///
308    /// let mut cache = SieveCache::new(100).unwrap();
309    /// cache.insert("key".to_string(), "value".to_string());
310    ///
311    /// assert_eq!(cache.get("key"), Some(&"value".to_string()));
312    /// assert_eq!(cache.get("missing"), None);
313    /// # }
314    /// ```
315    pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
316    where
317        Q: Hash + Eq + ?Sized,
318        K: Borrow<Q>,
319    {
320        let node_ = self.map.get_mut(key)?;
321        // Mark as visited for the SIEVE algorithm
322        node_.visited = true;
323        Some(&node_.value)
324    }
325
326    /// Gets a mutable reference to the value in the cache mapped to by `key`.
327    ///
328    /// If no value exists for `key`, this returns `None`.
329    ///
330    /// This operation marks the entry as "visited" in the SIEVE algorithm,
331    /// which affects eviction decisions.
332    ///
333    /// # Examples
334    ///
335    /// ```rust
336    /// # #[cfg(feature = "doctest")]
337    /// # {
338    /// use sieve_cache::SieveCache;
339    ///
340    /// let mut cache = SieveCache::new(100).unwrap();
341    /// cache.insert("key".to_string(), "value".to_string());
342    ///
343    /// // Modify the value in-place
344    /// if let Some(value) = cache.get_mut("key") {
345    ///     *value = "new_value".to_string();
346    /// }
347    ///
348    /// assert_eq!(cache.get("key"), Some(&"new_value".to_string()));
349    /// # }
350    /// ```
351    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
352    where
353        Q: Hash + Eq + ?Sized,
354        K: Borrow<Q>,
355    {
356        let node_ = self.map.get_mut(key)?;
357        // Mark as visited for the SIEVE algorithm
358        node_.visited = true;
359        Some(&mut node_.value)
360    }
361
362    /// Maps `key` to `value` in the cache, possibly evicting old entries.
363    ///
364    /// If the key already exists, its value is updated and the entry is marked as visited.
365    /// If the key doesn't exist and the cache is at capacity, the SIEVE algorithm is used
366    /// to evict an entry before the new key-value pair is inserted.
367    ///
368    /// # Return Value
369    ///
370    /// Returns `true` when this is a new entry, and `false` if an existing entry was updated.
371    ///
372    /// # Examples
373    ///
374    /// ```rust
375    /// # #[cfg(feature = "doctest")]
376    /// # {
377    /// use sieve_cache::SieveCache;
378    ///
379    /// let mut cache = SieveCache::new(100).unwrap();
380    ///
381    /// // Insert a new entry
382    /// let is_new = cache.insert("key1".to_string(), "value1".to_string());
383    /// assert!(is_new); // Returns true for new entries
384    ///
385    /// // Update an existing entry
386    /// let is_new = cache.insert("key1".to_string(), "updated".to_string());
387    /// assert!(!is_new); // Returns false for updates
388    ///
389    /// assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
390    /// # }
391    /// ```
392    pub fn insert(&mut self, key: K, value: V) -> bool {
393        // Check if key already exists
394        let node = self.map.get_mut(&key);
395        if let Some(node_) = node {
396            // Update existing entry
397            node_.visited = true;
398            node_.value = value;
399            return false;
400        }
401
402        // Evict if at capacity
403        if self.len >= self.capacity {
404            self.evict();
405        }
406
407        // Create new node
408        let node = Box::new(Node::new(key.clone(), value));
409        self.add_node(NonNull::from(node.as_ref()));
410        debug_assert!(!node.visited);
411        self.map.insert(key, node);
412        debug_assert!(self.len < self.capacity);
413        self.len += 1;
414        true
415    }
416
417    /// Removes the cache entry mapped to by `key`.
418    ///
419    /// This method explicitly removes an entry from the cache regardless of its
420    /// "visited" status.
421    ///
422    /// # Return Value
423    ///
424    /// Returns the value removed from the cache. If `key` did not map to any value,
425    /// then this returns `None`.
426    ///
427    /// # Examples
428    ///
429    /// ```rust
430    /// # #[cfg(feature = "doctest")]
431    /// # {
432    /// use sieve_cache::SieveCache;
433    ///
434    /// let mut cache = SieveCache::new(100).unwrap();
435    /// cache.insert("key1".to_string(), "value1".to_string());
436    /// cache.insert("key2".to_string(), "value2".to_string());
437    ///
438    /// // Remove an existing entry
439    /// let removed = cache.remove("key1");
440    /// assert_eq!(removed, Some("value1".to_string()));
441    ///
442    /// // Try to remove a non-existent entry
443    /// let removed = cache.remove("key1");
444    /// assert_eq!(removed, None);
445    ///
446    /// assert_eq!(cache.len(), 1); // Only one entry remains
447    /// # }
448    /// ```
449    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
450    where
451        K: Borrow<Q>,
452        Q: Eq + Hash + ?Sized,
453    {
454        // Find the node
455        let node_ = self.map.get_mut(key)?;
456        let node__ = NonNull::from(node_.as_ref());
457
458        // Update hand pointer if it points to the node being removed
459        if self.hand == Some(node__) {
460            self.hand = node_.as_ref().prev;
461        }
462
463        // Remove from the map and extract the value
464        let value = self.map.remove(key).map(|node| node.value);
465
466        // Remove from the linked list
467        self.remove_node(node__);
468        debug_assert!(self.len > 0);
469        self.len -= 1;
470        value
471    }
472
473    /// Removes and returns a value from the cache that was not recently accessed.
474    ///
475    /// This method implements the SIEVE eviction algorithm:
476    /// 1. Starting from the "hand" pointer (or tail if no hand), look for an entry
477    ///    that hasn't been visited since the last scan
478    /// 2. Mark all visited entries as non-visited during the scan
479    /// 3. If a non-visited entry is found, evict it
480    /// 4. Update the hand to point to the previous node
481    ///
482    /// # Return Value
483    ///
484    /// If a suitable entry is found, it is removed from the cache and its value is returned.
485    /// If all entries have been recently accessed or the cache is empty, this returns `None`.
486    ///
487    /// # Note
488    ///
489    /// This method is automatically called by `insert` when the cache is at capacity,
490    /// but it can also be called manually to proactively free space.
491    ///
492    /// # Examples
493    ///
494    /// ```rust
495    /// # #[cfg(feature = "doctest")]
496    /// # {
497    /// use sieve_cache::SieveCache;
498    ///
499    /// let mut cache = SieveCache::new(3).unwrap();
500    /// cache.insert("key1".to_string(), "value1".to_string());
501    /// cache.insert("key2".to_string(), "value2".to_string());
502    /// cache.insert("key3".to_string(), "value3".to_string());
503    ///
504    /// // Access key1 and key2 to mark them as visited
505    /// cache.get("key1");
506    /// cache.get("key2");
507    ///
508    /// // key3 hasn't been accessed, so it should be evicted
509    /// let evicted = cache.evict();
510    /// assert!(evicted.is_some());
511    /// assert!(!cache.contains_key("key3")); // key3 was evicted
512    /// # }
513    /// ```
514    pub fn evict(&mut self) -> Option<V> {
515        // Start from the hand pointer or the tail if hand is None
516        let mut node = self.hand.or(self.tail);
517
518        // Scan for a non-visited entry
519        while node.is_some() {
520            let mut node_ = node.unwrap();
521            unsafe {
522                // If we found a non-visited entry, evict it
523                if !node_.as_ref().visited {
524                    break;
525                }
526
527                // Mark as non-visited for the next scan
528                node_.as_mut().visited = false;
529
530                // Move to the previous node, or wrap around to the tail
531                if node_.as_ref().prev.is_some() {
532                    node = node_.as_ref().prev;
533                } else {
534                    node = self.tail;
535                }
536            }
537        }
538
539        // If we found a node to evict
540        if let Some(node_) = node {
541            let value = unsafe {
542                // Update the hand pointer
543                self.hand = node_.as_ref().prev;
544
545                // Remove from the map and get the value
546                self.map.remove(&node_.as_ref().key).map(|node| node.value)
547            };
548
549            // Remove from the linked list
550            self.remove_node(node_);
551            debug_assert!(self.len > 0);
552            self.len -= 1;
553            value
554        } else {
555            None
556        }
557    }
558
559    /// Adds a node to the front of the linked list (making it the new head).
560    ///
561    /// This is an internal helper method used during insertion.
562    fn add_node(&mut self, mut node: NonNull<Node<K, V>>) {
563        unsafe {
564            // Link the new node to the current head
565            node.as_mut().next = self.head;
566            node.as_mut().prev = None;
567
568            // Update the current head's prev pointer to the new node
569            if let Some(mut head) = self.head {
570                head.as_mut().prev = Some(node);
571            }
572        }
573
574        // Set the new node as the head
575        self.head = Some(node);
576
577        // If this is the first node, it's also the tail
578        if self.tail.is_none() {
579            self.tail = self.head;
580        }
581    }
582
583    /// Removes a node from the linked list.
584    ///
585    /// This is an internal helper method used during removal and eviction.
586    fn remove_node(&mut self, node: NonNull<Node<K, V>>) {
587        unsafe {
588            // Update the previous node's next pointer
589            if let Some(mut prev) = node.as_ref().prev {
590                prev.as_mut().next = node.as_ref().next;
591            } else {
592                // If no previous node, this was the head
593                self.head = node.as_ref().next;
594            }
595
596            // Update the next node's prev pointer
597            if let Some(mut next) = node.as_ref().next {
598                next.as_mut().prev = node.as_ref().prev;
599            } else {
600                // If no next node, this was the tail
601                self.tail = node.as_ref().prev;
602            }
603        }
604    }
605}
606
607#[test]
608fn test() {
609    let mut cache = SieveCache::new(3).unwrap();
610    assert!(cache.insert("foo".to_string(), "foocontent".to_string()));
611    assert!(cache.insert("bar".to_string(), "barcontent".to_string()));
612    cache.remove("bar");
613    assert!(cache.insert("bar2".to_string(), "bar2content".to_string()));
614    assert!(cache.insert("bar3".to_string(), "bar3content".to_string()));
615    assert_eq!(cache.get("foo"), Some(&"foocontent".to_string()));
616    assert_eq!(cache.get("bar"), None);
617    assert_eq!(cache.get("bar2"), Some(&"bar2content".to_string()));
618    assert_eq!(cache.get("bar3"), Some(&"bar3content".to_string()));
619}
620
621#[test]
622fn test_visited_flag_update() {
623    let mut cache = SieveCache::new(2).unwrap();
624    cache.insert("key1".to_string(), "value1".to_string());
625    cache.insert("key2".to_string(), "value2".to_string());
626    // update `key1` entry.
627    cache.insert("key1".to_string(), "updated".to_string());
628    // new entry is added.
629    cache.insert("key3".to_string(), "value3".to_string());
630    assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
631}