// sieve_cache/lib.rs

#![doc = include_str!("../README.md")]

//! # SIEVE Cache for Rust
//!
//! This crate provides implementations of the SIEVE cache replacement algorithm:
//!
//! * [`SieveCache`] - The core single-threaded implementation (always available)

#[cfg(feature = "sync")]
pub mod _docs_sync {}

#[cfg(feature = "sharded")]
pub mod _docs_sharded {}

pub mod _sieve_algorithm {
    //! ## The SIEVE Algorithm
    //!
    //! SIEVE (Simple, space-efficient, In-memory, EViction mEchanism) is a cache eviction
    //! algorithm that maintains a single bit per entry to track whether an item has been
    //! "visited" since it was last considered for eviction. This approach requires less
    //! state than LRU but achieves excellent performance, especially on skewed workloads.
}

pub mod _cache_implementation {
    //! ## Cache Implementation Details
    //!
    //! The cache is implemented as a combination of:
    //!
    //! 1. A `HashMap` for O(1) key lookups
    //! 2. A vector-based ordered collection for managing the eviction order
    //! 3. A "visited" flag on each entry to track recent access
    //!
    //! When the cache is full and a new item is inserted, the eviction algorithm:
    //! 1. Starts from the "hand" position (eviction candidate)
    //! 2. Finds the first non-visited entry, evicting it
    //! 3. Marks all visited entries as non-visited while searching
}
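
// A small test illustrating the eviction walk described in the modules above: the
// scan starts at the hand (or at the end of the node list), clears `visited` flags
// as it passes over recently accessed entries, and evicts the first entry it finds
// unvisited. The key names and values here are arbitrary illustration data.
#[test]
fn eviction_walk_clears_visited_flags() {
    let mut cache = SieveCache::new(3).unwrap();
    cache.insert("a".to_string(), 1);
    cache.insert("b".to_string(), 2);
    cache.insert("c".to_string(), 3);

    // Mark only "c" as visited.
    cache.get("c");

    // The scan passes over "c" (clearing its flag) and evicts "b", the first
    // entry it encounters with the visited flag unset.
    assert_eq!(cache.evict(), Some(2));
    assert!(cache.contains_key("a"));
    assert!(cache.contains_key("c"));
    assert!(!cache.contains_key("b"));
}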

pub mod _implementation_choice {
    //! ## Choosing the Right Implementation
    //!
    //! - Use [`SieveCache`] for single-threaded applications
}

/// [DOCTEST_ONLY]
///
/// ```rust
/// # #[cfg(feature = "sync")]
/// # fn sync_example() {
/// # use sieve_cache::SyncSieveCache;
/// # let cache = SyncSieveCache::<String, u32>::new(1000).unwrap();
/// # let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// # println!("Recommended capacity: {}", recommended);
/// # }
/// #
/// # #[cfg(feature = "sharded")]
/// # fn sharded_example() {
/// # use sieve_cache::ShardedSieveCache;
/// # let cache = ShardedSieveCache::<String, u32>::new(1000).unwrap();
/// # let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// # println!("Recommended capacity: {}", recommended);
/// # }
/// ```
fn _readme_examples_doctest() {
    // This function exists only to host the doctest above
    // This ensures doctests from the README.md can be validated
}

#[cfg(feature = "sync")]
pub mod _docs_sync_usage {
    //! - Use [`SyncSieveCache`] for multi-threaded applications with moderate concurrency
}

#[cfg(feature = "sharded")]
pub mod _docs_sharded_usage {
    //! - Use [`ShardedSieveCache`] for applications with high concurrency where operations
    //!   are distributed across many different keys
}

use std::borrow::Borrow;
use std::collections::HashMap;
use std::hash::Hash;

#[cfg(feature = "sharded")]
mod sharded;
#[cfg(feature = "sync")]
mod sync;

#[cfg(feature = "sharded")]
pub use sharded::ShardedSieveCache;
#[cfg(feature = "sync")]
pub use sync::SyncSieveCache;

/// Internal representation of a cache entry
#[derive(Clone)]
struct Node<K: Eq + Hash + Clone, V> {
    key: K,
    value: V,
    visited: bool,
}

impl<K: Eq + Hash + Clone, V> Node<K, V> {
    fn new(key: K, value: V) -> Self {
        Self {
            key,
            value,
            visited: false,
        }
    }
}

/// A cache based on the SIEVE eviction algorithm.
///
/// `SieveCache` provides an efficient in-memory cache with a carefully designed eviction
/// strategy that balances simplicity with good performance characteristics, especially on
/// skewed workloads common in real-world applications.
///
/// This is the single-threaded implementation.
#[cfg_attr(
    feature = "sync",
    doc = "For thread-safe variants, see [`SyncSieveCache`] (with the `sync` feature)"
)]
#[cfg_attr(
    feature = "sharded",
    doc = "and [`ShardedSieveCache`] (with the `sharded` feature)."
)]
///
/// # Type Parameters
///
/// * `K` - The key type, which must implement `Eq`, `Hash`, and `Clone`
/// * `V` - The value type, with no constraints
///
/// # Example
///
/// ```rust
/// # #[cfg(feature = "doctest")]
/// # {
/// use sieve_cache::SieveCache;
///
/// // Create a new cache with capacity for 1000 items
/// let mut cache = SieveCache::new(1000).unwrap();
///
/// // Insert some values
/// cache.insert("key1".to_string(), "value1".to_string());
/// cache.insert("key2".to_string(), "value2".to_string());
///
/// // Retrieve values - returns references to the values
/// assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
///
/// // Check if the cache contains a key
/// assert!(cache.contains_key("key1"));
/// assert!(!cache.contains_key("missing_key"));
///
/// // Get a mutable reference to modify a value
/// if let Some(value) = cache.get_mut("key1") {
///     *value = "modified".to_string();
/// }
///
/// // Verify the modification
/// assert_eq!(cache.get("key1"), Some(&"modified".to_string()));
///
/// // Remove a value
/// let removed = cache.remove("key2");
/// assert_eq!(removed, Some("value2".to_string()));
/// # }
/// ```
pub struct SieveCache<K: Eq + Hash + Clone, V> {
    /// Map of keys to indices in the nodes vector
    map: HashMap<K, usize>,
    /// Vector of all cache nodes
    nodes: Vec<Node<K, V>>,
    /// Index to the "hand" pointer used by the SIEVE algorithm for eviction
    hand: Option<usize>,
    /// Maximum number of entries the cache can hold
    capacity: usize,
}

impl<K: Eq + Hash + Clone, V> SieveCache<K, V> {
    /// Creates a new cache with the given capacity.
    ///
    /// The capacity represents the maximum number of key-value pairs
    /// that can be stored in the cache. When this limit is reached,
    /// the cache will use the SIEVE algorithm to evict entries.
    ///
    /// # Errors
    ///
    /// Returns an error if the capacity is zero.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// // Create a cache with space for 100 entries
    /// let cache: SieveCache<String, u32> = SieveCache::new(100).unwrap();
    ///
    /// // Capacity of zero is invalid
    /// let invalid = SieveCache::<String, u32>::new(0);
    /// assert!(invalid.is_err());
    /// # }
    /// ```
    pub fn new(capacity: usize) -> Result<Self, &'static str> {
        if capacity == 0 {
            return Err("capacity must be greater than 0");
        }
        Ok(Self {
            map: HashMap::with_capacity(capacity),
            nodes: Vec::with_capacity(capacity),
            hand: None,
            capacity,
        })
    }

    /// Returns the capacity of the cache.
    ///
    /// This is the maximum number of entries that can be stored before
    /// the cache begins evicting old entries.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let cache = SieveCache::<String, u32>::new(100).unwrap();
    /// assert_eq!(cache.capacity(), 100);
    /// # }
    /// ```
    #[inline]
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Returns the number of cached values.
    ///
    /// This value will never exceed the capacity.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// assert_eq!(cache.len(), 0);
    ///
    /// cache.insert("key".to_string(), "value".to_string());
    /// assert_eq!(cache.len(), 1);
    /// # }
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.nodes.len()
    }

    /// Returns `true` when no values are currently cached.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::<String, u32>::new(100).unwrap();
    /// assert!(cache.is_empty());
    ///
    /// cache.insert("key".to_string(), 42);
    /// assert!(!cache.is_empty());
    ///
    /// cache.remove("key");
    /// assert!(cache.is_empty());
    /// # }
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.nodes.is_empty()
    }

    /// Returns `true` if there is a value in the cache mapped to by `key`.
    ///
    /// This operation does not count as an access for the SIEVE algorithm's
    /// visited flag.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// assert!(cache.contains_key("key"));
    /// assert!(!cache.contains_key("missing"));
    /// # }
    /// ```
    #[inline]
    pub fn contains_key<Q>(&self, key: &Q) -> bool
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        self.map.contains_key(key)
    }

    /// Gets an immutable reference to the value in the cache mapped to by `key`.
    ///
    /// If no value exists for `key`, this returns `None`.
    ///
    /// This operation marks the entry as "visited" in the SIEVE algorithm,
    /// which affects eviction decisions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// assert_eq!(cache.get("key"), Some(&"value".to_string()));
    /// assert_eq!(cache.get("missing"), None);
    /// # }
    /// ```
    pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        if let Some(&idx) = self.map.get(key) {
            // Mark as visited for the SIEVE algorithm
            self.nodes[idx].visited = true;
            Some(&self.nodes[idx].value)
        } else {
            None
        }
    }

    /// Gets a mutable reference to the value in the cache mapped to by `key`.
    ///
    /// If no value exists for `key`, this returns `None`.
    ///
    /// This operation marks the entry as "visited" in the SIEVE algorithm,
    /// which affects eviction decisions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// // Modify the value in-place
    /// if let Some(value) = cache.get_mut("key") {
    ///     *value = "new_value".to_string();
    /// }
    ///
    /// assert_eq!(cache.get("key"), Some(&"new_value".to_string()));
    /// # }
    /// ```
    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        if let Some(&idx) = self.map.get(key) {
            // Mark as visited for the SIEVE algorithm
            self.nodes[idx].visited = true;
            Some(&mut self.nodes[idx].value)
        } else {
            None
        }
    }

    /// Maps `key` to `value` in the cache, possibly evicting old entries.
    ///
    /// If the key already exists, its value is updated and the entry is marked as visited.
    /// If the key doesn't exist and the cache is at capacity, the SIEVE algorithm is used
    /// to evict an entry before the new key-value pair is inserted.
    ///
    /// # Return Value
    ///
    /// Returns `true` when this is a new entry, and `false` if an existing entry was updated.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    ///
    /// // Insert a new entry
    /// let is_new = cache.insert("key1".to_string(), "value1".to_string());
    /// assert!(is_new); // Returns true for new entries
    ///
    /// // Update an existing entry
    /// let is_new = cache.insert("key1".to_string(), "updated".to_string());
    /// assert!(!is_new); // Returns false for updates
    ///
    /// assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
    /// # }
    /// ```
    pub fn insert(&mut self, key: K, value: V) -> bool {
        // Check if key already exists
        if let Some(&idx) = self.map.get(&key) {
            // Update existing entry
            self.nodes[idx].visited = true;
            self.nodes[idx].value = value;
            return false;
        }

        // Evict if at capacity
        if self.nodes.len() >= self.capacity {
            let item = self.evict();
            // When the cache is full and *all* entries are marked `visited`, our `evict()` performs
            // a first pass that clears the `visited` bits but may return `None` without removing
            // anything. We still must free a slot before inserting, so we call `evict()` a second
            // time. This mirrors the original SIEVE miss path, which keeps scanning (wrapping once)
            // until it finds an item to evict after clearing bits.
            if item.is_none() {
                let item = self.evict();
                debug_assert!(
                    item.is_some(),
                    "evict() must remove one entry when at capacity"
                );
            }
        }

        // Append the new node at the end of the vector and record its index in the map
        let node = Node::new(key.clone(), value);
        self.nodes.push(node);
        let idx = self.nodes.len() - 1;
        self.map.insert(key, idx);
        debug_assert!(self.nodes.len() <= self.capacity);
        true
    }

    /// Removes the cache entry mapped to by `key`.
    ///
    /// This method explicitly removes an entry from the cache regardless of its
    /// "visited" status.
    ///
    /// # Return Value
    ///
    /// Returns the value removed from the cache. If `key` did not map to any value,
    /// then this returns `None`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Remove an existing entry
    /// let removed = cache.remove("key1");
    /// assert_eq!(removed, Some("value1".to_string()));
    ///
    /// // Try to remove a non-existent entry
    /// let removed = cache.remove("key1");
    /// assert_eq!(removed, None);
    ///
    /// assert_eq!(cache.len(), 1); // Only one entry remains
    /// # }
    /// ```
    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Eq + Hash + ?Sized,
    {
        // Find the node index
        let idx = self.map.remove(key)?;

        // If this is the last element, just remove it
        if idx == self.nodes.len() - 1 {
            let node = self.nodes.pop().unwrap();
            // Keep the hand in bounds if it pointed at the popped slot.
            if self.hand == Some(idx) {
                self.hand = if idx > 0 { Some(idx - 1) } else { None };
            }
            return Some(node.value);
        }

        // Update hand if needed
        if let Some(hand_idx) = self.hand {
            if hand_idx == idx {
                // Move hand to the previous node or wrap to end
                self.hand = if idx > 0 {
                    Some(idx - 1)
                } else {
                    Some(self.nodes.len() - 2)
                };
            } else if hand_idx == self.nodes.len() - 1 {
                // If hand points to the last element (which will be moved to idx)
                self.hand = Some(idx);
            }
        }

        // Remove the node by replacing it with the last one and updating the map
        let last_node = self.nodes.pop().unwrap();
        let removed_value = if idx < self.nodes.len() {
            // Only need to swap and update map if not the last element
            let last_key = last_node.key.clone(); // Clone the key before moving
            let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
            self.map.insert(last_key, idx); // Update map for swapped node
            removed_node.value
        } else {
            last_node.value
        };

        Some(removed_value)
    }

    /// Removes and returns a value from the cache that was not recently accessed.
    ///
    /// This method implements the SIEVE eviction algorithm:
    /// 1. Starting from the "hand" pointer or the end of the vector, look for an entry
    ///    that hasn't been visited since the last scan
    /// 2. Mark all visited entries as non-visited during the scan
    /// 3. If a non-visited entry is found, evict it
    /// 4. Update the hand to point to the previous node
    ///
    /// # Return Value
    ///
    /// If a suitable entry is found, it is removed from the cache and its value is returned.
    /// If all entries have been recently accessed or the cache is empty, this returns `None`.
    ///
    /// # Note
    ///
    /// This method is automatically called by `insert` when the cache is at capacity,
    /// but it can also be called manually to proactively free space.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(3).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    /// cache.insert("key3".to_string(), "value3".to_string());
    ///
    /// // Access key1 and key2 to mark them as visited
    /// cache.get("key1");
    /// cache.get("key2");
    ///
    /// // key3 hasn't been accessed, so it should be evicted
    /// let evicted = cache.evict();
    /// assert!(evicted.is_some());
    /// assert!(!cache.contains_key("key3")); // key3 was evicted
    /// # }
    /// ```
    pub fn evict(&mut self) -> Option<V> {
        if self.nodes.is_empty() {
            return None;
        }

        // Start from the hand pointer or the end if no hand
        let mut current_idx = self.hand.unwrap_or(self.nodes.len() - 1);
        let start_idx = current_idx;

        // Track whether we've wrapped around and whether we found a node to evict
        let mut wrapped = false;
        let mut found_idx = None;

        // Scan for a non-visited entry
        loop {
            // If current node is not visited, mark it for eviction
            if !self.nodes[current_idx].visited {
                found_idx = Some(current_idx);
                break;
            }

            // Mark as non-visited for next scan
            self.nodes[current_idx].visited = false;

            // Move to previous node or wrap to end
            current_idx = if current_idx > 0 {
                current_idx - 1
            } else {
                // Wrap around to end of vector
                if wrapped {
                    // If we've already wrapped, break to avoid infinite loop
                    break;
                }
                wrapped = true;
                self.nodes.len() - 1
            };

            // If we've looped back to start, we've checked all nodes
            if current_idx == start_idx {
                break;
            }
        }

        // If we found a node to evict
        if let Some(idx) = found_idx {
            // Update the hand pointer to the previous node or wrap to end
            self.hand = if idx > 0 {
                Some(idx - 1)
            } else if self.nodes.len() > 1 {
                Some(self.nodes.len() - 2)
            } else {
                None
            };

            // Remove the key from the map
            let key = &self.nodes[idx].key;
            self.map.remove(key);

            // Remove the node and return its value
            if idx == self.nodes.len() - 1 {
                // If last node, just pop it
                return Some(self.nodes.pop().unwrap().value);
            } else {
                // Otherwise swap with the last node
                let last_node = self.nodes.pop().unwrap();
                let last_key = last_node.key.clone(); // Clone the key before moving
                let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
                // Update the map for the moved node
                self.map.insert(last_key, idx);
                return Some(removed_node.value);
            }
        }

        None
    }

    /// Removes all entries from the cache.
    ///
    /// This operation clears all stored values and resets the cache to an empty state,
    /// while maintaining the original capacity.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// assert_eq!(cache.len(), 2);
    ///
    /// cache.clear();
    /// assert_eq!(cache.len(), 0);
    /// assert!(cache.is_empty());
    /// # }
    /// ```
    pub fn clear(&mut self) {
        self.map.clear();
        self.nodes.clear();
        self.hand = None;
    }

    /// Returns an iterator over all keys in the cache.
    ///
    /// The order of keys is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashSet;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let keys: HashSet<_> = cache.keys().collect();
    /// assert_eq!(keys.len(), 2);
    /// assert!(keys.contains(&"key1".to_string()));
    /// assert!(keys.contains(&"key2".to_string()));
    /// # }
    /// ```
    pub fn keys(&self) -> impl Iterator<Item = &K> {
        self.nodes.iter().map(|node| &node.key)
    }

    /// Returns an iterator over all values in the cache.
    ///
    /// The order of values is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashSet;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let values: HashSet<_> = cache.values().collect();
    /// assert_eq!(values.len(), 2);
    /// assert!(values.contains(&"value1".to_string()));
    /// assert!(values.contains(&"value2".to_string()));
    /// # }
    /// ```
    pub fn values(&self) -> impl Iterator<Item = &V> {
        self.nodes.iter().map(|node| &node.value)
    }

    /// Returns an iterator over all mutable values in the cache.
    ///
    /// The order of values is not specified and should not be relied upon.
    /// Note that iterating through this will mark all entries as visited.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Update all values by appending text
    /// for value in cache.values_mut() {
    ///     *value = format!("{}_updated", value);
    /// }
    ///
    /// assert_eq!(cache.get("key1"), Some(&"value1_updated".to_string()));
    /// assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));
    /// # }
    /// ```
    pub fn values_mut(&mut self) -> impl Iterator<Item = &mut V> {
        for node in &mut self.nodes {
            node.visited = true;
        }
        self.nodes.iter_mut().map(|node| &mut node.value)
    }

    /// Returns an iterator over all key-value pairs in the cache.
    ///
    /// The order of pairs is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashMap;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let entries: HashMap<_, _> = cache.iter().collect();
    /// assert_eq!(entries.len(), 2);
    /// assert_eq!(entries.get(&"key1".to_string()), Some(&&"value1".to_string()));
    /// assert_eq!(entries.get(&"key2".to_string()), Some(&&"value2".to_string()));
    /// # }
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
        self.nodes.iter().map(|node| (&node.key, &node.value))
    }

    /// Returns an iterator over all key-value pairs in the cache, with mutable references to values.
    ///
    /// The order of pairs is not specified and should not be relied upon.
    /// Note that iterating through this will mark all entries as visited.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Update all values associated with keys containing '1'
    /// for (key, value) in cache.iter_mut() {
    ///     if key.contains('1') {
    ///         *value = format!("{}_special", value);
    ///     }
    /// }
    ///
    /// assert_eq!(cache.get("key1"), Some(&"value1_special".to_string()));
    /// assert_eq!(cache.get("key2"), Some(&"value2".to_string()));
    /// # }
    /// ```
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&K, &mut V)> {
        for node in &mut self.nodes {
            node.visited = true;
        }
        self.nodes
            .iter_mut()
            .map(|node| (&node.key, &mut node.value))
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Removes all entries for which the provided function returns `false`.
    /// The elements are visited in arbitrary, unspecified order.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), 100);
    /// cache.insert("key2".to_string(), 200);
    /// cache.insert("key3".to_string(), 300);
    ///
    /// // Keep only entries with values greater than 150
    /// cache.retain(|_, value| *value > 150);
    ///
    /// assert_eq!(cache.len(), 2);
    /// assert!(!cache.contains_key("key1"));
    /// assert!(cache.contains_key("key2"));
    /// assert!(cache.contains_key("key3"));
    /// # }
    /// ```
    pub fn retain<F>(&mut self, mut f: F)
    where
        F: FnMut(&K, &V) -> bool,
    {
        // Collect indices to remove
        let mut to_remove = Vec::new();

        for (i, node) in self.nodes.iter().enumerate() {
            if !f(&node.key, &node.value) {
                to_remove.push(i);
            }
        }

        // Remove indices from highest to lowest to avoid invalidating other indices
        to_remove.sort_unstable_by(|a, b| b.cmp(a));

        for idx in to_remove {
            // Remove from map
            self.map.remove(&self.nodes[idx].key);

            // If it's the last element, just pop it
            if idx == self.nodes.len() - 1 {
                self.nodes.pop();
                // Keep the hand in bounds if it pointed at the popped slot.
                if self.hand == Some(idx) {
                    self.hand = if idx > 0 { Some(idx - 1) } else { None };
                }
            } else {
                // Replace with the last element
                let last_idx = self.nodes.len() - 1;
                // Use swap_remove which replaces the removed element with the last element
                self.nodes.swap_remove(idx);
                if idx < self.nodes.len() {
                    // Update map for the swapped node
                    self.map.insert(self.nodes[idx].key.clone(), idx);
                }

                // Update hand if needed
                if let Some(hand_idx) = self.hand {
                    if hand_idx == idx {
                        // Hand was pointing to the removed node, move it to previous
                        self.hand = if idx > 0 {
                            Some(idx - 1)
                        } else if !self.nodes.is_empty() {
                            Some(self.nodes.len() - 1)
                        } else {
                            None
                        };
                    } else if hand_idx == last_idx {
                        // Hand was pointing to the last node that was moved
                        self.hand = Some(idx);
                    }
                }
            }
        }
    }

    /// Returns a recommended cache capacity based on current utilization.
    ///
    /// This method analyzes the current cache utilization and recommends a new capacity based on:
    /// - The fill ratio (how much of the capacity is actually being used)
    /// - The number of entries with the 'visited' flag set
    /// - A target utilization range
    ///
    /// The recommendation aims to keep the cache size optimal:
    /// - If the cache is significantly underfilled (fill ratio < 10%), it suggests decreasing capacity
    ///   regardless of other factors to avoid wasting memory
    /// - If many entries are frequently accessed (high utilization), it suggests increasing capacity
    /// - If few entries are accessed frequently (low utilization), it suggests decreasing capacity
    /// - Within a normal utilization range, it keeps the capacity stable
    ///
    /// # Arguments
    ///
    /// * `min_factor` - Minimum scaling factor (e.g., 0.5 means never recommend less than 50% of current capacity)
    /// * `max_factor` - Maximum scaling factor (e.g., 2.0 means never recommend more than 200% of current capacity)
    /// * `low_threshold` - Utilization threshold below which capacity is reduced (e.g., 0.3 means 30% utilization)
    /// * `high_threshold` - Utilization threshold above which capacity is increased (e.g., 0.7 means 70% utilization)
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use sieve_cache::SieveCache;
    /// #
    /// # fn main() {
    /// # let mut cache = SieveCache::<String, i32>::new(100).unwrap();
    /// #
    /// # // Add items to the cache
    /// # for i in 0..80 {
    /// #     cache.insert(i.to_string(), i);
    /// #
    /// #     // Accessing some items to mark them as visited
    /// #     if i % 2 == 0 {
    /// #         cache.get(&i.to_string());
    /// #     }
    /// # }
    /// #
    /// // Get a recommended capacity with default parameters
    /// let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    /// println!("Recommended capacity: {}", recommended);
    /// # }
    /// ```
    ///
    /// # Default Recommendation Parameters
    ///
    /// If you're unsure what parameters to use, these values provide a reasonable starting point:
    /// - `min_factor`: 0.5 (never recommend less than half current capacity)
    /// - `max_factor`: 2.0 (never recommend more than twice current capacity)
    /// - `low_threshold`: 0.3 (reduce capacity if utilization below 30%)
    /// - `high_threshold`: 0.7 (increase capacity if utilization above 70%)
    pub fn recommended_capacity(
        &self,
        min_factor: f64,
        max_factor: f64,
        low_threshold: f64,
        high_threshold: f64,
    ) -> usize {
        // If the cache is empty, return the current capacity
        if self.nodes.is_empty() {
            return self.capacity;
        }

        // Count entries with visited flag set
        let visited_count = self.nodes.iter().filter(|node| node.visited).count();

        // Calculate the utilization ratio (visited entries / total entries)
        let utilization_ratio = visited_count as f64 / self.nodes.len() as f64;

        // Calculate fill ratio (total entries / capacity)
        let fill_ratio = self.nodes.len() as f64 / self.capacity as f64;

        // Low fill ratio threshold (consider the cache underfilled below this)
        let low_fill_threshold = 0.1; // 10% filled

        // Fill ratio takes precedence over utilization:
        // If the cache is severely underfilled, we should decrease capacity
        // regardless of utilization
        if fill_ratio < low_fill_threshold {
            // Calculate how much to decrease based on how empty the cache is
            let fill_below_threshold = if fill_ratio > 0.0 {
                (low_fill_threshold - fill_ratio) / low_fill_threshold
            } else {
                1.0
            };
            // Apply the min_factor as a floor
            let scaling_factor = 1.0 - (1.0 - min_factor) * fill_below_threshold;

            // Apply the scaling factor to current capacity and ensure it's at least 1
            return std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize);
        }

        // For normal fill levels, use the original logic based on utilization
        let scaling_factor = if utilization_ratio >= high_threshold {
            // High utilization - recommend increasing the capacity
            // Scale between 1.0 and max_factor based on utilization above the high threshold
            let utilization_above_threshold =
                (utilization_ratio - high_threshold) / (1.0 - high_threshold);
            1.0 + (max_factor - 1.0) * utilization_above_threshold
        } else if utilization_ratio <= low_threshold {
            // Low utilization - recommend decreasing capacity
            // Scale between min_factor and 1.0 based on how far below the low threshold
            let utilization_below_threshold = (low_threshold - utilization_ratio) / low_threshold;
            1.0 - (1.0 - min_factor) * utilization_below_threshold
        } else {
            // Normal utilization - keep current capacity
            1.0
        };

        // Apply the scaling factor to current capacity and ensure it's at least 1
        std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize)
    }
}
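
// A small worked example of the scaling formulas documented on `recommended_capacity`.
// The nominal expected values (150 and 750) follow from the arithmetic spelled out in
// the comments below; the assertions allow a little slack around them in case of
// floating-point rounding.
#[test]
fn test_recommended_capacity_worked_example() {
    // High utilization: 100 entries in a cache of capacity 100, 85 of them visited.
    // utilization = 0.85, high_threshold = 0.7, max_factor = 2.0:
    // factor = 1.0 + (2.0 - 1.0) * (0.85 - 0.7) / (1.0 - 0.7) = 1.5 -> about 150.
    let mut cache = SieveCache::new(100).unwrap();
    for i in 0..100 {
        cache.insert(i.to_string(), i);
    }
    for i in 0..85 {
        cache.get(&i.to_string());
    }
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(recommended >= 149 && recommended <= 151);

    // Low fill ratio: 50 entries in a cache of capacity 1000 (fill = 0.05 < 0.1).
    // fill_below_threshold = (0.1 - 0.05) / 0.1 = 0.5, min_factor = 0.5:
    // factor = 1.0 - (1.0 - 0.5) * 0.5 = 0.75 -> about 750.
    let mut cache = SieveCache::new(1000).unwrap();
    for i in 0..50 {
        cache.insert(i.to_string(), i);
    }
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(recommended >= 749 && recommended <= 751);
}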

#[test]
fn test() {
    let mut cache = SieveCache::new(3).unwrap();
    assert!(cache.insert("foo".to_string(), "foocontent".to_string()));
    assert!(cache.insert("bar".to_string(), "barcontent".to_string()));
    cache.remove("bar");
    assert!(cache.insert("bar2".to_string(), "bar2content".to_string()));
    assert!(cache.insert("bar3".to_string(), "bar3content".to_string()));
    assert_eq!(cache.get("foo"), Some(&"foocontent".to_string()));
    assert_eq!(cache.get("bar"), None);
    assert_eq!(cache.get("bar2"), Some(&"bar2content".to_string()));
    assert_eq!(cache.get("bar3"), Some(&"bar3content".to_string()));
}

#[test]
fn test_visited_flag_update() {
    let mut cache = SieveCache::new(2).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());
    // update `key1` entry.
    cache.insert("key1".to_string(), "updated".to_string());
    // new entry is added.
    cache.insert("key3".to_string(), "value3".to_string());
    assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
}
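
// A minimal sketch of the documented difference between `contains_key` and `get`:
// only `get`/`get_mut` set the visited flag, so a key that is merely checked with
// `contains_key` stays eligible for eviction. Key names and values are arbitrary.
#[test]
fn test_contains_key_does_not_mark_visited() {
    let mut cache = SieveCache::new(2).unwrap();
    cache.insert("a".to_string(), 1);
    cache.insert("b".to_string(), 2);

    // `get` marks "a" as visited; `contains_key` leaves "b" unvisited.
    cache.get("a");
    assert!(cache.contains_key("b"));

    // Inserting a third entry forces an eviction: "b" is the unvisited candidate.
    cache.insert("c".to_string(), 3);
    assert!(cache.contains_key("a"));
    assert!(!cache.contains_key("b"));
    assert!(cache.contains_key("c"));
}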

#[test]
fn test_clear() {
    let mut cache = SieveCache::new(10).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());

    assert_eq!(cache.len(), 2);
    assert!(!cache.is_empty());

    cache.clear();

    assert_eq!(cache.len(), 0);
    assert!(cache.is_empty());
    assert_eq!(cache.get("key1"), None);
    assert_eq!(cache.get("key2"), None);
}

#[test]
fn test_iterators() {
    let mut cache = SieveCache::new(10).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());

    // Test keys iterator
    let keys: Vec<&String> = cache.keys().collect();
    assert_eq!(keys.len(), 2);
    assert!(keys.contains(&&"key1".to_string()));
    assert!(keys.contains(&&"key2".to_string()));

    // Test values iterator
    let values: Vec<&String> = cache.values().collect();
    assert_eq!(values.len(), 2);
    assert!(values.contains(&&"value1".to_string()));
    assert!(values.contains(&&"value2".to_string()));

    // Test values_mut iterator
    for value in cache.values_mut() {
        *value = format!("{}_updated", value);
    }

    assert_eq!(cache.get("key1"), Some(&"value1_updated".to_string()));
    assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));

    // Test key-value iterator
    let entries: Vec<(&String, &String)> = cache.iter().collect();
    assert_eq!(entries.len(), 2);

    // Test key-value mutable iterator
    for (key, value) in cache.iter_mut() {
        if key == "key1" {
            *value = format!("{}_special", value);
        }
    }

    assert_eq!(
        cache.get("key1"),
        Some(&"value1_updated_special".to_string())
    );
    assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));
}

#[test]
fn test_retain() {
    let mut cache = SieveCache::new(10).unwrap();

    // Add some entries
    cache.insert("even1".to_string(), 2);
    cache.insert("even2".to_string(), 4);
    cache.insert("odd1".to_string(), 1);
    cache.insert("odd2".to_string(), 3);

    assert_eq!(cache.len(), 4);

    // Keep only entries with even values
    cache.retain(|_, v| v % 2 == 0);

    assert_eq!(cache.len(), 2);
    assert!(cache.contains_key("even1"));
    assert!(cache.contains_key("even2"));
    assert!(!cache.contains_key("odd1"));
    assert!(!cache.contains_key("odd2"));

    // Keep only entries with keys containing '1'
    cache.retain(|k, _| k.contains('1'));

    assert_eq!(cache.len(), 1);
    assert!(cache.contains_key("even1"));
    assert!(!cache.contains_key("even2"));
}

#[test]
fn test_recommended_capacity() {
    // Test case 1: Empty cache - should return current capacity
    let cache = SieveCache::<String, u32>::new(100).unwrap();
    assert_eq!(cache.recommended_capacity(0.5, 2.0, 0.3, 0.7), 100);

    // Test case 2: Low utilization (few visited nodes)
    let mut cache = SieveCache::new(100).unwrap();
    // Fill the cache first without marking anything as visited
    for i in 0..90 {
        cache.insert(i.to_string(), i);
    }

    // Now mark only a tiny fraction as visited
    for i in 0..5 {
        cache.get(&i.to_string()); // Only 5% visited
    }

    // With very low utilization and high fill, should recommend decreasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.1, 0.7); // Lower threshold to ensure test passes
    assert!(recommended < 100);
    assert!(recommended >= 50); // Should not go below min_factor

    // Test case 3: High utilization (many visited nodes)
    let mut cache = SieveCache::new(100).unwrap();
    for i in 0..90 {
        cache.insert(i.to_string(), i);
        // Mark 80% as visited
        if i % 10 != 0 {
            cache.get(&i.to_string());
        }
    }
    // With 80% utilization, should recommend increasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(recommended > 100);
    assert!(recommended <= 200); // Should not go above max_factor

    // Test case 4: Normal utilization (should keep capacity the same)
    let mut cache = SieveCache::new(100).unwrap();
    for i in 0..90 {
        cache.insert(i.to_string(), i);
        // Mark 50% as visited
        if i % 2 == 0 {
            cache.get(&i.to_string());
        }
    }
    // With 50% utilization (between thresholds), capacity should be fairly stable
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        recommended >= 95,
        "With normal utilization, capacity should be close to original"
    );
    assert!(
        recommended <= 100,
        "With normal utilization, capacity should not exceed original"
    );

    // Test case 5: Low fill ratio (few entries relative to capacity)
    let mut cache = SieveCache::new(2000).unwrap();
    // Add only a few entries (5% of capacity)
    for i in 0..100 {
        cache.insert(i.to_string(), i);
        // Mark all as visited to simulate high hit rate
        cache.get(&i.to_string());
    }

    // Even though utilization is high (100% visited), the fill ratio is very low (5%)
    // so it should still recommend decreasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        recommended < 2000,
        "With low fill ratio, capacity should be decreased despite high hit rate"
    );
    assert!(
        recommended >= 1000, // min_factor = 0.5
        "Capacity should not go below min_factor of current capacity"
    );
}

#[test]
fn insert_never_exceeds_capacity_when_all_visited() {
    let mut c = SieveCache::new(2).unwrap();
    c.insert("a".to_string(), 1);
    c.insert("b".to_string(), 2);
    // Mark all visited
    assert!(c.get("a").is_some());
    assert!(c.get("b").is_some());
    // This would exceed capacity
    c.insert("c".to_string(), 3);
    // This is our invariant: the cache must never exceed its capacity
    assert!(c.len() <= c.capacity());
}
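
// A small regression sketch for the hand bookkeeping in `remove`: after an eviction
// positions the hand, removing the entry the hand points to must leave the hand in
// bounds so that a later `evict` cannot index past the end of the node list. The
// key names and values are arbitrary.
#[test]
fn test_remove_keeps_hand_in_bounds() {
    let mut cache = SieveCache::new(3).unwrap();
    cache.insert("a".to_string(), 1);
    cache.insert("b".to_string(), 2);
    cache.insert("c".to_string(), 3);
    cache.get("a");
    cache.get("b");

    // "c" is the only unvisited entry, so it is evicted and the hand moves to "b".
    assert_eq!(cache.evict(), Some(3));

    // Remove the entry the hand points to, then evict again: this must not panic.
    assert_eq!(cache.remove("b"), Some(2));
    // The remaining entry is still marked visited, so the first pass only clears flags.
    assert_eq!(cache.evict(), None);
    assert_eq!(cache.evict(), Some(1));
    assert!(cache.is_empty());
}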