// sieve_cache/lib.rs
1#![doc = include_str!("../README.md")]
2
3//! # SIEVE Cache for Rust
4//!
5//! This crate provides implementations of the SIEVE cache replacement algorithm:
6//!
7//! * [`SieveCache`] - The core single-threaded implementation (always available)
8
// These empty modules exist only as feature-gated documentation anchors so the
// crate docs can mention the optional implementations; they contain no code.
#[cfg(feature = "sync")]
pub mod _docs_sync {}

#[cfg(feature = "sharded")]
pub mod _docs_sharded {}
14
pub mod _sieve_algorithm {
    //! ## The SIEVE Algorithm
    //!
    //! SIEVE (Simple, space-efficient, In-memory, EViction mEchanism) is a cache eviction
    //! algorithm that maintains a single bit per entry to track whether an item has been
    //! "visited" since it was last considered for eviction. This approach requires less
    //! state than LRU but achieves excellent performance, especially on skewed workloads.
    // NOTE: documentation-only module; it deliberately exports nothing.
}
23
pub mod _cache_implementation {
    //! ## Cache Implementation Details
    //!
    //! The cache is implemented as a combination of:
    //!
    //! 1. A `HashMap` for O(1) key lookups
    //! 2. A vector-based ordered collection for managing the eviction order
    //! 3. A "visited" flag on each entry to track recent access
    //!
    //! When the cache is full and a new item is inserted, the eviction algorithm:
    //! 1. Starts from the "hand" position (eviction candidate)
    //! 2. Finds the first non-visited entry, evicting it
    //! 3. Marks all visited entries as non-visited while searching
    // NOTE: documentation-only module; it deliberately exports nothing.
}
38
pub mod _implementation_choice {
    //! ## Choosing the Right Implementation
    //!
    //! - Use [`SieveCache`] for single-threaded applications
    // NOTE: documentation-only module; the feature-gated bullet points for the
    // thread-safe variants live in `_docs_sync_usage` / `_docs_sharded_usage`.
}
44
/// [DOCTEST_ONLY]
///
/// ```rust
/// # #[cfg(feature = "sync")]
/// # fn sync_example() {
/// # use sieve_cache::SyncSieveCache;
/// # let cache = SyncSieveCache::<String, u32>::new(1000).unwrap();
/// # let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// # println!("Recommended capacity: {}", recommended);
/// # }
/// #
/// # #[cfg(feature = "sharded")]
/// # fn sharded_example() {
/// # use sieve_cache::ShardedSieveCache;
/// # let cache = ShardedSieveCache::<String, u32>::new(1000).unwrap();
/// # let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// # println!("Recommended capacity: {}", recommended);
/// # }
/// ```
fn _readme_examples_doctest() {
    // Intentionally empty: this private function exists only to host the
    // doctest above, so the feature-gated README-style examples are compiled
    // (and, with the right features, executed) by `cargo test`.
}
68
// Documentation-only modules: these feature-gated bullets extend the
// "Choosing the Right Implementation" section when the features are enabled.
#[cfg(feature = "sync")]
pub mod _docs_sync_usage {
    //! - Use [`SyncSieveCache`] for multi-threaded applications with moderate concurrency
}

#[cfg(feature = "sharded")]
pub mod _docs_sharded_usage {
    //! - Use [`ShardedSieveCache`] for applications with high concurrency where operations
    //! are distributed across many different keys
}
79
80use std::borrow::Borrow;
81use std::collections::HashMap;
82use std::hash::Hash;
83
84#[cfg(feature = "sharded")]
85mod sharded;
86#[cfg(feature = "sync")]
87mod sync;
88
89#[cfg(feature = "sharded")]
90pub use sharded::ShardedSieveCache;
91#[cfg(feature = "sync")]
92pub use sync::SyncSieveCache;
93
/// Internal representation of a cache entry.
///
/// Each node stores its own key (so the map entry can be located again after
/// the node is relocated by swap-removal) plus the single SIEVE "visited" bit.
#[derive(Clone)]
struct Node<K: Eq + Hash + Clone, V> {
    key: K,
    value: V,
    // Set on every access; cleared by the eviction scan in `evict()`.
    visited: bool,
}

impl<K: Eq + Hash + Clone, V> Node<K, V> {
    /// Wraps a key/value pair in a fresh, not-yet-visited node.
    fn new(key: K, value: V) -> Self {
        Self {
            key,
            value,
            visited: false,
        }
    }
}

/// A cache based on the SIEVE eviction algorithm.
///
/// `SieveCache` provides an efficient in-memory cache with a carefully designed eviction
/// strategy that balances simplicity with good performance characteristics, especially on
/// skewed workloads common in real-world applications.
///
/// This is the single-threaded implementation.
// BUG FIX: the feature-gated cross-references below previously used bare
// `#[cfg(feature = "...")]` attributes sandwiched between doc-comment lines.
// An outer `#[cfg]` attribute applies to the *item* that follows it (this
// struct), so building without the `sync` feature compiled the entire
// `SieveCache` type out of the crate. `#[cfg_attr(..., doc = "...")]` gates
// only the documentation line, which is what was intended.
#[cfg_attr(
    feature = "sync",
    doc = "For thread-safe variants, see [`SyncSieveCache`] (with the `sync` feature)"
)]
#[cfg_attr(
    feature = "sharded",
    doc = "and [`ShardedSieveCache`] (with the `sharded` feature)."
)]
///
/// # Type Parameters
///
/// * `K` - The key type, which must implement `Eq`, `Hash`, and `Clone`
/// * `V` - The value type, with no constraints
///
/// # Example
///
/// ```rust
/// # #[cfg(feature = "doctest")]
/// # {
/// use sieve_cache::SieveCache;
///
/// // Create a new cache with capacity for 1000 items
/// let mut cache = SieveCache::new(1000).unwrap();
///
/// // Insert some values
/// cache.insert("key1".to_string(), "value1".to_string());
/// cache.insert("key2".to_string(), "value2".to_string());
///
/// // Retrieve values - returns references to the values
/// assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
///
/// // Check if the cache contains a key
/// assert!(cache.contains_key("key1"));
/// assert!(!cache.contains_key("missing_key"));
///
/// // Get a mutable reference to modify a value
/// if let Some(value) = cache.get_mut("key1") {
///     *value = "modified".to_string();
/// }
///
/// // Verify the modification
/// assert_eq!(cache.get("key1"), Some(&"modified".to_string()));
///
/// // Remove a value
/// let removed = cache.remove("key2");
/// assert_eq!(removed, Some("value2".to_string()));
/// # }
/// ```
pub struct SieveCache<K: Eq + Hash + Clone, V> {
    /// Map of keys to indices in the `nodes` vector.
    /// Invariant: `map.len() == nodes.len()` and every stored index is in-bounds.
    map: HashMap<K, usize>,
    /// Vector of all cache nodes; order is arbitrary (removal is by swap).
    nodes: Vec<Node<K, V>>,
    /// Index of the "hand" pointer used by the SIEVE algorithm for eviction.
    /// `None` means the next eviction scan starts from the end of `nodes`.
    hand: Option<usize>,
    /// Maximum number of entries the cache can hold.
    capacity: usize,
}
173
174impl<K: Eq + Hash + Clone, V> SieveCache<K, V> {
175 /// Creates a new cache with the given capacity.
176 ///
177 /// The capacity represents the maximum number of key-value pairs
178 /// that can be stored in the cache. When this limit is reached,
179 /// the cache will use the SIEVE algorithm to evict entries.
180 ///
181 /// # Errors
182 ///
183 /// Returns an error if the capacity is zero.
184 ///
185 /// # Examples
186 ///
187 /// ```rust
188 /// # #[cfg(feature = "doctest")]
189 /// # {
190 /// use sieve_cache::SieveCache;
191 ///
192 /// // Create a cache with space for 100 entries
193 /// let cache: SieveCache<String, u32> = SieveCache::new(100).unwrap();
194 ///
195 /// // Capacity of zero is invalid
196 /// let invalid = SieveCache::<String, u32>::new(0);
197 /// assert!(invalid.is_err());
198 /// # }
199 /// ```
200 pub fn new(capacity: usize) -> Result<Self, &'static str> {
201 if capacity == 0 {
202 return Err("capacity must be greater than 0");
203 }
204 Ok(Self {
205 map: HashMap::with_capacity(capacity),
206 nodes: Vec::with_capacity(capacity),
207 hand: None,
208 capacity,
209 })
210 }
211
212 /// Returns the capacity of the cache.
213 ///
214 /// This is the maximum number of entries that can be stored before
215 /// the cache begins evicting old entries.
216 ///
217 /// # Examples
218 ///
219 /// ```rust
220 /// # #[cfg(feature = "doctest")]
221 /// # {
222 /// use sieve_cache::SieveCache;
223 ///
224 /// let cache = SieveCache::<String, u32>::new(100).unwrap();
225 /// assert_eq!(cache.capacity(), 100);
226 /// # }
227 /// ```
228 #[inline]
229 pub fn capacity(&self) -> usize {
230 self.capacity
231 }
232
233 /// Returns the number of cached values.
234 ///
235 /// This value will never exceed the capacity.
236 ///
237 /// # Examples
238 ///
239 /// ```rust
240 /// # #[cfg(feature = "doctest")]
241 /// # {
242 /// use sieve_cache::SieveCache;
243 ///
244 /// let mut cache = SieveCache::new(100).unwrap();
245 /// assert_eq!(cache.len(), 0);
246 ///
247 /// cache.insert("key".to_string(), "value".to_string());
248 /// assert_eq!(cache.len(), 1);
249 /// # }
250 /// ```
251 #[inline]
252 pub fn len(&self) -> usize {
253 self.nodes.len()
254 }
255
256 /// Returns `true` when no values are currently cached.
257 ///
258 /// # Examples
259 ///
260 /// ```rust
261 /// # #[cfg(feature = "doctest")]
262 /// # {
263 /// use sieve_cache::SieveCache;
264 ///
265 /// let mut cache = SieveCache::<String, u32>::new(100).unwrap();
266 /// assert!(cache.is_empty());
267 ///
268 /// cache.insert("key".to_string(), "value".to_string());
269 /// assert!(!cache.is_empty());
270 ///
271 /// cache.remove("key");
272 /// assert!(cache.is_empty());
273 /// # }
274 /// ```
275 #[inline]
276 pub fn is_empty(&self) -> bool {
277 self.nodes.is_empty()
278 }
279
280 /// Returns `true` if there is a value in the cache mapped to by `key`.
281 ///
282 /// This operation does not count as an access for the SIEVE algorithm's
283 /// visited flag.
284 ///
285 /// # Examples
286 ///
287 /// ```rust
288 /// # #[cfg(feature = "doctest")]
289 /// # {
290 /// use sieve_cache::SieveCache;
291 ///
292 /// let mut cache = SieveCache::new(100).unwrap();
293 /// cache.insert("key".to_string(), "value".to_string());
294 ///
295 /// assert!(cache.contains_key("key"));
296 /// assert!(!cache.contains_key("missing"));
297 /// # }
298 /// ```
299 #[inline]
300 pub fn contains_key<Q>(&self, key: &Q) -> bool
301 where
302 Q: Hash + Eq + ?Sized,
303 K: Borrow<Q>,
304 {
305 self.map.contains_key(key)
306 }
307
308 /// Gets an immutable reference to the value in the cache mapped to by `key`.
309 ///
310 /// If no value exists for `key`, this returns `None`.
311 ///
312 /// This operation marks the entry as "visited" in the SIEVE algorithm,
313 /// which affects eviction decisions.
314 ///
315 /// # Examples
316 ///
317 /// ```rust
318 /// # #[cfg(feature = "doctest")]
319 /// # {
320 /// use sieve_cache::SieveCache;
321 ///
322 /// let mut cache = SieveCache::new(100).unwrap();
323 /// cache.insert("key".to_string(), "value".to_string());
324 ///
325 /// assert_eq!(cache.get("key"), Some(&"value".to_string()));
326 /// assert_eq!(cache.get("missing"), None);
327 /// # }
328 /// ```
329 pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
330 where
331 Q: Hash + Eq + ?Sized,
332 K: Borrow<Q>,
333 {
334 if let Some(&idx) = self.map.get(key) {
335 // Mark as visited for the SIEVE algorithm
336 self.nodes[idx].visited = true;
337 Some(&self.nodes[idx].value)
338 } else {
339 None
340 }
341 }
342
343 /// Gets a mutable reference to the value in the cache mapped to by `key`.
344 ///
345 /// If no value exists for `key`, this returns `None`.
346 ///
347 /// This operation marks the entry as "visited" in the SIEVE algorithm,
348 /// which affects eviction decisions.
349 ///
350 /// # Examples
351 ///
352 /// ```rust
353 /// # #[cfg(feature = "doctest")]
354 /// # {
355 /// use sieve_cache::SieveCache;
356 ///
357 /// let mut cache = SieveCache::new(100).unwrap();
358 /// cache.insert("key".to_string(), "value".to_string());
359 ///
360 /// // Modify the value in-place
361 /// if let Some(value) = cache.get_mut("key") {
362 /// *value = "new_value".to_string();
363 /// }
364 ///
365 /// assert_eq!(cache.get("key"), Some(&"new_value".to_string()));
366 /// # }
367 /// ```
368 pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
369 where
370 Q: Hash + Eq + ?Sized,
371 K: Borrow<Q>,
372 {
373 if let Some(&idx) = self.map.get(key) {
374 // Mark as visited for the SIEVE algorithm
375 self.nodes[idx].visited = true;
376 Some(&mut self.nodes[idx].value)
377 } else {
378 None
379 }
380 }
381
382 /// Maps `key` to `value` in the cache, possibly evicting old entries.
383 ///
384 /// If the key already exists, its value is updated and the entry is marked as visited.
385 /// If the key doesn't exist and the cache is at capacity, the SIEVE algorithm is used
386 /// to evict an entry before the new key-value pair is inserted.
387 ///
388 /// # Return Value
389 ///
390 /// Returns `true` when this is a new entry, and `false` if an existing entry was updated.
391 ///
392 /// # Examples
393 ///
394 /// ```rust
395 /// # #[cfg(feature = "doctest")]
396 /// # {
397 /// use sieve_cache::SieveCache;
398 ///
399 /// let mut cache = SieveCache::new(100).unwrap();
400 ///
401 /// // Insert a new entry
402 /// let is_new = cache.insert("key1".to_string(), "value1".to_string());
403 /// assert!(is_new); // Returns true for new entries
404 ///
405 /// // Update an existing entry
406 /// let is_new = cache.insert("key1".to_string(), "updated".to_string());
407 /// assert!(!is_new); // Returns false for updates
408 ///
409 /// assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
410 /// # }
411 /// ```
412 pub fn insert(&mut self, key: K, value: V) -> bool {
413 // Check if key already exists
414 if let Some(&idx) = self.map.get(&key) {
415 // Update existing entry
416 self.nodes[idx].visited = true;
417 self.nodes[idx].value = value;
418 return false;
419 }
420
421 // Evict if at capacity
422 if self.nodes.len() >= self.capacity {
423 let item = self.evict();
424 // When the cache is full and *all* entries are marked `visited`, our `evict()` performs
425 // a first pass that clears the `visited` bits but may return `None` without removing
426 // anything. We still must free a slot before inserting, so we call `evict()` a second
427 // time. This mirrors the original SIEVE miss path, which keeps scanning (wrapping once)
428 // until it finds an item to evict after clearing bits.
429 if item.is_none() {
430 let item = self.evict();
431 debug_assert!(
432 item.is_some(),
433 "evict() must remove one entry when at capacity"
434 );
435 }
436 }
437
438 // Add new node to the front
439 let node = Node::new(key.clone(), value);
440 self.nodes.push(node);
441 let idx = self.nodes.len() - 1;
442 self.map.insert(key, idx);
443 debug_assert!(self.nodes.len() <= self.capacity);
444 true
445 }
446
447 /// Removes the cache entry mapped to by `key`.
448 ///
449 /// This method explicitly removes an entry from the cache regardless of its
450 /// "visited" status.
451 ///
452 /// # Return Value
453 ///
454 /// Returns the value removed from the cache. If `key` did not map to any value,
455 /// then this returns `None`.
456 ///
457 /// # Examples
458 ///
459 /// ```rust
460 /// # #[cfg(feature = "doctest")]
461 /// # {
462 /// use sieve_cache::SieveCache;
463 ///
464 /// let mut cache = SieveCache::new(100).unwrap();
465 /// cache.insert("key1".to_string(), "value1".to_string());
466 /// cache.insert("key2".to_string(), "value2".to_string());
467 ///
468 /// // Remove an existing entry
469 /// let removed = cache.remove("key1");
470 /// assert_eq!(removed, Some("value1".to_string()));
471 ///
472 /// // Try to remove a non-existent entry
473 /// let removed = cache.remove("key1");
474 /// assert_eq!(removed, None);
475 ///
476 /// assert_eq!(cache.len(), 1); // Only one entry remains
477 /// # }
478 /// ```
479 pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
480 where
481 K: Borrow<Q>,
482 Q: Eq + Hash + ?Sized,
483 {
484 // Find the node index
485 let idx = self.map.remove(key)?;
486
487 // If this is the last element, just remove it
488 if idx == self.nodes.len() - 1 {
489 return Some(self.nodes.pop().unwrap().value);
490 }
491
492 // Update hand if needed
493 if let Some(hand_idx) = self.hand {
494 if hand_idx == idx {
495 // Move hand to the previous node or wrap to end
496 self.hand = if idx > 0 {
497 Some(idx - 1)
498 } else {
499 Some(self.nodes.len() - 2)
500 };
501 } else if hand_idx == self.nodes.len() - 1 {
502 // If hand points to the last element (which will be moved to idx)
503 self.hand = Some(idx);
504 }
505 }
506
507 // Remove the node by replacing it with the last one and updating the map
508 let last_node = self.nodes.pop().unwrap();
509 let removed_value = if idx < self.nodes.len() {
510 // Only need to swap and update map if not the last element
511 let last_key = last_node.key.clone(); // Clone the key before moving
512 let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
513 self.map.insert(last_key, idx); // Update map for swapped node
514 removed_node.value
515 } else {
516 last_node.value
517 };
518
519 Some(removed_value)
520 }
521
522 /// Removes and returns a value from the cache that was not recently accessed.
523 ///
524 /// This method implements the SIEVE eviction algorithm:
525 /// 1. Starting from the "hand" pointer or the end of the vector, look for an entry
526 /// that hasn't been visited since the last scan
527 /// 2. Mark all visited entries as non-visited during the scan
528 /// 3. If a non-visited entry is found, evict it
529 /// 4. Update the hand to point to the previous node
530 ///
531 /// # Return Value
532 ///
533 /// If a suitable entry is found, it is removed from the cache and its value is returned.
534 /// If all entries have been recently accessed or the cache is empty, this returns `None`.
535 ///
536 /// # Note
537 ///
538 /// This method is automatically called by `insert` when the cache is at capacity,
539 /// but it can also be called manually to proactively free space.
540 ///
541 /// # Examples
542 ///
543 /// ```rust
544 /// # #[cfg(feature = "doctest")]
545 /// # {
546 /// use sieve_cache::SieveCache;
547 ///
548 /// let mut cache = SieveCache::new(3).unwrap();
549 /// cache.insert("key1".to_string(), "value1".to_string());
550 /// cache.insert("key2".to_string(), "value2".to_string());
551 /// cache.insert("key3".to_string(), "value3".to_string());
552 ///
553 /// // Access key1 and key2 to mark them as visited
554 /// cache.get("key1");
555 /// cache.get("key2");
556 ///
557 /// // key3 hasn't been accessed, so it should be evicted
558 /// let evicted = cache.evict();
559 /// assert!(evicted.is_some());
560 /// assert!(!cache.contains_key("key3")); // key3 was evicted
561 /// # }
562 /// ```
563 pub fn evict(&mut self) -> Option<V> {
564 if self.nodes.is_empty() {
565 return None;
566 }
567
568 // Start from the hand pointer or the end if no hand
569 let mut current_idx = self.hand.unwrap_or(self.nodes.len() - 1);
570 let start_idx = current_idx;
571
572 // Track whether we've wrapped around and whether we found a node to evict
573 let mut wrapped = false;
574 let mut found_idx = None;
575
576 // Scan for a non-visited entry
577 loop {
578 // If current node is not visited, mark it for eviction
579 if !self.nodes[current_idx].visited {
580 found_idx = Some(current_idx);
581 break;
582 }
583
584 // Mark as non-visited for next scan
585 self.nodes[current_idx].visited = false;
586
587 // Move to previous node or wrap to end
588 current_idx = if current_idx > 0 {
589 current_idx - 1
590 } else {
591 // Wrap around to end of vector
592 if wrapped {
593 // If we've already wrapped, break to avoid infinite loop
594 break;
595 }
596 wrapped = true;
597 self.nodes.len() - 1
598 };
599
600 // If we've looped back to start, we've checked all nodes
601 if current_idx == start_idx {
602 break;
603 }
604 }
605
606 // If we found a node to evict
607 if let Some(idx) = found_idx {
608 // Update the hand pointer to the previous node or wrap to end
609 self.hand = if idx > 0 {
610 Some(idx - 1)
611 } else if self.nodes.len() > 1 {
612 Some(self.nodes.len() - 2)
613 } else {
614 None
615 };
616
617 // Remove the key from the map
618 let key = &self.nodes[idx].key;
619 self.map.remove(key);
620
621 // Remove the node and return its value
622 if idx == self.nodes.len() - 1 {
623 // If last node, just pop it
624 return Some(self.nodes.pop().unwrap().value);
625 } else {
626 // Otherwise swap with the last node
627 let last_node = self.nodes.pop().unwrap();
628 let last_key = last_node.key.clone(); // Clone the key before moving
629 let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
630 // Update the map for the moved node
631 self.map.insert(last_key, idx);
632 return Some(removed_node.value);
633 }
634 }
635
636 None
637 }
638
639 /// Removes all entries from the cache.
640 ///
641 /// This operation clears all stored values and resets the cache to an empty state,
642 /// while maintaining the original capacity.
643 ///
644 /// # Examples
645 ///
646 /// ```rust
647 /// # #[cfg(feature = "doctest")]
648 /// # {
649 /// use sieve_cache::SieveCache;
650 ///
651 /// let mut cache = SieveCache::new(100).unwrap();
652 /// cache.insert("key1".to_string(), "value1".to_string());
653 /// cache.insert("key2".to_string(), "value2".to_string());
654 ///
655 /// assert_eq!(cache.len(), 2);
656 ///
657 /// cache.clear();
658 /// assert_eq!(cache.len(), 0);
659 /// assert!(cache.is_empty());
660 /// # }
661 /// ```
662 pub fn clear(&mut self) {
663 self.map.clear();
664 self.nodes.clear();
665 self.hand = None;
666 }
667
668 /// Returns an iterator over all keys in the cache.
669 ///
670 /// The order of keys is not specified and should not be relied upon.
671 ///
672 /// # Examples
673 ///
674 /// ```rust
675 /// # #[cfg(feature = "doctest")]
676 /// # {
677 /// use sieve_cache::SieveCache;
678 /// use std::collections::HashSet;
679 ///
680 /// let mut cache = SieveCache::new(100).unwrap();
681 /// cache.insert("key1".to_string(), "value1".to_string());
682 /// cache.insert("key2".to_string(), "value2".to_string());
683 ///
684 /// let keys: HashSet<_> = cache.keys().collect();
685 /// assert_eq!(keys.len(), 2);
686 /// assert!(keys.contains(&"key1".to_string()));
687 /// assert!(keys.contains(&"key2".to_string()));
688 /// # }
689 /// ```
690 pub fn keys(&self) -> impl Iterator<Item = &K> {
691 self.nodes.iter().map(|node| &node.key)
692 }
693
694 /// Returns an iterator over all values in the cache.
695 ///
696 /// The order of values is not specified and should not be relied upon.
697 ///
698 /// # Examples
699 ///
700 /// ```rust
701 /// # #[cfg(feature = "doctest")]
702 /// # {
703 /// use sieve_cache::SieveCache;
704 /// use std::collections::HashSet;
705 ///
706 /// let mut cache = SieveCache::new(100).unwrap();
707 /// cache.insert("key1".to_string(), "value1".to_string());
708 /// cache.insert("key2".to_string(), "value2".to_string());
709 ///
710 /// let values: HashSet<_> = cache.values().collect();
711 /// assert_eq!(values.len(), 2);
712 /// assert!(values.contains(&"value1".to_string()));
713 /// assert!(values.contains(&"value2".to_string()));
714 /// # }
715 /// ```
716 pub fn values(&self) -> impl Iterator<Item = &V> {
717 self.nodes.iter().map(|node| &node.value)
718 }
719
720 /// Returns an iterator over all mutable values in the cache.
721 ///
722 /// The order of values is not specified and should not be relied upon.
723 /// Note that iterating through this will mark all entries as visited.
724 ///
725 /// # Examples
726 ///
727 /// ```rust
728 /// # #[cfg(feature = "doctest")]
729 /// # {
730 /// use sieve_cache::SieveCache;
731 ///
732 /// let mut cache = SieveCache::new(100).unwrap();
733 /// cache.insert("key1".to_string(), "value1".to_string());
734 /// cache.insert("key2".to_string(), "value2".to_string());
735 ///
736 /// // Update all values by appending text
737 /// for value in cache.values_mut() {
738 /// *value = format!("{}_updated", value);
739 /// }
740 ///
741 /// assert_eq!(cache.get("key1"), Some(&"value1_updated".to_string()));
742 /// assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));
743 /// # }
744 /// ```
745 pub fn values_mut(&mut self) -> impl Iterator<Item = &mut V> {
746 for node in &mut self.nodes {
747 node.visited = true;
748 }
749 self.nodes.iter_mut().map(|node| &mut node.value)
750 }
751
752 /// Returns an iterator over all key-value pairs in the cache.
753 ///
754 /// The order of pairs is not specified and should not be relied upon.
755 ///
756 /// # Examples
757 ///
758 /// ```rust
759 /// # #[cfg(feature = "doctest")]
760 /// # {
761 /// use sieve_cache::SieveCache;
762 /// use std::collections::HashMap;
763 ///
764 /// let mut cache = SieveCache::new(100).unwrap();
765 /// cache.insert("key1".to_string(), "value1".to_string());
766 /// cache.insert("key2".to_string(), "value2".to_string());
767 ///
768 /// let entries: HashMap<_, _> = cache.iter().collect();
769 /// assert_eq!(entries.len(), 2);
770 /// assert_eq!(entries.get(&"key1".to_string()), Some(&&"value1".to_string()));
771 /// assert_eq!(entries.get(&"key2".to_string()), Some(&&"value2".to_string()));
772 /// # }
773 /// ```
774 pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
775 self.nodes.iter().map(|node| (&node.key, &node.value))
776 }
777
778 /// Returns an iterator over all key-value pairs in the cache, with mutable references to values.
779 ///
780 /// The order of pairs is not specified and should not be relied upon.
781 /// Note that iterating through this will mark all entries as visited.
782 ///
783 /// # Examples
784 ///
785 /// ```rust
786 /// # #[cfg(feature = "doctest")]
787 /// # {
788 /// use sieve_cache::SieveCache;
789 ///
790 /// let mut cache = SieveCache::new(100).unwrap();
791 /// cache.insert("key1".to_string(), "value1".to_string());
792 /// cache.insert("key2".to_string(), "value2".to_string());
793 ///
794 /// // Update all values associated with keys containing '1'
795 /// for (key, value) in cache.iter_mut() {
796 /// if key.contains('1') {
797 /// *value = format!("{}_special", value);
798 /// }
799 /// }
800 ///
801 /// assert_eq!(cache.get("key1"), Some(&"value1_special".to_string()));
802 /// assert_eq!(cache.get("key2"), Some(&"value2".to_string()));
803 /// # }
804 /// ```
805 pub fn iter_mut(&mut self) -> impl Iterator<Item = (&K, &mut V)> {
806 for node in &mut self.nodes {
807 node.visited = true;
808 }
809 self.nodes
810 .iter_mut()
811 .map(|node| (&node.key, &mut node.value))
812 }
813
814 /// Retains only the elements specified by the predicate.
815 ///
816 /// Removes all entries for which the provided function returns `false`.
817 /// The elements are visited in arbitrary, unspecified order.
818 ///
819 /// # Examples
820 ///
821 /// ```rust
822 /// # #[cfg(feature = "doctest")]
823 /// # {
824 /// use sieve_cache::SieveCache;
825 ///
826 /// let mut cache = SieveCache::new(100).unwrap();
827 /// cache.insert("key1".to_string(), 100);
828 /// cache.insert("key2".to_string(), 200);
829 /// cache.insert("key3".to_string(), 300);
830 ///
831 /// // Keep only entries with values greater than 150
832 /// cache.retain(|_, value| *value > 150);
833 ///
834 /// assert_eq!(cache.len(), 2);
835 /// assert!(!cache.contains_key("key1"));
836 /// assert!(cache.contains_key("key2"));
837 /// assert!(cache.contains_key("key3"));
838 /// # }
839 /// ```
840 pub fn retain<F>(&mut self, mut f: F)
841 where
842 F: FnMut(&K, &V) -> bool,
843 {
844 // Collect indices to remove
845 let mut to_remove = Vec::new();
846
847 for (i, node) in self.nodes.iter().enumerate() {
848 if !f(&node.key, &node.value) {
849 to_remove.push(i);
850 }
851 }
852
853 // Remove indices from highest to lowest to avoid invalidating other indices
854 to_remove.sort_unstable_by(|a, b| b.cmp(a));
855
856 for idx in to_remove {
857 // Remove from map
858 self.map.remove(&self.nodes[idx].key);
859
860 // If it's the last element, just pop it
861 if idx == self.nodes.len() - 1 {
862 self.nodes.pop();
863 } else {
864 // Replace with the last element
865 let last_idx = self.nodes.len() - 1;
866 // Use swap_remove which replaces the removed element with the last element
867 self.nodes.swap_remove(idx);
868 if idx < self.nodes.len() {
869 // Update map for the swapped node
870 self.map.insert(self.nodes[idx].key.clone(), idx);
871 }
872
873 // Update hand if needed
874 if let Some(hand_idx) = self.hand {
875 if hand_idx == idx {
876 // Hand was pointing to the removed node, move it to previous
877 self.hand = if idx > 0 {
878 Some(idx - 1)
879 } else if !self.nodes.is_empty() {
880 Some(self.nodes.len() - 1)
881 } else {
882 None
883 };
884 } else if hand_idx == last_idx {
885 // Hand was pointing to the last node that was moved
886 self.hand = Some(idx);
887 }
888 }
889 }
890 }
891 }
892
    /// Returns a recommended cache capacity based on current utilization.
    ///
    /// This method analyzes the current cache utilization and recommends a new capacity based on:
    /// - The fill ratio (how much of the capacity is actually being used)
    /// - The number of entries with the 'visited' flag set
    /// - A target utilization range
    ///
    /// The recommendation aims to keep the cache size optimal:
    /// - If the cache is significantly underfilled (fill ratio < 10%), it suggests decreasing capacity
    ///   regardless of other factors to avoid wasting memory
    /// - If many entries are frequently accessed (high utilization), it suggests increasing capacity
    /// - If few entries are accessed frequently (low utilization), it suggests decreasing capacity
    /// - Within a normal utilization range, it keeps the capacity stable
    ///
    /// # Arguments
    ///
    /// * `min_factor` - Minimum scaling factor (e.g., 0.5 means never recommend less than 50% of current capacity)
    /// * `max_factor` - Maximum scaling factor (e.g., 2.0 means never recommend more than 200% of current capacity)
    /// * `low_threshold` - Utilization threshold below which capacity is reduced (e.g., 0.3 means 30% utilization)
    /// * `high_threshold` - Utilization threshold above which capacity is increased (e.g., 0.7 means 70% utilization)
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use sieve_cache::SieveCache;
    /// #
    /// # fn main() {
    /// # let mut cache = SieveCache::<String, i32>::new(100).unwrap();
    /// #
    /// # // Add items to the cache
    /// # for i in 0..80 {
    /// #     cache.insert(i.to_string(), i);
    /// #
    /// #     // Accessing some items to mark them as visited
    /// #     if i % 2 == 0 {
    /// #         cache.get(&i.to_string());
    /// #     }
    /// # }
    /// #
    /// // Get a recommended capacity with default parameters
    /// let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    /// println!("Recommended capacity: {}", recommended);
    /// # }
    /// ```
    ///
    /// # Default Recommendation Parameters
    ///
    /// If you're unsure what parameters to use, these values provide a reasonable starting point:
    /// - `min_factor`: 0.5 (never recommend less than half current capacity)
    /// - `max_factor`: 2.0 (never recommend more than twice current capacity)
    /// - `low_threshold`: 0.3 (reduce capacity if utilization below 30%)
    /// - `high_threshold`: 0.7 (increase capacity if utilization above 70%)
    pub fn recommended_capacity(
        &self,
        min_factor: f64,
        max_factor: f64,
        low_threshold: f64,
        high_threshold: f64,
    ) -> usize {
        // NOTE(review): the parameters are not validated here. The formulas
        // below assume 0 < low_threshold < high_threshold < 1 and
        // min_factor <= 1.0 <= max_factor; out-of-range inputs (or NaN) yield
        // meaningless recommendations — TODO confirm callers uphold this.

        // If the cache is empty, return the current capacity
        if self.nodes.is_empty() {
            return self.capacity;
        }

        // Count entries with visited flag set
        let visited_count = self.nodes.iter().filter(|node| node.visited).count();

        // Calculate the utilization ratio (visited entries / total entries)
        let utilization_ratio = visited_count as f64 / self.nodes.len() as f64;

        // Calculate fill ratio (total entries / capacity)
        let fill_ratio = self.nodes.len() as f64 / self.capacity as f64;

        // Low fill ratio threshold (consider the cache underfilled below this)
        let low_fill_threshold = 0.1; // 10% filled

        // Fill ratio takes precedence over utilization:
        // If the cache is severely underfilled, we should decrease capacity
        // regardless of utilization
        if fill_ratio < low_fill_threshold {
            // Calculate how much to decrease based on how empty the cache is
            // (0.0 = right at the threshold, 1.0 = completely empty)
            let fill_below_threshold = if fill_ratio > 0.0 {
                (low_fill_threshold - fill_ratio) / low_fill_threshold
            } else {
                1.0
            };
            // Apply the min_factor as a floor
            let scaling_factor = 1.0 - (1.0 - min_factor) * fill_below_threshold;

            // Apply the scaling factor to current capacity and ensure it's at least 1
            return std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize);
        }

        // For normal fill levels, use the original logic based on utilization
        let scaling_factor = if utilization_ratio >= high_threshold {
            // High utilization - recommend increasing the capacity
            // Scale between 1.0 and max_factor based on utilization above the high threshold
            let utilization_above_threshold =
                (utilization_ratio - high_threshold) / (1.0 - high_threshold);
            1.0 + (max_factor - 1.0) * utilization_above_threshold
        } else if utilization_ratio <= low_threshold {
            // Low utilization - recommend decreasing capacity
            // Scale between min_factor and 1.0 based on how far below the low threshold
            let utilization_below_threshold = (low_threshold - utilization_ratio) / low_threshold;
            1.0 - (1.0 - min_factor) * utilization_below_threshold
        } else {
            // Normal utilization - keep current capacity
            1.0
        };

        // Apply the scaling factor to current capacity and ensure it's at least 1
        std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize)
    }
1006}
1007
#[test]
fn test() {
    // Capacity of three: after removing "bar" below, every insert fits
    // and only the removed key should be missing.
    let mut c = SieveCache::new(3).unwrap();
    assert!(c.insert(String::from("foo"), String::from("foocontent")));
    assert!(c.insert(String::from("bar"), String::from("barcontent")));
    c.remove("bar");
    assert!(c.insert(String::from("bar2"), String::from("bar2content")));
    assert!(c.insert(String::from("bar3"), String::from("bar3content")));
    assert_eq!(c.get("foo"), Some(&String::from("foocontent")));
    assert_eq!(c.get("bar"), None);
    assert_eq!(c.get("bar2"), Some(&String::from("bar2content")));
    assert_eq!(c.get("bar3"), Some(&String::from("bar3content")));
}
1021
#[test]
fn test_visited_flag_update() {
    let mut c = SieveCache::new(2).unwrap();
    c.insert("key1".to_owned(), "value1".to_owned());
    c.insert("key2".to_owned(), "value2".to_owned());
    // Re-inserting an existing key replaces its value and marks it visited.
    c.insert("key1".to_owned(), "updated".to_owned());
    // A third key forces an eviction; the freshly-visited "key1" must survive.
    c.insert("key3".to_owned(), "value3".to_owned());
    assert_eq!(c.get("key1"), Some(&"updated".to_owned()));
}
1033
#[test]
fn test_clear() {
    let mut c = SieveCache::new(10).unwrap();
    c.insert("key1".to_owned(), "value1".to_owned());
    c.insert("key2".to_owned(), "value2".to_owned());

    assert_eq!(c.len(), 2);
    assert!(!c.is_empty());

    // Clearing drops every entry and resets the length to zero.
    c.clear();

    assert_eq!(c.len(), 0);
    assert!(c.is_empty());
    assert_eq!(c.get("key1"), None);
    assert_eq!(c.get("key2"), None);
}
1050
#[test]
fn test_iterators() {
    let mut c = SieveCache::new(10).unwrap();
    c.insert("key1".to_owned(), "value1".to_owned());
    c.insert("key2".to_owned(), "value2".to_owned());

    // `keys` yields a borrow of every key, in unspecified order.
    let keys = c.keys().collect::<Vec<_>>();
    assert_eq!(keys.len(), 2);
    assert!(keys.iter().any(|k| k.as_str() == "key1"));
    assert!(keys.iter().any(|k| k.as_str() == "key2"));

    // `values` yields a borrow of every value.
    let values = c.values().collect::<Vec<_>>();
    assert_eq!(values.len(), 2);
    assert!(values.iter().any(|v| v.as_str() == "value1"));
    assert!(values.iter().any(|v| v.as_str() == "value2"));

    // `values_mut` permits rewriting values in place.
    for v in c.values_mut() {
        v.push_str("_updated");
    }

    assert_eq!(c.get("key1"), Some(&"value1_updated".to_owned()));
    assert_eq!(c.get("key2"), Some(&"value2_updated".to_owned()));

    // `iter` yields borrowed key/value pairs for every entry.
    assert_eq!(c.iter().count(), 2);

    // `iter_mut` allows targeted mutation through the pair.
    for (k, v) in c.iter_mut() {
        if k == "key1" {
            v.push_str("_special");
        }
    }

    assert_eq!(
        c.get("key1"),
        Some(&"value1_updated_special".to_owned())
    );
    assert_eq!(c.get("key2"), Some(&"value2_updated".to_owned()));
}
1094
#[test]
fn test_retain() {
    let mut c = SieveCache::new(10).unwrap();

    // Seed two even-valued and two odd-valued entries, in this order.
    for (k, v) in [("even1", 2), ("even2", 4), ("odd1", 1), ("odd2", 3)] {
        c.insert(k.to_owned(), v);
    }

    assert_eq!(c.len(), 4);

    // First pass: keep only the entries whose value is even.
    c.retain(|_, v| v % 2 == 0);

    assert_eq!(c.len(), 2);
    assert!(c.contains_key("even1"));
    assert!(c.contains_key("even2"));
    assert!(!c.contains_key("odd1"));
    assert!(!c.contains_key("odd2"));

    // Second pass: keep only the keys containing the digit '1'.
    c.retain(|k, _| k.contains('1'));

    assert_eq!(c.len(), 1);
    assert!(c.contains_key("even1"));
    assert!(!c.contains_key("even2"));
}
1123
#[test]
fn test_recommended_capacity() {
    // Case 1: an empty cache must report its current capacity unchanged.
    let empty = SieveCache::<String, u32>::new(100).unwrap();
    assert_eq!(empty.recommended_capacity(0.5, 2.0, 0.3, 0.7), 100);

    // Case 2: 90% full but only ~5% of entries visited -> shrink.
    let mut c = SieveCache::new(100).unwrap();
    for i in 0..90 {
        c.insert(i.to_string(), i);
    }
    // Touch just five entries so utilization stays tiny.
    for i in 0..5 {
        c.get(&i.to_string());
    }
    // Low threshold lowered to 0.1 to make the shrink branch unambiguous.
    let rec = c.recommended_capacity(0.5, 2.0, 0.1, 0.7);
    assert!(rec < 100);
    assert!(rec >= 50); // bounded below by min_factor

    // Case 3: 90% full with ~80% of entries visited -> grow.
    let mut c = SieveCache::new(100).unwrap();
    for i in 0..90 {
        c.insert(i.to_string(), i);
        if i % 10 != 0 {
            c.get(&i.to_string());
        }
    }
    let rec = c.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(rec > 100);
    assert!(rec <= 200); // bounded above by max_factor

    // Case 4: ~50% visited sits between both thresholds -> stay put.
    let mut c = SieveCache::new(100).unwrap();
    for i in 0..90 {
        c.insert(i.to_string(), i);
        if i % 2 == 0 {
            c.get(&i.to_string());
        }
    }
    let rec = c.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        rec >= 95,
        "With normal utilization, capacity should be close to original"
    );
    assert!(
        rec <= 100,
        "With normal utilization, capacity should not exceed original"
    );

    // Case 5: every entry visited but only 5% of capacity occupied.
    // The fill ratio overrides the perfect hit rate and forces a shrink.
    let mut c = SieveCache::new(2000).unwrap();
    for i in 0..100 {
        c.insert(i.to_string(), i);
        c.get(&i.to_string());
    }
    let rec = c.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        rec < 2000,
        "With low fill ratio, capacity should be decreased despite high hit rate"
    );
    assert!(
        rec >= 1000, // min_factor = 0.5
        "Capacity should not go below min_factor of current capacity"
    );
}
1202
#[test]
fn insert_never_exceeds_capacity_when_all_visited() {
    let mut cache = SieveCache::new(2).unwrap();
    cache.insert("a".to_owned(), 1);
    cache.insert("b".to_owned(), 2);
    // Touch both entries so every node carries the visited flag.
    assert!(cache.get("a").is_some());
    assert!(cache.get("b").is_some());
    // A third insert forces the eviction scan through all-visited nodes.
    cache.insert("c".to_owned(), 3);
    // Invariant: the cache never holds more entries than its capacity.
    assert!(cache.len() <= cache.capacity());
}