sieve_cache/lib.rs
#![doc = include_str!("../README.md")]

//! # SIEVE Cache for Rust
//!
//! This crate provides implementations of the SIEVE cache replacement algorithm:
//!
//! * [`SieveCache`] - The core single-threaded implementation (always available)

#[cfg(feature = "sync")]
pub mod _docs_sync {}

#[cfg(feature = "sharded")]
pub mod _docs_sharded {}

pub mod _sieve_algorithm {
    //! ## The SIEVE Algorithm
    //!
    //! SIEVE (Simple, space-efficient, In-memory, EViction mEchanism) is a cache eviction
    //! algorithm that maintains a single bit per entry to track whether an item has been
    //! "visited" since it was last considered for eviction. This approach requires less
    //! state than LRU but achieves excellent performance, especially on skewed workloads.
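    //!
    //! For instance, the visited bit is what lets recently read entries survive an
    //! eviction pass. The snippet below is a small illustration using this crate's own
    //! `SieveCache` API; the keys and values are arbitrary example data:
    //!
    //! ```rust
    //! use sieve_cache::SieveCache;
    //!
    //! let mut cache = SieveCache::new(3).unwrap();
    //! cache.insert("a".to_string(), 1);
    //! cache.insert("b".to_string(), 2);
    //! cache.insert("c".to_string(), 3);
    //!
    //! // Reading an entry sets its visited bit.
    //! cache.get("a");
    //! cache.get("b");
    //!
    //! // The eviction scan skips the visited entries and removes "c".
    //! assert_eq!(cache.evict(), Some(3));
    //! assert!(!cache.contains_key("c"));
    //! ```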
}

pub mod _cache_implementation {
    //! ## Cache Implementation Details
    //!
    //! The cache is implemented as a combination of:
    //!
    //! 1. A `HashMap` for O(1) key lookups
    //! 2. A vector-based ordered collection for managing the eviction order
    //! 3. A "visited" flag on each entry to track recent access
    //!
    //! When the cache is full and a new item is inserted, the eviction algorithm
    //! (illustrated in the example below):
    //! 1. Starts from the "hand" position (the current eviction candidate)
    //! 2. Marks each visited entry it passes as non-visited
    //! 3. Evicts the first non-visited entry it finds
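    //!
    //! A minimal sketch of that flow, using this crate's public API (the keys shown are
    //! just example data):
    //!
    //! ```rust
    //! use sieve_cache::SieveCache;
    //!
    //! let mut cache = SieveCache::new(2).unwrap();
    //! cache.insert("first".to_string(), 1);
    //! cache.insert("second".to_string(), 2);
    //!
    //! // Mark "first" as visited.
    //! cache.get("first");
    //!
    //! // Inserting beyond capacity triggers the eviction scan described above:
    //! // "second" was never visited, so it is the entry that gets evicted.
    //! cache.insert("third".to_string(), 3);
    //! assert!(cache.contains_key("first"));
    //! assert!(!cache.contains_key("second"));
    //! assert!(cache.contains_key("third"));
    //! ```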
}

pub mod _implementation_choice {
    //! ## Choosing the Right Implementation
    //!
    //! - Use [`SieveCache`] for single-threaded applications
}

/// [DOCTEST_ONLY]
///
/// ```rust
/// # #[cfg(feature = "sync")]
/// # fn sync_example() {
/// #     use sieve_cache::SyncSieveCache;
/// #     let cache = SyncSieveCache::<String, u32>::new(1000).unwrap();
/// #     let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// #     println!("Recommended capacity: {}", recommended);
/// # }
/// #
/// # #[cfg(feature = "sharded")]
/// # fn sharded_example() {
/// #     use sieve_cache::ShardedSieveCache;
/// #     let cache = ShardedSieveCache::<String, u32>::new(1000).unwrap();
/// #     let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
/// #     println!("Recommended capacity: {}", recommended);
/// # }
/// ```
fn _readme_examples_doctest() {
    // This function exists only to host the doctest above
    // This ensures doctests from the README.md can be validated
}

#[cfg(feature = "sync")]
pub mod _docs_sync_usage {
    //! - Use [`SyncSieveCache`] for multi-threaded applications with moderate concurrency
}

#[cfg(feature = "sharded")]
pub mod _docs_sharded_usage {
    //! - Use [`ShardedSieveCache`] for applications with high concurrency where operations
    //!   are distributed across many different keys
}

use std::borrow::Borrow;
use std::collections::HashMap;
use std::hash::Hash;

#[cfg(feature = "sharded")]
mod sharded;
#[cfg(feature = "sync")]
mod sync;

#[cfg(feature = "sharded")]
pub use sharded::ShardedSieveCache;
#[cfg(feature = "sync")]
pub use sync::SyncSieveCache;

/// Internal representation of a cache entry
#[derive(Clone)]
struct Node<K: Eq + Hash + Clone, V> {
    key: K,
    value: V,
    visited: bool,
}

impl<K: Eq + Hash + Clone, V> Node<K, V> {
    fn new(key: K, value: V) -> Self {
        Self {
            key,
            value,
            visited: false,
        }
    }
}

/// A cache based on the SIEVE eviction algorithm.
///
/// `SieveCache` provides an efficient in-memory cache with a carefully designed eviction
/// strategy that balances simplicity with good performance characteristics, especially on
/// skewed workloads common in real-world applications.
///
/// This is the single-threaded implementation.
#[cfg_attr(
    feature = "sync",
    doc = "For thread-safe variants, see [`SyncSieveCache`] (with the `sync` feature)"
)]
#[cfg_attr(
    feature = "sharded",
    doc = "and [`ShardedSieveCache`] (with the `sharded` feature)."
)]
///
/// # Type Parameters
///
/// * `K` - The key type, which must implement `Eq`, `Hash`, and `Clone`
/// * `V` - The value type, with no constraints
///
/// # Example
///
/// ```rust
/// # #[cfg(feature = "doctest")]
/// # {
/// use sieve_cache::SieveCache;
///
/// // Create a new cache with capacity for 1000 items
/// let mut cache = SieveCache::new(1000).unwrap();
///
/// // Insert some values
/// cache.insert("key1".to_string(), "value1".to_string());
/// cache.insert("key2".to_string(), "value2".to_string());
///
/// // Retrieve values - returns references to the values
/// assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
///
/// // Check if the cache contains a key
/// assert!(cache.contains_key("key1"));
/// assert!(!cache.contains_key("missing_key"));
///
/// // Get a mutable reference to modify a value
/// if let Some(value) = cache.get_mut("key1") {
///     *value = "modified".to_string();
/// }
///
/// // Verify the modification
/// assert_eq!(cache.get("key1"), Some(&"modified".to_string()));
///
/// // Remove a value
/// let removed = cache.remove("key2");
/// assert_eq!(removed, Some("value2".to_string()));
/// # }
/// ```
pub struct SieveCache<K: Eq + Hash + Clone, V> {
    /// Map of keys to indices in the nodes vector
    map: HashMap<K, usize>,
    /// Vector of all cache nodes
    nodes: Vec<Node<K, V>>,
    /// Index to the "hand" pointer used by the SIEVE algorithm for eviction
    hand: Option<usize>,
    /// Maximum number of entries the cache can hold
    capacity: usize,
}

impl<K: Eq + Hash + Clone, V> SieveCache<K, V> {
    /// Creates a new cache with the given capacity.
    ///
    /// The capacity represents the maximum number of key-value pairs
    /// that can be stored in the cache. When this limit is reached,
    /// the cache will use the SIEVE algorithm to evict entries.
    ///
    /// # Errors
    ///
    /// Returns an error if the capacity is zero.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// // Create a cache with space for 100 entries
    /// let cache: SieveCache<String, u32> = SieveCache::new(100).unwrap();
    ///
    /// // Capacity of zero is invalid
    /// let invalid = SieveCache::<String, u32>::new(0);
    /// assert!(invalid.is_err());
    /// # }
    /// ```
    pub fn new(capacity: usize) -> Result<Self, &'static str> {
        if capacity == 0 {
            return Err("capacity must be greater than 0");
        }
        Ok(Self {
            map: HashMap::with_capacity(capacity),
            nodes: Vec::with_capacity(capacity),
            hand: None,
            capacity,
        })
    }

    /// Returns the capacity of the cache.
    ///
    /// This is the maximum number of entries that can be stored before
    /// the cache begins evicting old entries.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let cache = SieveCache::<String, u32>::new(100).unwrap();
    /// assert_eq!(cache.capacity(), 100);
    /// # }
    /// ```
    #[inline]
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Returns the number of cached values.
    ///
    /// This value will never exceed the capacity.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// assert_eq!(cache.len(), 0);
    ///
    /// cache.insert("key".to_string(), "value".to_string());
    /// assert_eq!(cache.len(), 1);
    /// # }
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.nodes.len()
    }

    /// Returns `true` when no values are currently cached.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::<String, String>::new(100).unwrap();
    /// assert!(cache.is_empty());
    ///
    /// cache.insert("key".to_string(), "value".to_string());
    /// assert!(!cache.is_empty());
    ///
    /// cache.remove("key");
    /// assert!(cache.is_empty());
    /// # }
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.nodes.is_empty()
    }

    /// Returns `true` if there is a value in the cache mapped to by `key`.
    ///
    /// This operation does not count as an access for the SIEVE algorithm's
    /// visited flag.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// assert!(cache.contains_key("key"));
    /// assert!(!cache.contains_key("missing"));
    /// # }
    /// ```
    #[inline]
    pub fn contains_key<Q>(&self, key: &Q) -> bool
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        self.map.contains_key(key)
    }

    /// Gets an immutable reference to the value in the cache mapped to by `key`.
    ///
    /// If no value exists for `key`, this returns `None`.
    ///
    /// This operation marks the entry as "visited" in the SIEVE algorithm,
    /// which affects eviction decisions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// assert_eq!(cache.get("key"), Some(&"value".to_string()));
    /// assert_eq!(cache.get("missing"), None);
    /// # }
    /// ```
    pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        if let Some(&idx) = self.map.get(key) {
            // Mark as visited for the SIEVE algorithm
            self.nodes[idx].visited = true;
            Some(&self.nodes[idx].value)
        } else {
            None
        }
    }

    /// Gets a mutable reference to the value in the cache mapped to by `key`.
    ///
    /// If no value exists for `key`, this returns `None`.
    ///
    /// This operation marks the entry as "visited" in the SIEVE algorithm,
    /// which affects eviction decisions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key".to_string(), "value".to_string());
    ///
    /// // Modify the value in-place
    /// if let Some(value) = cache.get_mut("key") {
    ///     *value = "new_value".to_string();
    /// }
    ///
    /// assert_eq!(cache.get("key"), Some(&"new_value".to_string()));
    /// # }
    /// ```
    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
    where
        Q: Hash + Eq + ?Sized,
        K: Borrow<Q>,
    {
        if let Some(&idx) = self.map.get(key) {
            // Mark as visited for the SIEVE algorithm
            self.nodes[idx].visited = true;
            Some(&mut self.nodes[idx].value)
        } else {
            None
        }
    }

    /// Maps `key` to `value` in the cache, possibly evicting old entries.
    ///
    /// If the key already exists, its value is updated and the entry is marked as visited.
    /// If the key doesn't exist and the cache is at capacity, the SIEVE algorithm is used
    /// to evict an entry before the new key-value pair is inserted.
    ///
    /// # Return Value
    ///
    /// Returns `true` when this is a new entry, and `false` if an existing entry was updated.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    ///
    /// // Insert a new entry
    /// let is_new = cache.insert("key1".to_string(), "value1".to_string());
    /// assert!(is_new); // Returns true for new entries
    ///
    /// // Update an existing entry
    /// let is_new = cache.insert("key1".to_string(), "updated".to_string());
    /// assert!(!is_new); // Returns false for updates
    ///
    /// assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
    /// # }
    /// ```
    pub fn insert(&mut self, key: K, value: V) -> bool {
        // Check if key already exists
        if let Some(&idx) = self.map.get(&key) {
            // Update existing entry
            self.nodes[idx].visited = true;
            self.nodes[idx].value = value;
            return false;
        }

        // Evict if at capacity
        if self.nodes.len() >= self.capacity {
            self.evict();
        }

        // Add the new node at the end of the nodes vector
        let node = Node::new(key.clone(), value);
        self.nodes.push(node);
        let idx = self.nodes.len() - 1;
        self.map.insert(key, idx);
        true
    }

    /// Removes the cache entry mapped to by `key`.
    ///
    /// This method explicitly removes an entry from the cache regardless of its
    /// "visited" status.
    ///
    /// # Return Value
    ///
    /// Returns the value removed from the cache. If `key` did not map to any value,
    /// then this returns `None`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Remove an existing entry
    /// let removed = cache.remove("key1");
    /// assert_eq!(removed, Some("value1".to_string()));
    ///
    /// // Try to remove a non-existent entry
    /// let removed = cache.remove("key1");
    /// assert_eq!(removed, None);
    ///
    /// assert_eq!(cache.len(), 1); // Only one entry remains
    /// # }
    /// ```
    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Eq + Hash + ?Sized,
    {
        // Find the node index
        let idx = self.map.remove(key)?;

        // If this is the last element, just remove it
        if idx == self.nodes.len() - 1 {
            return Some(self.nodes.pop().unwrap().value);
        }

        // Update hand if needed
        if let Some(hand_idx) = self.hand {
            if hand_idx == idx {
                // Move hand to the previous node or wrap to end
                self.hand = if idx > 0 {
                    Some(idx - 1)
                } else {
                    Some(self.nodes.len() - 2)
                };
            } else if hand_idx == self.nodes.len() - 1 {
                // If hand points to the last element (which will be moved to idx)
                self.hand = Some(idx);
            }
        }

        // Remove the node by replacing it with the last one and updating the map
        let last_node = self.nodes.pop().unwrap();
        let removed_value = if idx < self.nodes.len() {
            // Only need to swap and update map if not the last element
            let last_key = last_node.key.clone(); // Clone the key before moving
            let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
            self.map.insert(last_key, idx); // Update map for swapped node
            removed_node.value
        } else {
            last_node.value
        };

        Some(removed_value)
    }

    /// Removes and returns a value from the cache that was not recently accessed.
    ///
    /// This method implements the SIEVE eviction algorithm:
    /// 1. Starting from the "hand" pointer or the end of the vector, look for an entry
    ///    that hasn't been visited since the last scan
    /// 2. Mark all visited entries as non-visited during the scan
    /// 3. If a non-visited entry is found, evict it
    /// 4. Update the hand to point to the previous node
    ///
    /// # Return Value
    ///
    /// If a suitable entry is found, it is removed from the cache and its value is returned.
    /// If all entries have been recently accessed or the cache is empty, this returns `None`.
    ///
    /// # Note
    ///
    /// This method is automatically called by `insert` when the cache is at capacity,
    /// but it can also be called manually to proactively free space.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(3).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    /// cache.insert("key3".to_string(), "value3".to_string());
    ///
    /// // Access key1 and key2 to mark them as visited
    /// cache.get("key1");
    /// cache.get("key2");
    ///
    /// // key3 hasn't been accessed, so it should be evicted
    /// let evicted = cache.evict();
    /// assert!(evicted.is_some());
    /// assert!(!cache.contains_key("key3")); // key3 was evicted
    /// # }
    /// ```
    pub fn evict(&mut self) -> Option<V> {
        if self.nodes.is_empty() {
            return None;
        }

        // Start from the hand pointer or the end if no hand
        let mut current_idx = self.hand.unwrap_or(self.nodes.len() - 1);
        let start_idx = current_idx;

        // Track whether we've wrapped around and whether we found a node to evict
        let mut wrapped = false;
        let mut found_idx = None;

        // Scan for a non-visited entry
        loop {
            // If current node is not visited, mark it for eviction
            if !self.nodes[current_idx].visited {
                found_idx = Some(current_idx);
                break;
            }

            // Mark as non-visited for next scan
            self.nodes[current_idx].visited = false;

            // Move to previous node or wrap to end
            current_idx = if current_idx > 0 {
                current_idx - 1
            } else {
                // Wrap around to end of vector
                if wrapped {
                    // If we've already wrapped, break to avoid infinite loop
                    break;
                }
                wrapped = true;
                self.nodes.len() - 1
            };

            // If we've looped back to start, we've checked all nodes
            if current_idx == start_idx {
                break;
            }
        }

        // If we found a node to evict
        if let Some(idx) = found_idx {
            // Update the hand pointer to the previous node or wrap to end
            self.hand = if idx > 0 {
                Some(idx - 1)
            } else if self.nodes.len() > 1 {
                Some(self.nodes.len() - 2)
            } else {
                None
            };

            // Remove the key from the map
            let key = &self.nodes[idx].key;
            self.map.remove(key);

            // Remove the node and return its value
            if idx == self.nodes.len() - 1 {
                // If last node, just pop it
                return Some(self.nodes.pop().unwrap().value);
            } else {
                // Otherwise swap with the last node
                let last_node = self.nodes.pop().unwrap();
                let last_key = last_node.key.clone(); // Clone the key before moving
                let removed_node = std::mem::replace(&mut self.nodes[idx], last_node);
                // Update the map for the moved node
                self.map.insert(last_key, idx);
                return Some(removed_node.value);
            }
        }

        None
    }

    /// Removes all entries from the cache.
    ///
    /// This operation clears all stored values and resets the cache to an empty state,
    /// while maintaining the original capacity.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// assert_eq!(cache.len(), 2);
    ///
    /// cache.clear();
    /// assert_eq!(cache.len(), 0);
    /// assert!(cache.is_empty());
    /// # }
    /// ```
    pub fn clear(&mut self) {
        self.map.clear();
        self.nodes.clear();
        self.hand = None;
    }

    /// Returns an iterator over all keys in the cache.
    ///
    /// The order of keys is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashSet;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let keys: HashSet<_> = cache.keys().collect();
    /// assert_eq!(keys.len(), 2);
    /// assert!(keys.contains(&"key1".to_string()));
    /// assert!(keys.contains(&"key2".to_string()));
    /// # }
    /// ```
    pub fn keys(&self) -> impl Iterator<Item = &K> {
        self.nodes.iter().map(|node| &node.key)
    }

    /// Returns an iterator over all values in the cache.
    ///
    /// The order of values is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashSet;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let values: HashSet<_> = cache.values().collect();
    /// assert_eq!(values.len(), 2);
    /// assert!(values.contains(&"value1".to_string()));
    /// assert!(values.contains(&"value2".to_string()));
    /// # }
    /// ```
    pub fn values(&self) -> impl Iterator<Item = &V> {
        self.nodes.iter().map(|node| &node.value)
    }

    /// Returns an iterator over all mutable values in the cache.
    ///
    /// The order of values is not specified and should not be relied upon.
    /// Note that iterating through this will mark all entries as visited.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Update all values by appending text
    /// for value in cache.values_mut() {
    ///     *value = format!("{}_updated", value);
    /// }
    ///
    /// assert_eq!(cache.get("key1"), Some(&"value1_updated".to_string()));
    /// assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));
    /// # }
    /// ```
    pub fn values_mut(&mut self) -> impl Iterator<Item = &mut V> {
        for node in &mut self.nodes {
            node.visited = true;
        }
        self.nodes.iter_mut().map(|node| &mut node.value)
    }

    /// Returns an iterator over all key-value pairs in the cache.
    ///
    /// The order of pairs is not specified and should not be relied upon.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    /// use std::collections::HashMap;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// let entries: HashMap<_, _> = cache.iter().collect();
    /// assert_eq!(entries.len(), 2);
    /// assert_eq!(entries.get(&"key1".to_string()), Some(&&"value1".to_string()));
    /// assert_eq!(entries.get(&"key2".to_string()), Some(&&"value2".to_string()));
    /// # }
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
        self.nodes.iter().map(|node| (&node.key, &node.value))
    }

    /// Returns an iterator over all key-value pairs in the cache, with mutable references to values.
    ///
    /// The order of pairs is not specified and should not be relied upon.
    /// Note that iterating through this will mark all entries as visited.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), "value1".to_string());
    /// cache.insert("key2".to_string(), "value2".to_string());
    ///
    /// // Update all values associated with keys containing '1'
    /// for (key, value) in cache.iter_mut() {
    ///     if key.contains('1') {
    ///         *value = format!("{}_special", value);
    ///     }
    /// }
    ///
    /// assert_eq!(cache.get("key1"), Some(&"value1_special".to_string()));
    /// assert_eq!(cache.get("key2"), Some(&"value2".to_string()));
    /// # }
    /// ```
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&K, &mut V)> {
        for node in &mut self.nodes {
            node.visited = true;
        }
        self.nodes
            .iter_mut()
            .map(|node| (&node.key, &mut node.value))
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Removes all entries for which the provided function returns `false`.
    /// The elements are visited in arbitrary, unspecified order.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # #[cfg(feature = "doctest")]
    /// # {
    /// use sieve_cache::SieveCache;
    ///
    /// let mut cache = SieveCache::new(100).unwrap();
    /// cache.insert("key1".to_string(), 100);
    /// cache.insert("key2".to_string(), 200);
    /// cache.insert("key3".to_string(), 300);
    ///
    /// // Keep only entries with values greater than 150
    /// cache.retain(|_, value| *value > 150);
    ///
    /// assert_eq!(cache.len(), 2);
    /// assert!(!cache.contains_key("key1"));
    /// assert!(cache.contains_key("key2"));
    /// assert!(cache.contains_key("key3"));
    /// # }
    /// ```
    pub fn retain<F>(&mut self, mut f: F)
    where
        F: FnMut(&K, &V) -> bool,
    {
        // Collect indices to remove
        let mut to_remove = Vec::new();

        for (i, node) in self.nodes.iter().enumerate() {
            if !f(&node.key, &node.value) {
                to_remove.push(i);
            }
        }

        // Remove indices from highest to lowest to avoid invalidating other indices
        to_remove.sort_unstable_by(|a, b| b.cmp(a));

        for idx in to_remove {
            // Remove from map
            self.map.remove(&self.nodes[idx].key);

            // If it's the last element, just pop it
            if idx == self.nodes.len() - 1 {
                self.nodes.pop();
            } else {
                // Replace with the last element
                let last_idx = self.nodes.len() - 1;
                // Use swap_remove which replaces the removed element with the last element
                self.nodes.swap_remove(idx);
                if idx < self.nodes.len() {
                    // Update map for the swapped node
                    self.map.insert(self.nodes[idx].key.clone(), idx);
                }

                // Update hand if needed
                if let Some(hand_idx) = self.hand {
                    if hand_idx == idx {
                        // Hand was pointing to the removed node, move it to previous
                        self.hand = if idx > 0 {
                            Some(idx - 1)
                        } else if !self.nodes.is_empty() {
                            Some(self.nodes.len() - 1)
                        } else {
                            None
                        };
                    } else if hand_idx == last_idx {
                        // Hand was pointing to the last node that was moved
                        self.hand = Some(idx);
                    }
                }
            }
        }

        // The pop-only branch above never adjusts the hand, so it may now point past the
        // end of the shortened vector. Reset it here so a later eviction scan cannot
        // index out of bounds.
        if let Some(hand_idx) = self.hand {
            if hand_idx >= self.nodes.len() {
                self.hand = if self.nodes.is_empty() {
                    None
                } else {
                    Some(self.nodes.len() - 1)
                };
            }
        }
    }

    /// Returns a recommended cache capacity based on current utilization.
    ///
    /// This method analyzes the current cache utilization and recommends a new capacity based on:
    /// - The fill ratio (how much of the capacity is actually being used)
    /// - The number of entries with the 'visited' flag set
    /// - A target utilization range
    ///
    /// The recommendation aims to keep the cache size optimal:
    /// - If the cache is significantly underfilled (fill ratio < 10%), it suggests decreasing capacity
    ///   regardless of other factors to avoid wasting memory
    /// - If many entries are frequently accessed (high utilization), it suggests increasing capacity
    /// - If few entries are accessed frequently (low utilization), it suggests decreasing capacity
    /// - Within a normal utilization range, it keeps the capacity stable
    ///
    /// # Arguments
    ///
    /// * `min_factor` - Minimum scaling factor (e.g., 0.5 means never recommend less than 50% of current capacity)
    /// * `max_factor` - Maximum scaling factor (e.g., 2.0 means never recommend more than 200% of current capacity)
    /// * `low_threshold` - Utilization threshold below which capacity is reduced (e.g., 0.3 means 30% utilization)
    /// * `high_threshold` - Utilization threshold above which capacity is increased (e.g., 0.7 means 70% utilization)
    
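    ///
    /// As a rough worked example (the numbers are illustrative only, using the parameter
    /// values suggested under "Default Recommendation Parameters" below): with a capacity
    /// of 100 holding 90 entries of which 72 are visited, the fill ratio is 0.9 and the
    /// utilization is 72 / 90 = 0.8. That is above `high_threshold` = 0.7, so the scaling
    /// factor is 1.0 + (2.0 - 1.0) * (0.8 - 0.7) / (1.0 - 0.7) ≈ 1.33, giving a
    /// recommendation of about 133 entries.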
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use sieve_cache::SieveCache;
    /// #
    /// # fn main() {
    /// #     let mut cache = SieveCache::<String, i32>::new(100).unwrap();
    /// #
    /// #     // Add items to the cache
    /// #     for i in 0..80 {
    /// #         cache.insert(i.to_string(), i);
    /// #
    /// #         // Accessing some items to mark them as visited
    /// #         if i % 2 == 0 {
    /// #             cache.get(&i.to_string());
    /// #         }
    /// #     }
    /// #
    /// // Get a recommended capacity with default parameters
    /// let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    /// println!("Recommended capacity: {}", recommended);
    /// # }
    /// ```
    ///
    /// # Default Recommendation Parameters
    ///
    /// If you're unsure what parameters to use, these values provide a reasonable starting point:
    /// - `min_factor`: 0.5 (never recommend less than half current capacity)
    /// - `max_factor`: 2.0 (never recommend more than twice current capacity)
    /// - `low_threshold`: 0.3 (reduce capacity if utilization below 30%)
    /// - `high_threshold`: 0.7 (increase capacity if utilization above 70%)
    pub fn recommended_capacity(
        &self,
        min_factor: f64,
        max_factor: f64,
        low_threshold: f64,
        high_threshold: f64,
    ) -> usize {
        // If the cache is empty, return the current capacity
        if self.nodes.is_empty() {
            return self.capacity;
        }

        // Count entries with visited flag set
        let visited_count = self.nodes.iter().filter(|node| node.visited).count();

        // Calculate the utilization ratio (visited entries / total entries)
        let utilization_ratio = visited_count as f64 / self.nodes.len() as f64;

        // Calculate fill ratio (total entries / capacity)
        let fill_ratio = self.nodes.len() as f64 / self.capacity as f64;

        // Low fill ratio threshold (consider the cache underfilled below this)
        let low_fill_threshold = 0.1; // 10% filled

        // Fill ratio takes precedence over utilization:
        // If the cache is severely underfilled, we should decrease capacity
        // regardless of utilization
        if fill_ratio < low_fill_threshold {
            // Calculate how much to decrease based on how empty the cache is
            let fill_below_threshold = if fill_ratio > 0.0 {
                (low_fill_threshold - fill_ratio) / low_fill_threshold
            } else {
                1.0
            };
            // Apply the min_factor as a floor
            let scaling_factor = 1.0 - (1.0 - min_factor) * fill_below_threshold;

            // Apply the scaling factor to current capacity and ensure it's at least 1
            return std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize);
        }

        // For normal fill levels, use the original logic based on utilization
        let scaling_factor = if utilization_ratio >= high_threshold {
            // High utilization - recommend increasing the capacity
            // Scale between 1.0 and max_factor based on utilization above the high threshold
            let utilization_above_threshold =
                (utilization_ratio - high_threshold) / (1.0 - high_threshold);
            1.0 + (max_factor - 1.0) * utilization_above_threshold
        } else if utilization_ratio <= low_threshold {
            // Low utilization - recommend decreasing capacity
            // Scale between min_factor and 1.0 based on how far below the low threshold
            let utilization_below_threshold = (low_threshold - utilization_ratio) / low_threshold;
            1.0 - (1.0 - min_factor) * utilization_below_threshold
        } else {
            // Normal utilization - keep current capacity
            1.0
        };

        // Apply the scaling factor to current capacity and ensure it's at least 1
        std::cmp::max(1, (self.capacity as f64 * scaling_factor).round() as usize)
    }
}

#[test]
fn test() {
    let mut cache = SieveCache::new(3).unwrap();
    assert!(cache.insert("foo".to_string(), "foocontent".to_string()));
    assert!(cache.insert("bar".to_string(), "barcontent".to_string()));
    cache.remove("bar");
    assert!(cache.insert("bar2".to_string(), "bar2content".to_string()));
    assert!(cache.insert("bar3".to_string(), "bar3content".to_string()));
    assert_eq!(cache.get("foo"), Some(&"foocontent".to_string()));
    assert_eq!(cache.get("bar"), None);
    assert_eq!(cache.get("bar2"), Some(&"bar2content".to_string()));
    assert_eq!(cache.get("bar3"), Some(&"bar3content".to_string()));
}

#[test]
fn test_visited_flag_update() {
    let mut cache = SieveCache::new(2).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());
    // update `key1` entry.
    cache.insert("key1".to_string(), "updated".to_string());
    // new entry is added.
    cache.insert("key3".to_string(), "value3".to_string());
    assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
}

#[test]
fn test_clear() {
    let mut cache = SieveCache::new(10).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());

    assert_eq!(cache.len(), 2);
    assert!(!cache.is_empty());

    cache.clear();

    assert_eq!(cache.len(), 0);
    assert!(cache.is_empty());
    assert_eq!(cache.get("key1"), None);
    assert_eq!(cache.get("key2"), None);
}

#[test]
fn test_iterators() {
    let mut cache = SieveCache::new(10).unwrap();
    cache.insert("key1".to_string(), "value1".to_string());
    cache.insert("key2".to_string(), "value2".to_string());

    // Test keys iterator
    let keys: Vec<&String> = cache.keys().collect();
    assert_eq!(keys.len(), 2);
    assert!(keys.contains(&&"key1".to_string()));
    assert!(keys.contains(&&"key2".to_string()));

    // Test values iterator
    let values: Vec<&String> = cache.values().collect();
    assert_eq!(values.len(), 2);
    assert!(values.contains(&&"value1".to_string()));
    assert!(values.contains(&&"value2".to_string()));

    // Test values_mut iterator
    for value in cache.values_mut() {
        *value = format!("{}_updated", value);
    }

    assert_eq!(cache.get("key1"), Some(&"value1_updated".to_string()));
    assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));

    // Test key-value iterator
    let entries: Vec<(&String, &String)> = cache.iter().collect();
    assert_eq!(entries.len(), 2);

    // Test key-value mutable iterator
    for (key, value) in cache.iter_mut() {
        if key == "key1" {
            *value = format!("{}_special", value);
        }
    }

    assert_eq!(
        cache.get("key1"),
        Some(&"value1_updated_special".to_string())
    );
    assert_eq!(cache.get("key2"), Some(&"value2_updated".to_string()));
}

#[test]
fn test_retain() {
    let mut cache = SieveCache::new(10).unwrap();

    // Add some entries
    cache.insert("even1".to_string(), 2);
    cache.insert("even2".to_string(), 4);
    cache.insert("odd1".to_string(), 1);
    cache.insert("odd2".to_string(), 3);

    assert_eq!(cache.len(), 4);

    // Keep only entries with even values
    cache.retain(|_, v| v % 2 == 0);

    assert_eq!(cache.len(), 2);
    assert!(cache.contains_key("even1"));
    assert!(cache.contains_key("even2"));
    assert!(!cache.contains_key("odd1"));
    assert!(!cache.contains_key("odd2"));

    // Keep only entries with keys containing '1'
    cache.retain(|k, _| k.contains('1'));

    assert_eq!(cache.len(), 1);
    assert!(cache.contains_key("even1"));
    assert!(!cache.contains_key("even2"));
}

#[test]
fn test_recommended_capacity() {
    // Test case 1: Empty cache - should return current capacity
    let cache = SieveCache::<String, u32>::new(100).unwrap();
    assert_eq!(cache.recommended_capacity(0.5, 2.0, 0.3, 0.7), 100);

    // Test case 2: Low utilization (few visited nodes)
    let mut cache = SieveCache::new(100).unwrap();
    // Fill the cache first without marking anything as visited
    for i in 0..90 {
        cache.insert(i.to_string(), i);
    }

    // Now mark only a tiny fraction as visited
    for i in 0..5 {
        cache.get(&i.to_string()); // Only 5% visited
    }

    // With very low utilization and high fill, should recommend decreasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.1, 0.7); // Lower threshold to ensure test passes
    assert!(recommended < 100);
    assert!(recommended >= 50); // Should not go below min_factor

    // Test case 3: High utilization (many visited nodes)
    let mut cache = SieveCache::new(100).unwrap();
    for i in 0..90 {
        cache.insert(i.to_string(), i);
        // Mark 80% as visited
        if i % 10 != 0 {
            cache.get(&i.to_string());
        }
    }
    // With 80% utilization, should recommend increasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(recommended > 100);
    assert!(recommended <= 200); // Should not go above max_factor

    // Test case 4: Normal utilization (should keep capacity the same)
    let mut cache = SieveCache::new(100).unwrap();
    for i in 0..90 {
        cache.insert(i.to_string(), i);
        // Mark 50% as visited
        if i % 2 == 0 {
            cache.get(&i.to_string());
        }
    }
    // With 50% utilization (between thresholds), capacity should be fairly stable
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        recommended >= 95,
        "With normal utilization, capacity should be close to original"
    );
    assert!(
        recommended <= 100,
        "With normal utilization, capacity should not exceed original"
    );

    // Test case 5: Low fill ratio (few entries relative to capacity)
    let mut cache = SieveCache::new(2000).unwrap();
    // Add only a few entries (5% of capacity)
    for i in 0..100 {
        cache.insert(i.to_string(), i);
        // Mark all as visited to simulate high hit rate
        cache.get(&i.to_string());
    }

    // Even though utilization is high (100% visited), the fill ratio is very low (5%)
    // so it should still recommend decreasing capacity
    let recommended = cache.recommended_capacity(0.5, 2.0, 0.3, 0.7);
    assert!(
        recommended < 2000,
        "With low fill ratio, capacity should be decreased despite high hit rate"
    );
    assert!(
        recommended >= 1000, // min_factor = 0.5
        "Capacity should not go below min_factor of current capacity"
    );
}