sieve_cache/lib.rs
1#![doc = include_str!("../README.md")]
2
3//! # SIEVE Cache for Rust
4//!
5//! This crate provides implementations of the SIEVE cache replacement algorithm:
6//!
7//! * [`SieveCache`] - The core single-threaded implementation (always available)
8
9#[cfg(feature = "sync")]
10pub mod _docs_sync {}
11
12#[cfg(feature = "sharded")]
13pub mod _docs_sharded {}
14
15pub mod _sieve_algorithm {
16 //! ## The SIEVE Algorithm
17 //!
18 //! SIEVE (Simple, space-efficient, In-memory, EViction mEchanism) is a cache eviction
19 //! algorithm that maintains a single bit per entry to track whether an item has been
20 //! "visited" since it was last considered for eviction. This approach requires less
21 //! state than LRU but achieves excellent performance, especially on skewed workloads.
22}
23
24pub mod _cache_implementation {
25 //! ## Cache Implementation Details
26 //!
27 //! The cache is implemented as a combination of:
28 //!
29 //! 1. A `HashMap` for O(1) key lookups
30 //! 2. A doubly-linked list for managing the eviction order
31 //! 3. A "visited" flag on each entry to track recent access
32 //!
33 //! When the cache is full and a new item is inserted, the eviction algorithm:
34 //! 1. Starts from the "hand" position (or tail if no hand)
35 //! 2. Finds the first non-visited entry, evicting it
36 //! 3. Marks all visited entries as non-visited while searching
37}
38
39pub mod _implementation_choice {
40 //! ## Choosing the Right Implementation
41 //!
42 //! - Use [`SieveCache`] for single-threaded applications
43}
44
45#[cfg(feature = "sync")]
46pub mod _docs_sync_usage {
47 //! - Use [`SyncSieveCache`] for multi-threaded applications with moderate concurrency
48}
49
50#[cfg(feature = "sharded")]
51pub mod _docs_sharded_usage {
52 //! - Use [`ShardedSieveCache`] for applications with high concurrency where operations
53 //! are distributed across many different keys
54}
55
56use std::borrow::Borrow;
57use std::hash::Hash;
58use std::{collections::HashMap, ptr::NonNull};
59
60#[cfg(feature = "sharded")]
61mod sharded;
62#[cfg(feature = "sync")]
63mod sync;
64
65#[cfg(feature = "sharded")]
66pub use sharded::ShardedSieveCache;
67#[cfg(feature = "sync")]
68pub use sync::SyncSieveCache;
69
70struct Node<K: Eq + Hash + Clone, V> {
71 key: K,
72 value: V,
73 prev: Option<NonNull<Node<K, V>>>,
74 next: Option<NonNull<Node<K, V>>>,
75 visited: bool,
76}
77
78impl<K: Eq + Hash + Clone, V> Node<K, V> {
79 fn new(key: K, value: V) -> Self {
80 Self {
81 key,
82 value,
83 prev: None,
84 next: None,
85 visited: false,
86 }
87 }
88}
89
90/// A cache based on the SIEVE eviction algorithm.
91///
92/// `SieveCache` provides an efficient in-memory cache with a carefully designed eviction
93/// strategy that balances simplicity with good performance characteristics, especially on
94/// skewed workloads common in real-world applications.
95///
96/// This is the single-threaded implementation.
97#[cfg(feature = "sync")]
98/// For thread-safe variants, see [`SyncSieveCache`] (with the `sync` feature)
99#[cfg(feature = "sharded")]
100/// and [`ShardedSieveCache`] (with the `sharded` feature).
101///
102/// # Type Parameters
103///
104/// * `K` - The key type, which must implement `Eq`, `Hash`, and `Clone`
105/// * `V` - The value type, with no constraints
106///
107/// # Example
108///
109/// ```rust
110/// # #[cfg(feature = "doctest")]
111/// # {
112/// use sieve_cache::SieveCache;
113///
114/// // Create a new cache with capacity for 1000 items
115/// let mut cache = SieveCache::new(1000).unwrap();
116///
117/// // Insert some values
118/// cache.insert("key1".to_string(), "value1".to_string());
119/// cache.insert("key2".to_string(), "value2".to_string());
120///
121/// // Retrieve values - returns references to the values
122/// assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
123///
124/// // Check if the cache contains a key
125/// assert!(cache.contains_key("key1"));
126/// assert!(!cache.contains_key("missing_key"));
127///
128/// // Get a mutable reference to modify a value
129/// if let Some(value) = cache.get_mut("key1") {
130/// *value = "modified".to_string();
131/// }
132///
133/// // Verify the modification
134/// assert_eq!(cache.get("key1"), Some(&"modified".to_string()));
135///
136/// // Remove a value
137/// let removed = cache.remove("key2");
138/// assert_eq!(removed, Some("value2".to_string()));
139/// # }
140/// ```
141pub struct SieveCache<K: Eq + Hash + Clone, V> {
142 /// Map of keys to cache entries
143 map: HashMap<K, Box<Node<K, V>>>,
144 /// Pointer to the head (most recently added) node
145 head: Option<NonNull<Node<K, V>>>,
146 /// Pointer to the tail (least recently added) node
147 tail: Option<NonNull<Node<K, V>>>,
148 /// The "hand" pointer used by the SIEVE algorithm for eviction
149 hand: Option<NonNull<Node<K, V>>>,
150 /// Maximum number of entries the cache can hold
151 capacity: usize,
152 /// Current number of entries in the cache
153 len: usize,
154}
155
156unsafe impl<K: Eq + Hash + Clone, V> Send for SieveCache<K, V> {}
157
158impl<K: Eq + Hash + Clone, V> SieveCache<K, V> {
159 /// Creates a new cache with the given capacity.
160 ///
161 /// The capacity represents the maximum number of key-value pairs
162 /// that can be stored in the cache. When this limit is reached,
163 /// the cache will use the SIEVE algorithm to evict entries.
164 ///
165 /// # Errors
166 ///
167 /// Returns an error if the capacity is zero.
168 ///
169 /// # Examples
170 ///
171 /// ```rust
172 /// # #[cfg(feature = "doctest")]
173 /// # {
174 /// use sieve_cache::SieveCache;
175 ///
176 /// // Create a cache with space for 100 entries
177 /// let cache: SieveCache<String, u32> = SieveCache::new(100).unwrap();
178 ///
179 /// // Capacity of zero is invalid
180 /// let invalid = SieveCache::<String, u32>::new(0);
181 /// assert!(invalid.is_err());
182 /// # }
183 /// ```
184 pub fn new(capacity: usize) -> Result<Self, &'static str> {
185 if capacity == 0 {
186 return Err("capacity must be greater than 0");
187 }
188 Ok(Self {
189 map: HashMap::with_capacity(capacity),
190 head: None,
191 tail: None,
192 hand: None,
193 capacity,
194 len: 0,
195 })
196 }
197
198 /// Returns the capacity of the cache.
199 ///
200 /// This is the maximum number of entries that can be stored before
201 /// the cache begins evicting old entries.
202 ///
203 /// # Examples
204 ///
205 /// ```rust
206 /// # #[cfg(feature = "doctest")]
207 /// # {
208 /// use sieve_cache::SieveCache;
209 ///
210 /// let cache = SieveCache::<String, u32>::new(100).unwrap();
211 /// assert_eq!(cache.capacity(), 100);
212 /// # }
213 /// ```
214 #[inline]
215 pub fn capacity(&self) -> usize {
216 self.capacity
217 }
218
219 /// Returns the number of entries currently in the cache.
220 ///
221 /// This value will never exceed the capacity.
222 ///
223 /// # Examples
224 ///
225 /// ```rust
226 /// # #[cfg(feature = "doctest")]
227 /// # {
228 /// use sieve_cache::SieveCache;
229 ///
230 /// let mut cache = SieveCache::new(100).unwrap();
231 /// assert_eq!(cache.len(), 0);
232 ///
233 /// cache.insert("key".to_string(), 42);
234 /// assert_eq!(cache.len(), 1);
235 /// # }
236 /// ```
237 #[inline]
238 pub fn len(&self) -> usize {
239 self.len
240 }
241
242 /// Returns `true` when no values are currently cached.
243 ///
244 /// # Examples
245 ///
246 /// ```rust
247 /// # #[cfg(feature = "doctest")]
248 /// # {
249 /// use sieve_cache::SieveCache;
250 ///
251 /// let mut cache = SieveCache::<String, u32>::new(100).unwrap();
252 /// assert!(cache.is_empty());
253 ///
254 /// cache.insert("key".to_string(), 42);
255 /// assert!(!cache.is_empty());
256 ///
257 /// cache.remove("key");
258 /// assert!(cache.is_empty());
259 /// # }
260 /// ```
261 #[inline]
262 pub fn is_empty(&self) -> bool {
263 self.len == 0
264 }
265
266 /// Returns `true` if there is a value in the cache mapped to by `key`.
267 ///
268 /// This operation does not count as an access for the SIEVE algorithm's
269 /// visited flag.
270 ///
271 /// # Examples
272 ///
273 /// ```rust
274 /// # #[cfg(feature = "doctest")]
275 /// # {
276 /// use sieve_cache::SieveCache;
277 ///
278 /// let mut cache = SieveCache::new(100).unwrap();
279 /// cache.insert("key".to_string(), "value".to_string());
280 ///
281 /// assert!(cache.contains_key("key"));
282 /// assert!(!cache.contains_key("missing"));
283 /// # }
284 /// ```
285 #[inline]
286 pub fn contains_key<Q>(&mut self, key: &Q) -> bool
287 where
288 Q: Hash + Eq + ?Sized,
289 K: Borrow<Q>,
290 {
291 self.map.contains_key(key)
292 }
293
294 /// Gets an immutable reference to the value in the cache mapped to by `key`.
295 ///
296 /// If no value exists for `key`, this returns `None`.
297 ///
298 /// This operation marks the entry as "visited" in the SIEVE algorithm,
299 /// which affects eviction decisions.
300 ///
301 /// # Examples
302 ///
303 /// ```rust
304 /// # #[cfg(feature = "doctest")]
305 /// # {
306 /// use sieve_cache::SieveCache;
307 ///
308 /// let mut cache = SieveCache::new(100).unwrap();
309 /// cache.insert("key".to_string(), "value".to_string());
310 ///
311 /// assert_eq!(cache.get("key"), Some(&"value".to_string()));
312 /// assert_eq!(cache.get("missing"), None);
313 /// # }
314 /// ```
315 pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
316 where
317 Q: Hash + Eq + ?Sized,
318 K: Borrow<Q>,
319 {
320 let node_ = self.map.get_mut(key)?;
321 // Mark as visited for the SIEVE algorithm
322 node_.visited = true;
323 Some(&node_.value)
324 }
325
326 /// Gets a mutable reference to the value in the cache mapped to by `key`.
327 ///
328 /// If no value exists for `key`, this returns `None`.
329 ///
330 /// This operation marks the entry as "visited" in the SIEVE algorithm,
331 /// which affects eviction decisions.
332 ///
333 /// # Examples
334 ///
335 /// ```rust
336 /// # #[cfg(feature = "doctest")]
337 /// # {
338 /// use sieve_cache::SieveCache;
339 ///
340 /// let mut cache = SieveCache::new(100).unwrap();
341 /// cache.insert("key".to_string(), "value".to_string());
342 ///
343 /// // Modify the value in-place
344 /// if let Some(value) = cache.get_mut("key") {
345 /// *value = "new_value".to_string();
346 /// }
347 ///
348 /// assert_eq!(cache.get("key"), Some(&"new_value".to_string()));
349 /// # }
350 /// ```
351 pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
352 where
353 Q: Hash + Eq + ?Sized,
354 K: Borrow<Q>,
355 {
356 let node_ = self.map.get_mut(key)?;
357 // Mark as visited for the SIEVE algorithm
358 node_.visited = true;
359 Some(&mut node_.value)
360 }
361
362 /// Maps `key` to `value` in the cache, possibly evicting old entries.
363 ///
364 /// If the key already exists, its value is updated and the entry is marked as visited.
365 /// If the key doesn't exist and the cache is at capacity, the SIEVE algorithm is used
366 /// to evict an entry before the new key-value pair is inserted.
367 ///
368 /// # Return Value
369 ///
370 /// Returns `true` when this is a new entry, and `false` if an existing entry was updated.
371 ///
372 /// # Examples
373 ///
374 /// ```rust
375 /// # #[cfg(feature = "doctest")]
376 /// # {
377 /// use sieve_cache::SieveCache;
378 ///
379 /// let mut cache = SieveCache::new(100).unwrap();
380 ///
381 /// // Insert a new entry
382 /// let is_new = cache.insert("key1".to_string(), "value1".to_string());
383 /// assert!(is_new); // Returns true for new entries
384 ///
385 /// // Update an existing entry
386 /// let is_new = cache.insert("key1".to_string(), "updated".to_string());
387 /// assert!(!is_new); // Returns false for updates
388 ///
389 /// assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
390 /// # }
391 /// ```
392 pub fn insert(&mut self, key: K, value: V) -> bool {
393 // Check if key already exists
394 let node = self.map.get_mut(&key);
395 if let Some(node_) = node {
396 // Update existing entry
397 node_.visited = true;
398 node_.value = value;
399 return false;
400 }
401
402 // Evict if at capacity
403 if self.len >= self.capacity {
404 self.evict();
405 }
406
407 // Create new node
408 let node = Box::new(Node::new(key.clone(), value));
409 self.add_node(NonNull::from(node.as_ref()));
410 debug_assert!(!node.visited);
411 self.map.insert(key, node);
412 debug_assert!(self.len < self.capacity);
413 self.len += 1;
414 true
415 }
416
417 /// Removes the cache entry mapped to by `key`.
418 ///
419 /// This method explicitly removes an entry from the cache regardless of its
420 /// "visited" status.
421 ///
422 /// # Return Value
423 ///
424 /// Returns the value removed from the cache. If `key` did not map to any value,
425 /// then this returns `None`.
426 ///
427 /// # Examples
428 ///
429 /// ```rust
430 /// # #[cfg(feature = "doctest")]
431 /// # {
432 /// use sieve_cache::SieveCache;
433 ///
434 /// let mut cache = SieveCache::new(100).unwrap();
435 /// cache.insert("key1".to_string(), "value1".to_string());
436 /// cache.insert("key2".to_string(), "value2".to_string());
437 ///
438 /// // Remove an existing entry
439 /// let removed = cache.remove("key1");
440 /// assert_eq!(removed, Some("value1".to_string()));
441 ///
442 /// // Try to remove a non-existent entry
443 /// let removed = cache.remove("key1");
444 /// assert_eq!(removed, None);
445 ///
446 /// assert_eq!(cache.len(), 1); // Only one entry remains
447 /// # }
448 /// ```
449 pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
450 where
451 K: Borrow<Q>,
452 Q: Eq + Hash + ?Sized,
453 {
454 // Find the node
455 let node_ = self.map.get_mut(key)?;
456 let node__ = NonNull::from(node_.as_ref());
457
458 // Update hand pointer if it points to the node being removed
459 if self.hand == Some(node__) {
460 self.hand = node_.as_ref().prev;
461 }
462
463 // Remove from the map and extract the value
464 let value = self.map.remove(key).map(|node| node.value);
465
466 // Remove from the linked list
467 self.remove_node(node__);
468 debug_assert!(self.len > 0);
469 self.len -= 1;
470 value
471 }
472
473 /// Removes and returns a value from the cache that was not recently accessed.
474 ///
475 /// This method implements the SIEVE eviction algorithm:
476 /// 1. Starting from the "hand" pointer (or tail if no hand), look for an entry
477 /// that hasn't been visited since the last scan
478 /// 2. Mark all visited entries as non-visited during the scan
479 /// 3. If a non-visited entry is found, evict it
480 /// 4. Update the hand to point to the previous node
481 ///
482 /// # Return Value
483 ///
484 /// If a suitable entry is found, it is removed from the cache and its value is returned.
485 /// If all entries have been recently accessed or the cache is empty, this returns `None`.
486 ///
487 /// # Note
488 ///
489 /// This method is automatically called by `insert` when the cache is at capacity,
490 /// but it can also be called manually to proactively free space.
491 ///
492 /// # Examples
493 ///
494 /// ```rust
495 /// # #[cfg(feature = "doctest")]
496 /// # {
497 /// use sieve_cache::SieveCache;
498 ///
499 /// let mut cache = SieveCache::new(3).unwrap();
500 /// cache.insert("key1".to_string(), "value1".to_string());
501 /// cache.insert("key2".to_string(), "value2".to_string());
502 /// cache.insert("key3".to_string(), "value3".to_string());
503 ///
504 /// // Access key1 and key2 to mark them as visited
505 /// cache.get("key1");
506 /// cache.get("key2");
507 ///
508 /// // key3 hasn't been accessed, so it should be evicted
509 /// let evicted = cache.evict();
510 /// assert!(evicted.is_some());
511 /// assert!(!cache.contains_key("key3")); // key3 was evicted
512 /// # }
513 /// ```
514 pub fn evict(&mut self) -> Option<V> {
515 // Start from the hand pointer or the tail if hand is None
516 let mut node = self.hand.or(self.tail);
517
518 // Scan for a non-visited entry
519 while node.is_some() {
520 let mut node_ = node.unwrap();
521 unsafe {
522 // If we found a non-visited entry, evict it
523 if !node_.as_ref().visited {
524 break;
525 }
526
527 // Mark as non-visited for the next scan
528 node_.as_mut().visited = false;
529
530 // Move to the previous node, or wrap around to the tail
531 if node_.as_ref().prev.is_some() {
532 node = node_.as_ref().prev;
533 } else {
534 node = self.tail;
535 }
536 }
537 }
538
539 // If we found a node to evict
540 if let Some(node_) = node {
541 let value = unsafe {
542 // Update the hand pointer
543 self.hand = node_.as_ref().prev;
544
545 // Remove from the map and get the value
546 self.map.remove(&node_.as_ref().key).map(|node| node.value)
547 };
548
549 // Remove from the linked list
550 self.remove_node(node_);
551 debug_assert!(self.len > 0);
552 self.len -= 1;
553 value
554 } else {
555 None
556 }
557 }
558
559 /// Adds a node to the front of the linked list (making it the new head).
560 ///
561 /// This is an internal helper method used during insertion.
562 fn add_node(&mut self, mut node: NonNull<Node<K, V>>) {
563 unsafe {
564 // Link the new node to the current head
565 node.as_mut().next = self.head;
566 node.as_mut().prev = None;
567
568 // Update the current head's prev pointer to the new node
569 if let Some(mut head) = self.head {
570 head.as_mut().prev = Some(node);
571 }
572 }
573
574 // Set the new node as the head
575 self.head = Some(node);
576
577 // If this is the first node, it's also the tail
578 if self.tail.is_none() {
579 self.tail = self.head;
580 }
581 }
582
583 /// Removes a node from the linked list.
584 ///
585 /// This is an internal helper method used during removal and eviction.
586 fn remove_node(&mut self, node: NonNull<Node<K, V>>) {
587 unsafe {
588 // Update the previous node's next pointer
589 if let Some(mut prev) = node.as_ref().prev {
590 prev.as_mut().next = node.as_ref().next;
591 } else {
592 // If no previous node, this was the head
593 self.head = node.as_ref().next;
594 }
595
596 // Update the next node's prev pointer
597 if let Some(mut next) = node.as_ref().next {
598 next.as_mut().prev = node.as_ref().prev;
599 } else {
600 // If no next node, this was the tail
601 self.tail = node.as_ref().prev;
602 }
603 }
604 }
605}
606
607#[test]
608fn test() {
609 let mut cache = SieveCache::new(3).unwrap();
610 assert!(cache.insert("foo".to_string(), "foocontent".to_string()));
611 assert!(cache.insert("bar".to_string(), "barcontent".to_string()));
612 cache.remove("bar");
613 assert!(cache.insert("bar2".to_string(), "bar2content".to_string()));
614 assert!(cache.insert("bar3".to_string(), "bar3content".to_string()));
615 assert_eq!(cache.get("foo"), Some(&"foocontent".to_string()));
616 assert_eq!(cache.get("bar"), None);
617 assert_eq!(cache.get("bar2"), Some(&"bar2content".to_string()));
618 assert_eq!(cache.get("bar3"), Some(&"bar3content".to_string()));
619}
620
621#[test]
622fn test_visited_flag_update() {
623 let mut cache = SieveCache::new(2).unwrap();
624 cache.insert("key1".to_string(), "value1".to_string());
625 cache.insert("key2".to_string(), "value2".to_string());
626 // update `key1` entry.
627 cache.insert("key1".to_string(), "updated".to_string());
628 // new entry is added.
629 cache.insert("key3".to_string(), "value3".to_string());
630 assert_eq!(cache.get("key1"), Some(&"updated".to_string()));
631}