common_cache/lib.rs
#![warn(missing_docs, rustdoc::missing_crate_level_docs)]
#![doc = include_str!("../README.md")]
use core::borrow::Borrow;
use core::hash::Hash;
use core::marker::PhantomData;

use indexmap::IndexMap;
use rand::prelude::*;
use replace_with::replace_with_or_abort;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// A collection which keeps and prioritizes the most recently and commonly used items.
///
/// See the module level documentation for details.
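///
/// # Examples
///
/// A small usage sketch (the key and value types here are arbitrary examples):
///
/// ```rust
/// use common_cache::CommonCache;
///
/// // Base 2 and no upper bound on the number of items.
/// let mut cache = CommonCache::new(2, None);
/// cache.insert(1, "one");
/// cache.insert(2, "two");
///
/// // Looking an item up through `entry` and reading it with `get_value` promotes it.
/// if let Some(mut entry) = cache.entry(&1) {
///     assert_eq!(*entry.get_value(), "one");
/// }
/// ```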
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CommonCache<K, V, R: Rng = StdRng> {
    /// The base for the exponentially growing size of levels.
    base: usize,
    /// All active levels in the cache.
    ///
    /// These will at most have sizes [1, base, base^2, base^3, ...] and the last
    /// will not be empty.
    #[cfg_attr(
        feature = "serde",
        serde(bound(
            deserialize = "K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>",
            serialize = "K: Serialize + Eq + Hash, V: Serialize",
        ))
    )]
    levels: Vec<Level<K, V>>,

    /// A random number generator.
    #[cfg_attr(
        feature = "serde",
        serde(skip, default = "SeedableRng::from_entropy", bound = "R: SeedableRng")
    )]
    rng: R,

    /// An upper bound on the number of elements in the cache. Might be set to
    /// `usize::MAX`.
    max_size: usize,

    /// A counter that increments every time elements are moved between levels in the cache.
    ///
    /// Used to prevent an `Index` from silently becoming invalid just because some random
    /// element has been moved to another level. Instead, an `Index` is invalid if the
    /// generations of the index and the cache differ.
    generation: u64,
}

/// A level in the cache.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(bound(
        deserialize = "K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>",
        serialize = "K: Serialize + Eq + Hash, V: Serialize",
    ))
)]
struct Level<K, V> {
    /// The items in the level. Will at most contain `base^n` items where `n` is the index of this
    /// level.
    items: IndexMap<K, V>,
    /// An instance of a uniform distribution to generate random numbers in the range
    /// `[0..base^n]`, where `n` is the index of this level.
    rand_range: rand::distributions::Uniform<usize>,
}

impl<K, V> CommonCache<K, V> {
    /// Create a new `CommonCache` with a specific base and an `StdRng` seeded from entropy.
    ///
    /// Takes a base, which must be `> 1`, and optionally a max size, which must be `>= 2`.
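    ///
    /// # Examples
    ///
    /// A brief sketch of both the bounded and the unbounded form (the types are arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// // Unbounded cache with base 2.
    /// let unbounded: CommonCache<u32, &str> = CommonCache::new(2, None);
    /// assert_eq!(unbounded.max_size(), usize::MAX);
    ///
    /// // Cache with base 2 that holds at most 8 items.
    /// let bounded: CommonCache<u32, &str> = CommonCache::new(2, Some(8));
    /// assert_eq!(bounded.max_size(), 8);
    /// ```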
    pub fn new(base: usize, max_size: Option<usize>) -> Self {
        Self::new_with_rng(base, max_size, StdRng::from_entropy())
    }

    /// Set the max size. Note that this might cause many elements to be
    /// removed.
    ///
    /// PRE: `max_size >= 2`
    ///
    /// Runs in linear time if `max_size < self.size()`, constant time otherwise.
    ///
    /// If the new max size is less than the previous one, all indexes into this
    /// cache will be invalidated. This is because some elements might be removed
    /// randomly from the cache and we don't want an index lookup to
    /// randomly work or fail.
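    ///
    /// # Examples
    ///
    /// A sketch of shrinking a cache below its current size (keys and values are arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// for i in 0..10 {
    ///     cache.insert(i, i * 10);
    /// }
    /// assert_eq!(cache.size(), 10);
    ///
    /// // Shrinking evicts randomly chosen items from the affected levels.
    /// cache.set_max_size(4);
    /// assert!(cache.size() <= 4);
    /// assert_eq!(cache.max_size(), 4);
    /// ```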
    pub fn set_max_size(&mut self, max_size: usize) {
        assert!(
            max_size >= 2,
            "max_size must be >=2 in CommonCache::set_max_size()"
        );
        if max_size >= self.max_size {
            self.max_size = max_size;
            return;
        }
        // Iterate by index so that we can truncate `self.levels` inside the loop
        // without holding a borrow from an iterator over it.
        let mut sum = 0;
        for i in 0..self.levels.len() {
            if sum == max_size {
                self.levels.truncate(i);
                break;
            }
            let level = &mut self.levels[i];
            sum += level.items.len();
            if sum > max_size {
                for _ in max_size..sum {
                    let to_remove = self.rng.gen_range(0..level.items.len());
                    level.items.swap_remove_index(to_remove);
                }
                self.levels.truncate(i + 1);
                break;
            }
        }
        self.max_size = max_size;

        // Some random elements might have been removed so let's increase the generation
        // to invalidate any indexes to the cache.
        self.generation += 1;
    }

    /// Clear the cache.
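    ///
    /// # Examples
    ///
    /// A small sketch (the key and value are arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.clear();
    /// assert_eq!(cache.size(), 0);
    /// ```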
    pub fn clear(&mut self) {
        self.levels.clear();
        self.generation += 1;
    }
}

impl<K, V, R: Rng> CommonCache<K, V, R> {
    /// Create a new `CommonCache` with a given random generator. This can be
    /// useful if you have a pseudo-random generator and want deterministic
    /// and reproducible behaviour.
    ///
    /// Also takes a base, which must be `> 1`, and optionally a max size, which
    /// must be `>= 2`.
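    ///
    /// # Examples
    ///
    /// A sketch using a seeded generator for reproducible behaviour (the seed is arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    /// use rand::{rngs::StdRng, SeedableRng};
    ///
    /// let rng = StdRng::seed_from_u64(42);
    /// let mut cache = CommonCache::new_with_rng(2, Some(16), rng);
    /// cache.insert("key", "value");
    /// assert_eq!(cache.size(), 1);
    /// ```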
    pub fn new_with_rng(base: usize, max_size: Option<usize>, rng: R) -> Self {
        let max_size = max_size.unwrap_or(usize::MAX);
        assert!(max_size >= 2, "max_size in CommonCache must be >= 2");
        assert!(base >= 2, "base in CommonCache must be >=2.");
        Self {
            base,
            rng,
            levels: Vec::new(),
            max_size,
            generation: 0,
        }
    }

    /// Get the number of elements in the cache.
    ///
    /// Runs in `O(log[base](n))` time, since the lengths of all levels must be summed.
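    ///
    /// # Examples
    ///
    /// A small sketch (the keys and values are arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// assert_eq!(cache.size(), 0);
    /// cache.insert('a', 1);
    /// cache.insert('b', 2);
    /// assert_eq!(cache.size(), 2);
    /// ```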
    pub fn size(&self) -> usize {
        self.levels.iter().map(|x| x.items.len()).sum()
    }

    /// Get the currently configured max size for the cache.
    pub fn max_size(&self) -> usize {
        self.max_size
    }
}

impl<K, V, R> CommonCache<K, V, R>
where
    K: Eq + Hash,
    R: Rng,
{
    /// Insert a value into the cache.
    ///
    /// If the value is new, it will be inserted at the second lowest level. So if `self.base = 2`,
    /// then it will be inserted in the second quarter of the cache.
    ///
    /// If the value already exists in the cache, it will be updated with the new key and
    /// value and be moved to one level above its previous position.
    ///
    /// **IMPORTANT: All `Index`es to elements in this cache will be invalidated.**
    /// This is because some random elements will be moved between levels in
    /// the cache, and we don't want indexes to be invalidated only at random;
    /// that might cause erroneous tests to pass non-deterministically.
    ///
    /// # Returns
    ///
    /// Returns the entry for the newly inserted item.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// let mut entry = cache.insert(4, "Hello");
    /// assert!(matches!(*entry.get_value(), "Hello"));
    /// ```
    pub fn insert(&mut self, key: K, value: V) -> Entry<'_, K, V, R> {
        // Check if the item is already in the cache.
        let insert_level = if let Some(entry) = self.entry(&key) {
            let level = entry.level;
            let _old_item = entry.remove();
            // Insert the item at the level above.
            level.saturating_sub(1)
        } else {
            // If the item is new, insert it in the second lowest level.
            self.levels.len().saturating_sub(2)
        };
        self.insert_at_level::<true>(key, value, insert_level)
    }

    /// Insert an item at a specific level in the cache and possibly push an
    /// item to lower levels.
    ///
    /// This is the core function of the algorithm. It will, with probability
    /// k/n (where n is the maximum number of items at the level and k is
    /// the actual number of items), remove an item from the level and
    /// insert it on the level below. This is repeated for all lower
    /// levels. If an item is selected at the lowest level, a new lowest level
    /// will be created.
    ///
    /// The function will of course also insert the given item at the given
    /// level.
    ///
    /// `self.generation` is increased, so all `Index`es to this cache are
    /// invalidated.
    fn insert_at_level<const CREATE_NEW_LEVEL_IF_NEEDED: bool>(
        &mut self,
        key: K,
        value: V,
        level: usize,
    ) -> Entry<'_, K, V, R> {
        // Let's increment the generation immediately so we don't forget it.
        self.generation += 1;

        if self.size() == self.max_size {
            // If the max size has been reached, evict a random item from the last level.
            let last_level_items = &mut self.levels.last_mut().unwrap().items;
            let to_remove = self.rng.gen_range(0..last_level_items.len());
            last_level_items.swap_remove_index(to_remove);
            if last_level_items.is_empty() {
                self.levels.pop();
            }
        }

        if self.levels.is_empty() {
            // If there are no levels, add one.
            self.levels.push(Level {
                items: IndexMap::with_capacity(1),
                rand_range: (0..1).into(),
            });
        }

        // Loop through all levels from the lowest to the current (`level`). For each
        // level, randomly decide whether to move one item down to the level
        // below. The fuller a level is, the higher the probability that an
        // item will be moved down from it.
        for level in (level..self.levels.len()).rev() {
            let current_level = &mut self.levels[level];
            // Generate an integer in the range of the total capacity of the level.
            let i = current_level.rand_range.sample(&mut self.rng);
            if let Some(move_down_item) = current_level.items.swap_remove_index(i) {
                if level != self.levels.len() - 1 {
                    // Insert the item on the level below.
                    self.levels[level + 1]
                        .items
                        .insert(move_down_item.0, move_down_item.1);
                } else if CREATE_NEW_LEVEL_IF_NEEDED {
                    // This was the lowest level. So let's create a new one.
                    let new_level_size = self
                        .base
                        .checked_pow((level + 1).try_into().unwrap_or(u32::MAX))
                        .unwrap_or(usize::MAX);
                    self.levels.push(Level {
                        items: IndexMap::from([move_down_item]),
                        rand_range: (0..new_level_size).into(),
                    });
                }
            }
        }
        // Finally, add the item to the desired level.
        let (idx, None) = self.levels[level].items.insert_full(key, value) else {
            unreachable!()
        };
        Entry {
            cache: self,
            level,
            idx,
        }
    }

    /// Get a handle to an entry in the cache.
    ///
    /// Runs in `O(log[base](n))` time.
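    ///
    /// # Examples
    ///
    /// A sketch of looking items up by key (the keys and values are arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    ///
    /// // Peeking through the entry does not promote the item.
    /// assert_eq!(cache.entry(&1).map(|e| *e.peek_value()), Some("one"));
    /// // A missing key simply yields `None`.
    /// assert!(cache.entry(&2).is_none());
    /// ```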
    pub fn entry<Q>(&mut self, key: &Q) -> Option<Entry<'_, K, V, R>>
    where
        K: Borrow<Q>,
        Q: Eq + Hash + ?Sized,
    {
        if let Some((level, idx)) = self
            .levels
            .iter_mut()
            .enumerate()
            .filter_map(|(i, x)| x.items.get_index_of(key).map(|x| (i, x)))
            .next()
        {
            Some(Entry {
                cache: self,
                level,
                idx,
            })
        } else {
            None
        }
    }

    /// Iterate over the elements in the cache so that all items on any level
    /// will come before any item on any lower level.
    ///
    /// This does not alter the cache in any way. So no items are promoted to
    /// higher levels in the cache when iterated over.
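    ///
    /// # Examples
    ///
    /// A sketch that only inspects the items (their order depends on which level each item
    /// currently occupies):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.insert(2, "two");
    ///
    /// let mut keys: Vec<_> = cache.iter().map(|(key, _)| *key).collect();
    /// keys.sort();
    /// assert_eq!(keys, vec![1, 2]);
    /// ```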
    pub fn iter(&self) -> impl DoubleEndedIterator<Item = (&'_ K, &'_ V)> + '_ {
        self.levels.iter().flat_map(|x| x.items.iter())
    }

    /// Iterate over mutable references to the elements in the cache. All items
    /// on any level will come before any item on any lower level.
    ///
    /// This does not alter the structure of the cache. So no items are promoted
    /// to higher levels in the cache when iterated over.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> {
        self.levels.iter_mut().flat_map(|x| x.items.iter_mut())
    }

    /// Iterate over indices to the elements in the cache so that all items on
    /// any level will come before any item on any lower level.
    ///
    /// This does not alter the cache in any way. So no items are promoted to
    /// higher levels in the cache when iterated over.
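    ///
    /// # Examples
    ///
    /// A sketch that collects the indices and peeks through them (no promotion happens):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.insert(2, "two");
    ///
    /// let indices: Vec<_> = cache.iter_indices().collect();
    /// assert_eq!(indices.len(), cache.size());
    /// for index in &indices {
    ///     // Peeking does not alter the cache, so the indices stay valid.
    ///     let _value = index.peek_value(&cache);
    /// }
    /// ```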
    pub fn iter_indices(&self) -> impl DoubleEndedIterator<Item = Index<K, V, R>> + '_ {
        self.levels
            .iter()
            .enumerate()
            .flat_map(move |(i, x)| (0..x.items.len()).map(move |j| Index::new(i, j, self)))
    }

    /// Find the first item in the cache matching a predicate.
    ///
    /// The advantage of this method over `self.iter().find()` is that it returns
    /// an `Entry`, which can be used to promote or remove the item.
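    ///
    /// # Examples
    ///
    /// A sketch that locates an item by its value and removes it (the data is arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.insert(2, "two");
    ///
    /// let entry = cache.find_first(|_, value| *value == "two").unwrap();
    /// assert_eq!(entry.remove(), (2, "two"));
    /// assert_eq!(cache.size(), 1);
    /// ```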
    pub fn find_first(
        &mut self,
        mut pred: impl FnMut(&K, &V) -> bool,
    ) -> Option<Entry<'_, K, V, R>> {
        if let Some((level, (idx, _))) = self
            .levels
            .iter()
            .enumerate()
            .flat_map(|(i, level)| level.items.iter().enumerate().map(move |x| (i, x)))
            .find(|(_, (_, (key, val)))| pred(key, val))
        {
            Some(Entry {
                cache: self,
                level,
                idx,
            })
        } else {
            None
        }
    }
}

/// A reference to an occupied entry in the cache.
#[derive(Debug)]
pub struct Entry<'a, K, V, R: Rng = StdRng> {
    /// A reference to the entire cache.
    cache: &'a mut CommonCache<K, V, R>,
    /// The index of the level for the entry.
    level: usize,
    /// The index for the entry in the level.
    idx: usize,
}

impl<'a, K: Eq + Hash, V, R: Rng> Entry<'a, K, V, R> {
    /// Read the key and value at the entry without touching the rest of the
    /// cache. This operation will hence not be taken into account when
    /// considering which elements are most commonly used.
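    ///
    /// # Examples
    ///
    /// A sketch of peeking at an entry without promoting it (the data is arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// let entry = cache.insert(1, "one");
    /// let (key, value) = entry.peek_key_value();
    /// assert_eq!((*key, *value), (1, "one"));
    /// ```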
    pub fn peek_key_value(&self) -> (&K, &V) {
        self.cache.levels[self.level]
            .items
            .get_index(self.idx)
            .unwrap()
    }

    /// Silently read the key at this entry.
    pub fn peek_key(&self) -> &K {
        self.peek_key_value().0
    }

    /// Read the value at the entry without touching the rest of the cache. This
    /// operation will hence not be taken into account when considering
    /// which elements are most commonly used.
    pub fn peek_value(&self) -> &V {
        self.peek_key_value().1
    }

    /// Read the entry mutably without touching the rest of the cache. This
    /// operation will not be taken into account when considering which
    /// elements are most commonly used.
    pub fn peek_key_value_mut(&mut self) -> (&K, &mut V) {
        let (key, value) = self.cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Read the value mutably without touching the rest of the cache. This
    /// operation will not be taken into account when considering which
    /// elements are most commonly used.
    pub fn peek_value_mut(&mut self) -> &mut V {
        self.peek_key_value_mut().1
    }

    /// Read the item at this entry and consume the `Entry`. The item will
    /// still be in the cache, but this allows us to get a reference that
    /// lives for the full lifetime of the entry.
    pub fn peek_long(self) -> (&'a K, &'a mut V) {
        let (key, value) = self.cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Get the key and value at this entry and promote this entry to a higher
    /// level in the cache.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_key_value(&mut self) -> (&K, &mut V) {
        replace_with_or_abort(self, |self_| {
            let curr_level = self_.level;
            let (index, cache) = self_.index_and_cache();
            let (key, value) = index.remove_from(cache);
            cache.insert_at_level::<false>(key, value, curr_level.saturating_sub(1))
        });
        self.peek_key_value_mut()
    }

    /// Get the value at this entry and promote this entry to a higher level in
    /// the cache.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_value(&mut self) -> &mut V {
        self.get_key_value().1
    }

    /// Get the key and value at this entry and promote this entry to a higher
    /// level in the cache.
    ///
    /// Unlike `Self::get_key_value`, this method consumes the `Entry`, allowing
    /// for a longer lifetime of the returned references. Note that the item
    /// will still remain in the cache though.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_long(mut self) -> (&'a K, &'a mut V) {
        self.get_key_value();
        self.peek_long()
    }

    /// Remove this entry from the cache, leaving the rest of the cache intact.
    ///
    /// Runs in O(1) time.
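    ///
    /// # Examples
    ///
    /// A sketch of removing an item through its entry (the data is arbitrary):
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.insert(2, "two");
    ///
    /// let removed = cache.entry(&1).unwrap().remove();
    /// assert_eq!(removed, (1, "one"));
    /// assert_eq!(cache.size(), 1);
    /// ```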
    pub fn remove(self) -> (K, V) {
        let (index, cache) = self.index_and_cache();
        index.remove_from(cache)
    }

    /// Get an index for this entry.
    ///
    /// This is like the `Entry` without the reference to the cache. The `Index`
    /// will be invalidated though if the cache is altered in any way,
    /// including insertion of new elements or promotion of existing
    /// elements.
    pub fn index(self) -> Index<K, V, R> {
        self.index_and_cache().0
    }

    /// Split this entry into an index and the cache.
    ///
    /// The `Index` is like the `Entry` without the reference to the cache. The
    /// `Index` will be invalidated though if the cache is altered in any
    /// way, including insertion of new elements or promotion of existing
    /// elements.
    pub fn index_and_cache(self) -> (Index<K, V, R>, &'a mut CommonCache<K, V, R>) {
        (Index::new(self.level, self.idx, self.cache), self.cache)
    }
}

/// An index into a `CommonCache`.
///
/// This should be used when an `Entry` is not sufficient due to lifetime
/// problems. Note however that all indexes to a cache will be invalidated
/// whenever the cache is altered in any way, including insertion of new
/// elements and promotion of existing elements. This is because the
/// index is just a pointer to a specific level and index in that level of the
/// cache. If some element is inserted or promoted, a few other elements will
/// **randomly** be moved down some levels in the cache, causing their indexes
/// to be invalid and potentially point to other items. However, this happens
/// randomly, so tests might not recognize such a bug, and it would not be
/// reproducible.
///
/// The solution is to use an internal counter (the generation), which increments
/// each time the cache is altered. Each index stores the generation of the cache
/// at the time the index was created, and if the index is used with a newer
/// version of the cache it is considered invalid.
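///
/// # Examples
///
/// A sketch of holding on to an `Index` while the cache is not altered (the data is arbitrary):
///
/// ```rust
/// use common_cache::CommonCache;
///
/// let mut cache = CommonCache::new(2, None);
/// let index = cache.insert(1, "one").index();
///
/// // Nothing has altered the cache since the index was created, so it is still valid.
/// assert_eq!(*index.peek_value(&cache), "one");
///
/// // Turning the index back into an entry lets us promote or remove the item.
/// let mut entry = index.entry(&mut cache);
/// assert_eq!(*entry.get_value(), "one");
/// ```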
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Index<K, V, R: Rng = StdRng> {
    /// The index of the level for the item.
    level: usize,
    /// The index for the item within the level.
    idx: usize,
    /// The generation when this index was created.
    generation: u64,
    _key_ty: PhantomData<K>,
    _val_ty: PhantomData<V>,
    _rng_ty: PhantomData<R>,
}

impl<K: Eq + Hash, V, R: Rng> Index<K, V, R> {
    /// Create a new index from a level and an index on that level.
    fn new(level: usize, idx: usize, in_cache: &CommonCache<K, V, R>) -> Self {
        Self {
            level,
            idx,
            generation: in_cache.generation,
            _key_ty: PhantomData,
            _val_ty: PhantomData,
            _rng_ty: PhantomData,
        }
    }

    /// Assert that this index has the same generation as that of a cache.
    /// Panics otherwise.
    fn assert_generation(&self, cache: &CommonCache<K, V, R>) {
        assert_eq!(
            self.generation, cache.generation,
            "The generations of an `Index` and a `CommonCache` differ"
        );
    }

    /// Get an entry corresponding to this index.
    ///
    /// # Panics
    ///
    /// Panics if the generations of the cache and this index differ, i.e. if
    /// something has been inserted or promoted in the cache since this
    /// index was created.
    ///
    /// Might also panic when trying to read the entry if the item corresponding
    /// to this index has been removed.
    pub fn entry(self, cache: &mut CommonCache<K, V, R>) -> Entry<'_, K, V, R> {
        self.assert_generation(cache);
        Entry {
            cache,
            level: self.level,
            idx: self.idx,
        }
    }

    /// Read the key and value at the index without touching the rest of the
    /// cache. This operation will hence not be taken into account when
    /// considering which elements are most commonly used.
    pub fn peek_key_value<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> (&'a K, &'a V) {
        self.assert_generation(cache);
        cache.levels[self.level].items.get_index(self.idx).unwrap()
    }

    /// Silently read the key at this index.
    pub fn peek_key<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> &'a K {
        self.peek_key_value(cache).0
    }

    /// Read the value at the index without touching the rest of the cache. This
    /// operation will hence not be taken into account when considering
    /// which elements are most commonly used.
    pub fn peek_value<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> &'a V {
        self.peek_key_value(cache).1
    }

    /// Read the item at this index mutably without touching the rest of the
    /// cache. This operation will not be taken into account when
    /// considering which elements are most commonly used.
    ///
    /// Note that this does not count as altering the cache, so the index is
    /// still valid after this.
    pub fn peek_key_value_mut<'a>(
        &'a self,
        cache: &'a mut CommonCache<K, V, R>,
    ) -> (&'a K, &'a mut V) {
        self.assert_generation(cache);
        let (key, value) = cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Read the value at this index mutably without touching the rest of the
    /// cache. This operation will not be taken into account when
    /// considering which elements are most commonly used.
    ///
    /// Note that this does not count as altering the cache, so the index is
    /// still valid after this.
    pub fn peek_value_mut<'a>(&'a self, cache: &'a mut CommonCache<K, V, R>) -> &'a mut V {
        self.peek_key_value_mut(cache).1
    }

    /// Get the key and value at this index and promote the item to a higher
    /// level in the cache.
    ///
    /// This function will promote the item to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    ///
    /// **The index will be invalidated after this operation.**
    pub fn get_key_value(self, cache: &mut CommonCache<K, V, R>) -> (&K, &mut V) {
        let curr_level = self.level;
        let (key, value) = self.remove_from(cache);
        cache
            .insert_at_level::<false>(key, value, curr_level.saturating_sub(1))
            .peek_long()
    }

    /// Get the value at this index and promote the item to a higher level in
    /// the cache.
    ///
    /// This function will promote the item to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_value(self, cache: &mut CommonCache<K, V, R>) -> &mut V {
        self.get_key_value(cache).1
    }

    /// Remove the item at this index from the cache.
    fn remove_from(self, cache: &mut CommonCache<K, V, R>) -> (K, V) {
        self.assert_generation(cache);
        let level_items = &mut cache.levels[self.level].items;
        let (key, value) = level_items.swap_remove_index(self.idx).unwrap();
        if level_items.is_empty() && self.level == cache.levels.len() - 1 {
            // If the last level became empty, we shall remove it.
            cache.levels.pop();
        }
        (key, value)
    }