kismet_cache/
second_chance.rs

1//! The [Second Chance or Clock](https://en.wikipedia.org/wiki/Page_replacement_algorithm#Second-chance)
2//! page replacement policy is a simple approximation of the Least
3//! Recently Used policy.  Kismet uses the second chance policy
4//! because it can be easily implemented on top of the usual file
5//! modification and access times that we can trust operating systems
6//! to update for us.
7//!
8//! This second chance implementation is optimised for *batch*
9//! maintenance: the caller is expected to perform a number of
10//! operations (insertions and accesses) on a set of cached entries
11//! before calling the linearithmic-time `Update::new()`.
12
13/// The Second Chance implementation works with a set of entries; each
14/// entry keeps track of a value that represents its rank in the set,
15/// and a single boolean flag, to determine whether the entry has been
16/// accessed since the last time it was scanned (and move up in rank).
17pub trait Entry {
18    /// The Second Chance policy maintains an implicit list of
19    /// entries.  Entries in the list are ordered by sorting by Rank;
20    /// a lower rank comes earlier in the list of removal victims.
21    ///
22    /// For files, this would be `SystemTime` or `FileTime`: files
23    /// with more recent modification times come later in the eviction
24    /// queue.
25    type Rank: Ord;
26
27    /// Returns the rank value for this entry.
28    ///
29    /// For file, this would be the file's modification time.
30    fn rank(&self) -> Self::Rank;
31
32    /// Returns whether the entry was accessed since it last entered
33    /// or was moved back in the list of candidates for removal.
34    ///
35    /// For files, this is true if the access time is strictly greater
36    /// than the modification time.
37    fn accessed(&self) -> bool;
38}
39
40/// An `Update<T>` represents the maintenance operations to perform
41/// on a set of Second Chance entries.
42pub struct Update<T: Entry> {
43    /// List of entries to evict (remove from the cached set).
44    ///
45    /// For files, this is a plain unlink / deletion.
46    pub to_evict: Vec<T>,
47
48    /// List of entries to move back in the list of potential removal
49    /// victims, in order (i.e., the first entry in `to_move_back`
50    /// should be moved directly to the end of the current list, the
51    /// second entry right after, etc.).
52    ///
53    /// For files, this corresponds to increasing the modification
54    /// time to, e.g., "now."
55    pub to_move_back: Vec<T>,
56}
57
58impl<T: Entry> Update<T> {
59    /// Determines how to update a second chance list of `entries`,
60    /// so that at most `capacity` entries remain.
61    pub fn new(entries: impl IntoIterator<Item = T>, capacity: usize) -> Self {
62        let mut sorted_entries: Vec<T> = entries.into_iter().collect();
63
64        if sorted_entries.len() <= capacity {
65            return Self {
66                to_evict: Vec::new(),
67                to_move_back: Vec::new(),
68            };
69        }
70
71        sorted_entries.sort_by_cached_key(|e| e.rank());
72        let must_remove = sorted_entries.len() - capacity;
73        let mut to_evict = Vec::new();
74        let mut to_move_back = Vec::new();
75
76        for entry in sorted_entries {
77            if to_evict.len() == must_remove {
78                break;
79            }
80
81            // Give the entry a second chance if has been accessed
82            // between the last time it entered the tail of the
83            // eviction list and now.
84            if entry.accessed() {
85                to_move_back.push(entry);
86            } else {
87                // Otherwise, we evict in FIFO order.
88                to_evict.push(entry);
89            }
90        }
91
92        // If we still have elements to remove, we didn't break early
93        // from the loop, so every input entry is either in
94        // `to_move_back` or in `to_evict`.
95        if to_evict.len() < must_remove {
96            let num_left_to_remove = must_remove - to_evict.len();
97            assert!(num_left_to_remove <= to_move_back.len());
98
99            // The current FIFO list matches the contents of
100            // `to_move_back`.  In order to evict `num_left_to_remove`
101            // more items, we must steal the first
102            // `num_left_to_remove` items from `to_move_back`, and
103            // shift them to the `to_evict` vector.
104            to_evict.extend(to_move_back.drain(0..num_left_to_remove));
105        }
106
107        Self {
108            to_evict,
109            to_move_back,
110        }
111    }
112}
113
114#[cfg(test)]
115mod test {
116    use crate::second_chance::*;
117    use proptest::collection::vec;
118    use proptest::prelude::*;
119    use proptest_derive::Arbitrary;
120    use std::collections::HashSet;
121
122    /// A test entry is a u64 timestamp for the virtual time at which
123    /// it entered the second chance list, and an "accessed" bool that
124    /// is true if the entry has been accessed since it last
125    /// (re-)entered the second chance list.
126    #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
127    struct TestEntry(u64, bool);
128
129    impl Entry for TestEntry {
130        type Rank = u64;
131
132        fn rank(&self) -> u64 {
133            self.0
134        }
135
136        fn accessed(&self) -> bool {
137            self.1
138        }
139    }
140
141    /// No-op on an empty list of entries.
142    #[test]
143    fn smoke_test_empty() {
144        let result = Update::<TestEntry>::new(vec![], 4);
145
146        assert_eq!(result.to_evict, Vec::new());
147        assert_eq!(result.to_move_back, Vec::new());
148    }
149
150    /// Also no-op when the number of entries matches the capacity
151    /// (desired size).
152    #[test]
153    fn smoke_test_at_capacity() {
154        let result = Update::new(vec![TestEntry(0, true), TestEntry(1, false)], 2);
155
156        assert_eq!(result.to_evict, Vec::new());
157        assert_eq!(result.to_move_back, Vec::new());
158    }
159
160    /// Evict the first entry: it hasn't been touched.
161    #[test]
162    fn evict_first_entry() {
163        let result = Update::new(
164            vec![TestEntry(0, false), TestEntry(1, true), TestEntry(2, false)],
165            2,
166        );
167
168        assert_eq!(result.to_evict, vec![TestEntry(0, false)]);
169        assert_eq!(result.to_move_back, Vec::new());
170    }
171
172    /// Evict the first entry, despite being defined out of order.
173    #[test]
174    fn evict_first_entry_unsorted() {
175        let result = Update::new(
176            vec![TestEntry(2, false), TestEntry(1, true), TestEntry(0, false)],
177            2,
178        );
179
180        assert_eq!(result.to_evict, vec![TestEntry(0, false)]);
181        assert_eq!(result.to_move_back, Vec::new());
182    }
183
184    /// Evict the first entry, move everything else to the back of the
185    /// list: they were all touched.
186    #[test]
187    fn evict_first_entry_all_touched() {
188        let result = Update::new(
189            vec![TestEntry(0, true), TestEntry(1, true), TestEntry(2, true)],
190            2,
191        );
192
193        assert_eq!(result.to_evict, vec![TestEntry(0, true)]);
194        assert_eq!(
195            result.to_move_back,
196            vec![TestEntry(1, true), TestEntry(2, true)]
197        );
198    }
199
200    /// Move the first entry to the back of list (it has been
201    /// accessed), evict the second.
202    #[test]
203    fn evict_second_entry() {
204        let result = Update::new(
205            vec![TestEntry(0, true), TestEntry(1, false), TestEntry(2, false)],
206            2,
207        );
208
209        assert_eq!(result.to_evict, vec![TestEntry(1, false)]);
210        assert_eq!(result.to_move_back, vec![TestEntry(0, true)]);
211    }
212
213    /// Evict all the unaccessed pages, find we still have to evict
214    /// more, so evict from the prospective `to_move_back` list.
215    #[test]
216    fn evict_second_pass() {
217        let result = Update::new(
218            vec![TestEntry(0, true), TestEntry(1, false), TestEntry(2, true)],
219            1,
220        );
221
222        assert_eq!(
223            result.to_evict,
224            vec![TestEntry(1, false), TestEntry(0, true)]
225        );
226        assert_eq!(result.to_move_back, vec![TestEntry(2, true)]);
227    }
228
229    /// See what happens when all the entries have been accessed.
230    /// We'll want to evict the oldest, and move the rest to the back
231    /// of the list, in order.
232    #[test]
233    fn evict_all_touched() {
234        let result = Update::new(
235            vec![TestEntry(1, true), TestEntry(2, true), TestEntry(0, true)],
236            2,
237        );
238
239        assert_eq!(result.to_evict, vec![TestEntry(0, true)]);
240        assert_eq!(
241            result.to_move_back,
242            vec![TestEntry(1, true), TestEntry(2, true)]
243        );
244    }
245
246    proptest! {
247        /// We can figure out the list to evict by sorting on the
248        /// "accessed" `bool` flag and the `order`, and evicting the
249        /// first few elements in that sorted list.
250        #[test]
251        fn compare_eviction_oracle(mut inputs in vec(any::<TestEntry>(), 0..20usize),
252                                   capacity in 1..10usize) {
253            let result = Update::new(inputs.clone(), capacity);
254
255            inputs.sort_by(|x, y| (x.1, x.0).cmp(&(y.1, y.0)));
256
257            let num_evictions = inputs.len().saturating_sub(capacity);
258            assert_eq!(&result.to_evict, &inputs[0..num_evictions]);
259        }
260
261        /// Assuming the list of evictions is valid, we can figure out
262        /// what entries must be moved back to the end of the list.
263        #[test]
264        fn compare_move_back_oracle(mut inputs in vec(any::<TestEntry>(), 0..20usize),
265                                    capacity in 1..10usize) {
266            let result = Update::new(inputs.clone(), capacity);
267
268            inputs.sort();
269
270            if result.to_evict.is_empty() {
271                // No eviction -> nothing to do.
272                assert_eq!(result.to_move_back, vec![]);
273            } else if result.to_evict.iter().any(|e| e.accessed()) {
274                // We evicted something with the access bit set.  We
275                // must must move everything else to the back.
276                let evicted: HashSet<_> = result.to_evict.iter().cloned().collect();
277
278                let expected: Vec<_> = inputs
279                    .iter()
280                    .filter(|e| !evicted.contains(e))
281                    .cloned()
282                    .collect();
283                assert_eq!(result.to_move_back, expected);
284            } else {
285                // All the evictions are for entries without the
286                // access bit set.  Everything with the access bit set
287                // and timestamp less than the most recent evictee
288                // must be moved back to the end of the list.
289                let max_ts = result
290                    .to_evict
291                    .iter()
292                    .map(|e| e.rank())
293                    .max()
294                    .expect("to_evict isn't empty");
295                let must_move: Vec<_> = inputs
296                    .iter()
297                    .filter(|e| e.accessed() && e.rank() < max_ts)
298                    .cloned()
299                    .collect();
300                assert_eq!(&result.to_move_back[0..must_move.len()], &must_move);
301
302                // And entries with the timestamp *equal* to the most
303                // recent evictee *may* be moved back.
304                let may_move: Vec<_> = inputs
305                    .iter()
306                    .filter(|e| e.accessed() && e.rank() <= max_ts)
307                    .cloned()
308                    .collect();
309                assert_eq!(&result.to_move_back, &may_move[0..result.to_move_back.len()]);
310            }
311        }
312    }
313}