kismet_cache/second_chance.rs
1//! The [Second Chance or Clock](https://en.wikipedia.org/wiki/Page_replacement_algorithm#Second-chance)
2//! page replacement policy is a simple approximation of the Least
3//! Recently Used policy. Kismet uses the second chance policy
4//! because it can be easily implemented on top of the usual file
5//! modification and access times that we can trust operating systems
6//! to update for us.
7//!
8//! This second chance implementation is optimised for *batch*
9//! maintenance: the caller is expected to perform a number of
10//! operations (insertions and accesses) on a set of cached entries
11//! before calling the linearithmic-time `Update::new()`.
12
13/// The Second Chance implementation works with a set of entries; each
14/// entry keeps track of a value that represents its rank in the set,
15/// and a single boolean flag, to determine whether the entry has been
16/// accessed since the last time it was scanned (and move up in rank).
17pub trait Entry {
18 /// The Second Chance policy maintains an implicit list of
19 /// entries. Entries in the list are ordered by sorting by Rank;
20 /// a lower rank comes earlier in the list of removal victims.
21 ///
22 /// For files, this would be `SystemTime` or `FileTime`: files
23 /// with more recent modification times come later in the eviction
24 /// queue.
25 type Rank: Ord;
26
27 /// Returns the rank value for this entry.
28 ///
29 /// For file, this would be the file's modification time.
30 fn rank(&self) -> Self::Rank;
31
32 /// Returns whether the entry was accessed since it last entered
33 /// or was moved back in the list of candidates for removal.
34 ///
35 /// For files, this is true if the access time is strictly greater
36 /// than the modification time.
37 fn accessed(&self) -> bool;
38}
39
40/// An `Update<T>` represents the maintenance operations to perform
41/// on a set of Second Chance entries.
42pub struct Update<T: Entry> {
43 /// List of entries to evict (remove from the cached set).
44 ///
45 /// For files, this is a plain unlink / deletion.
46 pub to_evict: Vec<T>,
47
48 /// List of entries to move back in the list of potential removal
49 /// victims, in order (i.e., the first entry in `to_move_back`
50 /// should be moved directly to the end of the current list, the
51 /// second entry right after, etc.).
52 ///
53 /// For files, this corresponds to increasing the modification
54 /// time to, e.g., "now."
55 pub to_move_back: Vec<T>,
56}
57
58impl<T: Entry> Update<T> {
59 /// Determines how to update a second chance list of `entries`,
60 /// so that at most `capacity` entries remain.
61 pub fn new(entries: impl IntoIterator<Item = T>, capacity: usize) -> Self {
62 let mut sorted_entries: Vec<T> = entries.into_iter().collect();
63
64 if sorted_entries.len() <= capacity {
65 return Self {
66 to_evict: Vec::new(),
67 to_move_back: Vec::new(),
68 };
69 }
70
71 sorted_entries.sort_by_cached_key(|e| e.rank());
72 let must_remove = sorted_entries.len() - capacity;
73 let mut to_evict = Vec::new();
74 let mut to_move_back = Vec::new();
75
76 for entry in sorted_entries {
77 if to_evict.len() == must_remove {
78 break;
79 }
80
81 // Give the entry a second chance if has been accessed
82 // between the last time it entered the tail of the
83 // eviction list and now.
84 if entry.accessed() {
85 to_move_back.push(entry);
86 } else {
87 // Otherwise, we evict in FIFO order.
88 to_evict.push(entry);
89 }
90 }
91
92 // If we still have elements to remove, we didn't break early
93 // from the loop, so every input entry is either in
94 // `to_move_back` or in `to_evict`.
95 if to_evict.len() < must_remove {
96 let num_left_to_remove = must_remove - to_evict.len();
97 assert!(num_left_to_remove <= to_move_back.len());
98
99 // The current FIFO list matches the contents of
100 // `to_move_back`. In order to evict `num_left_to_remove`
101 // more items, we must steal the first
102 // `num_left_to_remove` items from `to_move_back`, and
103 // shift them to the `to_evict` vector.
104 to_evict.extend(to_move_back.drain(0..num_left_to_remove));
105 }
106
107 Self {
108 to_evict,
109 to_move_back,
110 }
111 }
112}
113
114#[cfg(test)]
115mod test {
116 use crate::second_chance::*;
117 use proptest::collection::vec;
118 use proptest::prelude::*;
119 use proptest_derive::Arbitrary;
120 use std::collections::HashSet;
121
122 /// A test entry is a u64 timestamp for the virtual time at which
123 /// it entered the second chance list, and an "accessed" bool that
124 /// is true if the entry has been accessed since it last
125 /// (re-)entered the second chance list.
126 #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
127 struct TestEntry(u64, bool);
128
129 impl Entry for TestEntry {
130 type Rank = u64;
131
132 fn rank(&self) -> u64 {
133 self.0
134 }
135
136 fn accessed(&self) -> bool {
137 self.1
138 }
139 }
140
141 /// No-op on an empty list of entries.
142 #[test]
143 fn smoke_test_empty() {
144 let result = Update::<TestEntry>::new(vec![], 4);
145
146 assert_eq!(result.to_evict, Vec::new());
147 assert_eq!(result.to_move_back, Vec::new());
148 }
149
150 /// Also no-op when the number of entries matches the capacity
151 /// (desired size).
152 #[test]
153 fn smoke_test_at_capacity() {
154 let result = Update::new(vec![TestEntry(0, true), TestEntry(1, false)], 2);
155
156 assert_eq!(result.to_evict, Vec::new());
157 assert_eq!(result.to_move_back, Vec::new());
158 }
159
160 /// Evict the first entry: it hasn't been touched.
161 #[test]
162 fn evict_first_entry() {
163 let result = Update::new(
164 vec![TestEntry(0, false), TestEntry(1, true), TestEntry(2, false)],
165 2,
166 );
167
168 assert_eq!(result.to_evict, vec![TestEntry(0, false)]);
169 assert_eq!(result.to_move_back, Vec::new());
170 }
171
172 /// Evict the first entry, despite being defined out of order.
173 #[test]
174 fn evict_first_entry_unsorted() {
175 let result = Update::new(
176 vec![TestEntry(2, false), TestEntry(1, true), TestEntry(0, false)],
177 2,
178 );
179
180 assert_eq!(result.to_evict, vec![TestEntry(0, false)]);
181 assert_eq!(result.to_move_back, Vec::new());
182 }
183
184 /// Evict the first entry, move everything else to the back of the
185 /// list: they were all touched.
186 #[test]
187 fn evict_first_entry_all_touched() {
188 let result = Update::new(
189 vec![TestEntry(0, true), TestEntry(1, true), TestEntry(2, true)],
190 2,
191 );
192
193 assert_eq!(result.to_evict, vec![TestEntry(0, true)]);
194 assert_eq!(
195 result.to_move_back,
196 vec![TestEntry(1, true), TestEntry(2, true)]
197 );
198 }
199
200 /// Move the first entry to the back of list (it has been
201 /// accessed), evict the second.
202 #[test]
203 fn evict_second_entry() {
204 let result = Update::new(
205 vec![TestEntry(0, true), TestEntry(1, false), TestEntry(2, false)],
206 2,
207 );
208
209 assert_eq!(result.to_evict, vec![TestEntry(1, false)]);
210 assert_eq!(result.to_move_back, vec![TestEntry(0, true)]);
211 }
212
213 /// Evict all the unaccessed pages, find we still have to evict
214 /// more, so evict from the prospective `to_move_back` list.
215 #[test]
216 fn evict_second_pass() {
217 let result = Update::new(
218 vec![TestEntry(0, true), TestEntry(1, false), TestEntry(2, true)],
219 1,
220 );
221
222 assert_eq!(
223 result.to_evict,
224 vec![TestEntry(1, false), TestEntry(0, true)]
225 );
226 assert_eq!(result.to_move_back, vec![TestEntry(2, true)]);
227 }
228
229 /// See what happens when all the entries have been accessed.
230 /// We'll want to evict the oldest, and move the rest to the back
231 /// of the list, in order.
232 #[test]
233 fn evict_all_touched() {
234 let result = Update::new(
235 vec![TestEntry(1, true), TestEntry(2, true), TestEntry(0, true)],
236 2,
237 );
238
239 assert_eq!(result.to_evict, vec![TestEntry(0, true)]);
240 assert_eq!(
241 result.to_move_back,
242 vec![TestEntry(1, true), TestEntry(2, true)]
243 );
244 }
245
246 proptest! {
247 /// We can figure out the list to evict by sorting on the
248 /// "accessed" `bool` flag and the `order`, and evicting the
249 /// first few elements in that sorted list.
250 #[test]
251 fn compare_eviction_oracle(mut inputs in vec(any::<TestEntry>(), 0..20usize),
252 capacity in 1..10usize) {
253 let result = Update::new(inputs.clone(), capacity);
254
255 inputs.sort_by(|x, y| (x.1, x.0).cmp(&(y.1, y.0)));
256
257 let num_evictions = inputs.len().saturating_sub(capacity);
258 assert_eq!(&result.to_evict, &inputs[0..num_evictions]);
259 }
260
261 /// Assuming the list of evictions is valid, we can figure out
262 /// what entries must be moved back to the end of the list.
263 #[test]
264 fn compare_move_back_oracle(mut inputs in vec(any::<TestEntry>(), 0..20usize),
265 capacity in 1..10usize) {
266 let result = Update::new(inputs.clone(), capacity);
267
268 inputs.sort();
269
270 if result.to_evict.is_empty() {
271 // No eviction -> nothing to do.
272 assert_eq!(result.to_move_back, vec![]);
273 } else if result.to_evict.iter().any(|e| e.accessed()) {
274 // We evicted something with the access bit set. We
275 // must must move everything else to the back.
276 let evicted: HashSet<_> = result.to_evict.iter().cloned().collect();
277
278 let expected: Vec<_> = inputs
279 .iter()
280 .filter(|e| !evicted.contains(e))
281 .cloned()
282 .collect();
283 assert_eq!(result.to_move_back, expected);
284 } else {
285 // All the evictions are for entries without the
286 // access bit set. Everything with the access bit set
287 // and timestamp less than the most recent evictee
288 // must be moved back to the end of the list.
289 let max_ts = result
290 .to_evict
291 .iter()
292 .map(|e| e.rank())
293 .max()
294 .expect("to_evict isn't empty");
295 let must_move: Vec<_> = inputs
296 .iter()
297 .filter(|e| e.accessed() && e.rank() < max_ts)
298 .cloned()
299 .collect();
300 assert_eq!(&result.to_move_back[0..must_move.len()], &must_move);
301
302 // And entries with the timestamp *equal* to the most
303 // recent evictee *may* be moved back.
304 let may_move: Vec<_> = inputs
305 .iter()
306 .filter(|e| e.accessed() && e.rank() <= max_ts)
307 .cloned()
308 .collect();
309 assert_eq!(&result.to_move_back, &may_move[0..result.to_move_back.len()]);
310 }
311 }
312 }
313}