Skip to main content

citadeldb_buffer/
sieve.rs

1use std::collections::HashMap;
2
3/// SIEVE eviction cache.
4///
5/// Single FIFO queue with a moving eviction hand and a visited bit.
6/// Dirty entries are pinned and never evictable.
7///
8/// O(1) amortized eviction.
9pub struct SieveCache<V> {
10    entries: Vec<SieveEntry<V>>,
11    /// Maps key -> index in entries vec.
12    index: HashMap<u64, usize>,
13    /// Moving eviction pointer.
14    hand: usize,
15    /// Number of occupied slots.
16    len: usize,
17    capacity: usize,
18}
19
20struct SieveEntry<V> {
21    key: u64,
22    value: V,
23    visited: bool,
24    dirty: bool,
25    occupied: bool,
26}
27
28impl<V> SieveEntry<V> {
29    fn empty(value: V) -> Self {
30        Self {
31            key: 0,
32            value,
33            visited: false,
34            dirty: false,
35            occupied: false,
36        }
37    }
38}
39
40impl<V: Default> SieveCache<V> {
41    pub fn new(capacity: usize) -> Self {
42        assert!(capacity > 0, "cache capacity must be > 0");
43        let mut entries = Vec::with_capacity(capacity);
44        for _ in 0..capacity {
45            entries.push(SieveEntry::empty(V::default()));
46        }
47        Self {
48            entries,
49            index: HashMap::with_capacity(capacity),
50            hand: 0,
51            len: 0,
52            capacity,
53        }
54    }
55
56    /// Look up a key. If found, set visited=true and return reference.
57    pub fn get(&mut self, key: u64) -> Option<&V> {
58        if let Some(&idx) = self.index.get(&key) {
59            self.entries[idx].visited = true;
60            Some(&self.entries[idx].value)
61        } else {
62            None
63        }
64    }
65
66    /// Look up a key. If found, set visited=true and return mutable reference.
67    pub fn get_mut(&mut self, key: u64) -> Option<&mut V> {
68        if let Some(&idx) = self.index.get(&key) {
69            self.entries[idx].visited = true;
70            Some(&mut self.entries[idx].value)
71        } else {
72            None
73        }
74    }
75
76    /// Check if a key exists without marking visited.
77    pub fn contains(&self, key: u64) -> bool {
78        self.index.contains_key(&key)
79    }
80
81    /// Insert a value. If cache is full, evict using SIEVE algorithm.
82    /// Returns None if inserted without eviction, or Some((evicted_key, evicted_value))
83    /// if an entry was evicted.
84    ///
85    /// Returns Err(()) if the cache is full and all entries are dirty (pinned).
86    pub fn insert(&mut self, key: u64, value: V) -> Result<Option<(u64, V)>, ()> {
87        // If already present, just update
88        if let Some(&idx) = self.index.get(&key) {
89            self.entries[idx].value = value;
90            self.entries[idx].visited = true;
91            return Ok(None);
92        }
93
94        // If cache not full, use next empty slot
95        if self.len < self.capacity {
96            let idx = self.find_empty_slot();
97            self.entries[idx].key = key;
98            self.entries[idx].value = value;
99            self.entries[idx].visited = true;
100            self.entries[idx].dirty = false;
101            self.entries[idx].occupied = true;
102            self.index.insert(key, idx);
103            self.len += 1;
104            return Ok(None);
105        }
106
107        // Cache is full — need to evict using SIEVE
108        let evicted = self.evict()?;
109
110        // Find the slot that was just freed
111        let idx = self.find_empty_slot();
112        self.entries[idx].key = key;
113        self.entries[idx].value = value;
114        self.entries[idx].visited = true;
115        self.entries[idx].dirty = false;
116        self.entries[idx].occupied = true;
117        self.index.insert(key, idx);
118        self.len += 1;
119
120        Ok(Some(evicted))
121    }
122
123    /// Evict one entry using SIEVE algorithm.
124    /// Skips dirty (pinned) entries and visited entries (clears visited on pass).
125    /// Returns the evicted (key, value).
126    fn evict(&mut self) -> Result<(u64, V), ()> {
127        let mut scanned = 0;
128
129        loop {
130            if scanned >= self.capacity * 2 {
131                // All entries are dirty — can't evict
132                return Err(());
133            }
134
135            let idx = self.hand;
136            self.hand = (self.hand + 1) % self.capacity;
137            scanned += 1;
138
139            if !self.entries[idx].occupied {
140                continue;
141            }
142
143            if self.entries[idx].dirty {
144                // Pinned — skip
145                continue;
146            }
147
148            if self.entries[idx].visited {
149                // Give second chance
150                self.entries[idx].visited = false;
151                continue;
152            }
153
154            // Found victim: not visited, not dirty
155            let evicted_key = self.entries[idx].key;
156            let evicted_value = std::mem::take(&mut self.entries[idx].value);
157            self.entries[idx].occupied = false;
158            self.index.remove(&evicted_key);
159            self.len -= 1;
160
161            return Ok((evicted_key, evicted_value));
162        }
163    }
164
165    fn find_empty_slot(&self) -> usize {
166        for (i, entry) in self.entries.iter().enumerate() {
167            if !entry.occupied {
168                return i;
169            }
170        }
171        unreachable!("find_empty_slot called when cache is full");
172    }
173
174    /// Mark an entry as dirty (pinned, not evictable).
175    pub fn set_dirty(&mut self, key: u64) {
176        if let Some(&idx) = self.index.get(&key) {
177            self.entries[idx].dirty = true;
178        }
179    }
180
181    /// Clear dirty flag on an entry (after flush).
182    pub fn clear_dirty(&mut self, key: u64) {
183        if let Some(&idx) = self.index.get(&key) {
184            self.entries[idx].dirty = false;
185        }
186    }
187
188    /// Check if an entry is dirty.
189    pub fn is_dirty(&self, key: u64) -> bool {
190        self.index.get(&key)
191            .map(|&idx| self.entries[idx].dirty)
192            .unwrap_or(false)
193    }
194
195    /// Iterate over all dirty entries. Returns (key, &value) pairs.
196    pub fn dirty_entries(&self) -> impl Iterator<Item = (u64, &V)> {
197        self.entries.iter()
198            .filter(|e| e.occupied && e.dirty)
199            .map(|e| (e.key, &e.value))
200    }
201
202    /// Iterate mutably over all dirty entries.
203    pub fn dirty_entries_mut(&mut self) -> impl Iterator<Item = (u64, &mut V)> {
204        self.entries.iter_mut()
205            .filter(|e| e.occupied && e.dirty)
206            .map(|e| (e.key, &mut e.value))
207    }
208
209    /// Clear all dirty flags (after commit).
210    pub fn clear_all_dirty(&mut self) {
211        for entry in &mut self.entries {
212            if entry.occupied {
213                entry.dirty = false;
214            }
215        }
216    }
217
218    /// Remove an entry by key. Returns the value if present.
219    pub fn remove(&mut self, key: u64) -> Option<V> {
220        if let Some(idx) = self.index.remove(&key) {
221            let value = std::mem::take(&mut self.entries[idx].value);
222            self.entries[idx].occupied = false;
223            self.len -= 1;
224            Some(value)
225        } else {
226            None
227        }
228    }
229
230    pub fn len(&self) -> usize {
231        self.len
232    }
233
234    pub fn is_empty(&self) -> bool {
235        self.len == 0
236    }
237
238    pub fn capacity(&self) -> usize {
239        self.capacity
240    }
241
242    /// Count dirty entries.
243    pub fn dirty_count(&self) -> usize {
244        self.entries.iter().filter(|e| e.occupied && e.dirty).count()
245    }
246
247    /// Remove all entries from the cache.
248    pub fn clear(&mut self) {
249        for entry in &mut self.entries {
250            entry.occupied = false;
251            entry.visited = false;
252            entry.dirty = false;
253        }
254        self.index.clear();
255        self.len = 0;
256        self.hand = 0;
257    }
258}
259
260#[cfg(test)]
261mod tests {
262    use super::*;
263
264    #[test]
265    fn basic_insert_and_get() {
266        let mut cache = SieveCache::<u32>::new(4);
267        cache.insert(1, 100).unwrap();
268        cache.insert(2, 200).unwrap();
269
270        assert_eq!(cache.get(1), Some(&100));
271        assert_eq!(cache.get(2), Some(&200));
272        assert_eq!(cache.get(3), None);
273    }
274
275    #[test]
276    fn eviction_when_full() {
277        let mut cache = SieveCache::<u32>::new(3);
278        cache.insert(1, 100).unwrap();
279        cache.insert(2, 200).unwrap();
280        cache.insert(3, 300).unwrap();
281        assert_eq!(cache.len(), 3);
282
283        // Don't access any — all are unvisited, one should be evicted
284        // Reset visited flags by clearing them
285        for entry in &mut cache.entries {
286            entry.visited = false;
287        }
288
289        let result = cache.insert(4, 400).unwrap();
290        assert!(result.is_some());
291        assert_eq!(cache.len(), 3);
292    }
293
294    #[test]
295    fn visited_entries_survive_eviction() {
296        let mut cache = SieveCache::<u32>::new(3);
297        cache.insert(1, 100).unwrap();
298        cache.insert(2, 200).unwrap();
299        cache.insert(3, 300).unwrap();
300
301        // Reset all visited flags
302        for entry in &mut cache.entries {
303            entry.visited = false;
304        }
305
306        // Access entry 2 (marks visited)
307        cache.get(2);
308
309        // Insert 4 — should evict 1 or 3 (not 2)
310        let evicted = cache.insert(4, 400).unwrap().unwrap();
311        assert_ne!(evicted.0, 2, "visited entry should not be evicted");
312        assert!(cache.contains(2));
313        assert!(cache.contains(4));
314    }
315
316    #[test]
317    fn dirty_entries_not_evicted() {
318        let mut cache = SieveCache::<u32>::new(2);
319        cache.insert(1, 100).unwrap();
320        cache.insert(2, 200).unwrap();
321
322        // Reset visited
323        for entry in &mut cache.entries {
324            entry.visited = false;
325        }
326
327        // Mark entry 1 dirty
328        cache.set_dirty(1);
329
330        // Insert 3 — must evict 2 (not dirty 1)
331        let evicted = cache.insert(3, 300).unwrap().unwrap();
332        assert_eq!(evicted.0, 2);
333        assert!(cache.contains(1));
334        assert!(cache.contains(3));
335    }
336
337    #[test]
338    fn all_dirty_returns_err() {
339        let mut cache = SieveCache::<u32>::new(2);
340        cache.insert(1, 100).unwrap();
341        cache.insert(2, 200).unwrap();
342
343        cache.set_dirty(1);
344        cache.set_dirty(2);
345
346        // Reset visited
347        for entry in &mut cache.entries {
348            entry.visited = false;
349        }
350
351        let result = cache.insert(3, 300);
352        assert!(result.is_err());
353    }
354
355    #[test]
356    fn clear_dirty() {
357        let mut cache = SieveCache::<u32>::new(2);
358        cache.insert(1, 100).unwrap();
359        cache.set_dirty(1);
360        assert!(cache.is_dirty(1));
361
362        cache.clear_dirty(1);
363        assert!(!cache.is_dirty(1));
364    }
365
366    #[test]
367    fn dirty_entries_iterator() {
368        let mut cache = SieveCache::<u32>::new(4);
369        cache.insert(1, 100).unwrap();
370        cache.insert(2, 200).unwrap();
371        cache.insert(3, 300).unwrap();
372
373        cache.set_dirty(1);
374        cache.set_dirty(3);
375
376        let dirty: Vec<_> = cache.dirty_entries().collect();
377        assert_eq!(dirty.len(), 2);
378        assert_eq!(cache.dirty_count(), 2);
379    }
380
381    #[test]
382    fn clear_all_dirty() {
383        let mut cache = SieveCache::<u32>::new(3);
384        cache.insert(1, 100).unwrap();
385        cache.insert(2, 200).unwrap();
386        cache.set_dirty(1);
387        cache.set_dirty(2);
388
389        cache.clear_all_dirty();
390        assert_eq!(cache.dirty_count(), 0);
391    }
392
393    #[test]
394    fn remove_entry() {
395        let mut cache = SieveCache::<u32>::new(4);
396        cache.insert(1, 100).unwrap();
397        cache.insert(2, 200).unwrap();
398
399        let removed = cache.remove(1);
400        assert_eq!(removed, Some(100));
401        assert!(!cache.contains(1));
402        assert_eq!(cache.len(), 1);
403    }
404
405    #[test]
406    fn update_existing_key() {
407        let mut cache = SieveCache::<u32>::new(4);
408        cache.insert(1, 100).unwrap();
409        cache.insert(1, 200).unwrap();
410
411        assert_eq!(cache.get(1), Some(&200));
412        assert_eq!(cache.len(), 1);
413    }
414}