Skip to main content

ix/
neg_cache.rs

1//! Negative result cache.
2//!
3//! Stores "file X with `content_hash` H does NOT match query Q" results.
4//! Prevents re-verification of unchanged files that previously had no matches.
5
6use std::collections::HashSet;
7use std::sync::RwLock;
8use xxhash_rust::xxh64::xxh64;
9
10/// Hit/miss/eviction counters for a [`NegCache`].
11#[derive(Debug, Default, Clone)]
12pub struct NegCacheStats {
13    /// Number of negative cache hits (file skipped).
14    pub hits: u64,
15    /// Number of negative cache misses (file not in cache).
16    pub misses: u64,
17    /// Number of entries evicted due to capacity or policy.
18    pub evictions: u64,
19}
20
21/// Thread-safe negative result cache.
22///
23/// Keys are `(query_fingerprint, content_hash)` pairs. When a file is verified
24/// and produces zero matches, it is recorded here so subsequent queries for the
25/// same pattern can skip re-reading the file if its content hasn't changed.
26#[derive(Debug)]
27pub struct NegCache {
28    set: RwLock<HashSet<(u64, u64)>>,
29    max_entries: usize,
30    admit: RwLock<bool>,
31    stats: RwLock<NegCacheStats>,
32}
33
34impl NegCache {
35    /// Create a negative cache with the given maximum entry count.
36    #[must_use]
37    pub fn new(max_entries: usize) -> Self {
38        Self {
39            set: RwLock::new(HashSet::new()),
40            max_entries,
41            admit: RwLock::new(true),
42            stats: RwLock::new(NegCacheStats::default()),
43        }
44    }
45
46    /// Set whether new entries are admitted.
47    ///
48    /// When `false`, `record_negative` returns immediately without caching.
49    /// Lookups still work on existing entries.
50    ///
51    /// # Panics
52    ///
53    /// Panics if the internal `RwLock` is poisoned.
54    pub fn set_admit(&self, allow: bool) {
55        *self
56            .admit
57            .write()
58            .unwrap_or_else(std::sync::PoisonError::into_inner) = allow;
59    }
60
61    /// # Panics
62    ///
63    /// Panics if the internal `RwLock` is poisoned.
64    #[must_use]
65    pub fn is_known_negative(&self, query_fingerprint: u64, content_hash: u64) -> bool {
66        let set = self
67            .set
68            .read()
69            .unwrap_or_else(std::sync::PoisonError::into_inner);
70        let found = set.contains(&(query_fingerprint, content_hash));
71        drop(set);
72        let mut stats = self
73            .stats
74            .write()
75            .unwrap_or_else(std::sync::PoisonError::into_inner);
76        if found {
77            stats.hits += 1;
78        } else {
79            stats.misses += 1;
80        }
81        found
82    }
83
84    /// # Panics
85    ///
86    /// Panics if the internal `RwLock` is poisoned.
87    pub fn record_negative(&self, query_fingerprint: u64, content_hash: u64) {
88        if !*self
89            .admit
90            .read()
91            .unwrap_or_else(std::sync::PoisonError::into_inner)
92        {
93            return;
94        }
95
96        let mut set = self
97            .set
98            .write()
99            .unwrap_or_else(std::sync::PoisonError::into_inner);
100        if set.len() >= self.max_entries {
101            let to_evict = self
102                .max_entries
103                .checked_div(10usize)
104                .unwrap_or(1usize)
105                .max(1);
106            let mut remaining = to_evict;
107            set.retain(|_| {
108                if remaining > 0 {
109                    remaining -= 1;
110                    false
111                } else {
112                    true
113                }
114            });
115            self.stats
116                .write()
117                .unwrap_or_else(std::sync::PoisonError::into_inner)
118                .evictions += u64::try_from(to_evict).unwrap_or(0);
119        }
120        set.insert((query_fingerprint, content_hash));
121    }
122
123    /// # Panics
124    ///
125    /// Panics if the internal `RwLock` is poisoned.
126    pub fn invalidate_file(&self, content_hash: u64) {
127        let mut set = self
128            .set
129            .write()
130            .unwrap_or_else(std::sync::PoisonError::into_inner);
131        set.retain(|&(_, ch)| ch != content_hash);
132    }
133
134    /// # Panics
135    ///
136    /// Panics if the internal `RwLock` is poisoned.
137    pub fn invalidate_query(&self, query_fingerprint: u64) {
138        let mut set = self
139            .set
140            .write()
141            .unwrap_or_else(std::sync::PoisonError::into_inner);
142        set.retain(|&(qf, _)| qf != query_fingerprint);
143    }
144
145    /// # Panics
146    ///
147    /// Panics if the internal `RwLock` is poisoned.
148    pub fn clear(&self) {
149        let mut set = self
150            .set
151            .write()
152            .unwrap_or_else(std::sync::PoisonError::into_inner);
153        set.clear();
154    }
155
156    /// Evict a fraction of entries (e.g., 0.5 evicts half).
157    /// Used by adaptive cache policy under memory pressure.
158    ///
159    /// # Panics
160    ///
161    /// Panics if the internal `RwLock` is poisoned.
162    #[allow(
163        clippy::cast_precision_loss,
164        clippy::cast_sign_loss,
165        clippy::cast_possible_truncation,
166        clippy::as_conversions
167    )]
168    pub fn evict_fraction(&self, fraction: f64) {
169        if fraction <= 0.0_f64 {
170            return;
171        }
172        if fraction >= 1.0_f64 {
173            self.clear();
174            return;
175        }
176        let mut set = self
177            .set
178            .write()
179            .unwrap_or_else(std::sync::PoisonError::into_inner);
180        let len = set.len();
181        let to_remove = (len as f64 * fraction) as usize;
182        let mut remaining = to_remove;
183        set.retain(|_| {
184            if remaining > 0 {
185                remaining -= 1;
186                false
187            } else {
188                true
189            }
190        });
191        self.stats
192            .write()
193            .unwrap_or_else(std::sync::PoisonError::into_inner)
194            .evictions += u64::try_from(to_remove).unwrap_or(0);
195    }
196
197    /// # Panics
198    ///
199    /// Panics if the internal `RwLock` is poisoned.
200    #[must_use]
201    pub fn stats(&self) -> NegCacheStats {
202        self.stats
203            .read()
204            .unwrap_or_else(std::sync::PoisonError::into_inner)
205            .clone()
206    }
207
208    /// # Panics
209    ///
210    /// Panics if the internal `RwLock` is poisoned.
211    #[must_use]
212    pub fn len(&self) -> usize {
213        self.set
214            .read()
215            .unwrap_or_else(std::sync::PoisonError::into_inner)
216            .len()
217    }
218
219    /// # Panics
220    ///
221    /// Panics if the internal `RwLock` is poisoned.
222    #[must_use]
223    pub fn is_empty(&self) -> bool {
224        self.set
225            .read()
226            .unwrap_or_else(std::sync::PoisonError::into_inner)
227            .is_empty()
228    }
229}
230
231/// Compute a stable fingerprint for a query pattern using XXH64.
232#[must_use]
233pub fn query_fingerprint(pattern: &str) -> u64 {
234    xxh64(pattern.as_bytes(), 0)
235}
236
237#[cfg(test)]
238#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
239mod tests {
240    use super::*;
241
242    #[test]
243    fn is_known_negative_hit_and_miss() {
244        let cache = NegCache::new(100);
245        assert!(!cache.is_known_negative(1, 2));
246        let stats = cache.stats();
247        assert_eq!(stats.misses, 1);
248        assert_eq!(stats.hits, 0);
249
250        cache.record_negative(1, 2);
251        assert!(cache.is_known_negative(1, 2));
252        let stats = cache.stats();
253        assert_eq!(stats.hits, 1);
254        assert_eq!(stats.misses, 1);
255    }
256
257    #[test]
258    fn record_negative_adds_entry() {
259        let cache = NegCache::new(100);
260        assert!(!cache.is_known_negative(10, 20));
261        cache.record_negative(10, 20);
262        assert!(cache.is_known_negative(10, 20));
263        assert_eq!(cache.len(), 1);
264    }
265
266    #[test]
267    fn invalidate_file_removes_all_entries_for_hash() {
268        let cache = NegCache::new(100);
269        cache.record_negative(1, 100);
270        cache.record_negative(2, 100);
271        cache.record_negative(3, 200);
272        assert_eq!(cache.len(), 3);
273
274        cache.invalidate_file(100);
275        assert_eq!(cache.len(), 1);
276        assert!(!cache.is_known_negative(1, 100));
277        assert!(!cache.is_known_negative(2, 100));
278        assert!(cache.is_known_negative(3, 200));
279    }
280
281    #[test]
282    fn eviction_at_max_entries() {
283        let cache = NegCache::new(10);
284        for i in 0..15u64 {
285            cache.record_negative(i, i * 10);
286        }
287        assert!(cache.len() <= 10);
288        assert!(cache.stats().evictions > 0);
289    }
290
291    #[test]
292    fn clear_empties_cache() {
293        let cache = NegCache::new(100);
294        cache.record_negative(1, 2);
295        cache.record_negative(3, 4);
296        assert_eq!(cache.len(), 2);
297        cache.clear();
298        assert_eq!(cache.len(), 0);
299    }
300
301    #[test]
302    fn invalidate_query_removes_all_entries_for_query() {
303        let cache = NegCache::new(100);
304        cache.record_negative(42, 1);
305        cache.record_negative(42, 2);
306        cache.record_negative(99, 1);
307        cache.invalidate_query(42);
308        assert!(!cache.is_known_negative(42, 1));
309        assert!(!cache.is_known_negative(42, 2));
310        assert!(cache.is_known_negative(99, 1));
311    }
312
313    #[test]
314    fn query_fingerprint_is_deterministic() {
315        let a = query_fingerprint("hello world");
316        let b = query_fingerprint("hello world");
317        let c = query_fingerprint("goodbye");
318        assert_eq!(a, b);
319        assert_ne!(a, c);
320    }
321
322    #[test]
323    fn is_empty_works() {
324        let cache = NegCache::new(100);
325        assert!(cache.is_empty());
326        cache.record_negative(1, 2);
327        assert!(!cache.is_empty());
328        cache.invalidate_file(2);
329        assert!(cache.is_empty());
330    }
331}