1use std::collections::HashSet;
7use std::sync::RwLock;
8use xxhash_rust::xxh64::xxh64;
9
10#[derive(Debug, Default, Clone)]
12pub struct NegCacheStats {
13 pub hits: u64,
15 pub misses: u64,
17 pub evictions: u64,
19}
20
21#[derive(Debug)]
27pub struct NegCache {
28 set: RwLock<HashSet<(u64, u64)>>,
29 max_entries: usize,
30 admit: RwLock<bool>,
31 stats: RwLock<NegCacheStats>,
32}
33
34impl NegCache {
35 #[must_use]
37 pub fn new(max_entries: usize) -> Self {
38 Self {
39 set: RwLock::new(HashSet::new()),
40 max_entries,
41 admit: RwLock::new(true),
42 stats: RwLock::new(NegCacheStats::default()),
43 }
44 }
45
46 pub fn set_admit(&self, allow: bool) {
55 *self
56 .admit
57 .write()
58 .unwrap_or_else(std::sync::PoisonError::into_inner) = allow;
59 }
60
61 #[must_use]
65 pub fn is_known_negative(&self, query_fingerprint: u64, content_hash: u64) -> bool {
66 let set = self
67 .set
68 .read()
69 .unwrap_or_else(std::sync::PoisonError::into_inner);
70 let found = set.contains(&(query_fingerprint, content_hash));
71 drop(set);
72 let mut stats = self
73 .stats
74 .write()
75 .unwrap_or_else(std::sync::PoisonError::into_inner);
76 if found {
77 stats.hits += 1;
78 } else {
79 stats.misses += 1;
80 }
81 found
82 }
83
84 pub fn record_negative(&self, query_fingerprint: u64, content_hash: u64) {
88 if !*self
89 .admit
90 .read()
91 .unwrap_or_else(std::sync::PoisonError::into_inner)
92 {
93 return;
94 }
95
96 let mut set = self
97 .set
98 .write()
99 .unwrap_or_else(std::sync::PoisonError::into_inner);
100 if set.len() >= self.max_entries {
101 let to_evict = self
102 .max_entries
103 .checked_div(10usize)
104 .unwrap_or(1usize)
105 .max(1);
106 let mut remaining = to_evict;
107 set.retain(|_| {
108 if remaining > 0 {
109 remaining -= 1;
110 false
111 } else {
112 true
113 }
114 });
115 self.stats
116 .write()
117 .unwrap_or_else(std::sync::PoisonError::into_inner)
118 .evictions += u64::try_from(to_evict).unwrap_or(0);
119 }
120 set.insert((query_fingerprint, content_hash));
121 }
122
123 pub fn invalidate_file(&self, content_hash: u64) {
127 let mut set = self
128 .set
129 .write()
130 .unwrap_or_else(std::sync::PoisonError::into_inner);
131 set.retain(|&(_, ch)| ch != content_hash);
132 }
133
134 pub fn invalidate_query(&self, query_fingerprint: u64) {
138 let mut set = self
139 .set
140 .write()
141 .unwrap_or_else(std::sync::PoisonError::into_inner);
142 set.retain(|&(qf, _)| qf != query_fingerprint);
143 }
144
145 pub fn clear(&self) {
149 let mut set = self
150 .set
151 .write()
152 .unwrap_or_else(std::sync::PoisonError::into_inner);
153 set.clear();
154 }
155
156 #[allow(
163 clippy::cast_precision_loss,
164 clippy::cast_sign_loss,
165 clippy::cast_possible_truncation,
166 clippy::as_conversions
167 )]
168 pub fn evict_fraction(&self, fraction: f64) {
169 if fraction <= 0.0_f64 {
170 return;
171 }
172 if fraction >= 1.0_f64 {
173 self.clear();
174 return;
175 }
176 let mut set = self
177 .set
178 .write()
179 .unwrap_or_else(std::sync::PoisonError::into_inner);
180 let len = set.len();
181 let to_remove = (len as f64 * fraction) as usize;
182 let mut remaining = to_remove;
183 set.retain(|_| {
184 if remaining > 0 {
185 remaining -= 1;
186 false
187 } else {
188 true
189 }
190 });
191 self.stats
192 .write()
193 .unwrap_or_else(std::sync::PoisonError::into_inner)
194 .evictions += u64::try_from(to_remove).unwrap_or(0);
195 }
196
197 #[must_use]
201 pub fn stats(&self) -> NegCacheStats {
202 self.stats
203 .read()
204 .unwrap_or_else(std::sync::PoisonError::into_inner)
205 .clone()
206 }
207
208 #[must_use]
212 pub fn len(&self) -> usize {
213 self.set
214 .read()
215 .unwrap_or_else(std::sync::PoisonError::into_inner)
216 .len()
217 }
218
219 #[must_use]
223 pub fn is_empty(&self) -> bool {
224 self.set
225 .read()
226 .unwrap_or_else(std::sync::PoisonError::into_inner)
227 .is_empty()
228 }
229}
230
231#[must_use]
233pub fn query_fingerprint(pattern: &str) -> u64 {
234 xxh64(pattern.as_bytes(), 0)
235}
236
237#[cfg(test)]
238#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
239mod tests {
240 use super::*;
241
242 #[test]
243 fn is_known_negative_hit_and_miss() {
244 let cache = NegCache::new(100);
245 assert!(!cache.is_known_negative(1, 2));
246 let stats = cache.stats();
247 assert_eq!(stats.misses, 1);
248 assert_eq!(stats.hits, 0);
249
250 cache.record_negative(1, 2);
251 assert!(cache.is_known_negative(1, 2));
252 let stats = cache.stats();
253 assert_eq!(stats.hits, 1);
254 assert_eq!(stats.misses, 1);
255 }
256
257 #[test]
258 fn record_negative_adds_entry() {
259 let cache = NegCache::new(100);
260 assert!(!cache.is_known_negative(10, 20));
261 cache.record_negative(10, 20);
262 assert!(cache.is_known_negative(10, 20));
263 assert_eq!(cache.len(), 1);
264 }
265
266 #[test]
267 fn invalidate_file_removes_all_entries_for_hash() {
268 let cache = NegCache::new(100);
269 cache.record_negative(1, 100);
270 cache.record_negative(2, 100);
271 cache.record_negative(3, 200);
272 assert_eq!(cache.len(), 3);
273
274 cache.invalidate_file(100);
275 assert_eq!(cache.len(), 1);
276 assert!(!cache.is_known_negative(1, 100));
277 assert!(!cache.is_known_negative(2, 100));
278 assert!(cache.is_known_negative(3, 200));
279 }
280
281 #[test]
282 fn eviction_at_max_entries() {
283 let cache = NegCache::new(10);
284 for i in 0..15u64 {
285 cache.record_negative(i, i * 10);
286 }
287 assert!(cache.len() <= 10);
288 assert!(cache.stats().evictions > 0);
289 }
290
291 #[test]
292 fn clear_empties_cache() {
293 let cache = NegCache::new(100);
294 cache.record_negative(1, 2);
295 cache.record_negative(3, 4);
296 assert_eq!(cache.len(), 2);
297 cache.clear();
298 assert_eq!(cache.len(), 0);
299 }
300
301 #[test]
302 fn invalidate_query_removes_all_entries_for_query() {
303 let cache = NegCache::new(100);
304 cache.record_negative(42, 1);
305 cache.record_negative(42, 2);
306 cache.record_negative(99, 1);
307 cache.invalidate_query(42);
308 assert!(!cache.is_known_negative(42, 1));
309 assert!(!cache.is_known_negative(42, 2));
310 assert!(cache.is_known_negative(99, 1));
311 }
312
313 #[test]
314 fn query_fingerprint_is_deterministic() {
315 let a = query_fingerprint("hello world");
316 let b = query_fingerprint("hello world");
317 let c = query_fingerprint("goodbye");
318 assert_eq!(a, b);
319 assert_ne!(a, c);
320 }
321
322 #[test]
323 fn is_empty_works() {
324 let cache = NegCache::new(100);
325 assert!(cache.is_empty());
326 cache.record_negative(1, 2);
327 assert!(!cache.is_empty());
328 cache.invalidate_file(2);
329 assert!(cache.is_empty());
330 }
331}