1use std::collections::HashMap;
6
7pub struct LruCache {
24 max_size: usize,
25 keys: Vec<String>,
26 data: HashMap<String, i32>,
27}
28
29impl LruCache {
30 #[must_use]
32 pub fn new(max_size: usize) -> Self {
33 Self {
34 max_size,
35 keys: Vec::new(),
36 data: HashMap::new(),
37 }
38 }
39
40 #[must_use]
42 pub fn get(&self, key: &str) -> Option<i32> {
43 self.data.get(key).copied()
44 }
45
46 #[must_use]
48 pub fn has(&self, key: &str) -> bool {
49 self.data.contains_key(key)
50 }
51
52 pub fn put(&mut self, key: &str, value: i32) {
54 if self.data.contains_key(key) {
55 self.data.insert(key.to_string(), value);
56 return;
57 }
58
59 if self.max_size == 0 {
60 return;
61 }
62
63 if self.keys.len() >= self.max_size {
64 if let Some(oldest_key) = self.keys.first().cloned() {
65 self.keys.remove(0);
66 self.data.remove(&oldest_key);
67 }
68 }
69
70 self.keys.push(key.to_string());
71 self.data.insert(key.to_string(), value);
72 }
73
74 pub fn clear(&mut self) {
76 self.keys.clear();
77 self.data.clear();
78 }
79
80 #[must_use]
82 pub fn len(&self) -> usize {
83 self.keys.len()
84 }
85
86 #[must_use]
88 pub fn is_empty(&self) -> bool {
89 self.keys.is_empty()
90 }
91
92 pub fn remove(&mut self, key: &str) {
94 if !self.data.contains_key(key) {
95 return;
96 }
97
98 if let Some(idx) = self.keys.iter().position(|k| k == key) {
100 self.keys.remove(idx);
101 }
102 self.data.remove(key);
103 }
104}
105
106pub struct Deduplicator {
123 cache: LruCache,
124 threshold: i32,
125}
126
127impl Deduplicator {
128 #[must_use]
135 pub fn new(capacity: usize) -> Self {
136 Self {
137 cache: LruCache::new(capacity),
138 threshold: 2,
139 }
140 }
141
142 #[must_use]
148 pub fn with_threshold(capacity: usize, threshold: i32) -> Self {
149 Self {
150 cache: LruCache::new(capacity),
151 threshold,
152 }
153 }
154
155 pub fn is_duplicate(&mut self, text: &str) -> bool {
159 let count = self.cache.get(text).unwrap_or(0);
160 let is_dup = count > self.threshold;
161
162 self.cache.put(text, count + 1);
164
165 is_dup
166 }
167
168 #[must_use]
170 pub fn check(&self, text: &str) -> bool {
171 self.cache
172 .get(text)
173 .is_some_and(|count| count > self.threshold)
174 }
175
176 #[must_use]
178 pub fn has_seen(&self, text: &str) -> bool {
179 self.cache.has(text)
180 }
181
182 pub fn clear(&mut self) {
184 self.cache.clear();
185 }
186
187 #[must_use]
189 pub fn len(&self) -> usize {
190 self.cache.len()
191 }
192
193 #[must_use]
195 pub fn is_empty(&self) -> bool {
196 self.cache.is_empty()
197 }
198}
199
200#[cfg(test)]
201mod tests {
202 use super::*;
203
204 #[test]
205 fn test_new_deduplicator() {
206 let dedup = Deduplicator::new(100);
207 assert!(dedup.is_empty());
208 assert_eq!(dedup.len(), 0);
209 }
210
211 #[test]
212 fn test_is_duplicate() {
213 let mut dedup = Deduplicator::new(100);
214
215 assert!(!dedup.is_duplicate("test"));
217 assert!(!dedup.is_duplicate("test"));
218 assert!(!dedup.is_duplicate("test"));
219
220 assert!(dedup.is_duplicate("test"));
222 }
223
224 #[test]
225 fn test_check_without_adding() {
226 let mut dedup = Deduplicator::new(100);
227
228 assert!(!dedup.check("test"));
230 assert!(!dedup.has_seen("test"));
231
232 dedup.is_duplicate("test");
234 dedup.is_duplicate("test");
235 dedup.is_duplicate("test");
236
237 assert!(dedup.check("test"));
239 }
240
241 #[test]
242 fn test_has_seen() {
243 let mut dedup = Deduplicator::new(100);
244
245 assert!(!dedup.has_seen("test"));
246 dedup.is_duplicate("test");
247 assert!(dedup.has_seen("test"));
248 }
249
250 #[test]
251 fn test_custom_threshold() {
252 let mut dedup = Deduplicator::with_threshold(100, 1);
253
254 assert!(!dedup.is_duplicate("test")); assert!(!dedup.is_duplicate("test")); assert!(dedup.is_duplicate("test")); }
259
260 #[test]
261 fn test_clear() {
262 let mut dedup = Deduplicator::new(100);
263
264 dedup.is_duplicate("test");
265 assert!(!dedup.is_empty());
266
267 dedup.clear();
268 assert!(dedup.is_empty());
269 assert!(!dedup.has_seen("test"));
270 }
271
272 #[test]
273 fn test_capacity_eviction() {
274 let mut dedup = Deduplicator::new(2);
275
276 dedup.is_duplicate("a");
277 dedup.is_duplicate("b");
278 assert_eq!(dedup.len(), 2);
279
280 dedup.is_duplicate("c"); assert_eq!(dedup.len(), 2);
282 assert!(!dedup.has_seen("a"));
283 assert!(dedup.has_seen("b"));
284 assert!(dedup.has_seen("c"));
285 }
286
287 #[test]
290 fn test_lru_basic_put_get() {
291 let mut cache = LruCache::new(10);
292
293 cache.put("key1", 1);
294 cache.put("key2", 2);
295
296 assert_eq!(cache.get("key1"), Some(1));
297 assert_eq!(cache.get("key2"), Some(2));
298 assert_eq!(cache.get("key3"), None);
299 }
300
301 #[test]
302 fn test_lru_has() {
303 let mut cache = LruCache::new(10);
304
305 cache.put("exists", 1);
306
307 assert!(cache.has("exists"));
308 assert!(!cache.has("missing"));
309 }
310
311 #[test]
312 fn test_lru_eviction_at_capacity() {
313 let mut cache = LruCache::new(3);
314
315 cache.put("a", 1);
316 cache.put("b", 2);
317 cache.put("c", 3);
318
319 assert!(cache.has("a"));
321 assert!(cache.has("b"));
322 assert!(cache.has("c"));
323
324 cache.put("d", 4);
326
327 assert!(!cache.has("a")); assert!(cache.has("b"));
329 assert!(cache.has("c"));
330 assert!(cache.has("d"));
331 }
332
333 #[test]
334 fn test_lru_update_existing_key() {
335 let mut cache = LruCache::new(3);
336
337 cache.put("a", 1);
338 cache.put("b", 2);
339 cache.put("c", 3);
340
341 cache.put("a", 100);
343
344 assert_eq!(cache.get("a"), Some(100));
345
346 cache.put("d", 4);
348
349 assert!(!cache.has("a"));
351 }
352
353 #[test]
354 fn test_lru_remove() {
355 let mut cache = LruCache::new(10);
356
357 cache.put("a", 1);
358 cache.put("b", 2);
359
360 assert!(cache.has("a"));
361 cache.remove("a");
362 assert!(!cache.has("a"));
363 assert!(cache.has("b"));
364 }
365
366 #[test]
367 fn test_lru_clear() {
368 let mut cache = LruCache::new(10);
369
370 cache.put("a", 1);
371 cache.put("b", 2);
372
373 assert_eq!(cache.len(), 2);
374 cache.clear();
375 assert_eq!(cache.len(), 0);
376 assert!(cache.is_empty());
377 }
378
379 #[test]
380 fn test_lru_zero_capacity() {
381 let mut cache = LruCache::new(0);
382
383 cache.put("a", 1);
385 assert!(!cache.has("a"));
386 }
387
388 #[test]
389 fn test_lru_len() {
390 let mut cache = LruCache::new(10);
391
392 assert_eq!(cache.len(), 0);
393 cache.put("a", 1);
394 assert_eq!(cache.len(), 1);
395 cache.put("b", 2);
396 assert_eq!(cache.len(), 2);
397 }
398
399 #[test]
400 fn test_lru_multiple_evictions() {
401 let mut cache = LruCache::new(2);
402
403 cache.put("a", 1);
404 cache.put("b", 2);
405 cache.put("c", 3); cache.put("d", 4); assert!(!cache.has("a"));
409 assert!(!cache.has("b"));
410 assert!(cache.has("c"));
411 assert!(cache.has("d"));
412 assert_eq!(cache.len(), 2);
413 }
414
415 #[test]
416 fn test_lru_remove_then_add() {
417 let mut cache = LruCache::new(3);
418
419 cache.put("a", 1);
420 cache.put("b", 2);
421 cache.remove("a");
422
423 cache.put("a", 10);
425 assert_eq!(cache.get("a"), Some(10));
426 assert_eq!(cache.len(), 2);
427 }
428
429 #[test]
430 fn test_lru_single_capacity() {
431 let mut cache = LruCache::new(1);
432
433 cache.put("a", 1);
434 assert!(cache.has("a"));
435
436 cache.put("b", 2);
437 assert!(!cache.has("a")); assert!(cache.has("b"));
439 }
440
441 #[test]
442 fn test_lru_empty_operations() {
443 let mut cache = LruCache::new(10);
444
445 assert!(cache.is_empty());
446 assert_eq!(cache.len(), 0);
447 assert_eq!(cache.get("missing"), None);
448 assert!(!cache.has("missing"));
449
450 cache.remove("missing");
452 assert!(cache.is_empty());
453
454 cache.clear();
456 assert!(cache.is_empty());
457 }
458}