use std::collections::HashMap;
pub struct LruCache {
max_size: usize,
keys: Vec<String>,
data: HashMap<String, i32>,
}
impl LruCache {
#[must_use]
pub fn new(max_size: usize) -> Self {
Self {
max_size,
keys: Vec::new(),
data: HashMap::new(),
}
}
#[must_use]
pub fn get(&self, key: &str) -> Option<i32> {
self.data.get(key).copied()
}
#[must_use]
pub fn has(&self, key: &str) -> bool {
self.data.contains_key(key)
}
pub fn put(&mut self, key: &str, value: i32) {
if self.data.contains_key(key) {
self.data.insert(key.to_string(), value);
return;
}
if self.max_size == 0 {
return;
}
if self.keys.len() >= self.max_size {
if let Some(oldest_key) = self.keys.first().cloned() {
self.keys.remove(0);
self.data.remove(&oldest_key);
}
}
self.keys.push(key.to_string());
self.data.insert(key.to_string(), value);
}
pub fn clear(&mut self) {
self.keys.clear();
self.data.clear();
}
#[must_use]
pub fn len(&self) -> usize {
self.keys.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
pub fn remove(&mut self, key: &str) {
if !self.data.contains_key(key) {
return;
}
if let Some(idx) = self.keys.iter().position(|k| k == key) {
self.keys.remove(idx);
}
self.data.remove(key);
}
}
pub struct Deduplicator {
cache: LruCache,
threshold: i32,
}
impl Deduplicator {
#[must_use]
pub fn new(capacity: usize) -> Self {
Self {
cache: LruCache::new(capacity),
threshold: 2,
}
}
#[must_use]
pub fn with_threshold(capacity: usize, threshold: i32) -> Self {
Self {
cache: LruCache::new(capacity),
threshold,
}
}
pub fn is_duplicate(&mut self, text: &str) -> bool {
let count = self.cache.get(text).unwrap_or(0);
let is_dup = count > self.threshold;
self.cache.put(text, count + 1);
is_dup
}
#[must_use]
pub fn check(&self, text: &str) -> bool {
self.cache
.get(text)
.is_some_and(|count| count > self.threshold)
}
#[must_use]
pub fn has_seen(&self, text: &str) -> bool {
self.cache.has(text)
}
pub fn clear(&mut self) {
self.cache.clear();
}
#[must_use]
pub fn len(&self) -> usize {
self.cache.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.cache.is_empty()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_deduplicator() {
let dedup = Deduplicator::new(100);
assert!(dedup.is_empty());
assert_eq!(dedup.len(), 0);
}
#[test]
fn test_is_duplicate() {
let mut dedup = Deduplicator::new(100);
assert!(!dedup.is_duplicate("test"));
assert!(!dedup.is_duplicate("test"));
assert!(!dedup.is_duplicate("test"));
assert!(dedup.is_duplicate("test"));
}
#[test]
fn test_check_without_adding() {
let mut dedup = Deduplicator::new(100);
assert!(!dedup.check("test"));
assert!(!dedup.has_seen("test"));
dedup.is_duplicate("test");
dedup.is_duplicate("test");
dedup.is_duplicate("test");
assert!(dedup.check("test"));
}
#[test]
fn test_has_seen() {
let mut dedup = Deduplicator::new(100);
assert!(!dedup.has_seen("test"));
dedup.is_duplicate("test");
assert!(dedup.has_seen("test"));
}
#[test]
fn test_custom_threshold() {
let mut dedup = Deduplicator::with_threshold(100, 1);
assert!(!dedup.is_duplicate("test")); assert!(!dedup.is_duplicate("test")); assert!(dedup.is_duplicate("test")); }
#[test]
fn test_clear() {
let mut dedup = Deduplicator::new(100);
dedup.is_duplicate("test");
assert!(!dedup.is_empty());
dedup.clear();
assert!(dedup.is_empty());
assert!(!dedup.has_seen("test"));
}
#[test]
fn test_capacity_eviction() {
let mut dedup = Deduplicator::new(2);
dedup.is_duplicate("a");
dedup.is_duplicate("b");
assert_eq!(dedup.len(), 2);
dedup.is_duplicate("c"); assert_eq!(dedup.len(), 2);
assert!(!dedup.has_seen("a"));
assert!(dedup.has_seen("b"));
assert!(dedup.has_seen("c"));
}
#[test]
fn test_lru_basic_put_get() {
let mut cache = LruCache::new(10);
cache.put("key1", 1);
cache.put("key2", 2);
assert_eq!(cache.get("key1"), Some(1));
assert_eq!(cache.get("key2"), Some(2));
assert_eq!(cache.get("key3"), None);
}
#[test]
fn test_lru_has() {
let mut cache = LruCache::new(10);
cache.put("exists", 1);
assert!(cache.has("exists"));
assert!(!cache.has("missing"));
}
#[test]
fn test_lru_eviction_at_capacity() {
let mut cache = LruCache::new(3);
cache.put("a", 1);
cache.put("b", 2);
cache.put("c", 3);
assert!(cache.has("a"));
assert!(cache.has("b"));
assert!(cache.has("c"));
cache.put("d", 4);
assert!(!cache.has("a")); assert!(cache.has("b"));
assert!(cache.has("c"));
assert!(cache.has("d"));
}
#[test]
fn test_lru_update_existing_key() {
let mut cache = LruCache::new(3);
cache.put("a", 1);
cache.put("b", 2);
cache.put("c", 3);
cache.put("a", 100);
assert_eq!(cache.get("a"), Some(100));
cache.put("d", 4);
assert!(!cache.has("a"));
}
#[test]
fn test_lru_remove() {
let mut cache = LruCache::new(10);
cache.put("a", 1);
cache.put("b", 2);
assert!(cache.has("a"));
cache.remove("a");
assert!(!cache.has("a"));
assert!(cache.has("b"));
}
#[test]
fn test_lru_clear() {
let mut cache = LruCache::new(10);
cache.put("a", 1);
cache.put("b", 2);
assert_eq!(cache.len(), 2);
cache.clear();
assert_eq!(cache.len(), 0);
assert!(cache.is_empty());
}
#[test]
fn test_lru_zero_capacity() {
let mut cache = LruCache::new(0);
cache.put("a", 1);
assert!(!cache.has("a"));
}
#[test]
fn test_lru_len() {
let mut cache = LruCache::new(10);
assert_eq!(cache.len(), 0);
cache.put("a", 1);
assert_eq!(cache.len(), 1);
cache.put("b", 2);
assert_eq!(cache.len(), 2);
}
#[test]
fn test_lru_multiple_evictions() {
let mut cache = LruCache::new(2);
cache.put("a", 1);
cache.put("b", 2);
cache.put("c", 3); cache.put("d", 4);
assert!(!cache.has("a"));
assert!(!cache.has("b"));
assert!(cache.has("c"));
assert!(cache.has("d"));
assert_eq!(cache.len(), 2);
}
#[test]
fn test_lru_remove_then_add() {
let mut cache = LruCache::new(3);
cache.put("a", 1);
cache.put("b", 2);
cache.remove("a");
cache.put("a", 10);
assert_eq!(cache.get("a"), Some(10));
assert_eq!(cache.len(), 2);
}
#[test]
fn test_lru_single_capacity() {
let mut cache = LruCache::new(1);
cache.put("a", 1);
assert!(cache.has("a"));
cache.put("b", 2);
assert!(!cache.has("a")); assert!(cache.has("b"));
}
#[test]
fn test_lru_empty_operations() {
let mut cache = LruCache::new(10);
assert!(cache.is_empty());
assert_eq!(cache.len(), 0);
assert_eq!(cache.get("missing"), None);
assert!(!cache.has("missing"));
cache.remove("missing");
assert!(cache.is_empty());
cache.clear();
assert!(cache.is_empty());
}
}