use fastbloom::BloomFilter;
use std::path::{Path, PathBuf};
/// Probabilistic set of file paths backed by a [`BloomFilter`].
///
/// Paths are keyed by the encoded bytes of their `OsStr` representation, so
/// two paths are treated as the same entry only when those bytes match.
#[derive(Clone)]
pub struct FileSetBloom {
    /// Underlying filter, built with a 0.01 target false-positive rate.
    bloom: BloomFilter,
    /// Capacity hint given to [`FileSetBloom::new`]; only read back by the
    /// reporting accessors.
    #[allow(dead_code)]
    expected_items: usize,
}

impl FileSetBloom {
    /// Creates an empty filter sized for `expected_items` entries with a
    /// 0.01 (1%) target false-positive rate.
    pub fn new(expected_items: usize) -> Self {
        Self {
            bloom: BloomFilter::with_false_pos(0.01).expected_items(expected_items),
            expected_items,
        }
    }

    /// Records `path` in the filter.
    pub fn insert(&mut self, path: &Path) {
        self.bloom.insert(path.as_os_str().as_encoded_bytes());
    }

    /// Returns whether the filter reports `path` as present.
    ///
    /// NOTE(review): as with any Bloom filter, a `true` answer is presumably
    /// subject to false positives — confirm against the `fastbloom` docs.
    pub fn contains(&self, path: &Path) -> bool {
        let key = path.as_os_str().as_encoded_bytes();
        self.bloom.contains(key)
    }

    /// Returns the capacity hint this filter was constructed with.
    #[allow(dead_code)]
    pub fn expected_items(&self) -> usize {
        self.expected_items
    }

    /// Returns a rough size estimate in bytes.
    ///
    /// The value is derived purely from the capacity hint (10 bits per
    /// expected item, divided by 8); the underlying filter is not queried.
    #[allow(dead_code)]
    pub fn memory_usage(&self) -> usize {
        let estimated_bits = self.expected_items * 10;
        estimated_bits / 8
    }
}
/// Holder for a batch-size setting.
///
/// This type only stores the value; no batching logic lives here.
#[allow(dead_code)]
pub struct BatchProcessor {
    /// Batch size as supplied at construction (no validation is applied).
    batch_size: usize,
}

#[allow(dead_code)]
impl BatchProcessor {
    /// Creates a processor with the default batch size of 10,000.
    pub fn new() -> Self {
        Self::with_batch_size(10_000)
    }

    /// Creates a processor with an explicit `batch_size`.
    pub fn with_batch_size(batch_size: usize) -> Self {
        BatchProcessor { batch_size }
    }

    /// Returns the configured batch size.
    pub fn batch_size(&self) -> usize {
        self.batch_size
    }
}

/// `Default` yields the same value as [`BatchProcessor::new`].
impl Default for BatchProcessor {
    fn default() -> Self {
        BatchProcessor::new()
    }
}
/// Handle that remembers where a state cache lives on disk.
///
/// Only the path is stored; nothing visible here reads or writes the file.
pub struct StateCache {
    /// Location of the cache, exactly as passed to [`StateCache::new`].
    cache_path: PathBuf,
}

impl StateCache {
    /// Creates a cache handle for `cache_path`, taking ownership of the path.
    #[allow(dead_code)]
    pub fn new(cache_path: PathBuf) -> Self {
        StateCache { cache_path }
    }

    /// Returns the stored cache location as a borrowed path.
    #[allow(dead_code)]
    pub fn cache_path(&self) -> &Path {
        self.cache_path.as_path()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Inserted paths are found; a path that was never inserted is not.
    #[test]
    fn test_bloom_filter_basic() {
        let present = [
            PathBuf::from("/test/file1.txt"),
            PathBuf::from("/test/file2.txt"),
        ];
        let absent = PathBuf::from("/test/file3.txt");

        let mut bloom = FileSetBloom::new(1000);
        for path in &present {
            bloom.insert(path);
        }

        for path in &present {
            assert!(bloom.contains(path));
        }
        assert!(!bloom.contains(&absent));
    }

    /// Every one of 1000 inserted paths must be reported as present.
    #[test]
    fn test_bloom_filter_many_items() {
        let mut paths: Vec<PathBuf> = Vec::with_capacity(1000);
        for index in 0..1000 {
            paths.push(PathBuf::from(format!("/test/file{}.txt", index)));
        }

        let mut bloom = FileSetBloom::new(10_000);
        paths.iter().for_each(|path| bloom.insert(path));

        for path in paths.iter() {
            assert!(bloom.contains(path), "Should find inserted path: {:?}", path);
        }

        // The lookup of a missing path is exercised but deliberately not
        // asserted on: the result is discarded.
        let non_existent = PathBuf::from("/test/nonexistent.txt");
        let _ = bloom.contains(&non_existent);
    }

    /// The size estimate for one million items falls between 1 MB and 2 MB.
    #[test]
    fn test_bloom_filter_memory_usage() {
        let memory = FileSetBloom::new(1_000_000).memory_usage();

        assert!(memory > 1_000_000, "Memory usage should be > 1MB");
        assert!(memory < 2_000_000, "Memory usage should be < 2MB");
        println!("Memory usage for 1M items: {} bytes ({:.2} MB)", memory, memory as f64 / 1_000_000.0);
    }

    /// `BatchProcessor::new` uses a batch size of 10,000.
    #[test]
    fn test_batch_processor_default() {
        assert_eq!(BatchProcessor::new().batch_size(), 10_000);
    }

    /// A custom batch size is stored and returned unchanged.
    #[test]
    fn test_batch_processor_custom() {
        assert_eq!(BatchProcessor::with_batch_size(5_000).batch_size(), 5_000);
    }

    /// The cache path passed to the constructor is returned by the accessor.
    #[test]
    fn test_state_cache_creation() {
        let location = PathBuf::from("/tmp/test.cache");
        let cache = StateCache::new(location);
        assert_eq!(cache.cache_path(), Path::new("/tmp/test.cache"));
    }
}