cuckoofilter_mmap/
lib.rs

1//! A disk-based Cuckoo Filter implementation using memory-mapped files.
2//!
3//! This library provides a Cuckoo Filter that stores its data on disk, making it suitable for
4//! large datasets that may not fit into RAM. It uses `memmap2` for efficient file I/O.
5//!
6//! # Features
7//!
8//! - `insert`: Add an item to the filter.
9//! - `contains`: Check if an item is in the filter.
10//! - `delete`: Remove an item from the filter.
11//! - `builder`: Create a new, empty filter backed by a file with custom parameters.
12//! - `open`: Open an existing filter from a file.
13//!
14//! # Example
15//!
16//! ```rust,no_run
17//! use cuckoofilter_mmap::{CuckooFilter, FlushMode};
18//! use std::fs;
19//!
20//! fn main() -> Result<(), Box<dyn std::error::Error>> {
21//!     let path = "my_filter.db";
22//!     let capacity = 1_000_000;
23//!
24//!     // Create a new filter using the builder
25//!     let mut filter = CuckooFilter::<str>::builder(capacity)
26//!         .fingerprint_size(2)
27//!         .bucket_size(4)
28//!         .flush_mode(FlushMode::None)
29//!         .build(path)?;
30//!
31//!     // Insert an item
32//!     let item = "hello world";
33//!     if filter.insert(item)? {
34//!         println!("Item inserted successfully.");
35//!     }
36//!
37//!     // Check for the item
38//!     assert!(filter.contains(item));
39//!
40//!     // Delete the item
41//!     assert!(filter.delete(item)?);
42//!     assert!(!filter.contains(item));
43//!
44//!     // Clean up the file
45//!     fs::remove_file(path)?;
46//!
47//!     Ok(())
48//! }
49//! ```
50
51use log::{debug, trace};
52use memmap2::{MmapMut, MmapOptions};
53use rand::Rng;
54use std::fs::OpenOptions;
55use std::hash::{Hash, Hasher};
56use std::io::{self, Cursor};
57use std::marker::PhantomData;
58use std::path::Path;
59use std::str::FromStr;
60use thiserror::Error;
61
62// Metadata header at the start of the file.
63const METADATA_SIZE: usize = 4; // 2 bytes for fingerprint_size, 2 for bucket_size
64
65/// Custom error types for the Cuckoo Filter.
66#[derive(Error, Debug)]
67pub enum CuckooError {
68    #[error("I/O error")]
69    Io(#[from] io::Error),
70    #[error("Filter is full")]
71    Full,
72    #[error("Item not found")]
73    NotFound,
74    #[error("Invalid file format or metadata")]
75    InvalidFileFormat,
76    #[error("Invalid parameter: {0}")]
77    InvalidParameter(String),
78}
79
80/// A hashable type that can be used with the Cuckoo Filter.
81pub trait Item: Hash {}
82impl<T: Hash + ?Sized> Item for T {}
83
84/// The main Cuckoo Filter structure.
85pub struct CuckooFilter<T: ?Sized> {
86    mmap: MmapMut,
87    num_buckets: usize,
88    fingerprint_size: usize,
89    bucket_size: usize,
90    max_kicks: usize,
91    flush_mode: FlushMode,
92    op_count: usize,
93    _phantom: PhantomData<T>,
94}
95
96/// Defines the flushing strategy for the Cuckoo Filter.
97#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
98pub enum FlushMode {
99    #[default]
100    None,
101    Always,
102    AfterNOperations(usize),
103    AlwaysAsync,
104    AfterNOperationsAsync(usize),
105}
106
107impl FromStr for FlushMode {
108    type Err = CuckooError;
109
110    fn from_str(s: &str) -> Result<Self, Self::Err> {
111        match s.to_lowercase().as_str() {
112            "none" => Ok(FlushMode::None),
113            "always" => Ok(FlushMode::Always),
114            "always_async" => Ok(FlushMode::AlwaysAsync),
115            s if s.starts_with("after:") => {
116                let num_str = s.trim_start_matches("after:");
117                if let Ok(n) = num_str.parse::<usize>() {
118                    Ok(FlushMode::AfterNOperations(n))
119                } else {
120                    Err(CuckooError::InvalidParameter(
121                        "Invalid number for AfterNOperations".to_string(),
122                    ))
123                }
124            }
125            s if s.starts_with("after_async:") => {
126                let num_str = s.trim_start_matches("after_async:");
127                if let Ok(n) = num_str.parse::<usize>() {
128                    Ok(FlushMode::AfterNOperationsAsync(n))
129                } else {
130                    Err(CuckooError::InvalidParameter(
131                        "Invalid number for AfterNOperationsAsync".to_string(),
132                    ))
133                }
134            }
135            _ => Err(CuckooError::InvalidParameter(
136                "Invalid FlushMode string".to_string(),
137            )),
138        }
139    }
140}
141
142/// Represents the location and fingerprint of an item.
143struct Location {
144    bucket_index: usize,
145    fingerprint: Vec<u8>,
146}
147
148/// A builder for creating `CuckooFilter` instances with custom parameters.
149pub struct CuckooFilterBuilder {
150    capacity: usize,
151    fingerprint_size: usize,
152    bucket_size: usize,
153    max_kicks: usize,
154    flush_mode: FlushMode,
155    populate: bool,
156}
157
158impl CuckooFilterBuilder {
159    /// Creates a new builder with a given capacity and default parameters.
160    pub fn new(capacity: usize) -> Self {
161        Self {
162            capacity,
163            fingerprint_size: 2,
164            bucket_size: 4,
165            max_kicks: 500,
166            flush_mode: FlushMode::None,
167            populate: true,
168        }
169    }
170
171    /// Sets the size of the fingerprint in bytes.
172    pub fn fingerprint_size(mut self, size: usize) -> Self {
173        self.fingerprint_size = size;
174        self
175    }
176
177    /// Sets the number of fingerprints per bucket.
178    pub fn bucket_size(mut self, size: usize) -> Self {
179        self.bucket_size = size;
180        self
181    }
182
183    /// Sets the maximum number of displacements before giving up on an insert.
184    pub fn max_kicks(mut self, kicks: usize) -> Self {
185        self.max_kicks = kicks;
186        self
187    }
188
189    /// Sets the flush mode for the filter.
190    pub fn flush_mode(mut self, mode: FlushMode) -> Self {
191        self.flush_mode = mode;
192        self
193    }
194
195    /// Sets whether to pre-populate the memory map.
196    pub fn populate(mut self, populate: bool) -> Self {
197        self.populate = populate;
198        self
199    }
200
201    /// Builds the Cuckoo Filter, creating the backing file.
202    pub fn build<P: AsRef<Path>, T: Item + ?Sized>(
203        self,
204        path: P,
205    ) -> Result<CuckooFilter<T>, CuckooError> {
206        if self.bucket_size == 0 {
207            return Err(CuckooError::InvalidParameter(
208                "Bucket size cannot be zero".to_string(),
209            ));
210        }
211        if self.fingerprint_size == 0 {
212            return Err(CuckooError::InvalidParameter(
213                "Fingerprint size cannot be zero".to_string(),
214            ));
215        }
216        if self.fingerprint_size > 4 {
217            return Err(CuckooError::InvalidParameter(
218                "Fingerprint size cannot be greater than 4".to_string(),
219            ));
220        }
221
222        let mut num_buckets = (self.capacity as f64 / self.bucket_size as f64).ceil() as usize;
223        if num_buckets == 0 {
224            num_buckets = 1;
225        }
226        let num_buckets = num_buckets.next_power_of_two();
227        let data_size = num_buckets * self.bucket_size * self.fingerprint_size;
228        let file_size = METADATA_SIZE + data_size;
229
230        let file = OpenOptions::new()
231            .read(true)
232            .write(true)
233            .create(true)
234            .truncate(true)
235            .open(path)?;
236
237        file.set_len(file_size as u64)?;
238
239        let mut mmap_options = MmapOptions::new();
240        if self.populate {
241            mmap_options.populate();
242        }
243        let mut mmap = unsafe { mmap_options.map_mut(&file)? };
244
245        // Write metadata
246        mmap[0..2].copy_from_slice(&(self.fingerprint_size as u16).to_be_bytes());
247        mmap[2..4].copy_from_slice(&(self.bucket_size as u16).to_be_bytes());
248
249        Ok(CuckooFilter {
250            mmap,
251            num_buckets,
252            fingerprint_size: self.fingerprint_size,
253            bucket_size: self.bucket_size,
254            max_kicks: self.max_kicks,
255            flush_mode: self.flush_mode,
256            op_count: 0,
257            _phantom: PhantomData,
258        })
259    }
260}
261
262impl<T: ?Sized> CuckooFilter<T> {
263    /// Returns a builder for creating a `CuckooFilter`.
264    pub fn builder(capacity: usize) -> CuckooFilterBuilder {
265        CuckooFilterBuilder::new(capacity)
266    }
267}
268
269impl<T: Item + ?Sized> CuckooFilter<T> {
270    /// Opens an existing Cuckoo Filter from a file.
271    pub fn open<P: AsRef<Path>>(
272        path: P,
273        flush_mode: FlushMode,
274        max_kicks: usize,
275        populate: bool,
276    ) -> Result<Self, CuckooError> {
277        let file = OpenOptions::new().read(true).write(true).open(path)?;
278        let metadata = file.metadata()?;
279        let file_size = metadata.len() as usize;
280
281        if file_size < METADATA_SIZE {
282            return Err(CuckooError::InvalidFileFormat);
283        }
284
285        let mut mmap_options = MmapOptions::new();
286        if populate {
287            mmap_options.populate();
288        }
289        let mmap = unsafe { mmap_options.map_mut(&file)? };
290
291        let fingerprint_size = u16::from_be_bytes(mmap[0..2].try_into().unwrap()) as usize;
292        let bucket_size = u16::from_be_bytes(mmap[2..4].try_into().unwrap()) as usize;
293
294        if bucket_size == 0 || fingerprint_size == 0 {
295            return Err(CuckooError::InvalidFileFormat);
296        }
297
298        let data_size = file_size - METADATA_SIZE;
299        if data_size % (bucket_size * fingerprint_size) != 0 {
300            return Err(CuckooError::InvalidFileFormat);
301        }
302
303        let num_buckets = data_size / (bucket_size * fingerprint_size);
304
305        Ok(Self {
306            mmap,
307            num_buckets,
308            fingerprint_size,
309            bucket_size,
310            max_kicks,
311            flush_mode,
312            op_count: 0,
313            _phantom: PhantomData,
314        })
315    }
316
317    pub fn insert(&mut self, item: &T) -> Result<bool, CuckooError> {
318        let mut loc = self.location_for(item);
319
320        if self.contains_fingerprint(loc.bucket_index, &loc.fingerprint) {
321            trace!("Item already exists, not inserting.");
322            return Ok(false);
323        }
324
325        let inserted = if self.write_to_bucket(loc.bucket_index, &loc.fingerprint) {
326            trace!("Wrote fingerprint to initial bucket.");
327            true
328        } else {
329            let alt_index = self.alt_index(loc.bucket_index, &loc.fingerprint);
330            if self.write_to_bucket(alt_index, &loc.fingerprint) {
331                trace!("Wrote fingerprint to alternate bucket.");
332                true
333            } else {
334                let mut rng = rand::thread_rng();
335                let mut current_index = if rng.gen() {
336                    alt_index
337                } else {
338                    loc.bucket_index
339                };
340
341                for _ in 0..self.max_kicks {
342                    let evicted_fp = self.swap_fingerprint(current_index, &loc.fingerprint);
343                    loc.fingerprint = evicted_fp;
344                    current_index = self.alt_index(current_index, &loc.fingerprint);
345
346                    if self.write_to_bucket(current_index, &loc.fingerprint) {
347                        debug!("Successfully inserted item after kicking.");
348                        return Ok(true);
349                    }
350                }
351                false
352            }
353        };
354
355        if inserted {
356            self.handle_flush_on_op()?;
357            Ok(true)
358        } else {
359            Err(CuckooError::Full)
360        }
361    }
362
363    pub fn contains(&self, item: &T) -> bool {
364        let loc = self.location_for(item);
365        let alt_index = self.alt_index(loc.bucket_index, &loc.fingerprint);
366
367        self.contains_fingerprint(loc.bucket_index, &loc.fingerprint)
368            || self.contains_fingerprint(alt_index, &loc.fingerprint)
369    }
370
371    pub fn delete(&mut self, item: &T) -> Result<bool, CuckooError> {
372        let loc = self.location_for(item);
373        let alt_index = self.alt_index(loc.bucket_index, &loc.fingerprint);
374
375        let deleted = if self.delete_from_bucket(loc.bucket_index, &loc.fingerprint) {
376            true
377        } else {
378            self.delete_from_bucket(alt_index, &loc.fingerprint)
379        };
380
381        if deleted {
382            self.handle_flush_on_op()?;
383            Ok(true)
384        } else {
385            Ok(false)
386        }
387    }
388
389    pub fn flush(&self) -> io::Result<()> {
390        self.mmap.flush()
391    }
392
393    pub fn flush_async(&self) -> io::Result<()> {
394        self.mmap.flush_async()
395    }
396
397    pub fn num_buckets(&self) -> usize {
398        self.num_buckets
399    }
400
401    pub fn capacity(&self) -> usize {
402        self.num_buckets * self.bucket_size
403    }
404
405    fn location_for(&self, item: &T) -> Location {
406        let mut hasher = Murmur3Hasher::new();
407        item.hash(&mut hasher);
408        let hash_value = hasher.finish();
409
410        let bucket_index = (hash_value % self.num_buckets as u64) as usize;
411        let fingerprint_bytes = hash_value.to_be_bytes();
412        let mut fingerprint = vec![0u8; self.fingerprint_size];
413        fingerprint.copy_from_slice(&fingerprint_bytes[0..self.fingerprint_size]);
414
415        if fingerprint.iter().all(|&b| b == 0) {
416            fingerprint[self.fingerprint_size - 1] = 1;
417        }
418
419        Location {
420            bucket_index,
421            fingerprint,
422        }
423    }
424
425    fn alt_index(&self, bucket_index: usize, fingerprint: &[u8]) -> usize {
426        let mut hasher = Murmur3Hasher::new();
427        hasher.write(fingerprint);
428        let fp_hash = hasher.finish();
429        (bucket_index ^ (fp_hash % self.num_buckets as u64) as usize) % self.num_buckets
430    }
431
432    fn get_bucket_offset(&self, bucket_index: usize) -> usize {
433        METADATA_SIZE + (bucket_index * self.bucket_size * self.fingerprint_size)
434    }
435
436    fn get_fingerprint_offset(&self, bucket_index: usize, slot_index: usize) -> usize {
437        self.get_bucket_offset(bucket_index) + (slot_index * self.fingerprint_size)
438    }
439
440    fn get_fingerprint(&self, bucket_index: usize, slot_index: usize) -> &[u8] {
441        let offset = self.get_fingerprint_offset(bucket_index, slot_index);
442        &self.mmap[offset..offset + self.fingerprint_size]
443    }
444
445    fn set_fingerprint(&mut self, bucket_index: usize, slot_index: usize, fingerprint: &[u8]) {
446        let offset = self.get_fingerprint_offset(bucket_index, slot_index);
447        self.mmap[offset..offset + self.fingerprint_size].copy_from_slice(fingerprint);
448    }
449
450    fn contains_fingerprint(&self, bucket_index: usize, fingerprint: &[u8]) -> bool {
451        for i in 0..self.bucket_size {
452            if self.get_fingerprint(bucket_index, i) == fingerprint {
453                return true;
454            }
455        }
456        false
457    }
458
459    fn write_to_bucket(&mut self, bucket_index: usize, fingerprint: &[u8]) -> bool {
460        for i in 0..self.bucket_size {
461            if self
462                .get_fingerprint(bucket_index, i)
463                .iter()
464                .all(|&b| b == 0)
465            {
466                self.set_fingerprint(bucket_index, i, fingerprint);
467                return true;
468            }
469        }
470        false
471    }
472
473    fn delete_from_bucket(&mut self, bucket_index: usize, fingerprint: &[u8]) -> bool {
474        for i in 0..self.bucket_size {
475            if self.get_fingerprint(bucket_index, i) == fingerprint {
476                let zeros = vec![0u8; self.fingerprint_size];
477                self.set_fingerprint(bucket_index, i, &zeros);
478                return true;
479            }
480        }
481        false
482    }
483
484    fn swap_fingerprint(&mut self, bucket_index: usize, new_fingerprint: &[u8]) -> Vec<u8> {
485        let mut rng = rand::thread_rng();
486        let slot_to_evict = rng.gen_range(0..self.bucket_size);
487        let evicted_fp = self.get_fingerprint(bucket_index, slot_to_evict).to_vec();
488        self.set_fingerprint(bucket_index, slot_to_evict, new_fingerprint);
489        evicted_fp
490    }
491
492    fn handle_flush_on_op(&mut self) -> Result<(), CuckooError> {
493        match self.flush_mode {
494            FlushMode::None => Ok(()),
495            FlushMode::Always => {
496                self.mmap.flush()?;
497                Ok(())
498            }
499            FlushMode::AfterNOperations(n) => {
500                self.op_count += 1;
501                if self.op_count >= n {
502                    self.mmap.flush()?;
503                    self.op_count = 0;
504                }
505                Ok(())
506            }
507            FlushMode::AlwaysAsync => {
508                self.mmap.flush_async()?;
509                Ok(())
510            }
511            FlushMode::AfterNOperationsAsync(n) => {
512                self.op_count += 1;
513                if self.op_count >= n {
514                    self.mmap.flush_async()?;
515                    self.op_count = 0;
516                }
517                Ok(())
518            }
519        }
520    }
521}
522
523struct Murmur3Hasher(u128);
524
525impl Murmur3Hasher {
526    fn new() -> Self {
527        Murmur3Hasher(0)
528    }
529}
530
531impl Hasher for Murmur3Hasher {
532    fn finish(&self) -> u64 {
533        (self.0 >> 64) as u64 ^ (self.0 & 0xFFFFFFFFFFFFFFFF) as u64
534    }
535
536    fn write(&mut self, bytes: &[u8]) {
537        let mut cursor = Cursor::new(bytes);
538        self.0 = murmur3::murmur3_x64_128(&mut cursor, self.0 as u32).unwrap();
539    }
540}
541
542use std::sync::{Arc, RwLock};
543
544/// A thread-safe, concurrent version of the Cuckoo Filter.
545pub struct ConcurrentCuckooFilter<T: ?Sized> {
546    inner: Arc<RwLock<CuckooFilter<T>>>,
547}
548
549impl<T: Item + ?Sized> ConcurrentCuckooFilter<T> {
550    /// Creates a new thread-safe Cuckoo Filter from an existing filter.
551    pub fn new(filter: CuckooFilter<T>) -> Self {
552        Self {
553            inner: Arc::new(RwLock::new(filter)),
554        }
555    }
556
557    /// Opens an existing thread-safe Cuckoo Filter from a file.
558    pub fn open<P: AsRef<Path>>(
559        path: P,
560        flush_mode: FlushMode,
561        max_kicks: usize,
562        populate: bool,
563    ) -> Result<Self, CuckooError> {
564        let filter = CuckooFilter::open(path, flush_mode, max_kicks, populate)?;
565        Ok(Self {
566            inner: Arc::new(RwLock::new(filter)),
567        })
568    }
569
570    pub fn insert(&self, item: &T) -> Result<bool, CuckooError> {
571        self.inner.write().unwrap().insert(item)
572    }
573
574    pub fn contains(&self, item: &T) -> bool {
575        self.inner.read().unwrap().contains(item)
576    }
577
578    pub fn delete(&self, item: &T) -> Result<bool, CuckooError> {
579        self.inner.write().unwrap().delete(item)
580    }
581
582    pub fn flush(&self) -> io::Result<()> {
583        self.inner.read().unwrap().flush()
584    }
585
586    pub fn flush_async(&self) -> io::Result<()> {
587        self.inner.read().unwrap().flush_async()
588    }
589}
590
591impl<T: ?Sized> Clone for ConcurrentCuckooFilter<T> {
592    fn clone(&self) -> Self {
593        Self {
594            inner: self.inner.clone(),
595        }
596    }
597}
598
599#[cfg(test)]
600mod concurrent_tests {
601    use super::*;
602    use std::fs;
603    use std::thread;
604
605    fn get_test_file_path(name: &str) -> String {
606        format!("/tmp/cuckoo_concurrent_{}.db", name)
607    }
608
609    #[test]
610    fn test_capacity_file_size() {
611        let path = get_test_file_path("capacity_file_size");
612        let _ = fs::remove_file(&path);
613        let _filter: CuckooFilter<String> = CuckooFilter::<String>::builder(50000000)
614            .build(&path)
615            .unwrap();
616    }
617    #[test]
618    fn test_concurrent_insert_and_contains() {
619        let path = get_test_file_path("insert_contains");
620        let _ = fs::remove_file(&path);
621        let filter: CuckooFilter<String> = CuckooFilter::<String>::builder(100_000)
622            .build(&path)
623            .unwrap();
624        let filter = ConcurrentCuckooFilter {
625            inner: Arc::new(RwLock::new(filter)),
626        };
627
628        let items_to_insert: Vec<String> = (0..1000).map(|i| format!("item-{}", i)).collect();
629
630        let handles: Vec<_> = items_to_insert
631            .chunks(100)
632            .map(|chunk| {
633                let filter_clone = filter.clone();
634                let chunk_clone: Vec<String> = chunk.iter().map(|s| s.to_string()).collect();
635                thread::spawn(move || {
636                    for item in &chunk_clone {
637                        filter_clone.insert(item).unwrap();
638                    }
639                })
640            })
641            .collect();
642
643        for handle in handles {
644            handle.join().unwrap();
645        }
646
647        for item in &items_to_insert {
648            assert!(filter.contains(item));
649        }
650
651        assert!(!filter.contains(&"not-an-item".to_string()));
652
653        fs::remove_file(path).unwrap();
654    }
655
656    #[test]
657    fn test_concurrent_read_and_write() {
658        let path = get_test_file_path("read_write");
659        let _ = fs::remove_file(&path);
660        let filter: CuckooFilter<String> = CuckooFilter::<String>::builder(50_000)
661            .build(&path)
662            .unwrap();
663        let filter = ConcurrentCuckooFilter {
664            inner: Arc::new(RwLock::new(filter)),
665        };
666
667        for i in 0..500 {
668            filter.insert(&format!("pre-item-{}", i)).unwrap();
669        }
670
671        let mut handles = vec![];
672
673        for i in 0..4 {
674            let filter_clone = filter.clone();
675            let handle = thread::spawn(move || {
676                for j in 0..200 {
677                    let item = format!("writer-{}-item-{}", i, j);
678                    filter_clone.insert(&item).unwrap();
679                }
680            });
681            handles.push(handle);
682        }
683
684        for _ in 0..4 {
685            let filter_clone = filter.clone();
686            let handle = thread::spawn(move || {
687                for i in 0..500 {
688                    assert!(filter_clone.contains(&format!("pre-item-{}", i)));
689                }
690            });
691            handles.push(handle);
692        }
693
694        for handle in handles {
695            handle.join().unwrap();
696        }
697
698        for i in 0..4 {
699            for j in 0..200 {
700                assert!(filter.contains(&format!("writer-{}-item-{}", i, j)));
701            }
702        }
703
704        fs::remove_file(path).unwrap();
705    }
706}
707
708#[cfg(test)]
709mod tests {
710    use super::*;
711    use std::fs;
712    use std::sync::Once;
713
714    static INIT: Once = Once::new();
715
716    fn setup() {
717        INIT.call_once(|| {
718            let _ = env_logger::builder().is_test(true).try_init();
719        });
720    }
721
722    fn get_test_file_path(name: &str) -> String {
723        format!("/tmp/cuckoo_{}.db", name)
724    }
725
726    #[test]
727    fn test_new_and_open() {
728        setup();
729        let path = get_test_file_path("new_and_open");
730        let _ = fs::remove_file(&path);
731
732        let capacity = 1000;
733        {
734            let filter: CuckooFilter<str> =
735                CuckooFilter::<str>::builder(capacity).build(&path).unwrap();
736            assert_eq!(filter.capacity(), 1024);
737            assert_eq!(filter.num_buckets(), 256);
738        }
739
740        {
741            let filter = CuckooFilter::<str>::open(&path, FlushMode::None, 500, true).unwrap();
742            assert_eq!(filter.capacity(), 1024);
743            assert_eq!(filter.num_buckets(), 256);
744        }
745
746        fs::remove_file(&path).unwrap();
747    }
748
749    #[test]
750    fn test_insert_and_contains() {
751        setup();
752        let path = get_test_file_path("insert_and_contains");
753        let _ = fs::remove_file(&path);
754
755        let mut filter = CuckooFilter::<str>::builder(100).build(&path).unwrap();
756
757        let item1 = "hello";
758        let item2 = "world";
759        let item3 = "rust";
760
761        assert!(filter.insert(item1).unwrap());
762        assert!(filter.insert(item2).unwrap());
763
764        assert!(filter.contains(item1));
765        assert!(filter.contains(item2));
766        assert!(!filter.contains(item3));
767
768        assert!(!filter.insert(item1).unwrap());
769
770        fs::remove_file(&path).unwrap();
771    }
772
773    #[test]
774    fn test_delete() {
775        setup();
776        let path = get_test_file_path("delete");
777        let _ = fs::remove_file(&path);
778
779        let mut filter = CuckooFilter::<str>::builder(100).build(&path).unwrap();
780
781        let item = "test_item";
782        filter.insert(item).unwrap();
783        assert!(filter.contains(item));
784
785        assert!(filter.delete(item).unwrap());
786        assert!(!filter.contains(item));
787
788        assert!(!filter.delete("non_existent").unwrap());
789    }
790
791    #[test]
792    fn test_persistence() {
793        setup();
794        let path = get_test_file_path("persistence");
795        let _ = fs::remove_file(&path);
796
797        let item1 = "persistent_data";
798        let item2 = "another_one";
799
800        {
801            let mut filter = CuckooFilter::<str>::builder(100).build(&path).unwrap();
802            filter.insert(item1).unwrap();
803            filter.insert(item2).unwrap();
804            assert!(filter.contains(item1));
805            filter.flush().unwrap();
806        }
807
808        {
809            let mut filter = CuckooFilter::<str>::open(&path, FlushMode::None, 500, true).unwrap();
810            assert!(filter.contains(item1));
811            assert!(filter.contains(item2));
812            assert!(!filter.contains("not_present"));
813
814            filter.delete(item1).unwrap();
815            filter.flush().unwrap();
816        }
817
818        {
819            let filter = CuckooFilter::<str>::open(&path, FlushMode::None, 500, true).unwrap();
820            assert!(!filter.contains(item1));
821            assert!(filter.contains(item2));
822        }
823
824        fs::remove_file(&path).unwrap();
825    }
826
827    #[test]
828    fn test_full_filter() {
829        setup();
830        let path = get_test_file_path("full_filter");
831        let _ = fs::remove_file(&path);
832
833        let capacity = 16;
834        let mut filter = CuckooFilter::<String>::builder(capacity)
835            .build(&path)
836            .unwrap();
837
838        let mut inserted_count = 0;
839        for i in 0..100 {
840            let item = format!("item-{}", i);
841            if let Ok(true) = filter.insert(&item) {
842                inserted_count += 1;
843            } else {
844                break;
845            }
846        }
847
848        let result = filter.insert(&"one_more_item".to_string());
849        assert!(matches!(result, Err(CuckooError::Full)));
850
851        println!("Filter filled up after {} insertions.", inserted_count);
852
853        fs::remove_file(&path).unwrap();
854    }
855
856    #[test]
857    fn test_zero_capacity() {
858        setup();
859        let path = get_test_file_path("zero_capacity");
860        let _ = fs::remove_file(&path);
861
862        let mut filter = CuckooFilter::<str>::builder(0).build(&path).unwrap();
863        assert_eq!(filter.capacity(), 4);
864        assert_eq!(filter.num_buckets(), 1);
865
866        assert!(filter.insert("a").unwrap());
867        assert!(filter.insert("b").unwrap());
868        assert!(filter.insert("c").unwrap());
869        assert!(filter.insert("d").unwrap());
870
871        assert!(filter.contains("a"));
872        assert!(filter.contains("d"));
873
874        assert!(matches!(filter.insert("e"), Err(CuckooError::Full)));
875
876        fs::remove_file(&path).unwrap();
877    }
878
879    #[test]
880    fn test_flush_after_n_operations() {
881        setup();
882        let path = get_test_file_path("flush_after_n");
883        let _ = fs::remove_file(&path);
884
885        let n = 5;
886        let mut filter = CuckooFilter::<String>::builder(100)
887            .flush_mode(FlushMode::AfterNOperations(n))
888            .build(&path)
889            .unwrap();
890
891        for i in 0..n - 1 {
892            filter.insert(&format!("item-{}", i)).unwrap();
893        }
894        assert_eq!(filter.op_count, n - 1);
895
896        filter.insert(&format!("item-{}", n - 1)).unwrap();
897        assert_eq!(filter.op_count, 0);
898
899        let reopened_filter =
900            CuckooFilter::<String>::open(&path, FlushMode::None, 500, true).unwrap();
901        for i in 0..n {
902            assert!(reopened_filter.contains(&format!("item-{}", i)));
903        }
904
905        fs::remove_file(&path).unwrap();
906    }
907
908    #[test]
909    fn test_flush_always_async() {
910        setup();
911        let path = get_test_file_path("flush_always_async");
912        let _ = fs::remove_file(&path);
913
914        let mut filter = CuckooFilter::<String>::builder(100)
915            .flush_mode(FlushMode::AlwaysAsync)
916            .build(&path)
917            .unwrap();
918
919        filter.insert(&"test_item".to_string()).unwrap();
920
921        // Verify data persists after async flush
922        let reopened_filter =
923            CuckooFilter::<String>::open(&path, FlushMode::None, 500, true).unwrap();
924        assert!(reopened_filter.contains(&"test_item".to_string()));
925
926        fs::remove_file(&path).unwrap();
927    }
928
929    #[test]
930    fn test_flush_after_n_operations_async() {
931        setup();
932        let path = get_test_file_path("flush_after_n_async");
933        let _ = fs::remove_file(&path);
934
935        let n = 3;
936        let mut filter = CuckooFilter::<String>::builder(100)
937            .flush_mode(FlushMode::AfterNOperationsAsync(n))
938            .build(&path)
939            .unwrap();
940
941        for i in 0..n - 1 {
942            filter.insert(&format!("item-{}", i)).unwrap();
943        }
944        assert_eq!(filter.op_count, n - 1);
945
946        filter.insert(&format!("item-{}", n - 1)).unwrap();
947        assert_eq!(filter.op_count, 0);
948
949        let reopened_filter =
950            CuckooFilter::<String>::open(&path, FlushMode::None, 500, true).unwrap();
951        for i in 0..n {
952            assert!(reopened_filter.contains(&format!("item-{}", i)));
953        }
954
955        fs::remove_file(&path).unwrap();
956    }
957
958    #[test]
959    fn test_flush_mode_from_str() {
960        assert_eq!(FlushMode::from_str("none").unwrap(), FlushMode::None);
961        assert_eq!(FlushMode::from_str("always").unwrap(), FlushMode::Always);
962        assert_eq!(
963            FlushMode::from_str("always_async").unwrap(),
964            FlushMode::AlwaysAsync
965        );
966        assert_eq!(
967            FlushMode::from_str("after:5").unwrap(),
968            FlushMode::AfterNOperations(5)
969        );
970        assert_eq!(
971            FlushMode::from_str("after_async:10").unwrap(),
972            FlushMode::AfterNOperationsAsync(10)
973        );
974
975        assert!(FlushMode::from_str("invalid").is_err());
976        assert!(FlushMode::from_str("after:invalid").is_err());
977        assert!(FlushMode::from_str("after_async:invalid").is_err());
978    }
979}