#![allow(clippy::cast_precision_loss, clippy::doc_markdown, clippy::explicit_iter_loop, clippy::uninlined_format_args, clippy::unreadable_literal)]
#![allow(clippy::expect_used, clippy::panic, clippy::unwrap_used)]
use flashsieve::{BlockIndexBuilder, ByteFilter, NgramBloom, NgramFilter};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
#[test]
fn exhaustive_all_65536_ngrams_zero_false_negatives() {
let mut bloom = NgramBloom::new(65_536).unwrap();
for a in 0_u8..=255 {
for b in 0_u8..=255 {
bloom.insert_ngram(a, b);
}
}
for a in 0_u8..=255 {
for b in 0_u8..=255 {
assert!(
bloom.maybe_contains(a, b),
"false negative for ({a}, {b}) after exhaustive insert"
);
}
}
}
#[test]
fn patterns_at_exact_block_boundaries() {
let block_size = 256;
let pattern = b"boundary";
let mut data = vec![b'x'; block_size * 3];
let offset1 = block_size - 2;
let offset2 = block_size * 2 - 3;
data[offset1..offset1 + pattern.len()].copy_from_slice(pattern);
data[offset2..offset2 + pattern.len()].copy_from_slice(pattern);
let index = BlockIndexBuilder::new()
.block_size(block_size)
.bloom_bits(1024)
.build(&data)
.unwrap();
let bf = ByteFilter::from_patterns(&[pattern.as_slice()]);
let nf = NgramFilter::from_patterns(&[pattern.as_slice()]);
let candidates = index.candidate_blocks(&bf, &nf);
for &offset in &[offset1, offset2] {
let expected_start = (offset / block_size) * block_size;
assert!(
candidates.iter().any(|r| {
expected_start >= r.offset && expected_start < r.offset + r.length
}),
"pattern at offset {offset} not found in candidates {candidates:?}"
);
}
}
#[test]
fn worst_case_repetitive_data() {
let data = vec![0x41; 10_000];
let index = BlockIndexBuilder::new()
.block_size(256)
.bloom_bits(1024)
.build(&data)
.unwrap();
let bf = ByteFilter::from_patterns(&[b"AAAA".as_slice()]);
let nf = NgramFilter::from_patterns(&[b"AAAA".as_slice()]);
let candidates = index.candidate_blocks(&bf, &nf);
assert_eq!(candidates.len(), 1);
assert_eq!(candidates[0].length, index.total_data_length());
}
#[test]
fn random_garbage_never_panics() {
let mut rng = StdRng::seed_from_u64(0x6A6B_6167);
for _ in 0..1000 {
let len = rng.gen_range(0..4096);
let data: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
let _ = BlockIndexBuilder::new()
.block_size(256)
.bloom_bits(1024)
.build(&data);
}
}
#[test]
fn random_serialized_garbage_never_panics() {
let mut rng = StdRng::seed_from_u64(0x6A6B_616A);
for _ in 0..1000 {
let len = rng.gen_range(0..2000);
let data: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
let _ = flashsieve::BlockIndex::from_bytes(&data);
let _ = flashsieve::BlockIndex::from_bytes_checked(&data);
let _ = flashsieve::MmapBlockIndex::from_slice(&data);
}
}
#[test]
fn collision_resistance_shared_byte_set() {
let data = b"abcdefghijklmnopqrstuvwxyz";
let index = BlockIndexBuilder::new()
.block_size(256)
.bloom_bits(1024)
.build(data)
.unwrap();
let patterns: Vec<&[u8]> = vec![
b"abcde",
b"edcba",
b"aebcd",
b"badce",
];
let bf = ByteFilter::from_patterns(&patterns);
let nf = NgramFilter::from_patterns(&patterns);
let candidates = index.candidate_blocks(&bf, &nf);
assert!(!candidates.is_empty());
}
#[test]
fn large_block_size_end_to_end() {
let block_size = 65536;
let mut data = vec![b'x'; block_size * 2];
data[1000..1006].copy_from_slice(b"secret");
let index = BlockIndexBuilder::new()
.block_size(block_size)
.bloom_bits(8192)
.build(&data)
.unwrap();
let bf = ByteFilter::from_patterns(&[b"secret".as_slice()]);
let nf = NgramFilter::from_patterns(&[b"secret".as_slice()]);
let candidates = index.candidate_blocks(&bf, &nf);
assert!(!candidates.is_empty());
}