1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#![allow(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::unreadable_literal,
clippy::panic,
clippy::manual_let_else
)]
use simdsieve::SimdSieve;
#[test]
fn test_adversarial_prefix_aliasing_and_overlap() {
    // Every pattern is a prefix of the next one, and the haystack is nothing
    // but the repeated byte they are all built from. At most offsets several
    // patterns match simultaneously, which is the classic stress case for
    // multi-pattern matchers.
    let patterns: Vec<&[u8]> = vec![b"A", b"AA", b"AAA", b"AAAA"];
    let haystack = b"AAAAAAAAAA".to_vec();

    // Gather every offset the sieve yields, then sort so the comparison
    // below does not depend on the order in which offsets are produced.
    let mut offsets: Vec<usize> = SimdSieve::new(&haystack, &patterns).unwrap().collect();
    offsets.sort_unstable();

    // The single-byte pattern "A" matches at each of the ten positions, so
    // every offset 0..=9 must be present. Comparing against the exact list
    // also rejects duplicates: an offset where several patterns match at once
    // must still be reported only one time.
    let expected = (0..10).collect::<Vec<usize>>();
    assert_eq!(
        offsets, expected,
        "Prefix aliasing failed to merge simultaneous identical-offset matches without duplicates."
    );
}
#[test]
fn test_adversarial_case_insensitive_aliasing() {
    // Two patterns that differ only by letter case. Once case is ignored they
    // both describe the same thing: two consecutive 'a'/'A' bytes.
    let patterns: Vec<&[u8]> = vec![b"Aa", b"aA"];

    // Haystack layout (index: byte):
    //   0:x  1:A  2:a  3:A  4:A  5:x
    let haystack = b"xAaAAx".to_vec();

    let mut offsets: Vec<usize> = SimdSieve::new_case_insensitive(&haystack, &patterns)
        .unwrap()
        .collect();
    offsets.sort_unstable();

    // Two-byte windows made only of 'a'/'A' start at index 1 ("Aa"),
    // index 2 ("aA") and index 3 ("AA"). The windows at 0 ("xA") and
    // 4 ("Ax") contain an 'x' and must not be reported. Each matching offset
    // is expected exactly once even though both patterns match it.
    assert_eq!(offsets, vec![1, 2, 3]);
}