flash_fuzzy_core/
bitap.rs1use crate::bloom::{to_lower, BloomFilter};
6use crate::types::SearchMatch;
7use crate::MAX_PATTERN_LEN;
8
9pub struct BitapSearcher {
11 char_masks: [u32; 256],
13 pattern_len: usize,
15 pattern_bloom: BloomFilter,
17}
18
19impl BitapSearcher {
20 pub fn new(pattern: &[u8]) -> Self {
22 let len = pattern.len().min(MAX_PATTERN_LEN);
23 let mut char_masks = [0u32; 256];
24 let mut bloom_bits = 0u64;
25
26 for (i, &ch) in pattern.iter().take(len).enumerate() {
27 let lower = to_lower(ch);
28 let bit = 1u32 << i;
29
30 char_masks[lower as usize] |= bit;
32
33 if lower != ch {
35 char_masks[ch as usize] |= bit;
36 }
37
38 let bloom_idx = (lower & 0x3F) as u64;
40 bloom_bits |= 1u64 << bloom_idx;
41 }
42
43 Self {
44 char_masks,
45 pattern_len: len,
46 pattern_bloom: BloomFilter(bloom_bits),
47 }
48 }
49
50 #[inline]
52 pub fn pattern_len(&self) -> usize {
53 self.pattern_len
54 }
55
56 #[inline]
58 pub fn bloom(&self) -> BloomFilter {
59 self.pattern_bloom
60 }
61
62 pub fn search(&self, text: &[u8], max_errors: u32) -> Option<SearchMatch> {
65 if self.pattern_len == 0 || text.is_empty() {
66 return None;
67 }
68
69 let pattern_len = self.pattern_len as u32;
70
71 let effective_max_errors = if pattern_len <= 3 {
73 0 } else if pattern_len <= 5 {
75 max_errors.min(1) } else {
77 max_errors
78 };
79
80 let mut r = [0u32; MAX_PATTERN_LEN + 1];
82
83 let pattern_mask = (1u32 << self.pattern_len) - 1;
84 let match_bit = 1u32 << (self.pattern_len - 1);
85
86 let mut best_errors = effective_max_errors + 1;
87 let mut best_pos = 0usize;
88
89 for (pos, &ch) in text.iter().enumerate() {
90 let lower = to_lower(ch);
91 let char_mask = self.char_masks[lower as usize];
92
93 let mut old_r = r[0];
95
96 r[0] = ((r[0] << 1) | 1) & char_mask;
98
99 for k in 1..=(effective_max_errors as usize) {
101 let new_r = r[k];
102
103 r[k] = (((r[k] << 1) | 1) & char_mask) | (old_r << 1) | old_r | (r[k - 1] << 1); old_r = new_r;
114 }
115
116 for i in 0..=(effective_max_errors as usize) {
118 r[i] &= pattern_mask;
119 }
120
121 for k in 0..=effective_max_errors {
123 if (r[k as usize] & match_bit) != 0 {
124 if k < best_errors {
125 best_errors = k;
126 best_pos = pos + 1;
127 }
128 break;
129 }
130 }
131 }
132
133 if best_errors <= effective_max_errors {
134 Some(SearchMatch {
135 errors: best_errors,
136 end_pos: best_pos,
137 })
138 } else {
139 None
140 }
141 }
142}
143
144pub fn compute_score(errors: u32, pattern_len: u32, end_pos: usize) -> u16 {
150 let base = 1000u32.saturating_sub(errors * 250);
151
152 let start_pos = end_pos.saturating_sub(pattern_len as usize);
153
154 let pos_bonus = if start_pos == 0 {
155 50
156 } else if start_pos < 10 {
157 25
158 } else {
159 0
160 };
161
162 let score = base + pos_bonus;
163 if score > 1000 { 1000 } else { score as u16 }
164}
165
166#[cfg(test)]
167mod tests {
168 use super::*;
169
170 #[test]
171 fn test_exact_match() {
172 let searcher = BitapSearcher::new(b"hello");
173 let result = searcher.search(b"hello world", 0);
174
175 assert!(result.is_some());
176 let m = result.unwrap();
177 assert_eq!(m.errors, 0);
178 assert_eq!(m.end_pos, 5);
179 }
180
181 #[test]
182 fn test_fuzzy_match() {
183 let searcher = BitapSearcher::new(b"hello");
184 let result = searcher.search(b"hallo world", 2);
185
186 assert!(result.is_some());
187 let m = result.unwrap();
188 assert_eq!(m.errors, 1); }
190
191 #[test]
192 fn test_no_match() {
193 let searcher = BitapSearcher::new(b"xyz");
194 let result = searcher.search(b"hello world", 0);
195
196 assert!(result.is_none());
197 }
198
199 #[test]
200 fn test_case_insensitive() {
201 let searcher = BitapSearcher::new(b"HELLO");
202 let result = searcher.search(b"hello world", 0);
203
204 assert!(result.is_some());
205 assert_eq!(result.unwrap().errors, 0);
206 }
207
208 #[test]
209 fn test_score_computation() {
210 assert_eq!(compute_score(0, 5, 5), 1000);
212
213 assert_eq!(compute_score(1, 5, 5), 800); assert_eq!(compute_score(0, 5, 15), 1000); }
219}