1#![allow(clippy::needless_range_loop)]
7
8use crate::engine::damlev::{DamLevMatch, EditLimits};
9
10pub struct FuzzyNfa {
12 pattern: Vec<char>,
13 pattern_len: usize,
14 edit_limits: EditLimits,
15 case_insensitive: bool,
16 first_char: char,
17}
18
19impl FuzzyNfa {
20 #[must_use]
22 pub fn new(pattern: &str, edit_limits: EditLimits, case_insensitive: bool) -> Option<Self> {
23 let pattern: Vec<char> = if case_insensitive {
24 pattern.to_lowercase().chars().collect()
25 } else {
26 pattern.chars().collect()
27 };
28 let pattern_len = pattern.len();
29
30 if pattern_len == 0 || pattern_len > 63 {
31 return None;
32 }
33
34 let first_char = pattern[0];
35
36 Some(FuzzyNfa {
37 pattern,
38 pattern_len,
39 edit_limits,
40 case_insensitive,
41 first_char,
42 })
43 }
44
45 #[inline]
47 #[must_use]
48 pub fn find_first(&self, text: &str, threshold: f32) -> Option<DamLevMatch> {
49 let max_edits = self.edit_limits.max_edits as usize;
50
51 if self.pattern_len == 0 {
52 return Some(DamLevMatch {
53 start: 0,
54 end: 0,
55 insertions: 0,
56 deletions: 0,
57 substitutions: 0,
58 swaps: 0,
59 similarity: 1.0,
60 });
61 }
62
63 let text_chars: Vec<char> = if self.case_insensitive {
64 text.chars()
65 .map(|c| c.to_lowercase().next().unwrap_or(c))
66 .collect()
67 } else {
68 text.chars().collect()
69 };
70 let text_len = text_chars.len();
71
72 let mut char_to_byte: Vec<usize> = vec![0; text_len + 1];
73 let mut byte_pos = 0;
74 for (i, c) in text.char_indices() {
75 char_to_byte[i] = byte_pos;
76 byte_pos += c.len_utf8();
77 }
78 char_to_byte[text_len] = byte_pos;
79
80 if text_len == 0 {
81 if self.pattern_len <= max_edits {
82 let sim = 1.0 - (self.pattern_len as f32 / (self.pattern_len + max_edits) as f32);
83 return Some(DamLevMatch {
84 start: 0,
85 end: 0,
86 insertions: 0,
87 deletions: self.pattern_len as u8,
88 substitutions: 0,
89 swaps: 0,
90 similarity: sim,
91 });
92 }
93 return None;
94 }
95
96 let m = self.pattern_len;
97
98 let mut states: [(usize, usize, usize, u8, u8, u8); 128] = [(0, 0, 0, 0, 0, 0); 128];
99 let mut state_count = 0;
100 let mut seen: u128 = 0;
101
102 let encode_key =
103 |pat_pos: usize, edits: usize| -> u128 { ((pat_pos as u128) << 6) | (edits as u128) };
104
105 let mut pos = 0;
106 while pos < text_len {
107 let text_char = text_chars[pos];
108 let mut new_states: [(usize, usize, usize, u8, u8, u8); 128] =
109 [(0, 0, 0, 0, 0, 0); 128];
110 let mut new_state_count = 0;
111 let mut new_seen: u128 = 0;
112
113 let initial_key = encode_key(0, 0);
114 if seen & (1u128 << initial_key) == 0 {
115 states[0] = (0, 0, pos, 0, 0, 0);
116 state_count = 1;
117 }
118
119 for i in 0..state_count {
120 let (pat_pos, edits, start_pos, ins, del, sub) = states[i];
121
122 if pat_pos >= m || edits > max_edits {
123 continue;
124 }
125
126 let pat_char = self.pattern[pat_pos];
127
128 if text_char == pat_char {
129 let key = encode_key(pat_pos + 1, edits);
130 if new_seen & (1u128 << key) == 0 {
131 new_states[new_state_count] =
132 (pat_pos + 1, edits, start_pos, ins, del, sub);
133 new_state_count += 1;
134 new_seen |= 1u128 << key;
135 }
136 }
137
138 if edits < max_edits && text_char != pat_char {
139 let key = encode_key(pat_pos + 1, edits + 1);
140 if new_seen & (1u128 << key) == 0 {
141 new_states[new_state_count] =
142 (pat_pos + 1, edits + 1, start_pos, ins, del, sub + 1);
143 new_state_count += 1;
144 new_seen |= 1u128 << key;
145 }
146 }
147
148 if edits < max_edits {
149 let key = encode_key(pat_pos, edits + 1);
150 if new_seen & (1u128 << key) == 0 {
151 new_states[new_state_count] =
152 (pat_pos, edits + 1, start_pos, ins + 1, del, sub);
153 new_state_count += 1;
154 new_seen |= 1u128 << key;
155 }
156 }
157
158 if pat_pos + 1 < m && edits < max_edits {
159 let key = encode_key(pat_pos + 1, edits + 1);
160 if new_seen & (1u128 << key) == 0 {
161 new_states[new_state_count] =
162 (pat_pos + 1, edits + 1, start_pos, ins, del + 1, sub);
163 new_state_count += 1;
164 new_seen |= 1u128 << key;
165 }
166 }
167 }
168
169 for i in 0..new_state_count {
170 let (pat_pos, edits, start_pos, ins, del, sub) = new_states[i];
171 if pat_pos >= m {
172 let sim = 1.0 - (edits as f32 / (m + max_edits) as f32);
173 if sim >= threshold {
174 let match_end = pos + 1;
175 let byte_start = char_to_byte[start_pos];
176 let byte_end = char_to_byte[match_end];
177 return Some(DamLevMatch {
178 start: byte_start,
179 end: byte_end,
180 insertions: ins,
181 deletions: del,
182 substitutions: sub,
183 swaps: 0,
184 similarity: sim,
185 });
186 }
187 }
188 }
189
190 states = new_states;
191 state_count = new_state_count;
192 seen = new_seen;
193
194 if state_count == 0 {
195 pos += 1;
196 while pos < text_len && text_chars[pos] != self.first_char {
197 pos += 1;
198 }
199 } else {
200 pos += 1;
201 }
202 }
203
204 for i in 0..state_count {
205 let (pat_pos, edits, start_pos, ins, del, sub) = states[i];
206 if pat_pos >= m {
207 let sim = 1.0 - (edits as f32 / (m + max_edits) as f32);
208 if sim >= threshold {
209 let byte_start = char_to_byte[start_pos];
210 let byte_end = char_to_byte[text_len];
211 return Some(DamLevMatch {
212 start: byte_start,
213 end: byte_end,
214 insertions: ins,
215 deletions: del,
216 substitutions: sub,
217 swaps: 0,
218 similarity: sim,
219 });
220 }
221 }
222
223 let remaining = m - pat_pos;
224 let new_edits = edits + remaining;
225 if new_edits <= max_edits {
226 let new_del = del + remaining as u8;
227 let sim = 1.0 - (new_edits as f32 / (m + max_edits) as f32);
228 if sim >= threshold {
229 let byte_start = char_to_byte[start_pos];
230 let byte_end = char_to_byte[text_len];
231 return Some(DamLevMatch {
232 start: byte_start,
233 end: byte_end,
234 insertions: ins,
235 deletions: new_del,
236 substitutions: sub,
237 swaps: 0,
238 similarity: sim,
239 });
240 }
241 }
242 }
243
244 None
245 }
246}