fuzzy_regex/engine/bitap.rs
1//! Bitap algorithm for fast fuzzy string matching.
2//!
3//! The Bitap algorithm (also known as shift-or or shift-and) uses bitwise
4//! operations to perform fuzzy matching very efficiently for short patterns
5//! (up to 64 characters).
6//!
7//! Time complexity: O(n × k) where n = text length, k = max edits
8//! Each step involves only a few bitwise operations.
9
10#![allow(
11 clippy::needless_range_loop,
12 clippy::items_after_statements,
13 clippy::too_many_lines,
14 clippy::inline_always
15)]
16
17use super::damlev::{DamLevMatch, EditLimits};
18use super::hash::FxHashMap;
19
20/// Fast UTF-8 character decoder - avoids `str::from_utf8` + `chars().next()` overhead.
21/// Returns (char, `byte_length`).
22///
23/// # Safety
24/// Assumes input is valid UTF-8. Invalid sequences return replacement char.
25#[inline(always)]
26fn decode_utf8_char_fast(bytes: &[u8], pos: usize) -> (char, usize) {
27 let b0 = bytes[pos];
28
29 if b0 < 128 {
30 // ASCII: single byte
31 (b0 as char, 1)
32 } else if b0 < 224 {
33 // 2-byte UTF-8 (Latin Extended, Cyrillic, etc.)
34 if pos + 1 < bytes.len() {
35 let b1 = bytes[pos + 1];
36 let codepoint = ((u32::from(b0) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
37 // SAFETY: Valid 2-byte UTF-8 always produces valid codepoint in 0x80-0x7FF range
38 (unsafe { char::from_u32_unchecked(codepoint) }, 2)
39 } else {
40 ('\u{FFFD}', 1)
41 }
42 } else if b0 < 240 {
43 // 3-byte UTF-8 (CJK, etc.)
44 if pos + 2 < bytes.len() {
45 let b1 = bytes[pos + 1];
46 let b2 = bytes[pos + 2];
47 let codepoint = ((u32::from(b0) & 0x0F) << 12)
48 | ((u32::from(b1) & 0x3F) << 6)
49 | (u32::from(b2) & 0x3F);
50 // SAFETY: Valid 3-byte UTF-8 produces valid codepoint (excluding surrogates handled by validation)
51 (unsafe { char::from_u32_unchecked(codepoint) }, 3)
52 } else {
53 ('\u{FFFD}', 1)
54 }
55 } else {
56 // 4-byte UTF-8 (Emoji, etc.)
57 if pos + 3 < bytes.len() {
58 let b1 = bytes[pos + 1];
59 let b2 = bytes[pos + 2];
60 let b3 = bytes[pos + 3];
61 let codepoint = ((u32::from(b0) & 0x07) << 18)
62 | ((u32::from(b1) & 0x3F) << 12)
63 | ((u32::from(b2) & 0x3F) << 6)
64 | (u32::from(b3) & 0x3F);
65 // SAFETY: Valid 4-byte UTF-8 always produces valid codepoint
66 (unsafe { char::from_u32_unchecked(codepoint) }, 4)
67 } else {
68 ('\u{FFFD}', 1)
69 }
70 }
71}
72
73/// Maximum pattern length supported by Bitap (using u64 bitmasks).
74pub const MAX_PATTERN_LEN: usize = 64;
75
76/// Bitap matcher for fuzzy string matching.
77#[derive(Debug)]
78pub struct BitapMatcher {
79 pattern: String,
80 pattern_chars: Vec<char>,
81 pattern_len: usize,
82 limits: EditLimits,
83 case_insensitive: bool,
84 /// Character masks: for each character, a bitmask where bit i is 0
85 /// if pattern[i] == character.
86 char_masks: FxHashMap<char, u64>,
87 /// ASCII byte masks for O(1) lookup (all 1s = no match).
88 byte_masks: [u64; 128],
89 /// Boyer-Moore style skip table: how far to skip when a byte is NOT in pattern.
90 /// For bytes in pattern: 0 (can't skip). For bytes not in pattern: `pattern_len` - `max_edits`.
91 skip_table: [u8; 256],
92 /// Whether the pattern is pure ASCII.
93 is_ascii: bool,
94 /// Mask with 1 in the position of the last pattern character.
95 accept_mask: u64,
96 /// Unicode block masks for O(1) lookup of non-ASCII characters.
97 /// If all pattern chars are in the same 256-codepoint block, we use this instead of `HashMap`.
98 /// `block_base` is the start codepoint (e.g., 0x0400 for Cyrillic).
99 unicode_block_base: u32,
100 unicode_block_masks: Option<Box<[u64; 256]>>,
101}
102
103impl BitapMatcher {
104 /// Create a new Bitap matcher.
105 ///
106 /// Returns None if the pattern is too long (> 64 chars).
107 pub fn new(pattern: &str, limits: EditLimits, case_insensitive: bool) -> Option<Self> {
108 let pattern_chars: Vec<char> = if case_insensitive {
109 pattern.to_lowercase().chars().collect()
110 } else {
111 pattern.chars().collect()
112 };
113
114 if pattern_chars.len() > MAX_PATTERN_LEN || pattern_chars.is_empty() {
115 return None;
116 }
117
118 let pattern_len = pattern_chars.len();
119 let is_ascii = pattern_chars.iter().all(char::is_ascii);
120
121 // Build character masks
122 // For each character in the alphabet, create a bitmask where bit i is 0
123 // if pattern[i] matches the character, 1 otherwise.
124 // We use the "shift-or" variant where 0 means match.
125 let mut char_masks: FxHashMap<char, u64> = FxHashMap::default();
126 let mut byte_masks = [!0u64; 128]; // All 1s = no match
127
128 for (i, &ch) in pattern_chars.iter().enumerate() {
129 // Set bit i to 0 for this character (start with all 1s, clear bit i)
130 let mask = char_masks.entry(ch).or_insert(!0u64);
131 *mask &= !(1u64 << i);
132
133 // Also update byte_masks for ASCII characters
134 if ch.is_ascii() {
135 let byte = ch as u8;
136 byte_masks[byte as usize] &= !(1u64 << i);
137 // Handle case insensitivity for ASCII
138 if case_insensitive {
139 if byte.is_ascii_lowercase() {
140 byte_masks[byte.to_ascii_uppercase() as usize] &= !(1u64 << i);
141 } else if byte.is_ascii_uppercase() {
142 byte_masks[byte.to_ascii_lowercase() as usize] &= !(1u64 << i);
143 }
144 }
145 }
146 }
147
148 // Accept mask: 1 in position (pattern_len - 1)
149 let accept_mask = 1u64 << (pattern_len - 1);
150
151 // Build Boyer-Moore style skip table
152 // If a byte is not in the pattern, we can skip ahead when we see it
153 let max_edits = limits.max_edits as usize;
154 let skip_distance = pattern_len.saturating_sub(max_edits).max(1) as u8;
155 let mut skip_table = [skip_distance; 256];
156
157 // Bytes that ARE in the pattern can't be skipped
158 for &ch in &pattern_chars {
159 if ch.is_ascii() {
160 skip_table[ch as usize] = 0;
161 // Also mark case variants
162 if case_insensitive {
163 if ch.is_ascii_lowercase() {
164 skip_table[ch.to_ascii_uppercase() as usize] = 0;
165 } else if ch.is_ascii_uppercase() {
166 skip_table[ch.to_ascii_lowercase() as usize] = 0;
167 }
168 }
169 }
170 }
171
172 // Build Unicode block lookup table for non-ASCII patterns.
173 // If all pattern chars fall within a single 256-codepoint block, we can use O(1) array lookup.
174 let (unicode_block_base, unicode_block_masks) =
175 Self::build_unicode_block_masks(&pattern_chars, &char_masks);
176
177 Some(BitapMatcher {
178 pattern: pattern.to_string(),
179 pattern_chars,
180 pattern_len,
181 limits,
182 case_insensitive,
183 char_masks,
184 byte_masks,
185 skip_table,
186 is_ascii,
187 accept_mask,
188 unicode_block_base,
189 unicode_block_masks,
190 })
191 }
192
193 /// Returns the original pattern string.
194 #[must_use]
195 pub fn pattern(&self) -> &str {
196 &self.pattern
197 }
198
199 /// Returns the pattern as a slice of characters.
200 #[must_use]
201 pub fn pattern_chars(&self) -> &[char] {
202 &self.pattern_chars
203 }
204
205 /// Build Unicode block lookup table for O(1) character mask access.
206 /// Returns (`block_base`, `Some(masks)`) if all non-ASCII chars are in a single 256-codepoint block.
207 fn build_unicode_block_masks(
208 pattern_chars: &[char],
209 char_masks: &FxHashMap<char, u64>,
210 ) -> (u32, Option<Box<[u64; 256]>>) {
211 // Find non-ASCII characters
212 let non_ascii: Vec<char> = pattern_chars
213 .iter()
214 .filter(|c| !c.is_ascii())
215 .copied()
216 .collect();
217
218 if non_ascii.is_empty() {
219 return (0, None);
220 }
221
222 // Check if all non-ASCII chars are in the same 256-codepoint block
223 let first_cp = non_ascii[0] as u32;
224 let block_base = first_cp & !0xFF; // Round down to block start (e.g., 0x0400 for Cyrillic)
225
226 let all_in_block = non_ascii.iter().all(|&ch| {
227 let cp = ch as u32;
228 (cp & !0xFF) == block_base
229 });
230
231 if !all_in_block {
232 return (0, None);
233 }
234
235 // Build the lookup table
236 let mut masks = Box::new([!0u64; 256]);
237 for (&ch, &mask) in char_masks {
238 let cp = ch as u32;
239 if (cp & !0xFF) == block_base {
240 let idx = (cp & 0xFF) as usize;
241 masks[idx] = mask;
242 }
243 }
244
245 (block_base, Some(masks))
246 }
247
248 /// Get character mask for a character (all 1s if not in pattern).
249 #[inline(always)]
250 fn get_mask(&self, ch: char) -> u64 {
251 let cp = ch as u32;
252
253 // Fast path: check Unicode block lookup table
254 if let Some(ref masks) = self.unicode_block_masks
255 && (cp & !0xFF) == self.unicode_block_base
256 {
257 return masks[(cp & 0xFF) as usize];
258 }
259
260 // Fallback to HashMap
261 *self.char_masks.get(&ch).unwrap_or(&!0u64)
262 }
263
264 /// Get mask directly from 2-byte UTF-8 sequence (avoids char decode).
265 /// Returns (mask, 2) if successful, or falls back to `decode_utf8_char_fast`.
266 #[inline(always)]
267 fn get_mask_2byte(&self, b0: u8, b1: u8) -> u64 {
268 if let Some(ref masks) = self.unicode_block_masks {
269 // Compute codepoint index directly from UTF-8 bytes
270 // For 2-byte UTF-8: codepoint = ((b0 & 0x1F) << 6) | (b1 & 0x3F)
271 // Block check: (codepoint & !0xFF) == block_base
272 // Index: codepoint & 0xFF
273 let codepoint_low6 = (u32::from(b0) & 0x1F) << 6;
274 let codepoint = codepoint_low6 | (u32::from(b1) & 0x3F);
275 if (codepoint & !0xFF) == self.unicode_block_base {
276 return masks[(codepoint & 0xFF) as usize];
277 }
278 }
279 // Fallback: decode to char and lookup
280 let codepoint = ((u32::from(b0) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
281 let ch = unsafe { char::from_u32_unchecked(codepoint) };
282 *self.char_masks.get(&ch).unwrap_or(&!0u64)
283 }
284
285 /// Get Boyer-Moore skip distance for a byte.
286 /// Returns 0 if byte is in pattern, otherwise returns skip distance.
287 #[inline(always)]
288 #[must_use]
289 pub fn get_skip(&self, byte: u8) -> usize {
290 self.skip_table[byte as usize] as usize
291 }
292
293 /// Find the next position worth checking using Boyer-Moore skipping.
294 /// Scans from `start` looking for a byte that's in the pattern.
295 /// Returns the position of the first pattern-relevant byte, or `text.len()` if none.
296 #[inline]
297 #[must_use]
298 pub fn find_next_candidate(&self, text: &[u8], start: usize) -> usize {
299 let mut pos = start;
300 while pos < text.len() {
301 let skip = self.skip_table[text[pos] as usize];
302 if skip == 0 {
303 return pos;
304 }
305 pos += skip as usize;
306 }
307 text.len()
308 }
309
310 /// Calculate similarity score.
311 fn calc_similarity(&self, edits: u8, insertions: u8, deletions: u8) -> f32 {
312 let pattern_len = self.pattern_len as f32;
313 if pattern_len == 0.0 {
314 return 1.0;
315 }
316
317 let edit_distance = f32::from(edits);
318 let matched_len = pattern_len + f32::from(insertions) - f32::from(deletions);
319 let max_len = pattern_len.max(matched_len).max(1.0);
320
321 (1.0 - edit_distance / max_len).max(0.0)
322 }
323
324 /// Myers' bit-vector algorithm for fast O(n) edit distance computation.
325 /// Returns the edit distance between pattern and text.
326 ///
327 /// This is much faster than full DP (O(n) vs O(m×n)) but doesn't give
328 /// breakdown of edit types. Use for fast verification.
329 #[inline]
330 fn compute_edit_distance_myers(&self, text_chars: &[char]) -> u8 {
331 let m = self.pattern_len;
332 let n = text_chars.len();
333
334 if m == 0 {
335 return n as u8;
336 }
337 if n == 0 {
338 return m as u8;
339 }
340
341 // Myers' algorithm using our precomputed masks
342 // Note: Our masks have bit=0 for match, which is the inverse of typical Myers
343 // We adapt by inverting the eq mask
344 let mut pv = !0u64; // positive vertical delta (all 1s)
345 let mut mv = 0u64; // negative vertical delta (all 0s)
346 let mut score = m as u8;
347
348 let mask = 1u64 << (m - 1);
349
350 for &text_char in text_chars {
351 // Get pattern equality mask (bit i is 1 if pattern[i] matches text_char)
352 // Our stored masks have 0 for match, so we invert
353 let eq = if text_char.is_ascii() {
354 !self.byte_masks[text_char as usize]
355 } else {
356 !self.get_mask(text_char)
357 };
358
359 let xv = eq | mv;
360 let xh = ((eq & pv).wrapping_add(pv)) ^ pv | eq;
361
362 let ph = mv | !(xh | pv);
363 let mh = pv & xh;
364
365 // Update score
366 if (ph & mask) != 0 {
367 score = score.saturating_add(1);
368 }
369 if (mh & mask) != 0 {
370 score = score.saturating_sub(1);
371 }
372
373 // Shift for next column
374 let ph_shift = (ph << 1) | 1;
375 let mh_shift = mh << 1;
376
377 pv = mh_shift | !(xv | ph_shift);
378 mv = ph_shift & xv;
379 }
380
381 score
382 }
383
384 /// Fast edit breakdown using Myers for distance + length heuristic for breakdown.
385 /// Returns (insertions, deletions, substitutions, swaps).
386 ///
387 /// This approximates the breakdown based on:
388 /// - Total distance from Myers (exact)
389 /// - Length difference (insertions - deletions = `text_len` - `pattern_len`)
390 ///
391 /// Transpositions are counted as substitutions in this fast version.
392 #[inline]
393 #[allow(dead_code)]
394 fn compute_edit_breakdown_fast(&self, text_chars: &[char]) -> (u8, u8, u8, u8) {
395 let m = self.pattern_len;
396 let n = text_chars.len();
397
398 if m == 0 {
399 return (n as u8, 0, 0, 0);
400 }
401 if n == 0 {
402 return (0, m as u8, 0, 0);
403 }
404
405 let distance = self.compute_edit_distance_myers(text_chars);
406
407 // Heuristic: length difference determines insertions vs deletions
408 let len_diff = n as i32 - m as i32;
409
410 if len_diff >= 0 {
411 // Text is longer or equal: extra chars are insertions
412 let insertions = (len_diff as u8).min(distance);
413 let other_edits = distance.saturating_sub(insertions);
414 (insertions, 0, other_edits, 0)
415 } else {
416 // Text is shorter: missing chars are deletions
417 let deletions = ((-len_diff) as u8).min(distance);
418 let other_edits = distance.saturating_sub(deletions);
419 (0, deletions, other_edits, 0)
420 }
421 }
422
423 /// Find all matches in text using Bitap algorithm with k errors.
424 #[must_use]
425 pub fn find_all(&self, text: &str, threshold: f32) -> Vec<DamLevMatch> {
426 let max_edits = self.limits.max_edits as usize;
427 let text_chars: Vec<(usize, char)> = text.char_indices().collect();
428
429 if text_chars.is_empty() {
430 return vec![];
431 }
432
433 let mut matches: FxHashMap<(usize, usize), DamLevMatch> = FxHashMap::default();
434
435 // State vectors: R[d] tracks matching state with exactly d errors
436 // Bit i is 0 if we've matched pattern[0..=i] with d errors
437 // Use two buffers and swap to avoid allocation per character
438 let mut r: Vec<u64> = vec![!0u64; max_edits + 1];
439 let mut old_r: Vec<u64> = vec![!0u64; max_edits + 1];
440
441 // Initialize: we can delete up to k characters from the start of pattern
442 // R[d] starts with first d bits as 0 (matched d chars via deletion)
443 // Left shift advances pattern position (bit i → bit i+1)
444 for d in 1..=max_edits {
445 r[d] = r[d - 1] << 1;
446 }
447
448 for (char_idx, &(_, text_char)) in text_chars.iter().enumerate() {
449 let text_char = if self.case_insensitive {
450 text_char.to_lowercase().next().unwrap_or(text_char)
451 } else {
452 text_char
453 };
454
455 let char_mask = self.get_mask(text_char);
456
457 // Swap buffers: old_r gets previous r, r will be updated (no allocation!)
458 std::mem::swap(&mut r, &mut old_r);
459
460 // Update R[0] (exact matching) - use old_r since we swapped
461 r[0] = (old_r[0] << 1) | char_mask;
462
463 // Update R[d] for d > 0 (fuzzy matching)
464 for d in 1..=max_edits {
465 // Can insert from R[d-1]: consume text char without advancing pattern
466 let insert = old_r[d - 1];
467
468 // Can delete from R[d-1]: advance pattern without consuming text
469 // Uses r[d-1] (already updated) with << 1 to advance pattern position
470 let delete = r[d - 1] << 1;
471
472 // Can substitute from R[d-1]: consume both and treat as match
473 let substitute = old_r[d - 1] << 1;
474
475 // Regular match with d errors
476 let match_d = (old_r[d] << 1) | char_mask;
477
478 r[d] = match_d & insert & delete & substitute;
479 }
480
481 // Check for matches (bit pattern_len-1 is 0)
482 let end_byte = text_chars.get(char_idx + 1).map_or(text.len(), |(b, _)| *b);
483
484 for d in 0..=max_edits {
485 if (r[d] & self.accept_mask) == 0 {
486 // Found a match with d edits
487 // Estimate start position (approximate)
488 let min_start_char = char_idx.saturating_sub(self.pattern_len + d);
489 let max_start_char =
490 char_idx.saturating_sub(self.pattern_len.saturating_sub(d + 1));
491
492 for start_char in min_start_char..=max_start_char.min(char_idx) {
493 let start_byte = text_chars.get(start_char).map_or(0, |(b, _)| *b);
494
495 // Compute exact edit breakdown using DP
496 let (insertions, deletions, substitutions, swaps) = self
497 .compute_exact_edit_breakdown(&text.as_bytes()[start_byte..end_byte]);
498
499 // Use actual edit count from DP, verify it matches Bitap state
500 let total_edits = insertions + deletions + substitutions + swaps;
501 if total_edits as usize > d {
502 continue; // More edits than this state allows
503 }
504 let sim = self.calc_similarity(total_edits, insertions, deletions);
505 if sim >= threshold {
506 let key = (start_byte, end_byte);
507 let m = DamLevMatch {
508 start: start_byte,
509 end: end_byte,
510 insertions,
511 deletions,
512 substitutions,
513 swaps,
514 similarity: sim,
515 };
516
517 matches
518 .entry(key)
519 .and_modify(|existing| {
520 if m.similarity > existing.similarity {
521 *existing = m.clone();
522 }
523 })
524 .or_insert(m);
525 }
526 }
527 }
528 }
529 }
530
531 // Handle text shorter than pattern: positions reached during the last iteration
532 // need extra propagation (via deletion) to reach the accept position.
533 if text_chars.len() < self.pattern_len {
534 let chars_short = self.pattern_len - text_chars.len();
535 for _ in 0..chars_short.min(max_edits) {
536 std::mem::swap(&mut r, &mut old_r);
537
538 // Apply deletion propagation: advance pattern position without consuming text.
539 // From d-1 errors at position p, we can delete pattern[p] to reach d errors at p+1.
540 r[0] = old_r[0]; // Can't advance without consuming text or adding error
541 for d in 1..=max_edits {
542 // Deletion: skip pattern char without consuming text
543 let delete = old_r[d - 1] << 1;
544 // Keep existing state if already matched
545 r[d] = old_r[d] & delete;
546 }
547
548 // Check for matches after propagation
549 for d in 0..=max_edits {
550 if (r[d] & self.accept_mask) == 0 {
551 let end_byte = text.len();
552 let min_start_char = text_chars
553 .len()
554 .saturating_sub(self.pattern_len.saturating_sub(d + 1));
555
556 for start_char in 0..=min_start_char.min(text_chars.len().saturating_sub(1))
557 {
558 let start_byte = text_chars.get(start_char).map_or(0, |(b, _)| *b);
559
560 let (insertions, deletions, substitutions, swaps) = self
561 .compute_exact_edit_breakdown(
562 &text.as_bytes()[start_byte..end_byte],
563 );
564
565 let total_edits = insertions + deletions + substitutions + swaps;
566 if total_edits as usize <= d {
567 let sim = self.calc_similarity(total_edits, insertions, deletions);
568 if sim >= threshold {
569 let key = (start_byte, end_byte);
570 let m = DamLevMatch {
571 start: start_byte,
572 end: end_byte,
573 insertions,
574 deletions,
575 substitutions,
576 swaps,
577 similarity: sim,
578 };
579
580 matches
581 .entry(key)
582 .and_modify(|existing| {
583 if m.similarity > existing.similarity {
584 *existing = m.clone();
585 }
586 })
587 .or_insert(m);
588 }
589 }
590 }
591 }
592 }
593 }
594 }
595
596 matches.into_values().collect()
597 }
598
599 /// Find all non-overlapping matches, preferring best (highest similarity) matches.
600 ///
601 /// This method finds all overlapping candidates, sorts by similarity, then
602 /// greedily selects non-overlapping matches starting from highest similarity.
603 /// This ensures we prefer "Lorem" (sim=1.0) over "ore" (sim=0.6).
604 ///
605 /// Matches must be at least `pattern_len - max_edits` characters long to be
606 /// considered valid. This prevents overly short fuzzy matches.
607 ///
608 /// When `require_first_char` is true, matches must start with the same first
609 /// character as the pattern (case-insensitive). This filters out spurious
610 /// matches like "bore" when searching for "Lorem".
611 #[must_use]
612 pub fn find_best_non_overlapping(
613 &self,
614 text: &str,
615 threshold: f32,
616 require_first_char: bool,
617 ) -> Vec<DamLevMatch> {
618 // Get all overlapping matches
619 let mut all_matches = self.find_all(text, threshold);
620
621 if all_matches.is_empty() {
622 return vec![];
623 }
624
625 // Filter: minimum match length = pattern_len - max_edits
626 let min_match_len = self
627 .pattern_len
628 .saturating_sub(self.limits.max_edits as usize);
629 all_matches.retain(|m| m.end - m.start >= min_match_len);
630
631 // Filter: require first character to match pattern's first char
632 // Respects case_insensitive setting - if case-sensitive, require exact first char match
633 if require_first_char && !self.pattern_chars.is_empty() {
634 let pattern_first = self.pattern_chars[0];
635 let text_bytes = text.as_bytes();
636 all_matches.retain(|m| {
637 if m.start >= text_bytes.len() {
638 return false;
639 }
640 // Decode the first character of the match
641 let (first_char, _) = decode_utf8_char_fast(text_bytes, m.start);
642 if self.case_insensitive {
643 first_char.eq_ignore_ascii_case(&pattern_first)
644 } else {
645 first_char == pattern_first
646 }
647 });
648 }
649
650 if all_matches.is_empty() {
651 return vec![];
652 }
653
654 // Sort by similarity descending, then by start position ascending
655 all_matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
656 Some(std::cmp::Ordering::Equal) | None => a.start.cmp(&b.start),
657 Some(ord) => ord,
658 });
659
660 // Greedily select non-overlapping matches
661 let mut result = Vec::new();
662 let mut occupied = vec![false; text.len() + 1];
663
664 for m in all_matches {
665 // Check if this match overlaps with any already selected
666 let overlaps = (m.start..m.end).any(|i| occupied[i]);
667 if !overlaps {
668 // Mark this range as occupied
669 for i in m.start..m.end {
670 occupied[i] = true;
671 }
672 result.push(m);
673 }
674 }
675
676 // Sort result by start position for consistent ordering
677 result.sort_by_key(|m| m.start);
678 result
679 }
680
681 /// Fast find of non-overlapping matches optimized for iteration (greedy leftmost).
682 ///
683 /// This is faster than `find_all()` followed by filtering because:
684 /// 1. It skips ahead after each match (no overlapping work)
685 /// 2. It only verifies the most likely start position per match
686 /// 3. For exact matches (d=0), it trusts Bitap without DP
687 ///
688 /// When `require_first_char` is true, matches must start with the same first
689 /// character as the pattern (case-sensitive unless `case_insensitive` mode).
690 ///
691 /// Note: This uses greedy-leftmost strategy. For best-match selection
692 /// (preferring higher similarity), use `find_best_non_overlapping` instead.
693 #[must_use]
694 pub fn find_all_non_overlapping(
695 &self,
696 text: &str,
697 threshold: f32,
698 require_first_char: bool,
699 ) -> Vec<DamLevMatch> {
700 self.find_non_overlapping_impl(text, threshold, require_first_char, 0)
701 }
702
703 /// Find the first match using the same algorithm as `find_all_non_overlapping`.
704 /// Returns as soon as a match is found, avoiding scanning the rest of the text.
705 #[must_use]
706 pub fn find_first_non_overlapping(&self, text: &str, threshold: f32) -> Option<DamLevMatch> {
707 // Try ASCII fast path if both pattern and text are ASCII
708 if self.is_ascii && text.is_ascii() {
709 if let Some(m) = self.find_first_ascii_fast(text.as_bytes(), threshold) {
710 return Some(m);
711 }
712 // If fast path returns None due to max_edits > 4, fall through to generic path
713 if self.limits.max_edits <= 4 {
714 return None;
715 }
716 }
717
718 let matches = self.find_non_overlapping_impl(text, threshold, false, 1);
719 matches.into_iter().next()
720 }
721
722 /// Find up to `n` non-overlapping matches.
723 /// Stops searching after finding `n` matches for efficiency.
724 #[must_use]
725 pub fn find_n_non_overlapping(
726 &self,
727 text: &str,
728 threshold: f32,
729 require_first_char: bool,
730 n: usize,
731 ) -> Vec<DamLevMatch> {
732 self.find_non_overlapping_impl(text, threshold, require_first_char, n)
733 }
734
735 /// Implementation of non-overlapping match search with optional limit.
736 /// `limit` of 0 means unlimited matches, otherwise stops after finding `limit` matches.
737 fn find_non_overlapping_impl(
738 &self,
739 text: &str,
740 threshold: f32,
741 require_first_char: bool,
742 limit: usize,
743 ) -> Vec<DamLevMatch> {
744 let max_edits = self.limits.max_edits as usize;
745 let text_bytes = text.as_bytes();
746 let text_len = text_bytes.len();
747
748 // Handle empty text: matches if pattern can be fully deleted
749 if text_len == 0 {
750 if self.pattern_len <= max_edits {
751 let deletions = self.pattern_len as u8;
752 let sim = self.calc_similarity(deletions, 0, deletions);
753 if sim >= threshold {
754 return vec![DamLevMatch {
755 start: 0,
756 end: 0,
757 insertions: 0,
758 deletions,
759 substitutions: 0,
760 swaps: 0,
761 similarity: sim,
762 }];
763 }
764 }
765 return vec![];
766 }
767
768 // Precompute first-char check if needed
769 let first_char_check: Option<char> = if require_first_char && !self.pattern_chars.is_empty()
770 {
771 Some(self.pattern_chars[0])
772 } else {
773 None
774 };
775
776 let mut matches = Vec::new();
777 let mut last_end = 0usize;
778
779 // State vectors (3 buffers for transposition support)
780 let mut r: Vec<u64> = vec![!0u64; max_edits + 1];
781 let mut old_r: Vec<u64> = vec![!0u64; max_edits + 1];
782 let mut old_old_r: Vec<u64> = vec![!0u64; max_edits + 1];
783
784 // Initialize deletion states
785 for d in 1..=max_edits {
786 r[d] = r[d - 1] << 1;
787 }
788
789 // Track pending fuzzy match (wait for potential better exact match)
790 let mut pending_match: Option<(usize, DamLevMatch)> = None; // (edit_level, match)
791 let mut chars_since_pending = 0usize;
792
793 // Track previous character mask for transposition
794 let mut prev_mask: u64 = !0;
795
796 // Circular buffer to track byte positions of recent characters
797 // Used to correctly compute start position for matches with multi-byte UTF-8
798 let history_size = self.pattern_len + max_edits + 1;
799 let mut byte_history: Vec<usize> = vec![0; history_size];
800 let mut history_idx = 0usize;
801
802 let mut byte_pos = 0;
803
804 while byte_pos < text_len {
805 // Decode current character
806 let (text_char, char_len) = decode_utf8_char_fast(text_bytes, byte_pos);
807 let text_char = if self.case_insensitive {
808 text_char.to_lowercase().next().unwrap_or(text_char)
809 } else {
810 text_char
811 };
812
813 // Record byte position in circular buffer for correct UTF-8 start computation
814 byte_history[history_idx] = byte_pos;
815 history_idx = (history_idx + 1) % history_size;
816
817 let char_mask = self.get_mask(text_char);
818
819 // Rotate buffers: old_old_r <- old_r <- r
820 std::mem::swap(&mut old_old_r, &mut old_r);
821 std::mem::swap(&mut old_r, &mut r);
822
823 // Update R[0] (exact matching)
824 r[0] = (old_r[0] << 1) | char_mask;
825
826 // Update R[d] for d > 0 (fuzzy matching with transposition)
827 for d in 1..=max_edits {
828 let insert = old_r[d - 1];
829 let delete = r[d - 1] << 1;
830 let substitute = old_r[d - 1] << 1;
831 let match_d = (old_r[d] << 1) | char_mask;
832 let mut new_r = match_d & insert & delete & substitute;
833
834 // Transposition: check if we can swap adjacent chars
835 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
836 let trans_valid_mask = char_mask | (prev_mask >> 1);
837 // From matched position k, we can reach k+2 via transposition at k+1
838 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
839 new_r &= trans;
840
841 r[d] = new_r;
842 }
843
844 // Update prev_mask for next iteration
845 prev_mask = char_mask;
846
847 let end_byte = byte_pos + char_len;
848
849 // Check for match at each error level (prefer lower error levels)
850 'error_levels: for d in 0..=max_edits {
851 if (r[d] & self.accept_mask) == 0 {
852 // Found a potential match with d edits
853 // For fuzzy matches, the match length could vary:
854 // - With deletions: match is shorter than pattern
855 // - With insertions: match is longer than pattern
856 let min_match_len = self.pattern_len.saturating_sub(d);
857 let max_match_len = self.pattern_len + d;
858
859 // Track best candidate at this error level
860 let mut best_at_level: Option<DamLevMatch> = None;
861
862 // Try all possible match lengths to find the best one
863 // We check all lengths and pick: earliest start, then longest match
864 for try_len in min_match_len..=max_match_len {
865 // Compute start_byte by going back try_len characters (not bytes)
866 // Use the circular buffer to handle multi-byte UTF-8 correctly
867 let start_byte = if try_len <= history_size && try_len > 0 {
868 // Look up the byte position from the circular buffer
869 // history_idx points to the next slot, so most recent is (history_idx - 1)
870 // We need to go back (try_len - 1) more slots from there
871 let idx = (history_idx + history_size - try_len) % history_size;
872 byte_history[idx]
873 } else if try_len == 0 {
874 end_byte
875 } else {
876 // try_len > history_size: shouldn't happen normally, fall back to byte math
877 // This could be inaccurate for multi-byte chars
878 end_byte.saturating_sub(try_len)
879 };
880
881 if start_byte >= end_byte {
882 continue;
883 }
884
885 // Skip empty matches at end of text (can happen when try_len=0)
886 if start_byte >= text_len {
887 continue;
888 }
889
890 // Skip if this match overlaps with previous confirmed match
891 if start_byte < last_end {
892 continue;
893 }
894
895 // Check first-char filter if required
896 if let Some(pattern_first) = first_char_check {
897 let (match_first, _) = decode_utf8_char_fast(text_bytes, start_byte);
898 let matches_first = if self.case_insensitive {
899 match_first.eq_ignore_ascii_case(&pattern_first)
900 } else {
901 match_first == pattern_first
902 };
903 if !matches_first {
904 continue;
905 }
906 }
907
908 // For exact match (d=0), accept immediately
909 if d == 0 {
910 let sim = 1.0f32;
911 if sim >= threshold {
912 // Clear any pending fuzzy match (this exact match is better)
913 pending_match = None;
914 chars_since_pending = 0;
915
916 matches.push(DamLevMatch {
917 start: start_byte,
918 end: end_byte,
919 insertions: 0,
920 deletions: 0,
921 substitutions: 0,
922 swaps: 0,
923 similarity: sim,
924 });
925
926 // Early exit: return immediately if limit reached
927 if limit > 0 && matches.len() >= limit {
928 return matches;
929 }
930
931 last_end = end_byte;
932
933 // Reset state for next non-overlapping match
934 r.fill(!0u64);
935 old_r.fill(!0u64);
936 old_old_r.fill(!0u64);
937 for dd in 1..=max_edits {
938 r[dd] = r[dd - 1] << 1;
939 }
940 prev_mask = !0;
941 break 'error_levels; // Found exact match, move on
942 }
943 } else {
944 // For fuzzy match, verify with DP
945 let matched_text = &text_bytes[start_byte..end_byte];
946 let (insertions, deletions, substitutions, swaps) =
947 self.compute_exact_edit_breakdown(matched_text);
948
949 let total_edits = insertions + deletions + substitutions + swaps;
950 if total_edits as usize <= max_edits {
951 let sim = self.calc_similarity(total_edits, insertions, deletions);
952 if sim >= threshold {
953 let candidate = DamLevMatch {
954 start: start_byte,
955 end: end_byte,
956 insertions,
957 deletions,
958 substitutions,
959 swaps,
960 similarity: sim,
961 };
962
963 // Check if this candidate is better than best at this level
964 // Prefer: earlier start, then longer match
965 let dominated = best_at_level.as_ref().is_some_and(|best| {
966 let best_len = best.end - best.start;
967 let cand_len = candidate.end - candidate.start;
968 best.start < candidate.start
969 || (best.start == candidate.start
970 && best_len >= cand_len)
971 });
972
973 if !dominated {
974 best_at_level = Some(candidate);
975 }
976 }
977 }
978 }
979 }
980
981 // If we found a valid match at this error level, update pending
982 if let Some(candidate) = best_at_level {
983 // Check if this is better than existing pending match
984 let dominated = pending_match.as_ref().is_some_and(|(pd, pm)| {
985 let pm_len = pm.end - pm.start;
986 let cand_len = candidate.end - candidate.start;
987 *pd < d
988 || (*pd == d && pm.start < candidate.start)
989 || (*pd == d && pm.start == candidate.start && pm_len >= cand_len)
990 });
991
992 if !dominated {
993 // Always reset counter when setting a new pending match
994 // This ensures the new match gets its full waiting period
995 chars_since_pending = 0;
996 pending_match = Some((d, candidate));
997 }
998 break 'error_levels; // Found valid fuzzy match at this level
999 }
1000 }
1001 }
1002
1003 // Check if we should commit the pending fuzzy match
1004 if let Some((d, ref m)) = pending_match {
1005 chars_since_pending += 1;
1006 let match_len = m.end - m.start;
1007
1008 // Determine when to commit:
1009 // - Exact matches (d=0): commit immediately
1010 // - Fuzzy matches (d>0): wait at least 1 char to let potential exact matches appear
1011 // This handles cases like "mhussei" (fuzzy at d=2) vs "hussein" (exact at d=0)
1012 // where the exact match ends one character later
1013 let commit_threshold = if d == 0 {
1014 1 // Exact match: commit on first check
1015 } else if match_len >= self.pattern_len {
1016 2 // Full-length fuzzy match: wait 1 char for potential exact match
1017 } else {
1018 max_edits + 1 // Short match: wait longer
1019 };
1020
1021 if chars_since_pending >= commit_threshold {
1022 let (_, m) = pending_match.take().unwrap();
1023 last_end = m.end;
1024 matches.push(m);
1025
1026 // Early exit: return immediately if limit reached
1027 if limit > 0 && matches.len() >= limit {
1028 return matches;
1029 }
1030
1031 // Reset state
1032 r.fill(!0u64);
1033 old_r.fill(!0u64);
1034 old_old_r.fill(!0u64);
1035 for dd in 1..=max_edits {
1036 r[dd] = r[dd - 1] << 1;
1037 }
1038 prev_mask = !0;
1039 chars_since_pending = 0;
1040 }
1041 }
1042
1043 byte_pos = end_byte;
1044 }
1045
1046 // Commit any remaining pending match
1047 if let Some((_, m)) = pending_match {
1048 matches.push(m);
1049 }
1050
1051 matches
1052 }
1053
1054 /// Ultra-fast ASCII-only path for finding first match.
1055 /// Only called when both pattern and text are pure ASCII.
1056 /// Uses stack arrays instead of Vec, direct byte lookup instead of `HashMap`.
1057 ///
1058 /// Returns Some((start, end, edits)) on match, None if no match found.
1059 #[inline]
1060 fn find_first_ascii_fast(&self, text: &[u8], threshold: f32) -> Option<DamLevMatch> {
1061 debug_assert!(self.is_ascii);
1062
1063 let max_edits = self.limits.max_edits as usize;
1064 let text_len = text.len();
1065
1066 // Handle empty text
1067 if text_len == 0 {
1068 if self.pattern_len <= max_edits {
1069 let deletions = self.pattern_len as u8;
1070 let sim = self.calc_similarity(deletions, 0, deletions);
1071 if sim >= threshold {
1072 return Some(DamLevMatch {
1073 start: 0,
1074 end: 0,
1075 insertions: 0,
1076 deletions,
1077 substitutions: 0,
1078 swaps: 0,
1079 similarity: sim,
1080 });
1081 }
1082 }
1083 return None;
1084 }
1085
1086 // Use fixed-size arrays for state vectors (up to 4 edits supported in fast path)
1087 // For more edits, fall back to Vec-based implementation
1088 if max_edits > 4 {
1089 return None; // Signal caller to use generic path
1090 }
1091
1092 // State vectors (3 buffers for transposition support) - stack allocated
1093 let mut r: [u64; 5] = [!0u64; 5];
1094 let mut old_r: [u64; 5] = [!0u64; 5];
1095 let mut old_old_r: [u64; 5] = [!0u64; 5];
1096
1097 // Initialize deletion states
1098 for d in 1..=max_edits {
1099 r[d] = r[d - 1] << 1;
1100 }
1101
1102 // Track pending fuzzy match
1103 let mut pending_match: Option<(usize, DamLevMatch)> = None;
1104 let mut chars_since_pending = 0usize;
1105
1106 // Track previous character mask for transposition
1107 let mut prev_mask: u64 = !0;
1108
1109 // Circular buffer for start position tracking (pattern_len + max_edits + 1)
1110 // Since ASCII: 1 byte = 1 char, we can use byte positions directly
1111 let history_size = self.pattern_len + max_edits + 1;
1112 // Use fixed array - max pattern is 64, max edits is 4, so max history is 69
1113 let mut byte_history: [usize; 72] = [0; 72];
1114 let mut history_idx = 0usize;
1115
1116 let mut byte_pos = 0;
1117
1118 // Pre-fetch for case insensitivity - use a static lookup table
1119 let to_lower: fn(u8) -> u8 = if self.case_insensitive {
1120 |b| b.to_ascii_lowercase()
1121 } else {
1122 |b| b
1123 };
1124
1125 while byte_pos < text_len {
1126 let byte = to_lower(text[byte_pos]);
1127
1128 // Record byte position
1129 byte_history[history_idx % history_size] = byte_pos;
1130 history_idx += 1;
1131
1132 // Direct byte mask lookup - no HashMap, no UTF-8 decode
1133 let char_mask = if byte < 128 {
1134 self.byte_masks[byte as usize]
1135 } else {
1136 !0u64 // Non-ASCII byte: no match (shouldn't happen in ASCII path)
1137 };
1138
1139 // Rotate buffers
1140 let tmp = old_old_r;
1141 old_old_r = old_r;
1142 old_r = r;
1143 r = tmp;
1144
1145 // Update R[0] (exact matching)
1146 r[0] = (old_r[0] << 1) | char_mask;
1147
1148 // Update R[d] for d > 0 (fuzzy matching with transposition)
1149 for d in 1..=max_edits {
1150 let insert = old_r[d - 1];
1151 let delete = r[d - 1] << 1;
1152 let substitute = old_r[d - 1] << 1;
1153 let match_d = (old_r[d] << 1) | char_mask;
1154 let mut new_r = match_d & insert & delete & substitute;
1155
1156 // Transposition
1157 let trans_valid_mask = char_mask | (prev_mask >> 1);
1158 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
1159 new_r &= trans;
1160
1161 r[d] = new_r;
1162 }
1163
1164 prev_mask = char_mask;
1165 let end_byte = byte_pos + 1; // ASCII: 1 byte per char
1166
1167 // Check for match at each error level
1168 'error_levels: for d in 0..=max_edits {
1169 if (r[d] & self.accept_mask) == 0 {
1170 let min_match_len = self.pattern_len.saturating_sub(d);
1171 let max_match_len = self.pattern_len + d;
1172
1173 let mut best_at_level: Option<DamLevMatch> = None;
1174
1175 for try_len in min_match_len..=max_match_len {
1176 let start_byte =
1177 if try_len <= history_size && try_len > 0 && history_idx >= try_len {
1178 byte_history[(history_idx - try_len) % history_size]
1179 } else if try_len == 0 {
1180 end_byte
1181 } else {
1182 end_byte.saturating_sub(try_len)
1183 };
1184
1185 if start_byte >= end_byte || start_byte >= text_len {
1186 continue;
1187 }
1188
1189 // For exact match (d=0), return immediately
1190 if d == 0 {
1191 let sim = 1.0f32;
1192 if sim >= threshold {
1193 return Some(DamLevMatch {
1194 start: start_byte,
1195 end: end_byte,
1196 insertions: 0,
1197 deletions: 0,
1198 substitutions: 0,
1199 swaps: 0,
1200 similarity: sim,
1201 });
1202 }
1203 } else {
1204 // Fuzzy match - verify with DP
1205 let matched_text = &text[start_byte..end_byte];
1206 let (insertions, deletions, substitutions, swaps) =
1207 self.compute_exact_edit_breakdown(matched_text);
1208
1209 let total_edits = insertions + deletions + substitutions + swaps;
1210 if total_edits as usize <= max_edits {
1211 let sim = self.calc_similarity(total_edits, insertions, deletions);
1212 if sim >= threshold {
1213 let candidate = DamLevMatch {
1214 start: start_byte,
1215 end: end_byte,
1216 insertions,
1217 deletions,
1218 substitutions,
1219 swaps,
1220 similarity: sim,
1221 };
1222
1223 let dominated = best_at_level.as_ref().is_some_and(|best| {
1224 let best_len = best.end - best.start;
1225 let cand_len = candidate.end - candidate.start;
1226 best.start < candidate.start
1227 || (best.start == candidate.start
1228 && best_len >= cand_len)
1229 });
1230
1231 if !dominated {
1232 best_at_level = Some(candidate);
1233 }
1234 }
1235 }
1236 }
1237 }
1238
1239 if let Some(candidate) = best_at_level {
1240 let dominated = pending_match.as_ref().is_some_and(|(pd, pm)| {
1241 let pm_len = pm.end - pm.start;
1242 let cand_len = candidate.end - candidate.start;
1243 *pd < d
1244 || (*pd == d && pm.start < candidate.start)
1245 || (*pd == d && pm.start == candidate.start && pm_len >= cand_len)
1246 });
1247
1248 if !dominated {
1249 chars_since_pending = 0;
1250 pending_match = Some((d, candidate));
1251 }
1252 break 'error_levels;
1253 }
1254 }
1255 }
1256
1257 // Check if we should commit pending match
1258 if let Some((d, ref m)) = pending_match {
1259 chars_since_pending += 1;
1260 let match_len = m.end - m.start;
1261
1262 let commit_threshold = if d == 0 {
1263 1
1264 } else if match_len >= self.pattern_len {
1265 2
1266 } else {
1267 max_edits + 1
1268 };
1269
1270 if chars_since_pending >= commit_threshold {
1271 let (_, m) = pending_match.take().unwrap();
1272 return Some(m);
1273 }
1274 }
1275
1276 byte_pos += 1;
1277 }
1278
1279 // Return any pending match
1280 pending_match.map(|(_, m)| m)
1281 }
1282
1283 /// Compute exact Damerau-Levenshtein edit breakdown using dynamic programming.
1284 /// Returns (insertions, deletions, substitutions, swaps).
1285 ///
1286 /// Optimized version using:
1287 /// - Myers' bit-vector algorithm for fast early positive confirmation
1288 /// - 3-row rotation instead of full O(m×n) table (for transposition support)
1289 /// - Stack allocation for small patterns (no heap allocation in common case)
1290 fn compute_exact_edit_breakdown(&self, matched_text: &[u8]) -> (u8, u8, u8, u8) {
1291 let pattern = &self.pattern_chars;
1292 let m = pattern.len();
1293
1294 // Parse text as UTF-8
1295 let Ok(text_str) = std::str::from_utf8(matched_text) else {
1296 return (0, m as u8, 0, 0);
1297 };
1298
1299 if m == 0 {
1300 let n = text_str.chars().count();
1301 return (n as u8, 0, 0, 0);
1302 }
1303 if text_str.is_empty() {
1304 return (0, m as u8, 0, 0);
1305 }
1306
1307 // For small text, use fully stack-allocated version (common case)
1308 // Stack limit chosen to cover pattern_len <= 64 + typical edits
1309 const STACK_LIMIT: usize = 72;
1310
1311 // For ASCII text, byte length == char count (fast path)
1312 let is_ascii = text_str.is_ascii();
1313 let n = if is_ascii {
1314 text_str.len()
1315 } else {
1316 text_str.chars().count()
1317 };
1318
1319 if n < STACK_LIMIT {
1320 // Fast early rejection using Myers
1321 // Since Myers doesn't support transpositions (counts as 2 subs instead of 1),
1322 // we can only reject if Myers distance > max_edits + potential_transpositions
1323 // Conservative: reject if Myers distance > max_edits + max_possible_transpositions
1324 let max_possible_trans = (m.min(n) / 2) as u8;
1325
1326 // Build text chars for Myers check
1327 let mut text_chars_buf: [char; STACK_LIMIT] = ['\0'; STACK_LIMIT];
1328 for (idx, c) in text_str.chars().take(STACK_LIMIT).enumerate() {
1329 text_chars_buf[idx] = if self.case_insensitive {
1330 c.to_ascii_lowercase()
1331 } else {
1332 c
1333 };
1334 }
1335 let text_chars = &text_chars_buf[..n];
1336
1337 let myers_dist = self.compute_edit_distance_myers(text_chars);
1338
1339 // If Myers distance is low enough that no transpositions could make it invalid,
1340 // we still need full DP for exact breakdown
1341 // If Myers distance is very high, reject early
1342 // Use saturating_add to avoid overflow when max_edits is u8::MAX (unlimited)
1343 if myers_dist > self.limits.max_edits.saturating_add(max_possible_trans) {
1344 // Definitely too many edits - return high value for rejection
1345 return (myers_dist, 0, 0, 0);
1346 }
1347
1348 self.compute_edit_breakdown_small::<STACK_LIMIT>(pattern, text_str, m, n)
1349 } else {
1350 self.compute_edit_breakdown_large(pattern, text_str, m, n)
1351 }
1352 }
1353
1354 /// Optimized DP for small text (stack allocated, 3-row rotation).
1355 #[inline]
1356 fn compute_edit_breakdown_small<const N: usize>(
1357 &self,
1358 pattern: &[char],
1359 text_str: &str,
1360 m: usize,
1361 n: usize,
1362 ) -> (u8, u8, u8, u8) {
1363 debug_assert!(n < N);
1364
1365 // 3 rows for rotation: prev_prev (i-2), prev (i-1), curr (i)
1366 // Each row has n+1 elements
1367 type Cell = (u8, u8, u8, u8, u8); // (dist, ins, del, sub, swap)
1368 let mut prev_prev: [Cell; N] = [(0, 0, 0, 0, 0); N];
1369 let mut prev: [Cell; N] = [(0, 0, 0, 0, 0); N];
1370 let mut curr: [Cell; N] = [(0, 0, 0, 0, 0); N];
1371
1372 // Stack-allocated text chars buffer (avoids heap allocation)
1373 let mut text_chars_buf: [char; N] = ['\0'; N];
1374 for (idx, c) in text_str.chars().take(N).enumerate() {
1375 text_chars_buf[idx] = if self.case_insensitive {
1376 c.to_ascii_lowercase()
1377 } else {
1378 c
1379 };
1380 }
1381 let text_chars = &text_chars_buf[..n];
1382
1383 // Initialize row 0 (base case: insert j chars from text)
1384 // This goes into prev since the loop starts at i=1 and uses prev for i-1
1385 for j in 0..=n {
1386 prev[j] = (j as u8, j as u8, 0, 0, 0);
1387 }
1388
1389 let mut prev_pattern_char = '\0';
1390
1391 for i in 1..=m {
1392 let pattern_char = if self.case_insensitive {
1393 pattern[i - 1].to_ascii_lowercase()
1394 } else {
1395 pattern[i - 1]
1396 };
1397
1398 // Base case for column 0: delete i chars from pattern
1399 curr[0] = (i as u8, 0, i as u8, 0, 0);
1400
1401 for j in 1..=n {
1402 let text_char = text_chars[j - 1];
1403
1404 if pattern_char == text_char {
1405 // Match - no edit needed
1406 curr[j] = prev[j - 1];
1407 } else {
1408 // Try substitution (from prev[j-1])
1409 let (sub_d, sub_i, sub_del, sub_s, sub_sw) = prev[j - 1];
1410 let mut best = (sub_d + 1, sub_i, sub_del, sub_s + 1, sub_sw);
1411
1412 // Try insertion (from curr[j-1])
1413 let (ins_d, ins_i, ins_del, ins_s, ins_sw) = curr[j - 1];
1414 if ins_d + 1 < best.0 {
1415 best = (ins_d + 1, ins_i + 1, ins_del, ins_s, ins_sw);
1416 }
1417
1418 // Try deletion (from prev[j])
1419 let (del_d, del_i, del_del, del_s, del_sw) = prev[j];
1420 if del_d + 1 < best.0 {
1421 best = (del_d + 1, del_i, del_del + 1, del_s, del_sw);
1422 }
1423
1424 // Try transposition (from prev_prev[j-2])
1425 if i > 1 && j > 1 {
1426 let prev_text_char = text_chars[j - 2];
1427 if pattern_char == prev_text_char && prev_pattern_char == text_char {
1428 let (tr_d, tr_i, tr_del, tr_s, tr_sw) = prev_prev[j - 2];
1429 if tr_d + 1 < best.0 {
1430 best = (tr_d + 1, tr_i, tr_del, tr_s, tr_sw + 1);
1431 }
1432 }
1433 }
1434
1435 curr[j] = best;
1436 }
1437 }
1438
1439 // Rotate rows: prev_prev <- prev <- curr
1440 std::mem::swap(&mut prev_prev, &mut prev);
1441 std::mem::swap(&mut prev, &mut curr);
1442 prev_pattern_char = pattern_char;
1443 }
1444
1445 // Result is in prev[n] (after final rotation)
1446 let (_, ins, del, sub, sw) = prev[n];
1447 (ins, del, sub, sw)
1448 }
1449
1450 /// Fallback DP for large text (heap allocated).
1451 fn compute_edit_breakdown_large(
1452 &self,
1453 pattern: &[char],
1454 text_str: &str,
1455 m: usize,
1456 n: usize,
1457 ) -> (u8, u8, u8, u8) {
1458 type Cell = (u8, u8, u8, u8, u8);
1459
1460 // 3 rows for rotation
1461 let mut prev_prev: Vec<Cell> = vec![(0, 0, 0, 0, 0); n + 1];
1462 let mut prev: Vec<Cell> = vec![(0, 0, 0, 0, 0); n + 1];
1463 let mut curr: Vec<Cell> = vec![(0, 0, 0, 0, 0); n + 1];
1464
1465 // Initialize row 0
1466 for j in 0..=n {
1467 prev_prev[j] = (j as u8, j as u8, 0, 0, 0);
1468 }
1469
1470 let text_chars: Vec<char> = if self.case_insensitive {
1471 text_str.chars().map(|c| c.to_ascii_lowercase()).collect()
1472 } else {
1473 text_str.chars().collect()
1474 };
1475
1476 let mut prev_pattern_char = '\0';
1477
1478 for i in 1..=m {
1479 let pattern_char = if self.case_insensitive {
1480 pattern[i - 1].to_ascii_lowercase()
1481 } else {
1482 pattern[i - 1]
1483 };
1484
1485 curr[0] = (i as u8, 0, i as u8, 0, 0);
1486
1487 for j in 1..=n {
1488 let text_char = text_chars[j - 1];
1489
1490 if pattern_char == text_char {
1491 curr[j] = prev[j - 1];
1492 } else {
1493 let (sub_d, sub_i, sub_del, sub_s, sub_sw) = prev[j - 1];
1494 let mut best = (sub_d + 1, sub_i, sub_del, sub_s + 1, sub_sw);
1495
1496 let (ins_d, ins_i, ins_del, ins_s, ins_sw) = curr[j - 1];
1497 if ins_d + 1 < best.0 {
1498 best = (ins_d + 1, ins_i + 1, ins_del, ins_s, ins_sw);
1499 }
1500
1501 let (del_d, del_i, del_del, del_s, del_sw) = prev[j];
1502 if del_d + 1 < best.0 {
1503 best = (del_d + 1, del_i, del_del + 1, del_s, del_sw);
1504 }
1505
1506 if i > 1 && j > 1 {
1507 let prev_text_char = text_chars[j - 2];
1508 if pattern_char == prev_text_char && prev_pattern_char == text_char {
1509 let (tr_d, tr_i, tr_del, tr_s, tr_sw) = prev_prev[j - 2];
1510 if tr_d + 1 < best.0 {
1511 best = (tr_d + 1, tr_i, tr_del, tr_s, tr_sw + 1);
1512 }
1513 }
1514 }
1515
1516 curr[j] = best;
1517 }
1518 }
1519
1520 std::mem::swap(&mut prev_prev, &mut prev);
1521 std::mem::swap(&mut prev, &mut curr);
1522 prev_pattern_char = pattern_char;
1523 }
1524
1525 let (_, ins, del, sub, sw) = prev[n];
1526 (ins, del, sub, sw)
1527 }
1528
1529 /// Find the first match in the text.
1530 ///
1531 /// This delegates to `find_all_non_overlapping` and returns the first result.
1532 /// While not optimal for all cases, this ensures correct behavior for edge cases
1533 /// like transpositions and complex fuzzy matches.
1534 #[must_use]
1535 pub fn find_first(&self, text: &str, threshold: f32) -> Option<DamLevMatch> {
1536 // Delegate to the well-tested find_all_non_overlapping
1537 // require_first_char=false allows matches where first char is edited
1538 let matches = self.find_all_non_overlapping(text, threshold, false);
1539 matches.into_iter().min_by_key(|m| m.start)
1540 }
1541
1542 /// Find first match starting from candidate positions only.
1543 #[must_use]
1544 pub fn find_first_with_candidates(
1545 &self,
1546 text: &str,
1547 threshold: f32,
1548 candidates: &super::hash::FxHashSet<usize>,
1549 ) -> Option<DamLevMatch> {
1550 let max_edits = self.limits.max_edits as usize;
1551 let text_chars: Vec<(usize, char)> = text.char_indices().collect();
1552
1553 if text_chars.is_empty() || candidates.is_empty() {
1554 return None;
1555 }
1556
1557 // For each candidate position, run a localized Bitap search
1558 let mut sorted_candidates: Vec<usize> = candidates.iter().copied().collect();
1559 sorted_candidates.sort_unstable();
1560
1561 // Pre-allocate state buffers outside the loop (reused across candidates)
1562 let mut r: Vec<u64> = vec![!0u64; max_edits + 1];
1563 let mut old_r: Vec<u64> = vec![!0u64; max_edits + 1];
1564
1565 for &start_byte in &sorted_candidates {
1566 // Find the character index for this byte position using binary search (O(log N))
1567 let start_char = text_chars
1568 .binary_search_by_key(&start_byte, |(b, _)| *b)
1569 .unwrap_or(0);
1570
1571 // Reset state for this candidate
1572 r.fill(!0u64);
1573
1574 // Initialize deletion states - left shift advances pattern position
1575 for d in 1..=max_edits {
1576 r[d] = r[d - 1] << 1;
1577 }
1578
1579 let max_window = self.pattern_len + max_edits;
1580
1581 // Track best match within this window
1582 let mut best_match: Option<(usize, DamLevMatch)> = None;
1583
1584 for (rel_idx, &(_, text_char)) in text_chars[start_char..]
1585 .iter()
1586 .enumerate()
1587 .take(max_window + 1)
1588 {
1589 let text_char = if self.case_insensitive {
1590 text_char.to_lowercase().next().unwrap_or(text_char)
1591 } else {
1592 text_char
1593 };
1594
1595 let char_mask = self.get_mask(text_char);
1596
1597 // Swap buffers: old_r gets previous r (no allocation!)
1598 std::mem::swap(&mut r, &mut old_r);
1599
1600 r[0] = (old_r[0] << 1) | char_mask;
1601
1602 for d in 1..=max_edits {
1603 let insert = old_r[d - 1];
1604 let delete = r[d - 1] << 1; // left shift advances pattern position
1605 let substitute = old_r[d - 1] << 1;
1606 let match_d = (old_r[d] << 1) | char_mask;
1607
1608 r[d] = match_d & insert & delete & substitute;
1609 }
1610
1611 // Check for match
1612 let abs_idx = start_char + rel_idx;
1613 let end_byte = text_chars.get(abs_idx + 1).map_or(text.len(), |(b, _)| *b);
1614
1615 for d in 0..=max_edits {
1616 if (r[d] & self.accept_mask) == 0 {
1617 // Compute exact edit breakdown using DP
1618 let (insertions, deletions, substitutions, swaps) = self
1619 .compute_exact_edit_breakdown(&text.as_bytes()[start_byte..end_byte]);
1620
1621 let sim = self.calc_similarity(d as u8, insertions, deletions);
1622 if sim >= threshold {
1623 let candidate = DamLevMatch {
1624 start: start_byte,
1625 end: end_byte,
1626 insertions,
1627 deletions,
1628 substitutions,
1629 swaps,
1630 similarity: sim,
1631 };
1632
1633 // Update best if this has fewer edits
1634 let dominated =
1635 best_match.as_ref().is_some_and(|(best_d, _)| *best_d <= d);
1636 if !dominated {
1637 best_match = Some((d, candidate));
1638 }
1639
1640 // If exact match found, return immediately
1641 if d == 0 {
1642 return best_match.map(|(_, m)| m);
1643 }
1644 }
1645 }
1646 }
1647 }
1648
1649 // Return best match from this candidate window if found
1650 if let Some((_, m)) = best_match {
1651 return Some(m);
1652 }
1653 }
1654
1655 None
1656 }
1657
1658 /// Ultra-fast search starting from a specific byte position.
1659 ///
1660 /// This method is optimized for the greedy-first hot path:
1661 /// - No allocations (uses stack arrays for small k)
1662 /// - Direct byte iteration
1663 /// - Early termination on first match
1664 /// - SIMD acceleration when available (`AVX2` on `x86_64`)
1665 #[inline]
1666 #[must_use]
1667 pub fn find_at_byte_position(
1668 &self,
1669 text: &[u8],
1670 start_pos: usize,
1671 threshold: f32,
1672 ) -> Option<DamLevMatch> {
1673 let max_edits = self.limits.max_edits as usize;
1674
1675 // Handle empty/exhausted text: pattern can still match via pure deletions
1676 if start_pos >= text.len() {
1677 // If pattern length <= max_edits, we can delete the entire pattern
1678 if self.pattern_len <= max_edits {
1679 let deletions = self.pattern_len as u8;
1680 let sim = self.calc_similarity(deletions, 0, deletions);
1681 if sim >= threshold {
1682 return Some(DamLevMatch {
1683 start: start_pos,
1684 end: start_pos,
1685 insertions: 0,
1686 deletions,
1687 substitutions: 0,
1688 swaps: 0,
1689 similarity: sim,
1690 });
1691 }
1692 }
1693 return None;
1694 }
1695
1696 // SIMD fast path: NEON on aarch64 for ASCII patterns with k <= 1
1697 #[cfg(all(feature = "simd", target_arch = "aarch64"))]
1698 {
1699 if self.is_ascii && max_edits <= 1 {
1700 // SAFETY: NEON is mandatory on aarch64
1701 return unsafe {
1702 self.find_at_byte_position_neon(text, start_pos, threshold, max_edits)
1703 };
1704 }
1705 }
1706
1707 // SIMD fast path: AVX2 on x86_64 for ASCII patterns with k <= 3
1708 #[cfg(all(feature = "simd", target_arch = "x86_64"))]
1709 {
1710 if self.is_ascii && max_edits <= 3 && simd_avx2::is_available() {
1711 // SAFETY: We've verified AVX2 is available via runtime detection
1712 return unsafe {
1713 self.find_at_byte_position_avx2(text, start_pos, threshold, max_edits)
1714 };
1715 }
1716 }
1717
1718 // Use ASCII fast path when pattern is ASCII
1719 // This avoids UTF-8 decoding and uses direct byte array lookup
1720 if self.is_ascii && max_edits <= 4 {
1721 return self.find_at_byte_position_ascii::<5>(text, start_pos, threshold);
1722 }
1723
1724 // Use stack array for small k (common case), fall back to vec for large k
1725 if max_edits <= 4 {
1726 self.find_at_byte_position_small_k::<5>(text, start_pos, threshold)
1727 } else {
1728 self.find_at_byte_position_large_k(text, start_pos, threshold)
1729 }
1730 }
1731
1732 /// NEON-accelerated search for ASCII patterns with k <= 1.
1733 ///
1734 /// # Safety
1735 /// Safe on all aarch64 targets (NEON is mandatory).
1736 #[cfg(all(feature = "simd", target_arch = "aarch64"))]
1737 #[inline]
1738 unsafe fn find_at_byte_position_neon(
1739 &self,
1740 text: &[u8],
1741 start_pos: usize,
1742 threshold: f32,
1743 max_edits: usize,
1744 ) -> Option<DamLevMatch> {
1745 debug_assert!(max_edits <= 1);
1746 debug_assert!(self.is_ascii);
1747
1748 let max_window = self.pattern_len + max_edits;
1749 let end_limit = (start_pos + max_window + 1).min(text.len());
1750 let search_len = end_limit - start_pos;
1751
1752 if search_len == 0 {
1753 return None;
1754 }
1755
1756 // State arrays: r = current, old_r = previous, old_old_r = 2 iterations ago (for transposition)
1757 let mut r = [!0u64; 4];
1758 let mut old_r = [!0u64; 4];
1759
1760 // Initialize deletion states - left shift advances pattern position
1761 for d in 1..=max_edits {
1762 r[d] = r[d - 1] << 1;
1763 }
1764
1765 // SAFETY: start_pos < text.len() verified by caller, search_len bounds checked above
1766 let text_ptr = unsafe { text.as_ptr().add(start_pos) };
1767 let byte_masks_ptr = self.byte_masks.as_ptr();
1768 let accept_mask = self.accept_mask;
1769
1770 let mut prev_mask: u64 = !0u64;
1771 // old_old_r is 2 iterations ago - on first iteration, it equals old_r's initial state
1772 let mut old_old_r = old_r;
1773
1774 for i in 0..search_len {
1775 // SAFETY: i < search_len which is bounded by text.len() - start_pos
1776 let byte = unsafe { *text_ptr.add(i) };
1777 let mask_idx = (byte & 0x7F) as usize;
1778 // SAFETY: mask_idx is always < 128 due to & 0x7F, and byte_masks has 128 elements
1779 let char_mask = unsafe { *byte_masks_ptr.add(mask_idx) };
1780
1781 // Rotate state history before update
1782 std::mem::swap(&mut old_old_r, &mut old_r);
1783 old_r = r;
1784
1785 // Use NEON state update
1786 // SAFETY: NEON is mandatory on aarch64
1787 unsafe {
1788 simd_neon::update_states_with_trans_k1_neon(
1789 &mut r, &old_r, &old_old_r, char_mask, prev_mask,
1790 );
1791 }
1792
1793 let char_count = i + 1;
1794
1795 for d in 0..=max_edits {
1796 if (r[d] & accept_mask) == 0 {
1797 let end_byte = start_pos + char_count;
1798 let (insertions, deletions, substitutions, swaps) =
1799 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
1800
1801 let sim = self.calc_similarity(d as u8, insertions, deletions);
1802 if sim >= threshold {
1803 return Some(DamLevMatch {
1804 start: start_pos,
1805 end: end_byte,
1806 insertions,
1807 deletions,
1808 substitutions,
1809 swaps,
1810 similarity: sim,
1811 });
1812 }
1813 }
1814 }
1815
1816 prev_mask = char_mask;
1817 }
1818
1819 // Handle text shorter than pattern: positions reached during the last iteration
1820 // need extra match propagation to reach the accept position.
1821 let end_byte = start_pos + search_len;
1822 let chars_short = self.pattern_len.saturating_sub(search_len);
1823 if chars_short > 0 && prev_mask != !0u64 {
1824 for _ in 0..chars_short.min(max_edits) {
1825 old_r = r;
1826
1827 // Apply match propagation with last char's mask
1828 for d in 1..=max_edits {
1829 let match_d = (old_r[d] << 1) | prev_mask;
1830 r[d] &= match_d;
1831 }
1832
1833 // Check for accept
1834 for d in 0..=max_edits {
1835 if (r[d] & accept_mask) == 0 {
1836 let (insertions, deletions, substitutions, swaps) =
1837 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
1838
1839 let total = insertions + deletions + substitutions + swaps;
1840 if total as usize <= d {
1841 let sim = self.calc_similarity(total, insertions, deletions);
1842 if sim >= threshold {
1843 return Some(DamLevMatch {
1844 start: start_pos,
1845 end: end_byte,
1846 insertions,
1847 deletions,
1848 substitutions,
1849 swaps,
1850 similarity: sim,
1851 });
1852 }
1853 }
1854 }
1855 }
1856 }
1857 }
1858
1859 None
1860 }
1861
1862 /// AVX2-accelerated search for ASCII patterns with k <= 3.
1863 ///
1864 /// # Safety
1865 /// Caller must ensure AVX2 is available (check with `simd_avx2::is_available()`).
1866 #[cfg(all(feature = "simd", target_arch = "x86_64"))]
1867 #[target_feature(enable = "avx2")]
1868 #[inline]
1869 unsafe fn find_at_byte_position_avx2(
1870 &self,
1871 text: &[u8],
1872 start_pos: usize,
1873 threshold: f32,
1874 max_edits: usize,
1875 ) -> Option<DamLevMatch> {
1876 debug_assert!(max_edits <= 3);
1877 debug_assert!(self.is_ascii);
1878
1879 let max_window = self.pattern_len + max_edits;
1880 let end_limit = (start_pos + max_window + 1).min(text.len());
1881 let search_len = end_limit - start_pos;
1882
1883 if search_len == 0 {
1884 return None;
1885 }
1886
1887 // State arrays (4 elements for k <= 3)
1888 let mut r = [!0u64; 4];
1889 let mut old_r = [!0u64; 4];
1890 #[allow(unused_assignments)]
1891 let mut old_old_r = [!0u64; 4];
1892
1893 // Initialize deletion states - left shift advances pattern position
1894 for d in 1..=max_edits {
1895 r[d] = r[d - 1] << 1;
1896 }
1897
1898 // SAFETY: start_pos < text.len() verified by caller, search_len bounds checked above
1899 let text_ptr = unsafe { text.as_ptr().add(start_pos) };
1900 let byte_masks_ptr = self.byte_masks.as_ptr();
1901 let accept_mask = self.accept_mask;
1902
1903 let mut prev_mask: u64 = !0u64;
1904
1905 for i in 0..search_len {
1906 // SAFETY: i < search_len which is bounded by text.len() - start_pos
1907 let byte = unsafe { *text_ptr.add(i) };
1908
1909 // Direct array lookup for ASCII
1910 let mask_idx = (byte & 0x7F) as usize;
1911 // SAFETY: mask_idx is always < 128 due to & 0x7F, and byte_masks has 128 elements
1912 let char_mask = unsafe { *byte_masks_ptr.add(mask_idx) };
1913
1914 // Save old states
1915 old_old_r = old_r;
1916 old_r = r;
1917
1918 // Use SIMD state update with transposition
1919 // SAFETY: AVX2 availability verified by caller via simd_avx2::is_available()
1920 unsafe {
1921 simd_avx2::update_states_with_trans_avx2(
1922 &mut r, &old_r, &old_old_r, char_mask, prev_mask, max_edits,
1923 );
1924 }
1925
1926 let char_count = i + 1;
1927
1928 // Check for match (prefer fewer edits)
1929 for d in 0..=max_edits {
1930 if (r[d] & accept_mask) == 0 {
1931 let end_byte = start_pos + char_count;
1932
1933 // Compute exact edit breakdown using DP
1934 let (insertions, deletions, substitutions, swaps) =
1935 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
1936
1937 let sim = self.calc_similarity(d as u8, insertions, deletions);
1938 if sim >= threshold {
1939 return Some(DamLevMatch {
1940 start: start_pos,
1941 end: end_byte,
1942 insertions,
1943 deletions,
1944 substitutions,
1945 swaps,
1946 similarity: sim,
1947 });
1948 }
1949 }
1950 }
1951
1952 prev_mask = char_mask;
1953 }
1954
1955 // Handle text shorter than pattern: positions reached during the last iteration
1956 // need extra match propagation to reach the accept position.
1957 let end_byte = start_pos + search_len;
1958 let chars_short = self.pattern_len.saturating_sub(search_len);
1959 if chars_short > 0 && prev_mask != !0u64 {
1960 for _ in 0..chars_short.min(max_edits) {
1961 old_r = r;
1962
1963 // Apply match propagation with last char's mask
1964 for d in 1..=max_edits {
1965 let match_d = (old_r[d] << 1) | prev_mask;
1966 r[d] &= match_d;
1967 }
1968
1969 // Check for accept
1970 for d in 0..=max_edits {
1971 if (r[d] & accept_mask) == 0 {
1972 let (insertions, deletions, substitutions, swaps) =
1973 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
1974
1975 let total = insertions + deletions + substitutions + swaps;
1976 if total as usize <= d {
1977 let sim = self.calc_similarity(total, insertions, deletions);
1978 if sim >= threshold {
1979 return Some(DamLevMatch {
1980 start: start_pos,
1981 end: end_byte,
1982 insertions,
1983 deletions,
1984 substitutions,
1985 swaps,
1986 similarity: sim,
1987 });
1988 }
1989 }
1990 }
1991 }
1992 }
1993 }
1994
1995 None
1996 }
1997
1998 /// Search multiple positions in parallel using SIMD.
1999 ///
2000 /// Processes up to 4 candidate positions simultaneously, returning the first match.
2001 /// This avoids the cascade dependency issue by parallelizing across positions
2002 /// rather than across error levels.
2003 ///
2004 /// Returns `Some((position_index, match))` if a match is found.
2005 #[cfg(all(feature = "simd", target_arch = "aarch64"))]
2006 #[inline]
2007 #[must_use]
2008 pub fn find_at_positions_parallel(
2009 &self,
2010 text: &[u8],
2011 positions: &[usize],
2012 threshold: f32,
2013 ) -> Option<(usize, DamLevMatch)> {
2014 if positions.is_empty() || !self.is_ascii {
2015 return None;
2016 }
2017
2018 let max_edits = self.limits.max_edits as usize;
2019
2020 // NEON processes 2 positions at a time for k=0
2021 if max_edits == 0 {
2022 // Process pairs of positions
2023 let mut i = 0;
2024 while i + 1 < positions.len() {
2025 let pos_pair = [positions[i], positions[i + 1]];
2026 if let Some((idx, m)) =
2027 unsafe { self.find_at_2_positions_neon_k0(text, pos_pair, threshold) }
2028 {
2029 return Some((i + idx, m));
2030 }
2031 i += 2;
2032 }
2033 // Handle remaining position
2034 if i < positions.len()
2035 && let Some(m) = self.find_at_byte_position(text, positions[i], threshold)
2036 {
2037 return Some((i, m));
2038 }
2039 return None;
2040 }
2041
2042 // For k >= 1, fall back to sequential (NEON k=1 is already optimized)
2043 for (i, &pos) in positions.iter().enumerate() {
2044 if let Some(m) = self.find_at_byte_position(text, pos, threshold) {
2045 return Some((i, m));
2046 }
2047 }
2048 None
2049 }
2050
2051 /// Search multiple positions in parallel using AVX2.
2052 #[cfg(all(feature = "simd", target_arch = "x86_64"))]
2053 #[inline]
2054 #[must_use]
2055 pub fn find_at_positions_parallel(
2056 &self,
2057 text: &[u8],
2058 positions: &[usize],
2059 threshold: f32,
2060 ) -> Option<(usize, DamLevMatch)> {
2061 if positions.is_empty() || !self.is_ascii {
2062 return None;
2063 }
2064
2065 let max_edits = self.limits.max_edits as usize;
2066
2067 // AVX2 processes 4 positions at a time for k=0
2068 if max_edits == 0 && simd_avx2::is_available() {
2069 let mut i = 0;
2070 while i + 3 < positions.len() {
2071 let pos_quad = [
2072 positions[i],
2073 positions[i + 1],
2074 positions[i + 2],
2075 positions[i + 3],
2076 ];
2077 if let Some((idx, m)) =
2078 unsafe { self.find_at_4_positions_avx2_k0(text, pos_quad, threshold) }
2079 {
2080 return Some((i + idx, m));
2081 }
2082 i += 4;
2083 }
2084 // Handle remaining positions
2085 while i < positions.len() {
2086 if let Some(m) = self.find_at_byte_position(text, positions[i], threshold) {
2087 return Some((i, m));
2088 }
2089 i += 1;
2090 }
2091 return None;
2092 }
2093
2094 // Fall back to sequential for k >= 1 or no AVX2
2095 for (i, &pos) in positions.iter().enumerate() {
2096 if let Some(m) = self.find_at_byte_position(text, pos, threshold) {
2097 return Some((i, m));
2098 }
2099 }
2100 None
2101 }
2102
2103 /// Fallback for non-SIMD builds
2104 #[cfg(not(any(
2105 all(feature = "simd", target_arch = "aarch64"),
2106 all(feature = "simd", target_arch = "x86_64")
2107 )))]
2108 #[inline]
2109 pub fn find_at_positions_parallel(
2110 &self,
2111 text: &[u8],
2112 positions: &[usize],
2113 threshold: f32,
2114 ) -> Option<(usize, DamLevMatch)> {
2115 for (i, &pos) in positions.iter().enumerate() {
2116 if let Some(m) = self.find_at_byte_position(text, pos, threshold) {
2117 return Some((i, m));
2118 }
2119 }
2120 None
2121 }
2122
2123 /// NEON: Search 2 positions in parallel for k=0 (exact match).
2124 ///
2125 /// Uses 128-bit NEON to process 2 independent exact-match searches.
2126 #[cfg(all(feature = "simd", target_arch = "aarch64"))]
2127 #[inline]
2128 unsafe fn find_at_2_positions_neon_k0(
2129 &self,
2130 text: &[u8],
2131 positions: [usize; 2],
2132 threshold: f32,
2133 ) -> Option<(usize, DamLevMatch)> {
2134 #[allow(clippy::wildcard_imports)]
2135 use std::arch::aarch64::*;
2136
2137 let max_window = self.pattern_len;
2138 let accept_mask = self.accept_mask;
2139 let byte_masks = &self.byte_masks;
2140
2141 // Calculate search lengths for each position
2142 let end0 = (positions[0] + max_window + 1).min(text.len());
2143 let end1 = (positions[1] + max_window + 1).min(text.len());
2144 let len0 = end0.saturating_sub(positions[0]);
2145 let len1 = end1.saturating_sub(positions[1]);
2146 let max_len = len0.max(len1);
2147
2148 if max_len == 0 {
2149 return None;
2150 }
2151
2152 // Initialize states: all 1s means no match yet
2153 // r[0] = position 0 state, r[1] = position 1 state
2154 let mut r = unsafe { vdupq_n_u64(!0u64) };
2155 let accept_vec = unsafe { vdupq_n_u64(accept_mask) };
2156
2157 for i in 0..max_len {
2158 // Get char masks for both positions (scalar loads, then combine)
2159 let mask0 = if i < len0 {
2160 let byte = text[positions[0] + i];
2161 byte_masks[(byte & 0x7F) as usize]
2162 } else {
2163 !0u64 // No match possible
2164 };
2165
2166 let mask1 = if i < len1 {
2167 let byte = text[positions[1] + i];
2168 byte_masks[(byte & 0x7F) as usize]
2169 } else {
2170 !0u64
2171 };
2172
2173 // Combine masks into NEON vector
2174 let char_masks = unsafe { vcombine_u64(vcreate_u64(mask0), vcreate_u64(mask1)) };
2175
2176 // State update: r = (r << 1) | char_mask
2177 let shifted = unsafe { vshlq_n_u64(r, 1) };
2178 r = unsafe { vorrq_u64(shifted, char_masks) };
2179
2180 // Check for matches: (r & accept_mask) == 0
2181 let masked = unsafe { vandq_u64(r, accept_vec) };
2182
2183 // Extract and check each lane
2184 let lane0 = unsafe { vgetq_lane_u64(masked, 0) };
2185 let lane1 = unsafe { vgetq_lane_u64(masked, 1) };
2186
2187 if lane0 == 0 && i < len0 {
2188 let end_byte = positions[0] + i + 1;
2189 let (insertions, deletions, substitutions, swaps) =
2190 self.compute_exact_edit_breakdown(&text[positions[0]..end_byte]);
2191 let sim = self.calc_similarity(0, insertions, deletions);
2192 if sim >= threshold {
2193 return Some((
2194 0,
2195 DamLevMatch {
2196 start: positions[0],
2197 end: end_byte,
2198 insertions,
2199 deletions,
2200 substitutions,
2201 swaps,
2202 similarity: sim,
2203 },
2204 ));
2205 }
2206 }
2207
2208 if lane1 == 0 && i < len1 {
2209 let end_byte = positions[1] + i + 1;
2210 let (insertions, deletions, substitutions, swaps) =
2211 self.compute_exact_edit_breakdown(&text[positions[1]..end_byte]);
2212 let sim = self.calc_similarity(0, insertions, deletions);
2213 if sim >= threshold {
2214 return Some((
2215 1,
2216 DamLevMatch {
2217 start: positions[1],
2218 end: end_byte,
2219 insertions,
2220 deletions,
2221 substitutions,
2222 swaps,
2223 similarity: sim,
2224 },
2225 ));
2226 }
2227 }
2228 }
2229
2230 None
2231 }
2232
2233 /// AVX2: Search 4 positions in parallel for k=0 (exact match).
2234 ///
2235 /// Uses 256-bit AVX2 to process 4 independent exact-match searches.
2236 #[cfg(all(feature = "simd", target_arch = "x86_64"))]
2237 #[target_feature(enable = "avx2")]
2238 #[inline]
2239 unsafe fn find_at_4_positions_avx2_k0(
2240 &self,
2241 text: &[u8],
2242 positions: [usize; 4],
2243 threshold: f32,
2244 ) -> Option<(usize, DamLevMatch)> {
2245 #[allow(clippy::wildcard_imports)]
2246 use std::arch::x86_64::*;
2247
2248 let max_window = self.pattern_len;
2249 let accept_mask = self.accept_mask;
2250 let byte_masks = &self.byte_masks;
2251
2252 // Calculate search lengths for each position
2253 let ends: [usize; 4] = [
2254 (positions[0] + max_window + 1).min(text.len()),
2255 (positions[1] + max_window + 1).min(text.len()),
2256 (positions[2] + max_window + 1).min(text.len()),
2257 (positions[3] + max_window + 1).min(text.len()),
2258 ];
2259 let lens: [usize; 4] = [
2260 ends[0].saturating_sub(positions[0]),
2261 ends[1].saturating_sub(positions[1]),
2262 ends[2].saturating_sub(positions[2]),
2263 ends[3].saturating_sub(positions[3]),
2264 ];
2265 let max_len = lens[0].max(lens[1]).max(lens[2]).max(lens[3]);
2266
2267 if max_len == 0 {
2268 return None;
2269 }
2270
2271 // Initialize states: all 1s
2272 let mut r = _mm256_set1_epi64x(!0i64);
2273 let accept_vec = _mm256_set1_epi64x(accept_mask as i64);
2274
2275 for i in 0..max_len {
2276 // Get char masks for all 4 positions
2277 let masks: [u64; 4] = [
2278 if i < lens[0] {
2279 byte_masks[(text[positions[0] + i] & 0x7F) as usize]
2280 } else {
2281 !0u64
2282 },
2283 if i < lens[1] {
2284 byte_masks[(text[positions[1] + i] & 0x7F) as usize]
2285 } else {
2286 !0u64
2287 },
2288 if i < lens[2] {
2289 byte_masks[(text[positions[2] + i] & 0x7F) as usize]
2290 } else {
2291 !0u64
2292 },
2293 if i < lens[3] {
2294 byte_masks[(text[positions[3] + i] & 0x7F) as usize]
2295 } else {
2296 !0u64
2297 },
2298 ];
2299
2300 let char_masks = _mm256_set_epi64x(
2301 masks[3] as i64,
2302 masks[2] as i64,
2303 masks[1] as i64,
2304 masks[0] as i64,
2305 );
2306
2307 // State update: r = (r << 1) | char_mask
2308 let shifted = _mm256_slli_epi64(r, 1);
2309 r = _mm256_or_si256(shifted, char_masks);
2310
2311 // Check for matches
2312 let masked = _mm256_and_si256(r, accept_vec);
2313
2314 // Extract and check each lane (use movemask for efficiency)
2315 let zero = _mm256_setzero_si256();
2316 let cmp = _mm256_cmpeq_epi64(masked, zero);
2317 let match_mask = _mm256_movemask_epi8(cmp);
2318
2319 // Check each position (lanes are in order: 0, 1, 2, 3)
2320 // Each lane is 8 bytes, so bits 0-7 = lane 0, 8-15 = lane 1, etc.
2321 if match_mask != 0 {
2322 for (lane, &len) in lens.iter().enumerate() {
2323 if i < len {
2324 let lane_mask = 0xFF << (lane * 8);
2325 if (match_mask & lane_mask) == lane_mask {
2326 let end_byte = positions[lane] + i + 1;
2327 let (insertions, deletions, substitutions, swaps) =
2328 self.compute_exact_edit_breakdown(&text[positions[lane]..end_byte]);
2329 let sim = self.calc_similarity(0, insertions, deletions);
2330 if sim >= threshold {
2331 return Some((
2332 lane,
2333 DamLevMatch {
2334 start: positions[lane],
2335 end: end_byte,
2336 insertions,
2337 deletions,
2338 substitutions,
2339 swaps,
2340 similarity: sim,
2341 },
2342 ));
2343 }
2344 }
2345 }
2346 }
2347 }
2348 }
2349
2350 None
2351 }
2352
2353 /// ASCII-optimized search - no UTF-8 decoding, direct byte mask lookup.
2354 /// Uses unsafe to eliminate bounds checks in the hot loop.
2355 #[inline(always)]
2356 fn find_at_byte_position_ascii<const K: usize>(
2357 &self,
2358 text: &[u8],
2359 start_pos: usize,
2360 threshold: f32,
2361 ) -> Option<DamLevMatch> {
2362 let max_edits = self.limits.max_edits as usize;
2363 debug_assert!(max_edits < K);
2364
2365 let max_window = self.pattern_len + max_edits;
2366 let end_limit = (start_pos + max_window + 1).min(text.len());
2367 let search_len = end_limit - start_pos;
2368
2369 if search_len == 0 {
2370 return None;
2371 }
2372
2373 // SAFETY: We've bounds-checked above, and byte_masks has 128 elements
2374 // which covers all ASCII bytes (0-127). Non-ASCII bytes are handled
2375 // by returning !0u64 (no match).
2376 unsafe {
2377 self.find_at_byte_position_ascii_unchecked::<K>(
2378 text, start_pos, search_len, threshold, max_edits,
2379 )
2380 }
2381 }
2382
2383 /// Inner loop with no bounds checks - SAFETY: caller must ensure bounds are valid
2384 #[inline(always)]
2385 unsafe fn find_at_byte_position_ascii_unchecked<const K: usize>(
2386 &self,
2387 text: &[u8],
2388 start_pos: usize,
2389 search_len: usize,
2390 threshold: f32,
2391 max_edits: usize,
2392 ) -> Option<DamLevMatch> {
2393 // SAFETY: caller guarantees all bounds are valid
2394 unsafe {
2395 // Stack-allocated state vectors
2396 let mut r = [!0u64; K];
2397 let mut old_r = [!0u64; K];
2398 let mut old_old_r = [!0u64; K]; // State from 2 iterations ago for transposition
2399
2400 // Initialize deletion states - left shift advances pattern position
2401 for d in 1..=max_edits {
2402 *r.get_unchecked_mut(d) = *r.get_unchecked(d - 1) << 1;
2403 }
2404
2405 let text_ptr = text.as_ptr().add(start_pos);
2406 let byte_masks_ptr = self.byte_masks.as_ptr();
2407 let accept_mask = self.accept_mask;
2408 let _ = self.pattern_len;
2409
2410 let mut prev_byte: Option<u8> = None;
2411
2412 for i in 0..search_len {
2413 let byte = *text_ptr.add(i);
2414
2415 // Direct array lookup - mask non-ASCII to 0 index (which has !0u64)
2416 let mask_idx = (byte & 0x7F) as usize;
2417 let char_mask = *byte_masks_ptr.add(mask_idx);
2418
2419 // Save states from 2 iterations ago
2420 for d in 0..=max_edits {
2421 *old_old_r.get_unchecked_mut(d) = *old_r.get_unchecked(d);
2422 *old_r.get_unchecked_mut(d) = *r.get_unchecked(d);
2423 }
2424
2425 // Update state 0 (exact match)
2426 *r.get_unchecked_mut(0) = (*r.get_unchecked(0) << 1) | char_mask;
2427
2428 // Update fuzzy states
2429 for d in 1..=max_edits {
2430 let insert = *old_r.get_unchecked(d - 1);
2431 let delete = *r.get_unchecked(d - 1) << 1; // left shift advances pattern position
2432 let substitute = *old_r.get_unchecked(d - 1) << 1;
2433 let match_d = (*old_r.get_unchecked(d) << 1) | char_mask;
2434 let mut new_r = match_d & insert & delete & substitute;
2435
2436 // Transposition: if we have a previous character, check for swaps
2437 // Transposition at position j means pattern[j]=curr AND pattern[j+1]=prev
2438 if let Some(prev_b) = prev_byte {
2439 let prev_mask_idx = (prev_b & 0x7F) as usize;
2440 let prev_mask = *byte_masks_ptr.add(prev_mask_idx);
2441 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
2442 let trans_valid_mask = char_mask | (prev_mask >> 1);
2443 // From matched position k (bit k=0), we can reach k+2 via transposition at k+1
2444 // Shift old_old_r left first to align: bit k becomes bit k+1
2445 // This also makes bit 0 = 0, allowing transposition at position 0
2446 let trans =
2447 ((*old_old_r.get_unchecked(d - 1) << 1) | trans_valid_mask) << 1;
2448 new_r &= trans;
2449 }
2450
2451 *r.get_unchecked_mut(d) = new_r;
2452 }
2453
2454 let char_count = i + 1;
2455
2456 // Check for match (prefer fewer edits)
2457 for d in 0..=max_edits {
2458 if (*r.get_unchecked(d) & accept_mask) == 0 {
2459 let end_byte = start_pos + char_count;
2460 // Compute exact edit breakdown using DP
2461 let (insertions, deletions, substitutions, swaps) =
2462 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2463
2464 let sim = self.calc_similarity(d as u8, insertions, deletions);
2465 if sim >= threshold {
2466 return Some(DamLevMatch {
2467 start: start_pos,
2468 end: end_byte,
2469 insertions,
2470 deletions,
2471 substitutions,
2472 swaps,
2473 similarity: sim,
2474 });
2475 }
2476 }
2477 }
2478
2479 prev_byte = Some(byte);
2480 }
2481
2482 // Handle text shorter than pattern: positions reached during the last iteration
2483 // need extra match propagation to reach the accept position.
2484 // Re-process the last char_mask to allow match propagation.
2485 let end_byte = start_pos + search_len;
2486 if let Some(last_byte) = prev_byte {
2487 let last_mask = *byte_masks_ptr.add((last_byte & 0x7F) as usize);
2488 let chars_short = self.pattern_len.saturating_sub(search_len);
2489
2490 for _ in 0..chars_short.min(max_edits) {
2491 for d in 0..=max_edits {
2492 *old_r.get_unchecked_mut(d) = *r.get_unchecked(d);
2493 }
2494
2495 // Apply match propagation: from position p with d errors,
2496 // if pattern[p+1] matches last_char, reach position p+1 with d errors
2497 for d in 1..=max_edits {
2498 let match_d = (*old_r.get_unchecked(d) << 1) | last_mask;
2499 *r.get_unchecked_mut(d) &= match_d;
2500 }
2501
2502 // Check for accept after each propagation
2503 for d in 0..=max_edits {
2504 if (*r.get_unchecked(d) & accept_mask) == 0 {
2505 let (insertions, deletions, substitutions, swaps) =
2506 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2507
2508 let total = insertions + deletions + substitutions + swaps;
2509 if total as usize <= d {
2510 let sim = self.calc_similarity(total, insertions, deletions);
2511 if sim >= threshold {
2512 return Some(DamLevMatch {
2513 start: start_pos,
2514 end: end_byte,
2515 insertions,
2516 deletions,
2517 substitutions,
2518 swaps,
2519 similarity: sim,
2520 });
2521 }
2522 }
2523 }
2524 }
2525 }
2526 }
2527
2528 None
2529 }
2530 }
2531
2532 #[inline]
2533 fn find_at_byte_position_small_k<const K: usize>(
2534 &self,
2535 text: &[u8],
2536 start_pos: usize,
2537 threshold: f32,
2538 ) -> Option<DamLevMatch> {
2539 let max_edits = self.limits.max_edits as usize;
2540 debug_assert!(max_edits < K);
2541
2542 // Stack-allocated state vectors
2543 let mut r = [!0u64; K];
2544 let mut old_r = [!0u64; K];
2545 let mut old_old_r = [!0u64; K]; // State from 2 iterations ago for transposition
2546
2547 // Initialize deletion states - left shift advances pattern position
2548 for d in 1..=max_edits {
2549 r[d] = r[d - 1] << 1;
2550 }
2551
2552 // max_window is in characters, but we iterate bytes.
2553 // For UTF-8, multiply by max char size (4) to ensure we process enough bytes.
2554 let max_window_chars = self.pattern_len + max_edits;
2555 let max_window_bytes = if self.is_ascii {
2556 max_window_chars + 1
2557 } else {
2558 max_window_chars * 4 + 1
2559 };
2560 let end_limit = (start_pos + max_window_bytes).min(text.len());
2561
2562 // Cache previous mask to avoid redundant lookups in transposition check
2563 let mut prev_mask: Option<u64> = None;
2564 let case_insensitive = self.case_insensitive;
2565
2566 // Iterate bytes, handling UTF-8
2567 let mut pos = start_pos;
2568 let mut char_count = 0usize;
2569 while pos < end_limit && char_count <= max_window_chars {
2570 let byte = text[pos];
2571
2572 // Get character mask and length with fast paths
2573 let (char_mask, char_len) = if byte < 128 {
2574 // ASCII fast path
2575 let lookup_byte = if case_insensitive {
2576 byte.to_ascii_lowercase()
2577 } else {
2578 byte
2579 };
2580 (self.byte_masks[lookup_byte as usize], 1)
2581 } else if byte < 224 && pos + 1 < text.len() {
2582 // 2-byte UTF-8 fast path (Cyrillic, etc.)
2583 let b1 = text[pos + 1];
2584 if case_insensitive {
2585 let codepoint = ((u32::from(byte) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
2586 let ch = unsafe { char::from_u32_unchecked(codepoint) };
2587 let ch_lower = ch.to_lowercase().next().unwrap_or(ch);
2588 (self.get_mask(ch_lower), 2)
2589 } else {
2590 (self.get_mask_2byte(byte, b1), 2)
2591 }
2592 } else {
2593 // 3/4-byte UTF-8 or incomplete
2594 let (ch, len) = decode_utf8_char_fast(text, pos);
2595 let ch = if case_insensitive {
2596 ch.to_lowercase().next().unwrap_or(ch)
2597 } else {
2598 ch
2599 };
2600 (self.get_mask(ch), len)
2601 };
2602
2603 // Save old states
2604 old_old_r[..=max_edits].copy_from_slice(&old_r[..=max_edits]);
2605 old_r[..=max_edits].copy_from_slice(&r[..=max_edits]);
2606
2607 // Update states
2608 r[0] = (r[0] << 1) | char_mask;
2609
2610 for d in 1..=max_edits {
2611 let insert = old_r[d - 1];
2612 let delete = r[d - 1] << 1; // left shift advances pattern position
2613 let substitute = old_r[d - 1] << 1;
2614 let match_d = (old_r[d] << 1) | char_mask;
2615 let mut new_r = match_d & insert & delete & substitute;
2616
2617 // Transposition: if we have a previous mask, check for swaps
2618 // (use cached prev_mask instead of recomputing)
2619 if let Some(pm) = prev_mask {
2620 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
2621 let trans_valid_mask = char_mask | (pm >> 1);
2622 // From matched position k, we can reach k+2 via transposition at k+1
2623 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
2624 new_r &= trans;
2625 }
2626
2627 r[d] = new_r;
2628 }
2629
2630 let end_byte = pos + char_len;
2631
2632 // Check for match (prefer fewer edits)
2633 for d in 0..=max_edits {
2634 if (r[d] & self.accept_mask) == 0 {
2635 // Compute exact edit breakdown using DP
2636 let (insertions, deletions, substitutions, swaps) =
2637 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2638
2639 let sim = self.calc_similarity(d as u8, insertions, deletions);
2640 if sim >= threshold {
2641 return Some(DamLevMatch {
2642 start: start_pos,
2643 end: end_byte,
2644 insertions,
2645 deletions,
2646 substitutions,
2647 swaps,
2648 similarity: sim,
2649 });
2650 }
2651 }
2652 }
2653
2654 prev_mask = Some(char_mask);
2655 pos += char_len;
2656 char_count += 1;
2657 }
2658
2659 // Handle text shorter than pattern: positions reached during the last iteration
2660 // need extra match propagation to reach the accept position.
2661 // Re-process the last char_mask to allow match propagation.
2662 let end_byte = pos;
2663 if let Some(last_mask) = prev_mask {
2664 let chars_short = self.pattern_len.saturating_sub(char_count);
2665 for _ in 0..chars_short.min(max_edits) {
2666 old_r = r;
2667 // Note: old_old_r is intentionally not updated - transpositions don't apply
2668 // when we're just propagating matches without processing new characters
2669
2670 // Apply match propagation: from position p with d errors,
2671 // if pattern[p+1] matches last_char, reach position p+1 with d errors
2672 // Note: transpositions don't apply here since we're not processing new characters
2673 for d in 1..=max_edits {
2674 let match_d = (old_r[d] << 1) | last_mask;
2675 r[d] &= match_d;
2676 }
2677
2678 // Check for accept after each propagation
2679 for d in 0..=max_edits {
2680 if (r[d] & self.accept_mask) == 0 {
2681 let (insertions, deletions, substitutions, swaps) =
2682 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2683
2684 let total = insertions + deletions + substitutions + swaps;
2685 if total as usize <= d {
2686 let sim = self.calc_similarity(total, insertions, deletions);
2687 if sim >= threshold {
2688 return Some(DamLevMatch {
2689 start: start_pos,
2690 end: end_byte,
2691 insertions,
2692 deletions,
2693 substitutions,
2694 swaps,
2695 similarity: sim,
2696 });
2697 }
2698 }
2699 }
2700 }
2701 }
2702 }
2703
2704 None
2705 }
2706
2707 fn find_at_byte_position_large_k(
2708 &self,
2709 text: &[u8],
2710 start_pos: usize,
2711 threshold: f32,
2712 ) -> Option<DamLevMatch> {
2713 let max_edits = self.limits.max_edits as usize;
2714
2715 let mut r = vec![!0u64; max_edits + 1];
2716 let mut old_r = vec![!0u64; max_edits + 1];
2717 let mut old_old_r = vec![!0u64; max_edits + 1]; // State from 2 iterations ago for transposition
2718
2719 // Initialize deletion states - left shift advances pattern position
2720 for d in 1..=max_edits {
2721 r[d] = r[d - 1] << 1;
2722 }
2723
2724 // max_window is in characters, but we iterate bytes.
2725 // For UTF-8, multiply by max char size (4) to ensure we process enough bytes.
2726 let max_window_chars = self.pattern_len + max_edits;
2727 let max_window_bytes = if self.is_ascii {
2728 max_window_chars + 1
2729 } else {
2730 max_window_chars * 4 + 1
2731 };
2732 let end_limit = (start_pos + max_window_bytes).min(text.len());
2733
2734 let mut pos = start_pos;
2735 let mut prev_mask: Option<u64> = None;
2736 let case_insensitive = self.case_insensitive;
2737 let mut char_count = 0usize;
2738
2739 while pos < end_limit && char_count <= max_window_chars {
2740 let byte = text[pos];
2741
2742 // Get character mask and length with fast paths
2743 let (char_mask, char_len) = if byte < 128 {
2744 let lookup_byte = if case_insensitive {
2745 byte.to_ascii_lowercase()
2746 } else {
2747 byte
2748 };
2749 (self.byte_masks[lookup_byte as usize], 1)
2750 } else if byte < 224 && pos + 1 < text.len() {
2751 // 2-byte UTF-8 fast path
2752 let b1 = text[pos + 1];
2753 if case_insensitive {
2754 let codepoint = ((u32::from(byte) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
2755 let ch = unsafe { char::from_u32_unchecked(codepoint) };
2756 let ch_lower = ch.to_lowercase().next().unwrap_or(ch);
2757 (self.get_mask(ch_lower), 2)
2758 } else {
2759 (self.get_mask_2byte(byte, b1), 2)
2760 }
2761 } else {
2762 let (ch, len) = decode_utf8_char_fast(text, pos);
2763 let ch = if case_insensitive {
2764 ch.to_lowercase().next().unwrap_or(ch)
2765 } else {
2766 ch
2767 };
2768 (self.get_mask(ch), len)
2769 };
2770
2771 old_old_r.copy_from_slice(&old_r);
2772 old_r.copy_from_slice(&r);
2773
2774 r[0] = (r[0] << 1) | char_mask;
2775
2776 for d in 1..=max_edits {
2777 let insert = old_r[d - 1];
2778 let delete = r[d - 1] << 1; // left shift advances pattern position
2779 let substitute = old_r[d - 1] << 1;
2780 let match_d = (old_r[d] << 1) | char_mask;
2781 let mut new_r = match_d & insert & delete & substitute;
2782
2783 // Transposition: use cached prev_mask instead of recomputing
2784 if let Some(pm) = prev_mask {
2785 let trans_valid_mask = char_mask | (pm >> 1);
2786 // From matched position k, we can reach k+2 via transposition at k+1
2787 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
2788 new_r &= trans;
2789 }
2790
2791 r[d] = new_r;
2792 }
2793
2794 let end_byte = pos + char_len;
2795
2796 for d in 0..=max_edits {
2797 if (r[d] & self.accept_mask) == 0 {
2798 // Compute exact edit breakdown using DP
2799 let (insertions, deletions, substitutions, swaps) =
2800 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2801
2802 let sim = self.calc_similarity(d as u8, insertions, deletions);
2803 if sim >= threshold {
2804 return Some(DamLevMatch {
2805 start: start_pos,
2806 end: end_byte,
2807 insertions,
2808 deletions,
2809 substitutions,
2810 swaps,
2811 similarity: sim,
2812 });
2813 }
2814 }
2815 }
2816
2817 prev_mask = Some(char_mask);
2818 pos += char_len;
2819 char_count += 1;
2820 }
2821
2822 // Handle text shorter than pattern: positions reached during the last iteration
2823 // need extra match propagation to reach the accept position.
2824 let end_byte = pos;
2825 if let Some(last_mask) = prev_mask {
2826 let chars_short = self.pattern_len.saturating_sub(char_count);
2827 for _ in 0..chars_short.min(max_edits) {
2828 old_r.copy_from_slice(&r);
2829
2830 // Apply match propagation: from position p with d errors,
2831 // if pattern[p+1] matches last_char, reach position p+1 with d errors
2832 for d in 1..=max_edits {
2833 let match_d = (old_r[d] << 1) | last_mask;
2834 r[d] &= match_d;
2835 }
2836
2837 // Check for accept after each propagation
2838 for d in 0..=max_edits {
2839 if (r[d] & self.accept_mask) == 0 {
2840 let (insertions, deletions, substitutions, swaps) =
2841 self.compute_exact_edit_breakdown(&text[start_pos..end_byte]);
2842
2843 let total = insertions + deletions + substitutions + swaps;
2844 if total as usize <= d {
2845 let sim = self.calc_similarity(total, insertions, deletions);
2846 if sim >= threshold {
2847 return Some(DamLevMatch {
2848 start: start_pos,
2849 end: end_byte,
2850 insertions,
2851 deletions,
2852 substitutions,
2853 swaps,
2854 similarity: sim,
2855 });
2856 }
2857 }
2858 }
2859 }
2860 }
2861 }
2862
2863 None
2864 }
2865
2866 /// Streaming search: scan entire text in one pass, return first match.
2867 /// This is O(n * k) where n = text length, k = max edits.
2868 /// Much faster for long texts than repeated `find_at_byte_position` calls.
2869 #[inline]
2870 #[must_use]
2871 pub fn find_first_streaming(&self, text: &[u8], threshold: f32) -> Option<DamLevMatch> {
2872 let max_edits = self.limits.max_edits as usize;
2873
2874 // Use const generics for common cases - already highly optimized
2875 match max_edits {
2876 0 => self.find_first_streaming_k::<1>(text, threshold, 0),
2877 1 => self.find_first_streaming_k::<2>(text, threshold, 1),
2878 2 => self.find_first_streaming_k::<3>(text, threshold, 2),
2879 3 => self.find_first_streaming_k::<4>(text, threshold, 3),
2880 4 => self.find_first_streaming_k::<5>(text, threshold, 4),
2881 _ => self.find_first_streaming_large_k(text, threshold),
2882 }
2883 }
2884
2885 /// Streaming search with const-size state arrays for performance.
2886 /// Uses three rotating buffers to support transposition detection.
2887 /// Continues processing after finding fuzzy matches to prefer exact matches.
2888 #[inline]
2889 fn find_first_streaming_k<const K: usize>(
2890 &self,
2891 text: &[u8],
2892 threshold: f32,
2893 max_edits: usize,
2894 ) -> Option<DamLevMatch> {
2895 debug_assert!(max_edits < K);
2896
2897 // Handle empty text: pattern can still match via pure deletions
2898 if text.is_empty() && self.pattern_len <= max_edits {
2899 let deletions = self.pattern_len as u8;
2900 let sim = self.calc_similarity(deletions, 0, deletions);
2901 if sim >= threshold {
2902 return Some(DamLevMatch {
2903 start: 0,
2904 end: 0,
2905 insertions: 0,
2906 deletions,
2907 substitutions: 0,
2908 swaps: 0,
2909 similarity: sim,
2910 });
2911 }
2912 return None;
2913 }
2914
2915 // Use three state arrays for rotation (need old_old for transposition)
2916 let mut r0 = [!0u64; K];
2917 let mut r1 = [!0u64; K];
2918 let mut r2 = [!0u64; K];
2919
2920 // Initialize: can delete up to max_edits chars from pattern start
2921 for d in 1..=max_edits {
2922 r0[d] = r0[d - 1] << 1; // left shift advances pattern position
2923 }
2924
2925 // Track byte positions where each error level's current match started
2926 let mut start_bytes = [0usize; K];
2927
2928 let byte_masks = &self.byte_masks;
2929 let accept_mask = self.accept_mask;
2930 let case_insensitive = self.case_insensitive;
2931
2932 let mut pos = 0usize;
2933 let mut rotation = 0usize; // 0, 1, 2 rotation for three buffers
2934 let mut prev_mask: u64 = !0u64; // Previous character mask for transposition
2935
2936 // Track best match found so far (prefer fewer edits)
2937 // After finding a fuzzy match, continue for max_edits more chars to find better matches
2938 let mut best_match: Option<(usize, DamLevMatch)> = None; // (edit_level, match)
2939 let mut chars_since_first_match = 0usize;
2940
2941 while pos < text.len() {
2942 let byte = text[pos];
2943
2944 // Get character mask and length (ASCII fast path)
2945 let (char_mask, char_len) = if byte < 128 {
2946 let lookup_byte = if case_insensitive {
2947 byte.to_ascii_lowercase()
2948 } else {
2949 byte
2950 };
2951 (byte_masks[lookup_byte as usize], 1)
2952 } else if byte < 224 && pos + 1 < text.len() {
2953 // 2-byte UTF-8 fast path (Cyrillic, Latin Extended, etc.)
2954 let b1 = text[pos + 1];
2955 if case_insensitive {
2956 // Need full decode for case conversion
2957 let codepoint = ((u32::from(byte) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
2958 let ch = unsafe { char::from_u32_unchecked(codepoint) };
2959 let ch_lower = ch.to_lowercase().next().unwrap_or(ch);
2960 (self.get_mask(ch_lower), 2)
2961 } else {
2962 (self.get_mask_2byte(byte, b1), 2)
2963 }
2964 } else {
2965 // 3/4-byte UTF-8 or incomplete sequence
2966 let (ch, len) = decode_utf8_char_fast(text, pos);
2967 let ch = if case_insensitive {
2968 ch.to_lowercase().next().unwrap_or(ch)
2969 } else {
2970 ch
2971 };
2972 (self.get_mask(ch), len)
2973 };
2974
2975 // Three-way rotation: old_old -> old -> new
2976 let (old_old_r, old_r, new_r) = match rotation {
2977 0 => (&r2, &r0, &mut r1),
2978 1 => (&r0, &r1, &mut r2),
2979 _ => (&r1, &r2, &mut r0),
2980 };
2981
2982 // Update R[0] (exact matching)
2983 new_r[0] = (old_r[0] << 1) | char_mask;
2984
2985 // Update start position for d=0 if no partial match
2986 if new_r[0] == !0u64 {
2987 start_bytes[0] = pos + char_len;
2988 }
2989
2990 // Update R[d] for d > 0 (fuzzy matching)
2991 for d in 1..=max_edits {
2992 let insert = old_r[d - 1]; // consume text char without advancing pattern
2993 let delete = new_r[d - 1] << 1; // left shift advances pattern position
2994 let substitute = old_r[d - 1] << 1; // replace pattern char
2995 let match_d = (old_r[d] << 1) | char_mask;
2996
2997 let mut new_val = match_d & insert & delete & substitute;
2998
2999 // Transposition: check if we can swap adjacent chars
3000 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
3001 let trans_valid_mask = char_mask | (prev_mask >> 1);
3002 // From matched position k, we can reach k+2 via transposition at k+1
3003 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
3004 new_val &= trans;
3005
3006 new_r[d] = new_val;
3007
3008 // Update start position if no partial match
3009 if new_r[d] == !0u64 {
3010 start_bytes[d] = pos + char_len;
3011 }
3012 }
3013
3014 // Check for matches (prefer fewer edits)
3015 let end_byte = pos + char_len;
3016
3017 for d in 0..=max_edits {
3018 if (new_r[d] & accept_mask) == 0 {
3019 // Streaming found a potential match ending here.
3020 // Use tracked start position for this error level (fast path)
3021 let tracked_start = start_bytes[d];
3022
3023 // Fast path: if tracked start gives exact pattern length match, use it directly
3024 if end_byte >= tracked_start {
3025 let match_len = end_byte - tracked_start;
3026
3027 // Ultra-fast path for exact matches (d=0, length matches exactly)
3028 // Skip DP computation entirely - we know it's 0 edits
3029 if d == 0 && match_len == self.pattern_len {
3030 return Some(DamLevMatch {
3031 start: tracked_start,
3032 end: end_byte,
3033 insertions: 0,
3034 deletions: 0,
3035 substitutions: 0,
3036 swaps: 0,
3037 similarity: 1.0,
3038 });
3039 }
3040
3041 // Check if this is likely the best match (close to pattern length)
3042 if match_len >= self.pattern_len.saturating_sub(d)
3043 && match_len <= self.pattern_len + d
3044 {
3045 let (insertions, deletions, substitutions, swaps) =
3046 self.compute_exact_edit_breakdown(&text[tracked_start..end_byte]);
3047 let total = insertions + deletions + substitutions + swaps;
3048
3049 if total as usize <= d {
3050 let sim = self.calc_similarity(total, insertions, deletions);
3051 if sim >= threshold {
3052 // For exact length matches with d=0, return immediately
3053 if d == 0 {
3054 return Some(DamLevMatch {
3055 start: tracked_start,
3056 end: end_byte,
3057 insertions,
3058 deletions,
3059 substitutions,
3060 swaps,
3061 similarity: sim,
3062 });
3063 }
3064
3065 // For fuzzy matches, track as candidate and continue
3066 let candidate = DamLevMatch {
3067 start: tracked_start,
3068 end: end_byte,
3069 insertions,
3070 deletions,
3071 substitutions,
3072 swaps,
3073 similarity: sim,
3074 };
3075
3076 // Prefer: fewer edits, then closer to pattern length
3077 let len_diff =
3078 (match_len as i32 - self.pattern_len as i32).abs();
3079 if best_match.as_ref().is_none_or(|(best_d, b)| {
3080 let b_len = b.end - b.start;
3081 let b_len_diff =
3082 (b_len as i32 - self.pattern_len as i32).abs();
3083 d < *best_d
3084 || (d == *best_d && total < b.total_edits())
3085 || (d == *best_d
3086 && total == b.total_edits()
3087 && len_diff < b_len_diff)
3088 }) {
3089 if best_match.is_none() {
3090 chars_since_first_match = 0;
3091 }
3092 best_match = Some((d, candidate));
3093 }
3094 }
3095 }
3096 }
3097 }
3098
3099 // For fuzzy matches not caught by fast path, search all possible start positions
3100 if d > 0 {
3101 let search_start = end_byte.saturating_sub(self.pattern_len + d);
3102
3103 for try_start in search_start..end_byte {
3104 // Skip if not at a valid UTF-8 char boundary
3105 if try_start > 0 && text[try_start] >= 0x80 && text[try_start] < 0xC0 {
3106 continue;
3107 }
3108
3109 // Compute exact edit breakdown using DP
3110 let (insertions, deletions, substitutions, swaps) =
3111 self.compute_exact_edit_breakdown(&text[try_start..end_byte]);
3112
3113 let total = insertions + deletions + substitutions + swaps;
3114 if total as usize <= d {
3115 let sim = self.calc_similarity(total, insertions, deletions);
3116 if sim >= threshold {
3117 let candidate = DamLevMatch {
3118 start: try_start,
3119 end: end_byte,
3120 insertions,
3121 deletions,
3122 substitutions,
3123 swaps,
3124 similarity: sim,
3125 };
3126 // Prefer: fewer edits, then closer to pattern length
3127 let match_len = end_byte - try_start;
3128 let len_diff =
3129 (match_len as i32 - self.pattern_len as i32).abs();
3130 if best_match.as_ref().is_none_or(|(best_d, b)| {
3131 let b_len = b.end - b.start;
3132 let b_len_diff =
3133 (b_len as i32 - self.pattern_len as i32).abs();
3134 d < *best_d
3135 || (d == *best_d && total < b.total_edits())
3136 || (d == *best_d
3137 && total == b.total_edits()
3138 && len_diff < b_len_diff)
3139 }) {
3140 if best_match.is_none() {
3141 chars_since_first_match = 0;
3142 }
3143 best_match = Some((d, candidate));
3144 }
3145 }
3146 }
3147 }
3148 }
3149 }
3150 }
3151
3152 // After finding a fuzzy match, check if we need to continue looking for better matches.
3153 // Only continue if the match is "suspicious" (shorter than pattern, indicating possible
3154 // early accept due to deletions). If match_length >= pattern_length, return immediately.
3155 if let Some((_, ref m)) = best_match {
3156 let match_len = m.end - m.start;
3157 if match_len >= self.pattern_len {
3158 // Match is at least pattern length - can't be early accept due to deletions
3159 return best_match.map(|(_, m)| m);
3160 }
3161 // Short match - might be early accept, continue for a few more chars
3162 chars_since_first_match += 1;
3163 if chars_since_first_match > max_edits {
3164 return best_match.map(|(_, m)| m);
3165 }
3166 }
3167
3168 prev_mask = char_mask;
3169 pos += char_len;
3170 rotation = (rotation + 1) % 3;
3171 }
3172
3173 best_match.map(|(_, m)| m)
3174 }
3175
3176 /// Streaming search for large k values (uses heap allocation with three-buffer rotation).
3177 /// Supports transposition detection. Continues processing after fuzzy matches to prefer exact matches.
3178 fn find_first_streaming_large_k(&self, text: &[u8], threshold: f32) -> Option<DamLevMatch> {
3179 let max_edits = self.limits.max_edits as usize;
3180
3181 // Handle empty text: pattern can still match via pure deletions
3182 if text.is_empty() && self.pattern_len <= max_edits {
3183 let deletions = self.pattern_len as u8;
3184 let sim = self.calc_similarity(deletions, 0, deletions);
3185 if sim >= threshold {
3186 return Some(DamLevMatch {
3187 start: 0,
3188 end: 0,
3189 insertions: 0,
3190 deletions,
3191 substitutions: 0,
3192 swaps: 0,
3193 similarity: sim,
3194 });
3195 }
3196 return None;
3197 }
3198
3199 // Use three buffers for rotation (need old_old for transposition)
3200 let mut r0 = vec![!0u64; max_edits + 1];
3201 let mut r1 = vec![!0u64; max_edits + 1];
3202 let mut r2 = vec![!0u64; max_edits + 1];
3203 let mut start_bytes = vec![0usize; max_edits + 1];
3204
3205 // Initialize: can delete up to max_edits chars from pattern start
3206 for d in 1..=max_edits {
3207 r0[d] = r0[d - 1] << 1; // left shift advances pattern position
3208 }
3209
3210 let byte_masks = &self.byte_masks;
3211 let accept_mask = self.accept_mask;
3212 let case_insensitive = self.case_insensitive;
3213
3214 let mut pos = 0usize;
3215 let mut rotation = 0usize; // 0, 1, 2 rotation for three buffers
3216 let mut prev_mask: u64 = !0u64; // Previous character mask for transposition
3217
3218 // Track best match found so far (prefer fewer edits)
3219 // After finding a fuzzy match, continue for max_edits more chars to find better matches
3220 let mut best_match: Option<(usize, DamLevMatch)> = None; // (edit_level, match)
3221 let mut chars_since_first_match = 0usize;
3222
3223 while pos < text.len() {
3224 let byte = text[pos];
3225
3226 let (char_mask, char_len) = if byte < 128 {
3227 let lookup_byte = if case_insensitive {
3228 byte.to_ascii_lowercase()
3229 } else {
3230 byte
3231 };
3232 (byte_masks[lookup_byte as usize], 1)
3233 } else if byte < 224 && pos + 1 < text.len() {
3234 // 2-byte UTF-8 fast path (Cyrillic, Latin Extended, etc.)
3235 let b1 = text[pos + 1];
3236 if case_insensitive {
3237 let codepoint = ((u32::from(byte) & 0x1F) << 6) | (u32::from(b1) & 0x3F);
3238 let ch = unsafe { char::from_u32_unchecked(codepoint) };
3239 let ch_lower = ch.to_lowercase().next().unwrap_or(ch);
3240 (self.get_mask(ch_lower), 2)
3241 } else {
3242 (self.get_mask_2byte(byte, b1), 2)
3243 }
3244 } else {
3245 // 3/4-byte UTF-8 or incomplete sequence
3246 let (ch, len) = decode_utf8_char_fast(text, pos);
3247 let ch = if case_insensitive {
3248 ch.to_lowercase().next().unwrap_or(ch)
3249 } else {
3250 ch
3251 };
3252 (self.get_mask(ch), len)
3253 };
3254
3255 // Three-way rotation: old_old -> old -> new
3256 let (old_old_r, old_r, new_r) = match rotation {
3257 0 => (&r2, &r0, &mut r1),
3258 1 => (&r0, &r1, &mut r2),
3259 _ => (&r1, &r2, &mut r0),
3260 };
3261
3262 // Update R[0] (exact matching)
3263 new_r[0] = (old_r[0] << 1) | char_mask;
3264 if new_r[0] == !0u64 {
3265 start_bytes[0] = pos + char_len;
3266 }
3267
3268 // Update R[d] for d > 0 (fuzzy matching)
3269 for d in 1..=max_edits {
3270 let insert = old_r[d - 1]; // consume text char without advancing pattern
3271 let delete = new_r[d - 1] << 1; // left shift advances pattern position
3272 let substitute = old_r[d - 1] << 1; // replace pattern char
3273 let match_d = (old_r[d] << 1) | char_mask;
3274
3275 let mut new_val = match_d & insert & delete & substitute;
3276
3277 // Transposition: check if we can swap adjacent chars
3278 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
3279 let trans_valid_mask = char_mask | (prev_mask >> 1);
3280 // From matched position k, we can reach k+2 via transposition at k+1
3281 let trans = ((old_old_r[d - 1] << 1) | trans_valid_mask) << 1;
3282 new_val &= trans;
3283
3284 new_r[d] = new_val;
3285
3286 if new_r[d] == !0u64 {
3287 start_bytes[d] = pos + char_len;
3288 }
3289 }
3290
3291 let end_byte = pos + char_len;
3292
3293 for d in 0..=max_edits {
3294 if (new_r[d] & accept_mask) == 0 {
3295 // Streaming found a potential match ending here.
3296 // For exact match (d=0), return immediately
3297 if d == 0 {
3298 let tracked_start = start_bytes[0];
3299 if end_byte >= tracked_start {
3300 let match_len = end_byte - tracked_start;
3301 if match_len == self.pattern_len {
3302 return Some(DamLevMatch {
3303 start: tracked_start,
3304 end: end_byte,
3305 insertions: 0,
3306 deletions: 0,
3307 substitutions: 0,
3308 swaps: 0,
3309 similarity: 1.0,
3310 });
3311 }
3312 }
3313 }
3314
3315 // Search all possible start positions
3316 let search_start = end_byte.saturating_sub(self.pattern_len + d);
3317
3318 for try_start in search_start..end_byte {
3319 // Skip if not at a valid UTF-8 char boundary
3320 if try_start > 0 && text[try_start] >= 0x80 && text[try_start] < 0xC0 {
3321 continue;
3322 }
3323
3324 // Compute exact edit breakdown using DP
3325 let (insertions, deletions, substitutions, swaps) =
3326 self.compute_exact_edit_breakdown(&text[try_start..end_byte]);
3327
3328 let total = insertions + deletions + substitutions + swaps;
3329 if total as usize <= d {
3330 let sim = self.calc_similarity(total, insertions, deletions);
3331 if sim >= threshold {
3332 // For exact match (d=0, total=0), return immediately
3333 if d == 0 && total == 0 {
3334 return Some(DamLevMatch {
3335 start: try_start,
3336 end: end_byte,
3337 insertions,
3338 deletions,
3339 substitutions,
3340 swaps,
3341 similarity: sim,
3342 });
3343 }
3344
3345 let candidate = DamLevMatch {
3346 start: try_start,
3347 end: end_byte,
3348 insertions,
3349 deletions,
3350 substitutions,
3351 swaps,
3352 similarity: sim,
3353 };
3354 // Prefer: fewer edits, then closer to pattern length
3355 let match_len = end_byte - try_start;
3356 let len_diff = (match_len as i32 - self.pattern_len as i32).abs();
3357 if best_match.as_ref().is_none_or(|(best_d, b)| {
3358 let b_len = b.end - b.start;
3359 let b_len_diff = (b_len as i32 - self.pattern_len as i32).abs();
3360 d < *best_d
3361 || (d == *best_d && total < b.total_edits())
3362 || (d == *best_d
3363 && total == b.total_edits()
3364 && len_diff < b_len_diff)
3365 }) {
3366 if best_match.is_none() {
3367 chars_since_first_match = 0;
3368 }
3369 best_match = Some((d, candidate));
3370 }
3371 }
3372 }
3373 }
3374 }
3375 }
3376
3377 // After finding a fuzzy match, check if we need to continue looking for better matches.
3378 // Only continue if the match is "suspicious" (shorter than pattern, indicating possible
3379 // early accept due to deletions). If match_length >= pattern_length, return immediately.
3380 if let Some((_, ref m)) = best_match {
3381 let match_len = m.end - m.start;
3382 if match_len >= self.pattern_len {
3383 // Match is at least pattern length - can't be early accept due to deletions
3384 return best_match.map(|(_, m)| m);
3385 }
3386 // Short match - might be early accept, continue for a few more chars
3387 chars_since_first_match += 1;
3388 if chars_since_first_match > max_edits {
3389 return best_match.map(|(_, m)| m);
3390 }
3391 }
3392
3393 prev_mask = char_mask;
3394 pos += char_len;
3395 rotation = (rotation + 1) % 3;
3396 }
3397
3398 best_match.map(|(_, m)| m)
3399 }
3400}
3401
3402// SIMD-accelerated Bitap for ARM with NEON
3403#[cfg(all(feature = "simd", target_arch = "aarch64"))]
3404mod simd_neon {
3405 #[allow(clippy::wildcard_imports)]
3406 use std::arch::aarch64::*;
3407
3408 /// NEON state update with transposition for k <= 1.
3409 #[inline]
3410 pub unsafe fn update_states_with_trans_k1_neon(
3411 r: &mut [u64; 4],
3412 old_r: &[u64; 4],
3413 old_old_r: &[u64; 4],
3414 char_mask: u64,
3415 prev_mask: u64,
3416 ) {
3417 unsafe {
3418 let old_vec = vld1q_u64(old_r.as_ptr());
3419 let old_old_vec = vld1q_u64(old_old_r.as_ptr());
3420 let mask = vdupq_n_u64(char_mask);
3421
3422 // match_d = (old_r[d] << 1) | char_mask
3423 let match_d = vorrq_u64(vshlq_n_u64(old_vec, 1), mask);
3424
3425 // insert: [!0, old_r[0]]
3426 let all_ones = vdupq_n_u64(!0u64);
3427 let insert = vextq_u64(all_ones, old_vec, 1);
3428
3429 // substitute = insert << 1
3430 let subst = vshlq_n_u64(insert, 1);
3431
3432 // Transposition
3433 let trans_valid = char_mask | (prev_mask >> 1);
3434 let trans_valid_vec = vdupq_n_u64(trans_valid);
3435 let old_old_dm1 = vextq_u64(all_ones, old_old_vec, 1);
3436 let trans_inner = vorrq_u64(vshlq_n_u64(old_old_dm1, 1), trans_valid_vec);
3437 let trans = vshlq_n_u64(trans_inner, 1);
3438
3439 // partial = match_d & insert & substitute & trans
3440 let partial = vandq_u64(vandq_u64(vandq_u64(match_d, insert), subst), trans);
3441
3442 vst1q_u64(r.as_mut_ptr(), partial);
3443 r[1] &= r[0] << 1; // left shift advances pattern position
3444 }
3445 }
3446}
3447
3448// SIMD-accelerated Bitap for x86_64 with AVX2
3449#[cfg(all(feature = "simd", target_arch = "x86_64"))]
3450mod simd_avx2 {
3451 #[cfg(target_arch = "x86_64")]
3452 #[allow(clippy::wildcard_imports)]
3453 use std::arch::x86_64::*;
3454
3455 /// Check if AVX2 is available at runtime.
3456 #[inline]
3457 pub fn is_available() -> bool {
3458 is_x86_feature_detected!("avx2")
3459 }
3460
3461 /// SIMD-accelerated state update for Bitap with k <= 3.
3462 ///
3463 /// Computes:
3464 /// - `r[0]` = (`old_r[0]` << 1) | `char_mask`
3465 /// - `r[d]` = ((`old_r[d]` << 1) | `char_mask`) & `old_r[d-1]` & (`r[d-1]` << 1) & (`old_r[d-1]` << 1)
3466 ///
3467 /// The cascade dependency (r[d] depends on r[d-1]) is handled sequentially after
3468 /// computing the independent terms in parallel.
3469 ///
3470 /// # Safety
3471 /// Requires AVX2 support. Caller must verify with `is_available()`.
3472 #[target_feature(enable = "avx2")]
3473 #[inline]
3474 #[allow(dead_code)]
3475 pub unsafe fn update_states_avx2(
3476 r: &mut [u64; 4],
3477 old_r: &[u64; 4],
3478 char_mask: u64,
3479 max_edits: usize,
3480 ) {
3481 debug_assert!(max_edits <= 3);
3482
3483 unsafe {
3484 // Load old states into 256-bit register
3485 let old_vec = _mm256_loadu_si256(old_r.as_ptr().cast::<__m256i>());
3486 let mask = _mm256_set1_epi64x(char_mask as i64);
3487
3488 // match_d = (old_r[d] << 1) | char_mask (parallel for all d)
3489 let match_d = _mm256_or_si256(_mm256_slli_epi64(old_vec, 1), mask);
3490
3491 // insert = old_r[d-1]: shift lanes right, filling lane 0 with !0
3492 // Use permute to shift: [old_r[0], old_r[1], old_r[2], old_r[3]] -> [!0, old_r[0], old_r[1], old_r[2]]
3493 let all_ones = _mm256_set1_epi64x(!0i64);
3494 // _mm256_permute4x64_epi64 with control 0b10_01_00_11 = [3,0,1,2] but we need [X,0,1,2]
3495 // Instead, use blend: shift and insert !0 at position 0
3496 let shifted = _mm256_permute4x64_epi64(old_vec, 0b10_01_00_00); // [0,0,1,2]
3497 let insert = _mm256_blend_epi32(shifted, all_ones, 0b0000_0011); // lane 0 = !0
3498
3499 // substitute = old_r[d-1] << 1
3500 let subst = _mm256_slli_epi64(insert, 1);
3501
3502 // Combine independent terms: match_d & insert & substitute
3503 let partial = _mm256_and_si256(_mm256_and_si256(match_d, insert), subst);
3504
3505 // Store partial results
3506 _mm256_storeu_si256(r.as_mut_ptr().cast::<__m256i>(), partial);
3507
3508 // Apply delete cascade: r[d] &= r[d-1] << 1
3509 // Left shift advances pattern position, must be sequential due to dependency
3510 if max_edits >= 1 {
3511 r[1] &= r[0] << 1;
3512 }
3513 if max_edits >= 2 {
3514 r[2] &= r[1] << 1;
3515 }
3516 if max_edits >= 3 {
3517 r[3] &= r[2] << 1;
3518 }
3519 }
3520 }
3521
3522 /// SIMD-accelerated state update with transposition support.
3523 ///
3524 /// # Safety
3525 /// Requires AVX2 support. Caller must verify with `is_available()`.
3526 #[target_feature(enable = "avx2")]
3527 #[inline]
3528 pub unsafe fn update_states_with_trans_avx2(
3529 r: &mut [u64; 4],
3530 old_r: &[u64; 4],
3531 old_old_r: &[u64; 4],
3532 char_mask: u64,
3533 prev_mask: u64,
3534 max_edits: usize,
3535 ) {
3536 debug_assert!(max_edits <= 3);
3537
3538 unsafe {
3539 // Load old states
3540 let old_vec = _mm256_loadu_si256(old_r.as_ptr().cast::<__m256i>());
3541 let old_old_vec = _mm256_loadu_si256(old_old_r.as_ptr().cast::<__m256i>());
3542 let mask = _mm256_set1_epi64x(char_mask as i64);
3543
3544 // match_d = (old_r[d] << 1) | char_mask
3545 let match_d = _mm256_or_si256(_mm256_slli_epi64(old_vec, 1), mask);
3546
3547 // Shift for d-1 access
3548 let all_ones = _mm256_set1_epi64x(!0i64);
3549 let shifted_old = _mm256_permute4x64_epi64(old_vec, 0b10_01_00_00);
3550 let insert = _mm256_blend_epi32(shifted_old, all_ones, 0b0000_0011);
3551
3552 // substitute = old_r[d-1] << 1
3553 let subst = _mm256_slli_epi64(insert, 1);
3554
3555 // Transposition term
3556 // trans_valid_mask: bit j is 0 if pattern[j]=curr AND pattern[j+1]=prev
3557 let trans_valid = char_mask | (prev_mask >> 1);
3558 let trans_valid_vec = _mm256_set1_epi64x(trans_valid as i64);
3559
3560 // Shift old_old for d-1 access
3561 let shifted_old_old = _mm256_permute4x64_epi64(old_old_vec, 0b10_01_00_00);
3562 let old_old_dm1 = _mm256_blend_epi32(shifted_old_old, all_ones, 0b0000_0011);
3563
3564 // trans = ((old_old_r[d-1] << 1) | trans_valid_mask) << 1
3565 let trans_inner = _mm256_or_si256(_mm256_slli_epi64(old_old_dm1, 1), trans_valid_vec);
3566 let trans = _mm256_slli_epi64(trans_inner, 1);
3567
3568 // Combine: match_d & insert & substitute & trans
3569 let partial = _mm256_and_si256(
3570 _mm256_and_si256(_mm256_and_si256(match_d, insert), subst),
3571 trans,
3572 );
3573
3574 // Store partial results
3575 _mm256_storeu_si256(r.as_mut_ptr().cast::<__m256i>(), partial);
3576
3577 // Apply delete cascade - left shift advances pattern position
3578 if max_edits >= 1 {
3579 r[1] &= r[0] << 1;
3580 }
3581 if max_edits >= 2 {
3582 r[2] &= r[1] << 1;
3583 }
3584 if max_edits >= 3 {
3585 r[3] &= r[2] << 1;
3586 }
3587 }
3588 }
3589}
3590
3591#[cfg(test)]
3592mod tests {
3593 use super::*;
3594
3595 #[test]
3596 fn test_exact_match() {
3597 let matcher = BitapMatcher::new("hello", EditLimits::new(0), false).unwrap();
3598 let matches = matcher.find_all("hello world", 0.8);
3599
3600 assert!(!matches.is_empty());
3601 assert!(matches.iter().any(|m| m.start == 0 && m.total_edits() == 0));
3602 }
3603
3604 #[test]
3605 fn test_one_substitution() {
3606 let matcher = BitapMatcher::new("hello", EditLimits::new(1), false).unwrap();
3607 let matches = matcher.find_all("hallo world", 0.5);
3608
3609 assert!(!matches.is_empty());
3610 assert!(matches.iter().any(|m| m.start == 0));
3611 }
3612
3613 #[test]
3614 fn test_find_first() {
3615 let matcher = BitapMatcher::new("quick", EditLimits::new(1), false).unwrap();
3616 let result = matcher.find_first("The quick brown fox", 0.8);
3617
3618 assert!(result.is_some());
3619 let m = result.unwrap();
3620 assert_eq!(m.start, 4);
3621 }
3622
3623 #[test]
3624 fn test_case_insensitive() {
3625 let matcher = BitapMatcher::new("hello", EditLimits::new(0), true).unwrap();
3626 let matches = matcher.find_all("HELLO world", 0.8);
3627
3628 assert!(!matches.is_empty());
3629 }
3630
3631 #[test]
3632 fn test_pattern_too_long() {
3633 let long_pattern = "a".repeat(65);
3634 let result = BitapMatcher::new(&long_pattern, EditLimits::new(1), false);
3635 assert!(result.is_none());
3636 }
3637
3638 #[test]
3639 fn test_transposition_match() {
3640 // With transposition support, "ba" should match "ab" with 1 edit (swap)
3641 // not 2 edits (2 substitutions)
3642 let matcher = BitapMatcher::new("ab", EditLimits::new(1), false).unwrap();
3643 let result = matcher.find_at_byte_position(b"ba", 0, 0.5);
3644
3645 assert!(result.is_some(), "Should find transposition match");
3646 let m = result.unwrap();
3647 assert_eq!(m.total_edits(), 1, "Transposition should count as 1 edit");
3648 }
3649
3650 #[test]
3651 fn test_transposition_in_word() {
3652 // "teh" should match "the" with 1 transposition
3653 let matcher = BitapMatcher::new("the", EditLimits::new(1), false).unwrap();
3654 let result = matcher.find_at_byte_position(b"teh quick brown", 0, 0.5);
3655
3656 assert!(
3657 result.is_some(),
3658 "Should find transposition match for 'teh'"
3659 );
3660 let m = result.unwrap();
3661 assert_eq!(m.total_edits(), 1, "Should be 1 edit for transposition");
3662 }
3663
3664 #[test]
3665 fn test_transposition_vs_two_substitutions() {
3666 // Without transposition, matching "ab" against "ba" would need 2 substitutions
3667 // With transposition, it's just 1 edit
3668 // Test that with max_edits=1, we CAN find "ba" because transposition counts as 1
3669 let matcher = BitapMatcher::new("ab", EditLimits::new(1), false).unwrap();
3670 let result = matcher.find_at_byte_position(b"ba", 0, 0.0);
3671 assert!(
3672 result.is_some(),
3673 "Transposition should allow match with 1 edit"
3674 );
3675 }
3676}
3677
3678/// Match result with pattern index for multi-pattern search.
3679#[derive(Debug, Clone)]
3680pub struct MultiPatternMatch {
3681 /// Index of the pattern that matched.
3682 pub pattern_index: usize,
3683 /// The match details.
3684 pub match_result: DamLevMatch,
3685}
3686
3687/// Multi-pattern Bitap matcher for searching multiple patterns in a single text pass.
3688///
3689/// This is more efficient than running N separate Bitap searches because:
3690/// 1. Text is scanned only once
3691/// 2. Character decoding is done once per character
3692/// 3. Better cache locality
3693#[derive(Debug)]
3694pub struct MultiBitapMatcher {
3695 /// Individual matchers for each pattern.
3696 matchers: Vec<BitapMatcher>,
3697 /// Case insensitive matching.
3698 case_insensitive: bool,
3699}
3700
3701impl MultiBitapMatcher {
3702 /// Create a new multi-pattern matcher.
3703 ///
3704 /// Returns None if any pattern is too long or empty.
3705 #[must_use]
3706 pub fn new(patterns: &[&str], limits: &EditLimits, case_insensitive: bool) -> Option<Self> {
3707 if patterns.is_empty() {
3708 return None;
3709 }
3710
3711 let matchers: Option<Vec<BitapMatcher>> = patterns
3712 .iter()
3713 .map(|p| BitapMatcher::new(p, limits.clone(), case_insensitive))
3714 .collect();
3715
3716 Some(MultiBitapMatcher {
3717 matchers: matchers?,
3718 case_insensitive,
3719 })
3720 }
3721
3722 /// Find all matches for all patterns in a single text pass.
3723 ///
3724 /// Returns matches with their pattern indices.
3725 #[must_use]
3726 pub fn find_all(&self, text: &str, threshold: f32) -> Vec<MultiPatternMatch> {
3727 let text_chars: Vec<(usize, char)> = text.char_indices().collect();
3728
3729 if text_chars.is_empty() || self.matchers.is_empty() {
3730 return vec![];
3731 }
3732
3733 // Find max_edits across all patterns
3734 let max_edits = self
3735 .matchers
3736 .iter()
3737 .map(|m| m.limits.max_edits as usize)
3738 .max()
3739 .unwrap_or(0);
3740
3741 // State vectors for each pattern: r[pattern][edit_level]
3742 let mut r: Vec<Vec<u64>> = self
3743 .matchers
3744 .iter()
3745 .map(|_| vec![!0u64; max_edits + 1])
3746 .collect();
3747 let mut old_r: Vec<Vec<u64>> = self
3748 .matchers
3749 .iter()
3750 .map(|_| vec![!0u64; max_edits + 1])
3751 .collect();
3752
3753 // Initialize deletion states for each pattern
3754 for (p_idx, matcher) in self.matchers.iter().enumerate() {
3755 let p_max = matcher.limits.max_edits as usize;
3756 for d in 1..=p_max {
3757 r[p_idx][d] = r[p_idx][d - 1] << 1;
3758 }
3759 }
3760
3761 // Use FxHashMap for deduplication
3762 let mut matches: FxHashMap<(usize, usize, usize), MultiPatternMatch> = FxHashMap::default();
3763
3764 // Process each character once
3765 for (char_idx, &(_, text_char)) in text_chars.iter().enumerate() {
3766 let text_char = if self.case_insensitive {
3767 text_char.to_lowercase().next().unwrap_or(text_char)
3768 } else {
3769 text_char
3770 };
3771
3772 // Update all pattern states
3773 for (p_idx, matcher) in self.matchers.iter().enumerate() {
3774 let p_max = matcher.limits.max_edits as usize;
3775 let char_mask = matcher.get_mask(text_char);
3776
3777 // Swap buffers for this pattern
3778 std::mem::swap(&mut r[p_idx], &mut old_r[p_idx]);
3779
3780 // Update R[0] (exact matching)
3781 r[p_idx][0] = (old_r[p_idx][0] << 1) | char_mask;
3782
3783 // Update R[d] for d > 0 (fuzzy matching)
3784 for d in 1..=p_max {
3785 let insert = old_r[p_idx][d - 1];
3786 let delete = r[p_idx][d - 1] << 1;
3787 let substitute = old_r[p_idx][d - 1] << 1;
3788 let match_d = (old_r[p_idx][d] << 1) | char_mask;
3789 r[p_idx][d] = match_d & insert & delete & substitute;
3790 }
3791
3792 // Check for matches
3793 let end_byte = text_chars.get(char_idx + 1).map_or(text.len(), |(b, _)| *b);
3794
3795 for d in 0..=p_max {
3796 if (r[p_idx][d] & matcher.accept_mask) == 0 {
3797 // Found a match with d edits for pattern p_idx
3798 let min_start_char = char_idx.saturating_sub(matcher.pattern_len + d);
3799 let max_start_char =
3800 char_idx.saturating_sub(matcher.pattern_len.saturating_sub(d + 1));
3801
3802 for start_char in min_start_char..=max_start_char.min(char_idx) {
3803 let start_byte = text_chars.get(start_char).map_or(0, |(b, _)| *b);
3804
3805 let (insertions, deletions, substitutions, swaps) = matcher
3806 .compute_exact_edit_breakdown(
3807 &text.as_bytes()[start_byte..end_byte],
3808 );
3809
3810 let total_edits = insertions + deletions + substitutions + swaps;
3811 if total_edits as usize > d {
3812 continue;
3813 }
3814
3815 let sim = matcher.calc_similarity(total_edits, insertions, deletions);
3816 if sim >= threshold {
3817 let key = (start_byte, end_byte, p_idx);
3818 let m = MultiPatternMatch {
3819 pattern_index: p_idx,
3820 match_result: DamLevMatch {
3821 start: start_byte,
3822 end: end_byte,
3823 insertions,
3824 deletions,
3825 substitutions,
3826 swaps,
3827 similarity: sim,
3828 },
3829 };
3830
3831 matches
3832 .entry(key)
3833 .and_modify(|existing| {
3834 if m.match_result.similarity
3835 > existing.match_result.similarity
3836 {
3837 *existing = m.clone();
3838 }
3839 })
3840 .or_insert(m);
3841 }
3842 }
3843 }
3844 }
3845 }
3846 }
3847
3848 matches.into_values().collect()
3849 }
3850
3851 /// Get the number of patterns.
3852 #[must_use]
3853 pub fn pattern_count(&self) -> usize {
3854 self.matchers.len()
3855 }
3856
3857 /// Get a pattern by index.
3858 #[must_use]
3859 pub fn pattern(&self, index: usize) -> Option<&str> {
3860 self.matchers.get(index).map(BitapMatcher::pattern)
3861 }
3862}