Skip to main content

hematite/agent/
fuzzy.rs

1/// High-Precision Fuzzy Matching Module.
2/// Ports the subsequence matching algorithm from Codex-RS to improve
3/// architectural grounding during file and symbol discovery.
4/// Returns the indices of the matched characters in the original haystack
5/// and a score where smaller is better.
6pub fn fuzzy_match(haystack: &str, needle: &str) -> Option<(Vec<usize>, i32)> {
7    if needle.is_empty() {
8        return Some((Vec::new(), i32::MAX));
9    }
10
11    let mut lowered_chars: Vec<char> = Vec::with_capacity(haystack.len());
12    let mut lowered_to_orig_char_idx: Vec<usize> = Vec::with_capacity(haystack.len());
13    for (orig_idx, ch) in haystack.chars().enumerate() {
14        for lc in ch.to_lowercase() {
15            lowered_chars.push(lc);
16            lowered_to_orig_char_idx.push(orig_idx);
17        }
18    }
19
20    let lowered_needle: Vec<char> = needle.to_lowercase().chars().collect();
21
22    let mut result_orig_indices: Vec<usize> = Vec::with_capacity(lowered_needle.len());
23    let mut last_lower_pos: Option<usize> = None;
24    let mut cur = 0usize;
25
26    for &nc in lowered_needle.iter() {
27        let mut found_at: Option<usize> = None;
28        while cur < lowered_chars.len() {
29            if lowered_chars[cur] == nc {
30                found_at = Some(cur);
31                cur += 1;
32                break;
33            }
34            cur += 1;
35        }
36        let pos = found_at?;
37        result_orig_indices.push(lowered_to_orig_char_idx[pos]);
38        last_lower_pos = Some(pos);
39    }
40
41    let first_lower_pos = if result_orig_indices.is_empty() {
42        0usize
43    } else {
44        let target_orig = result_orig_indices[0];
45        lowered_to_orig_char_idx
46            .iter()
47            .position(|&oi| oi == target_orig)
48            .unwrap_or(0)
49    };
50
51    let last_lower_pos = last_lower_pos.unwrap_or(first_lower_pos);
52    let window =
53        (last_lower_pos as i32 - first_lower_pos as i32 + 1) - (lowered_needle.len() as i32);
54    let mut score = window.max(0);
55
56    // Prefix bonus: strongly reward matches at the start of the string.
57    if first_lower_pos == 0 {
58        score -= 100;
59    }
60
61    result_orig_indices.sort_unstable();
62    result_orig_indices.dedup();
63    Some((result_orig_indices, score))
64}
65
66#[cfg(test)]
67mod tests {
68    use super::*;
69
70    #[test]
71    fn test_basic_fuzzy_match() {
72        let (idx, score) = fuzzy_match("main.rs", "mnrs").unwrap();
73        assert_eq!(idx, vec![0, 3, 5, 6]);
74        assert!(score < 0); // Should have prefix bonus
75    }
76
77    #[test]
78    fn test_case_insensitivity() {
79        let (_, score_a) = fuzzy_match("FooBar", "foobar").unwrap();
80        let (_, score_b) = fuzzy_match("foobar", "foobar").unwrap();
81        assert_eq!(score_a, score_b);
82    }
83
84    #[test]
85    fn test_prefer_prefix() {
86        let (_, score_a) = fuzzy_match("important_file.rs", "import").unwrap();
87        let (_, score_b) = fuzzy_match("another_important_file.rs", "import").unwrap();
88        assert!(score_a < score_b);
89    }
90}