Skip to main content

hematite/agent/
fuzzy.rs

1/// High-Precision Fuzzy Matching Module.
2/// Ports the subsequence matching algorithm from Codex-RS to improve
3/// architectural grounding during file and symbol discovery.
4
5/// Returns the indices of the matched characters in the original haystack
6/// and a score where smaller is better.
7pub fn fuzzy_match(haystack: &str, needle: &str) -> Option<(Vec<usize>, i32)> {
8    if needle.is_empty() {
9        return Some((Vec::new(), i32::MAX));
10    }
11
12    let mut lowered_chars: Vec<char> = Vec::new();
13    let mut lowered_to_orig_char_idx: Vec<usize> = Vec::new();
14    for (orig_idx, ch) in haystack.chars().enumerate() {
15        for lc in ch.to_lowercase() {
16            lowered_chars.push(lc);
17            lowered_to_orig_char_idx.push(orig_idx);
18        }
19    }
20
21    let lowered_needle: Vec<char> = needle.to_lowercase().chars().collect();
22
23    let mut result_orig_indices: Vec<usize> = Vec::with_capacity(lowered_needle.len());
24    let mut last_lower_pos: Option<usize> = None;
25    let mut cur = 0usize;
26
27    for &nc in lowered_needle.iter() {
28        let mut found_at: Option<usize> = None;
29        while cur < lowered_chars.len() {
30            if lowered_chars[cur] == nc {
31                found_at = Some(cur);
32                cur += 1;
33                break;
34            }
35            cur += 1;
36        }
37        let pos = found_at?;
38        result_orig_indices.push(lowered_to_orig_char_idx[pos]);
39        last_lower_pos = Some(pos);
40    }
41
42    let first_lower_pos = if result_orig_indices.is_empty() {
43        0usize
44    } else {
45        let target_orig = result_orig_indices[0];
46        lowered_to_orig_char_idx
47            .iter()
48            .position(|&oi| oi == target_orig)
49            .unwrap_or(0)
50    };
51
52    let last_lower_pos = last_lower_pos.unwrap_or(first_lower_pos);
53    let window =
54        (last_lower_pos as i32 - first_lower_pos as i32 + 1) - (lowered_needle.len() as i32);
55    let mut score = window.max(0);
56
57    // Prefix bonus: strongly reward matches at the start of the string.
58    if first_lower_pos == 0 {
59        score -= 100;
60    }
61
62    result_orig_indices.sort_unstable();
63    result_orig_indices.dedup();
64    Some((result_orig_indices, score))
65}
66
67#[cfg(test)]
68mod tests {
69    use super::*;
70
71    #[test]
72    fn test_basic_fuzzy_match() {
73        let (idx, score) = fuzzy_match("main.rs", "mnrs").unwrap();
74        assert_eq!(idx, vec![0, 3, 5, 6]);
75        assert!(score < 0); // Should have prefix bonus
76    }
77
78    #[test]
79    fn test_case_insensitivity() {
80        let (_, score_a) = fuzzy_match("FooBar", "foobar").unwrap();
81        let (_, score_b) = fuzzy_match("foobar", "foobar").unwrap();
82        assert_eq!(score_a, score_b);
83    }
84
85    #[test]
86    fn test_prefer_prefix() {
87        let (_, score_a) = fuzzy_match("important_file.rs", "import").unwrap();
88        let (_, score_b) = fuzzy_match("another_important_file.rs", "import").unwrap();
89        assert!(score_a < score_b);
90    }
91}