Skip to main content

roder_edit_core/
fuzzy.rs

1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
4pub struct FuzzyCandidate {
5    pub start_line: usize,
6    pub end_line: usize,
7    pub score: u32,
8    pub snippet: String,
9    pub reason: String,
10}
11
/// Removes `N:` and `N |` line-number prefixes from every line of `input`.
///
/// Leading whitespace before the number is ignored when detecting a prefix.
/// After the `:` or ` |` marker at most one space is removed, so the
/// indentation of the remaining text is otherwise kept. Lines without such a
/// prefix are returned untouched (including their leading whitespace).
///
/// The result is the processed lines joined with `\n`; a trailing newline in
/// `input` is therefore not reproduced.
pub fn strip_line_number_prefixes(input: &str) -> String {
    /// Returns `line` without its line-number prefix, or `line` itself when
    /// it carries none.
    fn without_prefix(line: &str) -> &str {
        let unindented = line.trim_start();
        let after_digits = unindented.trim_start_matches(|ch: char| ch.is_ascii_digit());
        if after_digits.len() == unindented.len() {
            // No leading digits at all: not a numbered line.
            return line;
        }
        let body = after_digits
            .strip_prefix(':')
            .or_else(|| after_digits.strip_prefix(" |"));
        match body {
            Some(rest) => rest.strip_prefix(' ').unwrap_or(rest),
            None => line,
        }
    }

    let mut output = String::with_capacity(input.len());
    for (index, line) in input.lines().enumerate() {
        if index > 0 {
            output.push('\n');
        }
        output.push_str(without_prefix(line));
    }
    output
}
33
/// Normalizes `input` for whitespace- and case-insensitive comparison.
///
/// Every line has its trailing whitespace removed and its ASCII letters
/// lower-cased; the lines are then joined with `\n`. CRLF line endings need no
/// separate pass: `str::lines` strips `\r\n`, and `trim_end` removes any
/// remaining trailing `\r`. A trailing newline in `input` is not reproduced.
pub fn normalize_for_match(input: &str) -> String {
    // Build the result in a single buffer instead of one `String` per line.
    let mut normalized = String::with_capacity(input.len());
    for (index, line) in input.lines().enumerate() {
        if index > 0 {
            normalized.push('\n');
        }
        normalized.push_str(line.trim_end());
    }
    // ASCII-only folding leaves `\n` and all non-ASCII characters untouched.
    normalized.make_ascii_lowercase();
    normalized
}
42
/// Finds the original byte range of `haystack` whose normalized form uniquely
/// matches the normalized `needle`.
///
/// Matching is line-wise (lines are split on `\n`), so the returned byte
/// offsets always refer to the original text. Normalization (trailing
/// whitespace removal and ASCII case folding) is only used for comparison and
/// must never be used to index into the un-normalized haystack.
///
/// The range starts at the first byte of the first matched line and ends
/// after the last byte of the last matched line, i.e. it includes that line's
/// trailing whitespace but not its terminating `\n`.
///
/// Returns `None` when the needle is blank after normalization, when no
/// window of haystack lines matches, or when more than one window matches.
pub fn normalized_unique_match_range(
    haystack: &str,
    needle: &str,
) -> Option<std::ops::Range<usize>> {
    // `split` always yields at least one item, so `needle_lines` is never
    // empty; only the all-blank needle has to be rejected.
    let needle_lines: Vec<String> = needle.split('\n').map(normalize_line_for_match).collect();
    if needle_lines.iter().all(String::is_empty) {
        return None;
    }

    // (start byte offset, end byte offset, normalized text) of every original
    // line. Each line is normalized once here rather than once per window.
    let mut haystack_lines = Vec::new();
    let mut offset = 0;
    for line in haystack.split('\n') {
        let end = offset + line.len();
        haystack_lines.push((offset, end, normalize_line_for_match(line)));
        offset = end + 1;
    }

    // `windows` yields nothing when the haystack has fewer lines than the
    // needle, which covers the "needle too long" case.
    let mut matching = haystack_lines.windows(needle_lines.len()).filter(|window| {
        window
            .iter()
            .zip(&needle_lines)
            .all(|((_, _, normalized), wanted)| normalized == wanted)
    });

    let unique = matching.next()?;
    if matching.next().is_some() {
        // Two candidate windows match after normalization; refuse.
        return None;
    }
    let (start, _, _) = unique.first()?;
    let (_, end, _) = unique.last()?;
    Some(*start..*end)
}

/// Normalizes a single line for comparison: drops trailing whitespace and
/// lower-cases ASCII letters.
fn normalize_line_for_match(line: &str) -> String {
    line.trim_end().to_ascii_lowercase()
}
90
91pub fn diagnostic_candidates(haystack: &str, needle: &str, limit: usize) -> Vec<FuzzyCandidate> {
92    let needle_lines = needle.lines().count().max(1);
93    let normalized_needle = normalize_for_match(needle);
94    let lines = haystack.lines().collect::<Vec<_>>();
95    let mut candidates = Vec::new();
96    for start in 0..lines.len() {
97        let end = (start + needle_lines + 1).min(lines.len());
98        let snippet = lines[start..end].join("\n");
99        let normalized = normalize_for_match(&snippet);
100        let score = line_overlap_score(&normalized, &normalized_needle);
101        if score > 0 {
102            candidates.push(FuzzyCandidate {
103                start_line: start + 1,
104                end_line: end,
105                score,
106                snippet,
107                reason: "line overlap candidate".to_string(),
108            });
109        }
110    }
111    candidates.sort_by(|a, b| b.score.cmp(&a.score).then(a.start_line.cmp(&b.start_line)));
112    candidates.truncate(limit);
113    candidates
114}
115
/// Scores how much of `right` is present in `left`, line by line.
///
/// Both texts are reduced to their trimmed, non-blank lines. The score is the
/// percentage (0-100, rounded down) of `right`'s lines that also occur
/// somewhere in `left`. Returns 0 when `right` has no non-blank lines.
fn line_overlap_score(left: &str, right: &str) -> u32 {
    /// Trimmed, non-blank lines of `text`, in their original order.
    fn significant_lines(text: &str) -> Vec<&str> {
        let mut kept = Vec::new();
        for raw in text.lines() {
            let line = raw.trim();
            if !line.is_empty() {
                kept.push(line);
            }
        }
        kept
    }

    let wanted = significant_lines(right);
    if wanted.is_empty() {
        return 0;
    }
    let available = significant_lines(left);

    let mut hits = 0usize;
    for line in &wanted {
        if available.contains(line) {
            hits += 1;
        }
    }
    (hits * 100 / wanted.len()) as u32
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the `N:` and the indented `N |` prefix styles are removed.
    #[test]
    fn strips_colon_and_pipe_line_prefixes() {
        let numbered = "1: foo\n  2 | bar";
        let stripped = strip_line_number_prefixes(numbered);
        assert_eq!(stripped, "foo\nbar");
    }

    /// A case-insensitive partial match yields a candidate starting at the
    /// line where the needle's first line occurs.
    #[test]
    fn returns_candidates_for_nearby_lines() {
        let found = diagnostic_candidates("one\ntwo\nthree", "two\nTHREE", 2);
        assert!(!found.is_empty());
        let start_lines: Vec<usize> = found.iter().map(|entry| entry.start_line).collect();
        assert!(start_lines.contains(&2));
    }
}
145        let candidates = diagnostic_candidates("one\ntwo\nthree", "two\nTHREE", 2);
146        assert!(!candidates.is_empty());
147        assert!(candidates.iter().any(|candidate| candidate.start_line == 2));
148    }
149}