Skip to main content

roder_edit_core/
fuzzy.rs

1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
4pub struct FuzzyCandidate {
5    pub start_line: usize,
6    pub end_line: usize,
7    pub score: u32,
8    pub snippet: String,
9    pub reason: String,
10}
11
12pub fn strip_line_number_prefixes(input: &str) -> String {
13    input
14        .lines()
15        .map(|line| {
16            let trimmed = line.trim_start();
17            let digit_count = trimmed.chars().take_while(|ch| ch.is_ascii_digit()).count();
18            if digit_count > 0 && trimmed[digit_count..].starts_with(':') {
19                trimmed[digit_count + 1..]
20                    .strip_prefix(' ')
21                    .unwrap_or(&trimmed[digit_count + 1..])
22            } else if digit_count > 0 && trimmed[digit_count..].starts_with(" |") {
23                trimmed[digit_count + 2..]
24                    .strip_prefix(' ')
25                    .unwrap_or(&trimmed[digit_count + 2..])
26            } else {
27                line
28            }
29        })
30        .collect::<Vec<_>>()
31        .join("\n")
32}
33
34pub fn normalize_for_match(input: &str) -> String {
35    input
36        .replace("\r\n", "\n")
37        .lines()
38        .map(|line| line.trim_end().to_ascii_lowercase())
39        .collect::<Vec<_>>()
40        .join("\n")
41}
42
43/**
44 * Finds the original byte range whose normalized form uniquely matches the
45 * normalized needle. Matching is line-wise so byte offsets always refer to
46 * the original text — normalization (trailing-whitespace and case folding)
47 * must never be used to index into the un-normalized haystack.
48 */
49pub fn normalized_unique_match_range(
50    haystack: &str,
51    needle: &str,
52) -> Option<std::ops::Range<usize>> {
53    let needle_lines: Vec<String> = needle
54        .split('\n')
55        .map(normalize_line_for_match)
56        .collect();
57    if needle_lines.is_empty() || needle_lines.iter().all(|line| line.is_empty()) {
58        return None;
59    }
60    // Byte offset and raw text for every original line.
61    let mut line_spans = Vec::new();
62    let mut offset = 0;
63    for line in haystack.split('\n') {
64        line_spans.push((offset, line));
65        offset += line.len() + 1;
66    }
67    let window = needle_lines.len();
68    if line_spans.len() < window {
69        return None;
70    }
71    let mut found: Option<std::ops::Range<usize>> = None;
72    for start in 0..=(line_spans.len() - window) {
73        let matches = (0..window).all(|index| {
74            normalize_line_for_match(line_spans[start + index].1) == needle_lines[index]
75        });
76        if !matches {
77            continue;
78        }
79        if found.is_some() {
80            // Two candidate windows match after normalization; refuse.
81            return None;
82        }
83        let (first_offset, _) = line_spans[start];
84        let (last_offset, last_line) = line_spans[start + window - 1];
85        found = Some(first_offset..last_offset + last_line.len());
86    }
87    found
88}
89
90fn normalize_line_for_match(line: &str) -> String {
91    line.trim_end().to_ascii_lowercase()
92}
93
94pub fn diagnostic_candidates(haystack: &str, needle: &str, limit: usize) -> Vec<FuzzyCandidate> {
95    let needle_lines = needle.lines().count().max(1);
96    let normalized_needle = normalize_for_match(needle);
97    let lines = haystack.lines().collect::<Vec<_>>();
98    let mut candidates = Vec::new();
99    for start in 0..lines.len() {
100        let end = (start + needle_lines + 1).min(lines.len());
101        let snippet = lines[start..end].join("\n");
102        let normalized = normalize_for_match(&snippet);
103        let score = line_overlap_score(&normalized, &normalized_needle);
104        if score > 0 {
105            candidates.push(FuzzyCandidate {
106                start_line: start + 1,
107                end_line: end,
108                score,
109                snippet,
110                reason: "line overlap candidate".to_string(),
111            });
112        }
113    }
114    candidates.sort_by(|a, b| b.score.cmp(&a.score).then(a.start_line.cmp(&b.start_line)));
115    candidates.truncate(limit);
116    candidates
117}
118
119fn line_overlap_score(left: &str, right: &str) -> u32 {
120    let left = left
121        .lines()
122        .map(str::trim)
123        .filter(|line| !line.is_empty())
124        .collect::<Vec<_>>();
125    let right = right
126        .lines()
127        .map(str::trim)
128        .filter(|line| !line.is_empty())
129        .collect::<Vec<_>>();
130    if right.is_empty() {
131        return 0;
132    }
133    let matches = right.iter().filter(|line| left.contains(line)).count();
134    ((matches * 100) / right.len()) as u32
135}
136
137#[cfg(test)]
138mod tests {
139    use super::*;
140
141    #[test]
142    fn strips_colon_and_pipe_line_prefixes() {
143        assert_eq!(strip_line_number_prefixes("1: foo\n  2 | bar"), "foo\nbar");
144    }
145
146    #[test]
147    fn returns_candidates_for_nearby_lines() {
148        let candidates = diagnostic_candidates("one\ntwo\nthree", "two\nTHREE", 2);
149        assert!(!candidates.is_empty());
150        assert!(candidates.iter().any(|candidate| candidate.start_line == 2));
151    }
152}