harness_write/
matching.rs1use crate::constants::{
2 CONTEXT_LINES, DEFAULT_FUZZY_LENGTH_TOLERANCE, DEFAULT_FUZZY_THRESHOLD, DEFAULT_FUZZY_TOP_K,
3};
4use crate::levenshtein::similarity;
5use crate::types::{FuzzyCandidate, MatchLocation};
6
7pub fn line_of_offset(text: &str, offset: usize) -> usize {
9 if offset == 0 {
10 return 1;
11 }
12 let mut line = 1usize;
13 let limit = offset.min(text.len());
14 for b in text.as_bytes()[..limit].iter() {
15 if *b == b'\n' {
16 line += 1;
17 }
18 }
19 line
20}
21
22fn split_lines(text: &str) -> Vec<&str> {
23 if text.is_empty() {
24 return Vec::new();
25 }
26 text.split('\n').collect()
27}
28
29fn context_around(
30 file_lines: &[&str],
31 first_line_1based: usize,
32 window_line_count: usize,
33) -> (Vec<String>, Vec<String>) {
34 let first_idx = first_line_1based.saturating_sub(1);
35 let last_idx = first_idx + window_line_count.saturating_sub(1);
36 let before_start = first_idx.saturating_sub(CONTEXT_LINES);
37 let before_end = first_idx;
38 let after_start = last_idx + 1;
39 let after_end = (last_idx + 1 + CONTEXT_LINES).min(file_lines.len());
40 let before = file_lines
41 .get(before_start..before_end)
42 .unwrap_or(&[])
43 .iter()
44 .map(|s| s.to_string())
45 .collect();
46 let after = file_lines
47 .get(after_start..after_end)
48 .unwrap_or(&[])
49 .iter()
50 .map(|s| s.to_string())
51 .collect();
52 (before, after)
53}
54
55pub fn find_all_occurrences(haystack: &str, needle: &str) -> Vec<usize> {
57 if needle.is_empty() {
58 return Vec::new();
59 }
60 let mut out = Vec::new();
61 let mut from = 0usize;
62 let hb = haystack.as_bytes();
63 let nb = needle.as_bytes();
64 while from + nb.len() <= hb.len() {
65 let slice = &hb[from..];
66 match find_subslice(slice, nb) {
67 Some(rel) => {
68 let abs = from + rel;
69 out.push(abs);
70 from = abs + nb.len();
71 }
72 None => break,
73 }
74 }
75 out
76}
77
78fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
79 if needle.is_empty() {
80 return Some(0);
81 }
82 if needle.len() > haystack.len() {
83 return None;
84 }
85 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
86}
87
88pub fn build_match_locations(
89 file: &str,
90 needle: &str,
91 offsets: &[usize],
92) -> Vec<MatchLocation> {
93 let file_lines = split_lines(file);
94 let needle_line_count = split_lines(needle).len().max(1);
95 offsets
96 .iter()
97 .map(|&off| {
98 let first_line = line_of_offset(file, off);
99 let (before, after) = context_around(&file_lines, first_line, needle_line_count);
100 MatchLocation {
101 line: first_line,
102 preview: needle.to_string(),
103 context_before: before,
104 context_after: after,
105 }
106 })
107 .collect()
108}
109
110pub struct FuzzyOpts {
111 pub top_k: usize,
112 pub threshold: f64,
113 pub length_tolerance: f64,
114}
115
116impl Default for FuzzyOpts {
117 fn default() -> Self {
118 Self {
119 top_k: DEFAULT_FUZZY_TOP_K,
120 threshold: DEFAULT_FUZZY_THRESHOLD,
121 length_tolerance: DEFAULT_FUZZY_LENGTH_TOLERANCE,
122 }
123 }
124}
125
126pub fn find_fuzzy_candidates(file: &str, needle: &str, opts: FuzzyOpts) -> Vec<FuzzyCandidate> {
127 if file.is_empty() || needle.is_empty() {
128 return Vec::new();
129 }
130 let file_lines = split_lines(file);
131 let needle_lines = split_lines(needle);
132 let window_line_count = needle_lines.len().max(1);
133 if file_lines.len() < window_line_count {
134 return Vec::new();
135 }
136
137 let mut cands: Vec<(usize, f64, String)> = Vec::new();
138 for i in 0..=file_lines.len().saturating_sub(window_line_count) {
139 let window = file_lines[i..i + window_line_count].join("\n");
140 let max_len = window.len().max(needle.len());
141 if max_len > 0 {
142 let delta =
143 (window.len() as f64 - needle.len() as f64).abs() / max_len as f64;
144 if delta > opts.length_tolerance {
145 continue;
146 }
147 }
148 let score = similarity(&window, needle);
149 if score < opts.threshold {
150 continue;
151 }
152 if (score - 1.0).abs() < f64::EPSILON {
153 continue;
154 }
155 cands.push((i + 1, score, window));
156 }
157
158 cands.sort_by(|a, b| {
159 b.1.partial_cmp(&a.1)
160 .unwrap_or(std::cmp::Ordering::Equal)
161 .then_with(|| a.0.cmp(&b.0))
162 });
163
164 cands
165 .into_iter()
166 .take(opts.top_k)
167 .map(|(line, score, window_text)| {
168 let (before, after) = context_around(&file_lines, line, window_line_count);
169 FuzzyCandidate {
170 line,
171 score: (score * 100.0).round() / 100.0,
172 preview: window_text,
173 context_before: before,
174 context_after: after,
175 }
176 })
177 .collect()
178}
179
180pub fn substring_boundary_collisions(file: &str, needle: &str, offsets: &[usize]) -> Vec<usize> {
183 let mut flagged: Vec<usize> = Vec::new();
184 let fb = file.as_bytes();
185 let nb_len = needle.as_bytes().len();
186 for &off in offsets {
187 let before = if off > 0 { Some(fb[off - 1]) } else { None };
188 let after = fb.get(off + nb_len).copied();
189 if is_ident(before) || is_ident(after) {
190 flagged.push(line_of_offset(file, off));
191 }
192 }
193 flagged
194}
195
196fn is_ident(b: Option<u8>) -> bool {
197 match b {
198 Some(c) => matches!(c, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_'),
199 None => false,
200 }
201}