1use strsim::normalized_levenshtein;
7
8#[derive(Debug, Clone)]
10pub struct FuzzyMatch {
11 pub start: usize,
13 pub end: usize,
15 pub matched_text: String,
17 pub score: f64,
19}
20
21pub fn find_best_match(needle: &str, haystack: &str, threshold: f64) -> Option<FuzzyMatch> {
31 let window_size = needle.lines().count();
32 let (lines, offsets) = build_haystack_info(haystack);
33 slide_window(needle, haystack, &lines, &offsets, threshold, window_size)
34}
35
36pub fn find_best_match_elastic(
51 needle: &str,
52 haystack: &str,
53 threshold: f64,
54 max_expansion: usize,
55) -> Option<FuzzyMatch> {
56 let base_lines = needle.lines().count();
57 let (lines, offsets) = build_haystack_info(haystack);
59
60 let mut best: Option<FuzzyMatch> = None;
61 for expansion in 0..=max_expansion {
62 let candidate = slide_window(
63 needle,
64 haystack,
65 &lines,
66 &offsets,
67 threshold,
68 base_lines + expansion,
69 );
70 if let Some(c) = candidate {
71 if best.as_ref().is_none_or(|b| c.score > b.score) {
72 best = Some(c);
73 }
74 }
75 }
76 best
77}
78
79fn build_haystack_info(haystack: &str) -> (Vec<&str>, Vec<usize>) {
87 let lines: Vec<&str> = haystack.lines().collect();
88 let offsets: Vec<usize> = std::iter::once(0)
91 .chain(haystack.split('\n').scan(0usize, |acc, segment| {
92 *acc += segment.len() + 1; Some(*acc)
94 }))
95 .collect();
96 (lines, offsets)
97}
98
99fn slide_window(
103 needle: &str,
104 haystack: &str,
105 haystack_lines: &[&str],
106 line_offsets: &[usize],
107 threshold: f64,
108 window_size: usize,
109) -> Option<FuzzyMatch> {
110 if window_size == 0 || haystack_lines.len() < window_size {
111 return None;
112 }
113
114 let mut best: Option<FuzzyMatch> = None;
115
116 for i in 0..=haystack_lines.len().saturating_sub(window_size) {
117 let window = haystack_lines[i..i + window_size].join("\n");
118 let score = normalized_levenshtein(needle, &window);
119
120 if score >= threshold && best.as_ref().is_none_or(|b| score > b.score) {
121 let start = line_offsets[i];
122 let end = if i + window_size < line_offsets.len() {
125 line_offsets[i + window_size].saturating_sub(1)
126 } else {
127 haystack.len()
128 };
129
130 best = Some(FuzzyMatch {
131 start,
132 end,
133 matched_text: window,
134 score,
135 });
136 }
137 }
138
139 best
140}
141
142#[cfg(test)]
145mod tests {
146 use super::*;
147
148 #[test]
151 fn exact_match_scores_one() {
152 let src = "fn main() {\n println!(\"hello\");\n}";
153 let m = find_best_match(src, src, 0.9).expect("should find exact match");
154 assert!((m.score - 1.0).abs() < 1e-9);
155 }
156
157 #[test]
158 fn similar_match_above_threshold() {
159 let haystack = "fn main() {\n println!(\"hello world\");\n}";
160 let needle = "fn main() {\n println!(\"hello\");\n}";
161 let m = find_best_match(needle, haystack, 0.8).expect("should find similar match");
162 assert!(m.score > 0.8);
163 }
164
165 #[test]
166 fn dissimilar_content_below_threshold() {
167 let haystack = "completely different content";
168 let needle = "fn main() {}";
169 assert!(find_best_match(needle, haystack, 0.8).is_none());
170 }
171
172 #[test]
173 fn selects_best_match_among_candidates() {
174 let haystack = "fn foo() {}\nfn bar() {\n x\n}\nfn baz() {\n y\n}";
175 let needle = "fn bar() {\n x\n}";
176 let m = find_best_match(needle, haystack, 0.9).expect("should match");
177 assert!(m.matched_text.contains("bar"));
178 }
179
180 #[test]
181 fn byte_positions_are_correct() {
182 let haystack = "line1\nline2\nline3";
183 let needle = "line2";
184 let m = find_best_match(needle, haystack, 0.9).expect("should match");
185 assert_eq!(m.start, 6, "\"line1\\n\" is 6 bytes");
186 assert_eq!(&haystack[m.start..m.end], "line2");
187 }
188
189 #[test]
190 fn trailing_newline_does_not_corrupt_offsets() {
191 let haystack = "alpha\nbeta\n";
192 let needle = "beta";
193 let m = find_best_match(needle, haystack, 0.9).expect("should match");
194 assert_eq!(&haystack[m.start..m.end], "beta");
195 }
196
197 #[test]
200 fn elastic_zero_expansion_matches_fixed_window() {
201 let needle = "fn foo() {}";
202 let haystack = "fn foo() {}\nfn bar() {}";
203 let fixed = find_best_match(needle, haystack, 0.9);
204 let elastic = find_best_match_elastic(needle, haystack, 0.9, 0);
205 match (fixed, elastic) {
206 (None, None) => {}
207 (Some(a), Some(b)) => {
208 assert_eq!(a.start, b.start);
209 assert_eq!(a.end, b.end);
210 assert!((a.score - b.score).abs() < 1e-9);
211 }
212 (a, b) => panic!("results differ: fixed={a:?} elastic={b:?}"),
213 }
214 }
215
216 #[test]
217 fn elastic_finds_needle_with_inserted_lines() {
218 let needle = "TOP_ANCHOR\nBOTTOM_ANCHOR";
229 let haystack = "TOP_ANCHOR\nINSERTED_LINE\nBOTTOM_ANCHOR\nextra";
230
231 assert!(
232 find_best_match(needle, haystack, 0.55).is_none(),
233 "fixed window (best score 0.519) should miss at threshold 0.55"
234 );
235
236 let m = find_best_match_elastic(needle, haystack, 0.55, 1)
237 .expect("elastic (best score 0.632) should find match at threshold 0.55");
238 assert!(
239 m.matched_text.contains("TOP_ANCHOR"),
240 "match must contain opening anchor"
241 );
242 assert!(
243 m.matched_text.contains("BOTTOM_ANCHOR"),
244 "match must contain closing anchor"
245 );
246 }
247
248 #[test]
249 fn elastic_picks_highest_score_across_expansions() {
250 let needle = "fn exact() {}";
253 let haystack = "preamble\nfn exact() {}\npostamble\nmore stuff\n";
254 let m = find_best_match_elastic(needle, haystack, 0.9, 5).expect("should find exact match");
255 assert!((m.score - 1.0).abs() < 1e-9, "perfect match should win");
256 assert_eq!(&haystack[m.start..m.end], "fn exact() {}");
257 }
258
259 #[test]
260 fn elastic_returns_none_when_nothing_exceeds_threshold() {
261 let needle = "fn absolutely_nothing_like_this() {}";
262 let haystack = "let a = 1;\nlet b = 2;\nlet c = 3;\n";
263 assert!(find_best_match_elastic(needle, haystack, 0.85, 10).is_none());
264 }
265
266 #[test]
267 fn elastic_handles_haystack_shorter_than_window() {
268 let needle = "a\nb\nc\nd\ne";
269 let haystack = "a\nb";
270 assert!(find_best_match_elastic(needle, haystack, 0.5, 20).is_none());
272 }
273}