1use std::collections::HashSet;
7
8pub const REFLOW_NON_WS_TOLERANCE: usize = 8;
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq)]
12pub enum MatchTier {
13 Exact,
14 Rstrip,
15 Trim,
16 Indent,
17 Unicode,
18 Reflow,
19}
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq)]
22pub struct SequenceMatch {
23 pub found: usize,
24 pub tier: MatchTier,
25 pub line_count: usize,
26}
27
28pub fn normalize_unicode(input: &str) -> String {
30 let mut normalized = String::with_capacity(input.len());
31 for ch in input.chars() {
32 match ch {
33 '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => normalized.push('\''),
34 '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => normalized.push('"'),
35 '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}' => {
36 normalized.push('-');
37 }
38 '\u{2026}' => normalized.push_str("..."),
39 '\u{00A0}' => normalized.push(' '),
40 _ => normalized.push(ch),
41 }
42 }
43 normalized
44}
45
46pub fn normalize_indent(input: &str) -> String {
48 let mut leading_chars = 0;
49 let mut leading_bytes = 0;
50
51 for ch in input.chars() {
52 if ch != '\t' && ch != ' ' {
53 break;
54 }
55 leading_chars += 1;
56 leading_bytes += ch.len_utf8();
57 }
58
59 if leading_chars == 0 {
60 return input.to_owned();
61 }
62
63 let mut normalized = String::with_capacity(input.len());
64 normalized.push_str(&" ".repeat(leading_chars));
65 normalized.push_str(&input[leading_bytes..]);
66 normalized
67}
68
69pub fn normalize_reflow_whitespace(input: &str) -> String {
71 let mut collapsed = String::with_capacity(input.len());
72 let mut in_whitespace = false;
73
74 for ch in input.chars() {
75 if ch.is_whitespace() {
76 if !in_whitespace {
77 collapsed.push(' ');
78 in_whitespace = true;
79 }
80 } else {
81 collapsed.push(ch);
82 in_whitespace = false;
83 }
84 }
85
86 collapsed.trim().to_owned()
87}
88
89pub fn strip_reflow_whitespace(input: &str) -> String {
91 input.chars().filter(|ch| !ch.is_whitespace()).collect()
92}
93
94pub fn has_reflow_content(input: &str) -> bool {
96 input.chars().any(|ch| !ch.is_whitespace())
97}
98
99fn matches_at<F>(lines: &[&str], pattern: &[&str], start: usize, compare: &F) -> bool
100where
101 F: Fn(&str, &str) -> bool,
102{
103 pattern
104 .iter()
105 .enumerate()
106 .all(|(offset, expected)| compare(lines[start + offset], expected))
107}
108
109pub fn try_match<F>(
111 lines: &[&str],
112 pattern: &[&str],
113 start_index: usize,
114 compare: F,
115 eof: bool,
116) -> Option<usize>
117where
118 F: Fn(&str, &str) -> bool,
119{
120 if pattern.is_empty() || pattern.len() > lines.len() {
121 return None;
122 }
123
124 if eof {
125 let from_end = lines.len() - pattern.len();
126 if from_end >= start_index && matches_at(lines, pattern, from_end, &compare) {
127 return Some(from_end);
128 }
129 return None;
130 }
131
132 let last_start = lines.len() - pattern.len();
133 if start_index > last_start {
134 return None;
135 }
136
137 (start_index..=last_start).find(|&start| matches_at(lines, pattern, start, &compare))
138}
139
140fn non_whitespace_unit_count(input: &str) -> usize {
141 strip_reflow_whitespace(input).chars().count()
145}
146
147pub fn find_reflow_match(
149 lines: &[&str],
150 pattern: &[&str],
151 start_index: usize,
152) -> Option<(usize, usize)> {
153 let needle_text = pattern.join("\n");
154 let normalized_needle = normalize_reflow_whitespace(&needle_text);
155 let needle_non_whitespace = strip_reflow_whitespace(&needle_text);
156 if normalized_needle.is_empty() || needle_non_whitespace.is_empty() {
157 return None;
158 }
159
160 let needle_non_whitespace_len = needle_non_whitespace.chars().count();
161 let min_non_whitespace = needle_non_whitespace_len.saturating_sub(REFLOW_NON_WS_TOLERANCE);
162 let max_non_whitespace = needle_non_whitespace_len + REFLOW_NON_WS_TOLERANCE;
163 let mut matches = Vec::new();
164 let mut seen = HashSet::new();
165
166 for start in start_index..lines.len() {
167 if !has_reflow_content(lines[start]) {
168 continue;
169 }
170
171 let mut window_non_whitespace_len = 0;
172 for end in (start + 1)..=lines.len() {
173 let line = lines[end - 1];
174 window_non_whitespace_len += non_whitespace_unit_count(line);
175
176 if window_non_whitespace_len > max_non_whitespace {
177 break;
178 }
179 if window_non_whitespace_len < min_non_whitespace {
180 continue;
181 }
182 if !has_reflow_content(line) {
183 continue;
184 }
185
186 let window_text = lines[start..end].join("\n");
187 let window_non_whitespace = strip_reflow_whitespace(&window_text);
188 if window_non_whitespace != needle_non_whitespace {
189 continue;
190 }
191 if normalize_reflow_whitespace(&window_text) != normalized_needle {
192 continue;
193 }
194
195 if seen.insert((start, end)) {
196 matches.push((start, end - start));
197 }
198 }
199 }
200
201 if matches.len() == 1 {
202 Some(matches[0])
203 } else {
204 None
205 }
206}
207
208pub fn seek_sequence_tiered(
210 lines: &[&str],
211 pattern: &[&str],
212 start_index: usize,
213 eof: bool,
214) -> Option<SequenceMatch> {
215 if pattern.is_empty() {
216 return None;
217 }
218
219 if let Some(found) = try_match(lines, pattern, start_index, |a, b| a == b, eof) {
220 return Some(SequenceMatch {
221 found,
222 tier: MatchTier::Exact,
223 line_count: pattern.len(),
224 });
225 }
226
227 if let Some(found) = try_match(
228 lines,
229 pattern,
230 start_index,
231 |a, b| a.trim_end() == b.trim_end(),
232 eof,
233 ) {
234 return Some(SequenceMatch {
235 found,
236 tier: MatchTier::Rstrip,
237 line_count: pattern.len(),
238 });
239 }
240
241 if let Some(found) = try_match(
242 lines,
243 pattern,
244 start_index,
245 |a, b| a.trim() == b.trim(),
246 eof,
247 ) {
248 return Some(SequenceMatch {
249 found,
250 tier: MatchTier::Trim,
251 line_count: pattern.len(),
252 });
253 }
254
255 if let Some(found) = try_match(
256 lines,
257 pattern,
258 start_index,
259 |a, b| normalize_indent(a).trim_end() == normalize_indent(b).trim_end(),
260 eof,
261 ) {
262 return Some(SequenceMatch {
263 found,
264 tier: MatchTier::Indent,
265 line_count: pattern.len(),
266 });
267 }
268
269 if let Some(found) = try_match(
270 lines,
271 pattern,
272 start_index,
273 |a, b| normalize_unicode(a.trim()) == normalize_unicode(b.trim()),
274 eof,
275 ) {
276 return Some(SequenceMatch {
277 found,
278 tier: MatchTier::Unicode,
279 line_count: pattern.len(),
280 });
281 }
282
283 if eof {
284 return None;
285 }
286
287 find_reflow_match(lines, pattern, start_index).map(|(found, line_count)| SequenceMatch {
288 found,
289 tier: MatchTier::Reflow,
290 line_count,
291 })
292}
293
294#[cfg(test)]
295mod tests {
296 use super::*;
297
298 fn assert_match(
299 actual: Option<SequenceMatch>,
300 found: usize,
301 tier: MatchTier,
302 line_count: usize,
303 ) {
304 assert_eq!(
305 actual,
306 Some(SequenceMatch {
307 found,
308 tier,
309 line_count,
310 })
311 );
312 }
313
314 #[test]
315 fn normalization_helpers_match_patch_parser_sources() {
316 assert_eq!(
317 normalize_unicode("‘’‚‛“”„‟‐‑‒–—―…\u{00A0}"),
318 "''''\"\"\"\"------... "
319 );
320 assert_eq!(normalize_indent("\t alpha\t beta "), " alpha\t beta ");
321 assert_eq!(normalize_indent(""), "");
322 assert_eq!(
323 normalize_reflow_whitespace(" \talpha\n\u{00A0} beta "),
324 "alpha beta"
325 );
326 assert_eq!(
327 strip_reflow_whitespace(" \talpha\n\u{00A0} beta "),
328 "alphabeta"
329 );
330 assert!(has_reflow_content("\u{00A0}x"));
331 assert!(!has_reflow_content(" \t\n"));
332 }
333
334 #[test]
335 fn exact_tier_wins_without_upgrading_to_later_tiers() {
336 assert_match(
337 seek_sequence_tiered(&["alpha", "beta"], &["beta"], 0, false),
338 1,
339 MatchTier::Exact,
340 1,
341 );
342 }
343
344 #[test]
345 fn rstrip_tier_wins_before_trim() {
346 assert_match(
347 seek_sequence_tiered(&["alpha "], &["alpha"], 0, false),
348 0,
349 MatchTier::Rstrip,
350 1,
351 );
352 }
353
354 #[test]
355 fn trim_tier_wins_before_indent_and_unicode() {
356 assert_match(
357 seek_sequence_tiered(&[" alpha "], &["alpha"], 0, false),
358 0,
359 MatchTier::Trim,
360 1,
361 );
362 }
363
364 #[test]
365 fn indent_normalization_matches_tab_space_drift_but_trim_shadows_the_tier() {
366 assert_eq!(normalize_indent("\treturn 42;"), " return 42;");
367 assert_eq!(normalize_indent(" return 42;"), " return 42;");
368 assert_eq!(
369 try_match(
370 &["\treturn 42;"],
371 &[" return 42;"],
372 0,
373 |a, b| normalize_indent(a).trim_end() == normalize_indent(b).trim_end(),
374 false,
375 ),
376 Some(0)
377 );
378 assert_match(
381 seek_sequence_tiered(&["\treturn 42;"], &[" return 42;"], 0, false),
382 0,
383 MatchTier::Trim,
384 1,
385 );
386 }
387
388 #[test]
389 fn unicode_tier_normalizes_smart_punctuation_after_stricter_tiers_fail() {
390 assert_match(
391 seek_sequence_tiered(
392 &["const label = “alpha”—beta…;"],
393 &["const label = \"alpha\"-beta...;"],
394 0,
395 false,
396 ),
397 0,
398 MatchTier::Unicode,
399 1,
400 );
401 }
402
403 #[test]
404 fn reflow_tier_matches_one_line_hunk_against_three_line_formatter_split() {
405 let lines = [
406 "function demo() {",
407 " const value = alpha +",
408 " beta +",
409 " gamma;",
410 " return value;",
411 "}",
412 ];
413 let pattern = [" const value = alpha + beta + gamma;"];
414
415 assert_match(
416 seek_sequence_tiered(&lines, &pattern, 0, false),
417 1,
418 MatchTier::Reflow,
419 3,
420 );
421 }
422
423 #[test]
424 fn rejects_ambiguous_reflow_matches_instead_of_choosing_a_window() {
425 let lines = [
427 "const value = alpha +",
428 " beta +",
429 " gamma;",
430 "",
431 "const value = alpha +",
432 " beta +",
433 " gamma;",
434 ];
435 let pattern = ["const value = alpha + beta + gamma;"];
436
437 assert_eq!(find_reflow_match(&lines, &pattern, 0), None);
438 assert_eq!(seek_sequence_tiered(&lines, &pattern, 0, false), None);
439 }
440
441 #[test]
442 fn uses_line_contiguous_match_before_considering_reflow_candidate() {
443 let lines = [
445 "const value = alpha +",
446 " beta +",
447 " gamma;",
448 "const value = alpha + beta + gamma;",
449 ];
450 let pattern = ["const value = alpha + beta + gamma;"];
451
452 assert_match(
453 seek_sequence_tiered(&lines, &pattern, 0, false),
454 3,
455 MatchTier::Exact,
456 1,
457 );
458 }
459
460 #[test]
461 fn eof_hunk_only_matches_the_tail_and_never_forward_scans() {
462 let pattern = ["marker", "old"];
464
465 assert_match(
466 seek_sequence_tiered(
467 &["header", "marker", "old", "middle", "marker", "old"],
468 &pattern,
469 0,
470 true,
471 ),
472 4,
473 MatchTier::Exact,
474 2,
475 );
476 assert_eq!(
477 seek_sequence_tiered(
478 &["header", "marker", "old", "middle", "marker", "changed"],
479 &pattern,
480 0,
481 true,
482 ),
483 None
484 );
485 }
486
487 #[test]
488 fn eof_hunk_skips_reflow_even_when_the_tail_would_reflow_match() {
489 let lines = ["header", "const value = alpha +", " beta +", " gamma;"];
490 let pattern = ["const value = alpha + beta + gamma;"];
491
492 assert_eq!(find_reflow_match(&lines, &pattern, 0), Some((1, 3)));
493 assert_eq!(seek_sequence_tiered(&lines, &pattern, 0, true), None);
494 }
495
496 #[test]
497 fn try_match_honors_start_index_for_forward_scans_and_eof_anchor() {
498 assert_eq!(
499 try_match(&["a", "b", "a", "b"], &["a", "b"], 1, |a, b| a == b, false),
500 Some(2)
501 );
502 assert_eq!(
503 try_match(&["a", "b", "a", "b"], &["a", "b"], 3, |a, b| a == b, false),
504 None
505 );
506 assert_eq!(
507 try_match(&["a", "b", "a", "b"], &["a", "b"], 3, |a, b| a == b, true),
508 None
509 );
510 }
511}