use std::ops::Range;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum MatchTier {
Exact,
Whitespace,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum MatchOutcome {
Located {
ranges: Vec<Range<usize>>,
replacement: String,
tier: MatchTier,
},
Ambiguous {
count: usize,
locations: Vec<(usize, usize)>,
},
NotFound {
near: Option<NearMiss>,
},
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct NearMiss {
start_line: usize,
expected: Vec<String>,
found: Vec<String>,
}
const NEAR_MISS_MAX_LINES: usize = 20_000;
const NEAR_MISS_MAX_RENDER: usize = 40;
impl NearMiss {
pub(crate) fn render(&self) -> String {
use std::fmt::Write as _;
let mut out = format!(
"closest match near line {} (no exact or whitespace-tolerant match found):\n",
self.start_line
);
let exp_shown = self.expected.len().min(NEAR_MISS_MAX_RENDER);
for line in &self.expected[..exp_shown] {
out.push_str("- ");
out.push_str(line);
out.push('\n');
}
if self.expected.len() > exp_shown {
let _ = writeln!(
out,
" … ({} more expected line(s) elided)",
self.expected.len() - exp_shown
);
}
let found_shown = self.found.len().min(NEAR_MISS_MAX_RENDER);
for line in &self.found[..found_shown] {
out.push_str("+ ");
out.push_str(line);
out.push('\n');
}
if self.found.len() > found_shown {
let _ = writeln!(
out,
" … ({} more found line(s) elided)",
self.found.len() - found_shown
);
}
out
}
}
pub(crate) fn locate(text: &str, old: &str, new: &str, replace_all: bool) -> MatchOutcome {
if old.is_empty() {
return MatchOutcome::NotFound { near: None };
}
let exact: Vec<Range<usize>> = text
.match_indices(old)
.map(|(start, m)| start..start + m.len())
.collect();
if !exact.is_empty() {
if !replace_all && exact.len() > 1 {
let locations = exact
.iter()
.map(|r| byte_range_to_line_span(text, r))
.collect();
return MatchOutcome::Ambiguous {
count: exact.len(),
locations,
};
}
return MatchOutcome::Located {
ranges: exact,
replacement: new.to_string(),
tier: MatchTier::Exact,
};
}
if let Some(outcome) = locate_whitespace(text, old, new, replace_all) {
return outcome;
}
MatchOutcome::NotFound {
near: nearest_window(text, old),
}
}
struct LineSpan {
start: usize,
end: usize,
}
fn line_spans(text: &str) -> Vec<LineSpan> {
let mut spans = Vec::new();
let bytes = text.as_bytes();
let mut start = 0usize;
let mut i = 0usize;
while i < bytes.len() {
if bytes[i] == b'\n' {
let end = if i > start && bytes[i - 1] == b'\r' {
i - 1
} else {
i
};
spans.push(LineSpan { start, end });
start = i + 1;
}
i += 1;
}
if start < bytes.len() {
spans.push(LineSpan {
start,
end: bytes.len(),
});
}
spans
}
fn byte_range_to_line_span(text: &str, range: &Range<usize>) -> (usize, usize) {
let start_line = text[..range.start].bytes().filter(|&b| b == b'\n').count() + 1;
let last = range.end.saturating_sub(1).min(text.len());
let end_line = text[..last].bytes().filter(|&b| b == b'\n').count() + 1;
(start_line, end_line.max(start_line))
}
fn leading_ws(line: &str) -> &str {
let end = line
.find(|c: char| c != ' ' && c != '\t')
.unwrap_or(line.len());
&line[..end]
}
fn normalize_eol(s: &str) -> String {
s.replace("\r\n", "\n").replace('\r', "\n")
}
fn locate_whitespace(text: &str, old: &str, new: &str, replace_all: bool) -> Option<MatchOutcome> {
let norm_old = normalize_eol(old);
let old_lines: Vec<&str> = norm_old.lines().collect();
if old_lines.is_empty() {
return None;
}
let spans = line_spans(text);
let win = old_lines.len();
if spans.len() < win {
return None;
}
let strip = |s: &str| -> String { normalize_eol(s).trim_end().to_string() };
let old_trimmed: Vec<String> = old_lines.iter().map(|l| strip(l)).collect();
let mut matches: Vec<(Range<usize>, String)> = Vec::new();
'windows: for w in 0..=spans.len() - win {
let mut delta: Option<IndentDelta> = None;
for (offset, old_t) in old_trimmed.iter().enumerate() {
let span = &spans[w + offset];
let file_line = norm_text_line(text, span);
let file_t = file_line.trim_end();
if old_t.is_empty() && file_t.is_empty() {
continue;
}
let file_lead = leading_ws(file_t);
let old_lead = leading_ws(old_t);
let file_body = &file_t[file_lead.len()..];
let old_body = &old_t[old_lead.len()..];
if file_body != old_body {
continue 'windows; }
let Some(this) = IndentDelta::between(old_lead, file_lead) else {
continue 'windows; };
match &delta {
None => delta = Some(this),
Some(d) if *d == this => {}
Some(_) => continue 'windows, }
}
let range_start = spans[w].start;
let range_end = spans[w + win - 1].end;
let range = range_start..range_end;
let replacement = match &delta {
None => new.to_string(), Some(d) => d.apply(new),
};
matches.push((range, replacement));
}
if matches.is_empty() {
return None;
}
if !replace_all && matches.len() > 1 {
let locations = matches
.iter()
.map(|(r, _)| byte_range_to_line_span(text, r))
.collect();
return Some(MatchOutcome::Ambiguous {
count: matches.len(),
locations,
});
}
if matches.iter().any(|(_, r)| *r != matches[0].1) {
let locations = matches
.iter()
.map(|(r, _)| byte_range_to_line_span(text, r))
.collect();
return Some(MatchOutcome::Ambiguous {
count: matches.len(),
locations,
});
}
let replacement = matches[0].1.clone();
let ranges = matches.into_iter().map(|(r, _)| r).collect();
Some(MatchOutcome::Located {
ranges,
replacement,
tier: MatchTier::Whitespace,
})
}
fn norm_text_line(text: &str, span: &LineSpan) -> String {
normalize_eol(&text[span.start..span.end])
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum IndentDelta {
Add(String),
Strip(String),
}
impl IndentDelta {
fn between(old_lead: &str, file_lead: &str) -> Option<Self> {
if let Some(extra) = file_lead.strip_prefix(old_lead) {
Some(IndentDelta::Add(extra.to_string()))
} else {
old_lead
.strip_prefix(file_lead)
.map(|extra| IndentDelta::Strip(extra.to_string()))
}
}
fn apply(&self, new: &str) -> String {
match self {
IndentDelta::Add(p) if p.is_empty() => new.to_string(),
IndentDelta::Strip(p) if p.is_empty() => new.to_string(),
_ => {
let mut out = String::with_capacity(new.len());
let mut lines = new.split('\n').peekable();
while let Some(line) = lines.next() {
if line.trim().is_empty() {
out.push_str(line);
} else {
match self {
IndentDelta::Add(p) => {
out.push_str(p);
out.push_str(line);
}
IndentDelta::Strip(p) => {
out.push_str(line.strip_prefix(p.as_str()).unwrap_or(line));
}
}
}
if lines.peek().is_some() {
out.push('\n');
}
}
out
}
}
}
}
fn nearest_window(text: &str, old: &str) -> Option<NearMiss> {
let norm_old = normalize_eol(old);
let old_lines: Vec<String> = norm_old.lines().map(|l| l.trim_end().to_string()).collect();
if old_lines.is_empty() {
return None;
}
let spans = line_spans(text);
if spans.is_empty() || spans.len() > NEAR_MISS_MAX_LINES {
return None;
}
let win = old_lines.len();
let file_lines: Vec<String> = spans
.iter()
.map(|s| norm_text_line(text, s).trim_end().to_string())
.collect();
if win > file_lines.len() {
return None;
}
let old_joined = old_lines.join("\n");
let old_key = old_joined.trim().to_string();
let upper = file_lines.len().saturating_sub(win);
let mut best: Option<(usize, usize)> = None; for w in 0..=upper {
let window = &file_lines[w..w + win];
let joined = window.join("\n");
let dist = levenshtein(joined.trim(), &old_key);
match best {
Some((bd, _)) if dist >= bd => {}
_ => best = Some((dist, w)),
}
}
let (_, w) = best?;
let found = file_lines[w..w + win].to_vec();
Some(NearMiss {
start_line: w + 1,
expected: old_lines,
found,
})
}
fn levenshtein(a: &str, b: &str) -> usize {
let a: Vec<char> = a.chars().collect();
let b: Vec<char> = b.chars().collect();
if a.is_empty() {
return b.len();
}
if b.is_empty() {
return a.len();
}
let mut prev: Vec<usize> = (0..=b.len()).collect();
let mut cur: Vec<usize> = vec![0; b.len() + 1];
for (i, &ca) in a.iter().enumerate() {
cur[0] = i + 1;
for (j, &cb) in b.iter().enumerate() {
let cost = usize::from(ca != cb);
cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
}
std::mem::swap(&mut prev, &mut cur);
}
prev[b.len()]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn exact_unique() {
let text = "hello foo world";
let out = locate(text, "foo", "bar", false);
match out {
MatchOutcome::Located {
ranges,
replacement,
tier,
} => {
assert_eq!(tier, MatchTier::Exact);
assert_eq!(ranges, vec![6..9]);
assert_eq!(replacement, "bar");
assert_eq!(&text[ranges[0].clone()], "foo");
}
other => panic!("expected Located, got {other:?}"),
}
}
#[test]
fn exact_multiple_no_replace_all_ambiguous() {
let out = locate("foo and foo", "foo", "bar", false);
match out {
MatchOutcome::Ambiguous { count, locations } => {
assert_eq!(count, 2);
assert_eq!(locations.len(), 2);
assert_eq!(locations[0].0, 1); }
other => panic!("expected Ambiguous, got {other:?}"),
}
}
#[test]
fn exact_multiple_replace_all() {
let out = locate("foo and foo", "foo", "bar", true);
match out {
MatchOutcome::Located { ranges, tier, .. } => {
assert_eq!(tier, MatchTier::Exact);
assert_eq!(ranges, vec![0..3, 8..11]);
}
other => panic!("expected Located, got {other:?}"),
}
}
#[test]
fn trailing_ws_tolerated() {
let text = "let x = 1;\nlet y = 2;\n";
let old = "let x = 1; \nlet y = 2;"; let out = locate(text, old, "let x = 9;\nlet y = 8;", false);
match out {
MatchOutcome::Located {
ranges,
replacement,
tier,
} => {
assert_eq!(tier, MatchTier::Whitespace);
assert_eq!(ranges.len(), 1);
assert_eq!(&text[ranges[0].clone()], "let x = 1;\nlet y = 2;");
assert_eq!(replacement, "let x = 9;\nlet y = 8;");
}
other => panic!("expected Located/Whitespace, got {other:?}"),
}
}
#[test]
fn crlf_file_lf_old() {
let text = "alpha\r\nbeta\r\ngamma\r\n";
let old = "alpha\nbeta";
let out = locate(text, old, "ALPHA\nBETA", false);
match out {
MatchOutcome::Located { ranges, tier, .. } => {
assert_eq!(tier, MatchTier::Whitespace);
assert_eq!(ranges.len(), 1);
assert_eq!(&text[ranges[0].clone()], "alpha\r\nbeta");
}
other => panic!("expected Located/Whitespace, got {other:?}"),
}
}
#[test]
fn uniform_indent_shift_reindents_new() {
let text = " if x {\n y();\n }\n";
let old = "if x {\n y();\n}";
let new = "if x {\n z();\n}";
let out = locate(text, old, new, false);
match out {
MatchOutcome::Located {
ranges,
replacement,
tier,
} => {
assert_eq!(tier, MatchTier::Whitespace);
assert_eq!(ranges.len(), 1);
assert_eq!(&text[ranges[0].clone()], " if x {\n y();\n }");
assert_eq!(replacement, " if x {\n z();\n }");
let mut spliced = text.to_string();
spliced.replace_range(ranges[0].clone(), &replacement);
assert_eq!(spliced, " if x {\n z();\n }\n");
for line in replacement.lines().filter(|l| !l.trim().is_empty()) {
assert!(line.starts_with(" "), "line not reindented: {line:?}");
}
}
other => panic!("expected Located/Whitespace, got {other:?}"),
}
}
#[test]
fn not_found_renders_near_miss() {
let text = "fn alpha() {\n do_thing();\n}\n";
let old = "fn alpha() {\n do_OTHER();\n}";
let out = locate(text, old, "x", false);
match out {
MatchOutcome::NotFound { near: Some(nm) } => {
let rendered = nm.render();
assert!(
!rendered.contains("not found in file"),
"should not be bare not-found: {rendered}"
);
assert!(rendered.contains("line 1"), "rendered: {rendered}");
assert!(rendered.contains("- "), "expected `-` diff: {rendered}");
assert!(rendered.contains("+ "), "expected `+` diff: {rendered}");
assert!(
rendered.contains("do_OTHER();"),
"expected side missing: {rendered}"
);
assert!(
rendered.contains("do_thing();"),
"found side missing: {rendered}"
);
}
other => panic!("expected NotFound/Some(near), got {other:?}"),
}
}
#[test]
fn tier2_ambiguity() {
let text = " a()\n b()\nMID\n a()\n b()\n";
let old = "a()\nb()";
let out = locate(text, old, "x()\ny()", false);
match out {
MatchOutcome::Ambiguous { count, locations } => {
assert_eq!(count, 2);
assert_eq!(locations.len(), 2);
assert_eq!(locations[0], (1, 2));
assert_eq!(locations[1], (4, 5));
}
other => panic!("expected Ambiguous, got {other:?}"),
}
}
#[test]
fn tier2_ambiguity_replace_all() {
let text = " a()\n b()\nMID\n a()\n b()\n";
let old = "a()\nb()";
let out = locate(text, old, "x()\ny()", true);
match out {
MatchOutcome::Located {
ranges,
replacement,
tier,
} => {
assert_eq!(tier, MatchTier::Whitespace);
assert_eq!(ranges.len(), 2);
assert_eq!(replacement, " x()\n y()");
let mut spliced = text.to_string();
for r in ranges.iter().rev() {
spliced.replace_range(r.clone(), &replacement);
}
assert_eq!(spliced, " x()\n y()\nMID\n x()\n y()\n");
}
other => panic!("expected Located, got {other:?}"),
}
}
#[test]
fn tier2_replace_all_divergent_deltas_is_ambiguous() {
let text = " a()\n b()\nMID\n a()\n b()\n";
let old = "a()\nb()";
let new = "x()\ny()";
let out = locate(text, old, new, true);
match out {
MatchOutcome::Ambiguous { count, locations } => {
assert_eq!(count, 2);
assert_eq!(locations, vec![(1, 2), (4, 5)]);
}
other => panic!("expected Ambiguous (divergent deltas must not splice), got {other:?}"),
}
}
#[test]
fn exact_wins_unique() {
let text = "UNIQ()\nMID\n UNIQX()\n";
let old = "UNIQ()";
let out = locate(text, old, "DONE()", false);
match out {
MatchOutcome::Located {
tier, replacement, ..
} => {
assert_eq!(tier, MatchTier::Exact);
assert_eq!(replacement, "DONE()");
}
other => panic!("expected Located/Exact, got {other:?}"),
}
}
#[test]
fn non_uniform_indent_falls_through() {
let text = " a()\n b()\n";
let old = "a()\nb()";
let out = locate(text, old, "x", false);
assert!(
matches!(out, MatchOutcome::NotFound { .. }),
"non-uniform indent must not match tier 2: {out:?}"
);
}
#[test]
fn levenshtein_basics() {
assert_eq!(levenshtein("", ""), 0);
assert_eq!(levenshtein("abc", "abc"), 0);
assert_eq!(levenshtein("abc", ""), 3);
assert_eq!(levenshtein("", "abc"), 3);
assert_eq!(levenshtein("kitten", "sitting"), 3);
assert_eq!(levenshtein("flaw", "lawn"), 2);
}
#[test]
fn empty_old_is_not_found() {
assert!(matches!(
locate("anything", "", "x", false),
MatchOutcome::NotFound { near: None }
));
}
#[test]
fn final_line_without_newline_maps_correctly() {
let text = "line1\nline2"; let out = locate(text, "line2", "LINE2", false);
match out {
MatchOutcome::Located { ranges, tier, .. } => {
assert_eq!(tier, MatchTier::Exact);
assert_eq!(&text[ranges[0].clone()], "line2");
}
other => panic!("got {other:?}"),
}
}
#[test]
fn not_found_old_longer_than_file_no_panic() {
let out = locate("x", "aaaa\nbbbb\ncccc", "y", false);
assert!(
matches!(out, MatchOutcome::NotFound { near: None }),
"expected NotFound{{near: None}}, got {out:?}"
);
}
#[test]
fn not_found_old_two_lines_single_line_file_no_panic() {
let out = locate(
"only_one_line",
"missing_a\nmissing_b",
"replacement",
false,
);
assert!(
matches!(out, MatchOutcome::NotFound { near: None }),
"expected NotFound{{near: None}}, got {out:?}"
);
}
#[test]
fn whitespace_tier_final_line_no_newline() {
let text = "alpha\n beta\n gamma"; let old = "beta\ngamma";
let out = locate(text, old, "BETA\nGAMMA", false);
match out {
MatchOutcome::Located {
ranges,
tier,
replacement,
} => {
assert_eq!(tier, MatchTier::Whitespace);
assert_eq!(&text[ranges[0].clone()], " beta\n gamma");
assert_eq!(replacement, " BETA\n GAMMA");
}
other => panic!("got {other:?}"),
}
}
}