use imara_diff::{diff, intern::InternedInput, Algorithm};
use std::collections::HashMap;
use std::ops::Range;
use super::inline::compute_inline_diff_merged;
use super::LineSource;
fn boundary_score(line: &str) -> u32 {
if line.trim().is_empty() {
3 } else if !line.starts_with(' ') && !line.starts_with('\t') {
2 } else {
0
}
}
fn slide_hunks(
hunks: &mut [(Range<u32>, Range<u32>)],
old_lines: &[&str],
new_lines: &[&str],
) {
let old_len = old_lines.len() as u32;
let new_len = new_lines.len() as u32;
for hunk in hunks.iter_mut() {
let (ref mut before, ref mut after) = *hunk;
let before_len = before.end - before.start;
let after_len = after.end - after.start;
if before_len > 0 && after_len > 0 {
continue;
}
let mut best_score = 0u32;
let mut best_offset: i32 = 0;
let mut offset: i32 = 0;
loop {
let new_before_end = before.end as i32 + offset;
let new_after_end = after.end as i32 + offset;
let new_before_start = before.start as i32 + offset;
if new_before_end >= old_len as i32 || new_after_end >= new_len as i32 {
break;
}
if new_before_start < 0 {
break;
}
let ctx_score = if before_len > 0 {
let ctx_idx = new_before_start - 1;
if ctx_idx >= 0 { boundary_score(old_lines[ctx_idx as usize]) } else { 0 }
} else {
let new_after_start = after.start as i32 + offset;
let ctx_idx = new_after_start - 1;
if ctx_idx >= 0 { boundary_score(new_lines[ctx_idx as usize]) } else { 0 }
};
if ctx_score > best_score {
best_score = ctx_score;
best_offset = offset;
}
if before_len > 0 {
if old_lines[new_before_start as usize] == old_lines[new_before_end as usize] {
offset += 1;
} else {
break;
}
} else {
let new_after_start = after.start as i32 + offset;
if new_after_start >= 0
&& (new_after_end as usize) < new_lines.len()
&& new_lines[new_after_start as usize] == new_lines[new_after_end as usize]
{
offset += 1;
} else {
break;
}
}
}
let final_score = if before_len > 0 {
let ctx = before.start as i32 + offset - 1;
if ctx >= 0 { boundary_score(old_lines[ctx as usize]) } else { 0 }
} else {
let ctx = after.start as i32 + offset - 1;
if ctx >= 0 { boundary_score(new_lines[ctx as usize]) } else { 0 }
};
if final_score > best_score {
best_score = final_score;
best_offset = offset;
}
if best_offset != 0 && best_score > 0 {
before.start = (before.start as i32 + best_offset) as u32;
before.end = (before.end as i32 + best_offset) as u32;
after.start = (after.start as i32 + best_offset) as u32;
after.end = (after.end as i32 + best_offset) as u32;
}
}
}
pub(super) fn build_provenance_map(old_lines: &[&str], new_lines: &[&str]) -> Vec<Option<usize>> {
if old_lines.is_empty() {
return vec![None; new_lines.len()];
}
if new_lines.is_empty() {
return Vec::new();
}
let old_text = old_lines.join("\n");
let new_text = new_lines.join("\n");
let input = InternedInput::new(old_text.as_str(), new_text.as_str());
let mut hunks: Vec<(Range<u32>, Range<u32>)> = Vec::new();
diff(
Algorithm::Histogram,
&input,
|before: Range<u32>, after: Range<u32>| {
hunks.push((before, after));
},
);
slide_hunks(&mut hunks, old_lines, new_lines);
let mut result = vec![None; new_lines.len()];
let mut old_idx = 0usize;
let mut new_idx = 0usize;
for (before, after) in hunks {
let hunk_new_start = after.start as usize;
while new_idx < hunk_new_start {
result[new_idx] = Some(old_idx);
old_idx += 1;
new_idx += 1;
}
old_idx = before.end as usize;
new_idx = after.end as usize;
}
while new_idx < new_lines.len() && old_idx < old_lines.len() {
result[new_idx] = Some(old_idx);
old_idx += 1;
new_idx += 1;
}
result
}
pub(super) fn build_modification_map<'a>(
old_lines: &[&'a str],
new_lines: &[&'a str],
_change_source: LineSource,
) -> HashMap<usize, (usize, &'a str)> {
let mut result = HashMap::new();
if old_lines.is_empty() || new_lines.is_empty() {
return result;
}
let old_text = old_lines.join("\n");
let new_text = new_lines.join("\n");
let input = InternedInput::new(old_text.as_str(), new_text.as_str());
let mut hunks: Vec<(Range<u32>, Range<u32>)> = Vec::new();
diff(
Algorithm::Histogram,
&input,
|before: Range<u32>, after: Range<u32>| {
hunks.push((before, after));
},
);
slide_hunks(&mut hunks, old_lines, new_lines);
for (before, after) in hunks {
let old_start = before.start as usize;
let old_end = before.end as usize;
let new_start = after.start as usize;
let new_end = after.end as usize;
let deletions: Vec<(&str, usize)> = (old_start..old_end)
.map(|i| (old_lines[i].trim_end(), i))
.collect();
let insertions: Vec<(&str, usize)> = (new_start..new_end)
.map(|i| (new_lines[i].trim_end(), i))
.filter(|(content, _)| !content.trim().is_empty())
.collect();
let mut paired_inserts: std::collections::HashSet<usize> = std::collections::HashSet::new();
for (old_content, old_i) in &deletions {
for (ins_idx, (new_content, new_i)) in insertions.iter().enumerate() {
if paired_inserts.contains(&ins_idx) {
continue;
}
let inline_result =
compute_inline_diff_merged(old_content, new_content, LineSource::Unstaged);
if inline_result.is_meaningful {
paired_inserts.insert(ins_idx);
result.insert(*new_i, (*old_i, old_lines[*old_i]));
break;
}
}
}
}
result
}
pub(super) fn survives_in(provenance: &[Option<usize>], target_idx: usize) -> bool {
provenance.contains(&Some(target_idx))
}
pub(super) fn find_sources(
provenance: &[Option<usize>],
target_idx: usize,
) -> impl Iterator<Item = usize> + '_ {
provenance
.iter()
.enumerate()
.filter_map(move |(idx, &prov)| {
if prov == Some(target_idx) {
Some(idx)
} else {
None
}
})
}
pub(super) fn survives_chain(
source_idx: usize,
intermediate_from_source: &[Option<usize>],
final_from_intermediate: &[Option<usize>],
) -> bool {
find_sources(intermediate_from_source, source_idx)
.any(|intermediate_idx| survives_in(final_from_intermediate, intermediate_idx))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_provenance_map_new_has_more_lines_than_old() {
let old_lines = &["line1", "line2"];
let new_lines = &["line1", "line2", "line3", "line4", "line5"];
let provenance = build_provenance_map(old_lines, new_lines);
assert_eq!(provenance.len(), 5);
assert_eq!(provenance[0], Some(0));
assert_eq!(provenance[1], Some(1));
assert_eq!(provenance[2], None);
assert_eq!(provenance[3], None);
assert_eq!(provenance[4], None);
for prov in &provenance {
if let Some(idx) = prov {
assert!(
*idx < old_lines.len(),
"provenance index {} is out of bounds for old_lines (len {})",
idx,
old_lines.len()
);
}
}
}
#[test]
fn test_provenance_map_old_has_more_lines_than_new() {
let old_lines = &["line1", "line2", "line3", "line4", "line5"];
let new_lines = &["line1", "line2"];
let provenance = build_provenance_map(old_lines, new_lines);
assert_eq!(provenance.len(), 2);
assert_eq!(provenance[0], Some(0));
assert_eq!(provenance[1], Some(1));
for prov in &provenance {
if let Some(idx) = prov {
assert!(*idx < old_lines.len());
}
}
}
#[test]
fn test_survives_in_found() {
let provenance = vec![Some(0), Some(1), None, Some(2)];
assert!(survives_in(&provenance, 0));
assert!(survives_in(&provenance, 1));
assert!(survives_in(&provenance, 2));
}
#[test]
fn test_survives_in_not_found() {
let provenance = vec![Some(0), Some(1), None, Some(2)];
assert!(!survives_in(&provenance, 3));
assert!(!survives_in(&provenance, 99));
}
#[test]
fn test_survives_in_empty() {
let provenance: Vec<Option<usize>> = vec![];
assert!(!survives_in(&provenance, 0));
}
#[test]
fn test_find_sources_single_match() {
let provenance = vec![Some(0), Some(1), Some(2)];
let sources: Vec<_> = find_sources(&provenance, 1).collect();
assert_eq!(sources, vec![1]);
}
#[test]
fn test_find_sources_multiple_matches() {
let provenance = vec![Some(0), Some(1), Some(0), Some(1)];
let sources: Vec<_> = find_sources(&provenance, 0).collect();
assert_eq!(sources, vec![0, 2]);
}
#[test]
fn test_find_sources_no_match() {
let provenance = vec![Some(0), Some(1), Some(2)];
let sources: Vec<_> = find_sources(&provenance, 99).collect();
assert!(sources.is_empty());
}
#[test]
fn test_survives_chain_direct_path() {
let intermediate_from_source = vec![None, Some(0), None];
let final_from_intermediate = vec![None, None, Some(1)];
assert!(survives_chain(0, &intermediate_from_source, &final_from_intermediate));
}
#[test]
fn test_survives_chain_no_path() {
let intermediate_from_source = vec![None, Some(0), None];
let final_from_intermediate = vec![Some(99), None, None];
assert!(!survives_chain(0, &intermediate_from_source, &final_from_intermediate));
}
#[test]
fn test_survives_chain_source_not_in_intermediate() {
let intermediate_from_source = vec![Some(1), Some(2)];
let final_from_intermediate = vec![Some(0), Some(1)];
assert!(!survives_chain(99, &intermediate_from_source, &final_from_intermediate));
}
#[test]
fn test_hunk_boundary_multi_deletion_provenance() {
let old: Vec<&str> = "fn one() {\n println!(\"one\");\n}\n\n\
fn two() {\n println!(\"two\");\n}\n\n\
fn three() {\n println!(\"three\");\n}\n\n\
fn four() {\n println!(\"four\");\n println!(\"more\");\n}\n\n\
fn five() {\n println!(\"five\");\n}".lines().collect();
let new: Vec<&str> = "fn one() {\n println!(\"one\");\n}\n\n\
fn three() {\n println!(\"three\");\n}\n\n\
fn five() {\n println!(\"five\");\n}".lines().collect();
let prov = build_provenance_map(&old, &new);
assert_eq!(prov[6], Some(10),
"fn three's '}}' (new[6]) should map to old[10], not old[{}]",
prov[6].unwrap_or(999));
}
#[test]
fn test_hunk_boundary_prefers_blank_line() {
let old: &[&str] = &[
"fn three() {", " println!(\"three\");", "}", "", "fn four() {", " println!(\"four\");", "}", "", "fn five() {", " println!(\"five\");", "}", ];
let new: &[&str] = &[
"fn three() {", " println!(\"three\");", "}", "", "fn five() {", " println!(\"five\");", "}", ];
let prov = build_provenance_map(old, new);
assert_eq!(prov[0], Some(0), "fn three");
assert_eq!(prov[1], Some(1));
assert_eq!(prov[2], Some(2), "three's }}");
assert_eq!(prov[3], Some(3), "blank line between three and five");
assert_eq!(prov[4], Some(8), "fn five should map to old[8]");
assert_eq!(prov[5], Some(9));
assert_eq!(prov[6], Some(10));
}
}