use crate::{
line_end::LineEnd,
patch::{Diff, Hunk, Line},
utils::{LineIter, Text},
};
use std::{fmt, iter};
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ApplyError(usize, String);
impl fmt::Debug for ApplyError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("ApplyError")
.field(&self.0)
.field(&self.1)
.finish()
}
}
impl fmt::Display for ApplyError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(
f,
"error applying hunk #{}: could not find context in target file",
self.0
)?;
writeln!(f)?;
writeln!(f, "Hunk content:")?;
write!(f, "{}", self.1)
}
}
impl std::error::Error for ApplyError {}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct HunkStats {
added: usize,
deleted: usize,
context: usize,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ApplyStats {
pub lines_added: usize,
pub lines_deleted: usize,
pub lines_context: usize,
pub hunks_applied: usize,
}
impl ApplyStats {
fn new() -> Self {
Self {
lines_added: 0,
lines_deleted: 0,
lines_context: 0,
hunks_applied: 0,
}
}
fn add_hunk(&mut self, hunk_stats: HunkStats) {
self.lines_added += hunk_stats.added;
self.lines_deleted += hunk_stats.deleted;
self.lines_context += hunk_stats.context;
self.hunks_applied += 1;
}
pub fn has_changes(&self) -> bool {
self.lines_added > 0 || self.lines_deleted > 0
}
}
pub type ApplyResult<T, E = ApplyError> = Result<(T, ApplyStats), E>;
#[derive(Default, Debug, Clone)]
pub struct ApplyConfig {
pub line_end_strategy: LineEndHandling,
pub fuzzy_config: FuzzyConfig,
}
#[derive(Debug, Clone, Default)]
pub enum LineEndHandling {
EnsurePatchLineEnding,
#[default]
EnsureFileLineEnding,
EnsureLineEnding(LineEnd),
}
#[derive(Debug, Clone)]
pub struct FuzzyConfig {
pub max_fuzz: usize,
pub ignore_whitespace: bool,
pub ignore_case: bool,
}
impl Default for FuzzyConfig {
fn default() -> Self {
Self {
max_fuzz: 2,
ignore_whitespace: false,
ignore_case: false,
}
}
}
pub trait FuzzyComparable {
fn fuzzy_eq(&self, other: &Self, config: &ApplyConfig) -> bool;
fn similarity(&self, other: &Self, config: &ApplyConfig) -> f32;
}
impl FuzzyComparable for str {
fn fuzzy_eq(&self, other: &Self, config: &ApplyConfig) -> bool {
self.similarity(other, config) > 0.8
}
fn similarity(&self, other: &Self, config: &ApplyConfig) -> f32 {
let (s1, s2) = if config.fuzzy_config.ignore_case {
(self.to_lowercase(), other.to_lowercase())
} else {
(self.to_string(), other.to_string())
};
let (s1, s2) = if config.fuzzy_config.ignore_whitespace {
(
s1.chars()
.filter(|c| !c.is_whitespace())
.collect::<String>(),
s2.chars()
.filter(|c| !c.is_whitespace())
.collect::<String>(),
)
} else {
(s1, s2)
};
if s1 == s2 {
return 1.0;
}
let max_len = s1.len().max(s2.len());
if max_len == 0 {
return 1.0;
}
let distance = strsim::levenshtein(&s1, &s2);
1.0 - (distance as f32 / max_len as f32)
}
}
impl FuzzyComparable for [u8] {
fn fuzzy_eq(&self, other: &Self, config: &ApplyConfig) -> bool {
if let (Ok(s1), Ok(s2)) = (std::str::from_utf8(self), std::str::from_utf8(other)) {
s1.fuzzy_eq(s2, config)
} else {
self == other
}
}
fn similarity(&self, other: &Self, config: &ApplyConfig) -> f32 {
if let (Ok(s1), Ok(s2)) = (std::str::from_utf8(self), std::str::from_utf8(other)) {
s1.similarity(s2, config)
} else {
if self == other { 1.0 } else { 0.0 }
}
}
}
#[derive(Debug)]
enum ImageLine<'a, T: ?Sized> {
Unpatched((&'a T, Option<LineEnd>)),
Patched((&'a T, Option<LineEnd>)),
}
impl<'a, T: ?Sized + Text> ImageLine<'a, T> {
fn inner(&self) -> (&T, Option<LineEnd>) {
match self {
ImageLine::Unpatched(inner) | ImageLine::Patched(inner) => *inner,
}
}
fn into_inner(self) -> (&'a T, Option<LineEnd>) {
match self {
ImageLine::Unpatched(inner) | ImageLine::Patched(inner) => inner,
}
}
fn is_patched(&self) -> bool {
match self {
ImageLine::Unpatched(_) => false,
ImageLine::Patched(_) => true,
}
}
}
impl<T: ?Sized> Copy for ImageLine<'_, T> {}
impl<T: ?Sized> Clone for ImageLine<'_, T> {
fn clone(&self) -> Self {
*self
}
}
fn map_line_ending<T>(line_end: Option<LineEnd>, ensure_line_end: Option<LineEnd>) -> T
where
T: From<LineEnd> + Default,
{
let Some(line_end) = line_end else {
return Default::default();
};
if let Some(ensure_line_end) = ensure_line_end {
ensure_line_end.into()
} else {
line_end.into()
}
}
pub fn apply(base_image: &str, diff: &Diff<'_, str>) -> ApplyResult<String, ApplyError> {
apply_with_config(base_image, diff, &ApplyConfig::default())
}
pub fn apply_with_config(
base_image: &str,
diff: &Diff<'_, str>,
config: &ApplyConfig,
) -> ApplyResult<String, ApplyError> {
let mut image: Vec<_> = LineIter::new(base_image)
.map(ImageLine::Unpatched)
.collect();
let mut stats = ApplyStats::new();
for (i, hunk) in diff.hunks().iter().enumerate() {
let hunk_stats = match apply_hunk_with_config(&mut image, hunk, config) {
Ok(stats) => stats,
Err(_) => return Err(ApplyError(i + 1, format!("{}", hunk))),
};
stats.add_hunk(hunk_stats);
}
let preferred_line_ending = Some(match config.line_end_strategy {
LineEndHandling::EnsurePatchLineEnding => {
let mut lf_score = 0usize;
let mut crlf_score = 0usize;
for hunk in diff.hunks().iter() {
for line in hunk.lines() {
match line.line_end() {
Some(LineEnd::Lf) => lf_score += 1,
Some(LineEnd::CrLf) => crlf_score += 1,
_ => (),
}
}
}
LineEnd::choose_from_scores(lf_score, crlf_score)
}
LineEndHandling::EnsureFileLineEnding => LineEnd::most_common(base_image),
LineEndHandling::EnsureLineEnding(line_end) => line_end,
});
let content = image
.into_iter()
.map(ImageLine::into_inner)
.map(|(line, ending)| {
format!(
"{}{}",
line,
map_line_ending::<&str>(ending, preferred_line_ending)
)
})
.collect();
Ok((content, stats))
}
pub fn apply_bytes(base_image: &[u8], patch: &Diff<'_, [u8]>) -> ApplyResult<Vec<u8>, ApplyError> {
apply_bytes_with_config(base_image, patch, &ApplyConfig::default())
}
pub fn apply_bytes_with_config(
base_image: &[u8],
diff: &Diff<'_, [u8]>,
config: &ApplyConfig,
) -> ApplyResult<Vec<u8>, ApplyError> {
let mut image: Vec<_> = LineIter::new(base_image)
.map(ImageLine::Unpatched)
.collect();
let mut stats = ApplyStats::new();
for (i, hunk) in diff.hunks().iter().enumerate() {
let hunk_stats = match apply_hunk_with_config(&mut image, hunk, config) {
Ok(stats) => stats,
Err(_) => return Err(ApplyError(i + 1, format!("{}", hunk))),
};
stats.add_hunk(hunk_stats);
}
let preferred_line_ending = Some(match config.line_end_strategy {
LineEndHandling::EnsurePatchLineEnding => {
let mut lf_score = 0usize;
let mut crlf_score = 0usize;
for hunk in diff.hunks().iter() {
for line in hunk.lines() {
match line.line_end() {
Some(LineEnd::Lf) => lf_score += 1,
Some(LineEnd::CrLf) => crlf_score += 1,
_ => (),
}
}
}
LineEnd::choose_from_scores(lf_score, crlf_score)
}
LineEndHandling::EnsureFileLineEnding => LineEnd::most_common(base_image),
LineEndHandling::EnsureLineEnding(line_end) => line_end,
});
let content = image
.into_iter()
.map(ImageLine::into_inner)
.flat_map(|(line, ending)| {
[
line,
map_line_ending::<&[u8]>(ending, preferred_line_ending),
]
.concat()
})
.collect();
Ok((content, stats))
}
fn apply_hunk_with_config<'a, T>(
image: &mut Vec<ImageLine<'a, T>>,
hunk: &Hunk<'a, T>,
config: &ApplyConfig,
) -> Result<HunkStats, ()>
where
T: PartialEq + FuzzyComparable + ?Sized + Text + ToOwned,
{
let (pos, fuzz_level) = find_position_fuzzy(image, hunk, config).ok_or(())?;
let mut added = 0;
let mut deleted = 0;
let mut context = 0;
for line in hunk.lines() {
match line {
Line::Insert(_) => added += 1,
Line::Delete(_) => deleted += 1,
Line::Context(_) => context += 1,
}
}
if fuzz_level == 0 {
image.splice(
pos..pos + pre_image_line_count(hunk.lines()),
post_image(hunk.lines()).map(ImageLine::Patched),
);
} else {
apply_hunk_preserving_context(image, hunk, pos);
}
Ok(HunkStats {
added,
deleted,
context,
})
}
fn apply_hunk_preserving_context<'a, T>(
image: &mut Vec<ImageLine<'a, T>>,
hunk: &Hunk<'a, T>,
pos: usize,
) where
T: ?Sized + Text + ToOwned,
{
let mut image_offset = 0;
for line in hunk.lines() {
match *line {
Line::Context(_) => {
if let Some(img_line) = image.get_mut(pos + image_offset) {
*img_line = ImageLine::Patched(img_line.into_inner());
}
image_offset += 1;
}
Line::Delete(_) => {
image.remove(pos + image_offset);
}
Line::Insert(line) => {
image.insert(pos + image_offset, ImageLine::Patched(line));
image_offset += 1;
}
}
}
}
fn find_position_fuzzy<T>(
image: &[ImageLine<T>],
hunk: &Hunk<'_, T>,
config: &ApplyConfig,
) -> Option<(usize, usize)>
where
T: PartialEq + FuzzyComparable + ?Sized + Text + ToOwned,
{
if let Some(pos) = find_position(image, hunk) {
return Some((pos, 0));
}
for fuzz_level in 1..=config.fuzzy_config.max_fuzz {
if let Some(pos) = find_position_with_fuzz(image, hunk, fuzz_level, config) {
return Some((pos, fuzz_level));
}
}
None
}
fn find_position_with_fuzz<T>(
image: &[ImageLine<T>],
hunk: &Hunk<'_, T>,
fuzz_level: usize,
config: &ApplyConfig,
) -> Option<usize>
where
T: PartialEq + FuzzyComparable + ?Sized + Text + ToOwned,
{
let pos = std::cmp::min(hunk.new_range().start().saturating_sub(1), image.len());
let backward = (0..pos).rev();
let forward = pos + 1..image.len();
iter::once(pos)
.chain(interleave(backward, forward))
.find(|&pos| match_fragment_fuzzy(image, hunk.lines(), pos, fuzz_level, config))
}
fn match_fragment_fuzzy<T>(
image: &[ImageLine<T>],
lines: &[Line<'_, T>],
pos: usize,
fuzz_level: usize,
config: &ApplyConfig,
) -> bool
where
T: PartialEq + FuzzyComparable + ?Sized + Text,
{
let len = pre_image_line_count(lines);
let image_slice = if let Some(image) = image.get(pos..pos + len) {
image
} else {
return false;
};
if image_slice.iter().any(ImageLine::is_patched) {
return false;
}
let pre_image_lines: Vec<_> = pre_image(lines).collect();
let image_lines: Vec<_> = image_slice.iter().map(ImageLine::inner).collect();
if pre_image_lines.len() != image_lines.len() {
return false;
}
let context_indices: Vec<_> = lines
.iter()
.enumerate()
.filter_map(|(i, line)| match line {
Line::Context(_) => Some(i),
_ => None,
})
.collect();
let mut pre_image_context_indices = Vec::new();
let mut pre_image_idx = 0;
for (original_idx, line) in lines.iter().enumerate() {
match line {
Line::Context(_) | Line::Delete(_) => {
if context_indices.contains(&original_idx) {
pre_image_context_indices.push(pre_image_idx);
}
pre_image_idx += 1;
}
Line::Insert(_) => {}
}
}
if pre_image_context_indices.len() < fuzz_level {
let len = pre_image_line_count(lines);
let image = if let Some(image) = image.get(pos..pos + len) {
image
} else {
return false;
};
if image.iter().any(ImageLine::is_patched) {
return false;
}
for (pre_line, image_line) in pre_image_lines.iter().zip(image_lines.iter()) {
if !pre_line.0.fuzzy_eq(image_line.0, config) {
return false;
}
}
return true;
}
let combinations = generate_fuzz_combinations(&pre_image_context_indices, fuzz_level);
for ignored_indices in combinations {
if match_with_ignored_context(
pre_image_lines.as_slice(),
&image_lines,
&ignored_indices,
config,
) {
return true;
}
}
false
}
fn generate_fuzz_combinations(context_indices: &[usize], fuzz_level: usize) -> Vec<Vec<usize>> {
if fuzz_level == 0 || context_indices.is_empty() {
return vec![vec![]];
}
let len = context_indices.len();
let mut combinations = Vec::new();
for start_ignore in 0..=fuzz_level.min(len) {
for end_ignore in 0..=fuzz_level.min(len.saturating_sub(start_ignore)) {
let mut ignored = Vec::new();
ignored.extend(context_indices.iter().take(start_ignore).copied());
ignored.extend(
context_indices
.iter()
.skip(start_ignore)
.rev()
.take(end_ignore)
.copied(),
);
combinations.push(ignored);
}
}
combinations
}
fn match_with_ignored_context<T>(
pre_image_lines: &[(&T, Option<LineEnd>)],
image_lines: &[(&T, Option<LineEnd>)],
ignored_indices: &[usize],
config: &ApplyConfig,
) -> bool
where
T: PartialEq + FuzzyComparable + ?Sized,
{
for (i, (pre_line, image_line)) in pre_image_lines.iter().zip(image_lines.iter()).enumerate() {
if ignored_indices.contains(&i) {
continue; }
if !pre_line.0.fuzzy_eq(image_line.0, config) {
return false;
}
}
true
}
fn find_position<T: PartialEq + ?Sized + Text + ToOwned>(
image: &[ImageLine<T>],
hunk: &Hunk<'_, T>,
) -> Option<usize> {
let pos = std::cmp::min(hunk.new_range().start().saturating_sub(1), image.len());
let backward = (0..pos).rev();
let forward = pos + 1..image.len();
iter::once(pos)
.chain(interleave(backward, forward))
.find(|&pos| match_fragment(image, hunk.lines(), pos))
}
fn pre_image_line_count<T: ?Sized>(lines: &[Line<'_, T>]) -> usize {
pre_image(lines).count()
}
fn post_image<'a, 'b, T: ?Sized>(
lines: &'b [Line<'a, T>],
) -> impl Iterator<Item = (&'a T, Option<LineEnd>)> + 'b {
lines.iter().filter_map(move |line| match *line {
Line::Context(l) | Line::Insert(l) => Some(l),
Line::Delete(_) => None,
})
}
fn pre_image<'a, 'b: 'a, T: ?Sized>(
lines: &'b [Line<'a, T>],
) -> impl Iterator<Item = (&'a T, Option<LineEnd>)> + 'b {
lines.iter().filter_map(|line| match *line {
Line::Context(l) | Line::Delete(l) => Some(l),
Line::Insert(_) => None,
})
}
fn match_fragment<T: PartialEq + ?Sized + Text>(
image: &[ImageLine<T>],
lines: &[Line<'_, T>],
pos: usize,
) -> bool {
let len = pre_image_line_count(lines);
let image = if let Some(image) = image.get(pos..pos + len) {
image
} else {
return false;
};
if image.iter().any(ImageLine::is_patched) {
return false;
}
pre_image(lines).eq(image.iter().map(ImageLine::inner))
}
#[derive(Debug)]
struct Interleave<I, J> {
a: iter::Fuse<I>,
b: iter::Fuse<J>,
flag: bool,
}
fn interleave<I, J>(
i: I,
j: J,
) -> Interleave<<I as IntoIterator>::IntoIter, <J as IntoIterator>::IntoIter>
where
I: IntoIterator,
J: IntoIterator<Item = I::Item>,
{
Interleave {
a: i.into_iter().fuse(),
b: j.into_iter().fuse(),
flag: false,
}
}
impl<I, J> Iterator for Interleave<I, J>
where
I: Iterator,
J: Iterator<Item = I::Item>,
{
type Item = I::Item;
fn next(&mut self) -> Option<I::Item> {
self.flag = !self.flag;
if self.flag {
match self.a.next() {
None => self.b.next(),
item => item,
}
} else {
match self.b.next() {
None => self.a.next(),
item => item,
}
}
}
}
#[cfg(test)]
mod test {
use std::path::PathBuf;
use crate::{Diff, apply};
fn load_files(name: &str) -> (String, String) {
let base_folder = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("test-data")
.join(name);
let base_image = std::fs::read_to_string(base_folder.join("target.txt")).unwrap();
let patch = std::fs::read_to_string(base_folder.join("patch.patch")).unwrap();
(base_image, patch)
}
#[test]
fn apply_patch() {
let (base_image, patch) = load_files("fuzzy");
let patch = crate::Diff::from_bytes(patch.as_bytes()).unwrap();
println!("Applied: {:#?}", patch);
let (content, _stats) = crate::apply_bytes(base_image.as_bytes(), &patch).unwrap();
let result = String::from_utf8(content)
.unwrap()
.lines()
.take(50)
.collect::<Vec<_>>()
.join("\n");
insta::assert_snapshot!(result);
println!("Result:\n{}", result);
}
fn assert_patch(old: &str, new: &str, patch: &str) {
let diff = Diff::from_str(patch).unwrap();
let (content, _stats) = apply(old, &diff).unwrap();
assert_eq!(new, content);
}
#[test]
fn test_apply_result_statistics() {
let old = "line 1\nline 2\nline 3\n";
let new = "line 1\nline 2 modified\nline 4\n";
let patch = "\
--- original
+++ modified
@@ -1,3 +1,3 @@
line 1
-line 2
-line 3
+line 2 modified
+line 4
";
let diff = Diff::from_str(patch).unwrap();
let (content, stats) = apply(old, &diff).unwrap();
assert_eq!(content, new);
assert_eq!(stats.lines_added, 2);
assert_eq!(stats.lines_deleted, 2);
assert_eq!(stats.lines_context, 1);
assert_eq!(stats.hunks_applied, 1);
assert!(stats.has_changes());
}
#[test]
fn test_apply_result_no_changes() {
let old = "line 1\nline 2\n";
let new = "line 1\nline 2\n";
let patch = "\
--- original
+++ modified
@@ -1,2 +1,2 @@
line 1
line 2
";
let diff = Diff::from_str(patch).unwrap();
let (content, stats) = apply(old, &diff).unwrap();
assert_eq!(content, new);
assert_eq!(stats.lines_added, 0);
assert_eq!(stats.lines_deleted, 0);
assert_eq!(stats.lines_context, 2);
assert_eq!(stats.hunks_applied, 1);
assert!(!stats.has_changes());
}
#[test]
fn test_apply_result_multiple_hunks() {
let old = "line 1\nline 2\nline 3\nline 4\nline 5\n";
let new = "line 1\nline 2 modified\nline 3\nline 4 modified\nline 5\n";
let patch = "\
--- original
+++ modified
@@ -1,2 +1,2 @@
line 1
-line 2
+line 2 modified
@@ -4,2 +4,2 @@
-line 4
+line 4 modified
line 5
";
let diff = Diff::from_str(patch).unwrap();
let (content, stats) = apply(old, &diff).unwrap();
assert_eq!(content, new);
assert_eq!(stats.lines_added, 2);
assert_eq!(stats.lines_deleted, 2);
assert_eq!(stats.lines_context, 2);
assert_eq!(stats.hunks_applied, 2);
assert!(stats.has_changes());
}
#[test]
fn test_detect_already_applied_patch() {
let old = "line 1\nline 2\nline 3\n";
let patch = "\
--- original
+++ modified
@@ -1,3 +1,3 @@
line 1
-line 2
+line 2 modified
line 3
";
let diff = Diff::from_str(patch).unwrap();
let (content, stats) = apply(old, &diff).unwrap();
assert_eq!(content, "line 1\nline 2 modified\nline 3\n");
assert!(stats.has_changes());
assert_eq!(stats.lines_added, 1);
assert_eq!(stats.lines_deleted, 1);
let result = apply(&content, &diff);
assert!(result.is_err(), "Applying the same patch twice should fail");
}
#[test]
fn line_end_strategies() {
let old = "old line\r\n";
let new = "new line\r\n";
let patch = "\
--- original
+++ modified
@@ -1 +1 @@
-old line
+new line
";
assert_patch(old, new, patch);
let old = "old line\n";
let new = "new line\n";
let expected = "\
--- original
+++ modified
@@ -1 +1 @@
-old line
+new line
"
.replace("\n", "\r\n");
assert_patch(old, new, expected.as_str());
}
#[test]
fn test_error_message_format() {
let base = "completely different content\n";
let patch = "\
--- original
+++ modified
@@ -1,3 +1,4 @@
line 1
-line 2
+line 2 modified
+new line
line 3
";
let diff = Diff::from_str(patch).unwrap();
let result = apply(base, &diff);
assert!(result.is_err());
let err = result.unwrap_err();
let err_msg = err.to_string();
insta::assert_snapshot!(err_msg);
}
#[test]
fn test_tectonic_patch_with_fuzz() {
let (base_image, patch) = load_files("tectonic");
let diff = crate::Diff::from_str(&patch).unwrap();
let (result, stats) = crate::apply(&base_image, &diff)
.expect("Patch should succeed with GNU patch-style fuzz");
assert!(stats.has_changes());
assert_eq!(stats.hunks_applied, 1);
assert!(
result.contains("[patch.crates-io]"),
"Patched file should contain [patch.crates-io]"
);
assert!(
result.contains("libz-sys"),
"Patched file should contain libz-sys"
);
let relevant_lines: String = result
.lines()
.skip(97) .take(10)
.collect::<Vec<_>>()
.join("\n");
insta::assert_snapshot!(relevant_lines);
}
}