use log::{debug, info, trace, warn};
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use similar::udiff::unified_diff;
use similar::TextDiff;
use std::fs;
use std::path::{Path, PathBuf};
use thiserror::Error;
/// Errors produced while parsing diff text.
#[derive(Error, Debug, PartialEq)]
pub enum ParseError {
    /// A hunk appeared before any `--- a/path` (or `+++ b/path`) header
    /// identified the target file.
    #[error("Diff block starting on line {line} was found without a file path header (e.g., '--- a/path/to/file')")]
    MissingFileHeader {
        /// 1-based line number where the orphaned hunk begins.
        line: usize,
    },
}
/// Errors for [`parse_single_patch`], which expects exactly one patch.
#[derive(Error, Debug, PartialEq)]
#[non_exhaustive]
pub enum SingleParseError {
    /// The underlying parser rejected the content.
    #[error("Failed to parse diff content")]
    Parse(#[from] ParseError),
    /// The content parsed, but contained no patches at all.
    #[error("No patches were found in the provided diff content")]
    NoPatchesFound,
    /// The content contained patches for more than one file.
    #[error(
        "Found patches for multiple files ({0} patches), but this function only supports single-file diffs"
    )]
    MultiplePatchesFound(usize),
}
/// Filesystem-level errors raised while applying patches to files on disk.
#[derive(Error, Debug)]
pub enum PatchError {
    /// The patch's file path escapes the target directory (path traversal).
    #[error("Path '{0}' resolves outside the target directory. Aborting for security.")]
    PathTraversal(PathBuf),
    /// The file to patch does not exist and the patch is not a creation.
    #[error("Target file not found for patching: {0}")]
    TargetNotFound(PathBuf),
    /// The OS denied access to the path.
    #[error("Permission denied for path: {path:?}")]
    PermissionDenied {
        path: PathBuf,
    },
    /// The target path exists but is a directory.
    #[error("Target path is a directory, not a file: {path:?}")]
    TargetIsDirectory {
        path: PathBuf,
    },
    /// Any other I/O failure, with the offending path attached.
    #[error("I/O error while processing {path:?}: {source}")]
    Io {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
}
/// Errors for the strict (`try_*`) application APIs, where a partial apply
/// is treated as failure instead of a reported result.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum StrictApplyError {
    /// A filesystem-level failure.
    #[error(transparent)]
    Patch(#[from] PatchError),
    /// Some hunks applied and some failed; the report lists which.
    #[error("Patch applied partially. See report for details.")]
    PartialApply {
        /// Per-hunk outcomes for the partial application.
        report: ApplyResult,
    },
}
/// Errors for the one-shot convenience API ([`patch_content_str`]), which
/// parses and strictly applies a single-file diff in one call.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum OneShotError {
    /// The diff content could not be parsed.
    #[error("Failed to parse diff content")]
    Parse(#[from] ParseError),
    /// The patch could not be applied cleanly.
    #[error("Failed to apply patch")]
    Apply(#[from] StrictApplyError),
    /// The content parsed but contained no patches.
    #[error("No patches were found in the provided diff content")]
    NoPatchesFound,
    /// The content contained patches for more than one file.
    #[error(
        "Found patches for multiple files ({0} files), but this function only supports single-file diffs"
    )]
    MultiplePatchesFound(usize),
}
/// Reasons a single hunk could not be placed in the target content.
#[derive(Error, Debug, Clone, PartialEq)]
pub enum HunkApplyError {
    /// The hunk's context/removal lines were not found anywhere.
    #[error("Context not found")]
    ContextNotFound,
    /// The exact match block occurs at several line positions.
    #[error("Ambiguous exact match found at lines: {0:?}")]
    AmbiguousExactMatch(Vec<usize>),
    /// Several fuzzy candidates tied.
    /// NOTE(review): each pair looks like (start_index, length) — confirm
    /// against the fuzzy matcher that produces these values.
    #[error("Ambiguous fuzzy match found at locations: {0:?}")]
    AmbiguousFuzzyMatch(Vec<(usize, usize)>),
    /// A best fuzzy candidate existed but scored below the configured cutoff.
    #[error("Best fuzzy match at {location} (score: {best_score:.3}) was below threshold ({threshold:.3})")]
    FuzzyMatchBelowThreshold {
        /// Similarity score of the best candidate.
        /// NOTE(review): scores are `f64` while the threshold is `f32` —
        /// intentional? A shared type would avoid cross-width comparisons.
        best_score: f64,
        /// Configured cutoff (see [`ApplyOptions`]'s `fuzz_factor`).
        threshold: f32,
        /// Where the best candidate was found.
        location: HunkLocation,
    },
}
/// How a hunk's match block was located in the target content.
#[derive(Debug, Clone, PartialEq)]
pub enum MatchType {
    /// Byte-for-byte match.
    Exact,
    /// Matched after ignoring whitespace differences.
    ExactIgnoringWhitespace,
    /// Matched by similarity scoring.
    Fuzzy {
        /// Similarity score of the winning candidate.
        score: f64,
    },
}
/// Outcome of attempting to apply one hunk.
#[derive(Debug, Clone, PartialEq)]
pub enum HunkApplyStatus {
    /// The hunk was applied.
    Applied {
        /// Where in the content the hunk matched.
        location: HunkLocation,
        /// How the match was found (exact, whitespace-insensitive, or fuzzy).
        match_type: MatchType,
        /// The original lines that were replaced.
        replaced_lines: Vec<String>,
    },
    /// The hunk contained no additions or removals, so nothing was done.
    SkippedNoChanges,
    /// The hunk could not be applied.
    Failed(HunkApplyError),
}
/// Tunables for patch application.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ApplyOptions {
    /// When `true`, compute results (and a preview diff) without writing files.
    pub dry_run: bool,
    /// Minimum similarity required for fuzzy hunk matching
    /// (the `exact()` preset sets this to 0.0; default is 0.7).
    pub fuzz_factor: f32,
}
impl Default for ApplyOptions {
fn default() -> Self {
Self {
dry_run: false,
fuzz_factor: 0.7,
}
}
}
impl ApplyOptions {
pub fn new() -> Self {
Self::default()
}
pub fn dry_run() -> Self {
Self {
dry_run: true,
..Self::default()
}
}
pub fn exact() -> Self {
Self {
fuzz_factor: 0.0,
..Self::default()
}
}
pub fn with_dry_run(mut self, dry_run: bool) -> Self {
self.dry_run = dry_run;
self
}
pub fn with_fuzz_factor(mut self, fuzz_factor: f32) -> Self {
self.fuzz_factor = fuzz_factor;
self
}
pub fn builder() -> ApplyOptionsBuilder {
ApplyOptionsBuilder::default()
}
}
/// Builder for [`ApplyOptions`]; unset fields fall back to the defaults
/// from `ApplyOptions::default()` when `build` is called.
///
/// `Default` is now derived: the previous hand-written impl produced exactly
/// what the derive does (`None` for every field), so it was redundant
/// (clippy `derivable_impls`).
#[derive(Debug, Clone, Copy, Default)]
pub struct ApplyOptionsBuilder {
    // `None` means "use the value from `ApplyOptions::default()`".
    dry_run: Option<bool>,
    fuzz_factor: Option<f32>,
}
impl ApplyOptionsBuilder {
    /// Set the dry-run flag explicitly.
    pub fn dry_run(self, dry_run: bool) -> Self {
        Self {
            dry_run: Some(dry_run),
            ..self
        }
    }
    /// Set the fuzz threshold explicitly.
    pub fn fuzz_factor(self, fuzz_factor: f32) -> Self {
        Self {
            fuzz_factor: Some(fuzz_factor),
            ..self
        }
    }
    /// Materialize the options, filling unset fields from [`ApplyOptions::default`].
    pub fn build(self) -> ApplyOptions {
        let defaults = ApplyOptions::default();
        ApplyOptions {
            dry_run: self.dry_run.unwrap_or(defaults.dry_run),
            fuzz_factor: self.fuzz_factor.unwrap_or(defaults.fuzz_factor),
        }
    }
}
/// Result of applying one patch to a file on disk.
#[derive(Debug, Clone, PartialEq)]
pub struct PatchResult {
    /// Per-hunk outcomes.
    pub report: ApplyResult,
    /// Preview diff of the change; only populated on dry runs.
    pub diff: Option<String>,
}
/// Result of applying a patch to in-memory content.
#[derive(Debug, Clone, PartialEq)]
pub struct InMemoryResult {
    /// The patched text (produced even if only some hunks applied).
    pub new_content: String,
    /// Per-hunk outcomes.
    pub report: ApplyResult,
}
/// Per-hunk outcomes for one patch application, in hunk order.
#[derive(Debug, Clone, PartialEq)]
pub struct ApplyResult {
    /// One status per hunk, in the order the hunks appear in the patch.
    pub hunk_results: Vec<HunkApplyStatus>,
}
/// A failed hunk: its position within the patch plus the error.
#[derive(Debug, Clone, PartialEq)]
pub struct HunkFailure {
    /// 1-based hunk position within the patch.
    pub hunk_index: usize,
    /// Why the hunk failed.
    pub reason: HunkApplyError,
}
impl ApplyResult {
    /// `true` when no hunk in the report ended in [`HunkApplyStatus::Failed`].
    pub fn all_applied_cleanly(&self) -> bool {
        !self
            .hunk_results
            .iter()
            .any(|r| matches!(r, HunkApplyStatus::Failed(_)))
    }
    /// Collect every failed hunk (1-based index plus its error).
    pub fn failures(&self) -> Vec<HunkFailure> {
        let mut failed = Vec::new();
        for (idx, status) in self.hunk_results.iter().enumerate() {
            if let HunkApplyStatus::Failed(reason) = status {
                failed.push(HunkFailure {
                    hunk_index: idx + 1,
                    reason: reason.clone(),
                });
            }
        }
        failed
    }
}
/// Per-file results from applying a batch of patches to a directory.
#[derive(Debug)]
pub struct BatchResult {
    /// One entry per patch: the target path and its file-level result.
    pub results: Vec<(PathBuf, Result<PatchResult, PatchError>)>,
}
impl BatchResult {
    /// `true` when every file-level result is `Ok`.
    /// (Hunk-level failures inside an `Ok` report are NOT counted here.)
    pub fn all_succeeded(&self) -> bool {
        !self.results.iter().any(|(_, res)| res.is_err())
    }
    /// Files that could not be processed at all, with their errors.
    pub fn hard_failures(&self) -> Vec<(&PathBuf, &PatchError)> {
        let mut failures = Vec::new();
        for (path, res) in &self.results {
            if let Err(e) = res {
                failures.push((path, e));
            }
        }
        failures
    }
}
impl ApplyResult {
    /// Inverse of [`ApplyResult::all_applied_cleanly`].
    pub fn has_failures(&self) -> bool {
        !self.all_applied_cleanly()
    }
    /// Number of hunks that failed to apply.
    pub fn failure_count(&self) -> usize {
        self.failures().len()
    }
    /// Number of hunks that applied cleanly or were skipped.
    pub fn success_count(&self) -> usize {
        let failed = self.failure_count();
        self.hunk_results.len() - failed
    }
}
/// One contiguous change block of a unified diff.
///
/// Each entry in `lines` keeps its one-character prefix: `' '` for context,
/// `'-'` for removal, `'+'` for addition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// Prefixed diff lines, without line terminators.
    pub lines: Vec<String>,
    /// 1-based start line in the old file, if a `@@` header provided one.
    pub old_start_line: Option<usize>,
    /// 1-based start line in the new file, if a `@@` header provided one.
    pub new_start_line: Option<usize>,
}
impl Hunk {
    /// Return a copy of this hunk with additions and deletions swapped,
    /// suitable for reverting a previously applied change.
    pub fn invert(&self) -> Hunk {
        let inverted_lines = self
            .lines
            .iter()
            .map(|line| {
                if let Some(stripped) = line.strip_prefix('+') {
                    format!("-{}", stripped)
                } else if let Some(stripped) = line.strip_prefix('-') {
                    format!("+{}", stripped)
                } else {
                    line.clone()
                }
            })
            .collect();
        Hunk {
            lines: inverted_lines,
            // Old/new start lines swap roles when the hunk is inverted.
            old_start_line: self.new_start_line,
            new_start_line: self.old_start_line,
        }
    }
    /// Strip the one-character diff prefix from a hunk line.
    ///
    /// BUGFIX: the previous `&l[1..]` slicing panicked on a completely empty
    /// hunk line (possible in hand-written diffs where a context line lost
    /// its leading space); `get(1..)` degrades to `""` instead.
    fn strip_marker(line: &str) -> &str {
        line.get(1..).unwrap_or("")
    }
    /// The lines that must match in the target file (context + removals).
    pub fn get_match_block(&self) -> Vec<&str> {
        self.lines
            .iter()
            .filter(|l| !l.starts_with('+'))
            .map(|l| Self::strip_marker(l))
            .collect()
    }
    /// The lines the matched region is replaced with (context + additions).
    pub fn get_replace_block(&self) -> Vec<&str> {
        self.lines
            .iter()
            .filter(|l| !l.starts_with('-'))
            .map(|l| Self::strip_marker(l))
            .collect()
    }
    /// Unchanged (context) lines only, without the leading space.
    pub fn context_lines(&self) -> Vec<&str> {
        self.lines
            .iter()
            .filter(|l| l.starts_with(' '))
            .map(|l| Self::strip_marker(l))
            .collect()
    }
    /// Added lines only, without the `+` marker.
    pub fn added_lines(&self) -> Vec<&str> {
        self.lines
            .iter()
            .filter(|l| l.starts_with('+'))
            .map(|l| Self::strip_marker(l))
            .collect()
    }
    /// Removed lines only, without the `-` marker.
    pub fn removed_lines(&self) -> Vec<&str> {
        self.lines
            .iter()
            .filter(|l| l.starts_with('-'))
            .map(|l| Self::strip_marker(l))
            .collect()
    }
    /// `true` if the hunk contains at least one addition or removal.
    pub fn has_changes(&self) -> bool {
        self.lines.iter().any(|l| l.starts_with(['+', '-']))
    }
}
impl std::fmt::Display for Hunk {
    /// Render the hunk as a unified-diff fragment: an `@@` header computed
    /// from the line prefixes, followed by the raw prefixed lines.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Count how many lines belong to the old and new sides respectively.
        let mut old_len = 0usize;
        let mut new_len = 0usize;
        for line in &self.lines {
            if line.starts_with('+') {
                new_len += 1;
            } else if line.starts_with('-') {
                old_len += 1;
            } else {
                // Context lines count toward both sides.
                old_len += 1;
                new_len += 1;
            }
        }
        writeln!(
            f,
            "@@ -{},{} +{},{} @@",
            self.old_start_line.unwrap_or(1),
            old_len,
            self.new_start_line.unwrap_or(1),
            new_len
        )?;
        self.lines
            .iter()
            .try_for_each(|line| writeln!(f, "{}", line))
    }
}
/// A matched region in the target content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HunkLocation {
    /// 0-based index of the first matched line.
    pub start_index: usize,
    /// Number of lines the match spans.
    pub length: usize,
}
impl std::fmt::Display for HunkLocation {
    /// Human-readable position: converts the 0-based index to a 1-based
    /// line number.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let line_number = self.start_index + 1;
        write!(f, "line {}", line_number)
    }
}
/// All hunks targeting a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Patch {
    /// Path relative to the directory the patch is applied against.
    pub file_path: PathBuf,
    /// The hunks, in application order.
    pub hunks: Vec<Hunk>,
    /// Whether the patched file should end with a trailing newline.
    pub ends_with_newline: bool,
}
impl Patch {
    /// Build a patch by diffing `old_text` against `new_text`, with
    /// `context_len` lines of surrounding context per hunk.
    ///
    /// Currently never returns `Err`; the `Result` is kept for API stability.
    pub fn from_texts(
        file_path: impl Into<PathBuf>,
        old_text: &str,
        new_text: &str,
        context_len: usize,
    ) -> Result<Self, ParseError> {
        // Append `len` lines from `slices` starting at `start`, each tagged
        // with the diff `prefix` (' ', '-', or '+'). Trailing CR/LF are
        // stripped so hunk lines never embed line terminators.
        // (Previously this loop was duplicated four times below.)
        fn push_prefixed(
            lines: &mut Vec<String>,
            prefix: char,
            slices: &[&str],
            start: usize,
            len: usize,
        ) {
            for slice in &slices[start..start + len] {
                let line = slice.trim_end_matches(['\r', '\n']);
                lines.push(format!("{}{}", prefix, line));
            }
        }
        let path = file_path.into();
        let diff = TextDiff::from_lines(old_text, new_text);
        let mut hunks = Vec::new();
        for group in diff.grouped_ops(context_len) {
            let mut lines = Vec::new();
            // Hunk start positions are 1-based in unified-diff headers.
            let (old_start, new_start) = match group.first() {
                Some(first_op) => (
                    Some(first_op.old_range().start + 1),
                    Some(first_op.new_range().start + 1),
                ),
                None => (None, None),
            };
            for op in group {
                match op {
                    similar::DiffOp::Equal { old_index, len, .. } => {
                        push_prefixed(&mut lines, ' ', diff.old_slices(), old_index, len);
                    }
                    similar::DiffOp::Delete {
                        old_index, old_len, ..
                    } => {
                        push_prefixed(&mut lines, '-', diff.old_slices(), old_index, old_len);
                    }
                    similar::DiffOp::Insert {
                        new_index, new_len, ..
                    } => {
                        push_prefixed(&mut lines, '+', diff.new_slices(), new_index, new_len);
                    }
                    similar::DiffOp::Replace {
                        old_index,
                        old_len,
                        new_index,
                        new_len,
                    } => {
                        // A replace is emitted as deletions then insertions.
                        push_prefixed(&mut lines, '-', diff.old_slices(), old_index, old_len);
                        push_prefixed(&mut lines, '+', diff.new_slices(), new_index, new_len);
                    }
                }
            }
            hunks.push(Hunk {
                lines,
                old_start_line: old_start,
                new_start_line: new_start,
            });
        }
        Ok(Patch {
            file_path: path,
            hunks,
            // An empty new file counts as newline-terminated so no
            // "no newline" marker is rendered for it.
            ends_with_newline: new_text.ends_with('\n') || new_text.is_empty(),
        })
    }
    /// Produce the reverse patch (apply it to undo this one).
    ///
    /// NOTE(review): `ends_with_newline` is reset to `true` here rather than
    /// derived from the original content — confirm this is intended.
    pub fn invert(&self) -> Patch {
        Patch {
            file_path: self.file_path.clone(),
            hunks: self.hunks.iter().map(|h| h.invert()).collect(),
            ends_with_newline: true,
        }
    }
    /// Heuristic: does this patch create a new file (nothing to match against)?
    pub fn is_creation(&self) -> bool {
        self.hunks
            .first()
            .is_some_and(|h| h.old_start_line == Some(0) || h.get_match_block().is_empty())
    }
    /// Heuristic: does this patch delete the file (no new content remains)?
    pub fn is_deletion(&self) -> bool {
        !self.hunks.is_empty()
            && self
                .hunks
                .iter()
                .all(|h| h.new_start_line == Some(0) || h.get_replace_block().is_empty())
    }
}
impl std::fmt::Display for Patch {
    /// Render as a unified diff with `a/`-`b/` headers, plus the
    /// "no newline" marker when the new content lacks a trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "--- a/{}", self.file_path.display())?;
        writeln!(f, "+++ b/{}", self.file_path.display())?;
        for hunk in &self.hunks {
            write!(f, "{}", hunk)?;
        }
        // De Morgan of the original: emit only for non-empty patches whose
        // content does not end in a newline.
        if !(self.ends_with_newline || self.hunks.is_empty()) {
            write!(f, "\\ No newline at end of file")?;
        }
        Ok(())
    }
}
/// Formats recognized by [`detect_patch`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum PatchFormat {
    /// Plain unified diff (`---` / `+++` / `@@`).
    Unified,
    /// Diff content wrapped in markdown code fences.
    Markdown,
    /// Merge-conflict-marker text (`<<<<` / `====` / `>>>>`).
    Conflict,
    /// Nothing recognizable.
    Unknown,
}
/// Classify `content` as a unified diff, markdown-fenced diff,
/// conflict-marker text, or unknown.
///
/// Scans line by line while tracking fenced code blocks, so that diff
/// syntax found *inside* a fence is reported as [`PatchFormat::Markdown`].
/// (Also removes a redundant second `trim_start` binding the original had:
/// `trimmed` was already computed for the same unchanged line.)
pub fn detect_patch(content: &str) -> PatchFormat {
    let mut lines = content.lines().peekable();
    let mut in_code_block = false;
    let mut current_fence_len = 0;
    let mut has_unified_headers = false;
    let mut has_conflict_start = false;
    let mut has_conflict_middle_or_end = false;
    let mut has_conflict_markers = false;
    while let Some(line) = lines.next() {
        let trimmed = line.trim_start();
        if trimmed.starts_with("```") {
            let fence_len = trimmed.chars().take_while(|&c| c == '`').count();
            if fence_len >= 3 {
                if !in_code_block {
                    in_code_block = true;
                    current_fence_len = fence_len;
                    // A fence annotated ```diff / ```patch is decisive.
                    let info = &trimmed[fence_len..];
                    if info.contains("diff") || info.contains("patch") {
                        return PatchFormat::Markdown;
                    }
                } else if fence_len >= current_fence_len {
                    // A closing fence must be at least as long as its opener.
                    in_code_block = false;
                    current_fence_len = 0;
                }
                continue;
            }
        }
        let is_diff_git = line.starts_with("diff --git");
        // `--- ` only counts as a header when followed by a `+++ ` line.
        let is_unified_header =
            line.starts_with("--- ") && lines.peek().is_some_and(|l| l.starts_with("+++ "));
        let is_hunk_header = line.starts_with("@@ -") && line.contains(" @@");
        if is_diff_git || is_unified_header || is_hunk_header {
            if in_code_block {
                // Diff syntax inside a fence means the payload is markdown-wrapped.
                return PatchFormat::Markdown;
            }
            has_unified_headers = true;
        }
        // Conflict middle/end markers only count after a start marker.
        if trimmed.starts_with("<<<<") {
            has_conflict_start = true;
        } else if (trimmed.starts_with("====") || trimmed.starts_with(">>>>")) && has_conflict_start
        {
            has_conflict_middle_or_end = true;
        }
        if has_conflict_start && has_conflict_middle_or_end {
            if in_code_block {
                return PatchFormat::Markdown;
            }
            has_conflict_markers = true;
        }
    }
    // Unified headers take precedence over conflict markers.
    if has_unified_headers {
        PatchFormat::Unified
    } else if has_conflict_markers {
        PatchFormat::Conflict
    } else {
        PatchFormat::Unknown
    }
}
/// Detect the format of `content` and dispatch to the matching parser.
///
/// Unknown content is still fed through the unified-diff parser as a best
/// effort; an empty `Vec` is returned when nothing parseable is found.
pub fn parse_auto(content: &str) -> Result<Vec<Patch>, ParseError> {
    match detect_patch(content) {
        PatchFormat::Markdown => parse_diffs(content),
        PatchFormat::Unified => parse_patches(content),
        PatchFormat::Conflict => Ok(parse_conflict_markers(content)),
        // Best effort: an unrecognized block may still be a bare diff.
        // (`parse_patches` already yields Ok(empty Vec) when nothing
        // matches, so the previous `is_empty` branch was a no-op.)
        PatchFormat::Unknown => parse_patches(content),
    }
}
/// Extract patches from markdown content by scanning fenced code blocks.
///
/// Only fenced blocks that contain patch-like markers (see
/// `has_patch_signature_at_level_1`) are parsed; other code blocks are
/// skipped. Returns all patches found across all matching blocks.
pub fn parse_diffs(content: &str) -> Result<Vec<Patch>, ParseError> {
    debug!("Starting to parse diffs from content (Markdown mode).");
    let mut all_patches = Vec::new();
    let mut lines = content.lines().enumerate().peekable();
    // Find each opening fence: three or more backticks, possibly indented.
    while let Some((line_index, line_text)) = lines.by_ref().find(|(_, line)| {
        let trimmed = line.trim_start();
        trimmed.starts_with("```") && trimmed.chars().take_while(|&c| c == '`').count() >= 3
    }) {
        let trimmed = line_text.trim_start();
        let fence_len = trimmed.chars().take_while(|&c| c == '`').count();
        let opening_indent = line_text.len() - trimmed.len();
        trace!(
            "Found potential diff block start on line {}: '{}'",
            line_index,
            line_text
        );
        let diff_block_start_line = line_index + 1;
        let mut block_lines = Vec::new();
        // Collect lines until the closing fence: at least as many backticks
        // as the opener and not indented deeper (deeper fences are nested).
        while let Some((_, line)) = lines.peek() {
            let inner_trimmed = line.trim_start();
            let current_indent = line.len() - inner_trimmed.len();
            if inner_trimmed.starts_with("```")
                && inner_trimmed.chars().take_while(|&c| c == '`').count() >= fence_len
                && current_indent <= opening_indent
            {
                // Consume the closing fence itself before moving on.
                lines.next();
                break;
            }
            let (_, line) = lines.next().unwrap();
            block_lines.push(line);
        }
        if has_patch_signature_at_level_1(&block_lines) {
            debug!(
                "Parsing diff block starting on line {}.",
                diff_block_start_line
            );
            let block_patches = parse_generic_block_lines(block_lines, diff_block_start_line)?;
            all_patches.extend(block_patches);
        } else {
            trace!(
                "Skipping code block starting on line {} (no patch markers found).",
                diff_block_start_line
            );
        }
    }
    debug!(
        "Finished parsing. Found {} patch(es) in total.",
        all_patches.len()
    );
    Ok(all_patches)
}
/// Report whether `lines` contain patch markers at the top nesting level.
///
/// Markers inside a nested code fence are ignored, so prose that merely
/// *shows* a diff inside an inner fence does not count as one.
fn has_patch_signature_at_level_1<S: AsRef<str>>(lines: &[S]) -> bool {
    // `Some(len)` while inside a nested fence opened with `len` backticks.
    let mut nested_fence: Option<usize> = None;
    for raw in lines {
        let line = raw.as_ref();
        let trimmed = line.trim_start();
        if trimmed.starts_with("```") {
            let fence_len = trimmed.chars().take_while(|&c| c == '`').count();
            if fence_len >= 3 {
                match nested_fence {
                    None => {
                        nested_fence = Some(fence_len);
                        continue;
                    }
                    // Only a fence at least as long as the opener closes it.
                    Some(open_len) if fence_len >= open_len => {
                        nested_fence = None;
                        continue;
                    }
                    Some(_) => {}
                }
            }
        }
        let is_marker = line.starts_with("--- ")
            || line.starts_with("diff --git")
            || trimmed.starts_with("<<<<")
            || trimmed.starts_with("====")
            || trimmed.starts_with(">>>>");
        if nested_fence.is_none() && is_marker {
            return true;
        }
    }
    false
}
/// Parse one fenced block's lines, trying unified-diff format first and
/// falling back to conflict-marker format.
///
/// `start_line` is the 1-based line of the block in the enclosing document,
/// used to rewrite error positions to document coordinates.
fn parse_generic_block_lines(
    lines: Vec<&str>,
    start_line: usize,
) -> Result<Vec<Patch>, ParseError> {
    // Iterate by shared reference first so `lines` stays available for the
    // conflict-marker fallback — avoids the Vec clone the original did.
    let standard_result = parse_patches_from_lines(lines.iter().copied());
    match standard_result {
        Ok(patches) => {
            if !patches.is_empty() {
                Ok(patches)
            } else {
                Ok(parse_conflict_markers_from_lines(lines.into_iter()))
            }
        }
        Err(e) => {
            let conflict_patches = parse_conflict_markers_from_lines(lines.into_iter());
            if !conflict_patches.is_empty() {
                Ok(conflict_patches)
            } else {
                match e {
                    ParseError::MissingFileHeader { .. } => {
                        // Report the block's own start line rather than the
                        // line offset inside the block.
                        Err(ParseError::MissingFileHeader { line: start_line })
                    }
                }
            }
        }
    }
}
/// Parse content that is expected to contain exactly one patch.
pub fn parse_single_patch(content: &str) -> Result<Patch, SingleParseError> {
    let patches = parse_auto(content)?;
    match patches.len() {
        0 => Err(SingleParseError::NoPatchesFound),
        1 => Ok(patches.into_iter().next().expect("len checked above")),
        n => Err(SingleParseError::MultiplePatchesFound(n)),
    }
}
/// Parse raw unified-diff text into patches (one [`Patch`] per target file).
pub fn parse_patches(content: &str) -> Result<Vec<Patch>, ParseError> {
    debug!("Starting to parse raw diff content.");
    let line_iter = content.lines();
    parse_patches_from_lines(line_iter)
}
/// Parse merge-conflict-marker text into patches.
pub fn parse_conflict_markers(content: &str) -> Vec<Patch> {
    debug!("Starting to parse conflict marker content.");
    let line_iter = content.lines();
    parse_conflict_markers_from_lines(line_iter)
}
/// Core unified-diff parser over an iterator of lines.
///
/// Recognizes `---`/`+++` file headers, `@@` hunk headers, `+`/`-`/` ` body
/// lines, the `\ No newline at end of file` marker, and git metadata lines.
/// Sections targeting the same file path are merged into a single [`Patch`].
///
/// BUGFIX: the `if let` over the current file previously read `¤t_file`
/// (mojibake for `&current_file`, an encoding corruption), which did not
/// compile; restored to `&current_file`.
pub fn parse_patches_from_lines<'a, I>(lines: I) -> Result<Vec<Patch>, ParseError>
where
    I: Iterator<Item = &'a str>,
{
    let mut unmerged_patches: Vec<Patch> = Vec::new();
    const HUNK_BUFFER_CAPACITY: usize = 32;
    let mut first_hunk_header_line: Option<usize> = None;
    let mut current_file: Option<PathBuf> = None;
    let mut current_hunks: Vec<Hunk> = Vec::new();
    let mut current_hunk_lines: Vec<String> = Vec::with_capacity(HUNK_BUFFER_CAPACITY);
    let mut current_hunk_old_start_line: Option<usize> = None;
    let mut current_hunk_new_start_line: Option<usize> = None;
    let mut ends_with_newline_for_section = true;
    // Push the in-progress hunk (if any) into `current_hunks`, trimming
    // trailing blank lines and resetting the line buffer.
    macro_rules! finalize_hunk {
        () => {
            if current_hunk_old_start_line.is_some() {
                trace!(
                    " Finalizing previous hunk with {} lines.",
                    current_hunk_lines.len()
                );
                // Trailing blank lines are usually surrounding-text artifacts.
                while let Some(last) = current_hunk_lines.last() {
                    if last.trim().is_empty() {
                        current_hunk_lines.pop();
                    } else {
                        break;
                    }
                }
                current_hunks.push(Hunk {
                    lines: std::mem::replace(
                        &mut current_hunk_lines,
                        Vec::with_capacity(HUNK_BUFFER_CAPACITY),
                    ),
                    old_start_line: current_hunk_old_start_line,
                    new_start_line: current_hunk_new_start_line,
                });
            }
        };
    }
    for (line_idx, line) in lines.enumerate() {
        if let Some(stripped_line) = line.strip_prefix("--- ") {
            trace!(" Found file header line: '{}'", line);
            // A new `---` header closes the previous file section.
            if let Some(existing_file) = &current_file {
                finalize_hunk!();
                if !current_hunks.is_empty() {
                    debug!(
                        " Finalizing patch section for '{}' with {} hunk(s).",
                        existing_file.display(),
                        current_hunks.len()
                    );
                    unmerged_patches.push(Patch {
                        file_path: existing_file.clone(),
                        hunks: std::mem::take(&mut current_hunks),
                        ends_with_newline: ends_with_newline_for_section,
                    });
                }
            }
            trace!(" Resetting parser state for new file section.");
            current_file = None;
            current_hunk_lines.clear();
            current_hunk_old_start_line = None;
            current_hunk_new_start_line = None;
            ends_with_newline_for_section = true;
            let path_part = stripped_line.trim();
            if path_part == "/dev/null" || path_part == "a/dev/null" {
                trace!(" Path is /dev/null, indicating file creation.");
            } else {
                let path_str = path_part.strip_prefix("a/").unwrap_or(path_part);
                debug!(" Starting new patch section for file: '{}'", path_str);
                current_file = Some(PathBuf::from(path_str.trim()));
            }
        } else if let Some(stripped_line) = line.strip_prefix("+++ ") {
            trace!(" Found '+++' line: '{}'", line);
            // Only fall back to the '+++' path when '---' gave none
            // (e.g. creation diffs whose '---' side was /dev/null).
            if current_file.is_none() {
                let path_part = stripped_line.trim();
                let path_str = path_part.strip_prefix("b/").unwrap_or(path_part);
                debug!(" Set file path from '+++' line: '{}'", path_str);
                current_file = Some(PathBuf::from(path_str.trim()));
            }
        } else if line.starts_with("@@") {
            trace!(" Found hunk header: '{}'", line);
            finalize_hunk!();
            // Remember where the first hunk began for error reporting.
            if first_hunk_header_line.is_none() {
                first_hunk_header_line = Some(line_idx + 1);
            }
            let (old, new) = parse_hunk_header(line);
            trace!(" Parsed old_start={:?}, new_start={:?}", old, new);
            current_hunk_old_start_line = old;
            current_hunk_new_start_line = new;
        } else if line.starts_with(['+', '-', ' ']) {
            // Body lines are only meaningful once a hunk header was seen.
            if current_hunk_old_start_line.is_some() {
                trace!(" Adding line to current hunk: '{}'", line.trim_end());
                current_hunk_lines.push(line.to_string());
            }
        } else if line.starts_with('\\') {
            if current_hunk_old_start_line.is_some() {
                trace!(" Found '\\ No newline at end of file' marker.");
                // The marker only matters when it refers to a line that
                // survives in the new file (an added or context line).
                if let Some(last_line) = current_hunk_lines.last() {
                    if last_line.starts_with('+') || last_line.starts_with(' ') {
                        ends_with_newline_for_section = false;
                    }
                }
            }
        } else if is_git_header_line(line) {
            trace!(" Ignoring Git header line: '{}'", line.trim_end());
        } else if current_hunk_old_start_line.is_some() {
            // Unprefixed lines inside a hunk are treated as context lines
            // whose leading space was lost.
            trace!(
                " Adding unrecognized line as context to current hunk: '{}'",
                line.trim_end()
            );
            current_hunk_lines.push(format!(" {}", line));
        }
    }
    debug!(" End of diff block. Finalizing last hunk and patch section.");
    finalize_hunk!();
    if let Some(file_path) = current_file {
        if !current_hunks.is_empty() {
            debug!(
                " Finalizing patch section for '{}' with {} hunk(s).",
                file_path.display(),
                current_hunks.len()
            );
            unmerged_patches.push(Patch {
                file_path,
                hunks: current_hunks,
                ends_with_newline: ends_with_newline_for_section,
            });
        }
    } else if !current_hunks.is_empty() {
        // Hunks without any file header: report where the first one began.
        let error_line = first_hunk_header_line.unwrap_or(1);
        warn!(
            "Found hunks starting near line {} but no file path header ('--- a/path').",
            error_line
        );
        return Err(ParseError::MissingFileHeader { line: error_line });
    }
    if unmerged_patches.is_empty() {
        return Ok(vec![]);
    }
    debug!(
        "Merging {} patch section(s) found in the block.",
        unmerged_patches.len()
    );
    // Merge sections that target the same file into a single Patch.
    let mut merged_patches: Vec<Patch> = Vec::new();
    for patch_section in unmerged_patches {
        if let Some(existing_patch) = merged_patches
            .iter_mut()
            .find(|p| p.file_path == patch_section.file_path)
        {
            debug!(
                " Merging {} hunk(s) for '{}' into existing patch.",
                patch_section.hunks.len(),
                patch_section.file_path.display()
            );
            existing_patch.hunks.extend(patch_section.hunks);
            // The last section seen wins for the trailing-newline flag.
            existing_patch.ends_with_newline = patch_section.ends_with_newline;
        } else {
            debug!(
                " Adding new patch for '{}'.",
                patch_section.file_path.display()
            );
            merged_patches.push(patch_section);
        }
    }
    Ok(merged_patches)
}
/// `true` for git-specific metadata lines that precede a file's hunks and
/// carry no patch content themselves.
fn is_git_header_line(line: &str) -> bool {
    const GIT_HEADER_PREFIXES: [&str; 11] = [
        "diff --git",
        "index ",
        "old mode ",
        "new mode ",
        "new file mode ",
        "deleted file mode ",
        "similarity index ",
        "copy from ",
        "copy to ",
        "rename from ",
        "rename to ",
    ];
    GIT_HEADER_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(prefix))
}
/// Convert conflict-marker text (`<<<<` / `====` / `>>>>`) into a single
/// patch: `-` lines are the "ours" side, `+` lines the "theirs" side, and
/// everything outside the markers becomes context.
///
/// Returns an empty `Vec` unless a start marker AND a middle/end marker
/// were both seen (i.e. the input really contains a conflict).
fn parse_conflict_markers_from_lines<'a, I>(lines: I) -> Vec<Patch>
where
    I: Iterator<Item = &'a str>,
{
    enum State {
        Context,
        Old,
        New,
    }
    let mut hunk_lines = Vec::new();
    let mut has_start = false;
    let mut has_middle_or_end = false;
    let mut state = State::Context;
    for line in lines {
        // Hoisted: the original trimmed the same line up to three times.
        let trimmed = line.trim_start();
        if trimmed.starts_with("<<<<") {
            state = State::Old;
            has_start = true;
        } else if trimmed.starts_with("====") {
            state = State::New;
            has_middle_or_end |= has_start;
        } else if trimmed.starts_with(">>>>") {
            state = State::Context;
            has_middle_or_end |= has_start;
        } else {
            let prefix = match state {
                State::Context => ' ',
                State::Old => '-',
                State::New => '+',
            };
            hunk_lines.push(format!("{}{}", prefix, line));
        }
    }
    if !(has_start && has_middle_or_end) {
        return Vec::new();
    }
    vec![Patch {
        // Synthetic path: conflict text carries no file name.
        file_path: PathBuf::from("patch_target"),
        hunks: vec![Hunk {
            lines: hunk_lines,
            old_start_line: None,
            new_start_line: None,
        }],
        ends_with_newline: true,
    }]
}
/// Translate a raw I/O error into the most specific [`PatchError`] variant.
fn map_io_error(path: PathBuf, e: std::io::Error) -> PatchError {
    use std::io::ErrorKind;
    match e.kind() {
        ErrorKind::PermissionDenied => PatchError::PermissionDenied { path },
        ErrorKind::IsADirectory => PatchError::TargetIsDirectory { path },
        _ => PatchError::Io { path, source: e },
    }
}
/// Resolve `relative_path` against `base_dir` and verify the result stays
/// inside `base_dir` (canonicalized), guarding against path traversal.
///
/// Side effect: creates the parent directories of the target so the parent
/// can be canonicalized (and so later writes succeed).
///
/// NOTE(review): if `relative_path` ends in `..`, `file_name()` is `None`
/// and an empty component is joined; the `starts_with` check still guards
/// the result, but confirm this edge case is intended.
pub fn ensure_path_is_safe(base_dir: &Path, relative_path: &Path) -> Result<PathBuf, PatchError> {
    trace!(
        " Checking path safety for base '{}' and relative path '{}'",
        base_dir.display(),
        relative_path.display()
    );
    // Canonicalize the base first; fails if the base itself doesn't exist.
    let base_path =
        fs::canonicalize(base_dir).map_err(|e| map_io_error(base_dir.to_path_buf(), e))?;
    let target_file_path = base_dir.join(relative_path);
    let parent = target_file_path.parent().unwrap_or(Path::new(""));
    fs::create_dir_all(parent).map_err(|e| map_io_error(parent.to_path_buf(), e))?;
    // Canonicalizing the parent resolves `..` and symlink components before
    // the containment check below.
    let final_path = fs::canonicalize(parent)
        .map_err(|e| map_io_error(parent.to_path_buf(), e))?
        .join(target_file_path.file_name().unwrap_or_default());
    if !final_path.starts_with(&base_path) {
        return Err(PatchError::PathTraversal(relative_path.to_path_buf()));
    }
    Ok(final_path)
}
/// Apply each patch in `patches` against files under `target_dir`,
/// collecting per-file results into a [`BatchResult`].
pub fn apply_patches_to_dir(
    patches: &[Patch],
    target_dir: &Path,
    options: ApplyOptions,
) -> BatchResult {
    let mut results = Vec::with_capacity(patches.len());
    for patch in patches {
        let outcome = apply_patch_to_file(patch, target_dir, options);
        results.push((patch.file_path.clone(), outcome));
    }
    BatchResult { results }
}
pub fn invert_patches(patches: &[Patch]) -> Vec<Patch> {
patches.iter().map(|p| p.invert()).collect()
}
/// Apply `patch` to its target file under `target_dir`.
///
/// Handles creation (missing file + creation patch), deletion (empty result
/// removes the file), and dry runs (no writes; returns a preview diff).
/// Hunk-level failures are reported in the returned result, not as `Err`;
/// `Err` is reserved for filesystem-level problems.
pub fn apply_patch_to_file(
    patch: &Patch,
    target_dir: &Path,
    options: ApplyOptions,
) -> Result<PatchResult, PatchError> {
    info!("Applying patch to: {}", patch.file_path.display());
    // Path-traversal guard; also creates parent directories.
    let safe_target_path = ensure_path_is_safe(target_dir, &patch.file_path)?;
    trace!(
        " Path is safe. Absolute target path: '{}'",
        safe_target_path.display()
    );
    if safe_target_path.is_dir() {
        return Err(PatchError::TargetIsDirectory {
            path: safe_target_path,
        });
    }
    let (original_content, is_new_file) = if safe_target_path.is_file() {
        trace!(" Reading target file '{}'", patch.file_path.display());
        let content = fs::read_to_string(&safe_target_path)
            .map_err(|e| map_io_error(safe_target_path.clone(), e))?;
        trace!(" Read {} bytes from target file.", content.len());
        (content, false)
    } else {
        // A missing file is only acceptable for creation patches.
        if !patch.is_creation() {
            return Err(PatchError::TargetNotFound(
                target_dir.join(&patch.file_path),
            ));
        }
        info!(" Target file does not exist. Assuming file creation.");
        (String::new(), true)
    };
    trace!(
        " Read {} lines from target file.",
        original_content.lines().count()
    );
    trace!(" Calling apply_patch_to_content...");
    // `None` content signals creation to the in-memory applier.
    let result = apply_patch_to_content(
        patch,
        if is_new_file {
            None
        } else {
            Some(&original_content)
        },
        &options,
    );
    let new_content = result.new_content;
    let apply_result = result.report;
    let mut diff = None;
    if options.dry_run {
        info!(
            " DRY RUN: Would write changes to '{}'",
            patch.file_path.display()
        );
        trace!(" Generating diff for dry run...");
        // Preview with 3 context lines and a/b headers, like `diff -u`.
        let diff_text = unified_diff(
            similar::Algorithm::default(),
            &original_content,
            &new_content,
            3,
            Some(("a", "b")),
        );
        diff = Some(diff_text.to_string());
    } else {
        if new_content.is_empty() {
            // Empty output means the file is deleted (or never created).
            if safe_target_path.exists() {
                info!(
                    " Resulting content is empty. Removing file '{}'",
                    patch.file_path.display()
                );
                fs::remove_file(&safe_target_path)
                    .map_err(|e| map_io_error(safe_target_path.clone(), e))?;
            } else {
                info!(
                    " Resulting content is empty. Skipping creation of '{}'",
                    patch.file_path.display()
                );
            }
            if apply_result.all_applied_cleanly() {
                info!(
                    " Successfully processed deletion/empty-result for '{}'",
                    patch.file_path.display()
                );
            } else {
                warn!(
                    " Partial application resulted in empty content for '{}'",
                    patch.file_path.display()
                );
            }
        } else {
            // Parents may still be missing for brand-new nested paths.
            if let Some(parent) = safe_target_path.parent() {
                fs::create_dir_all(parent).map_err(|e| map_io_error(parent.to_path_buf(), e))?;
            }
            trace!(
                " Writing {} bytes to '{}'",
                new_content.len(),
                safe_target_path.display()
            );
            fs::write(&safe_target_path, new_content)
                .map_err(|e| map_io_error(safe_target_path.clone(), e))?;
            if apply_result.all_applied_cleanly() {
                info!(
                    " Successfully wrote changes to '{}'",
                    patch.file_path.display()
                );
            } else {
                warn!(" Wrote partial changes to '{}'", patch.file_path.display());
            }
        }
    }
    Ok(PatchResult {
        report: apply_result,
        diff,
    })
}
/// Strict variant of [`apply_patch_to_file`]: a partial application is
/// turned into [`StrictApplyError::PartialApply`].
pub fn try_apply_patch_to_file(
    patch: &Patch,
    target_dir: &Path,
    options: ApplyOptions,
) -> Result<PatchResult, StrictApplyError> {
    let result = apply_patch_to_file(patch, target_dir, options)?;
    match result.report.all_applied_cleanly() {
        true => Ok(result),
        false => Err(StrictApplyError::PartialApply {
            report: result.report,
        }),
    }
}
/// Lazily applies a patch's hunks to an in-memory line buffer.
///
/// Implements [`Iterator`]: each `next()` applies one hunk and yields its
/// [`HunkApplyStatus`]; call `into_content`/`into_lines` afterwards to
/// retrieve the result.
#[derive(Debug)]
pub struct HunkApplier<'a> {
    // Remaining hunks to apply.
    hunks: std::slice::Iter<'a, Hunk>,
    // Working copy of the file's lines, mutated as hunks apply.
    current_lines: Vec<String>,
    options: &'a ApplyOptions,
    // Whether the patch's new content ends with a newline.
    patch_ends_with_newline: bool,
    // Whether the original content ended with a newline (set by the caller
    // via `set_original_newline_status`; defaults to true).
    original_ends_with_newline: bool,
    // One past the last line index touched by the most recent applied hunk.
    last_hunk_end_index: usize,
}
impl<'a> HunkApplier<'a> {
    /// Create an applier over `original_lines` (`None` = file creation).
    pub fn new<T: AsRef<str>>(
        patch: &'a Patch,
        original_lines: Option<&'a [T]>,
        options: &'a ApplyOptions,
    ) -> Self {
        let current_lines: Vec<String> = original_lines
            .map(|lines| lines.iter().map(|s| s.as_ref().to_string()).collect())
            .unwrap_or_default();
        Self {
            hunks: patch.hunks.iter(),
            current_lines,
            options,
            patch_ends_with_newline: patch.ends_with_newline,
            // Callers override via `set_original_newline_status` when known.
            original_ends_with_newline: true,
            last_hunk_end_index: 0,
        }
    }
    /// The working buffer in its current (possibly partially patched) state.
    pub fn current_lines(&self) -> &[String] {
        &self.current_lines
    }
    /// Record whether the original content ended with a trailing newline.
    pub fn set_original_newline_status(&mut self, ends_with_newline: bool) {
        self.original_ends_with_newline = ends_with_newline;
    }
    /// Consume the applier, returning the patched lines.
    pub fn into_lines(self) -> Vec<String> {
        self.current_lines
    }
    /// Consume the applier, returning the patched text.
    ///
    /// The trailing newline comes from the patch when the last applied hunk
    /// reached end-of-file, otherwise from the original content's status.
    pub fn into_content(self) -> String {
        let mut new_content = self.current_lines.join("\n");
        let touched_eof = self.last_hunk_end_index >= self.current_lines.len();
        let should_have_newline = if touched_eof {
            self.patch_ends_with_newline
        } else {
            self.original_ends_with_newline
        };
        if should_have_newline && !self.current_lines.is_empty() {
            new_content.push('\n');
        }
        new_content
    }
}
impl<'a> Iterator for HunkApplier<'a> {
    type Item = HunkApplyStatus;
    /// Apply the next hunk to the working buffer and report its status.
    fn next(&mut self) -> Option<Self::Item> {
        let hunk = self.hunks.next()?;
        let old_len = self.current_lines.len();
        let status = apply_hunk_to_lines(hunk, &mut self.current_lines, self.options);
        if let HunkApplyStatus::Applied { location, .. } = &status {
            // Track where the hunk's replacement ends so `into_content` can
            // tell whether the final hunk reached end-of-file.
            let new_len = self.current_lines.len();
            let delta = (new_len as isize) - (old_len as isize);
            // NOTE(review): assumes `location.length + delta >= 0`, i.e. a
            // hunk never shrinks the buffer by more than its matched span —
            // confirm against `apply_hunk_to_lines`.
            let inserted_len = (location.length as isize + delta) as usize;
            self.last_hunk_end_index = location.start_index + inserted_len;
        }
        Some(status)
    }
}
/// Apply `patch` to a slice of lines (`None` = file creation), assuming the
/// original content was newline-terminated.
pub fn apply_patch_to_lines<T: AsRef<str>>(
    patch: &Patch,
    original_lines: Option<&[T]>,
    options: &ApplyOptions,
) -> InMemoryResult {
    // Lines carry no terminator information, so assume a trailing newline.
    let assume_trailing_newline = true;
    apply_patch_to_lines_internal(patch, original_lines, options, assume_trailing_newline)
}
/// Shared implementation behind the `apply_patch_to_*` entry points.
///
/// Drives a [`HunkApplier`], logging each hunk's outcome, then assembles
/// the final content and per-hunk report.
fn apply_patch_to_lines_internal<T: AsRef<str>>(
    patch: &Patch,
    original_lines: Option<&[T]>,
    options: &ApplyOptions,
    original_ends_with_newline: bool,
) -> InMemoryResult {
    trace!(
        " apply_patch_to_lines called with {} lines of original content.",
        original_lines.map_or(0, |l| l.len())
    );
    let mut applier = HunkApplier::new(patch, original_lines, options);
    applier.set_original_newline_status(original_ends_with_newline);
    let total_hunks = patch.hunks.len();
    // `by_ref` so the applier can still be consumed for content afterwards.
    let hunk_results: Vec<_> = applier
        .by_ref()
        .enumerate()
        .map(|(i, status)| {
            let hunk_index = i + 1;
            info!(" Applying Hunk {}/{}...", hunk_index, total_hunks);
            match &status {
                HunkApplyStatus::Applied {
                    location,
                    match_type,
                    replaced_lines,
                } => {
                    debug!(
                        " Successfully applied Hunk {} at {} via {:?}",
                        hunk_index, location, match_type
                    );
                    trace!(" Replaced lines:");
                    for line in replaced_lines {
                        trace!(" - {}", line);
                    }
                }
                HunkApplyStatus::SkippedNoChanges => {
                    debug!(" Skipped Hunk {} (no changes).", hunk_index);
                }
                HunkApplyStatus::Failed(error) => {
                    warn!(" Failed to apply Hunk {}. {}", hunk_index, error);
                }
            }
            status
        })
        .collect();
    let new_content = applier.into_content();
    let report = ApplyResult { hunk_results };
    InMemoryResult {
        new_content,
        report,
    }
}
/// Strict variant of [`apply_patch_to_lines`]: any failed hunk becomes an
/// error carrying the full report.
pub fn try_apply_patch_to_lines<T: AsRef<str>>(
    patch: &Patch,
    original_lines: Option<&[T]>,
    options: &ApplyOptions,
) -> Result<InMemoryResult, StrictApplyError> {
    let result = apply_patch_to_lines(patch, original_lines, options);
    match result.report.all_applied_cleanly() {
        true => Ok(result),
        false => Err(StrictApplyError::PartialApply {
            report: result.report,
        }),
    }
}
/// Apply a patch to string content, returning the patched text and report.
///
/// `None` content means the file does not exist yet (creation).
pub fn apply_patch_to_content(
    patch: &Patch,
    original_content: Option<&str>,
    options: &ApplyOptions,
) -> InMemoryResult {
    let original_lines: Option<Vec<String>> =
        original_content.map(|c| c.lines().map(String::from).collect());
    // Absent content counts as newline-terminated; empty content does not.
    let original_ends_with_newline =
        original_content.map_or(true, |s| !s.is_empty() && s.ends_with('\n'));
    apply_patch_to_lines_internal(
        patch,
        original_lines.as_deref(),
        options,
        original_ends_with_newline,
    )
}
/// Strict variant of [`apply_patch_to_content`]: a partial application is
/// turned into [`StrictApplyError::PartialApply`].
pub fn try_apply_patch_to_content(
    patch: &Patch,
    original_content: Option<&str>,
    options: &ApplyOptions,
) -> Result<InMemoryResult, StrictApplyError> {
    let result = apply_patch_to_content(patch, original_content, options);
    match result.report.all_applied_cleanly() {
        true => Ok(result),
        false => Err(StrictApplyError::PartialApply {
            report: result.report,
        }),
    }
}
/// One-shot helper: parse `diff_content`, require exactly one patch, apply
/// it strictly to `original_content`, and return the patched text.
pub fn patch_content_str(
    diff_content: &str,
    original_content: Option<&str>,
    options: &ApplyOptions,
) -> Result<String, OneShotError> {
    let patches = parse_auto(diff_content)?;
    let patch = match patches.len() {
        0 => return Err(OneShotError::NoPatchesFound),
        1 => patches.into_iter().next().expect("len checked above"),
        n => return Err(OneShotError::MultiplePatchesFound(n)),
    };
    let result = try_apply_patch_to_content(&patch, original_content, options)?;
    Ok(result.new_content)
}
/// Re-indents a single hunk line for insertion into the target file when the
/// hunk and the file use different indentation conventions.
///
/// `hunk_indent` / `target_indent` are one sampled pair of corresponding
/// indents (taken from a line that matched in both the hunk and the file);
/// the function rewrites `line`'s leading whitespace so its depth relative to
/// `hunk_indent` is preserved relative to `target_indent`.
///
/// Strategy, in order:
/// 1. Whitespace-only lines become empty strings.
/// 2. Identical conventions: return the line unchanged.
/// 3. Spaces-in-hunk → tabs-in-file conversion (and the symmetric
///    tabs-in-hunk → spaces-in-file case), inferring a spaces-per-tab ratio.
/// 4. Fallback: plain prefix substitution, or trimming/prepending the
///    difference between the two indents.
fn adjust_indentation(line: &str, hunk_indent: &str, target_indent: &str) -> String {
    // Blank (whitespace-only) lines carry no indentation worth preserving.
    if line.trim().is_empty() {
        return String::new();
    }
    // Same convention on both sides: nothing to adjust.
    if hunk_indent == target_indent {
        return line.to_string();
    }
    let line_indent = get_indent(line);
    if !hunk_indent.is_empty() && !target_indent.is_empty() {
        // Case A: hunk indents with spaces, file indents with tabs.
        let hunk_is_spaces = hunk_indent.chars().all(|c| c == ' ');
        let target_is_tabs = target_indent.chars().all(|c| c == '\t');
        if hunk_is_spaces && target_is_tabs {
            // Infer spaces-per-tab from the sampled pair when the division is
            // exact and plausible (<= 4); otherwise assume 4.
            let spaces_per_tab = if hunk_indent.len().is_multiple_of(target_indent.len())
                && hunk_indent.len() / target_indent.len() <= 4
            {
                hunk_indent.len() / target_indent.len()
            } else {
                4
            };
            if line_indent.chars().all(|c| c == ' ') {
                // Convert the line's space depth into tab depth, keeping any
                // remainder spaces (partial indent) after the tabs.
                let hunk_tabs = hunk_indent.len() / spaces_per_tab;
                let line_tabs = line_indent.len() / spaces_per_tab;
                let line_spaces = line_indent.len() % spaces_per_tab;
                let target_tabs = target_indent.len();
                if line_tabs >= hunk_tabs {
                    // Line is at or deeper than the sampled depth: add the excess.
                    let new_tabs = target_tabs + (line_tabs - hunk_tabs);
                    let new_indent =
                        format!("{}{}", "\t".repeat(new_tabs), " ".repeat(line_spaces));
                    return format!("{}{}", new_indent, &line[line_indent.len()..]);
                } else {
                    // Line is shallower: outdent, saturating at column zero.
                    let outdent = hunk_tabs - line_tabs;
                    let new_tabs = target_tabs.saturating_sub(outdent);
                    let new_indent =
                        format!("{}{}", "\t".repeat(new_tabs), " ".repeat(line_spaces));
                    return format!("{}{}", new_indent, &line[line_indent.len()..]);
                }
            }
        }
        // Case B: hunk indents with tabs, file indents with spaces (mirror of A).
        let hunk_is_tabs = hunk_indent.chars().all(|c| c == '\t');
        let target_is_spaces = target_indent.chars().all(|c| c == ' ');
        if hunk_is_tabs && target_is_spaces {
            let spaces_per_tab = if target_indent.len().is_multiple_of(hunk_indent.len())
                && target_indent.len() / hunk_indent.len() <= 4
            {
                target_indent.len() / hunk_indent.len()
            } else {
                4
            };
            if line_indent.chars().all(|c| c == '\t') {
                let hunk_tabs = hunk_indent.len();
                let line_tabs = line_indent.len();
                let target_spaces = target_indent.len();
                if line_tabs >= hunk_tabs {
                    let new_spaces = target_spaces + (line_tabs - hunk_tabs) * spaces_per_tab;
                    let new_indent = " ".repeat(new_spaces);
                    return format!("{}{}", new_indent, &line[line_indent.len()..]);
                } else {
                    let outdent_spaces = (hunk_tabs - line_tabs) * spaces_per_tab;
                    let new_spaces = target_spaces.saturating_sub(outdent_spaces);
                    let new_indent = " ".repeat(new_spaces);
                    return format!("{}{}", new_indent, &line[line_indent.len()..]);
                }
            }
        }
    }
    // Fallback 1: the line starts exactly with the hunk indent — swap prefixes.
    if let Some(stripped) = line.strip_prefix(hunk_indent) {
        return format!("{}{}", target_indent, stripped);
    }
    // Fallback 2: hunk indent is deeper than target — strip the extra depth
    // from the end of the line's indent, character by character.
    if let Some(diff) = hunk_indent.strip_prefix(target_indent) {
        let mut new_indent = line_indent.to_string();
        for c in diff.chars().rev() {
            if new_indent.ends_with(c) {
                new_indent.pop();
            } else {
                break;
            }
        }
        format!("{}{}", new_indent, &line[line_indent.len()..])
    } else if let Some(diff) = target_indent.strip_prefix(hunk_indent) {
        // Fallback 3: target indent is deeper — prepend the extra depth.
        format!("{}{}", diff, line)
    } else {
        // Indents are unrelated; leave the line untouched.
        line.to_string()
    }
}
/// Returns the leading-whitespace prefix of `line` (Unicode whitespace, the
/// same set `str::trim_start` removes).
fn get_indent(line: &str) -> &str {
    let content_start = line
        .find(|c: char| !c.is_whitespace())
        .unwrap_or(line.len());
    &line[..content_start]
}
/// Applies a single `hunk` to `target_lines` in place.
///
/// The hunk is first located with [`find_hunk_location_in_lines`]. On an
/// exact match the hunk's replace block is spliced in directly. On any other
/// match type a reconstruction path runs: it diffs the hunk's match block
/// against the file's matched region (both trimmed), keeps the file's own
/// versions of context lines, and passes every inserted line through
/// [`adjust_indentation`] using an indentation pair sampled from matching
/// lines.
///
/// Returns [`HunkApplyStatus::Applied`] (with the replaced lines),
/// [`HunkApplyStatus::SkippedNoChanges`], or [`HunkApplyStatus::Failed`].
pub fn apply_hunk_to_lines(
    hunk: &Hunk,
    target_lines: &mut Vec<String>,
    options: &ApplyOptions,
) -> HunkApplyStatus {
    trace!("Applying hunk with {} lines.", hunk.lines.len());
    trace!(" Match block: {:?}", hunk.get_match_block());
    trace!(" Replace block: {:?}", hunk.get_replace_block());
    if !hunk.has_changes() {
        trace!(" Hunk has no changes, skipping.");
        return HunkApplyStatus::SkippedNoChanges;
    }
    match find_hunk_location_in_lines(hunk, target_lines, options) {
        Ok((location, match_type)) => {
            trace!(
                " Found location {:?} with match type {:?}. Applying changes.",
                location,
                match_type
            );
            let final_replace_block: Vec<String> = if matches!(match_type, MatchType::Exact) {
                trace!(" Applying hunk via exact logic.");
                // Exact path: take the replace block verbatim, collapsing
                // whitespace-only lines to empty lines.
                hunk.get_replace_block()
                    .iter()
                    .map(|s| {
                        if s.trim().is_empty() {
                            String::new()
                        } else {
                            s.to_string()
                        }
                    })
                    .collect()
            } else {
                debug!(" Applying hunk via robust reconstruction logic (preserving file context & adjusting indent).");
                trace!(
                    " Fuzzy match location: start={}, len={}",
                    location.start_index,
                    location.length
                );
                // Snapshot of the file region the hunk matched against.
                let file_matched_lines: Vec<_> = target_lines
                    [location.start_index..location.start_index + location.length]
                    .to_vec();
                trace!(
                    " File content in matched range: {:?}",
                    file_matched_lines
                );
                // match_lines_meta: one entry per non-'+' hunk line, recording
                // whether it is a removal ('-') and the '+' lines that follow it.
                // '+' lines before any anchor go into initial_additions.
                let mut match_lines_meta: Vec<(bool, Vec<String>)> = Vec::new();
                let mut initial_additions: Vec<String> = Vec::new();
                let mut line_iter = hunk.lines.iter().peekable();
                while let Some(line) = line_iter.peek() {
                    if let Some(stripped) = line.strip_prefix('+') {
                        initial_additions.push(stripped.to_string());
                        line_iter.next();
                    } else {
                        break;
                    }
                }
                for line in line_iter {
                    if let Some(stripped) = line.strip_prefix('+') {
                        if let Some(last) = match_lines_meta.last_mut() {
                            last.1.push(stripped.to_string());
                        } else {
                            initial_additions.push(stripped.to_string());
                        }
                    } else {
                        let is_removal = line.starts_with('-');
                        match_lines_meta.push((is_removal, Vec::new()));
                    }
                }
                // Diff the trimmed hunk match block against the trimmed file
                // region so indentation differences don't affect alignment.
                let match_block_content: Vec<&str> = hunk.get_match_block();
                let file_block_content: Vec<&str> =
                    file_matched_lines.iter().map(|s| s.as_str()).collect();
                let match_block_trimmed: Vec<&str> =
                    match_block_content.iter().map(|s| s.trim()).collect();
                let file_block_trimmed: Vec<&str> =
                    file_block_content.iter().map(|s| s.trim()).collect();
                let diff =
                    similar::TextDiff::from_slices(&match_block_trimmed, &file_block_trimmed);
                // Pre-pass: sample an initial (hunk, file) indentation pair
                // from the first aligned line that has any indentation.
                let mut current_hunk_indent = "";
                let mut current_target_indent = "";
                for op in diff.ops() {
                    let mut found = false;
                    match op {
                        similar::DiffOp::Equal {
                            old_index,
                            new_index,
                            len,
                        } => {
                            for i in 0..*len {
                                let h_line = match_block_content[*old_index + i];
                                let t_line = file_block_content[*new_index + i];
                                let h_ind = get_indent(h_line);
                                let t_ind = get_indent(t_line);
                                if !h_ind.is_empty() || !t_ind.is_empty() {
                                    current_hunk_indent = h_ind;
                                    current_target_indent = t_ind;
                                    found = true;
                                    break;
                                }
                            }
                        }
                        similar::DiffOp::Replace {
                            old_index,
                            new_index,
                            old_len,
                            new_len,
                        } => {
                            let min_len = std::cmp::min(*old_len, *new_len);
                            for i in 0..min_len {
                                let h_line = match_block_content[*old_index + i];
                                let t_line = file_block_content[*new_index + i];
                                let h_ind = get_indent(h_line);
                                let t_ind = get_indent(t_line);
                                if !h_ind.is_empty() || !t_ind.is_empty() {
                                    current_hunk_indent = h_ind;
                                    current_target_indent = t_ind;
                                    found = true;
                                    break;
                                }
                            }
                        }
                        _ => {}
                    }
                    if found {
                        break;
                    }
                }
                // Build the replacement region op by op.
                let mut final_lines = Vec::new();
                for line in initial_additions {
                    final_lines.push(adjust_indentation(
                        &line,
                        current_hunk_indent,
                        current_target_indent,
                    ));
                }
                // True when the matched region ends exactly at the end of the
                // file; used below to reinstate trailing context the file lacks.
                let is_at_eof = (location.start_index + location.length) == target_lines.len();
                let ops = diff.ops().to_vec();
                for (op_idx, op) in ops.iter().enumerate() {
                    match op {
                        // Lines present in both hunk and file: keep the file's
                        // version of context lines, drop removals, and append
                        // any '+' lines attached to this hunk line.
                        similar::DiffOp::Equal {
                            old_index,
                            new_index,
                            len,
                        } => {
                            for i in 0..*len {
                                let old_idx = old_index + i;
                                let new_idx = new_index + i;
                                let (is_removal, additions) = &match_lines_meta[old_idx];
                                let h_line = match_block_content[old_idx];
                                let t_line = &file_matched_lines[new_idx];
                                let h_ind = get_indent(h_line);
                                let t_ind = get_indent(t_line);
                                // Refresh the indentation sample as we go.
                                if !h_ind.is_empty() || !t_ind.is_empty() {
                                    current_hunk_indent = h_ind;
                                    current_target_indent = t_ind;
                                }
                                if !*is_removal {
                                    final_lines.push(file_matched_lines[new_idx].clone());
                                }
                                for add in additions {
                                    final_lines.push(adjust_indentation(
                                        add,
                                        current_hunk_indent,
                                        current_target_indent,
                                    ));
                                }
                            }
                        }
                        // Hunk lines absent from the file: their attached '+'
                        // lines are still emitted; a missing *context* line is
                        // re-emitted only when this is the final op and the
                        // region sits at end-of-file.
                        similar::DiffOp::Delete {
                            old_index, old_len, ..
                        } => {
                            let is_last_op = op_idx == ops.len() - 1;
                            for i in 0..*old_len {
                                let old_idx = old_index + i;
                                let (is_removal, additions) = &match_lines_meta[old_idx];
                                if !*is_removal && is_at_eof && is_last_op {
                                    let line = match_block_content[old_idx];
                                    final_lines.push(adjust_indentation(
                                        line,
                                        current_hunk_indent,
                                        current_target_indent,
                                    ));
                                }
                                for add in additions {
                                    final_lines.push(adjust_indentation(
                                        add,
                                        current_hunk_indent,
                                        current_target_indent,
                                    ));
                                }
                            }
                        }
                        // File lines the hunk never mentioned: preserve them.
                        similar::DiffOp::Insert {
                            new_index, new_len, ..
                        } => {
                            for i in 0..*new_len {
                                let new_idx = new_index + i;
                                final_lines.push(file_matched_lines[new_idx].clone());
                            }
                        }
                        similar::DiffOp::Replace {
                            old_index,
                            old_len,
                            new_index,
                            new_len,
                        } => {
                            // Refresh the indentation sample from the first
                            // indented pair inside this replace op.
                            if *old_len > 0 && *new_len > 0 {
                                let min_len = std::cmp::min(*old_len, *new_len);
                                for i in 0..min_len {
                                    let h_line = match_block_content[*old_index + i];
                                    let t_line = &file_matched_lines[*new_index + i];
                                    let h_ind = get_indent(h_line);
                                    let t_ind = get_indent(t_line);
                                    if !h_ind.is_empty() || !t_ind.is_empty() {
                                        current_hunk_indent = h_ind;
                                        current_target_indent = t_ind;
                                        break;
                                    }
                                }
                            }
                            if *old_len == *new_len {
                                // Same-length replace: treat pairwise like Equal.
                                for i in 0..*old_len {
                                    let old_idx = old_index + i;
                                    let new_idx = new_index + i;
                                    let (is_removal, additions) = &match_lines_meta[old_idx];
                                    let h_line = match_block_content[old_idx];
                                    let t_line = &file_matched_lines[new_idx];
                                    let h_ind = get_indent(h_line);
                                    let t_ind = get_indent(t_line);
                                    if !h_ind.is_empty() || !t_ind.is_empty() {
                                        current_hunk_indent = h_ind;
                                        current_target_indent = t_ind;
                                    }
                                    if !*is_removal {
                                        final_lines.push(file_matched_lines[new_idx].clone());
                                    }
                                    for add in additions {
                                        final_lines.push(adjust_indentation(
                                            add,
                                            current_hunk_indent,
                                            current_target_indent,
                                        ));
                                    }
                                }
                            } else {
                                // Uneven replace: keep the file's lines only if
                                // the hunk side contained at least one context
                                // line, then emit all attached additions.
                                let mut has_context = false;
                                for i in 0..*old_len {
                                    if !match_lines_meta[old_index + i].0 {
                                        has_context = true;
                                        break;
                                    }
                                }
                                if has_context {
                                    for i in 0..*new_len {
                                        final_lines.push(file_matched_lines[new_index + i].clone());
                                    }
                                }
                                for i in 0..*old_len {
                                    let (_, additions) = &match_lines_meta[old_index + i];
                                    for add in additions {
                                        final_lines.push(adjust_indentation(
                                            add,
                                            current_hunk_indent,
                                            current_target_indent,
                                        ));
                                    }
                                }
                            }
                        }
                    }
                }
                final_lines
            };
            // Splice the rebuilt region into the file, capturing what it replaced.
            let replaced_lines: Vec<String> = target_lines
                .splice(
                    location.start_index..location.start_index + location.length,
                    final_replace_block,
                )
                .collect();
            trace!(
                " Successfully spliced changes into target lines. Replaced {} lines.",
                replaced_lines.len()
            );
            HunkApplyStatus::Applied {
                location,
                match_type,
                replaced_lines,
            }
        }
        Err(error) => {
            HunkApplyStatus::Failed(error)
        }
    }
}
/// Strategy interface for locating where a hunk should apply within a slice
/// of target lines.
pub trait HunkFinder {
    /// Finds the location and match quality for `hunk` in `target_lines`, or
    /// an error describing why no unambiguous location exists.
    fn find_location<T: AsRef<str> + Sync>(
        &self,
        hunk: &Hunk,
        target_lines: &[T],
    ) -> Result<(HunkLocation, MatchType), HunkApplyError>;
}
/// Default [`HunkFinder`]: exact, whitespace-insensitive, and fuzzy matching
/// driven by the borrowed [`ApplyOptions`].
#[derive(Debug)]
pub struct DefaultHunkFinder<'a> {
    /// Matching configuration; `fuzz_factor` controls the fuzzy threshold.
    options: &'a ApplyOptions,
}
impl<'a> DefaultHunkFinder<'a> {
    /// Creates a finder that matches according to `options`.
    pub fn new(options: &'a ApplyOptions) -> Self {
        Self { options }
    }
    /// Narrows the fuzzy search to windows around a distinctive "anchor" line.
    ///
    /// Starting from the middle of the hunk and working outward, picks the
    /// first trimmed line of at least `MIN_ANCHOR_LEN` characters that occurs
    /// at most `MAX_ANCHOR_OCCURRENCES` times in the target. Each occurrence
    /// produces a window of `max(hunk_size * SEARCH_RADIUS_FACTOR,
    /// MIN_SEARCH_RADIUS)` lines around the estimated hunk start; overlapping
    /// windows are merged. Falls back to a single full-file range when no
    /// suitable anchor exists (or the hunk is empty).
    fn find_search_ranges<T: AsRef<str>>(
        match_block: &[&str],
        target_lines: &[T],
        hunk_size: usize,
    ) -> Vec<(usize, usize)> {
        const MAX_ANCHOR_OCCURRENCES: usize = 5;
        const MIN_ANCHOR_LEN: usize = 5;
        const SEARCH_RADIUS_FACTOR: usize = 2;
        const MIN_SEARCH_RADIUS: usize = 15;
        if hunk_size == 0 {
            return vec![(0, target_lines.len())];
        }
        // Spiral outward from the middle: mid, mid+1, mid-1, mid+2, mid-2, ...
        let mid = hunk_size / 2;
        for i in 0..=mid {
            let indices_to_check = [Some(mid + i), if i > 0 { Some(mid - i) } else { None }];
            for &line_idx_opt in &indices_to_check {
                if let Some(line_idx) = line_idx_opt {
                    if line_idx >= hunk_size {
                        continue;
                    }
                    let anchor_line = match_block[line_idx].trim();
                    // Very short lines (braces, blanks) make poor anchors.
                    if anchor_line.len() < MIN_ANCHOR_LEN {
                        continue;
                    }
                    let occurrences: Vec<_> = target_lines
                        .iter()
                        .enumerate()
                        .filter(|(_, l)| l.as_ref().trim() == anchor_line)
                        .map(|(i, _)| i)
                        .collect();
                    if !occurrences.is_empty() && occurrences.len() <= MAX_ANCHOR_OCCURRENCES {
                        trace!(
                            " Found good anchor line (hunk line {}) with {} occurrences: '{}'",
                            line_idx + 1,
                            occurrences.len(),
                            anchor_line
                        );
                        let mut ranges = Vec::new();
                        let search_radius =
                            (hunk_size * SEARCH_RADIUS_FACTOR).max(MIN_SEARCH_RADIUS);
                        for &occurrence_idx in &occurrences {
                            // Where the hunk would start if this occurrence is
                            // the anchor, padded by the search radius.
                            let estimated_start = occurrence_idx.saturating_sub(line_idx);
                            let start = estimated_start.saturating_sub(search_radius);
                            let end = (estimated_start + hunk_size + search_radius)
                                .min(target_lines.len());
                            ranges.push((start, end));
                        }
                        return Self::merge_ranges(ranges);
                    }
                }
            }
        }
        debug!(" No suitable anchor line found. Falling back to full file scan.");
        vec![(0, target_lines.len())]
    }
    /// Sorts `(start, end)` ranges by start and merges overlapping/touching
    /// ones, returning a minimal set of disjoint ranges.
    fn merge_ranges(mut ranges: Vec<(usize, usize)>) -> Vec<(usize, usize)> {
        if ranges.is_empty() {
            return vec![];
        }
        ranges.sort_unstable_by_key(|k| k.0);
        let mut merged = Vec::with_capacity(ranges.len());
        let mut current_range = ranges[0];
        for &(start, end) in &ranges[1..] {
            if start <= current_range.1 {
                current_range.1 = current_range.1.max(end);
            } else {
                merged.push(current_range);
                current_range = (start, end);
            }
        }
        merged.push(current_range);
        merged
    }
    /// Core search: tries progressively looser strategies, returning the
    /// first unambiguous location.
    ///
    /// Order: empty match block (file creation — only matches an empty
    /// target) → exact match → exact match ignoring trailing whitespace →
    /// windowed fuzzy match → whole-file fuzzy fallback when the target is
    /// shorter than the hunk. Fuzzy windows are scored by a 0.3/0.7 blend of
    /// line-level and word-level diff ratios, computed both trailing-trimmed
    /// ("strict") and fully trimmed ("loose"), taking the maximum. Ambiguous
    /// candidates are tie-broken with `old_start_line` when provided.
    fn find_hunk_location_internal<T: AsRef<str> + Sync>(
        &self,
        match_block: &[&str],
        target_lines: &[T],
        old_start_line: Option<usize>,
    ) -> Result<(HunkLocation, MatchType), HunkApplyError> {
        trace!(
            " find_hunk_location_internal called for a hunk with {} lines to match against {} target lines.",
            match_block.len(),
            target_lines.len()
        );
        // File-creation hunks only apply to an empty target.
        if match_block.is_empty() {
            trace!(" Match block is empty (file creation).");
            return if target_lines.is_empty() {
                trace!(" Target is empty, match successful at (0, 0).");
                Ok((
                    HunkLocation {
                        start_index: 0,
                        length: 0,
                    },
                    MatchType::Exact,
                ))
            } else {
                trace!(" Target is not empty, match failed.");
                Err(HunkApplyError::ContextNotFound)
            };
        }
        // Stage 1: byte-exact sliding-window match.
        trace!(" Attempting exact match for hunk...");
        {
            let result = if match_block.len() <= target_lines.len() {
                let iter = target_lines
                    .windows(match_block.len())
                    .enumerate()
                    .filter(|(_, window)| {
                        window
                            .iter()
                            .map(|s| s.as_ref())
                            .eq(match_block.iter().copied())
                    })
                    .map(|(i, _)| i);
                Self::tie_break_with_line_number(iter, old_start_line, "exact")
            } else {
                Self::tie_break_with_line_number(std::iter::empty(), old_start_line, "exact")
            };
            match result {
                Ok(Some(index)) => {
                    debug!(" Found unique exact match at index {}.", index);
                    return Ok((
                        HunkLocation {
                            start_index: index,
                            length: match_block.len(),
                        },
                        MatchType::Exact,
                    ));
                }
                Ok(None) => {}
                Err(matches) => return Err(HunkApplyError::AmbiguousExactMatch(matches)),
            }
        }
        // Trailing-whitespace-trimmed copies of the target, reused by the
        // remaining stages.
        let target_trimmed: Vec<String> = target_lines
            .iter()
            .map(|s| s.as_ref().trim_end().to_string())
            .collect();
        let target_refs: Vec<&str> = target_trimmed.iter().map(|s| s.as_str()).collect();
        // Stage 2: exact match ignoring trailing whitespace.
        trace!(" Attempting exact match (ignoring trailing whitespace)...");
        {
            let match_stripped: Vec<_> = match_block.iter().map(|s| s.trim_end()).collect();
            let result = if match_block.len() <= target_lines.len() {
                let iter = target_trimmed
                    .windows(match_block.len())
                    .enumerate()
                    .filter(|(_, window)| {
                        window
                            .iter()
                            .map(|s| s.as_str())
                            .eq(match_stripped.iter().copied())
                    })
                    .map(|(i, _)| i);
                Self::tie_break_with_line_number(
                    iter,
                    old_start_line,
                    "exact (ignoring whitespace)",
                )
            } else {
                Self::tie_break_with_line_number(
                    std::iter::empty(),
                    old_start_line,
                    "exact (ignoring whitespace)",
                )
            };
            match result {
                Ok(Some(index)) => {
                    debug!(
                        " Found unique whitespace-insensitive match at index {}.",
                        index
                    );
                    return Ok((
                        HunkLocation {
                            start_index: index,
                            length: match_block.len(),
                        },
                        MatchType::ExactIgnoringWhitespace,
                    ));
                }
                Ok(None) => {}
                Err(matches) => return Err(HunkApplyError::AmbiguousExactMatch(matches)),
            }
        }
        // Stage 3: fuzzy sliding-window search (only when enabled).
        if self.options.fuzz_factor > 0.0 && !match_block.is_empty() {
            trace!(
                " Exact matches failed. Attempting flexible fuzzy match (threshold={:.2})...",
                self.options.fuzz_factor
            );
            trace!(
                " Hunk match block ({} lines): {:?}",
                match_block.len(),
                match_block
            );
            // "Strict" variants trim trailing whitespace only; "loose"
            // variants trim both ends.
            let match_stripped_lines: Vec<&str> =
                match_block.iter().map(|s| s.trim_end()).collect();
            let match_content = match_stripped_lines.join("\n");
            let match_loose_lines: Vec<&str> = match_block.iter().map(|s| s.trim()).collect();
            let match_loose_content = match_loose_lines.join("\n");
            let mut best_score = -1.0;
            let mut best_ratio_at_best_score = -1.0;
            let mut potential_matches = Vec::new();
            // Window lengths vary around the hunk size by a bounded distance,
            // so the fuzzy match tolerates inserted/deleted lines.
            let len = match_block.len();
            let fuzz_distance = (len / 4).clamp(3, 8);
            let min_len = len.saturating_sub(fuzz_distance).max(1);
            let max_len = len.saturating_add(fuzz_distance);
            trace!(
                " Searching with window sizes from {} to {} (hunk size: {}, fuzz distance: {})",
                min_len,
                max_len,
                len,
                fuzz_distance
            );
            let search_ranges = Self::find_search_ranges(match_block, &target_trimmed, len);
            trace!(" Using search ranges: {:?}", search_ranges);
            // Score every window in every range. The parallel and serial
            // versions below are identical except for rayon adapters; the
            // tuple is (score, ratio, ratio_lines, ratio_words, index, len).
            #[cfg(feature = "parallel")]
            let all_scored_windows: Vec<(f64, f64, f64, f64, usize, usize)> = search_ranges
                .par_iter()
                .flat_map(|&(range_start, range_end)| {
                    let match_stripped_lines = &match_stripped_lines;
                    let match_content = &match_content;
                    let match_loose_lines = &match_loose_lines;
                    let match_loose_content = &match_loose_content;
                    let target_slice = &target_refs[range_start..range_end];
                    (min_len..=max_len)
                        .into_par_iter()
                        .filter(move |&window_len| window_len <= target_slice.len())
                        .flat_map(move |window_len| {
                            (0..=target_slice.len() - window_len)
                                .into_par_iter()
                                .map(move |i| {
                                    let window_stripped_lines = &target_slice[i..i + window_len];
                                    let absolute_index = range_start + i;
                                    let diff_lines = similar::TextDiff::from_slices(
                                        window_stripped_lines,
                                        match_stripped_lines,
                                    );
                                    let ratio_lines = diff_lines.ratio();
                                    // Pre-size the joined window content.
                                    let mut capacity = 0;
                                    for line in window_stripped_lines {
                                        capacity += line.len() + 1;
                                    }
                                    let mut window_content = String::with_capacity(capacity);
                                    for (j, line) in window_stripped_lines.iter().enumerate() {
                                        if j > 0 {
                                            window_content.push('\n');
                                        }
                                        window_content.push_str(line);
                                    }
                                    let diff_words = similar::TextDiff::from_words(
                                        &window_content,
                                        match_content,
                                    );
                                    let ratio_words = diff_words.ratio();
                                    let ratio_strict =
                                        0.3 * ratio_lines as f64 + 0.7 * ratio_words as f64;
                                    let window_loose_lines: Vec<&str> =
                                        window_stripped_lines.iter().map(|s| s.trim()).collect();
                                    let diff_loose_lines = similar::TextDiff::from_slices(
                                        &window_loose_lines,
                                        match_loose_lines,
                                    );
                                    let ratio_loose_lines = diff_loose_lines.ratio();
                                    let window_loose_content = window_loose_lines.join("\n");
                                    let diff_loose_words = similar::TextDiff::from_words(
                                        &window_loose_content,
                                        match_loose_content,
                                    );
                                    let ratio_loose_words = diff_loose_words.ratio();
                                    let ratio_loose = 0.3 * ratio_loose_lines as f64
                                        + 0.7 * ratio_loose_words as f64;
                                    let ratio = ratio_strict.max(ratio_loose);
                                    let score = ratio;
                                    (
                                        score,
                                        ratio,
                                        ratio_lines as f64,
                                        ratio_words as f64,
                                        absolute_index,
                                        window_len,
                                    )
                                })
                        })
                })
                .collect();
            #[cfg(not(feature = "parallel"))]
            let all_scored_windows: Vec<(f64, f64, f64, f64, usize, usize)> = search_ranges
                .iter()
                .flat_map(|&(range_start, range_end)| {
                    let match_stripped_lines = &match_stripped_lines;
                    let match_content = &match_content;
                    let match_loose_lines = &match_loose_lines;
                    let match_loose_content = &match_loose_content;
                    let target_slice = &target_refs[range_start..range_end];
                    (min_len..=max_len)
                        .filter(move |&window_len| window_len <= target_slice.len())
                        .flat_map(move |window_len| {
                            (0..=target_slice.len() - window_len).map(move |i| {
                                let window_stripped_lines = &target_slice[i..i + window_len];
                                let absolute_index = range_start + i;
                                let diff_lines = similar::TextDiff::from_slices(
                                    window_stripped_lines,
                                    match_stripped_lines,
                                );
                                let ratio_lines = diff_lines.ratio();
                                let mut capacity = 0;
                                for line in window_stripped_lines {
                                    capacity += line.len() + 1;
                                }
                                let mut window_content = String::with_capacity(capacity);
                                for (j, line) in window_stripped_lines.iter().enumerate() {
                                    if j > 0 {
                                        window_content.push('\n');
                                    }
                                    window_content.push_str(line);
                                }
                                let diff_words =
                                    similar::TextDiff::from_words(&window_content, match_content);
                                let ratio_words = diff_words.ratio();
                                let ratio_strict =
                                    0.3 * ratio_lines as f64 + 0.7 * ratio_words as f64;
                                let window_loose_lines: Vec<&str> =
                                    window_stripped_lines.iter().map(|s| s.trim()).collect();
                                let diff_loose_lines = similar::TextDiff::from_slices(
                                    &window_loose_lines,
                                    match_loose_lines,
                                );
                                let ratio_loose_lines = diff_loose_lines.ratio();
                                let window_loose_content = window_loose_lines.join("\n");
                                let diff_loose_words = similar::TextDiff::from_words(
                                    &window_loose_content,
                                    match_loose_content,
                                );
                                let ratio_loose_words = diff_loose_words.ratio();
                                let ratio_loose =
                                    0.3 * ratio_loose_lines as f64 + 0.7 * ratio_loose_words as f64;
                                let ratio = ratio_strict.max(ratio_loose);
                                let score = ratio;
                                (
                                    score,
                                    ratio,
                                    ratio_lines as f64,
                                    ratio_words as f64,
                                    absolute_index,
                                    window_len,
                                )
                            })
                        })
                })
                .collect();
            // Trace-only: dump the top five candidates by score.
            if log::log_enabled!(log::Level::Trace) {
                let mut sorted_windows = all_scored_windows.clone();
                sorted_windows
                    .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
                trace!(" Top fuzzy match candidates:");
                for (score, ratio, _, _, idx, len) in sorted_windows.iter().take(5) {
                    let window_content: Vec<_> = target_refs[*idx..*idx + *len].to_vec();
                    trace!(
                        " - Index {}, Len {}: Score {:.3} (Ratio {:.3}) | Content: {:?}",
                        idx,
                        len,
                        score,
                        ratio,
                        window_content
                    );
                }
            }
            // Select best-scoring windows; near-equal scores (within 1e-9)
            // accumulate as tied candidates.
            for (score, ratio, ratio_lines, ratio_words, absolute_index, window_len) in
                all_scored_windows
            {
                if score > best_score {
                    trace!(
                        " New best score: {:.3} (ratio {:.3} [l:{:.3},w:{:.3}]) at index {} (window len {})",
                        score,
                        ratio,
                        ratio_lines,
                        ratio_words,
                        absolute_index, window_len
                    );
                    best_score = score;
                    best_ratio_at_best_score = ratio;
                    potential_matches.clear();
                    potential_matches.push((absolute_index, window_len));
                } else if f64::abs(score - best_score) < 1e-9 {
                    if potential_matches.is_empty() {
                        potential_matches.push((absolute_index, window_len));
                        continue;
                    }
                    if ratio > best_ratio_at_best_score {
                        trace!(
                            " Tie in score ({:.3}), but new ratio {:.3} is better than old {:.3}. New best.",
                            score,
                            ratio,
                            best_ratio_at_best_score
                        );
                        best_ratio_at_best_score = ratio;
                        potential_matches.clear();
                        potential_matches.push((absolute_index, window_len));
                    } else if f64::abs(ratio - best_ratio_at_best_score) < 1e-9 {
                        trace!(
                            " Tie in score ({:.3}) and ratio ({:.3}). Adding candidate: index {}, len {}",
                            score,
                            ratio,
                            absolute_index,
                            window_len
                        );
                        potential_matches.push((absolute_index, window_len));
                    }
                }
            }
            trace!(
                " Fuzzy search complete. Best score: {:.3}, best ratio: {:.3}, potential matches: {:?}",
                best_score,
                best_ratio_at_best_score,
                potential_matches
            );
            if best_ratio_at_best_score >= f64::from(self.options.fuzz_factor) {
                if potential_matches.len() == 1 {
                    let (start, len) = potential_matches[0];
                    debug!(
                        " Found best fuzzy match at index {} (length {}, similarity: {:.3} >= threshold: {:.3}).",
                        start, len, best_ratio_at_best_score, self.options.fuzz_factor
                    );
                    return Ok((
                        HunkLocation {
                            start_index: start,
                            length: len,
                        },
                        MatchType::Fuzzy {
                            score: best_ratio_at_best_score,
                        },
                    ));
                }
                // Multiple tied candidates: prefer the one whose 1-based
                // position is closest to the hunk's recorded start line.
                if let Some(line) = old_start_line {
                    trace!(
                        " Ambiguous fuzzy match found at {:?}. Attempting to tie-break using line number hint: {}",
                        potential_matches,
                        line
                    );
                    let mut closest_match: Option<(usize, usize)> = None;
                    let mut min_distance = usize::MAX;
                    let mut is_tie = false;
                    for &(match_index, match_len) in &potential_matches {
                        trace!(
                            " Candidate {:?}: distance from line hint is {}",
                            (match_index, match_len),
                            (match_index + 1).abs_diff(line)
                        );
                        let distance = (match_index + 1).abs_diff(line);
                        if distance < min_distance {
                            min_distance = distance;
                            closest_match = Some((match_index, match_len));
                            is_tie = false;
                        } else if distance == min_distance {
                            is_tie = true;
                        }
                    }
                    if !is_tie {
                        if let Some((start, len)) = closest_match {
                            debug!(
                                " Tie-broke ambiguous fuzzy match using line number. Best match is at index {} (length {}, similarity: {:.3} >= threshold: {:.3}).",
                                start, len, best_ratio_at_best_score, self.options.fuzz_factor
                            );
                            return Ok((
                                HunkLocation {
                                    start_index: start,
                                    length: len,
                                },
                                MatchType::Fuzzy {
                                    score: best_ratio_at_best_score,
                                },
                            ));
                        }
                    } else {
                        trace!(" Tie-breaking failed: multiple fuzzy matches are equidistant from the line number hint.");
                    }
                }
                warn!(" Ambiguous fuzzy match: Multiple locations found with same top score ({:.3}): {:?}. Skipping.", best_ratio_at_best_score, potential_matches);
                return Err(HunkApplyError::AmbiguousFuzzyMatch(potential_matches));
            } else if best_ratio_at_best_score >= 0.0 {
                // A best candidate exists but its similarity is below threshold.
                let (start, len) = potential_matches.first().copied().unwrap_or((0, 0));
                debug!(
                    " Fuzzy match failed: Best location (index {}, len {}) had similarity {:.3}, which is below the threshold of {:.3}.",
                    start, len, best_ratio_at_best_score, self.options.fuzz_factor
                );
                return Err(HunkApplyError::FuzzyMatchBelowThreshold {
                    best_score: best_ratio_at_best_score,
                    threshold: self.options.fuzz_factor,
                    location: HunkLocation {
                        start_index: start,
                        length: len,
                    },
                });
            } else {
                debug!(" Fuzzy match: Could not find any potential match location.");
            }
        } else if self.options.fuzz_factor <= 0.0 {
            trace!(" Failed exact matches. Fuzzy matching disabled.");
        }
        // Stage 4: target shorter than the hunk — compare the whole file
        // against the hunk with a slightly relaxed threshold.
        if !target_lines.is_empty()
            && target_lines.len() < match_block.len()
            && self.options.fuzz_factor > 0.0
        {
            trace!(" Target file is shorter than hunk. Attempting end-of-file fuzzy match...");
            let match_stripped: Vec<&str> = match_block.iter().map(|s| s.trim_end()).collect();
            let diff = TextDiff::from_slices(&target_refs, &match_stripped);
            let ratio = diff.ratio();
            let effective_threshold = (f64::from(self.options.fuzz_factor) - 0.1).max(0.5);
            trace!(
                " Using effective threshold for EOF match: {:.3}",
                effective_threshold
            );
            if ratio as f64 >= effective_threshold {
                debug!(
                    " End-of-file fuzzy match succeeded with ratio {:.3} (threshold {:.3}). Treating as full-file match.",
                    ratio, effective_threshold
                );
                return Ok((
                    HunkLocation {
                        start_index: 0,
                        length: target_lines.len(),
                    },
                    MatchType::Fuzzy {
                        score: ratio as f64,
                    },
                ));
            } else {
                trace!(
                    " End-of-file fuzzy match ratio {:.3} did not meet effective threshold {:.3}.",
                    ratio,
                    effective_threshold
                );
            }
        }
        debug!(" Failed to find any suitable match location for hunk.");
        Err(HunkApplyError::ContextNotFound)
    }
    /// Resolves multiple match candidates using the hunk's 1-based start-line
    /// hint.
    ///
    /// Returns `Ok(Some(index))` for a unique (or uniquely closest) match,
    /// `Ok(None)` when `matches` is empty, and `Err(all_matches)` when the
    /// ambiguity cannot be resolved (no hint, or equidistant candidates).
    fn tie_break_with_line_number(
        mut matches: impl Iterator<Item = usize>,
        start_line: Option<usize>,
        match_type: &str,
    ) -> Result<Option<usize>, Vec<usize>> {
        let first_match = match matches.next() {
            Some(m) => m,
            None => {
                trace!(" No {} matches found.", match_type);
                return Ok(None);
            }
        };
        if let Some(second_match) = matches.next() {
            // Two or more candidates: collect them all and try the hint.
            let mut all_matches = vec![first_match, second_match];
            all_matches.extend(matches);
            trace!(
                " Found {} {} match candidate(s) at indices: {:?}",
                all_matches.len(),
                match_type,
                all_matches
            );
            if let Some(line) = start_line {
                trace!(
                    " Ambiguous {} match found at {:?}. Attempting to tie-break using line number hint: {}",
                    match_type,
                    all_matches,
                    line
                );
                let mut closest_index = 0;
                let mut min_distance = usize::MAX;
                let mut is_tie = false;
                for &match_index in &all_matches {
                    trace!(
                        " Candidate index {}: distance from line hint {} is {}",
                        match_index,
                        line,
                        (match_index + 1).abs_diff(line)
                    );
                    // `+ 1` converts the 0-based index to a 1-based line
                    // position before comparing with the hint.
                    let distance = (match_index + 1).abs_diff(line);
                    if distance < min_distance {
                        min_distance = distance;
                        closest_index = match_index;
                        is_tie = false;
                    } else if distance == min_distance {
                        is_tie = true;
                    }
                }
                if !is_tie {
                    trace!(
                        " Successfully tie-broke using line number. Best match is at index {}.",
                        closest_index
                    );
                    return Ok(Some(closest_index));
                }
                trace!(
                    " Tie-breaking failed: multiple matches are equidistant from the line number hint."
                );
            } else {
                trace!(
                    " tie_break: Ambiguous '{}' match, but no line number hint provided.",
                    match_type
                );
            }
            Err(all_matches)
        } else {
            trace!(
                " Found 1 {} match candidate at index: {}",
                match_type,
                first_match
            );
            trace!(
                " tie_break: Only one match found for '{}' match at index {}. No tie-break needed.",
                match_type,
                first_match
            );
            Ok(Some(first_match))
        }
    }
}
impl<'a> HunkFinder for DefaultHunkFinder<'a> {
    /// Resolves the hunk's match block and delegates to the internal search,
    /// passing the hunk's recorded start line as a tie-break hint.
    fn find_location<T: AsRef<str> + Sync>(
        &self,
        hunk: &Hunk,
        target_lines: &[T],
    ) -> Result<(HunkLocation, MatchType), HunkApplyError> {
        self.find_hunk_location_internal(
            &hunk.get_match_block(),
            target_lines,
            hunk.old_start_line,
        )
    }
}
/// Convenience wrapper over [`find_hunk_location_in_lines`] that splits
/// `target_content` into lines first.
pub fn find_hunk_location(
    hunk: &Hunk,
    target_content: &str,
    options: &ApplyOptions,
) -> Result<(HunkLocation, MatchType), HunkApplyError> {
    let lines: Vec<&str> = target_content.lines().collect();
    find_hunk_location_in_lines(hunk, &lines, options)
}
/// Locates `hunk` within `target_lines` using a [`DefaultHunkFinder`]
/// configured from `options`.
pub fn find_hunk_location_in_lines<T: AsRef<str> + Sync>(
    hunk: &Hunk,
    target_lines: &[T],
    options: &ApplyOptions,
) -> Result<(HunkLocation, MatchType), HunkApplyError> {
    DefaultHunkFinder::new(options).find_location(hunk, target_lines)
}
/// Parses a unified-diff hunk header such as `@@ -12,3 +14,4 @@`, returning
/// the old and new starting line numbers when they can be read.
///
/// Each side is parsed independently, so a malformed old range still allows
/// the new range to be returned (and vice versa). Fewer than three
/// whitespace-separated tokens yields `(None, None)`.
fn parse_hunk_header(line: &str) -> (Option<usize>, Option<usize>) {
    let mut tokens = line.split_whitespace();
    let _leading_at = tokens.next();
    let old_token = tokens.next();
    let new_token = tokens.next();
    let (Some(old_part), Some(new_part)) = (old_token, new_token) else {
        return (None, None);
    };
    // Shared extractor: strip the sign, take digits before any ',', parse.
    let number_after = |token: &str, sign: char| {
        token
            .strip_prefix(sign)
            .and_then(|rest| rest.split(',').next())
            .and_then(|digits| digits.parse::<usize>().ok())
    };
    (number_after(old_part, '-'), number_after(new_part, '+'))
}