use super::FileMode;
use super::FileOperation;
use super::FilePatch;
use super::Format;
use super::ParseOptions;
use super::PatchSetParseError;
use super::error::PatchSetParseErrorKind;
use crate::Patch;
use crate::binary::BinaryPatch;
use crate::binary::parse_binary_patch;
use crate::patch::parse::parse_one;
use crate::utils::Text;
use crate::utils::escaped_filename;
use alloc::borrow::Cow;
use alloc::string::String;
/// Line prefix of the original-file header (`--- <path>`); one of the two
/// prefixes `find_patch_start` looks for when locating a unified diff.
const ORIGINAL_PREFIX: &str = "--- ";
/// Line prefix of the modified-file header (`+++ <path>`); the other prefix
/// `find_patch_start` looks for.
const MODIFIED_PREFIX: &str = "+++ ";
/// Path compared byte-for-byte against header paths in
/// `extract_file_op_unidiff` to recognise file creations and deletions.
const DEV_NULL: &str = "/dev/null";
/// Separator searched for by `strip_email_preamble` when the input starts
/// with `From `; everything up to and including its first occurrence is
/// dropped before parsing.
const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n";
/// Iterator state for parsing a sequence of file patches out of one input.
///
/// Built with `PatchSet::parse` (for `str`) or `PatchSet::parse_bytes`
/// (for `[u8]`) and driven through its `Iterator` implementation.
pub struct PatchSet<'a, T: ?Sized> {
/// Input being parsed; the constructors pass it through
/// `strip_email_preamble` first.
input: &'a T,
/// Byte offset into `input` from which the next patch is searched for.
offset: usize,
/// Parse options; `opts.format` selects the unified-diff or git-diff path.
opts: ParseOptions,
/// Set once the underlying parser reports no further patch; the iterator
/// returns `None` from then on.
finished: bool,
/// Set as soon as the start of any patch has been located; when it is still
/// `false` at the end of input a `NoPatchesFound` error is yielded.
found_any: bool,
}
impl<'a> PatchSet<'a, str> {
    /// Creates a patch-set iterator over UTF-8 text.
    ///
    /// The input is first passed through `strip_email_preamble`, so input
    /// that begins with `From ` loses everything up to and including the
    /// first `\n---\n`. Patches are then parsed one at a time as the
    /// iterator is advanced, using the format selected in `opts`.
    pub fn parse(input: &'a str, opts: ParseOptions) -> Self {
        Self {
            input: strip_email_preamble(input),
            offset: 0,
            opts,
            finished: false,
            found_any: false,
        }
    }
}
impl<'a> PatchSet<'a, [u8]> {
    /// Creates a patch-set iterator over raw bytes.
    ///
    /// The input is first passed through `strip_email_preamble`, so input
    /// that begins with `From ` loses everything up to and including the
    /// first `\n---\n`. Patches are then parsed one at a time as the
    /// iterator is advanced, using the format selected in `opts`.
    pub fn parse_bytes(input: &'a [u8], opts: ParseOptions) -> Self {
        Self {
            input: strip_email_preamble(input),
            offset: 0,
            opts,
            finished: false,
            found_any: false,
        }
    }
}
/// Yields one parsed file patch (or one error) per call for textual input.
impl<'a> Iterator for PatchSet<'a, str> {
    type Item = Result<FilePatch<'a, str>, PatchSetParseError>;

    /// Delegates to the format-agnostic `next_patch` driver.
    fn next(&mut self) -> Option<Self::Item> {
        next_patch(self)
    }
}
/// Yields one parsed file patch (or one error) per call for byte input.
impl<'a> Iterator for PatchSet<'a, [u8]> {
    type Item = Result<FilePatch<'a, [u8]>, PatchSetParseError>;

    /// Delegates to the format-agnostic `next_patch` driver.
    fn next(&mut self) -> Option<Self::Item> {
        next_patch(self)
    }
}
/// Shared iterator step for both input flavours.
///
/// Dispatches on `ps.opts.format` to the unified-diff or git-diff parser.
/// When that parser reports no further patch, the set is marked finished;
/// if no patch start was ever located, a single `NoPatchesFound` error
/// (with an empty span at the current offset) is yielded before the
/// iterator goes quiet.
fn next_patch<'a, T: Text + ?Sized>(
    ps: &mut PatchSet<'a, T>,
) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
    if ps.finished {
        return None;
    }

    let parsed = match ps.opts.format {
        Format::UniDiff => next_unidiff_patch(ps),
        Format::GitDiff => next_gitdiff_patch(ps),
    };
    if parsed.is_some() {
        return parsed;
    }

    // End of input: remember it so later calls short-circuit.
    ps.finished = true;
    if ps.found_any {
        return None;
    }

    Some(Err(PatchSetParseError::new(
        PatchSetParseErrorKind::NoPatchesFound,
        ps.offset..ps.offset,
    )))
}
/// Parses the next plain unified-diff patch, if any.
///
/// Returns `None` when the unread input is empty or contains no `--- ` /
/// `+++ ` header line. Otherwise the cursor is advanced past whatever
/// `parse_one` consumed — before the parse result is inspected, so the
/// offset moves even when an error is returned.
///
/// Errors from deriving the file operation carry an empty span at the
/// absolute offset where the patch started.
fn next_unidiff_patch<'a, T: Text + ?Sized>(
    ps: &mut PatchSet<'a, T>,
) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
    let unread = remaining(ps);
    if unread.is_empty() {
        return None;
    }

    let skipped = find_patch_start(unread)?;
    ps.found_any = true;
    let abs_patch_start = ps.offset + skipped;

    let (_, patch_input) = unread.split_at(skipped);
    let (result, consumed) = parse_one(patch_input, crate::patch::parse::ParseOpts::default());
    ps.offset = abs_patch_start + consumed;

    let patch = match result {
        Ok(patch) => patch,
        Err(e) => return Some(Err(e.into())),
    };

    let operation = extract_file_op_unidiff(patch.original_path(), patch.modified_path())
        .map_err(|mut err| {
            err.set_span(abs_patch_start..abs_patch_start);
            err
        });

    Some(operation.map(|operation| FilePatch::new(operation, patch, None, None)))
}
/// Returns the part of the input that lies at or after the current offset.
fn remaining<'a, T: Text + ?Sized>(ps: &PatchSet<'a, T>) -> &'a T {
    ps.input.split_at(ps.offset).1
}
/// Finds the byte offset of the first line that starts with `--- ` or
/// `+++ `.
///
/// Offsets are accumulated from the lengths of the lines yielded by
/// `input.lines()`. Returns `None` when no such line exists.
fn find_patch_start<T: Text + ?Sized>(input: &T) -> Option<usize> {
    let mut scanned = 0;
    for line in input.lines() {
        let is_file_header =
            line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX);
        if is_file_header {
            return Some(scanned);
        }
        scanned += line.len();
    }
    None
}
/// Drops a leading e-mail preamble from `input`.
///
/// Only input that starts with `From ` is touched. For such input,
/// everything up to and including the first `\n---\n` is removed. If the
/// separator is missing the input is returned unchanged.
fn strip_email_preamble<T: Text + ?Sized>(input: &T) -> &T {
    if input.starts_with("From ") {
        if let Some(separator_at) = input.find(EMAIL_PREAMBLE_SEPARATOR) {
            let body_start = separator_at + EMAIL_PREAMBLE_SEPARATOR.len();
            return input.split_at(body_start).1;
        }
    }
    input
}
/// Parses the next `diff --git` patch, if any.
///
/// Steps:
/// 1. Skip to the next `diff --git ` line (`None` if there is none).
/// 2. Parse the extended header and advance past it.
/// 3. If the header announced binary content (`Binary files ...` marker or
///    a `GIT binary patch` payload), build a binary file patch.
/// 4. Otherwise parse the textual hunks with `parse_one` (preamble skipping
///    disabled) and build a regular file patch.
///
/// The cursor is moved past the header before anything fallible runs, and
/// past the body as soon as its length is known. Errors from deriving the
/// file operation of a text patch, and from parsing file modes, get an
/// empty span at the absolute start of the patch.
fn next_gitdiff_patch<'a, T: Text + ?Sized>(
    ps: &mut PatchSet<'a, T>,
) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
    let skipped = find_gitdiff_start(remaining(ps))?;
    ps.found_any = true;
    ps.offset += skipped;
    let abs_patch_start = ps.offset;

    let (header, header_len) = GitHeader::parse(remaining(ps));
    ps.offset += header_len;

    // Stamps an error with an empty span at the start of this patch.
    let at_patch_start = |mut err: PatchSetParseError| {
        err.set_span(abs_patch_start..abs_patch_start);
        err
    };

    // Work out whether (and which) binary payload this patch carries. The
    // bare marker takes precedence over an encoded payload.
    let binary_patch = if header.is_binary_marker {
        Some(BinaryPatch::Marker)
    } else if let Some(rel_start) = header.binary_patch_offset {
        // `rel_start` is relative to the start of the header.
        let binary_start = abs_patch_start + rel_start;
        let (_, binary_input) = ps.input.split_at(binary_start);
        match parse_binary_patch(binary_input.as_bytes()) {
            Ok((parsed, consumed)) => {
                ps.offset = binary_start + consumed;
                Some(parsed)
            }
            Err(e) => return Some(Err(e.into())),
        }
    } else {
        None
    };

    if let Some(binary_patch) = binary_patch {
        let operation = match extract_file_op_binary(&header, abs_patch_start) {
            Ok(op) => op,
            Err(e) => return Some(Err(e)),
        };
        let (old_mode, new_mode) = match parse_file_modes(&header) {
            Ok(modes) => modes,
            Err(e) => return Some(Err(at_patch_start(e))),
        };
        return Some(Ok(FilePatch::new_binary(
            operation,
            binary_patch,
            old_mode,
            new_mode,
        )));
    }

    // Text patch: the header is already consumed, so the hunk parser must
    // not try to skip a preamble of its own.
    let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble();
    let (result, consumed) = parse_one(remaining(ps), opts);
    ps.offset += consumed;
    let patch = match result {
        Ok(patch) => patch,
        Err(e) => return Some(Err(e.into())),
    };

    let operation = match extract_file_op_gitdiff(&header, &patch) {
        Ok(op) => op,
        Err(e) => return Some(Err(at_patch_start(e))),
    };
    let (old_mode, new_mode) = match parse_file_modes(&header) {
        Ok(modes) => modes,
        Err(e) => return Some(Err(at_patch_start(e))),
    };

    Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode)))
}
/// Finds the byte offset of the first line that starts with `diff --git `.
///
/// Offsets are accumulated from the lengths of the lines yielded by
/// `input.lines()`. Returns `None` when no such line exists.
fn find_gitdiff_start<T: Text + ?Sized>(input: &T) -> Option<usize> {
    let mut scanned = 0;
    for line in input.lines() {
        if !line.starts_with("diff --git ") {
            scanned += line.len();
            continue;
        }
        return Some(scanned);
    }
    None
}
/// Fields collected from the extended header of one `diff --git` patch.
///
/// Each `Option<&T>` holds the remainder of the matching header line after
/// its keyword prefix, with a trailing `\n` removed; it is `None` when the
/// line was absent.
#[derive(Debug)]
struct GitHeader<'a, T: ?Sized> {
/// Remainder of the `diff --git ` line (the two paths, still unparsed).
diff_git_line: Option<&'a T>,
/// Remainder of the `rename from ` line.
rename_from: Option<&'a T>,
/// Remainder of the `rename to ` line.
rename_to: Option<&'a T>,
/// Remainder of the `copy from ` line.
copy_from: Option<&'a T>,
/// Remainder of the `copy to ` line.
copy_to: Option<&'a T>,
/// Remainder of the `old mode ` line.
old_mode: Option<&'a T>,
/// Remainder of the `new mode ` line.
new_mode: Option<&'a T>,
/// Remainder of the `new file mode ` line.
new_file_mode: Option<&'a T>,
/// Remainder of the `deleted file mode ` line.
deleted_file_mode: Option<&'a T>,
/// `true` when a `Binary files ` line was seen.
is_binary_marker: bool,
/// Byte offset of the `GIT binary patch` line, measured from the start of
/// the input handed to `GitHeader::parse`.
binary_patch_offset: Option<usize>,
}
// Written by hand rather than derived: `#[derive(Default)]` would add a
// `T: Default` bound, which unsized `T` such as `str` or `[u8]` cannot meet.
impl<T: ?Sized> Default for GitHeader<'_, T> {
/// Returns a header with every field absent and both binary flags cleared.
fn default() -> Self {
Self {
diff_git_line: None,
rename_from: None,
rename_to: None,
copy_from: None,
copy_to: None,
old_mode: None,
new_mode: None,
new_file_mode: None,
deleted_file_mode: None,
is_binary_marker: false,
binary_patch_offset: None,
}
}
}
impl<'a, T: Text + ?Sized> GitHeader<'a, T> {
    /// Reads extended-header lines from the start of `input`.
    ///
    /// Returns the collected header together with the number of bytes that
    /// belong to it. Reading stops, without consuming the line, at the
    /// first line that is not a recognised header line or at a second
    /// `diff --git ` line (the start of the next patch).
    ///
    /// Only a trailing `\n` is removed from each line before matching.
    fn parse(input: &'a T) -> (Self, usize) {
        // Lines that are valid in a header but carry nothing we keep.
        const IGNORED_PREFIXES: [&str; 3] =
            ["index ", "similarity index ", "dissimilarity index "];

        let mut header = GitHeader::default();
        let mut consumed = 0;

        for raw_line in input.lines() {
            let line = strip_line_ending(raw_line);

            if let Some(paths) = line.strip_prefix("diff --git ") {
                if header.diff_git_line.is_some() {
                    // A second `diff --git` belongs to the next patch.
                    break;
                }
                header.diff_git_line = Some(paths);
            } else if let Some(value) = line.strip_prefix("rename from ") {
                header.rename_from = Some(value);
            } else if let Some(value) = line.strip_prefix("rename to ") {
                header.rename_to = Some(value);
            } else if let Some(value) = line.strip_prefix("copy from ") {
                header.copy_from = Some(value);
            } else if let Some(value) = line.strip_prefix("copy to ") {
                header.copy_to = Some(value);
            } else if let Some(value) = line.strip_prefix("old mode ") {
                header.old_mode = Some(value);
            } else if let Some(value) = line.strip_prefix("new mode ") {
                header.new_mode = Some(value);
            } else if let Some(value) = line.strip_prefix("new file mode ") {
                header.new_file_mode = Some(value);
            } else if let Some(value) = line.strip_prefix("deleted file mode ") {
                header.deleted_file_mode = Some(value);
            } else if IGNORED_PREFIXES.iter().any(|&prefix| line.starts_with(prefix)) {
                // Recognised but not recorded.
            } else if line.starts_with("Binary files ") {
                header.is_binary_marker = true;
            } else if line.starts_with("GIT binary patch") {
                // Record where this line starts, i.e. before it is counted.
                header.binary_patch_offset = Some(consumed);
            } else {
                break;
            }

            consumed += raw_line.len();
        }

        (header, consumed)
    }
}
/// Derives the file operation of a textual git patch.
///
/// Sources are tried in this order:
/// 1. `rename from` / `rename to` header lines → `Rename`.
/// 2. `copy from` / `copy to` header lines → `Copy`.
/// 3. The `---` / `+++` paths of the parsed patch, when either is present.
/// 4. The paths on the `diff --git` line, combined with the
///    `new file mode` / `deleted file mode` header lines.
///
/// # Errors
///
/// Fails when a rename/copy path cannot be unescaped, when the `---`/`+++`
/// paths are inconsistent, or with `InvalidDiffGitPath` when the
/// `diff --git` line is missing or cannot be split into two paths.
fn extract_file_op_gitdiff<'a, T: Text + ?Sized>(
    header: &GitHeader<'a, T>,
    patch: &Patch<'a, T>,
) -> Result<FileOperation<'a, T>, PatchSetParseError> {
    if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) {
        let from = escaped_filename(from)?;
        let to = escaped_filename(to)?;
        return Ok(FileOperation::Rename { from, to });
    }

    if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) {
        let from = escaped_filename(from)?;
        let to = escaped_filename(to)?;
        return Ok(FileOperation::Copy { from, to });
    }

    let has_file_headers = patch.original().is_some() || patch.modified().is_some();
    if has_file_headers {
        return extract_file_op_unidiff(patch.original_path(), patch.modified_path());
    }

    // No hunks (e.g. a pure mode change): fall back to the `diff --git` line.
    let (original, modified) = header
        .diff_git_line
        .and_then(parse_diff_git_path)
        .ok_or(PatchSetParseErrorKind::InvalidDiffGitPath)?;

    let is_new = header.new_file_mode.is_some();
    let is_deleted = header.deleted_file_mode.is_some();
    let operation = match (is_new, is_deleted) {
        (true, _) => FileOperation::Create(modified),
        (false, true) => FileOperation::Delete(original),
        (false, false) => FileOperation::Modify { original, modified },
    };
    Ok(operation)
}
/// Parses the old and new file modes recorded in a git header.
///
/// The old mode comes from `old mode`, falling back to `deleted file mode`.
/// The new mode comes from `new mode`, falling back to `new file mode`.
/// A side with neither line yields `None`.
///
/// # Errors
///
/// Returns `InvalidFileMode` (carrying a lossy rendering of the bytes) when
/// a mode is not available as a string, or whatever error `FileMode`'s
/// `FromStr` implementation reports for an unparsable mode. The old mode is
/// checked before the new one.
fn parse_file_modes<T: Text + ?Sized>(
    header: &GitHeader<'_, T>,
) -> Result<(Option<FileMode>, Option<FileMode>), PatchSetParseError> {
    let parse_mode = |raw: &T| -> Result<FileMode, PatchSetParseError> {
        match raw.as_str() {
            Some(text) => text.parse::<FileMode>(),
            None => {
                let lossy = String::from_utf8_lossy(raw.as_bytes()).into_owned();
                Err(PatchSetParseErrorKind::InvalidFileMode(lossy).into())
            }
        }
    };

    let old_mode = match header.old_mode.or(header.deleted_file_mode) {
        Some(raw) => Some(parse_mode(raw)?),
        None => None,
    };
    let new_mode = match header.new_mode.or(header.new_file_mode) {
        Some(raw) => Some(parse_mode(raw)?),
        None => None,
    };

    Ok((old_mode, new_mode))
}
/// Splits the remainder of a `diff --git ` line into its two paths.
///
/// A line that starts or ends with `"` is handled by the quoted-path
/// parser; every other line goes to the unquoted one. Returns `None` when
/// the chosen parser cannot find a valid split.
fn parse_diff_git_path<'a, T: Text + ?Sized>(line: &'a T) -> Option<(Cow<'a, T>, Cow<'a, T>)> {
    let has_quoted_path = line.starts_with("\"") || line.ends_with("\"");
    if !has_quoted_path {
        return parse_unquoted_diff_git_path(line);
    }
    parse_quoted_diff_git_path(line)
}
/// Splits an unquoted `diff --git` path pair.
///
/// Paths may contain spaces, so every space is tried as the separator. The
/// split whose two halves share the longest common path suffix (per
/// `longest_common_path_suffix`) wins; on a tie the earliest split is kept.
/// Returns `None` when no split yields a common path suffix.
fn parse_unquoted_diff_git_path<'a, T: Text + ?Sized>(
    line: &'a T,
) -> Option<(Cow<'a, T>, Cow<'a, T>)> {
    let space_positions = line
        .as_bytes()
        .iter()
        .enumerate()
        .filter_map(|(idx, &byte)| (byte == b' ').then_some(idx));

    let mut winner = None;
    let mut winner_len = 0;

    for idx in space_positions {
        let (old, tail) = line.split_at(idx);
        // `tail` starts with the separating space; drop it.
        let (_, new) = tail.split_at(1);
        if old.is_empty() || new.is_empty() {
            continue;
        }

        let shared_len = longest_common_path_suffix(old, new).map_or(0, |path| path.len());
        // Strictly greater: an earlier split keeps the win on ties.
        if shared_len > winner_len {
            winner_len = shared_len;
            winner = Some((old, new));
        }
    }

    winner.map(|(old, new)| (Cow::Borrowed(old), Cow::Borrowed(new)))
}
/// Splits a `diff --git` path pair in which at least one path is quoted.
///
/// If the line starts with `"`, the first path runs to its closing
/// unescaped quote and must be followed by a single space. Otherwise the
/// split happens at the first occurrence of ` "`. Both halves are then
/// decoded with `escaped_filename`, and the pair is accepted only if the
/// decoded paths share a common path suffix.
///
/// Returns `None` on an unterminated quote, a missing separator, a
/// decoding failure, or when the paths have no common suffix.
fn parse_quoted_diff_git_path<'a, T: Text + ?Sized>(
    line: &'a T,
) -> Option<(Cow<'a, T>, Cow<'a, T>)> {
    let (left_raw, right_raw) = if line.starts_with("\"") {
        let bytes = line.as_bytes();
        // Start just past the opening quote and walk to the closing one,
        // stepping over backslash escapes as a unit.
        let mut cursor = 1;
        while *bytes.get(cursor)? != b'"' {
            cursor += if bytes[cursor] == b'\\' { 2 } else { 1 };
        }
        let (quoted, tail) = line.split_at(cursor + 1);
        (quoted, tail.strip_prefix(" ")?)
    } else {
        // Only the second path is quoted.
        let separator_at = line.find(" \"")?;
        let (left, tail) = line.split_at(separator_at);
        // `tail` starts with the separating space; drop it.
        let (_, right) = tail.split_at(1);
        (left, right)
    };

    let left = escaped_filename(left_raw).ok()?;
    let right = escaped_filename(right_raw).ok()?;

    // Reject splits whose decoded paths do not refer to the same file name.
    longest_common_path_suffix(left.as_ref(), right.as_ref())?;
    Some((left, right))
}
/// Returns the longest whole-component path suffix shared by `a` and `b`,
/// as a slice of `a`.
///
/// * Identical inputs return `a` unchanged.
/// * Otherwise the common byte suffix must contain a `/`; the result is
///   what follows the leftmost `/` inside that common suffix, so a partial
///   component match does not count.
/// * `None` when either input is empty, nothing matches, the common suffix
///   contains no `/`, or the resulting path would be empty.
fn longest_common_path_suffix<'a, T: Text + ?Sized>(a: &'a T, b: &T) -> Option<&'a T> {
    if a.is_empty() || b.is_empty() {
        return None;
    }

    let a_bytes = a.as_bytes();
    let b_bytes = b.as_bytes();

    // Length of the common byte suffix.
    let matched = a_bytes
        .iter()
        .rev()
        .zip(b_bytes.iter().rev())
        .take_while(|(x, y)| x == y)
        .count();
    if matched == 0 {
        return None;
    }
    if matched == a.len() && a.len() == b.len() {
        // The inputs are identical.
        return Some(a);
    }

    // Leftmost `/` inside the common suffix; the shared path starts after it.
    let suffix_start = a_bytes.len() - matched;
    let slash_in_suffix = a_bytes[suffix_start..].iter().position(|&byte| byte == b'/')?;
    let path_start = suffix_start + slash_in_suffix + 1;

    let (_, path) = a.split_at(path_start);
    if path.is_empty() {
        None
    } else {
        Some(path)
    }
}
/// Derives the file operation of a binary git patch.
///
/// Binary patches have no `---` / `+++` lines, so the operation comes
/// solely from the extended header:
/// 1. `rename from` / `rename to` → `Rename`.
/// 2. `copy from` / `copy to` → `Copy`.
/// 3. The paths on the `diff --git` line, combined with the
///    `new file mode` / `deleted file mode` header lines.
///
/// `abs_patch_start` is the absolute offset of the patch in the input and
/// is used as an empty span on every error this function returns.
///
/// # Errors
///
/// Fails when a rename/copy path cannot be unescaped, or with
/// `InvalidDiffGitPath` when the `diff --git` line is missing or cannot be
/// split into two paths.
fn extract_file_op_binary<'a, T: Text + ?Sized>(
    header: &GitHeader<'a, T>,
    abs_patch_start: usize,
) -> Result<FileOperation<'a, T>, PatchSetParseError> {
    // Callers forward this function's errors untouched, so path-decoding
    // failures must be stamped here; previously they escaped via `?`
    // without a span, unlike the `InvalidDiffGitPath` error below.
    let at_patch_start = |mut err: PatchSetParseError| {
        err.set_span(abs_patch_start..abs_patch_start);
        err
    };
    let unescape = |raw: &'a T| escaped_filename(raw).map_err(|e| at_patch_start(e.into()));

    if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) {
        return Ok(FileOperation::Rename {
            from: unescape(from)?,
            to: unescape(to)?,
        });
    }
    if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) {
        return Ok(FileOperation::Copy {
            from: unescape(from)?,
            to: unescape(to)?,
        });
    }

    let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else {
        return Err(PatchSetParseError::new(
            PatchSetParseErrorKind::InvalidDiffGitPath,
            abs_patch_start..abs_patch_start,
        ));
    };

    if header.new_file_mode.is_some() {
        Ok(FileOperation::Create(modified))
    } else if header.deleted_file_mode.is_some() {
        Ok(FileOperation::Delete(original))
    } else {
        Ok(FileOperation::Modify { original, modified })
    }
}
/// Derives the file operation from the `---` / `+++` paths of a patch.
///
/// * original is `/dev/null` → `Create` of the modified path.
/// * modified is `/dev/null` → `Delete` of the original path.
/// * otherwise → `Modify`; when only one path is present it is used for
///   both sides.
///
/// # Errors
///
/// * `BothDevNull` when both paths are `/dev/null`.
/// * `DeleteMissingOriginalPath` / `CreateMissingModifiedPath` when the
///   path needed for the operation is absent.
/// * `NoFilePath` when neither path is present.
fn extract_file_op_unidiff<'a, T: Text + ?Sized>(
    original: Option<&Cow<'a, T>>,
    modified: Option<&Cow<'a, T>>,
) -> Result<FileOperation<'a, T>, PatchSetParseError> {
    let is_dev_null = |path: &Cow<'_, T>| path.as_ref().as_bytes() == DEV_NULL.as_bytes();
    let creates = original.is_some_and(is_dev_null);
    let deletes = modified.is_some_and(is_dev_null);

    match (creates, deletes) {
        (true, true) => Err(PatchSetParseErrorKind::BothDevNull.into()),
        (false, true) => {
            let path = original.ok_or(PatchSetParseErrorKind::DeleteMissingOriginalPath)?;
            Ok(FileOperation::Delete(path.clone()))
        }
        (true, false) => {
            let path = modified.ok_or(PatchSetParseErrorKind::CreateMissingModifiedPath)?;
            Ok(FileOperation::Create(path.clone()))
        }
        (false, false) => {
            // A lone path stands in for both sides of the modification.
            let (original, modified) = match (original, modified) {
                (Some(original), Some(modified)) => (original, modified),
                (None, Some(only)) | (Some(only), None) => (only, only),
                (None, None) => return Err(PatchSetParseErrorKind::NoFilePath.into()),
            };
            Ok(FileOperation::Modify {
                original: original.clone(),
                modified: modified.clone(),
            })
        }
    }
}
/// Removes a single trailing `\n` from `line`, if present.
///
/// A preceding `\r` is left in place.
fn strip_line_ending<T: Text + ?Sized>(line: &T) -> &T {
    match line.strip_suffix("\n") {
        Some(without_newline) => without_newline,
        None => line,
    }
}