use anyhow::{Result, anyhow};
use std::path::PathBuf;
use std::time::SystemTime;
use super::{DiffContent, DiffLine, FileDiff, FileStatus, Hunk, LineKind, path::bytes_to_path};
/// Extracts the file path from the text that follows `diff --git ` on a header line.
///
/// Two spellings are recognised:
///
/// * **Quoted** — `"a/<path>" "b/<path>"`. Both tokens are decoded with
///   [`parse_quoted_token`]; the path is taken from the second token, which must
///   start with `b/`. Anything after the second token is ignored.
/// * **Unquoted** — `a/<path> b/<path>`. Because the path itself may contain
///   spaces, the split point is derived from the total length: the text must be
///   exactly `"a/" + P + " b/" + P` for a non-empty `P`, and both copies of `P`
///   must be byte-identical.
///
/// Returns `None` when `rest` matches neither spelling.
fn parse_diff_git_header(rest: &str) -> Option<PathBuf> {
    let raw = rest.as_bytes();

    // Quoted spelling: decode both tokens and keep the `b/` side.
    if raw.starts_with(b"\"a/") {
        let (_old_side, remainder) = parse_quoted_token(raw)?;
        let second_token = remainder.strip_prefix(b" ")?;
        let (new_side, _trailing) = parse_quoted_token(second_token)?;
        let path_bytes = new_side.strip_prefix(b"b/")?;
        return Some(bytes_to_path(path_bytes));
    }

    // Unquoted spelling: 2 bytes of "a/", 3 bytes of " b/", and two equally long
    // paths of at least one byte each, i.e. at least 7 bytes and an even payload.
    let total = raw.len();
    if total < 7 {
        return None;
    }
    let payload = total - 5;
    if !payload.is_multiple_of(2) {
        return None;
    }
    let path_len = payload / 2;

    let after_a_prefix = raw.strip_prefix(b"a/")?;
    let (old_path, after_old_path) = after_a_prefix.split_at(path_len);
    let new_path = after_old_path.strip_prefix(b" b/")?;

    // Only headers whose two sides name the same path are accepted here.
    (old_path == new_path).then(|| bytes_to_path(old_path))
}
/// Decodes one double-quoted, backslash-escaped token from the front of `bytes`.
///
/// `bytes` must begin with `"`. The token runs up to the next unescaped `"`.
/// Supported escapes are `\a \b \t \n \v \f \r \" \\` and `\NNN`, where `NNN` is
/// exactly three octal digits whose value fits in one byte.
///
/// Returns the decoded bytes together with the input that follows the closing
/// quote, or `None` if the opening quote is missing, the token is unterminated,
/// or an escape sequence is unknown, truncated, or out of range.
fn parse_quoted_token(bytes: &[u8]) -> Option<(Vec<u8>, &[u8])> {
    let mut rest = bytes.strip_prefix(b"\"")?;
    let mut decoded: Vec<u8> = Vec::new();

    loop {
        // Running out of input before the closing quote means the token is unterminated.
        let (&head, tail) = rest.split_first()?;
        match head {
            b'"' => return Some((decoded, tail)),
            b'\\' => {
                let (&escape, after_escape) = tail.split_first()?;
                let simple = match escape {
                    b'a' => Some(0x07),
                    b'b' => Some(0x08),
                    b't' => Some(b'\t'),
                    b'n' => Some(b'\n'),
                    b'v' => Some(0x0b),
                    b'f' => Some(0x0c),
                    b'r' => Some(b'\r'),
                    b'"' => Some(b'"'),
                    b'\\' => Some(b'\\'),
                    b'0'..=b'7' => None,
                    _ => return None,
                };
                match simple {
                    Some(byte) => {
                        decoded.push(byte);
                        rest = after_escape;
                    }
                    None => {
                        // Octal escape: exactly three digits, starting at `escape`.
                        let digits = tail.get(..3)?;
                        let mut value: u32 = 0;
                        for &digit in digits {
                            if !(b'0'..=b'7').contains(&digit) {
                                return None;
                            }
                            value = value * 8 + u32::from(digit - b'0');
                        }
                        // Values above 0o377 do not fit in a byte and are rejected.
                        decoded.push(u8::try_from(value).ok()?);
                        rest = &tail[3..];
                    }
                }
            }
            literal => {
                decoded.push(literal);
                rest = tail;
            }
        }
    }
}
pub(crate) fn parse_unified_diff(raw: &str) -> Result<Vec<FileDiff>> {
let mut files: Vec<FileDiff> = Vec::new();
let mut current_hunks: Vec<Hunk> = Vec::new();
let mut current_hunk: Option<Hunk> = None;
fn finish_hunk(current_hunk: &mut Option<Hunk>, hunks: &mut Vec<Hunk>) {
if let Some(h) = current_hunk.take() {
hunks.push(h);
}
}
fn finish_file(
files: &mut [FileDiff],
current_hunks: &mut Vec<Hunk>,
current_hunk: &mut Option<Hunk>,
) {
finish_hunk(current_hunk, current_hunks);
if let Some(file) = files.last_mut() {
let hunks = std::mem::take(current_hunks);
if !matches!(file.content, DiffContent::Binary) {
file.content = DiffContent::Text(hunks);
}
}
}
for line in raw.lines() {
if let Some(rest) = line.strip_prefix("diff --git ") {
finish_file(&mut files, &mut current_hunks, &mut current_hunk);
let path = parse_diff_git_header(rest)
.ok_or_else(|| anyhow!("unparseable `diff --git` header: {rest}"))?;
files.push(FileDiff {
path,
status: FileStatus::Modified,
added: 0,
deleted: 0,
content: DiffContent::Text(Vec::new()),
mtime: SystemTime::UNIX_EPOCH,
header_prefix: None,
});
continue;
}
if line.starts_with("Binary files ") && line.ends_with(" differ") {
if let Some(file) = files.last_mut() {
file.content = DiffContent::Binary;
}
continue;
}
if line.starts_with("new file mode ") {
if let Some(file) = files.last_mut() {
file.status = FileStatus::Added;
}
continue;
}
if line.starts_with("deleted file mode ") {
if let Some(file) = files.last_mut() {
file.status = FileStatus::Deleted;
}
continue;
}
if let Some(rest) = line.strip_prefix("@@ ") {
finish_hunk(&mut current_hunk, &mut current_hunks);
let (header, context) = match rest.split_once(" @@") {
Some((header, tail)) => {
let trimmed = tail.trim();
let context = if trimmed.is_empty() {
None
} else {
Some(trimmed.to_string())
};
(header, context)
}
None => (rest.trim_end_matches("@@"), None),
};
let mut parts = header.split_whitespace();
let old = parts
.next()
.ok_or_else(|| anyhow!("malformed hunk header missing old range: {line}"))?;
let new = parts
.next()
.ok_or_else(|| anyhow!("malformed hunk header missing new range: {line}"))?;
let (old_start, old_count) = parse_hunk_range(old.trim_start_matches('-'))
.ok_or_else(|| anyhow!("malformed old hunk range: {line}"))?;
let (new_start, new_count) = parse_hunk_range(new.trim_start_matches('+'))
.ok_or_else(|| anyhow!("malformed new hunk range: {line}"))?;
current_hunk = Some(Hunk {
old_start,
old_count,
new_start,
new_count,
lines: Vec::new(),
context,
});
continue;
}
if let Some(hunk) = current_hunk.as_mut() {
if line == r"\ No newline at end of file" {
if let Some(last) = hunk.lines.last_mut() {
last.has_trailing_newline = false;
}
continue;
}
if let Some(content) = line.strip_prefix('+') {
hunk.lines.push(DiffLine {
kind: LineKind::Added,
content: content.to_string(),
has_trailing_newline: true,
});
if let Some(file) = files.last_mut() {
file.added += 1;
}
continue;
}
if let Some(content) = line.strip_prefix('-') {
hunk.lines.push(DiffLine {
kind: LineKind::Deleted,
content: content.to_string(),
has_trailing_newline: true,
});
if let Some(file) = files.last_mut() {
file.deleted += 1;
}
continue;
}
if let Some(content) = line.strip_prefix(' ') {
hunk.lines.push(DiffLine {
kind: LineKind::Context,
content: content.to_string(),
has_trailing_newline: true,
});
continue;
}
}
}
finish_file(&mut files, &mut current_hunks, &mut current_hunk);
Ok(files)
}
/// Splits `text` into lines, reporting for each whether it ended with `\n`.
///
/// Each returned pair is `(line, has_trailing_newline)`. The terminating `\n` is
/// removed from `line`, and when a line is newline-terminated a single `\r`
/// directly before the `\n` is removed as well. A final segment without a
/// terminating `\n` is returned unchanged (including any trailing `\r`) with the
/// flag set to `false`. Empty input yields an empty vector.
pub(in crate::git) fn split_logical_lines(text: &str) -> Vec<(String, bool)> {
    let mut lines = Vec::new();
    // `split_inclusive` yields nothing for an empty string, so no special case is needed.
    for chunk in text.split_inclusive('\n') {
        let entry = match chunk.strip_suffix('\n') {
            Some(body) => (body.strip_suffix('\r').unwrap_or(body).to_string(), true),
            None => (chunk.to_string(), false),
        };
        lines.push(entry);
    }
    lines
}
/// Parses one side of a hunk range, written as `start` or `start,count`.
///
/// Any leading `-` / `+` sign must already have been removed by the caller.
/// When the count is omitted it defaults to `1`. Returns `None` if either
/// number fails to parse as `usize`.
fn parse_hunk_range(spec: &str) -> Option<(usize, usize)> {
    let (start_text, count_text) = match spec.split_once(',') {
        Some((start_text, count_text)) => (start_text, Some(count_text)),
        None => (spec, None),
    };
    let start: usize = start_text.parse().ok()?;
    let count: usize = match count_text {
        Some(text) => text.parse().ok()?,
        None => 1,
    };
    Some((start, count))
}