use sha1::{Digest, Sha1};
use similar::{ChangeTag, TextDiff};
use crate::diff::{diff_trees, zero_oid};
use crate::error::Result;
use crate::merge_file;
use crate::objects::{parse_commit, ObjectId, ObjectKind};
use crate::odb::Odb;
/// Selects how `compute_patch_ids_from_text` hashes a textual patch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchIdMode {
/// Whitespace is stripped from every hashed line, and the running hash is
/// not flushed into the result at file boundaries within a patch.
Unstable,
/// Whitespace is stripped from every hashed line, and the running hash is
/// flushed into the accumulated result at each file boundary.
Stable,
/// Flushes at file boundaries like `Stable`, but hashes each line exactly as
/// it appears (whitespace included) and also hashes `\ ` marker lines.
Verbatim,
}
pub fn compute_patch_ids_from_text(input: &[u8], mode: PatchIdMode) -> Vec<(ObjectId, ObjectId)> {
let stable = mode != PatchIdMode::Unstable;
let verbatim = mode == PatchIdMode::Verbatim;
let mut results: Vec<(ObjectId, ObjectId)> = Vec::new();
let mut ctx = Sha1::new();
let mut result = [0u8; 20];
let mut patchlen: usize = 0;
let mut before: i32 = -1;
let mut after: i32 = -1;
let mut diff_is_binary = false;
let mut pre_oid_str = String::new();
let mut post_oid_str = String::new();
let mut current_commit: Option<ObjectId> = None;
let mut implicit_commit = true;
let lines = split_lines_with_nl(input);
let mut i = 0;
while i < lines.len() {
let raw = lines[i];
i += 1;
let line = std::str::from_utf8(raw).unwrap_or("");
let oid_candidate: Option<&str> = if let Some(rest) = line.strip_prefix("commit ") {
Some(rest)
} else if let Some(rest) = line.strip_prefix("From ") {
Some(rest)
} else {
None
};
if let Some(candidate) = oid_candidate {
if let Some(oid) = try_parse_oid_prefix(candidate) {
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit.take() {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
result = [0u8; 20];
ctx = Sha1::new();
patchlen = 0;
before = -1;
after = -1;
diff_is_binary = false;
pre_oid_str.clear();
post_oid_str.clear();
current_commit = Some(oid);
implicit_commit = false;
continue;
}
}
if line.starts_with("\\ ") && line.len() > 12 {
if verbatim {
ctx.update(raw);
patchlen += raw.len();
}
continue;
}
if patchlen == 0 && !line.starts_with("diff ") {
continue;
}
if implicit_commit && line.starts_with("diff ") && current_commit.is_none() {
current_commit = Some(ObjectId::from_bytes(&[0u8; 20]).unwrap());
implicit_commit = false;
}
if before == -1 {
if line.starts_with("GIT binary patch") || line.starts_with("Binary files") {
diff_is_binary = true;
before = 0;
let pre = pre_oid_str.clone();
let post = post_oid_str.clone();
ctx.update(pre.as_bytes());
ctx.update(post.as_bytes());
patchlen += pre.len() + post.len();
if stable {
text_flush_one_hunk(&mut result, &mut ctx);
}
continue;
} else if let Some(rest) = line.strip_prefix("index ") {
if let Some(dd) = rest.find("..") {
pre_oid_str = rest[..dd].to_owned();
let tail = &rest[dd + 2..];
let end = tail
.find(|c: char| c.is_ascii_whitespace())
.unwrap_or_else(|| {
tail.trim_end_matches('\n').trim_end_matches('\r').len()
});
post_oid_str = tail[..end].to_owned();
}
continue;
} else if line.starts_with("--- ") {
before = 1;
after = 1;
} else if !line.chars().next().is_some_and(|c| c.is_ascii_alphabetic()) {
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit.take() {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
result = [0u8; 20];
ctx = Sha1::new();
patchlen = 0;
before = -1;
after = -1;
diff_is_binary = false;
continue;
}
}
if diff_is_binary {
if line.starts_with("diff ") {
diff_is_binary = false;
before = -1;
i -= 1; }
continue;
}
if before == 0 && after == 0 {
if line.starts_with("@@ -") {
let (b, a) = scan_hunk_header(line);
before = b;
after = a;
continue;
}
if !line.starts_with("diff ") {
continue;
}
if stable {
text_flush_one_hunk(&mut result, &mut ctx);
}
before = -1;
after = -1;
i -= 1;
continue;
}
let first = raw.first().copied().unwrap_or(b' ');
if first == b'-' || first == b' ' {
before -= 1;
}
if first == b'+' || first == b' ' {
after -= 1;
}
let hashed = if verbatim {
ctx.update(raw);
raw.len()
} else {
hash_without_whitespace(&mut ctx, raw)
};
patchlen += hashed;
}
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
results
}
/// Finalizes the pending SHA-1 state and adds its digest into `result`.
///
/// `ctx` is replaced by a fresh hasher. The 20 digest bytes are added to
/// `result` position by position starting at index 0, carrying overflow into
/// the next index; a carry out of the last byte is discarded.
fn text_flush_one_hunk(result: &mut [u8; 20], ctx: &mut Sha1) {
    let finished = std::mem::replace(ctx, Sha1::new());
    let digest: [u8; 20] = finished.finalize().into();

    let mut carry = 0u16;
    for (acc, &byte) in result.iter_mut().zip(digest.iter()) {
        let sum = carry + u16::from(*acc) + u16::from(byte);
        // The cast keeps the low eight bits; the rest becomes the carry.
        *acc = sum as u8;
        carry = sum >> 8;
    }
}
/// Feeds `raw` into `ctx` with all ASCII whitespace bytes removed.
///
/// Contiguous runs of non-whitespace bytes are hashed in one call each rather
/// than byte by byte; the resulting digest is the same because the hasher is
/// a streaming one.
///
/// Returns the number of bytes that were hashed.
fn hash_without_whitespace(ctx: &mut Sha1, raw: &[u8]) -> usize {
    let mut count = 0;
    for run in raw.split(|b| b.is_ascii_whitespace()) {
        // Adjacent whitespace yields empty runs, which contribute nothing.
        ctx.update(run);
        count += run.len();
    }
    count
}
/// Parses a 40-digit hexadecimal object id from the start of `s`.
///
/// Trailing `\n` and then `\r` characters are ignored. The id must either
/// span the whole remaining text or be followed by an ASCII whitespace byte.
/// Returns `None` for anything else, including text whose 40th byte falls
/// inside a multi-byte UTF-8 character (slicing the `str` there would panic,
/// so the check is done on bytes).
fn try_parse_oid_prefix(s: &str) -> Option<ObjectId> {
    let bytes = s
        .trim_end_matches('\n')
        .trim_end_matches('\r')
        .as_bytes();
    let hex = bytes.get(..40)?;
    if bytes.get(40).is_some_and(|b| !b.is_ascii_whitespace()) {
        return None;
    }

    let mut oid = [0u8; 20];
    for (slot, pair) in oid.iter_mut().zip(hex.chunks_exact(2)) {
        // `hex_val` rejects every non-hex byte, including non-ASCII ones.
        *slot = (hex_val(pair[0])? << 4) | hex_val(pair[1])?;
    }
    ObjectId::from_bytes(&oid).ok()
}
/// Returns the numeric value (0-15) of an ASCII hexadecimal digit, accepting
/// both lowercase and uppercase letters, or `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // Bytes >= 0x80 map to non-ASCII chars, for which `to_digit` yields `None`.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
/// Extracts the old and new line counts from a hunk header line such as
/// `@@ -12,3 +14,4 @@`.
///
/// Returns `(old_count, new_count)`. A side without an explicit `,count`
/// defaults to 1, as does the new side when no ` +` separator is found.
/// A line that does not start with `@@ -` yields `(1, 1)`.
fn scan_hunk_header(line: &str) -> (i32, i32) {
    let Some(ranges) = line.strip_prefix("@@ -") else {
        return (1, 1);
    };
    let old_count = parse_hunk_count(ranges);
    let new_count = match ranges.find(" +") {
        Some(pos) => parse_hunk_count_opt(&ranges[pos + 2..]).unwrap_or(1),
        None => 1,
    };
    (old_count, new_count)
}
/// Parses the line count of one side of a hunk range written as
/// `start[,count]`.
///
/// Leading digits (the start line) are skipped. If a comma follows, the
/// digits after it are returned as the count. The result is 1 when there is
/// no comma, no digits after the comma, or the number does not fit in `i32`.
fn parse_hunk_count(s: &str) -> i32 {
    let after_start = s.trim_start_matches(|c: char| c.is_ascii_digit());
    let Some(count_part) = after_start.strip_prefix(',') else {
        return 1;
    };
    let digit_len = count_part
        .bytes()
        .take_while(|b| b.is_ascii_digit())
        .count();
    count_part[..digit_len].parse().unwrap_or(1)
}
/// `Option`-returning wrapper around `parse_hunk_count`.
///
/// Always returns `Some`, because `parse_hunk_count` itself falls back to 1
/// when no count can be parsed.
fn parse_hunk_count_opt(s: &str) -> Option<i32> {
    let count = parse_hunk_count(s);
    Some(count)
}
/// Splits `input` into lines, keeping each `\n` terminator attached to the
/// line it ends.
///
/// A final line without a terminator is returned as-is; empty input yields
/// no lines, and no returned slice is ever empty.
fn split_lines_with_nl(input: &[u8]) -> Vec<&[u8]> {
    input.split_inclusive(|&byte| byte == b'\n').collect()
}
/// Computes the patch ID of a commit from the object database.
///
/// The commit's tree is diffed against its parent's tree (or against nothing
/// for a root commit). The changed entries are sorted by path, each entry is
/// hashed in its own SHA-1 context, and the per-entry digests are summed into
/// the result via `text_flush_one_hunk`.
///
/// For every entry the hash covers a `diff --git` style header, any mode
/// change, and then either
/// * the hex ids of both blobs, when `merge_file::is_binary` flags either
///   side, or
/// * the `---`/`+++` header followed by every line of the line diff
///   (unchanged lines included) with ASCII whitespace removed.
///
/// Text blobs are decoded leniently: invalid UTF-8 sequences become U+FFFD
/// instead of the whole blob being treated as empty, so changes to non-UTF-8
/// text files still contribute their content to the ID.
///
/// Returns `Ok(None)` when `commit_oid` is not a commit or has more than one
/// parent.
///
/// # Errors
///
/// Propagates failures from reading or parsing objects, from `diff_trees`,
/// and from building the final `ObjectId`.
pub fn compute_patch_id(odb: &Odb, commit_oid: &ObjectId) -> Result<Option<ObjectId>> {
    let obj = odb.read(commit_oid)?;
    if obj.kind != ObjectKind::Commit {
        return Ok(None);
    }
    let commit = parse_commit(&obj.data)?;
    // Merge commits have no single patch to identify.
    if commit.parents.len() > 1 {
        return Ok(None);
    }
    let parent_tree_oid = if commit.parents.is_empty() {
        None
    } else {
        let parent_obj = odb.read(&commit.parents[0])?;
        let parent_commit = parse_commit(&parent_obj.data)?;
        Some(parent_commit.tree)
    };

    let mut diffs = diff_trees(odb, parent_tree_oid.as_ref(), Some(&commit.tree), "")?;
    diffs.sort_by(|a, b| a.path().cmp(b.path()));

    let mut result = [0u8; 20];
    // Scratch buffer reused for every hashed diff line.
    let mut line_buf: Vec<u8> = Vec::new();
    for entry in &diffs {
        let old_path = entry.old_path.as_deref().unwrap_or("");
        let new_path = entry.new_path.as_deref().unwrap_or("");
        let mut old_path_buf = old_path.as_bytes().to_vec();
        let mut new_path_buf = new_path.as_bytes().to_vec();
        let len1 = remove_space_bytes(&mut old_path_buf);
        let len2 = remove_space_bytes(&mut new_path_buf);
        let old_mode = parse_mode_u32(&entry.old_mode);
        let new_mode = parse_mode_u32(&entry.new_mode);

        let mut ctx = Sha1::new();
        patch_id_add_string(&mut ctx, b"diff--git");
        patch_id_add_string(&mut ctx, b"a/");
        ctx.update(&old_path_buf[..len1]);
        patch_id_add_string(&mut ctx, b"b/");
        ctx.update(&new_path_buf[..len2]);

        // A mode of zero marks the missing side of an addition or deletion.
        if old_mode == 0 {
            patch_id_add_string(&mut ctx, b"newfilemode");
            patch_id_add_mode(&mut ctx, new_mode);
        } else if new_mode == 0 {
            patch_id_add_string(&mut ctx, b"deletedfilemode");
            patch_id_add_mode(&mut ctx, old_mode);
        } else if old_mode != new_mode {
            patch_id_add_string(&mut ctx, b"oldmode");
            patch_id_add_mode(&mut ctx, old_mode);
            patch_id_add_string(&mut ctx, b"newmode");
            patch_id_add_mode(&mut ctx, new_mode);
        }

        let old_bytes = read_blob(odb, &entry.old_oid)?;
        let new_bytes = read_blob(odb, &entry.new_oid)?;
        if merge_file::is_binary(&old_bytes) || merge_file::is_binary(&new_bytes) {
            let a = entry.old_oid.to_hex();
            let b = entry.new_oid.to_hex();
            ctx.update(a.as_bytes());
            ctx.update(b.as_bytes());
        } else {
            // Lossy decoding keeps the content of non-UTF-8 text files in the
            // diff; falling back to "" would make all such changes identical.
            let old_text = String::from_utf8_lossy(&old_bytes);
            let new_text = String::from_utf8_lossy(&new_bytes);
            let old_str: &str = &old_text;
            let new_str: &str = &new_text;

            if old_mode == 0 {
                patch_id_add_string(&mut ctx, b"---/dev/null");
                patch_id_add_string(&mut ctx, b"+++b/");
                ctx.update(&new_path_buf[..len2]);
            } else if new_mode == 0 {
                patch_id_add_string(&mut ctx, b"---a/");
                ctx.update(&old_path_buf[..len1]);
                patch_id_add_string(&mut ctx, b"+++/dev/null");
            } else {
                patch_id_add_string(&mut ctx, b"---a/");
                ctx.update(&old_path_buf[..len1]);
                patch_id_add_string(&mut ctx, b"+++b/");
                ctx.update(&new_path_buf[..len2]);
            }

            let diff = TextDiff::from_lines(old_str, new_str);
            for change in diff.iter_all_changes() {
                let prefix = match change.tag() {
                    ChangeTag::Equal => b' ',
                    ChangeTag::Delete => b'-',
                    ChangeTag::Insert => b'+',
                };
                let text = change.as_str().unwrap_or("");
                for piece in text.split_inclusive('\n') {
                    let line_body = piece.strip_suffix('\n').unwrap_or(piece);
                    line_buf.clear();
                    line_buf.push(prefix);
                    line_buf.extend_from_slice(line_body.as_bytes());
                    line_buf.push(b'\n');
                    // Whitespace removal also drops the ' ' prefix of
                    // unchanged lines and the trailing newline.
                    let n = remove_space_bytes(&mut line_buf);
                    ctx.update(&line_buf[..n]);
                }
            }
        }
        text_flush_one_hunk(&mut result, &mut ctx);
    }
    ObjectId::from_bytes(&result).map(Some)
}
/// Parses a file mode written in octal, ignoring surrounding whitespace.
///
/// Returns 0 when the text is not a valid octal number that fits in `u32`.
fn parse_mode_u32(mode: &str) -> u32 {
    let digits = mode.trim();
    match u32::from_str_radix(digits, 8) {
        Ok(bits) => bits,
        Err(_) => 0,
    }
}
fn patch_id_add_string(ctx: &mut Sha1, s: &[u8]) {
ctx.update(s);
}
/// Feeds `mode` into the hash context as octal text, zero-padded to at least
/// six digits.
fn patch_id_add_mode(ctx: &mut Sha1, mode: u32) {
    ctx.update(format!("{mode:06o}"));
}
/// Compacts the non-whitespace bytes of `buf` to its front, in place.
///
/// Returns the number of bytes kept. The buffer is not truncated: only
/// `buf[..returned]` is meaningful afterwards, and the bytes beyond it are
/// left as they were.
fn remove_space_bytes(buf: &mut Vec<u8>) -> usize {
    let mut kept = 0usize;
    let mut read = 0usize;
    while read < buf.len() {
        let byte = buf[read];
        read += 1;
        if byte.is_ascii_whitespace() {
            continue;
        }
        // `kept` never overtakes `read`, so unread bytes are never clobbered.
        buf[kept] = byte;
        kept += 1;
    }
    kept
}
/// Returns the content of the object `oid`, or an empty buffer when `oid`
/// equals `zero_oid()`.
///
/// # Errors
///
/// Propagates any error from `Odb::read`.
fn read_blob(odb: &Odb, oid: &ObjectId) -> Result<Vec<u8>> {
    let data = if *oid == zero_oid() {
        Vec::new()
    } else {
        odb.read(oid)?.data
    };
    Ok(data)
}