use sha1::{Digest, Sha1};
use similar::{ChangeTag, TextDiff};
use crate::diff::{diff_trees, zero_oid};
use crate::error::Result;
use crate::objects::{parse_commit, ObjectId, ObjectKind};
use crate::odb::Odb;
/// Selects how `compute_patch_ids_from_text` feeds a patch into its hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchIdMode {
/// Whitespace is stripped from hashed lines and the running hash is only
/// folded into the result when the patch ends.
Unstable,
/// Whitespace is stripped from hashed lines, and the running hash is folded
/// into the result at file-diff boundaries as well as at the end of the patch.
Stable,
/// Same folding as `Stable`, but lines are hashed byte-for-byte (whitespace
/// kept) and long `\ ` marker lines are hashed too.
Verbatim,
}
pub fn compute_patch_ids_from_text(input: &[u8], mode: PatchIdMode) -> Vec<(ObjectId, ObjectId)> {
let stable = mode != PatchIdMode::Unstable;
let verbatim = mode == PatchIdMode::Verbatim;
let mut results: Vec<(ObjectId, ObjectId)> = Vec::new();
let mut ctx = Sha1::new();
let mut result = [0u8; 20];
let mut patchlen: usize = 0;
let mut before: i32 = -1;
let mut after: i32 = -1;
let mut diff_is_binary = false;
let mut pre_oid_str = String::new();
let mut post_oid_str = String::new();
let mut current_commit: Option<ObjectId> = None;
let lines = split_lines_with_nl(input);
let mut i = 0;
while i < lines.len() {
let raw = lines[i];
i += 1;
let line = std::str::from_utf8(raw).unwrap_or("");
let oid_candidate: Option<&str> = if let Some(rest) = line.strip_prefix("commit ") {
Some(rest)
} else if let Some(rest) = line.strip_prefix("From ") {
Some(rest)
} else {
None
};
if let Some(candidate) = oid_candidate {
if let Some(oid) = try_parse_oid_prefix(candidate) {
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit.take() {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
result = [0u8; 20];
ctx = Sha1::new();
patchlen = 0;
before = -1;
after = -1;
diff_is_binary = false;
pre_oid_str.clear();
post_oid_str.clear();
current_commit = Some(oid);
continue;
}
}
if line.starts_with("\\ ") && line.len() > 12 {
if verbatim {
ctx.update(raw);
patchlen += raw.len();
}
continue;
}
if patchlen == 0 && !line.starts_with("diff ") {
continue;
}
if before == -1 {
if line.starts_with("GIT binary patch") || line.starts_with("Binary files") {
diff_is_binary = true;
before = 0;
let pre = pre_oid_str.clone();
let post = post_oid_str.clone();
ctx.update(pre.as_bytes());
ctx.update(post.as_bytes());
patchlen += pre.len() + post.len();
if stable {
text_flush_one_hunk(&mut result, &mut ctx);
}
continue;
} else if let Some(rest) = line.strip_prefix("index ") {
if let Some(dd) = rest.find("..") {
pre_oid_str = rest[..dd].to_owned();
let tail = &rest[dd + 2..];
let end = tail
.find(|c: char| c.is_ascii_whitespace())
.unwrap_or_else(|| {
tail.trim_end_matches('\n').trim_end_matches('\r').len()
});
post_oid_str = tail[..end].to_owned();
}
continue;
} else if line.starts_with("--- ") {
before = 1;
after = 1;
} else if !line
.chars()
.next()
.is_some_and(|c| c.is_ascii_alphabetic())
{
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit.take() {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
result = [0u8; 20];
ctx = Sha1::new();
patchlen = 0;
before = -1;
after = -1;
diff_is_binary = false;
continue;
}
}
if diff_is_binary {
if line.starts_with("diff ") {
diff_is_binary = false;
before = -1;
i -= 1; }
continue;
}
if before == 0 && after == 0 {
if line.starts_with("@@ -") {
let (b, a) = scan_hunk_header(line);
before = b;
after = a;
continue;
}
if !line.starts_with("diff ") {
continue;
}
if stable {
text_flush_one_hunk(&mut result, &mut ctx);
}
before = -1;
after = -1;
i -= 1;
continue;
}
let first = raw.first().copied().unwrap_or(b' ');
if first == b'-' || first == b' ' {
before -= 1;
}
if first == b'+' || first == b' ' {
after -= 1;
}
let hashed = if verbatim {
ctx.update(raw);
raw.len()
} else {
hash_without_whitespace(&mut ctx, raw)
};
patchlen += hashed;
}
text_flush_one_hunk(&mut result, &mut ctx);
if patchlen > 0 {
if let Some(coid) = current_commit {
if let Ok(pid) = ObjectId::from_bytes(&result) {
results.push((pid, coid));
}
}
}
results
}
/// Finalizes the digest pending in `ctx`, leaves a fresh hasher in its place,
/// and adds the 20 digest bytes into `result` as a multi-byte sum: index 0 is
/// added first and the carry propagates towards higher indices (a carry out
/// of the last byte is dropped).
fn text_flush_one_hunk(result: &mut [u8; 20], ctx: &mut Sha1) {
    let finished = std::mem::replace(ctx, Sha1::new());
    let digest: [u8; 20] = finished.finalize().into();

    let mut carry = 0u16;
    for (acc, add) in result.iter_mut().zip(digest.iter()) {
        let sum = carry + u16::from(*acc) + u16::from(*add);
        // Keep the low byte, carry the rest into the next position.
        *acc = sum as u8;
        carry = sum >> 8;
    }
}
/// Feeds every byte of `raw` that is not ASCII whitespace into `ctx`, in
/// order, and returns how many bytes were fed.
fn hash_without_whitespace(ctx: &mut Sha1, raw: &[u8]) -> usize {
    let mut kept = 0;
    // Splitting on whitespace yields exactly the non-whitespace runs; feeding
    // them one after another produces the same byte stream as feeding the
    // surviving bytes individually.
    for run in raw.split(|byte| byte.is_ascii_whitespace()) {
        ctx.update(run);
        kept += run.len();
    }
    kept
}
/// Parses an object id from the first 40 bytes of `s`.
///
/// Trailing `\n` / `\r` characters are ignored. Returns `None` unless `s`
/// starts with exactly 40 ASCII hex digits that are followed either by the
/// end of the string or by an ASCII whitespace byte.
///
/// Never panics, whatever text follows the hex digits.
fn try_parse_oid_prefix(s: &str) -> Option<ObjectId> {
    let s = s.trim_end_matches('\n').trim_end_matches('\r');

    // Work on bytes: slicing the `str` at offset 40 would panic when a
    // multi-byte character straddles that offset.
    let raw = s.as_bytes();
    let hex = raw.get(..40)?;
    if let Some(next) = raw.get(40) {
        if !next.is_ascii_whitespace() {
            return None;
        }
    }

    let mut bytes = [0u8; 20];
    for (slot, pair) in bytes.iter_mut().zip(hex.chunks_exact(2)) {
        // `hex_val` rejects anything that is not an ASCII hex digit.
        let hi = hex_val(pair[0])?;
        let lo = hex_val(pair[1])?;
        *slot = (hi << 4) | lo;
    }
    ObjectId::from_bytes(&bytes).ok()
}
/// Returns the numeric value (0–15) of an ASCII hex digit, accepting both
/// upper- and lower-case letters, or `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // A radix-16 digit is always < 16, so narrowing to `u8` is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
/// Extracts the old and new line counts from a `@@ -a,b +c,d @@` hunk header.
///
/// Returns `(old_count, new_count)`. A count that is omitted defaults to 1,
/// and a line that does not start with `@@ -` yields `(1, 1)`.
fn scan_hunk_header(line: &str) -> (i32, i32) {
    let Some(ranges) = line.strip_prefix("@@ -") else {
        return (1, 1);
    };

    let old_count = parse_hunk_count(ranges);
    // The new-side range starts right after the first " +".
    let new_count = match ranges.find(" +") {
        Some(pos) => parse_hunk_count_opt(&ranges[pos + 2..]).unwrap_or(1),
        None => 1,
    };
    (old_count, new_count)
}
/// Reads the line count from a `start[,count]` range at the beginning of `s`.
///
/// Leading digits (the start line) are skipped; if a comma follows, the
/// digits after it are parsed as the count. Returns 1 when there is no comma
/// or when the count is empty or does not fit in an `i32`.
fn parse_hunk_count(s: &str) -> i32 {
    let start_len = s.bytes().take_while(u8::is_ascii_digit).count();
    let Some(after_comma) = s[start_len..].strip_prefix(',') else {
        return 1;
    };
    let count_len = after_comma.bytes().take_while(u8::is_ascii_digit).count();
    after_comma[..count_len].parse().unwrap_or(1)
}
/// `Option`-returning wrapper around [`parse_hunk_count`]; always `Some`.
fn parse_hunk_count_opt(s: &str) -> Option<i32> {
    let count = parse_hunk_count(s);
    Some(count)
}
/// Splits `input` into lines, keeping each line's trailing `\n`.
///
/// A final line without a terminator is returned as-is; empty input yields an
/// empty vector.
fn split_lines_with_nl(input: &[u8]) -> Vec<&[u8]> {
    input.split_inclusive(|&byte| byte == b'\n').collect()
}
/// Computes a patch ID for `commit_oid` by diffing the commit's tree against
/// its parent's tree (or against nothing when the commit has no parent).
///
/// Returns `Ok(None)` when the object is not a commit or when the commit has
/// more than one parent. Otherwise the changed entries are sorted by path and
/// hashed: for each entry a `diff --git a/<src> b/<dst>` header line (paths
/// with whitespace removed), followed by the non-whitespace bytes of every
/// inserted or deleted line. Blob contents that are not valid UTF-8 are
/// treated as empty text.
///
/// # Errors
///
/// Propagates failures from reading or parsing objects, from the tree diff,
/// and from constructing the resulting object id.
pub fn compute_patch_id(odb: &Odb, commit_oid: &ObjectId) -> Result<Option<ObjectId>> {
    let obj = odb.read(commit_oid)?;
    if obj.kind != ObjectKind::Commit {
        return Ok(None);
    }
    let commit = parse_commit(&obj.data)?;

    let parent_tree_oid = match commit.parents.len() {
        0 => None,
        1 => {
            let parent_obj = odb.read(&commit.parents[0])?;
            let parent_commit = parse_commit(&parent_obj.data)?;
            Some(parent_commit.tree)
        }
        // Merge commits have no single patch.
        _ => return Ok(None),
    };

    let mut diffs = diff_trees(odb, parent_tree_oid.as_ref(), Some(&commit.tree), "")?;
    diffs.sort_by(|lhs, rhs| lhs.path().cmp(rhs.path()));

    let mut hasher = Sha1::new();
    for entry in &diffs {
        // Each side falls back to the other side's path when its own is absent.
        let old_path = entry.old_path.as_deref();
        let new_path = entry.new_path.as_deref();
        let src_compact = compact_path(old_path.or(new_path).unwrap_or(""));
        let dst_compact = compact_path(new_path.or(old_path).unwrap_or(""));
        hasher.update(format!("diff --git a/{src_compact} b/{dst_compact}\n").as_bytes());

        let old_bytes = read_blob(odb, &entry.old_oid)?;
        let new_bytes = read_blob(odb, &entry.new_oid)?;
        let old_str = std::str::from_utf8(&old_bytes).unwrap_or("");
        let new_str = std::str::from_utf8(&new_bytes).unwrap_or("");

        let diff = TextDiff::from_lines(old_str, new_str);
        for change in diff.iter_all_changes() {
            // Unchanged lines do not contribute to the patch ID.
            if matches!(change.tag(), ChangeTag::Equal) {
                continue;
            }
            let text = change.as_str().unwrap_or("");
            text.bytes()
                .filter(|byte| !byte.is_ascii_whitespace())
                .for_each(|byte| hasher.update([byte]));
        }
    }

    let digest = hasher.finalize();
    ObjectId::from_bytes(&digest).map(Some)
}
/// Returns `path` with all ASCII whitespace removed.
///
/// Filtering is done per character, so multi-byte UTF-8 characters are kept
/// intact (ASCII whitespace never occurs inside a multi-byte sequence).
fn compact_path(path: &str) -> String {
    path.chars()
        .filter(|ch| !ch.is_ascii_whitespace())
        .collect()
}
/// Loads the data of object `oid` from `odb`; the all-zero id (as returned by
/// `zero_oid()`) yields an empty buffer without touching the database.
///
/// # Errors
///
/// Propagates any error from `odb.read`.
fn read_blob(odb: &Odb, oid: &ObjectId) -> Result<Vec<u8>> {
    if *oid != zero_oid() {
        let object = odb.read(oid)?;
        return Ok(object.data);
    }
    Ok(Vec::new())
}