1use sha1::{Digest, Sha1};
18use similar::{ChangeTag, TextDiff};
19
20use crate::diff::{diff_trees, zero_oid, DiffEntry};
21use crate::error::Result;
22use crate::merge_file;
23use crate::objects::{parse_commit, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
/// Selects how a patch ID is derived from patch text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PatchIdMode {
    /// Whitespace is stripped before hashing and the per-file hashes are not
    /// folded separately; everything is accumulated until the patch ends.
    Unstable,
    /// Whitespace is stripped before hashing and each file's hash is folded
    /// into the result when the next file header is reached.
    Stable,
    /// Like [`PatchIdMode::Stable`], but lines are hashed byte-for-byte
    /// (whitespace and `\ ` marker lines included).
    Verbatim,
}
39
40pub fn compute_patch_ids_from_text(input: &[u8], mode: PatchIdMode) -> Vec<(ObjectId, ObjectId)> {
57 let stable = mode != PatchIdMode::Unstable;
58 let verbatim = mode == PatchIdMode::Verbatim;
59
60 let mut results: Vec<(ObjectId, ObjectId)> = Vec::new();
61
62 let mut ctx = Sha1::new();
64 let mut result = [0u8; 20];
65 let mut patchlen: usize = 0;
66 let mut before: i32 = -1;
68 let mut after: i32 = -1;
69 let mut diff_is_binary = false;
70 let mut pre_oid_str = String::new();
71 let mut post_oid_str = String::new();
72 let mut current_commit: Option<ObjectId> = None;
73 let mut implicit_commit = true;
76
77 let lines = split_lines_with_nl(input);
80
81 let mut i = 0;
82 while i < lines.len() {
83 let raw = lines[i];
84 i += 1;
85
86 let line = std::str::from_utf8(raw).unwrap_or("");
88
89 let oid_candidate: Option<&str> = if let Some(rest) = line.strip_prefix("commit ") {
91 Some(rest)
92 } else if let Some(rest) = line.strip_prefix("From ") {
93 Some(rest)
94 } else {
95 None
96 };
97
98 if let Some(candidate) = oid_candidate {
99 if let Some(oid) = try_parse_oid_prefix(candidate) {
100 text_flush_one_hunk(&mut result, &mut ctx);
102 if patchlen > 0 {
103 if let Some(coid) = current_commit.take() {
104 if let Ok(pid) = ObjectId::from_bytes(&result) {
105 results.push((pid, coid));
106 }
107 }
108 }
109 result = [0u8; 20];
111 ctx = Sha1::new();
112 patchlen = 0;
113 before = -1;
114 after = -1;
115 diff_is_binary = false;
116 pre_oid_str.clear();
117 post_oid_str.clear();
118 current_commit = Some(oid);
119 implicit_commit = false;
120 continue;
121 }
122 }
123
124 if line.starts_with("\\ ") && line.len() > 12 {
126 if verbatim {
127 ctx.update(raw);
128 patchlen += raw.len();
129 }
130 continue;
131 }
132
133 if patchlen == 0 && !line.starts_with("diff ") {
135 continue;
136 }
137
138 if implicit_commit && line.starts_with("diff ") && current_commit.is_none() {
141 current_commit = Some(ObjectId::zero());
142 implicit_commit = false;
143 }
144
145 if before == -1 {
147 if line.starts_with("GIT binary patch") || line.starts_with("Binary files") {
148 diff_is_binary = true;
150 before = 0;
151 let pre = pre_oid_str.clone();
152 let post = post_oid_str.clone();
153 ctx.update(pre.as_bytes());
154 ctx.update(post.as_bytes());
155 patchlen += pre.len() + post.len();
156 if stable {
157 text_flush_one_hunk(&mut result, &mut ctx);
158 }
159 continue;
160 } else if let Some(rest) = line.strip_prefix("index ") {
161 if let Some(dd) = rest.find("..") {
163 pre_oid_str = rest[..dd].to_owned();
164 let tail = &rest[dd + 2..];
165 let end = tail
166 .find(|c: char| c.is_ascii_whitespace())
167 .unwrap_or_else(|| {
168 tail.trim_end_matches('\n').trim_end_matches('\r').len()
169 });
170 post_oid_str = tail[..end].to_owned();
171 }
172 continue;
173 } else if line.starts_with("--- ") {
174 before = 1;
175 after = 1;
176 } else if !line.chars().next().is_some_and(|c| c.is_ascii_alphabetic()) {
178 text_flush_one_hunk(&mut result, &mut ctx);
181 if patchlen > 0 {
182 if let Some(coid) = current_commit.take() {
183 if let Ok(pid) = ObjectId::from_bytes(&result) {
184 results.push((pid, coid));
185 }
186 }
187 }
188 result = [0u8; 20];
189 ctx = Sha1::new();
190 patchlen = 0;
191 before = -1;
192 after = -1;
193 diff_is_binary = false;
194 continue;
195 }
196 }
197
198 if diff_is_binary {
200 if line.starts_with("diff ") {
201 diff_is_binary = false;
202 before = -1;
203 i -= 1; }
206 continue;
207 }
208
209 if before == 0 && after == 0 {
211 if line.starts_with("@@ -") {
212 let (b, a) = scan_hunk_header(line);
213 before = b;
214 after = a;
215 continue;
216 }
217 if !line.starts_with("diff ") {
218 continue;
221 }
222 if stable {
224 text_flush_one_hunk(&mut result, &mut ctx);
225 }
226 before = -1;
227 after = -1;
228 i -= 1;
230 continue;
231 }
232
233 let first = raw.first().copied().unwrap_or(b' ');
235 if first == b'-' || first == b' ' {
236 before -= 1;
237 }
238 if first == b'+' || first == b' ' {
239 after -= 1;
240 }
241
242 let hashed = if verbatim {
244 ctx.update(raw);
245 raw.len()
246 } else {
247 hash_without_whitespace(&mut ctx, raw)
248 };
249 patchlen += hashed;
250 }
251
252 text_flush_one_hunk(&mut result, &mut ctx);
254 if patchlen > 0 {
255 if let Some(coid) = current_commit {
256 if let Ok(pid) = ObjectId::from_bytes(&result) {
257 results.push((pid, coid));
258 }
259 }
260 }
261
262 results
263}
264
265fn text_flush_one_hunk(result: &mut [u8; 20], ctx: &mut Sha1) {
268 let old = std::mem::replace(ctx, Sha1::new());
269 let hash: [u8; 20] = old.finalize().into();
270 let mut carry: u16 = 0;
271 for i in 0..20 {
272 carry = carry + result[i] as u16 + hash[i] as u16;
273 result[i] = carry as u8;
274 carry >>= 8;
275 }
276}
277
278fn hash_without_whitespace(ctx: &mut Sha1, raw: &[u8]) -> usize {
282 let mut count = 0;
283 for &b in raw {
284 if !b.is_ascii_whitespace() {
285 ctx.update([b]);
286 count += 1;
287 }
288 }
289 count
290}
291
292fn try_parse_oid_prefix(s: &str) -> Option<ObjectId> {
297 let s = s.trim_end_matches('\n').trim_end_matches('\r');
298 if s.len() < 40 {
299 return None;
300 }
301 let hex = &s[..40];
302 if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
303 return None;
304 }
305 if s.len() > 40 && !s.as_bytes()[40].is_ascii_whitespace() {
307 return None;
308 }
309 let mut bytes = [0u8; 20];
310 for (i, chunk) in hex.as_bytes().chunks(2).enumerate() {
311 let hi = hex_val(chunk[0])?;
312 let lo = hex_val(chunk[1])?;
313 bytes[i] = (hi << 4) | lo;
314 }
315 ObjectId::from_bytes(&bytes).ok()
316}
317
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit, accepting
/// both lower- and upper-case letters, or `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` never exceeds 15, so narrowing to `u8` is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
327
328fn scan_hunk_header(line: &str) -> (i32, i32) {
332 let rest = match line.strip_prefix("@@ -") {
334 Some(r) => r,
335 None => return (1, 1),
336 };
337 let before = parse_hunk_count(rest);
339 let after = rest
341 .find(" +")
342 .and_then(|p| parse_hunk_count_opt(&rest[p + 2..]))
343 .unwrap_or(1);
344 (before, after)
345}
346
/// Reads the line count from a hunk range of the form `start[,count]`.
///
/// Leading digits (the start line) are skipped. If a comma follows, the run
/// of digits after it is parsed as the count. Returns 1 when there is no
/// comma or the count cannot be parsed as an `i32`.
fn parse_hunk_count(s: &str) -> i32 {
    let start_len = s.bytes().take_while(u8::is_ascii_digit).count();
    match s[start_len..].strip_prefix(',') {
        Some(tail) => {
            let count_len = tail.bytes().take_while(u8::is_ascii_digit).count();
            tail[..count_len].parse().unwrap_or(1)
        }
        None => 1,
    }
}
360
361fn parse_hunk_count_opt(s: &str) -> Option<i32> {
363 Some(parse_hunk_count(s))
364}
365
/// Splits `input` into lines, keeping the terminating `\n` on each line.
///
/// A final line without a trailing newline is returned as-is; empty input
/// yields no lines.
fn split_lines_with_nl(input: &[u8]) -> Vec<&[u8]> {
    input.split_inclusive(|&byte| byte == b'\n').collect()
}
384
385pub fn compute_patch_id(odb: &Odb, commit_oid: &ObjectId) -> Result<Option<ObjectId>> {
400 compute_patch_id_filtered(odb, commit_oid, &[])
401}
402
403pub fn compute_patch_id_for_paths(
418 odb: &Odb,
419 commit_oid: &ObjectId,
420 paths: &[String],
421) -> Result<Option<ObjectId>> {
422 compute_patch_id_filtered(odb, commit_oid, paths)
423}
424
425fn compute_patch_id_filtered(
426 odb: &Odb,
427 commit_oid: &ObjectId,
428 paths: &[String],
429) -> Result<Option<ObjectId>> {
430 let obj = odb.read(commit_oid)?;
431 if obj.kind != ObjectKind::Commit {
432 return Ok(None);
433 }
434 let commit = parse_commit(&obj.data)?;
435
436 if commit.parents.len() > 1 {
438 return Ok(None);
439 }
440
441 let parent_tree_oid = if commit.parents.is_empty() {
443 None
444 } else {
445 let parent_obj = odb.read(&commit.parents[0])?;
446 let parent_commit = parse_commit(&parent_obj.data)?;
447 Some(parent_commit.tree)
448 };
449
450 let mut diffs = diff_trees(odb, parent_tree_oid.as_ref(), Some(&commit.tree), "")?;
452 if !paths.is_empty() {
453 diffs.retain(|entry| diff_entry_matches_paths(entry, paths));
454 }
455
456 diffs.sort_by(|a, b| a.path().cmp(b.path()));
458
459 let mut result = [0u8; 20];
460
461 for entry in &diffs {
462 let old_path = entry
466 .old_path
467 .as_deref()
468 .or(entry.new_path.as_deref())
469 .unwrap_or("");
470 let new_path = entry
471 .new_path
472 .as_deref()
473 .or(entry.old_path.as_deref())
474 .unwrap_or("");
475 let mut old_path_buf = old_path.as_bytes().to_vec();
476 let mut new_path_buf = new_path.as_bytes().to_vec();
477 let len1 = remove_space_bytes(&mut old_path_buf);
478 let len2 = remove_space_bytes(&mut new_path_buf);
479
480 let old_mode = parse_mode_u32(&entry.old_mode);
481 let new_mode = parse_mode_u32(&entry.new_mode);
482
483 let mut ctx = Sha1::new();
484 patch_id_add_string(&mut ctx, b"diff--git");
485 patch_id_add_string(&mut ctx, b"a/");
486 ctx.update(&old_path_buf[..len1]);
487 patch_id_add_string(&mut ctx, b"b/");
488 ctx.update(&new_path_buf[..len2]);
489
490 if old_mode == 0 {
491 patch_id_add_string(&mut ctx, b"newfilemode");
492 patch_id_add_mode(&mut ctx, new_mode);
493 } else if new_mode == 0 {
494 patch_id_add_string(&mut ctx, b"deletedfilemode");
495 patch_id_add_mode(&mut ctx, old_mode);
496 } else if old_mode != new_mode {
497 patch_id_add_string(&mut ctx, b"oldmode");
498 patch_id_add_mode(&mut ctx, old_mode);
499 patch_id_add_string(&mut ctx, b"newmode");
500 patch_id_add_mode(&mut ctx, new_mode);
501 }
502
503 let old_bytes = read_blob(odb, &entry.old_oid)?;
504 let new_bytes = read_blob(odb, &entry.new_oid)?;
505
506 if merge_file::is_binary(&old_bytes) || merge_file::is_binary(&new_bytes) {
507 let a = entry.old_oid.to_hex();
508 let b = entry.new_oid.to_hex();
509 ctx.update(a.as_bytes());
510 ctx.update(b.as_bytes());
511 } else {
512 let old_str = std::str::from_utf8(&old_bytes).unwrap_or("");
513 let new_str = std::str::from_utf8(&new_bytes).unwrap_or("");
514
515 if old_mode == 0 {
516 patch_id_add_string(&mut ctx, b"---/dev/null");
517 patch_id_add_string(&mut ctx, b"+++b/");
518 ctx.update(&new_path_buf[..len2]);
519 } else if new_mode == 0 {
520 patch_id_add_string(&mut ctx, b"---a/");
521 ctx.update(&old_path_buf[..len1]);
522 patch_id_add_string(&mut ctx, b"+++/dev/null");
523 } else {
524 patch_id_add_string(&mut ctx, b"---a/");
525 ctx.update(&old_path_buf[..len1]);
526 patch_id_add_string(&mut ctx, b"+++b/");
527 ctx.update(&new_path_buf[..len2]);
528 }
529
530 let diff = TextDiff::from_lines(old_str, new_str);
531 for change in diff.iter_all_changes() {
532 let prefix = match change.tag() {
533 ChangeTag::Equal => b' ',
534 ChangeTag::Delete => b'-',
535 ChangeTag::Insert => b'+',
536 };
537 let text = change.as_str().unwrap_or("");
538 for piece in text.split_inclusive('\n') {
539 let line_body = piece.strip_suffix('\n').unwrap_or(piece);
540 let mut line_buf = Vec::with_capacity(1 + line_body.len() + 1);
541 line_buf.push(prefix);
542 line_buf.extend_from_slice(line_body.as_bytes());
543 line_buf.push(b'\n');
544 let n = remove_space_bytes(&mut line_buf);
545 ctx.update(&line_buf[..n]);
546 }
547 }
548 }
549
550 text_flush_one_hunk(&mut result, &mut ctx);
551 }
552
553 ObjectId::from_bytes(&result).map(Some)
554}
555
556fn diff_entry_matches_paths(entry: &DiffEntry, paths: &[String]) -> bool {
557 entry
558 .old_path
559 .as_deref()
560 .into_iter()
561 .chain(entry.new_path.as_deref())
562 .any(|path| crate::pathspec::matches_pathspec_list(path, paths))
563}
564
/// Parses an octal file-mode string (surrounding whitespace ignored) into its
/// numeric value, returning 0 when the text is not a valid octal number.
fn parse_mode_u32(mode: &str) -> u32 {
    match u32::from_str_radix(mode.trim(), 8) {
        Ok(bits) => bits,
        Err(_) => 0,
    }
}
568
569fn patch_id_add_string(ctx: &mut Sha1, s: &[u8]) {
570 ctx.update(s);
571}
572
573fn patch_id_add_mode(ctx: &mut Sha1, mode: u32) {
574 let text = format!("{mode:06o}");
575 ctx.update(text.as_bytes());
576}
577
/// Compacts `buf` in place by dropping whitespace bytes and returns the
/// number of bytes kept.
///
/// The kept bytes occupy `buf[..n]`, where `n` is the return value; the
/// vector's length is not changed, so bytes past `n` are leftovers.
///
/// "Whitespace" is space, TAB, LF and CR only — the set git's `isspace`
/// recognises. Form feed and vertical tab are kept, so content with `^L`
/// page breaks hashes the same way git hashes it.
fn remove_space_bytes(buf: &mut Vec<u8>) -> usize {
    let mut kept = 0usize;
    for i in 0..buf.len() {
        let byte = buf[i];
        if !matches!(byte, b' ' | b'\t' | b'\n' | b'\r') {
            buf[kept] = byte;
            kept += 1;
        }
    }
    kept
}
590
591fn read_blob(odb: &Odb, oid: &ObjectId) -> Result<Vec<u8>> {
595 if *oid == zero_oid() {
596 return Ok(Vec::new());
597 }
598 let obj = odb.read(oid)?;
599 Ok(obj.data)
600}