1use sha1::{Digest, Sha1};
18use similar::{ChangeTag, TextDiff};
19
20use crate::diff::{diff_trees, zero_oid};
21use crate::error::Result;
22use crate::merge_file;
23use crate::objects::{parse_commit, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
28pub enum PatchIdMode {
29 Unstable,
32 Stable,
35 Verbatim,
38}
39
40pub fn compute_patch_ids_from_text(input: &[u8], mode: PatchIdMode) -> Vec<(ObjectId, ObjectId)> {
57 let stable = mode != PatchIdMode::Unstable;
58 let verbatim = mode == PatchIdMode::Verbatim;
59
60 let mut results: Vec<(ObjectId, ObjectId)> = Vec::new();
61
62 let mut ctx = Sha1::new();
64 let mut result = [0u8; 20];
65 let mut patchlen: usize = 0;
66 let mut before: i32 = -1;
68 let mut after: i32 = -1;
69 let mut diff_is_binary = false;
70 let mut pre_oid_str = String::new();
71 let mut post_oid_str = String::new();
72 let mut current_commit: Option<ObjectId> = None;
73 let mut implicit_commit = true;
76
77 let lines = split_lines_with_nl(input);
80
81 let mut i = 0;
82 while i < lines.len() {
83 let raw = lines[i];
84 i += 1;
85
86 let line = std::str::from_utf8(raw).unwrap_or("");
88
89 let oid_candidate: Option<&str> = if let Some(rest) = line.strip_prefix("commit ") {
91 Some(rest)
92 } else if let Some(rest) = line.strip_prefix("From ") {
93 Some(rest)
94 } else {
95 None
96 };
97
98 if let Some(candidate) = oid_candidate {
99 if let Some(oid) = try_parse_oid_prefix(candidate) {
100 text_flush_one_hunk(&mut result, &mut ctx);
102 if patchlen > 0 {
103 if let Some(coid) = current_commit.take() {
104 if let Ok(pid) = ObjectId::from_bytes(&result) {
105 results.push((pid, coid));
106 }
107 }
108 }
109 result = [0u8; 20];
111 ctx = Sha1::new();
112 patchlen = 0;
113 before = -1;
114 after = -1;
115 diff_is_binary = false;
116 pre_oid_str.clear();
117 post_oid_str.clear();
118 current_commit = Some(oid);
119 implicit_commit = false;
120 continue;
121 }
122 }
123
124 if line.starts_with("\\ ") && line.len() > 12 {
126 if verbatim {
127 ctx.update(raw);
128 patchlen += raw.len();
129 }
130 continue;
131 }
132
133 if patchlen == 0 && !line.starts_with("diff ") {
135 continue;
136 }
137
138 if implicit_commit && line.starts_with("diff ") && current_commit.is_none() {
141 current_commit = Some(ObjectId::from_bytes(&[0u8; 20]).unwrap());
142 implicit_commit = false;
143 }
144
145 if before == -1 {
147 if line.starts_with("GIT binary patch") || line.starts_with("Binary files") {
148 diff_is_binary = true;
150 before = 0;
151 let pre = pre_oid_str.clone();
152 let post = post_oid_str.clone();
153 ctx.update(pre.as_bytes());
154 ctx.update(post.as_bytes());
155 patchlen += pre.len() + post.len();
156 if stable {
157 text_flush_one_hunk(&mut result, &mut ctx);
158 }
159 continue;
160 } else if let Some(rest) = line.strip_prefix("index ") {
161 if let Some(dd) = rest.find("..") {
163 pre_oid_str = rest[..dd].to_owned();
164 let tail = &rest[dd + 2..];
165 let end = tail
166 .find(|c: char| c.is_ascii_whitespace())
167 .unwrap_or_else(|| {
168 tail.trim_end_matches('\n').trim_end_matches('\r').len()
169 });
170 post_oid_str = tail[..end].to_owned();
171 }
172 continue;
173 } else if line.starts_with("--- ") {
174 before = 1;
175 after = 1;
176 } else if !line.chars().next().is_some_and(|c| c.is_ascii_alphabetic()) {
178 text_flush_one_hunk(&mut result, &mut ctx);
181 if patchlen > 0 {
182 if let Some(coid) = current_commit.take() {
183 if let Ok(pid) = ObjectId::from_bytes(&result) {
184 results.push((pid, coid));
185 }
186 }
187 }
188 result = [0u8; 20];
189 ctx = Sha1::new();
190 patchlen = 0;
191 before = -1;
192 after = -1;
193 diff_is_binary = false;
194 continue;
195 }
196 }
197
198 if diff_is_binary {
200 if line.starts_with("diff ") {
201 diff_is_binary = false;
202 before = -1;
203 i -= 1; }
206 continue;
207 }
208
209 if before == 0 && after == 0 {
211 if line.starts_with("@@ -") {
212 let (b, a) = scan_hunk_header(line);
213 before = b;
214 after = a;
215 continue;
216 }
217 if !line.starts_with("diff ") {
218 continue;
221 }
222 if stable {
224 text_flush_one_hunk(&mut result, &mut ctx);
225 }
226 before = -1;
227 after = -1;
228 i -= 1;
230 continue;
231 }
232
233 let first = raw.first().copied().unwrap_or(b' ');
235 if first == b'-' || first == b' ' {
236 before -= 1;
237 }
238 if first == b'+' || first == b' ' {
239 after -= 1;
240 }
241
242 let hashed = if verbatim {
244 ctx.update(raw);
245 raw.len()
246 } else {
247 hash_without_whitespace(&mut ctx, raw)
248 };
249 patchlen += hashed;
250 }
251
252 text_flush_one_hunk(&mut result, &mut ctx);
254 if patchlen > 0 {
255 if let Some(coid) = current_commit {
256 if let Ok(pid) = ObjectId::from_bytes(&result) {
257 results.push((pid, coid));
258 }
259 }
260 }
261
262 results
263}
264
265fn text_flush_one_hunk(result: &mut [u8; 20], ctx: &mut Sha1) {
268 let old = std::mem::replace(ctx, Sha1::new());
269 let hash: [u8; 20] = old.finalize().into();
270 let mut carry: u16 = 0;
271 for i in 0..20 {
272 carry = carry + result[i] as u16 + hash[i] as u16;
273 result[i] = carry as u8;
274 carry >>= 8;
275 }
276}
277
278fn hash_without_whitespace(ctx: &mut Sha1, raw: &[u8]) -> usize {
282 let mut count = 0;
283 for &b in raw {
284 if !b.is_ascii_whitespace() {
285 ctx.update([b]);
286 count += 1;
287 }
288 }
289 count
290}
291
292fn try_parse_oid_prefix(s: &str) -> Option<ObjectId> {
297 let s = s.trim_end_matches('\n').trim_end_matches('\r');
298 if s.len() < 40 {
299 return None;
300 }
301 let hex = &s[..40];
302 if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
303 return None;
304 }
305 if s.len() > 40 && !s.as_bytes()[40].is_ascii_whitespace() {
307 return None;
308 }
309 let mut bytes = [0u8; 20];
310 for (i, chunk) in hex.as_bytes().chunks(2).enumerate() {
311 let hi = hex_val(chunk[0])?;
312 let lo = hex_val(chunk[1])?;
313 bytes[i] = (hi << 4) | lo;
314 }
315 ObjectId::from_bytes(&bytes).ok()
316}
317
318fn hex_val(b: u8) -> Option<u8> {
320 match b {
321 b'0'..=b'9' => Some(b - b'0'),
322 b'a'..=b'f' => Some(b - b'a' + 10),
323 b'A'..=b'F' => Some(b - b'A' + 10),
324 _ => None,
325 }
326}
327
328fn scan_hunk_header(line: &str) -> (i32, i32) {
332 let rest = match line.strip_prefix("@@ -") {
334 Some(r) => r,
335 None => return (1, 1),
336 };
337 let before = parse_hunk_count(rest);
339 let after = rest
341 .find(" +")
342 .and_then(|p| parse_hunk_count_opt(&rest[p + 2..]))
343 .unwrap_or(1);
344 (before, after)
345}
346
347fn parse_hunk_count(s: &str) -> i32 {
349 let after_start = s.trim_start_matches(|c: char| c.is_ascii_digit());
351 if let Some(rest) = after_start.strip_prefix(',') {
352 rest.split(|c: char| !c.is_ascii_digit())
353 .next()
354 .and_then(|n| n.parse().ok())
355 .unwrap_or(1)
356 } else {
357 1
358 }
359}
360
361fn parse_hunk_count_opt(s: &str) -> Option<i32> {
363 Some(parse_hunk_count(s))
364}
365
366fn split_lines_with_nl(input: &[u8]) -> Vec<&[u8]> {
371 let mut lines = Vec::new();
372 let mut start = 0;
373 for (i, &b) in input.iter().enumerate() {
374 if b == b'\n' {
375 lines.push(&input[start..=i]);
376 start = i + 1;
377 }
378 }
379 if start < input.len() {
380 lines.push(&input[start..]);
381 }
382 lines
383}
384
385pub fn compute_patch_id(odb: &Odb, commit_oid: &ObjectId) -> Result<Option<ObjectId>> {
400 let obj = odb.read(commit_oid)?;
401 if obj.kind != ObjectKind::Commit {
402 return Ok(None);
403 }
404 let commit = parse_commit(&obj.data)?;
405
406 if commit.parents.len() > 1 {
408 return Ok(None);
409 }
410
411 let parent_tree_oid = if commit.parents.is_empty() {
413 None
414 } else {
415 let parent_obj = odb.read(&commit.parents[0])?;
416 let parent_commit = parse_commit(&parent_obj.data)?;
417 Some(parent_commit.tree)
418 };
419
420 let mut diffs = diff_trees(odb, parent_tree_oid.as_ref(), Some(&commit.tree), "")?;
422
423 diffs.sort_by(|a, b| a.path().cmp(b.path()));
425
426 let mut result = [0u8; 20];
427
428 for entry in &diffs {
429 let old_path = entry.old_path.as_deref().unwrap_or("");
430 let new_path = entry.new_path.as_deref().unwrap_or("");
431 let mut old_path_buf = old_path.as_bytes().to_vec();
432 let mut new_path_buf = new_path.as_bytes().to_vec();
433 let len1 = remove_space_bytes(&mut old_path_buf);
434 let len2 = remove_space_bytes(&mut new_path_buf);
435
436 let old_mode = parse_mode_u32(&entry.old_mode);
437 let new_mode = parse_mode_u32(&entry.new_mode);
438
439 let mut ctx = Sha1::new();
440 patch_id_add_string(&mut ctx, b"diff--git");
441 patch_id_add_string(&mut ctx, b"a/");
442 ctx.update(&old_path_buf[..len1]);
443 patch_id_add_string(&mut ctx, b"b/");
444 ctx.update(&new_path_buf[..len2]);
445
446 if old_mode == 0 {
447 patch_id_add_string(&mut ctx, b"newfilemode");
448 patch_id_add_mode(&mut ctx, new_mode);
449 } else if new_mode == 0 {
450 patch_id_add_string(&mut ctx, b"deletedfilemode");
451 patch_id_add_mode(&mut ctx, old_mode);
452 } else if old_mode != new_mode {
453 patch_id_add_string(&mut ctx, b"oldmode");
454 patch_id_add_mode(&mut ctx, old_mode);
455 patch_id_add_string(&mut ctx, b"newmode");
456 patch_id_add_mode(&mut ctx, new_mode);
457 }
458
459 let old_bytes = read_blob(odb, &entry.old_oid)?;
460 let new_bytes = read_blob(odb, &entry.new_oid)?;
461
462 if merge_file::is_binary(&old_bytes) || merge_file::is_binary(&new_bytes) {
463 let a = entry.old_oid.to_hex();
464 let b = entry.new_oid.to_hex();
465 ctx.update(a.as_bytes());
466 ctx.update(b.as_bytes());
467 } else {
468 let old_str = std::str::from_utf8(&old_bytes).unwrap_or("");
469 let new_str = std::str::from_utf8(&new_bytes).unwrap_or("");
470
471 if old_mode == 0 {
472 patch_id_add_string(&mut ctx, b"---/dev/null");
473 patch_id_add_string(&mut ctx, b"+++b/");
474 ctx.update(&new_path_buf[..len2]);
475 } else if new_mode == 0 {
476 patch_id_add_string(&mut ctx, b"---a/");
477 ctx.update(&old_path_buf[..len1]);
478 patch_id_add_string(&mut ctx, b"+++/dev/null");
479 } else {
480 patch_id_add_string(&mut ctx, b"---a/");
481 ctx.update(&old_path_buf[..len1]);
482 patch_id_add_string(&mut ctx, b"+++b/");
483 ctx.update(&new_path_buf[..len2]);
484 }
485
486 let diff = TextDiff::from_lines(old_str, new_str);
487 for change in diff.iter_all_changes() {
488 let prefix = match change.tag() {
489 ChangeTag::Equal => b' ',
490 ChangeTag::Delete => b'-',
491 ChangeTag::Insert => b'+',
492 };
493 let text = change.as_str().unwrap_or("");
494 for piece in text.split_inclusive('\n') {
495 let line_body = piece.strip_suffix('\n').unwrap_or(piece);
496 let mut line_buf = Vec::with_capacity(1 + line_body.len() + 1);
497 line_buf.push(prefix);
498 line_buf.extend_from_slice(line_body.as_bytes());
499 line_buf.push(b'\n');
500 let n = remove_space_bytes(&mut line_buf);
501 ctx.update(&line_buf[..n]);
502 }
503 }
504 }
505
506 text_flush_one_hunk(&mut result, &mut ctx);
507 }
508
509 ObjectId::from_bytes(&result).map(Some)
510}
511
512fn parse_mode_u32(mode: &str) -> u32 {
513 u32::from_str_radix(mode.trim(), 8).unwrap_or(0)
514}
515
516fn patch_id_add_string(ctx: &mut Sha1, s: &[u8]) {
517 ctx.update(s);
518}
519
520fn patch_id_add_mode(ctx: &mut Sha1, mode: u32) {
521 let text = format!("{mode:06o}");
522 ctx.update(text.as_bytes());
523}
524
525fn remove_space_bytes(buf: &mut Vec<u8>) -> usize {
527 let mut dst = 0usize;
528 for i in 0..buf.len() {
529 let c = buf[i];
530 if !c.is_ascii_whitespace() {
531 buf[dst] = c;
532 dst += 1;
533 }
534 }
535 dst
536}
537
538fn read_blob(odb: &Odb, oid: &ObjectId) -> Result<Vec<u8>> {
542 if *oid == zero_oid() {
543 return Ok(Vec::new());
544 }
545 let obj = odb.read(oid)?;
546 Ok(obj.data)
547}