1use crate::error::{Error, Result};
11use regex::Regex;
12use std::sync::OnceLock;
13
/// One `@@ ... @@` hunk of a file patch: the header ranges plus the body lines.
#[derive(Debug, Clone)]
pub struct Hunk {
    /// Start of the old-side range as returned by the hunk-header parser.
    pub old_start: usize,
    /// Length of the old-side range as returned by the hunk-header parser.
    pub old_count: usize,
    /// Start of the new-side range as returned by the hunk-header parser.
    pub new_start: usize,
    /// Length of the new-side range as returned by the hunk-header parser.
    pub new_count: usize,
    /// Set by `parse_hunk` to the header's 0-based index in the input plus two,
    /// i.e. the 1-based input line number of the line following the header.
    pub first_body_line: usize,
    /// Body lines in the order they appear in the patch.
    pub lines: Vec<HunkLine>,
}
30
/// A single body line of a hunk, stored without its one-character marker.
#[derive(Debug, Clone)]
pub enum HunkLine {
    /// Line present on both sides (a leading space, or a completely empty line).
    Context(String),
    /// Line added by the patch (leading `+`).
    Add(String),
    /// Line removed by the patch (leading `-`).
    Remove(String),
    // NOTE(review): presumably the "\ No newline at end of file" marker; the code
    // that produces this variant is outside the visible part of the file.
    NoNewline,
}
39
/// Everything parsed for one file from a patch: header metadata plus hunks.
#[derive(Debug, Clone)]
pub struct FilePatch {
    /// First path of the `diff --git` line as split by `split_diff_git_paths`.
    pub diff_old_path: Option<String>,
    /// Second path of the `diff --git` line as split by `split_diff_git_paths`.
    pub diff_new_path: Option<String>,
    /// Old-side path; may be overridden by `rename from` / `copy from` / `---`.
    /// The literal `/dev/null` is stored when the `---` line names it.
    pub old_path: Option<String>,
    /// New-side path; may be overridden by `rename to` / `copy to` / `+++`.
    /// The literal `/dev/null` is stored when the `+++` line names it.
    pub new_path: Option<String>,
    /// True once a `--- ` header line was consumed for this file.
    pub saw_old_header: bool,
    /// True once a `+++ ` header line was consumed for this file.
    pub saw_new_header: bool,
    /// Mode text from `old mode`, `deleted file mode`, or the trailing token of `index`.
    pub old_mode: Option<String>,
    /// Mode text from `new mode` or `new file mode`.
    pub new_mode: Option<String>,
    /// 1-based input line number of the line that supplied `old_mode`.
    pub old_mode_line: Option<usize>,
    /// 1-based input line number of the line that supplied `new_mode`.
    pub new_mode_line: Option<usize>,
    /// The patch creates the file.
    pub is_new: bool,
    /// The patch deletes the file.
    pub is_deleted: bool,
    /// A `rename from` / `rename to` header was seen.
    pub is_rename: bool,
    /// A `copy from` / `copy to` header was seen.
    pub is_copy: bool,
    /// Value of the `similarity index N%` header, if present and numeric.
    pub similarity_index: Option<u32>,
    /// Value of the `dissimilarity index N%` header, if present and numeric.
    pub dissimilarity_index: Option<u32>,
    /// Left side of `old..new` on the `index` header.
    pub old_oid: Option<String>,
    /// Right side of `old..new` on the `index` header.
    pub new_oid: Option<String>,
    /// Payload collected after a `GIT binary patch` line.
    pub binary_patch: Option<BinaryPatchPayload>,
    /// Set for `GIT binary patch` sections and `Binary files ... differ` lines.
    pub is_binary: bool,
    /// Text hunks in input order.
    pub hunks: Vec<Hunk>,
    // NOTE(review): initialised to 0 by the parser; presumably a whitespace-rule
    // bitmask filled in by later stages — confirm against its users.
    pub ws_rule: u32,
    /// True for `diff --git` patches, false for traditional `---`/`+++` patches;
    /// consulted by `worktree_rel_operational`.
    pub is_toplevel_relative: bool,
}
92
/// Raw contents of a `GIT binary patch` section, still compressed.
#[derive(Debug, Clone)]
pub struct BinaryPatchPayload {
    /// Base85-decoded bytes of the first `literal` section.
    pub forward_compressed: Vec<u8>,
    /// Size announced on the first `literal <size>` line.
    pub forward_declared_size: usize,
    /// Base85-decoded bytes of the optional second `literal` section (empty if absent).
    pub reverse_compressed: Vec<u8>,
    /// Size announced on the second `literal <size>` line (0 if absent).
    pub reverse_declared_size: usize,
}
101
102impl FilePatch {
103 pub fn effective_path(&self) -> Option<&str> {
108 if self.is_deleted {
109 return self
110 .old_path
111 .as_deref()
112 .filter(|p| *p != "/dev/null")
113 .or(self.new_path.as_deref().filter(|p| *p != "/dev/null"));
114 }
115 if self.is_new {
116 return self
117 .new_path
118 .as_deref()
119 .filter(|p| *p != "/dev/null")
120 .or(self.old_path.as_deref().filter(|p| *p != "/dev/null"));
121 }
122 self.new_path
123 .as_deref()
124 .filter(|p| *p != "/dev/null")
125 .or(self.old_path.as_deref().filter(|p| *p != "/dev/null"))
126 }
127
128 pub fn source_path(&self) -> Option<&str> {
133 if self.is_rename || self.is_copy {
134 self.old_path
135 .as_deref()
136 .filter(|p| *p != "/dev/null")
137 .or(self.effective_path())
138 } else if let (Some(old), Some(new)) = (self.old_path.as_deref(), self.new_path.as_deref())
139 {
140 if old != "/dev/null" && new != "/dev/null" && old != new {
141 Some(old)
142 } else {
143 self.effective_path()
144 }
145 } else {
146 self.effective_path()
147 }
148 }
149
150 pub fn target_path(&self) -> Option<&str> {
155 if self.is_new || self.is_rename || self.is_copy {
156 self.new_path
157 .as_deref()
158 .filter(|p| *p != "/dev/null")
159 .or(self.effective_path())
160 } else {
161 self.effective_path()
162 }
163 }
164
165 pub fn involves_gitlink(&self) -> bool {
167 self.old_mode.as_deref() == Some("160000") || self.new_mode.as_deref() == Some("160000")
168 }
169
170 pub fn worktree_rel_operational(&self, adjusted: &str, setup_prefix: &str) -> String {
172 if self.is_toplevel_relative {
173 adjusted.to_string()
174 } else {
175 format!("{setup_prefix}{adjusted}")
176 }
177 }
178}
179
/// Strips leading and trailing whitespace from a header value in place.
///
/// `str::trim` already removes carriage returns (they are Unicode whitespace),
/// so no separate `\r` pass is needed. The string is only reallocated when
/// trimming actually removes something.
fn sanitize_patch_header_value(s: &mut String) {
    let trimmed = s.trim();
    if trimmed.len() != s.len() {
        *s = trimmed.to_string();
    }
}
187
/// Drops a leading `a/` or `b/` (optionally preceded by any number of `./`)
/// from `path`.
///
/// `/dev/null` and paths without such a prefix are returned unchanged — in
/// particular a leading `./` is kept when no `a/` / `b/` follows it.
fn strip_git_diff_path_prefix(path: &str) -> String {
    if path == "/dev/null" {
        return path.to_owned();
    }
    let without_dot = path.trim_start_matches("./");
    without_dot
        .strip_prefix("a/")
        .or_else(|| without_dot.strip_prefix("b/"))
        .unwrap_or(path)
        .to_owned()
}
205
206fn sanitize_file_patch_headers(fp: &mut FilePatch) {
207 if let Some(ref mut s) = fp.old_mode {
208 sanitize_patch_header_value(s);
209 if s.is_empty() {
210 fp.old_mode = None;
211 }
212 }
213 if let Some(ref mut s) = fp.new_mode {
214 sanitize_patch_header_value(s);
215 if s.is_empty() {
216 fp.new_mode = None;
217 }
218 }
219 if let Some(ref mut s) = fp.old_oid {
220 sanitize_patch_header_value(s);
221 }
222 if let Some(ref mut s) = fp.new_oid {
223 sanitize_patch_header_value(s);
224 }
225 for ref mut s in [
226 &mut fp.diff_old_path,
227 &mut fp.diff_new_path,
228 &mut fp.old_path,
229 &mut fp.new_path,
230 ]
231 .into_iter()
232 .flatten()
233 {
234 sanitize_patch_header_value(s);
235 **s = strip_git_diff_path_prefix(s);
236 }
237}
238
/// Collapses every run of consecutive `/` characters in `s` into a single `/`.
fn squash_slash_path(s: &str) -> String {
    let mut squashed = String::with_capacity(s.len());
    for ch in s.chars() {
        // A slash is redundant exactly when the output already ends with one.
        if ch == '/' && squashed.ends_with('/') {
            continue;
        }
        squashed.push(ch);
    }
    squashed
}
256
/// Decodes a C-style double-quoted string at the start of `line`.
///
/// Returns the decoded bytes together with the text following the closing
/// quote. Returns `None` when `line` does not start with `"`, the quote is
/// never closed, or an escape is unknown or malformed. Supported escapes are
/// `\a \b \f \n \r \t \v \\ \"` and three-digit octal `\NNN` whose first digit
/// is `0`–`3`.
fn unquote_c_style_diff_prefix(line: &str) -> Option<(Vec<u8>, &str)> {
    let mut rest = line.as_bytes().strip_prefix(b"\"")?;
    let mut decoded = Vec::new();
    loop {
        // Copy the literal run up to the next quote or backslash; running out
        // of input first means the closing quote is missing.
        let stop = rest.iter().position(|&byte| matches!(byte, b'"' | b'\\'))?;
        decoded.extend_from_slice(&rest[..stop]);
        let marker = rest[stop];
        rest = &rest[stop + 1..];

        if marker == b'"' {
            let tail = std::str::from_utf8(rest).ok()?;
            return Some((decoded, tail));
        }

        // `marker` is a backslash: decode the escape that follows it.
        let (&escape, after_escape) = rest.split_first()?;
        rest = after_escape;
        let byte = match escape {
            b'a' => 0x07,
            b'b' => 0x08,
            b'f' => 0x0c,
            b'n' => b'\n',
            b'r' => b'\r',
            b't' => b'\t',
            b'v' => 0x0b,
            b'\\' | b'"' => escape,
            b'0'..=b'3' => {
                let &[second, third, ..] = rest else {
                    return None;
                };
                let is_octal = |digit: u8| (b'0'..=b'7').contains(&digit);
                if !is_octal(second) || !is_octal(third) {
                    return None;
                }
                rest = &rest[2..];
                // The leading digit is at most 3, so the value fits in a byte.
                ((escape - b'0') << 6) | ((second - b'0') << 3) | (third - b'0')
            }
            _ => return None,
        };
        decoded.push(byte);
    }
}
320
321fn bytes_to_path_string(bytes: &[u8]) -> Result<String> {
322 let s = String::from_utf8(bytes.to_vec())
323 .map_err(|e| Error::Message(format!("diff path is not valid UTF-8: {e}")))?;
324 Ok(squash_slash_path(&s))
325}
326
/// Removes the first `p_value` slash-terminated components from `line`.
///
/// Returns the sub-slice of `line` that follows the `p_value`-th `/`. Returns
/// `None` when there are fewer slashes than requested, or when the deciding
/// slash is the very first byte. With `p_value == 0` the input is returned
/// unchanged.
fn skip_tree_prefix_bytes(line: &[u8], p_value: usize) -> Option<&[u8]> {
    if p_value == 0 {
        return Some(line);
    }
    let (cut, _) = line
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'/')
        .nth(p_value - 1)?;
    if cut == 0 {
        None
    } else {
        Some(&line[cut + 1..])
    }
}
345
346fn skip_tree_prefix_str(path: &str, p_value: usize) -> Option<String> {
348 let stripped = skip_tree_prefix_bytes(path.as_bytes(), p_value)?;
349 Some(String::from_utf8_lossy(stripped).into_owned())
350}
351
/// Length (6) of a trailing ` +HHMM` / ` -HHMM` zone suffix such as ` +0500`,
/// or 0 when `line` does not end with one.
fn sane_tz_len(line: &[u8]) -> usize {
    match line {
        [.., b' ', b'+' | b'-', d1, d2, d3, d4]
            if [d1, d2, d3, d4].iter().all(|d| d.is_ascii_digit()) =>
        {
            6
        }
        _ => 0,
    }
}
368
/// Length (7) of a trailing ` +HH:MM` / ` -HH:MM` zone suffix, or 0 when
/// `line` does not end with one.
fn tz_with_colon_len(line: &[u8]) -> usize {
    match line {
        [.., b' ', b'+' | b'-', h1, h2, b':', m1, m2]
            if [h1, h2, m1, m2].iter().all(|d| d.is_ascii_digit()) =>
        {
            7
        }
        _ => 0,
    }
}
391
/// Length of a trailing date: 10 for `YYYY-MM-DD`, 8 for `YY-MM-DD`, or 0 when
/// `line` does not end with either form.
fn date_len(line: &[u8]) -> usize {
    let all_digits = |bytes: &[&u8]| bytes.iter().all(|b| b.is_ascii_digit());
    match line {
        // Four-digit year: a valid short date preceded by two more digits.
        [.., c1, c2, y1, y2, b'-', m1, m2, b'-', d1, d2]
            if all_digits(&[y1, y2, m1, m2, d1, d2]) && all_digits(&[c1, c2]) =>
        {
            10
        }
        [.., y1, y2, b'-', m1, m2, b'-', d1, d2] if all_digits(&[y1, y2, m1, m2, d1, d2]) => 8,
        _ => 0,
    }
}
419
/// Length (9) of a trailing ` HH:MM:SS` time including its leading space, or 0
/// when `line` does not end with one.
fn short_time_len(line: &[u8]) -> usize {
    match line {
        [.., b' ', h1, h2, b':', m1, m2, b':', s1, s2]
            if [h1, h2, m1, m2, s1, s2].iter().all(|d| d.is_ascii_digit()) =>
        {
            9
        }
        _ => 0,
    }
}
441
442fn fractional_time_len(line: &[u8]) -> usize {
443 if line.is_empty() || !line[line.len() - 1].is_ascii_digit() {
444 return 0;
445 }
446 let mut p = line.len() - 1;
447 while p > 0 && line[p].is_ascii_digit() {
448 p -= 1;
449 }
450 if p == 0 || line[p] != b'.' {
451 return 0;
452 }
453 let n = short_time_len(&line[..p]);
454 if n == 0 {
455 return 0;
456 }
457 line.len() - p + n
458}
459
/// Number of consecutive space bytes at the end of `line`.
fn trailing_spaces_len(line: &[u8]) -> usize {
    line.iter().rev().take_while(|&&byte| byte == b' ').count()
}
473
474fn diff_timestamp_len(line: &[u8]) -> usize {
475 if line.is_empty() || !line[line.len() - 1].is_ascii_digit() {
476 return 0;
477 }
478 let mut end = line.len();
479 let mut n = sane_tz_len(&line[..end]);
480 if n == 0 {
481 n = tz_with_colon_len(&line[..end]);
482 }
483 if n == 0 {
484 return 0;
485 }
486 end -= n;
487
488 n = short_time_len(&line[..end]);
489 if n == 0 {
490 n = fractional_time_len(&line[..end]);
491 }
492 if n == 0 {
493 return 0;
494 }
495 end -= n;
496
497 n = date_len(&line[..end]);
498 if n == 0 {
499 return 0;
500 }
501 end -= n;
502
503 if end == 0 {
504 return 0;
505 }
506 match line[end - 1] {
507 b'\t' => {
508 end -= 1;
509 line.len() - end
510 }
511 b' ' => {
512 end -= trailing_spaces_len(&line[..end]);
513 line.len() - end
514 }
515 _ => 0,
516 }
517}
518
/// Extracts a file name from `line[..end]` after dropping `p_value` leading
/// path components.
///
/// * `def` is a previously determined name. It is returned when no usable name
///   can be extracted (too few slashes, or nothing left after stripping), and
///   also when it is a strict prefix of the extracted name (so `file.c` wins
///   over `file.c.orig`).
/// * `end` is clamped to `line.len()`.
///
/// Returns `None` only when no name can be extracted and `def` is `None`.
fn find_name_common_bounded(
    line: &[u8],
    def: Option<&[u8]>,
    p_value: usize,
    end: usize,
) -> Option<Vec<u8>> {
    let window = &line[..end.min(line.len())];
    let fallback = || def.map(|d| d.to_vec());

    // The name starts right after the `p_value`-th slash.
    let start = if p_value == 0 {
        0
    } else {
        let nth_slash = window
            .iter()
            .enumerate()
            .filter(|&(_, &byte)| byte == b'/')
            .nth(p_value - 1);
        match nth_slash {
            Some((slash, _)) => slash + 1,
            // Not enough components to strip: fall back to the default name,
            // exactly as for an empty remainder below.
            None => return fallback(),
        }
    };

    let name = &window[start..];
    if name.is_empty() {
        return fallback();
    }
    match def {
        // Prefer the shorter name when the new one merely has a suffix tacked on.
        Some(d) if d.len() < name.len() && name.starts_with(d) => Some(d.to_vec()),
        _ => Some(name.to_vec()),
    }
}
553
554fn find_name_traditional(line: &[u8], def: Option<&[u8]>, p_value: usize) -> Option<Vec<u8>> {
556 if line.first() == Some(&b'"') {
557 let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(line).ok()?)?;
558 let skip = skip_tree_prefix_bytes(&decoded, p_value)?;
559 return Some(skip.to_vec());
560 }
561 let ts = diff_timestamp_len(line);
562 let name_end = line.len().saturating_sub(ts);
563 find_name_common_bounded(line, def, p_value, name_end)
564}
565
566fn find_name_tab_terminated(line: &[u8], p_value: usize) -> Option<Vec<u8>> {
567 if line.first() == Some(&b'"') {
568 let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(line).ok()?)?;
569 let skip = skip_tree_prefix_bytes(&decoded, p_value)?;
570 return Some(skip.to_vec());
571 }
572 let end = line
573 .iter()
574 .position(|&b| b == b'\t' || b == b'\n' || b == b'\r')
575 .unwrap_or(line.len());
576 find_name_common_bounded(line, None, p_value, end)
577}
578
/// True when `line` is `/dev/null`, optionally followed by ASCII whitespace
/// and anything after that.
fn is_dev_null_nameline(line: &[u8]) -> bool {
    match line.strip_prefix(b"/dev/null") {
        Some([]) => true,
        Some([next, ..]) => next.is_ascii_whitespace(),
        None => false,
    }
}
584
/// Number of `/` characters in `prefix`.
fn count_slashes_in_prefix(prefix: &str) -> usize {
    prefix.matches('/').count()
}
588
589fn guess_p_value_from_nameline(line: &[u8], setup_prefix: Option<&str>) -> Option<usize> {
591 if is_dev_null_nameline(line) {
592 return None;
593 }
594 let name = find_name_traditional(line, None, 0)?;
595 let name_str = String::from_utf8_lossy(&name);
596 if !name_str.contains('/') {
597 return Some(0);
598 }
599 let pfx = setup_prefix.filter(|p| !p.is_empty())?;
600 if name_str.starts_with(pfx) {
601 return Some(count_slashes_in_prefix(pfx));
602 }
603 let slash = name_str.find('/')?;
604 let rest = name_str.get(slash + 1..)?;
605 if rest.starts_with(pfx) {
606 return Some(count_slashes_in_prefix(pfx) + 1);
607 }
608 None
609}
610
/// Returns the regex used by `has_epoch_timestamp`, compiled on first use and
/// cached in a function-local `OnceLock`.
///
/// The pattern matches `HH:MM:00`, an optional all-zero fraction, one space
/// and a zone offset. Capture groups: 1 = hour, 2 = minute, 3 = signed zone
/// offset (`+HHMM` or `+HH:MM`). It is anchored at the start only.
fn epoch_stamp_regex() -> &'static Regex {
    static RE: OnceLock<Regex> = OnceLock::new();
    RE.get_or_init(|| {
        #[allow(clippy::expect_used)]
        // The pattern is a constant, so failing to compile it is a programming error.
        Regex::new(r"^([0-2][0-9]):([0-5][0-9]):00(?:\.0+)? ([-+][0-2][0-9]:?[0-5][0-9])")
            .expect("epoch stamp regex is a valid constant pattern")
    })
}
620
621fn has_epoch_timestamp(nameline: &[u8]) -> bool {
623 let Some(tab) = nameline.iter().position(|&b| b == b'\t') else {
624 return false;
625 };
626 let mut ts = &nameline[tab + 1..];
627 let epoch_hour = if let Some(r) = ts.strip_prefix(b"1969-12-31 ") {
628 ts = r;
629 24i32
630 } else if let Some(r) = ts.strip_prefix(b"1970-01-01 ") {
631 ts = r;
632 0i32
633 } else {
634 return false;
635 };
636 let end = ts.iter().position(|&b| b == b'\n').unwrap_or(ts.len());
637 let stamp = &ts[..end];
638 let stamp_str = match std::str::from_utf8(stamp) {
639 Ok(s) => s,
640 Err(_) => return false,
641 };
642 let caps = match epoch_stamp_regex().captures(stamp_str) {
643 Some(c) => c,
644 None => return false,
645 };
646 let hour: i32 = caps
647 .get(1)
648 .and_then(|m| m.as_str().parse().ok())
649 .unwrap_or(-1);
650 let minute: i32 = caps
651 .get(2)
652 .and_then(|m| m.as_str().parse().ok())
653 .unwrap_or(-1);
654 let tz_s = match caps.get(3).map(|m| m.as_str()) {
655 Some(s) if !s.is_empty() => s,
656 _ => return false,
657 };
658 if hour < 0 || minute < 0 {
659 return false;
660 }
661 let tz_byte = tz_s.as_bytes()[0];
662 let tz_rest = &tz_s[1..];
663 let zoneoffset: i32 = if let Some(colon_pos) = tz_rest.find(':') {
664 let h: i32 = tz_rest[..colon_pos].parse().unwrap_or(0);
665 let mm: i32 = tz_rest[colon_pos + 1..].parse().unwrap_or(0);
666 h * 60 + mm
667 } else if tz_rest.len() >= 4 {
668 let n: i32 = tz_rest[..4].parse().unwrap_or(0);
669 (n / 100) * 60 + (n % 100)
670 } else {
671 return false;
672 };
673 let zoneoffset = if tz_byte == b'-' {
674 -zoneoffset
675 } else {
676 zoneoffset
677 };
678 hour * 60 + minute - zoneoffset == epoch_hour * 60
679}
680
681fn parse_traditional_patch_pair(
683 old_line: &[u8],
684 new_line: &[u8],
685 strip: usize,
686 p_guess: &mut Option<usize>,
687 setup_prefix: Option<&str>,
688) -> Result<FilePatch> {
689 let old_p = old_line.strip_prefix(b"--- ").unwrap_or(old_line);
690 let new_p = new_line.strip_prefix(b"+++ ").unwrap_or(new_line);
691
692 if p_guess.is_none() {
693 let p = guess_p_value_from_nameline(old_p, setup_prefix);
694 let q = guess_p_value_from_nameline(new_p, setup_prefix);
695 let chosen = match (p, q) {
696 (None, None) => None,
697 (Some(a), None) => Some(a),
698 (None, Some(b)) => Some(b),
699 (Some(a), Some(b)) if a == b => Some(a),
700 _ => None,
701 };
702 *p_guess = chosen;
703 }
704 let p_val = p_guess.unwrap_or(strip);
705
706 let mut fp = FilePatch {
707 diff_old_path: None,
708 diff_new_path: None,
709 old_path: None,
710 new_path: None,
711 saw_old_header: true,
712 saw_new_header: true,
713 old_mode: None,
714 new_mode: None,
715 old_mode_line: None,
716 new_mode_line: None,
717 is_new: false,
718 is_deleted: false,
719 is_rename: false,
720 is_copy: false,
721 similarity_index: None,
722 dissimilarity_index: None,
723 old_oid: None,
724 new_oid: None,
725 binary_patch: None,
726 is_binary: false,
727 hunks: Vec::new(),
728 ws_rule: 0,
729 is_toplevel_relative: false,
730 };
731
732 if is_dev_null_nameline(old_p) {
733 fp.is_new = true;
734 let name = find_name_traditional(new_p, None, p_val).ok_or_else(|| {
735 Error::Message("unable to find filename in traditional patch".to_string())
736 })?;
737 fp.new_path = Some(bytes_to_path_string(&name)?);
738 } else if is_dev_null_nameline(new_p) {
739 fp.is_deleted = true;
740 let name = find_name_traditional(old_p, None, p_val).ok_or_else(|| {
741 Error::Message("unable to find filename in traditional patch".to_string())
742 })?;
743 fp.old_path = Some(bytes_to_path_string(&name)?);
744 } else {
745 let first_name = find_name_traditional(old_p, None, p_val).ok_or_else(|| {
746 Error::Message("unable to find filename in traditional patch".to_string())
747 })?;
748 let name = find_name_traditional(new_p, Some(&first_name), p_val).ok_or_else(|| {
749 Error::Message("unable to find filename in traditional patch".to_string())
750 })?;
751 let name_str = bytes_to_path_string(&name)?;
752 if has_epoch_timestamp(old_p) {
753 fp.is_new = true;
754 fp.new_path = Some(name_str);
755 } else if has_epoch_timestamp(new_p) {
756 fp.is_deleted = true;
757 fp.old_path = Some(name_str);
758 } else {
759 fp.old_path = Some(name_str.clone());
762 fp.new_path = Some(name_str);
763 }
764 }
765
766 Ok(fp)
767}
768
/// Derives the default file name from a `diff --git` header line.
///
/// A name is returned only when both paths on the line, after dropping
/// `p_value` leading components from each, are identical. Returns `None` when
/// the two sides differ or the line cannot be parsed.
fn git_header_def_name(line: &str, p_value: usize) -> Option<String> {
    let rest = line.strip_prefix("diff --git ").unwrap_or(line);
    let rest_b = rest.as_bytes();

    // Case 1: the first path is C-style quoted.
    if rest_b.first() == Some(&b'"') {
        let (first_decoded, second_raw) = unquote_c_style_diff_prefix(rest)?;
        let rel_first = skip_tree_prefix_bytes(&first_decoded, p_value)?;
        let second = second_raw.trim_start_matches(|c: char| c.is_ascii_whitespace());
        if second.is_empty() {
            return None;
        }
        if second.as_bytes().first() == Some(&b'"') {
            // Second path quoted as well: compare the decoded, stripped names.
            let (second_decoded, _) = unquote_c_style_diff_prefix(second)?;
            let rel2 = skip_tree_prefix_bytes(&second_decoded, p_value)?;
            if rel2 != rel_first {
                return None;
            }
        } else {
            // Second path unquoted: it must match byte for byte after stripping.
            let rel2 = skip_tree_prefix_bytes(second.as_bytes(), p_value)?;
            if rel2.len() != rel_first.len() || rel2 != rel_first {
                return None;
            }
        }
        return bytes_to_path_string(rel_first).ok();
    }

    // The first path is unquoted. `name` is a sub-slice of `rest_b`, so the
    // pointer difference is its byte offset inside `rest_b`.
    let name = skip_tree_prefix_bytes(rest_b, p_value)?;
    let name_start = name.as_ptr() as usize - rest_b.as_ptr() as usize;

    // Case 2: unquoted first path followed by a quoted second path. Only the
    // first `"` found is considered; the loop returns on it either way.
    for offset in 0..name.len() {
        if name[offset] != b'"' {
            continue;
        }
        let second_slice = &rest_b[name_start + offset..];
        let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(second_slice).ok()?)?;
        let np = skip_tree_prefix_bytes(&decoded, p_value)?;
        let plen = np.len();
        // The decoded second name must equal the text before the quote, with
        // whitespace separating the two.
        if plen < offset
            && name.len() > plen
            && &name[..plen] == np
            && name[plen].is_ascii_whitespace()
        {
            return bytes_to_path_string(np).ok();
        }
        return None;
    }

    // Case 3: both paths unquoted. Try every space/TAB as the separator and
    // accept the first split where the stripped second path equals the text
    // before the separator.
    let line_len = rest.len().saturating_sub(name_start);
    let mut len = 0usize;
    while len < line_len {
        match rest_b[name_start + len] {
            b'\n' => return None,
            b'\t' | b' ' => {
                let after = name_start + len + 1;
                if after > name_start + line_len {
                    return None;
                }
                // Note: `?` gives up entirely when the remainder has too few
                // components to strip.
                let second =
                    skip_tree_prefix_bytes(&rest_b[after..name_start + line_len], p_value)?;
                let names_match =
                    name.len() >= len && second.len() >= len && name[..len] == second[..len];
                // The second name must end exactly where the first one does.
                let boundary_ok = second.get(len) == Some(&b'\n') || second.len() == len;
                if names_match && boundary_ok {
                    return bytes_to_path_string(&name[..len]).ok();
                }
            }
            _ => {}
        }
        len += 1;
    }
    None
}
842
843fn find_name_extended_header(rest: &str, p_extended: usize) -> Option<String> {
845 let rest = rest.trim_end_matches(['\r', '\n']);
846 let b = rest.as_bytes();
847 if b.first() == Some(&b'"') {
848 let (decoded, tail) = unquote_c_style_diff_prefix(rest)?;
849 if !tail.trim().is_empty() {
850 return None;
851 }
852 let skip = skip_tree_prefix_bytes(&decoded, p_extended)?;
853 return bytes_to_path_string(skip).ok();
854 }
855 let end = b
856 .iter()
857 .position(|&c| c == b'\t' || c == b'\n' || c == b'\r' || c == b' ')
858 .unwrap_or(b.len());
859 let name = find_name_common_bounded(b, None, p_extended, end)?;
860 bytes_to_path_string(&name).ok()
861}
862
/// Parses a whole patch into one [`FilePatch`] per file.
///
/// Two header styles are recognised: Git (`diff --git ...` followed by
/// extended headers) and traditional (`---` immediately followed by `+++`).
/// Lines that start neither kind of header are skipped.
///
/// * `strip` — number of leading path components to drop from `diff --git`
///   and `---` / `+++` paths; `rename` / `copy` paths drop one fewer.
/// * `input_name`, `recount` — forwarded unchanged to `parse_hunk`.
/// * `setup_prefix` — used, when non-empty, to guess the strip level of
///   traditional patches.
///
/// # Errors
///
/// Returns `Error::Message` for malformed `diff --git` paths, empty mode
/// values, non-UTF-8 paths, and any error from binary-section or hunk parsing.
pub fn parse_patch(
    input: &str,
    strip: usize,
    input_name: &str,
    recount: bool,
    setup_prefix: Option<&str>,
) -> Result<Vec<FilePatch>> {
    let lines: Vec<&str> = input.lines().collect();
    let mut patches = Vec::new();
    let mut i = 0;
    // Guessed once and then shared by all traditional patches in this input.
    let mut p_guess_for_traditional: Option<usize> = None;
    let setup_prefix_for_guess = setup_prefix.filter(|p| !p.is_empty());

    let p_strip = strip;
    // `rename from` etc. carry no `a/` / `b/` component, hence one fewer to strip.
    let p_extended = strip.saturating_sub(1);

    while i < lines.len() {
        if lines[i].starts_with("diff --git ") {
            // --- Git-style file header ---
            let mut fp = FilePatch {
                diff_old_path: None,
                diff_new_path: None,
                old_path: None,
                new_path: None,
                saw_old_header: false,
                saw_new_header: false,
                old_mode: None,
                new_mode: None,
                old_mode_line: None,
                new_mode_line: None,
                is_new: false,
                is_deleted: false,
                is_rename: false,
                is_copy: false,
                similarity_index: None,
                dissimilarity_index: None,
                old_oid: None,
                new_oid: None,
                binary_patch: None,
                is_binary: false,
                hunks: Vec::new(),
                ws_rule: 0,
                is_toplevel_relative: true,
            };

            let header_line = lines[i];
            // Name shared by both sides, if the header is unambiguous.
            let def_name = git_header_def_name(header_line, p_strip);

            let rest = &header_line["diff --git ".len()..];
            // Record the raw header paths and their stripped forms.
            if let Some((a, b)) = split_diff_git_paths(rest) {
                fp.diff_old_path = Some(a.clone());
                fp.diff_new_path = Some(b.clone());
                fp.old_path = Some(skip_tree_prefix_str(&a, p_strip).ok_or_else(|| {
                    Error::Message(format!("malformed old path in diff --git header: {a}"))
                })?);
                fp.new_path = Some(skip_tree_prefix_str(&b, p_strip).ok_or_else(|| {
                    Error::Message(format!("malformed new path in diff --git header: {b}"))
                })?);
            }
            i += 1;

            // Extended headers run until `---`, the next file, or the first hunk.
            while i < lines.len()
                && !lines[i].starts_with("--- ")
                && !lines[i].starts_with("diff --git ")
                && !lines[i].starts_with("@@ ")
            {
                let line = lines[i];
                // 1-based line number, used in errors and `*_mode_line`.
                let line_no = i + 1;
                if let Some(val) = line.strip_prefix("old mode ") {
                    let v = val.trim_end_matches('\r').trim_end();
                    if v.is_empty() {
                        return Err(Error::Message(format!(
                            "invalid mode on line {line_no}: {line}"
                        )));
                    }
                    fp.old_mode = Some(v.to_string());
                    fp.old_mode_line = Some(line_no);
                } else if let Some(val) = line.strip_prefix("new mode ") {
                    let v = val.trim_end_matches('\r').trim_end();
                    if v.is_empty() {
                        return Err(Error::Message(format!(
                            "invalid mode on line {line_no}: {line}"
                        )));
                    }
                    fp.new_mode = Some(v.to_string());
                    fp.new_mode_line = Some(line_no);
                } else if let Some(val) = line.strip_prefix("new file mode ") {
                    let v = val.trim_end_matches('\r').trim_end();
                    if v.is_empty() {
                        return Err(Error::Message(format!(
                            "invalid mode on line {line_no}: {line}"
                        )));
                    }
                    fp.is_new = true;
                    fp.new_mode = Some(v.to_string());
                    fp.new_mode_line = Some(line_no);
                } else if let Some(val) = line.strip_prefix("deleted file mode ") {
                    let v = val.trim_end_matches('\r').trim_end();
                    if v.is_empty() {
                        return Err(Error::Message(format!(
                            "invalid mode on line {line_no}: {line}"
                        )));
                    }
                    fp.is_deleted = true;
                    fp.old_mode = Some(v.to_string());
                    fp.old_mode_line = Some(line_no);
                } else if let Some(val) = line.strip_prefix("rename from ") {
                    // An unparsable path still marks the rename; the earlier path is kept.
                    fp.is_rename = true;
                    if let Some(p) = find_name_extended_header(val, p_extended) {
                        fp.old_path = Some(p);
                    }
                } else if let Some(val) = line.strip_prefix("rename to ") {
                    fp.is_rename = true;
                    if let Some(p) = find_name_extended_header(val, p_extended) {
                        fp.new_path = Some(p);
                    }
                } else if let Some(val) = line.strip_prefix("copy from ") {
                    fp.is_copy = true;
                    if let Some(p) = find_name_extended_header(val, p_extended) {
                        fp.old_path = Some(p);
                    }
                } else if let Some(val) = line.strip_prefix("copy to ") {
                    fp.is_copy = true;
                    if let Some(p) = find_name_extended_header(val, p_extended) {
                        fp.new_path = Some(p);
                    }
                } else if let Some(val) = line.strip_prefix("similarity index ") {
                    // Non-numeric values are silently dropped.
                    fp.similarity_index = val.trim_end_matches('%').parse().ok();
                } else if let Some(val) = line.strip_prefix("dissimilarity index ") {
                    fp.dissimilarity_index = val.trim_end_matches('%').parse().ok();
                } else if let Some(val) = line.strip_prefix("index ") {
                    let mut parts = val.split_whitespace();
                    // First token: `<old>..<new>` object ids.
                    let hash_part = parts.next().unwrap_or("");
                    if let Some((old, new)) = hash_part.split_once("..") {
                        fp.old_oid = Some(old.to_string());
                        fp.new_oid = Some(new.to_string());
                    }
                    // Optional second token: the mode, stored as the old mode.
                    if let Some(mode_tok) = parts.next() {
                        let v = mode_tok.trim_end_matches('\r').trim_end();
                        if !v.is_empty() {
                            fp.old_mode = Some(v.to_string());
                            fp.old_mode_line = Some(line_no);
                        }
                    }
                } else if line == "GIT binary patch" {
                    // The binary section ends the extended headers; resume
                    // right after it.
                    let (binary_patch, next_i) = parse_binary_patch(&lines, i + 1)?;
                    fp.binary_patch = Some(binary_patch);
                    fp.is_binary = true;
                    i = next_i;
                    break;
                } else if line.starts_with("Binary files ") && line.ends_with(" differ") {
                    fp.is_binary = true;
                }
                i += 1;
            }

            // Fill in whichever side is still unknown from the header's common name.
            if let Some(dn) = def_name {
                if fp.old_path.is_none() {
                    fp.old_path = Some(dn.clone());
                }
                if fp.new_path.is_none() {
                    fp.new_path = Some(dn);
                }
            }

            // Optional `---` / `+++` pair; when parsable it overrides the paths so far.
            if i < lines.len() && lines[i].starts_with("--- ") {
                let old_p = lines[i]["--- ".len()..].trim_end_matches(['\r', '\n']);
                let old_b = old_p.as_bytes();
                if is_dev_null_nameline(old_b) {
                    fp.old_path = Some("/dev/null".to_string());
                } else if let Some(p) = find_name_tab_terminated(old_b, p_strip) {
                    fp.old_path = Some(bytes_to_path_string(&p)?);
                }
                fp.saw_old_header = true;
                i += 1;
                if i < lines.len() && lines[i].starts_with("+++ ") {
                    let new_p = lines[i]["+++ ".len()..].trim_end_matches(['\r', '\n']);
                    let new_b = new_p.as_bytes();
                    if is_dev_null_nameline(new_b) {
                        fp.new_path = Some("/dev/null".to_string());
                    } else if let Some(p) = find_name_tab_terminated(new_b, p_strip) {
                        fp.new_path = Some(bytes_to_path_string(&p)?);
                    }
                    fp.saw_new_header = true;
                    i += 1;
                }
            }

            // Consume every hunk that directly follows the header.
            while i < lines.len() && lines[i].starts_with("@@ ") {
                let (hunk, next_i) = parse_hunk(&lines, i, input_name, recount)?;
                fp.hunks.push(hunk);
                i = next_i;
            }

            sanitize_file_patch_headers(&mut fp);
            patches.push(fp);
        } else if lines[i].starts_with("--- ")
            && i + 1 < lines.len()
            && lines[i + 1].starts_with("+++ ")
        {
            // --- Traditional (non-Git) file header ---
            let old_line = lines[i].as_bytes();
            let new_line = lines[i + 1].as_bytes();
            let mut fp = parse_traditional_patch_pair(
                old_line,
                new_line,
                strip,
                &mut p_guess_for_traditional,
                setup_prefix_for_guess,
            )?;
            i += 2;

            while i < lines.len() && lines[i].starts_with("@@ ") {
                let (hunk, next_i) = parse_hunk(&lines, i, input_name, recount)?;
                fp.hunks.push(hunk);
                i = next_i;
            }

            sanitize_file_patch_headers(&mut fp);
            patches.push(fp);
        } else {
            // Anything else (commit message, stray text) is ignored.
            i += 1;
        }
    }

    Ok(patches)
}
1104
1105fn parse_binary_patch(lines: &[&str], mut i: usize) -> Result<(BinaryPatchPayload, usize)> {
1107 let (forward_compressed, forward_declared_size) = parse_binary_literal(lines, &mut i)?;
1108 let (reverse_compressed, reverse_declared_size) =
1109 if i < lines.len() && lines[i].starts_with("literal ") {
1110 parse_binary_literal(lines, &mut i)?
1111 } else {
1112 (Vec::new(), 0)
1113 };
1114
1115 Ok((
1116 BinaryPatchPayload {
1117 forward_compressed,
1118 forward_declared_size,
1119 reverse_compressed,
1120 reverse_declared_size,
1121 },
1122 i,
1123 ))
1124}
1125
1126fn parse_binary_literal(lines: &[&str], i: &mut usize) -> Result<(Vec<u8>, usize)> {
1128 let header = lines.get(*i).copied().unwrap_or_default();
1129 let Some(size_str) = header.strip_prefix("literal ") else {
1130 return Err(Error::Message(format!(
1131 "unsupported binary patch section: '{header}'"
1132 )));
1133 };
1134 let declared_size: usize = size_str
1135 .trim()
1136 .parse()
1137 .map_err(|e: std::num::ParseIntError| {
1138 Error::Message(format!("invalid binary patch literal size: {e}"))
1139 })?;
1140 *i += 1;
1141
1142 let mut compressed = Vec::new();
1143 while *i < lines.len() {
1144 let line = lines[*i];
1145 if line.is_empty() {
1146 *i += 1;
1147 break;
1148 }
1149 decode_binary_patch_line(line, &mut compressed)?;
1150 *i += 1;
1151 }
1152
1153 Ok((compressed, declared_size))
1154}
1155
1156fn decode_binary_patch_line(line: &str, out: &mut Vec<u8>) -> Result<()> {
1158 let mut chars = line.chars();
1159 let Some(len_ch) = chars.next() else {
1160 return Err(Error::Message(
1161 "empty binary patch payload line".to_string(),
1162 ));
1163 };
1164 let expected_len = decode_binary_line_len(len_ch)?;
1165 let body = chars.as_str().as_bytes();
1166 let decoded = crate::git_binary_base85::decode_body(body, expected_len)
1167 .map_err(|e| Error::Message(format!("invalid binary patch base85: {e}")))?;
1168 out.extend_from_slice(&decoded);
1169 Ok(())
1170}
1171
1172fn decode_binary_line_len(ch: char) -> Result<usize> {
1173 if ch.is_ascii_uppercase() {
1174 return Ok((ch as u8 - b'A' + 1) as usize);
1175 }
1176 if ch.is_ascii_lowercase() {
1177 return Ok((ch as u8 - b'a' + 27) as usize);
1178 }
1179 Err(Error::Message(format!(
1180 "invalid binary patch line length marker: '{ch}'"
1181 )))
1182}
1183
1184pub fn inflate_binary_payload(compressed: &[u8]) -> Result<Vec<u8>> {
1186 use flate2::read::ZlibDecoder;
1187 use std::io::Read;
1188
1189 let mut decoder = ZlibDecoder::new(compressed);
1190 let mut out = Vec::new();
1191 decoder
1192 .read_to_end(&mut out)
1193 .map_err(|e| Error::Message(format!("failed to inflate binary patch payload: {e}")))?;
1194 Ok(out)
1195}
1196
1197fn split_diff_git_paths(s: &str) -> Option<(String, String)> {
1199 let s = s.trim_end_matches(['\r', '\n']);
1200
1201 if s.as_bytes().first() == Some(&b'"') {
1202 let (first, rest_raw) = unquote_c_style_diff_prefix(s)?;
1203 let rest = rest_raw.trim_start_matches(|c: char| c.is_ascii_whitespace());
1204 if rest.is_empty() {
1205 return None;
1206 }
1207 if rest.as_bytes().first() == Some(&b'"') {
1208 let (second, _) = unquote_c_style_diff_prefix(rest)?;
1209 return Some((
1210 String::from_utf8_lossy(&first).into_owned(),
1211 String::from_utf8_lossy(&second).into_owned(),
1212 ));
1213 }
1214 let second = rest;
1215 if second.len() != first.len() || second.as_bytes() != first.as_slice() {
1216 return None;
1217 }
1218 return Some((
1219 String::from_utf8_lossy(&first).into_owned(),
1220 second.to_string(),
1221 ));
1222 }
1223
1224 if let Some(pos) = s.find(" b/") {
1225 let a = &s[..pos];
1226 let b = &s[pos + 1..];
1227 return Some((a.to_string(), b.to_string()));
1228 }
1229 if s.starts_with("a/") {
1230 if let Some(pos) = s.find(" /dev/null") {
1231 let a = &s[..pos];
1232 return Some((a.to_string(), "/dev/null".to_string()));
1233 }
1234 }
1235 if let Some(b) = s.strip_prefix("/dev/null ") {
1236 return Some(("/dev/null".to_string(), b.to_string()));
1237 }
1238
1239 let name = s.as_bytes();
1240 let line_len = name.len();
1241 let mut len = 0usize;
1242 while len < line_len {
1243 match name[len] {
1244 b'\n' => return None,
1245 b'\t' | b' ' => {
1246 if len + 1 > line_len {
1247 return None;
1248 }
1249 let second = &name[len + 1..line_len];
1250 let names_match =
1251 name.len() >= len && second.len() >= len && name[..len] == second[..len];
1252 let boundary_ok = second.get(len) == Some(&b'\n') || second.len() == len;
1253 if names_match && boundary_ok {
1254 return Some((
1255 String::from_utf8_lossy(&name[..len]).into_owned(),
1256 String::from_utf8_lossy(second).into_owned(),
1257 ));
1258 }
1259 }
1260 _ => {}
1261 }
1262 len += 1;
1263 }
1264 None
1265}
1266
1267fn parse_hunk(
1269 lines: &[&str],
1270 start: usize,
1271 input_name: &str,
1272 recount: bool,
1273) -> Result<(Hunk, usize)> {
1274 let header = lines[start];
1275 let (old_start, old_count, new_start, new_count) = parse_hunk_header(header)
1276 .map_err(|e| Error::Message(format!("invalid hunk header: {header}: {e}")))?;
1277
1278 let mut hunk = Hunk {
1279 old_start,
1280 old_count,
1281 new_start,
1282 new_count,
1283 first_body_line: start + 2,
1284 lines: Vec::new(),
1285 };
1286
1287 let mut old_seen = 0usize;
1291 let mut new_seen = 0usize;
1292 let mut i = start + 1;
1293 while i < lines.len() {
1294 let line = lines[i];
1295 if line.starts_with("@@ ") || line.starts_with("diff --git ") {
1296 break;
1297 }
1298 if line.starts_with("--- ") || line.starts_with("+++ ") {
1301 break;
1302 }
1303 if line == "-- " {
1304 break;
1306 }
1307 if let Some(rest) = line.strip_prefix('+') {
1308 hunk.lines.push(HunkLine::Add(rest.to_string()));
1309 new_seen += 1;
1310 } else if let Some(rest) = line.strip_prefix('-') {
1311 hunk.lines.push(HunkLine::Remove(rest.to_string()));
1312 old_seen += 1;
1313 } else if line.is_empty() {
1314 hunk.lines.push(HunkLine::Context(String::new()));
1315 old_seen += 1;
1316 new_seen += 1;
1317 } else if let Some(rest) = line.strip_prefix(' ') {
1318 hunk.lines.push(HunkLine::Context(rest.to_string()));
1320 old_seen += 1;
1321 new_seen += 1;
1322 } else if line.starts_with('\\') {
1323 hunk.lines.push(HunkLine::NoNewline);
1324 } else {
1325 break;
1327 }
1328 i += 1;
1329 }
1330
1331 if recount {
1332 hunk.old_count = old_seen;
1333 hunk.new_count = new_seen;
1334 } else if old_seen < old_count || new_seen < new_count {
1335 return Err(Error::Message(format!(
1336 "error: corrupt patch at {input_name}:{}",
1337 i + 1
1338 )));
1339 }
1340
1341 Ok((hunk, i))
1342}
1343
1344fn parse_hunk_header(line: &str) -> Result<(usize, usize, usize, usize)> {
1346 let trimmed = line.trim_start_matches('@').trim_start();
1348 let end = trimmed.find(" @@").unwrap_or(trimmed.len());
1349 let range_part = &trimmed[..end];
1350
1351 let parts: Vec<&str> = range_part.split_whitespace().collect();
1352 if parts.len() < 2 {
1353 return Err(Error::Message(
1354 "expected old and new range in hunk header".to_string(),
1355 ));
1356 }
1357
1358 let (old_start, old_count) = parse_range(parts[0].trim_start_matches('-'))?;
1359 let (new_start, new_count) = parse_range(parts[1].trim_start_matches('+'))?;
1360
1361 Ok((old_start, old_count, new_start, new_count))
1362}
1363
1364fn parse_range(s: &str) -> Result<(usize, usize)> {
1366 if let Some((start_s, count_s)) = s.split_once(',') {
1367 let start = start_s
1368 .parse::<usize>()
1369 .map_err(|e| Error::Message(e.to_string()))?;
1370 let count = count_s
1371 .parse::<usize>()
1372 .map_err(|e| Error::Message(e.to_string()))?;
1373 Ok((start, count))
1374 } else {
1375 let n: usize = s
1376 .parse()
1377 .map_err(|e: std::num::ParseIntError| Error::Message(e.to_string()))?;
1378 Ok((n, 1))
1379 }
1380}
1381
1382#[cfg(test)]
1383mod tests {
1384 use super::*;
1385
1386 #[test]
1387 fn parses_simple_git_diff_into_one_file_patch() {
1388 let input = "diff --git a/foo.txt b/foo.txt\n\
1389 index e69de29..d95f3ad 100644\n\
1390 --- a/foo.txt\n\
1391 +++ b/foo.txt\n\
1392 @@ -0,0 +1 @@\n\
1393 +hello\n";
1394 let patches = parse_patch(input, 1, "<test>", false, None).expect("parse");
1395 assert_eq!(patches.len(), 1);
1396 let fp = &patches[0];
1397 assert_eq!(fp.old_path.as_deref(), Some("foo.txt"));
1398 assert_eq!(fp.new_path.as_deref(), Some("foo.txt"));
1399 assert_eq!(fp.hunks.len(), 1);
1400 let hunk = &fp.hunks[0];
1401 assert_eq!(hunk.new_count, 1);
1402 assert!(matches!(hunk.lines.as_slice(), [HunkLine::Add(s)] if s == "hello"));
1403 }
1404
1405 #[test]
1406 fn parses_new_file_mode_and_deletion() {
1407 let new_file = "diff --git a/n b/n\n\
1408 new file mode 100644\n\
1409 index 0000000..9daeafb\n\
1410 --- /dev/null\n\
1411 +++ b/n\n\
1412 @@ -0,0 +1 @@\n\
1413 +x\n";
1414 let patches = parse_patch(new_file, 1, "<test>", false, None).expect("parse");
1415 assert_eq!(patches.len(), 1);
1416 assert!(patches[0].is_new);
1417 assert_eq!(patches[0].new_mode.as_deref(), Some("100644"));
1418
1419 let deleted = "diff --git a/d b/d\n\
1420 deleted file mode 100644\n\
1421 index 9daeafb..0000000\n\
1422 --- a/d\n\
1423 +++ /dev/null\n\
1424 @@ -1 +0,0 @@\n\
1425 -x\n";
1426 let patches = parse_patch(deleted, 1, "<test>", false, None).expect("parse");
1427 assert!(patches[0].is_deleted);
1428 }
1429
1430 #[test]
1431 fn corrupt_hunk_is_reported_with_input_name_and_line() {
1432 let input = "--- a/x\n\
1434 +++ b/x\n\
1435 @@ -1,3 +1,3 @@\n\
1436 one\n";
1437 let err = parse_patch(input, 1, "patch", false, None)
1438 .err()
1439 .expect("should fail");
1440 assert_eq!(err.to_string(), "error: corrupt patch at patch:4");
1441 }
1442
1443 #[test]
1444 fn parse_hunk_header_parses_ranges() {
1445 assert_eq!(parse_hunk_header("@@ -1,3 +2,4 @@").unwrap(), (1, 3, 2, 4));
1446 assert_eq!(parse_hunk_header("@@ -5 +6 @@ ctx").unwrap(), (5, 1, 6, 1));
1447 }
1448
1449 #[test]
1450 fn invalid_hunk_header_chains_inner_message() {
1451 let err = parse_hunk_header("@@ -x +1 @@").err().expect("fail");
1452 assert_eq!(err.to_string(), "invalid digit found in string");
1454 }
1455}