Skip to main content

grit_lib/
apply.rs

1//! Unified/`git`-diff patch parsing for `grit apply`.
2//!
3//! This is the self-contained *parse* core extracted from `grit apply`: it turns
4//! patch text into structured [`FilePatch`]/[`Hunk`] data with no I/O, no
5//! environment access, and no CLI dependencies. The worktree/index application
6//! engine and all CLI output still live in the `grit` crate; only the
7//! text-to-structured-data layer lives here so it can be unit-tested and reused
8//! as a library.
9
10use crate::error::{Error, Result};
11use regex::Regex;
12use std::sync::OnceLock;
13
14/// A single hunk in a unified diff.
15#[derive(Debug, Clone)]
16pub struct Hunk {
17    /// 1-based line number in the old file.
18    pub old_start: usize,
19    /// Number of lines in the old side.
20    pub old_count: usize,
21    /// 1-based line number in the new file.
22    pub new_start: usize,
23    /// Number of lines on the new side.
24    pub new_count: usize,
25    /// 1-based line number in the patch file of the first hunk body line (line after `@@`).
26    pub first_body_line: usize,
27    /// Lines of the hunk body (' ', '+', '-' prefixed, or bare '\' no newline).
28    pub lines: Vec<HunkLine>,
29}
30
/// One body line of a hunk.
///
/// NOTE(review): the visible code does not show whether the carried `String` still includes the
/// leading `' '`/`'+'`/`'-'` marker — confirm against the hunk parser.
#[derive(Debug, Clone)]
pub enum HunkLine {
    /// A context line (`' '` marker in the patch).
    Context(String),
    /// An added line (`'+'` marker in the patch).
    Add(String),
    /// A removed line (`'-'` marker in the patch).
    Remove(String),
    /// The `\ No newline at end of file` marker.
    NoNewline,
}
39
40/// Represents one file in a unified diff.
41#[derive(Debug, Clone)]
42pub struct FilePatch {
43    /// Path from `diff --git` old side (`a/...`) when present.
44    pub diff_old_path: Option<String>,
45    /// Path from `diff --git` new side (`b/...`) when present.
46    pub diff_new_path: Option<String>,
47    /// Path on the old side (None for new files).
48    pub old_path: Option<String>,
49    /// Path on the new side (None for deleted files).
50    pub new_path: Option<String>,
51    /// Whether an explicit `---` header line was present.
52    pub saw_old_header: bool,
53    /// Whether an explicit `+++` header line was present.
54    pub saw_new_header: bool,
55    /// Old mode from extended header.
56    pub old_mode: Option<String>,
57    /// New mode from extended header.
58    pub new_mode: Option<String>,
59    /// Source line (1-based) of `old mode` / `deleted file mode` for diagnostics.
60    pub old_mode_line: Option<usize>,
61    /// Source line (1-based) of `new mode` / `new file mode` for diagnostics.
62    pub new_mode_line: Option<usize>,
63    /// Whether this file is being newly created.
64    pub is_new: bool,
65    /// Whether this file is being deleted.
66    pub is_deleted: bool,
67    /// Whether this is a rename.
68    pub is_rename: bool,
69    /// Whether this is a copy.
70    pub is_copy: bool,
71    /// Similarity index (e.g., 90 for 90%).
72    pub similarity_index: Option<u32>,
73    /// Dissimilarity index for rewrites.
74    pub dissimilarity_index: Option<u32>,
75    /// Old blob OID from the index header (abbreviated).
76    pub old_oid: Option<String>,
77    /// New blob OID from the index header (abbreviated).
78    pub new_oid: Option<String>,
79    /// Parsed binary patch payload (`GIT binary patch`) if present.
80    pub binary_patch: Option<BinaryPatchPayload>,
81    /// Whether this is a binary change (`GIT binary patch` payload or a
82    /// `Binary files ... differ` marker); stat/numstat show `Bin` / `-`.
83    pub is_binary: bool,
84    /// Hunks to apply.
85    pub hunks: Vec<Hunk>,
86    /// Merged `core.whitespace` + `whitespace` attribute (Git `ws_rule`); `0` before assignment.
87    pub ws_rule: u32,
88    /// Git `patch->is_toplevel_relative`: set for `diff --git` patches only. When false, paths are
89    /// prefixed with the setup directory (work-tree-relative CWD) like `prefix_patch` in Git.
90    pub is_toplevel_relative: bool,
91}
92
/// Payload of a binary patch: compressed base85 chunks for applying in either direction.
///
/// NOTE(review): whether the byte vectors are stored base85-decoded is not visible here —
/// confirm against the binary-patch parser.
#[derive(Debug, Clone)]
pub struct BinaryPatchPayload {
    /// Compressed data used when applying the patch forward.
    pub forward_compressed: Vec<u8>,
    /// Size declared in the patch for the forward data.
    pub forward_declared_size: usize,
    /// Compressed data used when applying the patch in reverse.
    pub reverse_compressed: Vec<u8>,
    /// Size declared in the patch for the reverse data.
    pub reverse_declared_size: usize,
}
101
102impl FilePatch {
103    /// Effective path for the file.
104    /// For deletions, use old_path (new is /dev/null).
105    /// For additions, use new_path (old is /dev/null).
106    /// Otherwise prefer new_path.
107    pub fn effective_path(&self) -> Option<&str> {
108        if self.is_deleted {
109            return self
110                .old_path
111                .as_deref()
112                .filter(|p| *p != "/dev/null")
113                .or(self.new_path.as_deref().filter(|p| *p != "/dev/null"));
114        }
115        if self.is_new {
116            return self
117                .new_path
118                .as_deref()
119                .filter(|p| *p != "/dev/null")
120                .or(self.old_path.as_deref().filter(|p| *p != "/dev/null"));
121        }
122        self.new_path
123            .as_deref()
124            .filter(|p| *p != "/dev/null")
125            .or(self.old_path.as_deref().filter(|p| *p != "/dev/null"))
126    }
127
128    /// Source path to read preimage content from.
129    ///
130    /// For rename/copy patches this is the old path, otherwise this is the
131    /// effective path.
132    pub fn source_path(&self) -> Option<&str> {
133        if self.is_rename || self.is_copy {
134            self.old_path
135                .as_deref()
136                .filter(|p| *p != "/dev/null")
137                .or(self.effective_path())
138        } else if let (Some(old), Some(new)) = (self.old_path.as_deref(), self.new_path.as_deref())
139        {
140            if old != "/dev/null" && new != "/dev/null" && old != new {
141                Some(old)
142            } else {
143                self.effective_path()
144            }
145        } else {
146            self.effective_path()
147        }
148    }
149
150    /// Destination path to write postimage content to.
151    ///
152    /// For additions/renames/copies this is the new path, otherwise this is
153    /// the effective path.
154    pub fn target_path(&self) -> Option<&str> {
155        if self.is_new || self.is_rename || self.is_copy {
156            self.new_path
157                .as_deref()
158                .filter(|p| *p != "/dev/null")
159                .or(self.effective_path())
160        } else {
161            self.effective_path()
162        }
163    }
164
165    /// True when this patch touches a gitlink/submodule (mode `160000`).
166    pub fn involves_gitlink(&self) -> bool {
167        self.old_mode.as_deref() == Some("160000") || self.new_mode.as_deref() == Some("160000")
168    }
169
170    /// Work-tree-relative path for filesystem IO and `.gitattributes` (Git `prefix_patch`).
171    pub fn worktree_rel_operational(&self, adjusted: &str, setup_prefix: &str) -> String {
172        if self.is_toplevel_relative {
173            adjusted.to_string()
174        } else {
175            format!("{setup_prefix}{adjusted}")
176        }
177    }
178}
179
/// Remove surrounding whitespace (and any trailing `\r`) from a parsed header token, in place.
///
/// CRLF patches would otherwise leave values such as `new mode 160000\r`, which then fail to
/// match submodule handling (`t4137-apply-submodule`).
fn sanitize_patch_header_value(s: &mut String) {
    let cleaned = s.trim().trim_end_matches('\r');
    // Only reallocate when trimming actually removed something.
    if cleaned.len() != s.len() {
        *s = cleaned.to_owned();
    }
}
187
/// Drop a leaked `a/` or `b/` prefix (Git's `diff --git a/... b/...` convention) from a path.
///
/// Binary patches often have no `---`/`+++` lines to resynchronize names, so without this a
/// path like `a/bin.png` would be treated as a real file path (`t4108-apply-threeway`).
/// Leading `./` segments are ignored while looking for the prefix. `/dev/null` and paths
/// without such a prefix are returned unchanged.
fn strip_git_diff_path_prefix(path: &str) -> String {
    if path == "/dev/null" {
        return path.to_owned();
    }
    let candidate = path.trim_start_matches("./");
    candidate
        .strip_prefix("a/")
        .or_else(|| candidate.strip_prefix("b/"))
        .unwrap_or(path)
        .to_owned()
}
205
206fn sanitize_file_patch_headers(fp: &mut FilePatch) {
207    if let Some(ref mut s) = fp.old_mode {
208        sanitize_patch_header_value(s);
209        if s.is_empty() {
210            fp.old_mode = None;
211        }
212    }
213    if let Some(ref mut s) = fp.new_mode {
214        sanitize_patch_header_value(s);
215        if s.is_empty() {
216            fp.new_mode = None;
217        }
218    }
219    if let Some(ref mut s) = fp.old_oid {
220        sanitize_patch_header_value(s);
221    }
222    if let Some(ref mut s) = fp.new_oid {
223        sanitize_patch_header_value(s);
224    }
225    for ref mut s in [
226        &mut fp.diff_old_path,
227        &mut fp.diff_new_path,
228        &mut fp.old_path,
229        &mut fp.new_path,
230    ]
231    .into_iter()
232    .flatten()
233    {
234        sanitize_patch_header_value(s);
235        **s = strip_git_diff_path_prefix(s);
236    }
237}
238
/// Replace every run of consecutive `/` with a single `/` (Git `squash_slash`).
fn squash_slash_path(s: &str) -> String {
    let mut squashed = String::with_capacity(s.len());
    for c in s.chars() {
        // A slash right after an already emitted slash is part of the same run.
        if c == '/' && squashed.ends_with('/') {
            continue;
        }
        squashed.push(c);
    }
    squashed
}
256
/// Decode a C-style quoted string at the start of `line`.
///
/// Returns the decoded bytes together with whatever follows the closing `"`. Returns `None`
/// when `line` does not start with `"`, the quote is never closed, or an escape is not one of
/// `\a \b \f \n \r \t \v \\ \"` or a three-digit octal `\ooo` whose first digit is `0`-`3`.
/// Follows Git `unquote_c_style` / `quote.c` as used in diff headers.
fn unquote_c_style_diff_prefix(line: &str) -> Option<(Vec<u8>, &str)> {
    let bytes = line.as_bytes();
    if bytes.first() != Some(&b'"') {
        return None;
    }
    let mut decoded = Vec::new();
    // Cursor into `bytes`, starting right after the opening quote.
    let mut pos = 1usize;
    loop {
        // Running off the end means the closing quote is missing.
        let current = *bytes.get(pos)?;
        pos += 1;
        match current {
            b'"' => {
                // `pos` sits right after an ASCII byte, so it is a valid char boundary.
                let remainder = line.get(pos..)?;
                return Some((decoded, remainder));
            }
            b'\\' => {
                let escape = *bytes.get(pos)?;
                pos += 1;
                let byte = match escape {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b'f' => 0x0c,
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b'v' => 0x0b,
                    b'\\' => b'\\',
                    b'"' => b'"',
                    b'0'..=b'3' => {
                        let (second, third) = (*bytes.get(pos)?, *bytes.get(pos + 1)?);
                        let is_octal = |digit: u8| (b'0'..=b'7').contains(&digit);
                        if !is_octal(second) || !is_octal(third) {
                            return None;
                        }
                        pos += 2;
                        // First digit is at most 3, so the value always fits in one byte.
                        ((escape - b'0') << 6) | ((second - b'0') << 3) | (third - b'0')
                    }
                    _ => return None,
                };
                decoded.push(byte);
            }
            literal => decoded.push(literal),
        }
    }
}
320
321fn bytes_to_path_string(bytes: &[u8]) -> Result<String> {
322    let s = String::from_utf8(bytes.to_vec())
323        .map_err(|e| Error::Message(format!("diff path is not valid UTF-8: {e}")))?;
324    Ok(squash_slash_path(&s))
325}
326
/// Drop the first `p_value` `/`-terminated components of `line` (Git `skip_tree_prefix`).
///
/// With `p_value == 0` the input is returned untouched, so absolute paths are allowed.
/// Otherwise returns `None` when `line` has fewer than `p_value` slashes, or when the slash
/// that ends the skipped prefix is the very first byte.
fn skip_tree_prefix_bytes(line: &[u8], p_value: usize) -> Option<&[u8]> {
    if p_value == 0 {
        return Some(line);
    }
    // Position of the `p_value`-th slash.
    let cut = line
        .iter()
        .enumerate()
        .filter_map(|(idx, &byte)| (byte == b'/').then_some(idx))
        .nth(p_value - 1)?;
    if cut == 0 {
        None
    } else {
        Some(&line[cut + 1..])
    }
}
345
346/// Strip `p_value` leading `/`-separated components from a UTF-8 path (for `rename from` etc.).
347fn skip_tree_prefix_str(path: &str, p_value: usize) -> Option<String> {
348    let stripped = skip_tree_prefix_bytes(path.as_bytes(), p_value)?;
349    Some(String::from_utf8_lossy(stripped).into_owned())
350}
351
/// Length of a trailing ` ±HHMM` time zone (such as ` +0500`), or `0` if `line` does not end
/// with one.
fn sane_tz_len(line: &[u8]) -> usize {
    const TZ_LEN: usize = " +0500".len();
    let Some(start) = line.len().checked_sub(TZ_LEN) else {
        return 0;
    };
    match &line[start..] {
        [b' ', b'+' | b'-', digits @ ..] if digits.iter().all(u8::is_ascii_digit) => TZ_LEN,
        _ => 0,
    }
}
368
/// Length of a trailing ` ±HH:MM` time zone, or `0` if `line` does not end with one.
///
/// The suffix is space, sign, two hour digits, colon, two minute digits: 7 bytes.
fn tz_with_colon_len(line: &[u8]) -> usize {
    const SUFFIX_LEN: usize = 7;
    let Some(start) = line.len().checked_sub(SUFFIX_LEN) else {
        return 0;
    };
    match &line[start..] {
        [b' ', b'+' | b'-', h1, h2, b':', m1, m2]
            if [h1, h2, m1, m2].iter().all(|d| d.is_ascii_digit()) =>
        {
            SUFFIX_LEN
        }
        _ => 0,
    }
}
391
/// Length of a trailing date, or `0` if `line` does not end with one.
///
/// Accepts `YY-MM-DD` (8 bytes); when two more digits precede it, the date is taken as
/// `YYYY-MM-DD` (10 bytes).
fn date_len(line: &[u8]) -> usize {
    const SHORT_LEN: usize = "72-02-05".len();
    let Some(start) = line.len().checked_sub(SHORT_LEN) else {
        return 0;
    };
    let looks_like_date = matches!(
        &line[start..],
        [y1, y2, b'-', m1, m2, b'-', d1, d2]
            if [y1, y2, m1, m2, d1, d2].iter().all(|d| d.is_ascii_digit())
    );
    if !looks_like_date {
        return 0;
    }
    // Two further digits in front turn the two-digit year into a four-digit one.
    let four_digit_year =
        start >= 2 && line[start - 2..start].iter().all(u8::is_ascii_digit);
    if four_digit_year {
        SHORT_LEN + 2
    } else {
        SHORT_LEN
    }
}
419
/// Length of a trailing ` HH:MM:SS` (leading space included, 9 bytes), or `0` if `line` does
/// not end with one.
fn short_time_len(line: &[u8]) -> usize {
    const TIME_LEN: usize = " 07:01:32".len();
    let Some(start) = line.len().checked_sub(TIME_LEN) else {
        return 0;
    };
    match &line[start..] {
        [b' ', h1, h2, b':', m1, m2, b':', s1, s2]
            if [h1, h2, m1, m2, s1, s2].iter().all(|d| d.is_ascii_digit()) =>
        {
            TIME_LEN
        }
        _ => 0,
    }
}
441
442fn fractional_time_len(line: &[u8]) -> usize {
443    if line.is_empty() || !line[line.len() - 1].is_ascii_digit() {
444        return 0;
445    }
446    let mut p = line.len() - 1;
447    while p > 0 && line[p].is_ascii_digit() {
448        p -= 1;
449    }
450    if p == 0 || line[p] != b'.' {
451        return 0;
452    }
453    let n = short_time_len(&line[..p]);
454    if n == 0 {
455        return 0;
456    }
457    line.len() - p + n
458}
459
/// Number of space characters at the end of `line` (the whole length when it is all spaces).
fn trailing_spaces_len(line: &[u8]) -> usize {
    line.iter()
        .rev()
        .take_while(|&&byte| byte == b' ')
        .count()
}
473
474fn diff_timestamp_len(line: &[u8]) -> usize {
475    if line.is_empty() || !line[line.len() - 1].is_ascii_digit() {
476        return 0;
477    }
478    let mut end = line.len();
479    let mut n = sane_tz_len(&line[..end]);
480    if n == 0 {
481        n = tz_with_colon_len(&line[..end]);
482    }
483    if n == 0 {
484        return 0;
485    }
486    end -= n;
487
488    n = short_time_len(&line[..end]);
489    if n == 0 {
490        n = fractional_time_len(&line[..end]);
491    }
492    if n == 0 {
493        return 0;
494    }
495    end -= n;
496
497    n = date_len(&line[..end]);
498    if n == 0 {
499        return 0;
500    }
501    end -= n;
502
503    if end == 0 {
504        return 0;
505    }
506    match line[end - 1] {
507        b'\t' => {
508            end -= 1;
509            line.len() - end
510        }
511        b' ' => {
512            end -= trailing_spaces_len(&line[..end]);
513            line.len() - end
514        }
515        _ => 0,
516    }
517}
518
/// Git `find_name_common` with an explicit exclusive `end` bound.
///
/// Skips `p_value` leading `/`-terminated components of `line[..end]` and returns the rest.
///
/// * `def` is returned when the remaining name is empty, and also when the line has fewer than
///   `p_value` slashes (Git falls back to the default name in both cases).
/// * `def` is preferred over the parsed name when it is a strictly shorter prefix of it, so
///   `file` wins over `file.orig` or `file~`.
///
/// Returns `None` only when no name can be extracted and no default was supplied.
fn find_name_common_bounded(
    line: &[u8],
    def: Option<&[u8]>,
    p_value: usize,
    end: usize,
) -> Option<Vec<u8>> {
    let end = end.min(line.len());
    // Offset just past the `p_value`-th slash; later slashes belong to the name itself.
    let start = if p_value == 0 {
        Some(0)
    } else {
        line[..end]
            .iter()
            .enumerate()
            .filter_map(|(idx, &byte)| (byte == b'/').then_some(idx + 1))
            .nth(p_value - 1)
    };
    let name = match start {
        Some(begin) if begin < end => &line[begin..end],
        // Not enough components, or nothing left after stripping: use the default.
        _ => return def.map(<[u8]>::to_vec),
    };
    match def {
        Some(shorter) if shorter.len() < name.len() && name.starts_with(shorter) => {
            Some(shorter.to_vec())
        }
        _ => Some(name.to_vec()),
    }
}
553
554/// Git `find_name_traditional` on the line after `--- ` / `+++ ` (no prefix).
555fn find_name_traditional(line: &[u8], def: Option<&[u8]>, p_value: usize) -> Option<Vec<u8>> {
556    if line.first() == Some(&b'"') {
557        let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(line).ok()?)?;
558        let skip = skip_tree_prefix_bytes(&decoded, p_value)?;
559        return Some(skip.to_vec());
560    }
561    let ts = diff_timestamp_len(line);
562    let name_end = line.len().saturating_sub(ts);
563    find_name_common_bounded(line, def, p_value, name_end)
564}
565
566fn find_name_tab_terminated(line: &[u8], p_value: usize) -> Option<Vec<u8>> {
567    if line.first() == Some(&b'"') {
568        let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(line).ok()?)?;
569        let skip = skip_tree_prefix_bytes(&decoded, p_value)?;
570        return Some(skip.to_vec());
571    }
572    let end = line
573        .iter()
574        .position(|&b| b == b'\t' || b == b'\n' || b == b'\r')
575        .unwrap_or(line.len());
576    find_name_common_bounded(line, None, p_value, end)
577}
578
/// Whether a name line is `/dev/null`: the literal path followed by end of input or by ASCII
/// whitespace.
fn is_dev_null_nameline(line: &[u8]) -> bool {
    match line.strip_prefix(b"/dev/null") {
        Some([]) => true,
        Some([next, ..]) => next.is_ascii_whitespace(),
        None => false,
    }
}
584
/// Number of `/` characters in `prefix`.
fn count_slashes_in_prefix(prefix: &str) -> usize {
    prefix.matches('/').count()
}
588
589/// Git `guess_p_value` for traditional patches (`apply.c`). Uses `setup_git_directory` prefix.
590fn guess_p_value_from_nameline(line: &[u8], setup_prefix: Option<&str>) -> Option<usize> {
591    if is_dev_null_nameline(line) {
592        return None;
593    }
594    let name = find_name_traditional(line, None, 0)?;
595    let name_str = String::from_utf8_lossy(&name);
596    if !name_str.contains('/') {
597        return Some(0);
598    }
599    let pfx = setup_prefix.filter(|p| !p.is_empty())?;
600    if name_str.starts_with(pfx) {
601        return Some(count_slashes_in_prefix(pfx));
602    }
603    let slash = name_str.find('/')?;
604    let rest = name_str.get(slash + 1..)?;
605    if rest.starts_with(pfx) {
606        return Some(count_slashes_in_prefix(pfx) + 1);
607    }
608    None
609}
610
611fn epoch_stamp_regex() -> &'static Regex {
612    static RE: OnceLock<Regex> = OnceLock::new();
613    RE.get_or_init(|| {
614        // Provably infallible: the pattern is a fixed string literal that is a valid regex.
615        #[allow(clippy::expect_used)]
616        Regex::new(r"^([0-2][0-9]):([0-5][0-9]):00(?:\.0+)? ([-+][0-2][0-9]:?[0-5][0-9])")
617            .expect("epoch stamp regex is a valid constant pattern")
618    })
619}
620
621/// True when the `---`/`+++` line has a tab-separated epoch timestamp (Git `has_epoch_timestamp`).
622fn has_epoch_timestamp(nameline: &[u8]) -> bool {
623    let Some(tab) = nameline.iter().position(|&b| b == b'\t') else {
624        return false;
625    };
626    let mut ts = &nameline[tab + 1..];
627    let epoch_hour = if let Some(r) = ts.strip_prefix(b"1969-12-31 ") {
628        ts = r;
629        24i32
630    } else if let Some(r) = ts.strip_prefix(b"1970-01-01 ") {
631        ts = r;
632        0i32
633    } else {
634        return false;
635    };
636    let end = ts.iter().position(|&b| b == b'\n').unwrap_or(ts.len());
637    let stamp = &ts[..end];
638    let stamp_str = match std::str::from_utf8(stamp) {
639        Ok(s) => s,
640        Err(_) => return false,
641    };
642    let caps = match epoch_stamp_regex().captures(stamp_str) {
643        Some(c) => c,
644        None => return false,
645    };
646    let hour: i32 = caps
647        .get(1)
648        .and_then(|m| m.as_str().parse().ok())
649        .unwrap_or(-1);
650    let minute: i32 = caps
651        .get(2)
652        .and_then(|m| m.as_str().parse().ok())
653        .unwrap_or(-1);
654    let tz_s = match caps.get(3).map(|m| m.as_str()) {
655        Some(s) if !s.is_empty() => s,
656        _ => return false,
657    };
658    if hour < 0 || minute < 0 {
659        return false;
660    }
661    let tz_byte = tz_s.as_bytes()[0];
662    let tz_rest = &tz_s[1..];
663    let zoneoffset: i32 = if let Some(colon_pos) = tz_rest.find(':') {
664        let h: i32 = tz_rest[..colon_pos].parse().unwrap_or(0);
665        let mm: i32 = tz_rest[colon_pos + 1..].parse().unwrap_or(0);
666        h * 60 + mm
667    } else if tz_rest.len() >= 4 {
668        let n: i32 = tz_rest[..4].parse().unwrap_or(0);
669        (n / 100) * 60 + (n % 100)
670    } else {
671        return false;
672    };
673    let zoneoffset = if tz_byte == b'-' {
674        -zoneoffset
675    } else {
676        zoneoffset
677    };
678    hour * 60 + minute - zoneoffset == epoch_hour * 60
679}
680
681/// Parse `---` / `+++` pair for a traditional unified diff (Git `parse_traditional_patch`).
682fn parse_traditional_patch_pair(
683    old_line: &[u8],
684    new_line: &[u8],
685    strip: usize,
686    p_guess: &mut Option<usize>,
687    setup_prefix: Option<&str>,
688) -> Result<FilePatch> {
689    let old_p = old_line.strip_prefix(b"--- ").unwrap_or(old_line);
690    let new_p = new_line.strip_prefix(b"+++ ").unwrap_or(new_line);
691
692    if p_guess.is_none() {
693        let p = guess_p_value_from_nameline(old_p, setup_prefix);
694        let q = guess_p_value_from_nameline(new_p, setup_prefix);
695        let chosen = match (p, q) {
696            (None, None) => None,
697            (Some(a), None) => Some(a),
698            (None, Some(b)) => Some(b),
699            (Some(a), Some(b)) if a == b => Some(a),
700            _ => None,
701        };
702        *p_guess = chosen;
703    }
704    let p_val = p_guess.unwrap_or(strip);
705
706    let mut fp = FilePatch {
707        diff_old_path: None,
708        diff_new_path: None,
709        old_path: None,
710        new_path: None,
711        saw_old_header: true,
712        saw_new_header: true,
713        old_mode: None,
714        new_mode: None,
715        old_mode_line: None,
716        new_mode_line: None,
717        is_new: false,
718        is_deleted: false,
719        is_rename: false,
720        is_copy: false,
721        similarity_index: None,
722        dissimilarity_index: None,
723        old_oid: None,
724        new_oid: None,
725        binary_patch: None,
726        is_binary: false,
727        hunks: Vec::new(),
728        ws_rule: 0,
729        is_toplevel_relative: false,
730    };
731
732    if is_dev_null_nameline(old_p) {
733        fp.is_new = true;
734        let name = find_name_traditional(new_p, None, p_val).ok_or_else(|| {
735            Error::Message("unable to find filename in traditional patch".to_string())
736        })?;
737        fp.new_path = Some(bytes_to_path_string(&name)?);
738    } else if is_dev_null_nameline(new_p) {
739        fp.is_deleted = true;
740        let name = find_name_traditional(old_p, None, p_val).ok_or_else(|| {
741            Error::Message("unable to find filename in traditional patch".to_string())
742        })?;
743        fp.old_path = Some(bytes_to_path_string(&name)?);
744    } else {
745        let first_name = find_name_traditional(old_p, None, p_val).ok_or_else(|| {
746            Error::Message("unable to find filename in traditional patch".to_string())
747        })?;
748        let name = find_name_traditional(new_p, Some(&first_name), p_val).ok_or_else(|| {
749            Error::Message("unable to find filename in traditional patch".to_string())
750        })?;
751        let name_str = bytes_to_path_string(&name)?;
752        if has_epoch_timestamp(old_p) {
753            fp.is_new = true;
754            fp.new_path = Some(name_str);
755        } else if has_epoch_timestamp(new_p) {
756            fp.is_deleted = true;
757            fp.old_path = Some(name_str);
758        } else {
759            // Git uses the `+++` filename for both sides when neither line carries an epoch
760            // marker; the `---` line only participates via `def` when shortening `.orig` etc.
761            fp.old_path = Some(name_str.clone());
762            fp.new_path = Some(name_str);
763        }
764    }
765
766    Ok(fp)
767}
768
769/// Default filename from `diff --git` when both sides agree (Git `git_header_name`).
770fn git_header_def_name(line: &str, p_value: usize) -> Option<String> {
771    let rest = line.strip_prefix("diff --git ").unwrap_or(line);
772    let rest_b = rest.as_bytes();
773
774    if rest_b.first() == Some(&b'"') {
775        let (first_decoded, second_raw) = unquote_c_style_diff_prefix(rest)?;
776        let rel_first = skip_tree_prefix_bytes(&first_decoded, p_value)?;
777        let second = second_raw.trim_start_matches(|c: char| c.is_ascii_whitespace());
778        if second.is_empty() {
779            return None;
780        }
781        if second.as_bytes().first() == Some(&b'"') {
782            let (second_decoded, _) = unquote_c_style_diff_prefix(second)?;
783            let rel2 = skip_tree_prefix_bytes(&second_decoded, p_value)?;
784            if rel2 != rel_first {
785                return None;
786            }
787        } else {
788            let rel2 = skip_tree_prefix_bytes(second.as_bytes(), p_value)?;
789            if rel2.len() != rel_first.len() || rel2 != rel_first {
790                return None;
791            }
792        }
793        return bytes_to_path_string(rel_first).ok();
794    }
795
796    let name = skip_tree_prefix_bytes(rest_b, p_value)?;
797    let name_start = name.as_ptr() as usize - rest_b.as_ptr() as usize;
798
799    for offset in 0..name.len() {
800        if name[offset] != b'"' {
801            continue;
802        }
803        let second_slice = &rest_b[name_start + offset..];
804        let (decoded, _) = unquote_c_style_diff_prefix(std::str::from_utf8(second_slice).ok()?)?;
805        let np = skip_tree_prefix_bytes(&decoded, p_value)?;
806        let plen = np.len();
807        if plen < offset
808            && name.len() > plen
809            && &name[..plen] == np
810            && name[plen].is_ascii_whitespace()
811        {
812            return bytes_to_path_string(np).ok();
813        }
814        return None;
815    }
816
817    let line_len = rest.len().saturating_sub(name_start);
818    let mut len = 0usize;
819    while len < line_len {
820        match rest_b[name_start + len] {
821            b'\n' => return None,
822            b'\t' | b' ' => {
823                let after = name_start + len + 1;
824                if after > name_start + line_len {
825                    return None;
826                }
827                let second =
828                    skip_tree_prefix_bytes(&rest_b[after..name_start + line_len], p_value)?;
829                let names_match =
830                    name.len() >= len && second.len() >= len && name[..len] == second[..len];
831                let boundary_ok = second.get(len) == Some(&b'\n') || second.len() == len;
832                if names_match && boundary_ok {
833                    return bytes_to_path_string(&name[..len]).ok();
834                }
835            }
836            _ => {}
837        }
838        len += 1;
839    }
840    None
841}
842
843/// Path from `rename from` / `copy from` lines (Git `find_name` with `terminate == 0`).
844fn find_name_extended_header(rest: &str, p_extended: usize) -> Option<String> {
845    let rest = rest.trim_end_matches(['\r', '\n']);
846    let b = rest.as_bytes();
847    if b.first() == Some(&b'"') {
848        let (decoded, tail) = unquote_c_style_diff_prefix(rest)?;
849        if !tail.trim().is_empty() {
850            return None;
851        }
852        let skip = skip_tree_prefix_bytes(&decoded, p_extended)?;
853        return bytes_to_path_string(skip).ok();
854    }
855    let end = b
856        .iter()
857        .position(|&c| c == b'\t' || c == b'\n' || c == b'\r' || c == b' ')
858        .unwrap_or(b.len());
859    let name = find_name_common_bounded(b, None, p_extended, end)?;
860    bytes_to_path_string(&name).ok()
861}
862
863/// Parse a unified diff into a list of `FilePatch` entries.
864///
865/// `strip` is Git's `p_value` (`-p` count, default 1). `setup_prefix` is Git's
866/// `setup_git_directory` prefix (work-tree-relative path from CWD to the repo
867/// root, with trailing `/`); pass `None` when running from the work-tree root.
868/// The caller computes it because it is a CLI/environment concern.
869pub fn parse_patch(
870    input: &str,
871    strip: usize,
872    input_name: &str,
873    recount: bool,
874    setup_prefix: Option<&str>,
875) -> Result<Vec<FilePatch>> {
876    let lines: Vec<&str> = input.lines().collect();
877    let mut patches = Vec::new();
878    let mut i = 0;
879    let mut p_guess_for_traditional: Option<usize> = None;
880    let setup_prefix_for_guess = setup_prefix.filter(|p| !p.is_empty());
881
882    let p_strip = strip;
883    let p_extended = strip.saturating_sub(1);
884
885    while i < lines.len() {
886        // Look for "diff --git" header or a bare ---/+++ pair.
887        if lines[i].starts_with("diff --git ") {
888            let mut fp = FilePatch {
889                diff_old_path: None,
890                diff_new_path: None,
891                old_path: None,
892                new_path: None,
893                saw_old_header: false,
894                saw_new_header: false,
895                old_mode: None,
896                new_mode: None,
897                old_mode_line: None,
898                new_mode_line: None,
899                is_new: false,
900                is_deleted: false,
901                is_rename: false,
902                is_copy: false,
903                similarity_index: None,
904                dissimilarity_index: None,
905                old_oid: None,
906                new_oid: None,
907                binary_patch: None,
908                is_binary: false,
909                hunks: Vec::new(),
910                ws_rule: 0,
911                is_toplevel_relative: true,
912            };
913
914            let header_line = lines[i];
915            let def_name = git_header_def_name(header_line, p_strip);
916
917            // Parse "diff --git a/foo b/foo"
918            let rest = &header_line["diff --git ".len()..];
919            if let Some((a, b)) = split_diff_git_paths(rest) {
920                fp.diff_old_path = Some(a.clone());
921                fp.diff_new_path = Some(b.clone());
922                fp.old_path = Some(skip_tree_prefix_str(&a, p_strip).ok_or_else(|| {
923                    Error::Message(format!("malformed old path in diff --git header: {a}"))
924                })?);
925                fp.new_path = Some(skip_tree_prefix_str(&b, p_strip).ok_or_else(|| {
926                    Error::Message(format!("malformed new path in diff --git header: {b}"))
927                })?);
928            }
929            i += 1;
930
931            // Parse extended headers
932            while i < lines.len()
933                && !lines[i].starts_with("--- ")
934                && !lines[i].starts_with("diff --git ")
935                && !lines[i].starts_with("@@ ")
936            {
937                let line = lines[i];
938                let line_no = i + 1;
939                if let Some(val) = line.strip_prefix("old mode ") {
940                    let v = val.trim_end_matches('\r').trim_end();
941                    if v.is_empty() {
942                        return Err(Error::Message(format!(
943                            "invalid mode on line {line_no}: {line}"
944                        )));
945                    }
946                    fp.old_mode = Some(v.to_string());
947                    fp.old_mode_line = Some(line_no);
948                } else if let Some(val) = line.strip_prefix("new mode ") {
949                    let v = val.trim_end_matches('\r').trim_end();
950                    if v.is_empty() {
951                        return Err(Error::Message(format!(
952                            "invalid mode on line {line_no}: {line}"
953                        )));
954                    }
955                    fp.new_mode = Some(v.to_string());
956                    fp.new_mode_line = Some(line_no);
957                } else if let Some(val) = line.strip_prefix("new file mode ") {
958                    let v = val.trim_end_matches('\r').trim_end();
959                    if v.is_empty() {
960                        return Err(Error::Message(format!(
961                            "invalid mode on line {line_no}: {line}"
962                        )));
963                    }
964                    fp.is_new = true;
965                    fp.new_mode = Some(v.to_string());
966                    fp.new_mode_line = Some(line_no);
967                } else if let Some(val) = line.strip_prefix("deleted file mode ") {
968                    let v = val.trim_end_matches('\r').trim_end();
969                    if v.is_empty() {
970                        return Err(Error::Message(format!(
971                            "invalid mode on line {line_no}: {line}"
972                        )));
973                    }
974                    fp.is_deleted = true;
975                    fp.old_mode = Some(v.to_string());
976                    fp.old_mode_line = Some(line_no);
977                } else if let Some(val) = line.strip_prefix("rename from ") {
978                    fp.is_rename = true;
979                    if let Some(p) = find_name_extended_header(val, p_extended) {
980                        fp.old_path = Some(p);
981                    }
982                } else if let Some(val) = line.strip_prefix("rename to ") {
983                    fp.is_rename = true;
984                    if let Some(p) = find_name_extended_header(val, p_extended) {
985                        fp.new_path = Some(p);
986                    }
987                } else if let Some(val) = line.strip_prefix("copy from ") {
988                    fp.is_copy = true;
989                    if let Some(p) = find_name_extended_header(val, p_extended) {
990                        fp.old_path = Some(p);
991                    }
992                } else if let Some(val) = line.strip_prefix("copy to ") {
993                    fp.is_copy = true;
994                    if let Some(p) = find_name_extended_header(val, p_extended) {
995                        fp.new_path = Some(p);
996                    }
997                } else if let Some(val) = line.strip_prefix("similarity index ") {
998                    fp.similarity_index = val.trim_end_matches('%').parse().ok();
999                } else if let Some(val) = line.strip_prefix("dissimilarity index ") {
1000                    fp.dissimilarity_index = val.trim_end_matches('%').parse().ok();
1001                } else if let Some(val) = line.strip_prefix("index ") {
1002                    // Parse "index abc123..def456 100644" or "index abc123..def456"
1003                    let mut parts = val.split_whitespace();
1004                    let hash_part = parts.next().unwrap_or("");
1005                    if let Some((old, new)) = hash_part.split_once("..") {
1006                        fp.old_oid = Some(old.to_string());
1007                        fp.new_oid = Some(new.to_string());
1008                    }
1009                    if let Some(mode_tok) = parts.next() {
1010                        let v = mode_tok.trim_end_matches('\r').trim_end();
1011                        if !v.is_empty() {
1012                            fp.old_mode = Some(v.to_string());
1013                            fp.old_mode_line = Some(line_no);
1014                        }
1015                    }
1016                } else if line == "GIT binary patch" {
1017                    let (binary_patch, next_i) = parse_binary_patch(&lines, i + 1)?;
1018                    fp.binary_patch = Some(binary_patch);
1019                    fp.is_binary = true;
1020                    i = next_i;
1021                    break;
1022                } else if line.starts_with("Binary files ") && line.ends_with(" differ") {
1023                    // Plain (non --binary) diff of a binary change; no payload to
1024                    // apply but stat/numstat must report it as binary.
1025                    fp.is_binary = true;
1026                }
1027                // skip other extended headers
1028                i += 1;
1029            }
1030
1031            if let Some(dn) = def_name {
1032                if fp.old_path.is_none() {
1033                    fp.old_path = Some(dn.clone());
1034                }
1035                if fp.new_path.is_none() {
1036                    fp.new_path = Some(dn);
1037                }
1038            }
1039
1040            // Parse ---/+++ headers if present
1041            if i < lines.len() && lines[i].starts_with("--- ") {
1042                let old_p = lines[i]["--- ".len()..].trim_end_matches(['\r', '\n']);
1043                let old_b = old_p.as_bytes();
1044                if is_dev_null_nameline(old_b) {
1045                    fp.old_path = Some("/dev/null".to_string());
1046                } else if let Some(p) = find_name_tab_terminated(old_b, p_strip) {
1047                    fp.old_path = Some(bytes_to_path_string(&p)?);
1048                }
1049                fp.saw_old_header = true;
1050                i += 1;
1051                if i < lines.len() && lines[i].starts_with("+++ ") {
1052                    let new_p = lines[i]["+++ ".len()..].trim_end_matches(['\r', '\n']);
1053                    let new_b = new_p.as_bytes();
1054                    if is_dev_null_nameline(new_b) {
1055                        fp.new_path = Some("/dev/null".to_string());
1056                    } else if let Some(p) = find_name_tab_terminated(new_b, p_strip) {
1057                        fp.new_path = Some(bytes_to_path_string(&p)?);
1058                    }
1059                    fp.saw_new_header = true;
1060                    i += 1;
1061                }
1062            }
1063
1064            // Parse hunks
1065            while i < lines.len() && lines[i].starts_with("@@ ") {
1066                let (hunk, next_i) = parse_hunk(&lines, i, input_name, recount)?;
1067                fp.hunks.push(hunk);
1068                i = next_i;
1069            }
1070
1071            sanitize_file_patch_headers(&mut fp);
1072            patches.push(fp);
1073        } else if lines[i].starts_with("--- ")
1074            && i + 1 < lines.len()
1075            && lines[i + 1].starts_with("+++ ")
1076        {
1077            let old_line = lines[i].as_bytes();
1078            let new_line = lines[i + 1].as_bytes();
1079            let mut fp = parse_traditional_patch_pair(
1080                old_line,
1081                new_line,
1082                strip,
1083                &mut p_guess_for_traditional,
1084                setup_prefix_for_guess,
1085            )?;
1086            i += 2;
1087
1088            // Parse hunks
1089            while i < lines.len() && lines[i].starts_with("@@ ") {
1090                let (hunk, next_i) = parse_hunk(&lines, i, input_name, recount)?;
1091                fp.hunks.push(hunk);
1092                i = next_i;
1093            }
1094
1095            sanitize_file_patch_headers(&mut fp);
1096            patches.push(fp);
1097        } else {
1098            i += 1;
1099        }
1100    }
1101
1102    Ok(patches)
1103}
1104
1105/// Parse a `GIT binary patch` payload.
1106fn parse_binary_patch(lines: &[&str], mut i: usize) -> Result<(BinaryPatchPayload, usize)> {
1107    let (forward_compressed, forward_declared_size) = parse_binary_literal(lines, &mut i)?;
1108    let (reverse_compressed, reverse_declared_size) =
1109        if i < lines.len() && lines[i].starts_with("literal ") {
1110            parse_binary_literal(lines, &mut i)?
1111        } else {
1112            (Vec::new(), 0)
1113        };
1114
1115    Ok((
1116        BinaryPatchPayload {
1117            forward_compressed,
1118            forward_declared_size,
1119            reverse_compressed,
1120            reverse_declared_size,
1121        },
1122        i,
1123    ))
1124}
1125
1126/// Parse one `literal <size>` block from a binary patch.
1127fn parse_binary_literal(lines: &[&str], i: &mut usize) -> Result<(Vec<u8>, usize)> {
1128    let header = lines.get(*i).copied().unwrap_or_default();
1129    let Some(size_str) = header.strip_prefix("literal ") else {
1130        return Err(Error::Message(format!(
1131            "unsupported binary patch section: '{header}'"
1132        )));
1133    };
1134    let declared_size: usize = size_str
1135        .trim()
1136        .parse()
1137        .map_err(|e: std::num::ParseIntError| {
1138            Error::Message(format!("invalid binary patch literal size: {e}"))
1139        })?;
1140    *i += 1;
1141
1142    let mut compressed = Vec::new();
1143    while *i < lines.len() {
1144        let line = lines[*i];
1145        if line.is_empty() {
1146            *i += 1;
1147            break;
1148        }
1149        decode_binary_patch_line(line, &mut compressed)?;
1150        *i += 1;
1151    }
1152
1153    Ok((compressed, declared_size))
1154}
1155
1156/// Decode one binary patch payload line into compressed bytes.
1157fn decode_binary_patch_line(line: &str, out: &mut Vec<u8>) -> Result<()> {
1158    let mut chars = line.chars();
1159    let Some(len_ch) = chars.next() else {
1160        return Err(Error::Message(
1161            "empty binary patch payload line".to_string(),
1162        ));
1163    };
1164    let expected_len = decode_binary_line_len(len_ch)?;
1165    let body = chars.as_str().as_bytes();
1166    let decoded = crate::git_binary_base85::decode_body(body, expected_len)
1167        .map_err(|e| Error::Message(format!("invalid binary patch base85: {e}")))?;
1168    out.extend_from_slice(&decoded);
1169    Ok(())
1170}
1171
1172fn decode_binary_line_len(ch: char) -> Result<usize> {
1173    if ch.is_ascii_uppercase() {
1174        return Ok((ch as u8 - b'A' + 1) as usize);
1175    }
1176    if ch.is_ascii_lowercase() {
1177        return Ok((ch as u8 - b'a' + 27) as usize);
1178    }
1179    Err(Error::Message(format!(
1180        "invalid binary patch line length marker: '{ch}'"
1181    )))
1182}
1183
1184/// Inflate zlib-compressed binary payload.
1185pub fn inflate_binary_payload(compressed: &[u8]) -> Result<Vec<u8>> {
1186    use flate2::read::ZlibDecoder;
1187    use std::io::Read;
1188
1189    let mut decoder = ZlibDecoder::new(compressed);
1190    let mut out = Vec::new();
1191    decoder
1192        .read_to_end(&mut out)
1193        .map_err(|e| Error::Message(format!("failed to inflate binary patch payload: {e}")))?;
1194    Ok(out)
1195}
1196
1197/// Split the two path tokens from the remainder of a `diff --git` line (after `diff --git `).
1198fn split_diff_git_paths(s: &str) -> Option<(String, String)> {
1199    let s = s.trim_end_matches(['\r', '\n']);
1200
1201    if s.as_bytes().first() == Some(&b'"') {
1202        let (first, rest_raw) = unquote_c_style_diff_prefix(s)?;
1203        let rest = rest_raw.trim_start_matches(|c: char| c.is_ascii_whitespace());
1204        if rest.is_empty() {
1205            return None;
1206        }
1207        if rest.as_bytes().first() == Some(&b'"') {
1208            let (second, _) = unquote_c_style_diff_prefix(rest)?;
1209            return Some((
1210                String::from_utf8_lossy(&first).into_owned(),
1211                String::from_utf8_lossy(&second).into_owned(),
1212            ));
1213        }
1214        let second = rest;
1215        if second.len() != first.len() || second.as_bytes() != first.as_slice() {
1216            return None;
1217        }
1218        return Some((
1219            String::from_utf8_lossy(&first).into_owned(),
1220            second.to_string(),
1221        ));
1222    }
1223
1224    if let Some(pos) = s.find(" b/") {
1225        let a = &s[..pos];
1226        let b = &s[pos + 1..];
1227        return Some((a.to_string(), b.to_string()));
1228    }
1229    if s.starts_with("a/") {
1230        if let Some(pos) = s.find(" /dev/null") {
1231            let a = &s[..pos];
1232            return Some((a.to_string(), "/dev/null".to_string()));
1233        }
1234    }
1235    if let Some(b) = s.strip_prefix("/dev/null ") {
1236        return Some(("/dev/null".to_string(), b.to_string()));
1237    }
1238
1239    let name = s.as_bytes();
1240    let line_len = name.len();
1241    let mut len = 0usize;
1242    while len < line_len {
1243        match name[len] {
1244            b'\n' => return None,
1245            b'\t' | b' ' => {
1246                if len + 1 > line_len {
1247                    return None;
1248                }
1249                let second = &name[len + 1..line_len];
1250                let names_match =
1251                    name.len() >= len && second.len() >= len && name[..len] == second[..len];
1252                let boundary_ok = second.get(len) == Some(&b'\n') || second.len() == len;
1253                if names_match && boundary_ok {
1254                    return Some((
1255                        String::from_utf8_lossy(&name[..len]).into_owned(),
1256                        String::from_utf8_lossy(second).into_owned(),
1257                    ));
1258                }
1259            }
1260            _ => {}
1261        }
1262        len += 1;
1263    }
1264    None
1265}
1266
1267/// Parse a single hunk starting at line `i` (which should be an `@@` line).
1268fn parse_hunk(
1269    lines: &[&str],
1270    start: usize,
1271    input_name: &str,
1272    recount: bool,
1273) -> Result<(Hunk, usize)> {
1274    let header = lines[start];
1275    let (old_start, old_count, new_start, new_count) = parse_hunk_header(header)
1276        .map_err(|e| Error::Message(format!("invalid hunk header: {header}: {e}")))?;
1277
1278    let mut hunk = Hunk {
1279        old_start,
1280        old_count,
1281        new_start,
1282        new_count,
1283        first_body_line: start + 2,
1284        lines: Vec::new(),
1285    };
1286
1287    // Track how many old/new lines the body actually provides so a hunk that
1288    // ends prematurely is diagnosed like Git: "corrupt patch at <file>:<line>"
1289    // (t4012; Git parse_fragment returns -1 when oldlines/newlines remain).
1290    let mut old_seen = 0usize;
1291    let mut new_seen = 0usize;
1292    let mut i = start + 1;
1293    while i < lines.len() {
1294        let line = lines[i];
1295        if line.starts_with("@@ ") || line.starts_with("diff --git ") {
1296            break;
1297        }
1298        // `---` / `+++` with a space begin a new file header; do not treat `---` as a `-` hunk line
1299        // (would misparse `--- /dev/null` as a remove of `-- /dev/null` and merge the next file).
1300        if line.starts_with("--- ") || line.starts_with("+++ ") {
1301            break;
1302        }
1303        if line == "-- " {
1304            // format-patch signature separator; not part of hunk body
1305            break;
1306        }
1307        if let Some(rest) = line.strip_prefix('+') {
1308            hunk.lines.push(HunkLine::Add(rest.to_string()));
1309            new_seen += 1;
1310        } else if let Some(rest) = line.strip_prefix('-') {
1311            hunk.lines.push(HunkLine::Remove(rest.to_string()));
1312            old_seen += 1;
1313        } else if line.is_empty() {
1314            hunk.lines.push(HunkLine::Context(String::new()));
1315            old_seen += 1;
1316            new_seen += 1;
1317        } else if let Some(rest) = line.strip_prefix(' ') {
1318            // context line
1319            hunk.lines.push(HunkLine::Context(rest.to_string()));
1320            old_seen += 1;
1321            new_seen += 1;
1322        } else if line.starts_with('\\') {
1323            hunk.lines.push(HunkLine::NoNewline);
1324        } else {
1325            // Unknown line type — could be start of something else
1326            break;
1327        }
1328        i += 1;
1329    }
1330
1331    if recount {
1332        hunk.old_count = old_seen;
1333        hunk.new_count = new_seen;
1334    } else if old_seen < old_count || new_seen < new_count {
1335        return Err(Error::Message(format!(
1336            "error: corrupt patch at {input_name}:{}",
1337            i + 1
1338        )));
1339    }
1340
1341    Ok((hunk, i))
1342}
1343
1344/// Parse "@@ -old_start[,old_count] +new_start[,new_count] @@..."
1345fn parse_hunk_header(line: &str) -> Result<(usize, usize, usize, usize)> {
1346    // Find the range part between @@ markers
1347    let trimmed = line.trim_start_matches('@').trim_start();
1348    let end = trimmed.find(" @@").unwrap_or(trimmed.len());
1349    let range_part = &trimmed[..end];
1350
1351    let parts: Vec<&str> = range_part.split_whitespace().collect();
1352    if parts.len() < 2 {
1353        return Err(Error::Message(
1354            "expected old and new range in hunk header".to_string(),
1355        ));
1356    }
1357
1358    let (old_start, old_count) = parse_range(parts[0].trim_start_matches('-'))?;
1359    let (new_start, new_count) = parse_range(parts[1].trim_start_matches('+'))?;
1360
1361    Ok((old_start, old_count, new_start, new_count))
1362}
1363
1364/// Parse "N" or "N,M" into (start, count).
1365fn parse_range(s: &str) -> Result<(usize, usize)> {
1366    if let Some((start_s, count_s)) = s.split_once(',') {
1367        let start = start_s
1368            .parse::<usize>()
1369            .map_err(|e| Error::Message(e.to_string()))?;
1370        let count = count_s
1371            .parse::<usize>()
1372            .map_err(|e| Error::Message(e.to_string()))?;
1373        Ok((start, count))
1374    } else {
1375        let n: usize = s
1376            .parse()
1377            .map_err(|e: std::num::ParseIntError| Error::Message(e.to_string()))?;
1378        Ok((n, 1))
1379    }
1380}
1381
// Unit tests for the parse layer. The patch fixtures are written with `\`
// string continuations, which drop the line break and all leading whitespace
// of the following source line; only the explicit `\n` escapes end a patch line.
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-hunk `diff --git` section yields one `FilePatch` with stripped paths.
    #[test]
    fn parses_simple_git_diff_into_one_file_patch() {
        let input = "diff --git a/foo.txt b/foo.txt\n\
                     index e69de29..d95f3ad 100644\n\
                     --- a/foo.txt\n\
                     +++ b/foo.txt\n\
                     @@ -0,0 +1 @@\n\
                     +hello\n";
        let patches = parse_patch(input, 1, "<test>", false, None).expect("parse");
        assert_eq!(patches.len(), 1);
        let fp = &patches[0];
        // With strip level 1 the `a/` and `b/` components are removed.
        assert_eq!(fp.old_path.as_deref(), Some("foo.txt"));
        assert_eq!(fp.new_path.as_deref(), Some("foo.txt"));
        assert_eq!(fp.hunks.len(), 1);
        let hunk = &fp.hunks[0];
        // `+1` without an explicit count means a count of 1.
        assert_eq!(hunk.new_count, 1);
        assert!(matches!(hunk.lines.as_slice(), [HunkLine::Add(s)] if s == "hello"));
    }

    /// `new file mode` / `deleted file mode` headers set the matching flags.
    #[test]
    fn parses_new_file_mode_and_deletion() {
        let new_file = "diff --git a/n b/n\n\
                        new file mode 100644\n\
                        index 0000000..9daeafb\n\
                        --- /dev/null\n\
                        +++ b/n\n\
                        @@ -0,0 +1 @@\n\
                        +x\n";
        let patches = parse_patch(new_file, 1, "<test>", false, None).expect("parse");
        assert_eq!(patches.len(), 1);
        assert!(patches[0].is_new);
        assert_eq!(patches[0].new_mode.as_deref(), Some("100644"));

        let deleted = "diff --git a/d b/d\n\
                       deleted file mode 100644\n\
                       index 9daeafb..0000000\n\
                       --- a/d\n\
                       +++ /dev/null\n\
                       @@ -1 +0,0 @@\n\
                       -x\n";
        let patches = parse_patch(deleted, 1, "<test>", false, None).expect("parse");
        assert!(patches[0].is_deleted);
    }

    /// A hunk whose body falls short of the header counts is a corrupt patch.
    #[test]
    fn corrupt_hunk_is_reported_with_input_name_and_line() {
        // The body provides fewer lines than the header declares.
        // NOTE: the string continuation strips the leading space of ` one`, so
        // the parser sees `one`, which is not a hunk body line. The hunk
        // therefore ends at that line (patch line 4), the line reported below.
        let input = "--- a/x\n\
                     +++ b/x\n\
                     @@ -1,3 +1,3 @@\n\
                      one\n";
        let err = parse_patch(input, 1, "patch", false, None)
            .err()
            .expect("should fail");
        assert_eq!(err.to_string(), "error: corrupt patch at patch:4");
    }

    /// Both the `N,M` and the bare `N` range forms are accepted.
    #[test]
    fn parse_hunk_header_parses_ranges() {
        assert_eq!(parse_hunk_header("@@ -1,3 +2,4 @@").unwrap(), (1, 3, 2, 4));
        // Text after the closing `@@` is ignored; a missing count is 1.
        assert_eq!(parse_hunk_header("@@ -5 +6 @@ ctx").unwrap(), (5, 1, 6, 1));
    }

    /// A non-numeric range reports the integer parser's message unchanged.
    #[test]
    fn invalid_hunk_header_chains_inner_message() {
        let err = parse_hunk_header("@@ -x +1 @@").err().expect("fail");
        // The numeric parse failure must surface its own message.
        assert_eq!(err.to_string(), "invalid digit found in string");
    }
}