Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3//!
4//! The git-format unified-diff model + parser and the version type live in the
5//! shared [`vcs_diff`] crate (`git diff` and `jj diff --git` are byte-identical);
6//! this module keeps only the git-specific parsers (porcelain, log, blame, …).
7
8use std::path::PathBuf;
9
10use vcs_diff::DiffStat;
11
/// One entry from `git status --porcelain=v1 -z` (`XY <path>`, NUL-delimited).
///
/// Produced by `parse_porcelain`, one value per status record; a rename/copy's
/// trailing source-path record is folded into [`StatusEntry::orig_path`] rather
/// than reported as its own entry.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct StatusEntry {
    /// Two-character status code, e.g. `" M"`, `"??"`, `"A "`, `"R "`. Kept
    /// verbatim (spaces are significant); the first character is the index
    /// column.
    pub code: String,
    /// Path the status applies to (the *new* path for a rename/copy). Raw bytes
    /// from `-z` — no C-quoting/escaping to undo, even for paths with spaces.
    pub path: String,
    /// For a rename/copy, the original path; `None` otherwise.
    pub orig_path: Option<String>,
}
24
/// Branch and working-tree summary distilled from a single
/// `git status --porcelain=v2 --branch -z` invocation.
///
/// It bundles what a prompt or status bar typically shows — HEAD, branch name,
/// upstream, ahead/behind distance and change counts — so callers need only
/// **one** process spawn. `Default` yields the "nothing known, clean tree"
/// value: every `Option` is `None` and every counter is `0`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub struct BranchStatus {
    /// Full object id of the HEAD commit, taken from `# branch.oid`. `None` for
    /// an unborn repository, where git prints `(initial)` instead of an id.
    /// Shorten it yourself for display.
    pub head: Option<String>,
    /// Name of the checked-out branch, taken from `# branch.head`. `None` when
    /// HEAD is detached.
    pub branch: Option<String>,
    /// Upstream tracking branch, taken from `# branch.upstream`. `None` when no
    /// upstream is configured.
    pub upstream: Option<String>,
    /// How many commits HEAD is ahead of the upstream (the `+A` half of
    /// `# branch.ab`). `None` when there is no upstream.
    pub ahead: Option<usize>,
    /// How many commits HEAD is behind the upstream (the `-B` half of
    /// `# branch.ab`). `None` when there is no upstream.
    pub behind: Option<usize>,
    /// Number of changed *tracked* entries: every `1`, `2` and `u` record, i.e.
    /// modified/added/deleted, renamed/copied, and unmerged paths.
    pub tracked_changes: usize,
    /// Number of untracked files (`?` records).
    pub untracked: usize,
    /// Number of unmerged (conflicted) entries (`u` records). These are counted
    /// in `tracked_changes` as well.
    pub conflicts: usize,
}

impl BranchStatus {
    /// Returns `true` when the working tree differs from HEAD in any way, be it
    /// a tracked change or an untracked file.
    ///
    /// `conflicts` needs no separate check: conflicted entries are already part
    /// of `tracked_changes`.
    pub fn is_dirty(&self) -> bool {
        let nothing_tracked = self.tracked_changes == 0;
        let nothing_untracked = self.untracked == 0;
        !(nothing_tracked && nothing_untracked)
    }
}
58
/// A commit, parsed from a `\x1f`-delimited `git log` line.
///
/// Every field is stored as the text git printed; nothing is validated or
/// converted (the date stays a string).
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Commit {
    /// Full commit hash (`%H`).
    pub hash: String,
    /// Abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// Author name (`%an`).
    pub author: String,
    /// Author date, strict ISO-8601 (`%aI`), e.g. `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// Subject line (`%s`). Empty when the commit has no subject.
    pub subject: String,
}
74
/// A local branch from `git branch`.
///
/// The detached-HEAD pseudo-entry (`(HEAD detached at …)`) is never represented
/// as a `Branch`; `parse_branches` drops it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Branch {
    /// Branch name, with the marker column and surrounding whitespace removed.
    pub name: String,
    /// Whether this is the checked-out branch (the `*` marker).
    pub current: bool,
}
84
/// A worktree from `git worktree list --porcelain`.
///
/// One value per `worktree <path>` record; the remaining fields mirror the
/// attribute lines that follow it in the same record.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Worktree {
    /// Absolute path to the worktree.
    pub path: PathBuf,
    /// Short branch name (`refs/heads/` stripped); `None` when detached or bare.
    pub branch: Option<String>,
    /// The checked-out commit (`HEAD <sha>`); `None` for a bare entry.
    pub head: Option<String>,
    /// The main worktree of a bare repository.
    pub bare: bool,
    /// Checked out at a detached HEAD (no branch).
    pub detached: bool,
    /// Locked against pruning.
    pub locked: bool,
}
102
103/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
104/// (unquoted) paths. A rename/copy entry is followed by its source path as the
105/// next NUL record (e.g. `R  new\0old\0`).
106pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
107    let mut entries = Vec::new();
108    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
109    while let Some(rec) = records.next() {
110        // "XY path": two status-code chars, a space, then the path. Real git
111        // codes are ASCII, but slice via `get` so a malformed record (a
112        // multibyte char where the code/space belong) is skipped, not a panic.
113        let (Some(code), Some(path)) = (rec.get(..2), rec.get(3..)) else {
114            continue;
115        };
116        // A rename/copy (R/C in the index column) carries its source path as the
117        // immediately following NUL record; consume it.
118        let orig_path = if matches!(rec.as_bytes().first(), Some(b'R' | b'C')) {
119            records.next().map(str::to_string)
120        } else {
121            None
122        };
123        entries.push(StatusEntry {
124            code: code.to_string(),
125            path: path.to_string(),
126            orig_path,
127        });
128    }
129    entries
130}
131
132/// Parse `git status --porcelain=v2 --branch -z` output into a [`BranchStatus`].
133///
134/// Records are NUL-terminated: `# branch.*` header lines first, then entry lines
135/// (`1`/`2` changed, `u` unmerged, `?` untracked, `!` ignored). A `2` (rename/copy)
136/// entry stores its original path as the *next* NUL record, so that record is
137/// consumed and skipped. Everything is `strip_prefix`/compare based — no byte
138/// indexing — so arbitrary bytes never panic (proven by proptest).
139pub(crate) fn parse_porcelain_v2(output: &str) -> BranchStatus {
140    let mut status = BranchStatus::default();
141    let mut records = output.split('\0');
142    while let Some(rec) = records.next() {
143        if let Some(rest) = rec.strip_prefix("# branch.oid ") {
144            // `(initial)` marks an unborn repo (no commits yet).
145            status.head = (rest != "(initial)").then(|| rest.to_string());
146        } else if let Some(rest) = rec.strip_prefix("# branch.head ") {
147            status.branch = (rest != "(detached)").then(|| rest.to_string());
148        } else if let Some(rest) = rec.strip_prefix("# branch.upstream ") {
149            status.upstream = Some(rest.to_string());
150        } else if let Some(rest) = rec.strip_prefix("# branch.ab ") {
151            // `+<ahead> -<behind>`.
152            let mut parts = rest.split(' ');
153            status.ahead = parts
154                .next()
155                .and_then(|t| t.strip_prefix('+'))
156                .and_then(|n| n.parse().ok());
157            status.behind = parts
158                .next()
159                .and_then(|t| t.strip_prefix('-'))
160                .and_then(|n| n.parse().ok());
161        } else if rec.starts_with("1 ") {
162            status.tracked_changes += 1;
163        } else if rec.starts_with("2 ") {
164            status.tracked_changes += 1;
165            // The rename/copy original path is the next NUL record; consume it so
166            // it isn't mis-read as another entry.
167            records.next();
168        } else if rec.starts_with("u ") {
169            status.tracked_changes += 1;
170            status.conflicts += 1;
171        } else if rec.starts_with("? ") {
172            status.untracked += 1;
173        }
174        // `! ` (ignored) and other `# ` headers contribute nothing.
175    }
176    status
177}
178
/// Parse `git --version` output (`git version 2.54.0.windows.1`) into the shared
/// [`vcs_diff::Version`]: the first dotted-numeric token wins; non-numeric
/// trailers (`.windows.1`, `-rc1`) are ignored; a missing patch reads as `0`.
///
/// This is a thin git-named wrapper: all parsing is delegated to
/// [`vcs_diff::parse_dotted_version`]. Returns `None` when that helper finds no
/// version in `raw`.
pub(crate) fn parse_git_version(raw: &str) -> Option<vcs_diff::Version> {
    vcs_diff::parse_dotted_version(raw)
}
185
/// Split a NUL-delimited path list (for instance the output of
/// `git diff --name-only -z`) into owned strings.
///
/// Each record is one repo-relative path with `/` separators and no quoting, so
/// it is copied through untouched. Empty records — such as the one after the
/// final NUL terminator — are dropped.
pub(crate) fn parse_nul_paths(output: &str) -> Vec<String> {
    let mut paths = Vec::new();
    for record in output.split('\0') {
        if !record.is_empty() {
            paths.push(record.to_owned());
        }
    }
    paths
}
195
196/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
197/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
198/// separator.
199pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
200    output
201        .split('\0')
202        .filter(|rec| !rec.is_empty())
203        .filter_map(|rec| {
204            let mut fields = rec.split('\u{1f}');
205            Some(Commit {
206                hash: fields.next()?.to_string(),
207                short_hash: fields.next()?.to_string(),
208                author: fields.next()?.to_string(),
209                date: fields.next()?.to_string(),
210                subject: fields.next().unwrap_or("").to_string(),
211            })
212        })
213        .collect()
214}
215
216/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
217pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
218    output
219        .lines()
220        .filter(|line| !line.trim().is_empty())
221        .filter_map(|line| {
222            let current = line.starts_with('*');
223            let name = line.get(1..).unwrap_or("").trim();
224            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
225            if name.is_empty() || name.starts_with('(') {
226                return None;
227            }
228            Some(Branch {
229                name: name.to_string(),
230                current,
231            })
232        })
233        .collect()
234}
235
236/// Parse `git worktree list --porcelain`: records separated by a blank line,
237/// each a set of `label [value]` lines — `worktree <path>`, `HEAD <sha>`,
238/// `branch refs/heads/<name>`, plus the valueless attributes `bare` / `detached`
239/// / `locked`. Unknown labels (e.g. `prunable`) are ignored.
240pub(crate) fn parse_worktree_porcelain(output: &str) -> Vec<Worktree> {
241    let mut worktrees = Vec::new();
242    let mut current: Option<Worktree> = None;
243    let flush = |current: &mut Option<Worktree>, out: &mut Vec<Worktree>| {
244        if let Some(wt) = current.take() {
245            out.push(wt);
246        }
247    };
248    for line in output.lines() {
249        if line.is_empty() {
250            flush(&mut current, &mut worktrees);
251            continue;
252        }
253        let (label, value) = match line.split_once(' ') {
254            Some((l, v)) => (l, Some(v)),
255            None => (line, None),
256        };
257        match label {
258            // A new record begins; flush any record not closed by a blank line.
259            "worktree" => {
260                flush(&mut current, &mut worktrees);
261                current = Some(Worktree {
262                    path: PathBuf::from(value.unwrap_or("")),
263                    branch: None,
264                    head: None,
265                    bare: false,
266                    detached: false,
267                    locked: false,
268                });
269            }
270            "HEAD" => {
271                if let Some(wt) = current.as_mut() {
272                    wt.head = value.map(str::to_string);
273                }
274            }
275            "branch" => {
276                if let Some(wt) = current.as_mut() {
277                    // Value is a full ref (`refs/heads/main`); expose the short name.
278                    wt.branch =
279                        value.map(|v| v.strip_prefix("refs/heads/").unwrap_or(v).to_string());
280                }
281            }
282            "bare" => {
283                if let Some(wt) = current.as_mut() {
284                    wt.bare = true;
285                }
286            }
287            "detached" => {
288                if let Some(wt) = current.as_mut() {
289                    wt.detached = true;
290                }
291            }
292            "locked" => {
293                if let Some(wt) = current.as_mut() {
294                    wt.locked = true;
295                }
296            }
297            _ => {}
298        }
299    }
300    flush(&mut current, &mut worktrees);
301    worktrees
302}
303
/// One line of `git blame --line-porcelain` output: who last touched the line
/// and where it came from.
///
/// Only the author name, time and timezone are captured from the per-line
/// metadata; the other tags git prints (committer, summary, filename, …) are
/// not stored.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct BlameLine {
    /// Full hash of the commit that last changed the line.
    pub commit: String,
    /// Line number in that commit's version of the file (1-based). `0` if the
    /// header carried no parsable number.
    pub orig_line: u32,
    /// Line number in the blamed version of the file (1-based). `0` if the
    /// header carried no parsable number.
    pub final_line: u32,
    /// Author name of that commit. Empty if no `author` tag was seen.
    pub author: String,
    /// Author timestamp as a unix epoch (seconds). `0` if missing or
    /// unparsable.
    pub author_time: i64,
    /// Author timezone offset, e.g. `+0200`.
    pub author_tz: String,
    /// The line's content (without the trailing newline).
    pub content: String,
}
324
325/// Parse `git blame --line-porcelain` output. Every line gets a header
326/// (`<40-hex sha> <orig> <final> [<group count>]`), a full set of `tag value`
327/// metadata lines (`author`, `author-time`, …, optional `boundary`), then the
328/// content prefixed with a literal TAB.
329pub(crate) fn parse_blame_porcelain(output: &str) -> Vec<BlameLine> {
330    let mut lines = Vec::new();
331    let mut current: Option<BlameLine> = None;
332    for line in output.lines() {
333        // Content line: closes the current record.
334        if let Some(content) = line.strip_prefix('\t') {
335            if let Some(mut entry) = current.take() {
336                entry.content = content.to_string();
337                lines.push(entry);
338            }
339            continue;
340        }
341        let (label, value) = match line.split_once(' ') {
342            Some((l, v)) => (l, v),
343            None => (line, ""),
344        };
345        // Header: a 40-hex sha followed by line numbers (and an optional group
346        // count, which only appears on a group's first line).
347        if label.len() == 40 && label.bytes().all(|b| b.is_ascii_hexdigit()) {
348            let mut nums = value.split(' ');
349            let orig = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
350            let fin = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
351            current = Some(BlameLine {
352                commit: label.to_string(),
353                orig_line: orig,
354                final_line: fin,
355                author: String::new(),
356                author_time: 0,
357                author_tz: String::new(),
358                content: String::new(),
359            });
360            continue;
361        }
362        let Some(entry) = current.as_mut() else {
363            continue;
364        };
365        match label {
366            "author" => entry.author = value.to_string(),
367            "author-time" => entry.author_time = value.parse().unwrap_or(0),
368            "author-tz" => entry.author_tz = value.to_string(),
369            // committer*/summary/filename/previous/boundary intentionally not
370            // captured — `#[non_exhaustive]` leaves room to add them later.
371            _ => {}
372        }
373    }
374    lines
375}
376
377/// Parse `git diff --shortstat`, e.g. ` 3 files changed, 12 insertions(+), 4
378/// deletions(-)`. Any clause may be absent (a pure-insertion diff omits
379/// deletions; no changes yields an empty string → all zeros).
380pub(crate) fn parse_shortstat(output: &str) -> DiffStat {
381    let mut stat = DiffStat::default();
382    for part in output.split(',') {
383        let part = part.trim();
384        let n = part
385            .split_whitespace()
386            .next()
387            .and_then(|tok| tok.parse().ok())
388            .unwrap_or(0);
389        if part.contains("file") {
390            stat.files_changed = n;
391        } else if part.contains("insertion") {
392            stat.insertions = n;
393        } else if part.contains("deletion") {
394            stat.deletions = n;
395        }
396    }
397    stat
398}
399
/// Extract bare branch names from `git ls-remote --heads <remote>` output.
///
/// Each line has the shape `<sha>\trefs/heads/<name>`. Lines without a TAB, or
/// whose ref (after trimming) does not start with `refs/heads/`, are skipped;
/// for the rest the prefix is removed and `<name>` is returned.
pub(crate) fn parse_ls_remote_heads(output: &str) -> Vec<String> {
    let mut heads = Vec::new();
    for line in output.lines() {
        let Some((_sha, refname)) = line.split_once('\t') else {
            continue;
        };
        if let Some(name) = refname.trim().strip_prefix("refs/heads/") {
            heads.push(name.to_owned());
        }
    }
    heads
}
414
// Unit tests: each parser is fed a hand-written sample of the git output it
// targets. No git process is spawned, so these run anywhere.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // NUL-delimited records; the path with a space stays raw (no quoting).
        let got = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: " M".into(),
                    path: "src/lib.rs".into(),
                    orig_path: None,
                },
                StatusEntry {
                    code: "??".into(),
                    path: "new file.txt".into(),
                    orig_path: None,
                },
                StatusEntry {
                    code: "A ".into(),
                    path: "added.rs".into(),
                    orig_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_parses_rename_with_orig_path() {
        // `R  new\0old\0` — the source path is the next NUL record.
        let got = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: "R ".into(),
                    path: "new.rs".into(),
                    orig_path: Some("old.rs".into()),
                },
                StatusEntry {
                    code: " M".into(),
                    path: "other.rs".into(),
                    orig_path: None,
                },
            ]
        );
    }

    // Empty records are filtered out; `"  "` and `"X"` are too short to hold
    // `XY <path>`, so nothing is produced.
    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        assert!(parse_porcelain("\0  \0X\0").is_empty());
    }

    // Regression (found by proptest): a record whose leading char is multibyte
    // must be skipped, not panic on a non-char-boundary slice. `𝓁` is 4 bytes,
    // so byte index 2 lands inside it.
    #[test]
    fn porcelain_skips_non_ascii_status_records() {
        assert!(parse_porcelain("𝓁abc\0").is_empty());
        // A well-formed record alongside the garbage still parses.
        let entries = parse_porcelain("𝓁abc\0 M a.rs\0");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path, "a.rs");
    }

    #[test]
    fn porcelain_v2_parses_branch_and_change_counts() {
        // The rename's original path (`1 trap.rs`) is the next NUL record; it must
        // be CONSUMED, not counted as a fourth `1 …` change.
        let out = concat!(
            "# branch.oid abcdef1234567890\0",
            "# branch.head main\0",
            "# branch.upstream origin/main\0",
            "# branch.ab +2 -1\0",
            "1 .M N... 100644 100644 100644 1111 2222 a.rs\0",
            "2 R. N... 100644 100644 100644 3333 4444 R100 new.rs\0",
            "1 trap.rs\0",
            "u UU N... 100644 100644 100644 100644 5 6 7 conflict.rs\0",
            "? untracked.txt\0",
            "! ignored.txt\0",
        );
        let s = parse_porcelain_v2(out);
        assert_eq!(s.head.as_deref(), Some("abcdef1234567890"));
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream.as_deref(), Some("origin/main"));
        assert_eq!((s.ahead, s.behind), (Some(2), Some(1)));
        assert_eq!(
            s.tracked_changes, 3,
            "1 + 2(rename) + u; the trap is consumed"
        );
        assert_eq!(s.untracked, 1);
        assert_eq!(s.conflicts, 1);
        assert!(s.is_dirty());
    }

    #[test]
    fn porcelain_v2_handles_unborn_detached_and_no_upstream() {
        // Unborn repo: `(initial)` oid, no ab line, clean tree.
        let s = parse_porcelain_v2("# branch.oid (initial)\0# branch.head main\0");
        assert_eq!(s.head, None);
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream, None);
        assert_eq!((s.ahead, s.behind), (None, None));
        assert!(!s.is_dirty());

        // Detached HEAD, no upstream tracking.
        let s = parse_porcelain_v2("# branch.oid deadbeef\0# branch.head (detached)\0");
        assert_eq!(s.head.as_deref(), Some("deadbeef"));
        assert_eq!(s.branch, None);
        assert_eq!(s.upstream, None);
    }

    // --line-porcelain repeats the full metadata for every line; the group
    // count appears only on a group's first header, and `boundary` is a
    // valueless tag — both must parse.
    #[test]
    fn blame_line_porcelain_parses_headers_and_metadata() {
        let sha_a = "a".repeat(40);
        let sha_b = "b".repeat(40);
        let out = format!(
            "{sha_a} 1 1 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nboundary\nfilename f.txt\n\
             \tline one\n\
             {sha_a} 2 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nfilename f.txt\n\
             \tline two\n\
             {sha_b} 1 3 1\nauthor Bob\nauthor-mail <b@x>\nauthor-time 1717600000\n\
             author-tz -0500\ncommitter Bob\nsummary second\nfilename f.txt\n\
             \t\n"
        );
        let lines = parse_blame_porcelain(&out);
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0].commit, sha_a);
        assert_eq!(lines[0].orig_line, 1);
        assert_eq!(lines[0].final_line, 1);
        assert_eq!(lines[0].author, "Alice");
        assert_eq!(lines[0].author_time, 1717500000);
        assert_eq!(lines[0].author_tz, "+0200");
        assert_eq!(lines[0].content, "line one");
        // Second line of the same group: header without a group count.
        assert_eq!(lines[1].final_line, 2);
        assert_eq!(lines[1].content, "line two");
        // A different commit, and an empty content line stays empty.
        assert_eq!(lines[2].commit, sha_b);
        assert_eq!(lines[2].author, "Bob");
        assert_eq!(lines[2].content, "");
    }

    // A content line with no preceding header has no record to close, so it is
    // dropped.
    #[test]
    fn blame_ignores_garbage_and_empty_input() {
        assert!(parse_blame_porcelain("").is_empty());
        assert!(parse_blame_porcelain("not a header\n\torphan content\n").is_empty());
    }

    #[test]
    fn git_version_parses_real_world_shapes() {
        // The Windows build trailer (`.windows.1`) is extra dotted components
        // beyond the patch; an `-rc1` suffix rides on the patch itself.
        let v = parse_git_version("git version 2.54.0.windows.1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 54, 0));
        let v = parse_git_version("git version 2.41.0-rc1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 41, 0));
        let v = parse_git_version("git version 2.54").unwrap();
        assert_eq!(v.patch, 0, "missing patch defaults to 0");
        assert!(parse_git_version("no digits here").is_none());
        assert!(parse_git_version("git version unknowable").is_none());
    }

    // Paths pass through untouched (spaces included); the empty record after
    // the final NUL is not reported.
    #[test]
    fn nul_paths_split_and_keep_special_characters() {
        assert_eq!(
            parse_nul_paths("a.rs\0sub/with space.rs\0"),
            ["a.rs", "sub/with space.rs"]
        );
        assert!(parse_nul_paths("").is_empty());
    }

    // Two NUL-terminated commits, five `\x1f`-separated fields each.
    #[test]
    fn log_splits_unit_separated_fields() {
        let input = "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0\
                     def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0";
        let got = parse_log(input);
        assert_eq!(got.len(), 2);
        assert_eq!(
            got[0],
            Commit {
                hash: "abc123".into(),
                short_hash: "abc".into(),
                author: "Ada".into(),
                date: "2026-05-31T10:00:00+00:00".into(),
                subject: "Add feature".into(),
            }
        );
        assert_eq!(got[1].subject, "Fix bug");
    }

    // The record ends right after the fourth separator: the subject is empty
    // but the commit is still reported.
    #[test]
    fn log_tolerates_empty_subject() {
        let got = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(got[0].subject, "");
    }

    // `*` marks the current branch; the parenthesised detached-HEAD line is
    // not a branch and must not appear in the result.
    #[test]
    fn branches_marks_current_and_skips_detached() {
        let got = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "main".into(),
                    current: true
                },
                Branch {
                    name: "feature".into(),
                    current: false
                },
            ]
        );
    }

    // Three blank-line-separated records: on a branch, detached, and bare.
    #[test]
    fn worktrees_parse_branch_detached_and_bare() {
        let input = "worktree /repo\nHEAD abc123\nbranch refs/heads/main\n\
                     \nworktree /repo/wt\nHEAD def456\ndetached\n\
                     \nworktree /repo/bare\nbare\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 3);
        assert_eq!(got[0].path, PathBuf::from("/repo"));
        assert_eq!(got[0].branch.as_deref(), Some("main"));
        assert_eq!(got[0].head.as_deref(), Some("abc123"));
        assert!(got[1].detached && got[1].branch.is_none());
        assert!(got[2].bare && got[2].head.is_none());
    }

    #[test]
    fn worktrees_parse_last_record_without_trailing_blank() {
        // The final record may not be followed by a blank line.
        let got = parse_worktree_porcelain("worktree /only\nHEAD aaa\nbranch refs/heads/x\n");
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
    }

    // Leading space and trailing newline, as git prints the summary line.
    #[test]
    fn shortstat_parses_all_clauses() {
        let got = parse_shortstat(" 3 files changed, 12 insertions(+), 4 deletions(-)\n");
        assert_eq!(got, DiffStat::new(3, 12, 4));
    }

    #[test]
    fn shortstat_tolerates_missing_clauses_and_empty() {
        // Pure-insertion diff omits deletions; no changes yields all zeros.
        let only_ins = parse_shortstat(" 1 file changed, 2 insertions(+)\n");
        assert_eq!(only_ins.insertions, 2);
        assert_eq!(only_ins.deletions, 0);
        assert_eq!(parse_shortstat(""), DiffStat::default());
    }
}
675
// Property-based fuzzing: the parsers are pure functions over *arbitrary* CLI
// text (a git on the user's machine we don't control), so the load-bearing
// invariant is "never panic, whatever the bytes". These feed both unconstrained
// Unicode and structure-biased inputs (real delimiters: NUL, tab, unit
// separator, `diff --git`, `@@` hunks, rename braces) so the fuzzer reaches the
// byte-offset branches, not just the early returns.
//
// Every property discards the parser's return value: only panic-freedom is
// checked here, not the parsed result.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from git's structural vocabulary plus multibyte text, so a
    /// joined document exercises the porcelain/diff/blame branches.
    ///
    /// Each alternative carries its own terminator (`\n`, or `\0` for the
    /// porcelain-ish record), so lines can be concatenated directly.
    fn structured_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just("diff --git a/f b/f\n".to_string()),
            Just("--- a/f\n".to_string()),
            Just("+++ b/f\n".to_string()),
            Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
            Just("@@ -1 +1 @@\n".to_string()),
            Just("rename from {old => new}.rs\n".to_string()),
            Just("R100\told\tnew\n".to_string()),
            Just(format!("{}\n", "a".repeat(40))), // a 40-hex-ish blame header
            "[-+ ]?[a-zé\t]{0,12}\n",              // diff body / text incl. multibyte
            "[ MARD?]{0,2} [a-zé/]{0,8}\0",        // porcelain-ish NUL record
        ]
    }

    /// A whole document: fewer than 40 [`structured_line`]s concatenated with no
    /// extra separator (possibly none at all, giving the empty string).
    fn structured_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(structured_line(), 0..40).prop_map(|lines| lines.concat())
    }

    proptest! {
        // Panic-freedom on completely arbitrary input.
        #[test]
        fn parsers_never_panic_on_arbitrary_text(s in any::<String>()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_branches(&s);
            let _ = parse_worktree_porcelain(&s);
            let _ = parse_blame_porcelain(&s);
            let _ = parse_shortstat(&s);
            let _ = parse_ls_remote_heads(&s);
            let _ = parse_nul_paths(&s);
            let _ = parse_git_version(&s);
        }

        // …and on structure-biased input that reaches the parsing branches.
        #[test]
        fn parsers_never_panic_on_structured_text(s in structured_doc()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_blame_porcelain(&s);
        }

        // porcelain v2 header/entry lines (with the `2`-consumes-next-record path)
        // must never panic on arbitrary NUL-joined records.
        // The last alternative is free-form text, so records that match no known
        // shape are mixed in with the well-formed ones.
        #[test]
        fn porcelain_v2_never_panics(records in prop::collection::vec(
            prop_oneof![
                Just("# branch.oid (initial)".to_string()),
                Just("# branch.head main".to_string()),
                Just("# branch.ab +1 -2".to_string()),
                "1 [.MADRCU]{2} [a-zé /]{0,10}".prop_map(|s| s),
                "2 R\\. .* R100 [a-zé /]{0,8}".prop_map(|s| s),
                "u UU [a-zé /]{0,8}".prop_map(|s| s),
                "\\? [a-zé /]{0,8}".prop_map(|s| s),
                "[a-zé0-9# ]{0,12}".prop_map(|s| s),
            ],
            0..20,
        ).prop_map(|r| r.join("\0"))) {
            let _ = parse_porcelain_v2(&records);
        }
    }
}
748        ).prop_map(|r| r.join("\0"))) {
749            let _ = parse_porcelain_v2(&records);
750        }
751    }
752}