Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3//!
4//! The git-format unified-diff model + parser and the version type live in the
5//! shared [`vcs_diff`] crate (`git diff` and `jj diff --git` are byte-identical);
6//! this module keeps only the git-specific parsers (porcelain, log, blame, …).
7
8use std::path::PathBuf;
9
10use vcs_diff::DiffStat;
11
/// A single record of `git status --porcelain=v1 -z` output, shaped
/// `XY <path>` and NUL-delimited.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct StatusEntry {
    /// The two-character `XY` status code, e.g. `" M"`, `"??"`, `"A "`, `"R "`.
    pub code: String,
    /// The path this status describes; for a rename/copy it is the *new* path.
    /// `-z` output is unquoted, so the text is taken verbatim — there is no
    /// C-quoting/escaping to undo, even when the path contains spaces.
    pub path: String,
    /// The original path of a rename/copy; `None` for every other entry. The
    /// name mirrors `vcs_jj::ChangedPath::old_path`, so cross-backend code reads
    /// the rename source the same way on both wrappers.
    pub old_path: Option<String>,
}
26
/// Branch and working-tree state gathered from a single
/// `git status --porcelain=v2 --branch -z` call: HEAD, branch, upstream
/// tracking, ahead/behind, and change counts — everything a prompt or
/// status bar needs, from **one** process spawn.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub struct BranchStatus {
    /// Full object id of the HEAD commit (`# branch.oid`). `None` on an unborn
    /// repo, where git reports `(initial)`. Truncate for display.
    pub head: Option<String>,
    /// Name of the current branch (`# branch.head`); `None` when detached.
    pub branch: Option<String>,
    /// Upstream tracking branch (`# branch.upstream`); `None` when unset.
    pub upstream: Option<String>,
    /// Number of commits ahead of the upstream (`# branch.ab +A`); `None` when
    /// there is no upstream.
    pub ahead: Option<usize>,
    /// Number of commits behind the upstream (`# branch.ab -B`); `None` when
    /// there is no upstream.
    pub behind: Option<usize>,
    /// How many *tracked* entries changed: modified/added/deleted/renamed/copied
    /// plus unmerged (the `1`/`2`/`u` records).
    pub tracked_changes: usize,
    /// How many untracked files there are (the `?` records).
    pub untracked: usize,
    /// How many entries are unmerged/conflicted (the `u` records). These are
    /// also included in `tracked_changes`.
    pub conflicts: usize,
}

impl BranchStatus {
    /// Returns `true` when anything in the working tree changed, whether the
    /// change is to a tracked entry or is an untracked file.
    pub fn is_dirty(&self) -> bool {
        // Clean means both counters are zero; dirty is the negation of that.
        !(self.tracked_changes == 0 && self.untracked == 0)
    }
}
60
/// One commit, as read from a `\x1f`-delimited `git log` record.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Commit {
    /// The full commit hash (`%H`).
    pub hash: String,
    /// The abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// The author's name (`%an`).
    pub author: String,
    /// The author date in strict ISO-8601 form (`%aI`), e.g.
    /// `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// The commit's subject line (`%s`).
    pub subject: String,
}
76
/// One local branch, as listed by `git branch`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Branch {
    /// The branch's name.
    pub name: String,
    /// `true` for the checked-out branch (the one carrying the `*` marker).
    pub current: bool,
}
86
/// One worktree, as reported by `git worktree list --porcelain`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Worktree {
    /// The worktree's absolute path.
    pub path: PathBuf,
    /// The short branch name, with `refs/heads/` stripped. `None` when the
    /// worktree is detached or bare.
    pub branch: Option<String>,
    /// The commit that is checked out (`HEAD <sha>`). `None` for a bare entry.
    pub head: Option<String>,
    /// Set for the main worktree of a bare repository.
    pub bare: bool,
    /// Set when the worktree sits on a detached HEAD (no branch).
    pub detached: bool,
    /// Set when the worktree is locked against pruning.
    pub locked: bool,
}
104
105/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
106/// (unquoted) paths. A rename/copy entry is followed by its source path as the
107/// next NUL record (e.g. `R  new\0old\0`).
108pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
109    let mut entries = Vec::new();
110    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
111    while let Some(rec) = records.next() {
112        // "XY path": two status-code chars, a space, then the path. Real git
113        // codes are ASCII, but slice via `get` so a malformed record (a
114        // multibyte char where the code/space belong) is skipped, not a panic.
115        let (Some(code), Some(path)) = (rec.get(..2), rec.get(3..)) else {
116            continue;
117        };
118        // A rename/copy carries its source path as the immediately following NUL
119        // record; consume it. The `R`/`C` can sit in EITHER status column — the index
120        // column (`R ` staged rename) or the worktree column (` R` worktree rename) —
121        // so check both. Missing the ` R`/` C` case left the source record as a
122        // phantom entry with a garbage `code`/`path` (M11).
123        let old_path = if matches!(code.as_bytes(), [b'R' | b'C', _] | [_, b'R' | b'C']) {
124            records.next().map(str::to_string)
125        } else {
126            None
127        };
128        entries.push(StatusEntry {
129            code: code.to_string(),
130            path: path.to_string(),
131            old_path,
132        });
133    }
134    entries
135}
136
137/// Parse `git status --porcelain=v2 --branch -z` output into a [`BranchStatus`].
138///
139/// Records are NUL-terminated: `# branch.*` header lines first, then entry lines
140/// (`1`/`2` changed, `u` unmerged, `?` untracked, `!` ignored). A `2` (rename/copy)
141/// entry stores its original path as the *next* NUL record, so that record is
142/// consumed and skipped. Everything is `strip_prefix`/compare based — no byte
143/// indexing — so arbitrary bytes never panic (proven by proptest).
144pub(crate) fn parse_porcelain_v2(output: &str) -> BranchStatus {
145    let mut status = BranchStatus::default();
146    let mut records = output.split('\0');
147    while let Some(rec) = records.next() {
148        if let Some(rest) = rec.strip_prefix("# branch.oid ") {
149            // `(initial)` marks an unborn repo (no commits yet).
150            status.head = (rest != "(initial)").then(|| rest.to_string());
151        } else if let Some(rest) = rec.strip_prefix("# branch.head ") {
152            status.branch = (rest != "(detached)").then(|| rest.to_string());
153        } else if let Some(rest) = rec.strip_prefix("# branch.upstream ") {
154            status.upstream = Some(rest.to_string());
155        } else if let Some(rest) = rec.strip_prefix("# branch.ab ") {
156            // `+<ahead> -<behind>`.
157            let mut parts = rest.split(' ');
158            status.ahead = parts
159                .next()
160                .and_then(|t| t.strip_prefix('+'))
161                .and_then(|n| n.parse().ok());
162            status.behind = parts
163                .next()
164                .and_then(|t| t.strip_prefix('-'))
165                .and_then(|n| n.parse().ok());
166        } else if rec.starts_with("1 ") {
167            status.tracked_changes += 1;
168        } else if rec.starts_with("2 ") {
169            status.tracked_changes += 1;
170            // The rename/copy original path is the next NUL record; consume it so
171            // it isn't mis-read as another entry.
172            records.next();
173        } else if rec.starts_with("u ") {
174            status.tracked_changes += 1;
175            status.conflicts += 1;
176        } else if rec.starts_with("? ") {
177            status.untracked += 1;
178        }
179        // `! ` (ignored) and other `# ` headers contribute nothing.
180    }
181    status
182}
183
184/// Parse `git --version` output (`git version 2.54.0.windows.1`) into the shared
185/// [`vcs_diff::Version`]: the first dotted-numeric token wins; non-numeric
186/// trailers (`.windows.1`, `-rc1`) are ignored; a missing patch reads as `0`.
187pub(crate) fn parse_git_version(raw: &str) -> Option<vcs_diff::Version> {
188    vcs_diff::parse_dotted_version(raw)
189}
190
/// Split a NUL-delimited path list (such as `git diff --name-only -z` output)
/// into owned strings: one repo-relative path per record, `/` separators, no
/// quoting. Empty records — notably the one after the trailing NUL — are
/// dropped.
pub(crate) fn parse_nul_paths(output: &str) -> Vec<String> {
    let mut paths = Vec::new();
    for record in output.split('\0') {
        if !record.is_empty() {
            paths.push(record.to_owned());
        }
    }
    paths
}
200
201/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
202/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
203/// separator.
204pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
205    output
206        .split('\0')
207        .filter(|rec| !rec.is_empty())
208        .filter_map(|rec| {
209            let mut fields = rec.split('\u{1f}');
210            Some(Commit {
211                hash: fields.next()?.to_string(),
212                short_hash: fields.next()?.to_string(),
213                author: fields.next()?.to_string(),
214                date: fields.next()?.to_string(),
215                subject: fields.next().unwrap_or("").to_string(),
216            })
217        })
218        .collect()
219}
220
221/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
222pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
223    output
224        .lines()
225        .filter(|line| !line.trim().is_empty())
226        .filter_map(|line| {
227            let current = line.starts_with('*');
228            let name = line.get(1..).unwrap_or("").trim();
229            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
230            if name.is_empty() || name.starts_with('(') {
231                return None;
232            }
233            Some(Branch {
234                name: name.to_string(),
235                current,
236            })
237        })
238        .collect()
239}
240
241/// Parse `git worktree list --porcelain`: records separated by a blank line,
242/// each a set of `label [value]` lines — `worktree <path>`, `HEAD <sha>`,
243/// `branch refs/heads/<name>`, plus the valueless attributes `bare` / `detached`
244/// / `locked`. Unknown labels (e.g. `prunable`) are ignored.
245pub(crate) fn parse_worktree_porcelain(output: &str) -> Vec<Worktree> {
246    let mut worktrees = Vec::new();
247    let mut current: Option<Worktree> = None;
248    let flush = |current: &mut Option<Worktree>, out: &mut Vec<Worktree>| {
249        if let Some(wt) = current.take() {
250            out.push(wt);
251        }
252    };
253    for line in output.lines() {
254        if line.is_empty() {
255            flush(&mut current, &mut worktrees);
256            continue;
257        }
258        let (label, value) = match line.split_once(' ') {
259            Some((l, v)) => (l, Some(v)),
260            None => (line, None),
261        };
262        match label {
263            // A new record begins; flush any record not closed by a blank line.
264            "worktree" => {
265                flush(&mut current, &mut worktrees);
266                current = Some(Worktree {
267                    path: PathBuf::from(value.unwrap_or("")),
268                    branch: None,
269                    head: None,
270                    bare: false,
271                    detached: false,
272                    locked: false,
273                });
274            }
275            "HEAD" => {
276                if let Some(wt) = current.as_mut() {
277                    wt.head = value.map(str::to_string);
278                }
279            }
280            "branch" => {
281                if let Some(wt) = current.as_mut() {
282                    // Value is a full ref (`refs/heads/main`); expose the short name.
283                    wt.branch =
284                        value.map(|v| v.strip_prefix("refs/heads/").unwrap_or(v).to_string());
285                }
286            }
287            "bare" => {
288                if let Some(wt) = current.as_mut() {
289                    wt.bare = true;
290                }
291            }
292            "detached" => {
293                if let Some(wt) = current.as_mut() {
294                    wt.detached = true;
295                }
296            }
297            "locked" => {
298                if let Some(wt) = current.as_mut() {
299                    wt.locked = true;
300                }
301            }
302            _ => {}
303        }
304    }
305    flush(&mut current, &mut worktrees);
306    worktrees
307}
308
/// A single line of `git blame --line-porcelain` output: which commit last
/// touched the line, and where the line came from.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct BlameLine {
    /// Full hash of the commit that last changed this line.
    pub commit: String,
    /// The line's 1-based number in that commit's version of the file.
    pub orig_line: u32,
    /// The line's 1-based number in the blamed version of the file.
    pub final_line: u32,
    /// Name of that commit's author.
    pub author: String,
    /// Author timestamp, as seconds since the unix epoch.
    pub author_time: i64,
    /// Author timezone offset, e.g. `+0200`.
    pub author_tz: String,
    /// The text of the line, without its trailing newline.
    pub content: String,
}
329
330/// Parse `git blame --line-porcelain` output. Every line gets a header
331/// (`<sha> <orig> <final> [<group count>]`, where `<sha>` is a 40-hex SHA-1 or a
332/// 64-hex SHA-256 object id), a full set of `tag value` metadata lines (`author`,
333/// `author-time`, …, optional `boundary`), then the content prefixed with a literal
334/// TAB.
335pub(crate) fn parse_blame_porcelain(output: &str) -> Vec<BlameLine> {
336    let mut lines = Vec::new();
337    let mut current: Option<BlameLine> = None;
338    for line in output.lines() {
339        // Content line: closes the current record.
340        if let Some(content) = line.strip_prefix('\t') {
341            if let Some(mut entry) = current.take() {
342                entry.content = content.to_string();
343                lines.push(entry);
344            }
345            continue;
346        }
347        let (label, value) = match line.split_once(' ') {
348            Some((l, v)) => (l, v),
349            None => (line, ""),
350        };
351        // Header: a commit sha followed by line numbers (and an optional group
352        // count, which only appears on a group's first line). Accept both SHA-1
353        // (40 hex) and SHA-256 (64 hex) object ids — a SHA-256 repo would otherwise
354        // never match, so `blame` would silently return an empty `Vec`.
355        if (label.len() == 40 || label.len() == 64) && label.bytes().all(|b| b.is_ascii_hexdigit())
356        {
357            let mut nums = value.split(' ');
358            let orig = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
359            let fin = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
360            current = Some(BlameLine {
361                commit: label.to_string(),
362                orig_line: orig,
363                final_line: fin,
364                author: String::new(),
365                author_time: 0,
366                author_tz: String::new(),
367                content: String::new(),
368            });
369            continue;
370        }
371        let Some(entry) = current.as_mut() else {
372            continue;
373        };
374        match label {
375            "author" => entry.author = value.to_string(),
376            "author-time" => entry.author_time = value.parse().unwrap_or(0),
377            "author-tz" => entry.author_tz = value.to_string(),
378            // committer*/summary/filename/previous/boundary intentionally not
379            // captured — `#[non_exhaustive]` leaves room to add them later.
380            _ => {}
381        }
382    }
383    lines
384}
385
386/// Parse `git diff --shortstat`, e.g. ` 3 files changed, 12 insertions(+), 4
387/// deletions(-)`. Any clause may be absent (a pure-insertion diff omits
388/// deletions; no changes yields an empty string → all zeros).
389pub(crate) fn parse_shortstat(output: &str) -> DiffStat {
390    let mut stat = DiffStat::default();
391    for part in output.split(',') {
392        let part = part.trim();
393        let n = part
394            .split_whitespace()
395            .next()
396            .and_then(|tok| tok.parse().ok())
397            .unwrap_or(0);
398        if part.contains("file") {
399            stat.files_changed = n;
400        } else if part.contains("insertion") {
401            stat.insertions = n;
402        } else if part.contains("deletion") {
403            stat.deletions = n;
404        }
405    }
406    stat
407}
408
/// Extract the bare branch names from `git ls-remote --heads <remote>` output,
/// which has one `<sha>\trefs/heads/<name>` entry per line. Lines without a
/// TAB, and refs outside `refs/heads/`, are dropped.
pub(crate) fn parse_ls_remote_heads(output: &str) -> Vec<String> {
    let mut heads = Vec::new();
    for row in output.lines() {
        let Some((_oid, full_ref)) = row.split_once('\t') else {
            continue;
        };
        if let Some(short) = full_ref.trim().strip_prefix("refs/heads/") {
            heads.push(short.to_owned());
        }
    }
    heads
}
423
// Hermetic unit tests for the parsers above: every test feeds a literal string
// and asserts on the parsed value, so nothing here spawns a `git` process.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // NUL-delimited records; the path with a space stays raw (no quoting).
        let got = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: " M".into(),
                    path: "src/lib.rs".into(),
                    old_path: None,
                },
                StatusEntry {
                    code: "??".into(),
                    path: "new file.txt".into(),
                    old_path: None,
                },
                StatusEntry {
                    code: "A ".into(),
                    path: "added.rs".into(),
                    old_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_parses_rename_with_old_path() {
        // `R  new\0old\0` — the source path is the next NUL record.
        let got = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: "R ".into(),
                    path: "new.rs".into(),
                    old_path: Some("old.rs".into()),
                },
                StatusEntry {
                    code: " M".into(),
                    path: "other.rs".into(),
                    old_path: None,
                },
            ]
        );
    }

    // M11: a rename/copy in the WORKTREE column (` R`/` C`, not just the index `R `)
    // must also consume its source record — otherwise the source became a phantom
    // entry with a garbage code/path.
    #[test]
    fn porcelain_parses_worktree_rename_in_the_y_column() {
        // ` R new\0old\0` — space in X, R in Y (a worktree rename).
        let got = parse_porcelain(" R new.rs\0old.rs\0 M other.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: " R".into(),
                    path: "new.rs".into(),
                    old_path: Some("old.rs".into()),
                },
                StatusEntry {
                    code: " M".into(),
                    path: "other.rs".into(),
                    old_path: None,
                },
            ],
            "the source record must be consumed, not left as a phantom entry"
        );
    }

    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        assert!(parse_porcelain("\0  \0X\0").is_empty());
    }

    // Regression (found by proptest): a record whose leading char is multibyte
    // must be skipped, not panic on a non-char-boundary slice. `𝓁` is 4 bytes,
    // so byte index 2 lands inside it.
    #[test]
    fn porcelain_skips_non_ascii_status_records() {
        assert!(parse_porcelain("𝓁abc\0").is_empty());
        // A well-formed record alongside the garbage still parses.
        let entries = parse_porcelain("𝓁abc\0 M a.rs\0");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path, "a.rs");
    }

    #[test]
    fn porcelain_v2_parses_branch_and_change_counts() {
        // The rename's original path (`1 trap.rs`) is the next NUL record; it must
        // be CONSUMED, not counted as a fourth `1 …` change.
        let out = concat!(
            "# branch.oid abcdef1234567890\0",
            "# branch.head main\0",
            "# branch.upstream origin/main\0",
            "# branch.ab +2 -1\0",
            "1 .M N... 100644 100644 100644 1111 2222 a.rs\0",
            "2 R. N... 100644 100644 100644 3333 4444 R100 new.rs\0",
            "1 trap.rs\0",
            "u UU N... 100644 100644 100644 100644 5 6 7 conflict.rs\0",
            "? untracked.txt\0",
            "! ignored.txt\0",
        );
        let s = parse_porcelain_v2(out);
        assert_eq!(s.head.as_deref(), Some("abcdef1234567890"));
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream.as_deref(), Some("origin/main"));
        assert_eq!((s.ahead, s.behind), (Some(2), Some(1)));
        assert_eq!(
            s.tracked_changes, 3,
            "1 + 2(rename) + u; the trap is consumed"
        );
        assert_eq!(s.untracked, 1);
        assert_eq!(s.conflicts, 1);
        assert!(s.is_dirty());
    }

    #[test]
    fn porcelain_v2_handles_unborn_detached_and_no_upstream() {
        // Unborn repo: `(initial)` oid, no ab line, clean tree.
        let s = parse_porcelain_v2("# branch.oid (initial)\0# branch.head main\0");
        assert_eq!(s.head, None);
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream, None);
        assert_eq!((s.ahead, s.behind), (None, None));
        assert!(!s.is_dirty());

        // Detached HEAD, no upstream tracking.
        let s = parse_porcelain_v2("# branch.oid deadbeef\0# branch.head (detached)\0");
        assert_eq!(s.head.as_deref(), Some("deadbeef"));
        assert_eq!(s.branch, None);
        assert_eq!(s.upstream, None);
    }

    // --line-porcelain repeats the full metadata for every line; the group
    // count appears only on a group's first header, and `boundary` is a
    // valueless tag — both must parse.
    #[test]
    fn blame_line_porcelain_parses_headers_and_metadata() {
        let sha_a = "a".repeat(40);
        let sha_b = "b".repeat(40);
        let out = format!(
            "{sha_a} 1 1 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nboundary\nfilename f.txt\n\
             \tline one\n\
             {sha_a} 2 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nfilename f.txt\n\
             \tline two\n\
             {sha_b} 1 3 1\nauthor Bob\nauthor-mail <b@x>\nauthor-time 1717600000\n\
             author-tz -0500\ncommitter Bob\nsummary second\nfilename f.txt\n\
             \t\n"
        );
        let lines = parse_blame_porcelain(&out);
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0].commit, sha_a);
        assert_eq!(lines[0].orig_line, 1);
        assert_eq!(lines[0].final_line, 1);
        assert_eq!(lines[0].author, "Alice");
        assert_eq!(lines[0].author_time, 1717500000);
        assert_eq!(lines[0].author_tz, "+0200");
        assert_eq!(lines[0].content, "line one");
        // Second line of the same group: header without a group count.
        assert_eq!(lines[1].final_line, 2);
        assert_eq!(lines[1].content, "line two");
        // A different commit, and an empty content line stays empty.
        assert_eq!(lines[2].commit, sha_b);
        assert_eq!(lines[2].author, "Bob");
        assert_eq!(lines[2].content, "");
    }

    #[test]
    fn blame_ignores_garbage_and_empty_input() {
        assert!(parse_blame_porcelain("").is_empty());
        assert!(parse_blame_porcelain("not a header\n\torphan content\n").is_empty());
    }

    // A SHA-256 repository emits 64-hex commit ids; the header must still be
    // recognised (the old `len()==40`-only check made `blame` return an empty Vec).
    #[test]
    fn blame_recognises_sha256_object_ids() {
        let sha = "c".repeat(64);
        let out = format!(
            "{sha} 1 1 1\nauthor Carol\nauthor-mail <c@x>\nauthor-time 1717700000\n\
             author-tz +0000\ncommitter Carol\nsummary s\nfilename f.txt\n\
             \tline\n"
        );
        let lines = parse_blame_porcelain(&out);
        assert_eq!(
            lines.len(),
            1,
            "a SHA-256 blame must parse, not drop to empty"
        );
        assert_eq!(lines[0].commit, sha);
        assert_eq!(lines[0].author, "Carol");
        assert_eq!(lines[0].content, "line");
    }

    #[test]
    fn git_version_parses_real_world_shapes() {
        // The Windows build trailer (`.windows.1`) is extra dotted components
        // beyond the patch; an `-rc1` suffix rides on the patch itself.
        let v = parse_git_version("git version 2.54.0.windows.1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 54, 0));
        let v = parse_git_version("git version 2.41.0-rc1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 41, 0));
        let v = parse_git_version("git version 2.54").unwrap();
        assert_eq!(v.patch, 0, "missing patch defaults to 0");
        assert!(parse_git_version("no digits here").is_none());
        assert!(parse_git_version("git version unknowable").is_none());
    }

    #[test]
    fn nul_paths_split_and_keep_special_characters() {
        assert_eq!(
            parse_nul_paths("a.rs\0sub/with space.rs\0"),
            ["a.rs", "sub/with space.rs"]
        );
        assert!(parse_nul_paths("").is_empty());
    }

    #[test]
    fn log_splits_unit_separated_fields() {
        let input = "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0\
                     def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0";
        let got = parse_log(input);
        assert_eq!(got.len(), 2);
        assert_eq!(
            got[0],
            Commit {
                hash: "abc123".into(),
                short_hash: "abc".into(),
                author: "Ada".into(),
                date: "2026-05-31T10:00:00+00:00".into(),
                subject: "Add feature".into(),
            }
        );
        assert_eq!(got[1].subject, "Fix bug");
    }

    #[test]
    fn log_tolerates_empty_subject() {
        let got = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(got[0].subject, "");
    }

    // A record that lacks the mandatory hash/short-hash/author/date fields is
    // dropped instead of producing a half-filled `Commit`.
    #[test]
    fn log_skips_records_with_too_few_fields() {
        assert!(parse_log("only\u{1f}two\0").is_empty());
        assert!(parse_log("").is_empty());
    }

    #[test]
    fn branches_marks_current_and_skips_detached() {
        let got = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "main".into(),
                    current: true
                },
                Branch {
                    name: "feature".into(),
                    current: false
                },
            ]
        );
    }

    // `+ ` marks a branch checked out in another worktree: the marker is
    // stripped like any other, and the branch is not the current one.
    #[test]
    fn branches_strip_the_linked_worktree_marker() {
        let got = parse_branches("+ wt-branch\n* main\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "wt-branch".into(),
                    current: false
                },
                Branch {
                    name: "main".into(),
                    current: true
                },
            ]
        );
    }

    #[test]
    fn worktrees_parse_branch_detached_and_bare() {
        let input = "worktree /repo\nHEAD abc123\nbranch refs/heads/main\n\
                     \nworktree /repo/wt\nHEAD def456\ndetached\n\
                     \nworktree /repo/bare\nbare\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 3);
        assert_eq!(got[0].path, PathBuf::from("/repo"));
        assert_eq!(got[0].branch.as_deref(), Some("main"));
        assert_eq!(got[0].head.as_deref(), Some("abc123"));
        assert!(got[1].detached && got[1].branch.is_none());
        assert!(got[2].bare && got[2].head.is_none());
    }

    #[test]
    fn worktrees_parse_last_record_without_trailing_blank() {
        // The final record may not be followed by a blank line.
        let got = parse_worktree_porcelain("worktree /only\nHEAD aaa\nbranch refs/heads/x\n");
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
    }

    // `locked` may carry a free-text reason after the label, and `prunable` is a
    // label the parser does not model — neither may disturb the record.
    #[test]
    fn worktrees_parse_locked_with_reason_and_ignore_unknown_labels() {
        let got = parse_worktree_porcelain(
            "worktree /repo/wt\nHEAD abc\nbranch refs/heads/x\n\
             locked on a removable drive\n\
             prunable gitdir file points to non-existent location\n",
        );
        assert_eq!(got.len(), 1);
        assert!(got[0].locked);
        assert!(!got[0].bare && !got[0].detached);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
        assert_eq!(got[0].head.as_deref(), Some("abc"));
    }

    #[test]
    fn shortstat_parses_all_clauses() {
        let got = parse_shortstat(" 3 files changed, 12 insertions(+), 4 deletions(-)\n");
        assert_eq!(got, DiffStat::new(3, 12, 4));
    }

    #[test]
    fn shortstat_tolerates_missing_clauses_and_empty() {
        // Pure-insertion diff omits deletions; no changes yields all zeros.
        let only_ins = parse_shortstat(" 1 file changed, 2 insertions(+)\n");
        assert_eq!(only_ins.insertions, 2);
        assert_eq!(only_ins.deletions, 0);
        assert_eq!(parse_shortstat(""), DiffStat::default());
    }

    #[test]
    fn shortstat_parses_a_deletion_only_diff() {
        // The mirror image of the pure-insertion case: no insertions clause.
        let got = parse_shortstat(" 1 file changed, 3 deletions(-)\n");
        assert_eq!(got, DiffStat::new(1, 0, 3));
    }

    #[test]
    fn ls_remote_heads_strips_the_ref_prefix() {
        // Nested branch names keep their `/`; non-head refs and lines without a
        // TAB are dropped.
        let got = parse_ls_remote_heads(
            "aaa\trefs/heads/main\nbbb\trefs/heads/feat/x\nccc\trefs/tags/v1\nmalformed\n",
        );
        assert_eq!(got, ["main", "feat/x"]);
        assert!(parse_ls_remote_heads("").is_empty());
    }
}
730
// Property-based fuzzing: the parsers are pure functions over *arbitrary* CLI
// text (a git on the user's machine we don't control), so the load-bearing
// invariant is "never panic, whatever the bytes". These feed both unconstrained
// Unicode and structure-biased inputs (real delimiters such as NUL and tab,
// `diff --git`, `@@` hunks, rename braces) so the fuzzer reaches the
// byte-offset branches, not just the early returns.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from git's structural vocabulary plus multibyte text, so a
    /// joined document exercises the porcelain/diff/blame branches.
    fn structured_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just("diff --git a/f b/f\n".to_string()),
            Just("--- a/f\n".to_string()),
            Just("+++ b/f\n".to_string()),
            Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
            Just("@@ -1 +1 @@\n".to_string()),
            Just("rename from {old => new}.rs\n".to_string()),
            Just("R100\told\tnew\n".to_string()),
            Just(format!("{}\n", "a".repeat(40))), // a 40-hex-ish blame header
            "[-+ ]?[a-zé\t]{0,12}\n",              // diff body / text incl. multibyte
            "[ MARD?]{0,2} [a-zé/]{0,8}\0",        // porcelain-ish NUL record
        ]
    }

    /// Zero to forty structured lines concatenated into one document.
    fn structured_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(structured_line(), 0..40).prop_map(|lines| lines.concat())
    }

    proptest! {
        // Panic-freedom on completely arbitrary input.
        #[test]
        fn parsers_never_panic_on_arbitrary_text(s in any::<String>()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_branches(&s);
            let _ = parse_worktree_porcelain(&s);
            let _ = parse_blame_porcelain(&s);
            let _ = parse_shortstat(&s);
            let _ = parse_ls_remote_heads(&s);
            let _ = parse_nul_paths(&s);
            let _ = parse_git_version(&s);
        }

        // …and on structure-biased input that reaches the parsing branches. Every
        // parser is exercised here too, so the line-oriented ones (branches,
        // worktrees, ls-remote, shortstat) also see tab/NUL/multibyte-laden lines.
        #[test]
        fn parsers_never_panic_on_structured_text(s in structured_doc()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_branches(&s);
            let _ = parse_worktree_porcelain(&s);
            let _ = parse_blame_porcelain(&s);
            let _ = parse_shortstat(&s);
            let _ = parse_ls_remote_heads(&s);
            let _ = parse_nul_paths(&s);
            let _ = parse_git_version(&s);
        }

        // porcelain v2 header/entry lines (with the `2`-consumes-next-record path)
        // must never panic on arbitrary NUL-joined records. The regex arms are
        // used directly: a `&str` pattern is already a `Strategy<Value = String>`,
        // so no identity `prop_map` is needed to mix them with the `Just` arms.
        #[test]
        fn porcelain_v2_never_panics(records in prop::collection::vec(
            prop_oneof![
                Just("# branch.oid (initial)".to_string()),
                Just("# branch.head main".to_string()),
                Just("# branch.ab +1 -2".to_string()),
                "1 [.MADRCU]{2} [a-zé /]{0,10}",
                "2 R\\. .* R100 [a-zé /]{0,8}",
                "u UU [a-zé /]{0,8}",
                "\\? [a-zé /]{0,8}",
                "[a-zé0-9# ]{0,12}",
            ],
            0..20,
        ).prop_map(|r| r.join("\0"))) {
            let _ = parse_porcelain_v2(&records);
        }
    }
}