Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3//!
4//! The git-format unified-diff model + parser and the version type live in the
5//! shared [`vcs_diff`] crate (`git diff` and `jj diff --git` are byte-identical);
6//! this module keeps only the git-specific parsers (porcelain, log, blame, …).
7
8use std::path::PathBuf;
9
10use vcs_diff::DiffStat;
11
/// A single record of `git status --porcelain=v1 -z` output, shaped
/// `XY <path>` and NUL-delimited.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct StatusEntry {
    /// The two-character `XY` status code — for example `" M"`, `"??"`,
    /// `"A "` or `"R "`.
    pub code: String,
    /// The path this status describes; for a rename/copy it is the *new*
    /// path. `-z` output is taken verbatim, so there is no C-quoting or
    /// escaping to reverse, even when the path contains spaces.
    pub path: String,
    /// The original path of a rename/copy, `None` for every other entry.
    /// The name mirrors `vcs_jj::ChangedPath::old_path` so that cross-backend
    /// code reads the rename source identically on both wrappers.
    pub old_path: Option<String>,
}
26
/// Branch and working-tree summary derived from a single
/// `git status --porcelain=v2 --branch -z` run: HEAD, branch, upstream
/// tracking, ahead/behind and change counts. That is everything a prompt or
/// status bar needs, gathered in **one** process spawn.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub struct BranchStatus {
    /// Full object id of the HEAD commit (from `# branch.oid`). `None` for an
    /// unborn repository, where git reports `(initial)`. Callers shorten it
    /// for display.
    pub head: Option<String>,
    /// Name of the current branch (from `# branch.head`); `None` while HEAD is
    /// detached.
    pub branch: Option<String>,
    /// The upstream tracking branch (from `# branch.upstream`); `None` if no
    /// upstream is configured.
    pub upstream: Option<String>,
    /// How many commits the branch is ahead of its upstream (the `+A` half of
    /// `# branch.ab`); `None` without an upstream.
    pub ahead: Option<usize>,
    /// How many commits the branch is behind its upstream (the `-B` half of
    /// `# branch.ab`); `None` without an upstream.
    pub behind: Option<usize>,
    /// Number of changed *tracked* entries: the `1`, `2` and `u` records, i.e.
    /// modified/added/deleted/renamed/copied plus unmerged.
    pub tracked_changes: usize,
    /// Number of untracked files (`?` records).
    pub untracked: usize,
    /// Number of unmerged (conflicted) entries (`u` records). These are also
    /// included in `tracked_changes`.
    pub conflicts: usize,
}
53
54impl BranchStatus {
55    /// Whether the working tree has any change at all — tracked or untracked.
56    pub fn is_dirty(&self) -> bool {
57        self.tracked_changes > 0 || self.untracked > 0
58    }
59}
60
/// One commit, as read from a `\x1f`-delimited `git log` record.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Commit {
    /// The full commit hash (format placeholder `%H`).
    pub hash: String,
    /// The abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// The author's name (`%an`).
    pub author: String,
    /// The author date in strict ISO-8601 form (`%aI`), for example
    /// `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// The commit's subject line (`%s`).
    pub subject: String,
}
76
/// One local branch, as listed by `git branch`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Branch {
    /// The branch's name.
    pub name: String,
    /// `true` for the checked-out branch, i.e. the line carrying the `*`
    /// marker.
    pub current: bool,
}
86
/// One worktree record from `git worktree list --porcelain`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Worktree {
    /// Absolute path of the worktree.
    pub path: PathBuf,
    /// The branch's short name, with the `refs/heads/` prefix removed. `None`
    /// for a detached or bare entry.
    pub branch: Option<String>,
    /// The commit that is checked out (the `HEAD <sha>` line). `None` for a
    /// bare entry.
    pub head: Option<String>,
    /// Set for the main worktree of a bare repository.
    pub bare: bool,
    /// Set when the worktree sits on a detached HEAD rather than a branch.
    pub detached: bool,
    /// Set when the worktree is locked against pruning.
    pub locked: bool,
}
104
105/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
106/// (unquoted) paths. A rename/copy entry is followed by its source path as the
107/// next NUL record (e.g. `R  new\0old\0`).
108pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
109    let mut entries = Vec::new();
110    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
111    while let Some(rec) = records.next() {
112        // "XY path": two status-code chars, a space, then the path. Real git
113        // codes are ASCII, but slice via `get` so a malformed record (a
114        // multibyte char where the code/space belong) is skipped, not a panic.
115        let (Some(code), Some(path)) = (rec.get(..2), rec.get(3..)) else {
116            continue;
117        };
118        // A rename/copy (R/C in the index column) carries its source path as the
119        // immediately following NUL record; consume it.
120        let old_path = if matches!(rec.as_bytes().first(), Some(b'R' | b'C')) {
121            records.next().map(str::to_string)
122        } else {
123            None
124        };
125        entries.push(StatusEntry {
126            code: code.to_string(),
127            path: path.to_string(),
128            old_path,
129        });
130    }
131    entries
132}
133
134/// Parse `git status --porcelain=v2 --branch -z` output into a [`BranchStatus`].
135///
136/// Records are NUL-terminated: `# branch.*` header lines first, then entry lines
137/// (`1`/`2` changed, `u` unmerged, `?` untracked, `!` ignored). A `2` (rename/copy)
138/// entry stores its original path as the *next* NUL record, so that record is
139/// consumed and skipped. Everything is `strip_prefix`/compare based — no byte
140/// indexing — so arbitrary bytes never panic (proven by proptest).
141pub(crate) fn parse_porcelain_v2(output: &str) -> BranchStatus {
142    let mut status = BranchStatus::default();
143    let mut records = output.split('\0');
144    while let Some(rec) = records.next() {
145        if let Some(rest) = rec.strip_prefix("# branch.oid ") {
146            // `(initial)` marks an unborn repo (no commits yet).
147            status.head = (rest != "(initial)").then(|| rest.to_string());
148        } else if let Some(rest) = rec.strip_prefix("# branch.head ") {
149            status.branch = (rest != "(detached)").then(|| rest.to_string());
150        } else if let Some(rest) = rec.strip_prefix("# branch.upstream ") {
151            status.upstream = Some(rest.to_string());
152        } else if let Some(rest) = rec.strip_prefix("# branch.ab ") {
153            // `+<ahead> -<behind>`.
154            let mut parts = rest.split(' ');
155            status.ahead = parts
156                .next()
157                .and_then(|t| t.strip_prefix('+'))
158                .and_then(|n| n.parse().ok());
159            status.behind = parts
160                .next()
161                .and_then(|t| t.strip_prefix('-'))
162                .and_then(|n| n.parse().ok());
163        } else if rec.starts_with("1 ") {
164            status.tracked_changes += 1;
165        } else if rec.starts_with("2 ") {
166            status.tracked_changes += 1;
167            // The rename/copy original path is the next NUL record; consume it so
168            // it isn't mis-read as another entry.
169            records.next();
170        } else if rec.starts_with("u ") {
171            status.tracked_changes += 1;
172            status.conflicts += 1;
173        } else if rec.starts_with("? ") {
174            status.untracked += 1;
175        }
176        // `! ` (ignored) and other `# ` headers contribute nothing.
177    }
178    status
179}
180
/// Parse `git --version` output (`git version 2.54.0.windows.1`) into the shared
/// [`vcs_diff::Version`]: the first dotted-numeric token wins; non-numeric
/// trailers (`.windows.1`, `-rc1`) are ignored; a missing patch reads as `0`.
///
/// This is a thin, git-named entry point: all of the parsing is delegated to
/// [`vcs_diff::parse_dotted_version`], and its result is returned unchanged
/// (`None` when no version can be read from `raw`).
pub(crate) fn parse_git_version(raw: &str) -> Option<vcs_diff::Version> {
    vcs_diff::parse_dotted_version(raw)
}
187
/// Split a NUL-delimited path list (such as the output of
/// `git diff --name-only -z`) into owned strings: one repo-relative path per
/// record, `/`-separated and unquoted. Empty records, such as the one after
/// the final NUL, are dropped.
pub(crate) fn parse_nul_paths(output: &str) -> Vec<String> {
    let mut paths = Vec::new();
    for record in output.split('\0') {
        if !record.is_empty() {
            paths.push(record.to_owned());
        }
    }
    paths
}
197
198/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
199/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
200/// separator.
201pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
202    output
203        .split('\0')
204        .filter(|rec| !rec.is_empty())
205        .filter_map(|rec| {
206            let mut fields = rec.split('\u{1f}');
207            Some(Commit {
208                hash: fields.next()?.to_string(),
209                short_hash: fields.next()?.to_string(),
210                author: fields.next()?.to_string(),
211                date: fields.next()?.to_string(),
212                subject: fields.next().unwrap_or("").to_string(),
213            })
214        })
215        .collect()
216}
217
218/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
219pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
220    output
221        .lines()
222        .filter(|line| !line.trim().is_empty())
223        .filter_map(|line| {
224            let current = line.starts_with('*');
225            let name = line.get(1..).unwrap_or("").trim();
226            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
227            if name.is_empty() || name.starts_with('(') {
228                return None;
229            }
230            Some(Branch {
231                name: name.to_string(),
232                current,
233            })
234        })
235        .collect()
236}
237
238/// Parse `git worktree list --porcelain`: records separated by a blank line,
239/// each a set of `label [value]` lines — `worktree <path>`, `HEAD <sha>`,
240/// `branch refs/heads/<name>`, plus the valueless attributes `bare` / `detached`
241/// / `locked`. Unknown labels (e.g. `prunable`) are ignored.
242pub(crate) fn parse_worktree_porcelain(output: &str) -> Vec<Worktree> {
243    let mut worktrees = Vec::new();
244    let mut current: Option<Worktree> = None;
245    let flush = |current: &mut Option<Worktree>, out: &mut Vec<Worktree>| {
246        if let Some(wt) = current.take() {
247            out.push(wt);
248        }
249    };
250    for line in output.lines() {
251        if line.is_empty() {
252            flush(&mut current, &mut worktrees);
253            continue;
254        }
255        let (label, value) = match line.split_once(' ') {
256            Some((l, v)) => (l, Some(v)),
257            None => (line, None),
258        };
259        match label {
260            // A new record begins; flush any record not closed by a blank line.
261            "worktree" => {
262                flush(&mut current, &mut worktrees);
263                current = Some(Worktree {
264                    path: PathBuf::from(value.unwrap_or("")),
265                    branch: None,
266                    head: None,
267                    bare: false,
268                    detached: false,
269                    locked: false,
270                });
271            }
272            "HEAD" => {
273                if let Some(wt) = current.as_mut() {
274                    wt.head = value.map(str::to_string);
275                }
276            }
277            "branch" => {
278                if let Some(wt) = current.as_mut() {
279                    // Value is a full ref (`refs/heads/main`); expose the short name.
280                    wt.branch =
281                        value.map(|v| v.strip_prefix("refs/heads/").unwrap_or(v).to_string());
282                }
283            }
284            "bare" => {
285                if let Some(wt) = current.as_mut() {
286                    wt.bare = true;
287                }
288            }
289            "detached" => {
290                if let Some(wt) = current.as_mut() {
291                    wt.detached = true;
292                }
293            }
294            "locked" => {
295                if let Some(wt) = current.as_mut() {
296                    wt.locked = true;
297                }
298            }
299            _ => {}
300        }
301    }
302    flush(&mut current, &mut worktrees);
303    worktrees
304}
305
/// Attribution for one line of a blamed file, as reported by
/// `git blame --line-porcelain`: who last touched the line and where it came
/// from.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct BlameLine {
    /// Full hash of the commit that last changed this line.
    pub commit: String,
    /// The line's 1-based number in that commit's version of the file.
    pub orig_line: u32,
    /// The line's 1-based number in the version of the file being blamed.
    pub final_line: u32,
    /// Name of that commit's author.
    pub author: String,
    /// Author timestamp, in seconds since the unix epoch.
    pub author_time: i64,
    /// The author's timezone offset, such as `+0200`.
    pub author_tz: String,
    /// The text of the line, with its trailing newline removed.
    pub content: String,
}
326
327/// Parse `git blame --line-porcelain` output. Every line gets a header
328/// (`<sha> <orig> <final> [<group count>]`, where `<sha>` is a 40-hex SHA-1 or a
329/// 64-hex SHA-256 object id), a full set of `tag value` metadata lines (`author`,
330/// `author-time`, …, optional `boundary`), then the content prefixed with a literal
331/// TAB.
332pub(crate) fn parse_blame_porcelain(output: &str) -> Vec<BlameLine> {
333    let mut lines = Vec::new();
334    let mut current: Option<BlameLine> = None;
335    for line in output.lines() {
336        // Content line: closes the current record.
337        if let Some(content) = line.strip_prefix('\t') {
338            if let Some(mut entry) = current.take() {
339                entry.content = content.to_string();
340                lines.push(entry);
341            }
342            continue;
343        }
344        let (label, value) = match line.split_once(' ') {
345            Some((l, v)) => (l, v),
346            None => (line, ""),
347        };
348        // Header: a commit sha followed by line numbers (and an optional group
349        // count, which only appears on a group's first line). Accept both SHA-1
350        // (40 hex) and SHA-256 (64 hex) object ids — a SHA-256 repo would otherwise
351        // never match, so `blame` would silently return an empty `Vec`.
352        if (label.len() == 40 || label.len() == 64) && label.bytes().all(|b| b.is_ascii_hexdigit())
353        {
354            let mut nums = value.split(' ');
355            let orig = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
356            let fin = nums.next().and_then(|n| n.parse().ok()).unwrap_or(0);
357            current = Some(BlameLine {
358                commit: label.to_string(),
359                orig_line: orig,
360                final_line: fin,
361                author: String::new(),
362                author_time: 0,
363                author_tz: String::new(),
364                content: String::new(),
365            });
366            continue;
367        }
368        let Some(entry) = current.as_mut() else {
369            continue;
370        };
371        match label {
372            "author" => entry.author = value.to_string(),
373            "author-time" => entry.author_time = value.parse().unwrap_or(0),
374            "author-tz" => entry.author_tz = value.to_string(),
375            // committer*/summary/filename/previous/boundary intentionally not
376            // captured — `#[non_exhaustive]` leaves room to add them later.
377            _ => {}
378        }
379    }
380    lines
381}
382
383/// Parse `git diff --shortstat`, e.g. ` 3 files changed, 12 insertions(+), 4
384/// deletions(-)`. Any clause may be absent (a pure-insertion diff omits
385/// deletions; no changes yields an empty string → all zeros).
386pub(crate) fn parse_shortstat(output: &str) -> DiffStat {
387    let mut stat = DiffStat::default();
388    for part in output.split(',') {
389        let part = part.trim();
390        let n = part
391            .split_whitespace()
392            .next()
393            .and_then(|tok| tok.parse().ok())
394            .unwrap_or(0);
395        if part.contains("file") {
396            stat.files_changed = n;
397        } else if part.contains("insertion") {
398            stat.insertions = n;
399        } else if part.contains("deletion") {
400            stat.deletions = n;
401        }
402    }
403    stat
404}
405
/// Extract bare branch names from `git ls-remote --heads <remote>` output,
/// which lists one `<sha>\trefs/heads/<name>` pair per line. Lines without a
/// TAB, or whose ref is not under `refs/heads/`, are skipped.
pub(crate) fn parse_ls_remote_heads(output: &str) -> Vec<String> {
    let mut heads = Vec::new();
    for line in output.lines() {
        let Some((_sha, refname)) = line.split_once('\t') else {
            continue;
        };
        if let Some(branch) = refname.trim().strip_prefix("refs/heads/") {
            heads.push(branch.to_string());
        }
    }
    heads
}
420
// Hermetic unit tests: every parser is fed a literal string shaped like the
// corresponding git output, so nothing here spawns a process.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // NUL-delimited records; the path with a space stays raw (no quoting).
        let got = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: " M".into(),
                    path: "src/lib.rs".into(),
                    old_path: None,
                },
                StatusEntry {
                    code: "??".into(),
                    path: "new file.txt".into(),
                    old_path: None,
                },
                StatusEntry {
                    code: "A ".into(),
                    path: "added.rs".into(),
                    old_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_parses_rename_with_old_path() {
        // `R  new\0old\0` — the source path is the next NUL record.
        let got = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: "R ".into(),
                    path: "new.rs".into(),
                    old_path: Some("old.rs".into()),
                },
                StatusEntry {
                    code: " M".into(),
                    path: "other.rs".into(),
                    old_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        assert!(parse_porcelain("\0  \0X\0").is_empty());
    }

    // Regression (found by proptest): a record whose leading char is multibyte
    // must be skipped, not panic on a non-char-boundary slice. `𝓁` is 4 bytes,
    // so byte index 2 lands inside it.
    #[test]
    fn porcelain_skips_non_ascii_status_records() {
        assert!(parse_porcelain("𝓁abc\0").is_empty());
        // A well-formed record alongside the garbage still parses.
        let entries = parse_porcelain("𝓁abc\0 M a.rs\0");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path, "a.rs");
    }

    #[test]
    fn porcelain_v2_parses_branch_and_change_counts() {
        // The rename's original path (`1 trap.rs`) is the next NUL record; it must
        // be CONSUMED, not counted as a fourth `1 …` change.
        let out = concat!(
            "# branch.oid abcdef1234567890\0",
            "# branch.head main\0",
            "# branch.upstream origin/main\0",
            "# branch.ab +2 -1\0",
            "1 .M N... 100644 100644 100644 1111 2222 a.rs\0",
            "2 R. N... 100644 100644 100644 3333 4444 R100 new.rs\0",
            "1 trap.rs\0",
            "u UU N... 100644 100644 100644 100644 5 6 7 conflict.rs\0",
            "? untracked.txt\0",
            "! ignored.txt\0",
        );
        let s = parse_porcelain_v2(out);
        assert_eq!(s.head.as_deref(), Some("abcdef1234567890"));
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream.as_deref(), Some("origin/main"));
        assert_eq!((s.ahead, s.behind), (Some(2), Some(1)));
        assert_eq!(
            s.tracked_changes, 3,
            "1 + 2(rename) + u; the trap is consumed"
        );
        assert_eq!(s.untracked, 1);
        assert_eq!(s.conflicts, 1);
        assert!(s.is_dirty());
    }

    #[test]
    fn porcelain_v2_handles_unborn_detached_and_no_upstream() {
        // Unborn repo: `(initial)` oid, no ab line, clean tree.
        let s = parse_porcelain_v2("# branch.oid (initial)\0# branch.head main\0");
        assert_eq!(s.head, None);
        assert_eq!(s.branch.as_deref(), Some("main"));
        assert_eq!(s.upstream, None);
        assert_eq!((s.ahead, s.behind), (None, None));
        assert!(!s.is_dirty());

        // Detached HEAD, no upstream tracking.
        let s = parse_porcelain_v2("# branch.oid deadbeef\0# branch.head (detached)\0");
        assert_eq!(s.head.as_deref(), Some("deadbeef"));
        assert_eq!(s.branch, None);
        assert_eq!(s.upstream, None);
    }

    // --line-porcelain repeats the full metadata for every line; the group
    // count appears only on a group's first header, and `boundary` is a
    // valueless tag — both must parse.
    #[test]
    fn blame_line_porcelain_parses_headers_and_metadata() {
        let sha_a = "a".repeat(40);
        let sha_b = "b".repeat(40);
        let out = format!(
            "{sha_a} 1 1 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nboundary\nfilename f.txt\n\
             \tline one\n\
             {sha_a} 2 2\nauthor Alice\nauthor-mail <a@x>\nauthor-time 1717500000\n\
             author-tz +0200\ncommitter Alice\nsummary first\nfilename f.txt\n\
             \tline two\n\
             {sha_b} 1 3 1\nauthor Bob\nauthor-mail <b@x>\nauthor-time 1717600000\n\
             author-tz -0500\ncommitter Bob\nsummary second\nfilename f.txt\n\
             \t\n"
        );
        let lines = parse_blame_porcelain(&out);
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0].commit, sha_a);
        assert_eq!(lines[0].orig_line, 1);
        assert_eq!(lines[0].final_line, 1);
        assert_eq!(lines[0].author, "Alice");
        assert_eq!(lines[0].author_time, 1717500000);
        assert_eq!(lines[0].author_tz, "+0200");
        assert_eq!(lines[0].content, "line one");
        // Second line of the same group: header without a group count.
        assert_eq!(lines[1].final_line, 2);
        assert_eq!(lines[1].content, "line two");
        // A different commit, and an empty content line stays empty.
        assert_eq!(lines[2].commit, sha_b);
        assert_eq!(lines[2].author, "Bob");
        assert_eq!(lines[2].content, "");
    }

    #[test]
    fn blame_ignores_garbage_and_empty_input() {
        assert!(parse_blame_porcelain("").is_empty());
        assert!(parse_blame_porcelain("not a header\n\torphan content\n").is_empty());
    }

    // A SHA-256 repository emits 64-hex commit ids; the header must still be
    // recognised (the old `len()==40`-only check made `blame` return an empty Vec).
    #[test]
    fn blame_recognises_sha256_object_ids() {
        let sha = "c".repeat(64);
        let out = format!(
            "{sha} 1 1 1\nauthor Carol\nauthor-mail <c@x>\nauthor-time 1717700000\n\
             author-tz +0000\ncommitter Carol\nsummary s\nfilename f.txt\n\
             \tline\n"
        );
        let lines = parse_blame_porcelain(&out);
        assert_eq!(
            lines.len(),
            1,
            "a SHA-256 blame must parse, not drop to empty"
        );
        assert_eq!(lines[0].commit, sha);
        assert_eq!(lines[0].author, "Carol");
        assert_eq!(lines[0].content, "line");
    }

    #[test]
    fn git_version_parses_real_world_shapes() {
        // The Windows build trailer (`.windows.1`) is extra dotted components
        // beyond the patch; an `-rc1` suffix rides on the patch itself.
        let v = parse_git_version("git version 2.54.0.windows.1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 54, 0));
        let v = parse_git_version("git version 2.41.0-rc1").unwrap();
        assert_eq!((v.major, v.minor, v.patch), (2, 41, 0));
        let v = parse_git_version("git version 2.54").unwrap();
        assert_eq!(v.patch, 0, "missing patch defaults to 0");
        assert!(parse_git_version("no digits here").is_none());
        assert!(parse_git_version("git version unknowable").is_none());
    }

    #[test]
    fn nul_paths_split_and_keep_special_characters() {
        assert_eq!(
            parse_nul_paths("a.rs\0sub/with space.rs\0"),
            ["a.rs", "sub/with space.rs"]
        );
        assert!(parse_nul_paths("").is_empty());
    }

    #[test]
    fn log_splits_unit_separated_fields() {
        let input = "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0\
                     def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0";
        let got = parse_log(input);
        assert_eq!(got.len(), 2);
        assert_eq!(
            got[0],
            Commit {
                hash: "abc123".into(),
                short_hash: "abc".into(),
                author: "Ada".into(),
                date: "2026-05-31T10:00:00+00:00".into(),
                subject: "Add feature".into(),
            }
        );
        assert_eq!(got[1].subject, "Fix bug");
    }

    #[test]
    fn log_tolerates_empty_subject() {
        let got = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(got[0].subject, "");
    }

    #[test]
    fn branches_marks_current_and_skips_detached() {
        let got = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "main".into(),
                    current: true
                },
                Branch {
                    name: "feature".into(),
                    current: false
                },
            ]
        );
    }

    // The `+` marker (a branch checked out in another worktree) occupies the
    // same single column as `*`; the branch is listed but is not "current".
    #[test]
    fn branches_accept_plus_marker_as_not_current() {
        let got = parse_branches("+ elsewhere\n* main\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "elsewhere".into(),
                    current: false
                },
                Branch {
                    name: "main".into(),
                    current: true
                },
            ]
        );
    }

    #[test]
    fn worktrees_parse_branch_detached_and_bare() {
        let input = "worktree /repo\nHEAD abc123\nbranch refs/heads/main\n\
                     \nworktree /repo/wt\nHEAD def456\ndetached\n\
                     \nworktree /repo/bare\nbare\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 3);
        assert_eq!(got[0].path, PathBuf::from("/repo"));
        assert_eq!(got[0].branch.as_deref(), Some("main"));
        assert_eq!(got[0].head.as_deref(), Some("abc123"));
        assert!(got[1].detached && got[1].branch.is_none());
        assert!(got[2].bare && got[2].head.is_none());
    }

    #[test]
    fn worktrees_parse_last_record_without_trailing_blank() {
        // The final record may not be followed by a blank line.
        let got = parse_worktree_porcelain("worktree /only\nHEAD aaa\nbranch refs/heads/x\n");
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
    }

    // `locked` may carry a free-text reason after the label, and labels the
    // parser does not know (`prunable`) must not disturb the record.
    #[test]
    fn worktrees_parse_locked_with_reason_and_ignore_unknown_labels() {
        let input = "worktree /repo/wt\nHEAD abc123\nbranch refs/heads/feature\n\
                     locked on a removable drive\nprunable gitdir file is gone\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 1);
        assert!(got[0].locked);
        assert!(!got[0].bare && !got[0].detached);
        assert_eq!(got[0].branch.as_deref(), Some("feature"));
        assert_eq!(got[0].head.as_deref(), Some("abc123"));
    }

    #[test]
    fn shortstat_parses_all_clauses() {
        let got = parse_shortstat(" 3 files changed, 12 insertions(+), 4 deletions(-)\n");
        assert_eq!(got, DiffStat::new(3, 12, 4));
    }

    #[test]
    fn shortstat_tolerates_missing_clauses_and_empty() {
        // Pure-insertion diff omits deletions; no changes yields all zeros.
        let only_ins = parse_shortstat(" 1 file changed, 2 insertions(+)\n");
        assert_eq!(only_ins.insertions, 2);
        assert_eq!(only_ins.deletions, 0);
        assert_eq!(parse_shortstat(""), DiffStat::default());
    }

    // Only `refs/heads/*` lines yield a name (slashes in the name are kept);
    // other refs and lines without a TAB are dropped.
    #[test]
    fn ls_remote_heads_keeps_only_branch_refs() {
        let out = "aaa\trefs/heads/main\nbbb\trefs/heads/feature/x\n\
                   ccc\trefs/tags/v1\nno tab here\n";
        assert_eq!(parse_ls_remote_heads(out), ["main", "feature/x"]);
        assert!(parse_ls_remote_heads("").is_empty());
    }
}
702
// Property-based fuzzing: the parsers are pure functions over *arbitrary* CLI
// text (a git on the user's machine we don't control), so the load-bearing
// invariant is "never panic, whatever the bytes". These feed both unconstrained
// Unicode and structure-biased inputs (real delimiters: NUL, tab, unit
// separator, `diff --git`, `@@` hunks, rename braces) so the fuzzer reaches the
// byte-offset branches, not just the early returns.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from git's structural vocabulary plus multibyte text, so a
    /// joined document exercises the porcelain/diff/blame branches.
    fn structured_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just("diff --git a/f b/f\n".to_string()),
            Just("--- a/f\n".to_string()),
            Just("+++ b/f\n".to_string()),
            Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
            Just("@@ -1 +1 @@\n".to_string()),
            Just("rename from {old => new}.rs\n".to_string()),
            Just("R100\told\tnew\n".to_string()),
            Just(format!("{}\n", "a".repeat(40))), // a 40-hex-ish blame header
            "[-+ ]?[a-zé\t]{0,12}\n",              // diff body / text incl. multibyte
            "[ MARD?]{0,2} [a-zé/]{0,8}\0",        // porcelain-ish NUL record
        ]
    }

    /// Zero to forty structured lines concatenated into one document.
    fn structured_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(structured_line(), 0..40).prop_map(|lines| lines.concat())
    }

    proptest! {
        // Panic-freedom on completely arbitrary input.
        #[test]
        fn parsers_never_panic_on_arbitrary_text(s in any::<String>()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_branches(&s);
            let _ = parse_worktree_porcelain(&s);
            let _ = parse_blame_porcelain(&s);
            let _ = parse_shortstat(&s);
            let _ = parse_ls_remote_heads(&s);
            let _ = parse_nul_paths(&s);
            let _ = parse_git_version(&s);
        }

        // …and on structure-biased input that reaches the parsing branches.
        // Every parser implemented in this module is exercised (the
        // vocabulary's tabs, spaces and multibyte text reach the line-based
        // parsers too).
        #[test]
        fn parsers_never_panic_on_structured_text(s in structured_doc()) {
            let _ = parse_porcelain(&s);
            let _ = parse_porcelain_v2(&s);
            let _ = parse_log(&s);
            let _ = parse_branches(&s);
            let _ = parse_worktree_porcelain(&s);
            let _ = parse_blame_porcelain(&s);
            let _ = parse_shortstat(&s);
            let _ = parse_ls_remote_heads(&s);
            let _ = parse_nul_paths(&s);
        }

        // porcelain v2 header/entry lines (with the `2`-consumes-next-record path)
        // must never panic on arbitrary NUL-joined records. String-regex
        // literals are already `Strategy<Value = String>`, so they sit in
        // `prop_oneof!` next to `Just(String)` without any mapping.
        #[test]
        fn porcelain_v2_never_panics(records in prop::collection::vec(
            prop_oneof![
                Just("# branch.oid (initial)".to_string()),
                Just("# branch.head main".to_string()),
                Just("# branch.ab +1 -2".to_string()),
                "1 [.MADRCU]{2} [a-zé /]{0,10}",
                "2 R\\. .* R100 [a-zé /]{0,8}",
                "u UU [a-zé /]{0,8}",
                "\\? [a-zé /]{0,8}",
                "[a-zé0-9# ]{0,12}",
            ],
            0..20,
        ).prop_map(|r| r.join("\0"))) {
            let _ = parse_porcelain_v2(&records);
        }
    }
}