Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3
4use std::path::PathBuf;
5
/// A single record of `git status --porcelain=v1 -z` output, i.e. one
/// NUL-terminated `XY <path>` entry.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StatusEntry {
    /// The two status characters exactly as git printed them, for example
    /// `" M"`, `"??"`, `"A "` or `"R "`.
    pub code: String,
    /// The path this entry describes; for a rename/copy this is the *new* path.
    /// Because `-z` output is raw, nothing is C-quoted or escaped, so paths
    /// containing spaces arrive unchanged.
    pub path: String,
    /// The source path of a rename/copy, or `None` for every other entry.
    pub orig_path: Option<String>,
}
18
/// One commit, as parsed from a `git log` record whose fields are separated by
/// the ASCII unit separator (`\x1f`).
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Commit {
    /// The full hash (`%H`).
    pub hash: String,
    /// The abbreviated hash (`%h`).
    pub short_hash: String,
    /// The author's name (`%an`).
    pub author: String,
    /// The author date in strict ISO-8601 form (`%aI`), such as
    /// `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// The commit's subject line (`%s`).
    pub subject: String,
}
34
/// One local branch listed by `git branch`.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Branch {
    /// The branch's name.
    pub name: String,
    /// `true` when this branch carries the `*` marker, i.e. it is checked out.
    pub current: bool,
}
44
/// One record of `git worktree list --porcelain` output.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Worktree {
    /// Absolute location of the worktree on disk.
    pub path: PathBuf,
    /// The branch's short name, with the `refs/heads/` prefix removed; `None`
    /// for a detached or bare worktree.
    pub branch: Option<String>,
    /// The commit from the `HEAD <sha>` line; `None` for a bare entry.
    pub head: Option<String>,
    /// Set for the main worktree of a bare repository.
    pub bare: bool,
    /// Set when HEAD is detached, so there is no branch.
    pub detached: bool,
    /// Set when the worktree is locked against pruning.
    pub locked: bool,
}
62
/// The totals reported by `git diff --shortstat`.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct DiffStat {
    /// How many files changed.
    pub files_changed: usize,
    /// How many lines were added (the `insertions(+)` clause).
    pub insertions: usize,
    /// How many lines were removed (the `deletions(-)` clause).
    pub deletions: usize,
}
74
/// The kind of change a unified diff records for one file.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ChangeKind {
    /// The file is new (a `new file mode …` line is present).
    Added,
    /// The contents of an existing file changed.
    Modified,
    /// The file was deleted (a `deleted file mode …` line is present).
    Deleted,
    /// The file moved (`rename from …` / `rename to …` lines are present).
    Renamed,
}
88
/// A body line of a [`Hunk`], classified by its marker. The payload is the
/// line's text with the leading ` `/`+`/`-` marker removed.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffLine {
    /// A context line, unchanged on both sides (marker ` `).
    Context(String),
    /// A line present only in the new file (marker `+`).
    Added(String),
    /// A line present only in the old file (marker `-`).
    Removed(String),
}
101
102/// A single `@@ … @@` hunk within a [`FileDiff`].
103#[derive(Debug, Clone, PartialEq, Eq)]
104#[non_exhaustive]
105pub struct Hunk {
106    /// Start line in the old file (the `-<start>` of the `@@` header).
107    pub old_start: usize,
108    /// Line count in the old file (defaults to 1 when the `,<count>` is omitted).
109    pub old_lines: usize,
110    /// Start line in the new file (the `+<start>` of the `@@` header).
111    pub new_start: usize,
112    /// Line count in the new file (defaults to 1 when the `,<count>` is omitted).
113    pub new_lines: usize,
114    /// Text after the closing `@@` (the function/section heading); empty when none.
115    pub section: String,
116    /// The hunk body, one entry per `+`/`-`/` ` line.
117    pub lines: Vec<DiffLine>,
118}
119
120/// One file's entry in a parsed git-format unified diff (`git diff` or
121/// `jj diff --git`).
122#[derive(Debug, Clone, PartialEq, Eq)]
123#[non_exhaustive]
124pub struct FileDiff {
125    /// How the file changed.
126    pub change: ChangeKind,
127    /// The file's path — the *new* path for a rename — forward-slash normalised.
128    pub path: String,
129    /// For a rename, the original path (forward-slash normalised); `None` otherwise.
130    pub old_path: Option<String>,
131    /// The `@@` hunks; empty for a binary file or a pure rename with no edits.
132    pub hunks: Vec<Hunk>,
133}
134
135/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
136/// (unquoted) paths. A rename/copy entry is followed by its source path as the
137/// next NUL record (e.g. `R  new\0old\0`).
138pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
139    let mut entries = Vec::new();
140    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
141    while let Some(rec) = records.next() {
142        // "XY path": two ASCII code chars (always ASCII → byte-slicing is safe),
143        // a space, then a non-empty path.
144        if rec.len() < 4 {
145            continue;
146        }
147        // A rename/copy (R/C in the index column) carries its source path as the
148        // immediately following NUL record; consume it.
149        let orig_path = if matches!(rec.as_bytes()[0], b'R' | b'C') {
150            records.next().map(str::to_string)
151        } else {
152            None
153        };
154        entries.push(StatusEntry {
155            code: rec[..2].to_string(),
156            path: rec[3..].to_string(),
157            orig_path,
158        });
159    }
160    entries
161}
162
163/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
164/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
165/// separator.
166pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
167    output
168        .split('\0')
169        .filter(|rec| !rec.is_empty())
170        .filter_map(|rec| {
171            let mut fields = rec.split('\u{1f}');
172            Some(Commit {
173                hash: fields.next()?.to_string(),
174                short_hash: fields.next()?.to_string(),
175                author: fields.next()?.to_string(),
176                date: fields.next()?.to_string(),
177                subject: fields.next().unwrap_or("").to_string(),
178            })
179        })
180        .collect()
181}
182
183/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
184pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
185    output
186        .lines()
187        .filter(|line| !line.trim().is_empty())
188        .filter_map(|line| {
189            let current = line.starts_with('*');
190            let name = line.get(1..).unwrap_or("").trim();
191            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
192            if name.is_empty() || name.starts_with('(') {
193                return None;
194            }
195            Some(Branch {
196                name: name.to_string(),
197                current,
198            })
199        })
200        .collect()
201}
202
203/// Parse `git worktree list --porcelain`: records separated by a blank line,
204/// each a set of `label [value]` lines — `worktree <path>`, `HEAD <sha>`,
205/// `branch refs/heads/<name>`, plus the valueless attributes `bare` / `detached`
206/// / `locked`. Unknown labels (e.g. `prunable`) are ignored.
207pub(crate) fn parse_worktree_porcelain(output: &str) -> Vec<Worktree> {
208    let mut worktrees = Vec::new();
209    let mut current: Option<Worktree> = None;
210    let flush = |current: &mut Option<Worktree>, out: &mut Vec<Worktree>| {
211        if let Some(wt) = current.take() {
212            out.push(wt);
213        }
214    };
215    for line in output.lines() {
216        if line.is_empty() {
217            flush(&mut current, &mut worktrees);
218            continue;
219        }
220        let (label, value) = match line.split_once(' ') {
221            Some((l, v)) => (l, Some(v)),
222            None => (line, None),
223        };
224        match label {
225            // A new record begins; flush any record not closed by a blank line.
226            "worktree" => {
227                flush(&mut current, &mut worktrees);
228                current = Some(Worktree {
229                    path: PathBuf::from(value.unwrap_or("")),
230                    branch: None,
231                    head: None,
232                    bare: false,
233                    detached: false,
234                    locked: false,
235                });
236            }
237            "HEAD" => {
238                if let Some(wt) = current.as_mut() {
239                    wt.head = value.map(str::to_string);
240                }
241            }
242            "branch" => {
243                if let Some(wt) = current.as_mut() {
244                    // Value is a full ref (`refs/heads/main`); expose the short name.
245                    wt.branch =
246                        value.map(|v| v.strip_prefix("refs/heads/").unwrap_or(v).to_string());
247                }
248            }
249            "bare" => {
250                if let Some(wt) = current.as_mut() {
251                    wt.bare = true;
252                }
253            }
254            "detached" => {
255                if let Some(wt) = current.as_mut() {
256                    wt.detached = true;
257                }
258            }
259            "locked" => {
260                if let Some(wt) = current.as_mut() {
261                    wt.locked = true;
262                }
263            }
264            _ => {}
265        }
266    }
267    flush(&mut current, &mut worktrees);
268    worktrees
269}
270
271/// Parse `git diff --shortstat`, e.g. ` 3 files changed, 12 insertions(+), 4
272/// deletions(-)`. Any clause may be absent (a pure-insertion diff omits
273/// deletions; no changes yields an empty string → all zeros).
274pub(crate) fn parse_shortstat(output: &str) -> DiffStat {
275    let mut stat = DiffStat::default();
276    for part in output.split(',') {
277        let part = part.trim();
278        let n = part
279            .split_whitespace()
280            .next()
281            .and_then(|tok| tok.parse().ok())
282            .unwrap_or(0);
283        if part.contains("file") {
284            stat.files_changed = n;
285        } else if part.contains("insertion") {
286            stat.insertions = n;
287        } else if part.contains("deletion") {
288            stat.deletions = n;
289        }
290    }
291    stat
292}
293
294/// Parse a git-format unified diff into one [`FileDiff`] per file. Works on
295/// `git diff` and `jj diff --git` output alike. Public so a consumer can parse
296/// diff text it obtained by other means.
297///
298/// Paths are read from the unambiguous single-path lines (`+++ b/…`, `--- a/…`,
299/// `rename to …`) rather than the space-ambiguous `diff --git a/… b/…` header,
300/// and normalised to forward slashes. Ported from the `vcs-flow-commit` parser.
301pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
302    diff_sections(diff).filter_map(parse_section).collect()
303}
304
/// Split a git-format diff into its per-file sections. A section begins at a
/// line starting with `diff --git ` and extends to the next such line, or to
/// the end of the input; any text ahead of the first header is discarded.
fn diff_sections(full: &str) -> impl Iterator<Item = &str> {
    // Byte offset of every header line.
    let mut starts = Vec::new();
    let mut offset = 0;
    for line in full.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            starts.push(offset);
        }
        offset += line.len();
    }

    // Each section ends where the next one starts; the last ends at EOF.
    let mut sections = Vec::with_capacity(starts.len());
    for (i, &start) in starts.iter().enumerate() {
        let end = starts.get(i + 1).copied().unwrap_or(full.len());
        sections.push(&full[start..end]);
    }
    sections.into_iter()
}
328
329/// Determine the [`FileDiff`] for one `diff --git` section: change kind and path
330/// from the header lines, plus every `@@` hunk and its body.
331fn parse_section(section: &str) -> Option<FileDiff> {
332    let mut kind = ChangeKind::Modified;
333    let mut new_path = None;
334    let mut minus_path = None;
335    let mut rename_to = None;
336    let mut rename_from = None;
337    let mut hunks: Vec<Hunk> = Vec::new();
338    let mut current: Option<Hunk> = None;
339
340    for line in section.lines() {
341        if let Some(hunk) = parse_hunk_header(line) {
342            if let Some(done) = current.replace(hunk) {
343                hunks.push(done);
344            }
345            continue;
346        }
347        if let Some(hunk) = current.as_mut() {
348            // Inside a hunk body: classify by the leading marker. `\ No newline at
349            // end of file` annotations and any stray blank line are dropped.
350            match line.as_bytes().first() {
351                Some(b' ') => hunk.lines.push(DiffLine::Context(line[1..].to_string())),
352                Some(b'+') => hunk.lines.push(DiffLine::Added(line[1..].to_string())),
353                Some(b'-') => hunk.lines.push(DiffLine::Removed(line[1..].to_string())),
354                _ => {}
355            }
356            continue;
357        }
358        // Header region (before the first `@@`).
359        if line.starts_with("new file") {
360            kind = ChangeKind::Added;
361        } else if line.starts_with("deleted file") {
362            kind = ChangeKind::Deleted;
363        } else if let Some(p) = line.strip_prefix("rename to ") {
364            rename_to = Some(p.trim_end().to_string());
365        } else if let Some(p) = line.strip_prefix("rename from ") {
366            rename_from = Some(p.trim_end().to_string());
367        } else if let Some(p) = line.strip_prefix("+++ b/") {
368            new_path = Some(p.trim_end().to_string());
369        } else if let Some(p) = line.strip_prefix("--- a/") {
370            minus_path = Some(p.trim_end().to_string());
371        }
372    }
373    if let Some(done) = current.take() {
374        hunks.push(done);
375    }
376
377    let normalize = |p: String| p.replace('\\', "/");
378    // A rename keeps its old path so a caller can record the deletion too.
379    let old_path = if rename_to.is_some() {
380        kind = ChangeKind::Renamed;
381        rename_from.map(normalize)
382    } else {
383        None
384    };
385    let path = rename_to
386        .or(new_path)
387        .or(minus_path)
388        .or_else(|| header_b_path(section))?;
389    Some(FileDiff {
390        change: kind,
391        path: normalize(path),
392        old_path,
393        hunks,
394    })
395}
396
397/// Parse a hunk header `@@ -<os>[,<ol>] +<ns>[,<nl>] @@[ <section>]` into an empty
398/// [`Hunk`]; `None` for any other line.
399fn parse_hunk_header(line: &str) -> Option<Hunk> {
400    let rest = line.strip_prefix("@@ ")?;
401    let (ranges, section) = rest.split_once(" @@")?;
402    let mut parts = ranges.split_whitespace();
403    let (old_start, old_lines) = parse_hunk_range(parts.next()?.strip_prefix('-')?);
404    let (new_start, new_lines) = parse_hunk_range(parts.next()?.strip_prefix('+')?);
405    Some(Hunk {
406        old_start,
407        old_lines,
408        new_start,
409        new_lines,
410        section: section.strip_prefix(' ').unwrap_or(section).to_string(),
411        lines: Vec::new(),
412    })
413}
414
/// Split a `<start>[,<count>]` hunk range into `(start, count)`. A range with
/// no `,<count>` covers one line; a component that is not a number becomes 0.
fn parse_hunk_range(range: &str) -> (usize, usize) {
    let number = |text: &str| text.parse::<usize>().unwrap_or(0);
    if let Some((start, count)) = range.split_once(',') {
        (number(start), number(count))
    } else {
        (number(range), 1)
    }
}
422
/// Fallback path extraction for sections with no `+++`/`---`/`rename` lines
/// (e.g. binary files): the `b/<new>` of the `diff --git` header.
///
/// Such sections describe an unrenamed file, whose header reads `a/<p> b/<p>`
/// with the same path on both sides. That symmetry locates the boundary
/// exactly, even when `<p>` itself contains the literal `" b/"`. A header that
/// is not symmetric falls back to splitting at the first `" b/"`. Returns
/// `None` when the section's first line is not a `diff --git` header or has no
/// `" b/"`.
fn header_b_path(section: &str) -> Option<String> {
    let first = section.lines().next()?;
    let s = first.strip_prefix("diff --git ")?;

    // `a/<p> b/<p>` is `2 * len(p) + 5` bytes long, so the `" b/"` separator
    // starts at byte `len(p) + 2`. Checked slicing keeps a midpoint that lands
    // inside a multi-byte character from panicking.
    if let Some(doubled) = s.len().checked_sub(5) {
        if doubled % 2 == 0 {
            let mid = doubled / 2 + 2;
            let a_side = s.get(..mid).and_then(|half| half.strip_prefix("a/"));
            let b_side = s.get(mid..).and_then(|half| half.strip_prefix(" b/"));
            if let (Some(a), Some(b)) = (a_side, b_side) {
                if a == b {
                    return Some(b.to_string());
                }
            }
        }
    }

    // Asymmetric header: take the first " b/" as the boundary.
    let idx = s.find(" b/")?;
    Some(s[idx + 3..].to_string())
}
432
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the expected [`StatusEntry`] for a porcelain assertion.
    fn entry(code: &str, path: &str, orig_path: Option<&str>) -> StatusEntry {
        StatusEntry {
            code: code.to_string(),
            path: path.to_string(),
            orig_path: orig_path.map(str::to_string),
        }
    }

    /// Build the expected [`Branch`] for a branch-list assertion.
    fn branch(name: &str, current: bool) -> Branch {
        Branch {
            name: name.to_string(),
            current,
        }
    }

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // Records are NUL-delimited, so a path with a space needs no quoting.
        let parsed = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        let expected = vec![
            entry(" M", "src/lib.rs", None),
            entry("??", "new file.txt", None),
            entry("A ", "added.rs", None),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn porcelain_parses_rename_with_orig_path() {
        // In `R  new\0old\0` the record after the rename is its source path.
        let parsed = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        let expected = vec![
            entry("R ", "new.rs", Some("old.rs")),
            entry(" M", "other.rs", None),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        let parsed = parse_porcelain("\0  \0X\0");
        assert!(parsed.is_empty());
    }

    #[test]
    fn log_splits_unit_separated_fields() {
        let input = concat!(
            "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0",
            "def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0",
        );
        let commits = parse_log(input);
        assert_eq!(commits.len(), 2);

        let first = Commit {
            hash: "abc123".into(),
            short_hash: "abc".into(),
            author: "Ada".into(),
            date: "2026-05-31T10:00:00+00:00".into(),
            subject: "Add feature".into(),
        };
        assert_eq!(commits[0], first);
        assert_eq!(commits[1].subject, "Fix bug");
    }

    #[test]
    fn log_tolerates_empty_subject() {
        let commits = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(commits[0].subject, "");
    }

    #[test]
    fn branches_marks_current_and_skips_detached() {
        let parsed = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        let expected = vec![branch("main", true), branch("feature", false)];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn worktrees_parse_branch_detached_and_bare() {
        let input = concat!(
            "worktree /repo\nHEAD abc123\nbranch refs/heads/main\n",
            "\nworktree /repo/wt\nHEAD def456\ndetached\n",
            "\nworktree /repo/bare\nbare\n",
        );
        let parsed = parse_worktree_porcelain(input);
        assert_eq!(parsed.len(), 3);

        let (main, linked, bare) = (&parsed[0], &parsed[1], &parsed[2]);
        assert_eq!(main.path, PathBuf::from("/repo"));
        assert_eq!(main.branch.as_deref(), Some("main"));
        assert_eq!(main.head.as_deref(), Some("abc123"));
        assert!(linked.detached && linked.branch.is_none());
        assert!(bare.bare && bare.head.is_none());
    }

    #[test]
    fn worktrees_parse_last_record_without_trailing_blank() {
        // No blank line has to follow the final record.
        let parsed = parse_worktree_porcelain("worktree /only\nHEAD aaa\nbranch refs/heads/x\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].branch.as_deref(), Some("x"));
    }

    #[test]
    fn shortstat_parses_all_clauses() {
        let parsed = parse_shortstat(" 3 files changed, 12 insertions(+), 4 deletions(-)\n");
        let expected = DiffStat {
            files_changed: 3,
            insertions: 12,
            deletions: 4,
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn shortstat_tolerates_missing_clauses_and_empty() {
        // No deletions clause for a pure-insertion diff; empty output is all
        // zeros.
        let insert_only = parse_shortstat(" 1 file changed, 2 insertions(+)\n");
        assert_eq!(insert_only.insertions, 2);
        assert_eq!(insert_only.deletions, 0);
        assert_eq!(parse_shortstat(""), DiffStat::default());
    }

    #[test]
    fn diff_covers_add_modify_delete_rename() {
        // One section each for an add (new), a modify (mod), a delete (gone)
        // and a rename across directories (old/f -> new/f). Ported from the
        // vcs-flow section-parser test.
        let added = concat!(
            "diff --git a/new b/new\n",
            "new file mode 100644\n--- /dev/null\n+++ b/new\n@@ -0,0 +1 @@\n+n\n",
        );
        let modified = concat!(
            "diff --git a/mod b/mod\n",
            "--- a/mod\n+++ b/mod\n@@ -1 +1 @@\n-a\n+b\n",
        );
        let deleted = concat!(
            "diff --git a/gone b/gone\n",
            "deleted file mode 100644\n--- a/gone\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
        );
        let renamed = concat!(
            "diff --git a/old/f.txt b/new/f.txt\n",
            "similarity index 100%\nrename from old/f.txt\nrename to new/f.txt\n",
        );
        let full = [added, modified, deleted, renamed].concat();

        let files = parse_diff(&full);
        let summary: Vec<(&str, ChangeKind)> = files
            .iter()
            .map(|file| (file.path.as_str(), file.change))
            .collect();
        assert_eq!(
            summary,
            vec![
                ("new", ChangeKind::Added),
                ("mod", ChangeKind::Modified),
                ("gone", ChangeKind::Deleted),
                ("new/f.txt", ChangeKind::Renamed),
            ]
        );

        // A rename keeps its old path, so the deletion can be recorded too.
        let rename = files
            .iter()
            .find(|file| file.change == ChangeKind::Renamed)
            .unwrap();
        assert_eq!(rename.old_path.as_deref(), Some("old/f.txt"));
    }

    #[test]
    fn diff_handles_space_paths() {
        // For a path with spaces git appends a tab to the `+++`/`---` lines;
        // the path has to come through whole, since the `diff --git` header is
        // ambiguous in this case.
        let full = concat!(
            "diff --git a/a b/c.txt b/a b/c.txt\n",
            "--- a/a b/c.txt\t\n",
            "+++ b/a b/c.txt\t\n",
            "@@ -1 +1 @@\n-x\n+y\n",
        );
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "a b/c.txt");
    }

    #[test]
    fn diff_parses_hunk_ranges_and_body() {
        let full = concat!(
            "diff --git a/f b/f\n--- a/f\n+++ b/f\n",
            "@@ -1,2 +1,3 @@ fn main()\n",
            " ctx\n-old\n+new\n+added\n",
        );
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);

        let hunk = &files[0].hunks[0];
        let ranges = (
            hunk.old_start,
            hunk.old_lines,
            hunk.new_start,
            hunk.new_lines,
        );
        assert_eq!(ranges, (1, 2, 1, 3));
        assert_eq!(hunk.section, "fn main()");

        let body = vec![
            DiffLine::Context("ctx".into()),
            DiffLine::Removed("old".into()),
            DiffLine::Added("new".into()),
            DiffLine::Added("added".into()),
        ];
        assert_eq!(hunk.lines, body);
    }

    #[test]
    fn diff_omitted_count_defaults_to_one() {
        // A header without `,count` (`@@ -3 +3 @@`) covers one line per side.
        let full = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -3 +3 @@\n-a\n+b\n";
        let files = parse_diff(full);
        let hunk = &files[0].hunks[0];
        assert_eq!((hunk.old_start, hunk.old_lines), (3, 1));
        assert_eq!((hunk.new_start, hunk.new_lines), (3, 1));
    }
}