Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3
4use std::path::PathBuf;
5
/// A single record of `git status --porcelain=v1 -z` output: one
/// NUL-terminated `XY <path>` entry.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct StatusEntry {
    /// The two status characters exactly as git printed them, for example
    /// `" M"`, `"??"`, `"A "` or `"R "`.
    pub code: String,
    /// The path this status refers to; for a rename/copy this is the *new*
    /// path. Taken verbatim from the `-z` output, so there is no C-style
    /// quoting or escaping to undo, even when the path contains spaces.
    pub path: String,
    /// The path the entry was renamed/copied from; `None` for every other
    /// kind of entry.
    pub orig_path: Option<String>,
}
18
/// One commit, as read from a single `\x1f`-delimited `git log` record.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct Commit {
    /// The full commit hash (`%H`).
    pub hash: String,
    /// The abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// The author's name (`%an`).
    pub author: String,
    /// The author date in strict ISO-8601 form (`%aI`), for example
    /// `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// The commit's subject line (`%s`).
    pub subject: String,
}
34
/// One local branch row as listed by `git branch`.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct Branch {
    /// The branch's name.
    pub name: String,
    /// `true` when this row carried the `*` marker, i.e. it is the branch
    /// that is currently checked out.
    pub current: bool,
}
44
/// One record of `git worktree list --porcelain` output.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct Worktree {
    /// Absolute path of the worktree.
    pub path: PathBuf,
    /// The branch's short name, with the `refs/heads/` prefix removed;
    /// `None` when the worktree is detached or bare.
    pub branch: Option<String>,
    /// The commit that is checked out (the `HEAD <sha>` line); `None` for a
    /// bare entry.
    pub head: Option<String>,
    /// Set for the main worktree of a bare repository.
    pub bare: bool,
    /// Set when HEAD is detached, i.e. no branch is checked out.
    pub detached: bool,
    /// Set when the worktree is locked against pruning.
    pub locked: bool,
}
62
/// Summary totals reported by `git diff --shortstat`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct DiffStat {
    /// How many files were changed.
    pub files_changed: usize,
    /// How many lines were added (the `insertions(+)` clause).
    pub insertions: usize,
    /// How many lines were removed (the `deletions(-)` clause).
    pub deletions: usize,
}
74
75/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
76/// (unquoted) paths. A rename/copy entry is followed by its source path as the
77/// next NUL record (e.g. `R  new\0old\0`).
78pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
79    let mut entries = Vec::new();
80    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
81    while let Some(rec) = records.next() {
82        // "XY path": two ASCII code chars (always ASCII → byte-slicing is safe),
83        // a space, then a non-empty path.
84        if rec.len() < 4 {
85            continue;
86        }
87        // A rename/copy (R/C in the index column) carries its source path as the
88        // immediately following NUL record; consume it.
89        let orig_path = if matches!(rec.as_bytes()[0], b'R' | b'C') {
90            records.next().map(str::to_string)
91        } else {
92            None
93        };
94        entries.push(StatusEntry {
95            code: rec[..2].to_string(),
96            path: rec[3..].to_string(),
97            orig_path,
98        });
99    }
100    entries
101}
102
103/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
104/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
105/// separator.
106pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
107    output
108        .split('\0')
109        .filter(|rec| !rec.is_empty())
110        .filter_map(|rec| {
111            let mut fields = rec.split('\u{1f}');
112            Some(Commit {
113                hash: fields.next()?.to_string(),
114                short_hash: fields.next()?.to_string(),
115                author: fields.next()?.to_string(),
116                date: fields.next()?.to_string(),
117                subject: fields.next().unwrap_or("").to_string(),
118            })
119        })
120        .collect()
121}
122
123/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
124pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
125    output
126        .lines()
127        .filter(|line| !line.trim().is_empty())
128        .filter_map(|line| {
129            let current = line.starts_with('*');
130            let name = line.get(1..).unwrap_or("").trim();
131            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
132            if name.is_empty() || name.starts_with('(') {
133                return None;
134            }
135            Some(Branch {
136                name: name.to_string(),
137                current,
138            })
139        })
140        .collect()
141}
142
143/// Parse `git worktree list --porcelain`: records separated by a blank line,
144/// each a set of `label [value]` lines — `worktree <path>`, `HEAD <sha>`,
145/// `branch refs/heads/<name>`, plus the valueless attributes `bare` / `detached`
146/// / `locked`. Unknown labels (e.g. `prunable`) are ignored.
147pub(crate) fn parse_worktree_porcelain(output: &str) -> Vec<Worktree> {
148    let mut worktrees = Vec::new();
149    let mut current: Option<Worktree> = None;
150    let flush = |current: &mut Option<Worktree>, out: &mut Vec<Worktree>| {
151        if let Some(wt) = current.take() {
152            out.push(wt);
153        }
154    };
155    for line in output.lines() {
156        if line.is_empty() {
157            flush(&mut current, &mut worktrees);
158            continue;
159        }
160        let (label, value) = match line.split_once(' ') {
161            Some((l, v)) => (l, Some(v)),
162            None => (line, None),
163        };
164        match label {
165            // A new record begins; flush any record not closed by a blank line.
166            "worktree" => {
167                flush(&mut current, &mut worktrees);
168                current = Some(Worktree {
169                    path: PathBuf::from(value.unwrap_or("")),
170                    branch: None,
171                    head: None,
172                    bare: false,
173                    detached: false,
174                    locked: false,
175                });
176            }
177            "HEAD" => {
178                if let Some(wt) = current.as_mut() {
179                    wt.head = value.map(str::to_string);
180                }
181            }
182            "branch" => {
183                if let Some(wt) = current.as_mut() {
184                    // Value is a full ref (`refs/heads/main`); expose the short name.
185                    wt.branch =
186                        value.map(|v| v.strip_prefix("refs/heads/").unwrap_or(v).to_string());
187                }
188            }
189            "bare" => {
190                if let Some(wt) = current.as_mut() {
191                    wt.bare = true;
192                }
193            }
194            "detached" => {
195                if let Some(wt) = current.as_mut() {
196                    wt.detached = true;
197                }
198            }
199            "locked" => {
200                if let Some(wt) = current.as_mut() {
201                    wt.locked = true;
202                }
203            }
204            _ => {}
205        }
206    }
207    flush(&mut current, &mut worktrees);
208    worktrees
209}
210
211/// Parse `git diff --shortstat`, e.g. ` 3 files changed, 12 insertions(+), 4
212/// deletions(-)`. Any clause may be absent (a pure-insertion diff omits
213/// deletions; no changes yields an empty string → all zeros).
214pub(crate) fn parse_shortstat(output: &str) -> DiffStat {
215    let mut stat = DiffStat::default();
216    for part in output.split(',') {
217        let part = part.trim();
218        let n = part
219            .split_whitespace()
220            .next()
221            .and_then(|tok| tok.parse().ok())
222            .unwrap_or(0);
223        if part.contains("file") {
224            stat.files_changed = n;
225        } else if part.contains("insertion") {
226            stat.insertions = n;
227        } else if part.contains("deletion") {
228            stat.deletions = n;
229        }
230    }
231    stat
232}
233
/// Hermetic unit tests for the parsers: every input is a literal string, so no
/// git process is spawned.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // NUL-delimited records; the path with a space stays raw (no quoting).
        let got = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: " M".into(),
                    path: "src/lib.rs".into(),
                    orig_path: None,
                },
                StatusEntry {
                    code: "??".into(),
                    path: "new file.txt".into(),
                    orig_path: None,
                },
                StatusEntry {
                    code: "A ".into(),
                    path: "added.rs".into(),
                    orig_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_parses_rename_with_orig_path() {
        // `R  new\0old\0` — the source path is the next NUL record.
        let got = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        assert_eq!(
            got,
            vec![
                StatusEntry {
                    code: "R ".into(),
                    path: "new.rs".into(),
                    orig_path: Some("old.rs".into()),
                },
                StatusEntry {
                    code: " M".into(),
                    path: "other.rs".into(),
                    orig_path: None,
                },
            ]
        );
    }

    #[test]
    fn porcelain_parses_rename_with_worktree_change() {
        // A staged rename that was edited again afterwards: both columns set.
        let got = parse_porcelain("RM new.rs\0old.rs\0");
        assert_eq!(
            got,
            vec![StatusEntry {
                code: "RM".into(),
                path: "new.rs".into(),
                orig_path: Some("old.rs".into()),
            }]
        );
    }

    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        assert!(parse_porcelain("\0  \0X\0").is_empty());
    }

    #[test]
    fn log_splits_unit_separated_fields() {
        let input = "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0\
                     def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0";
        let got = parse_log(input);
        assert_eq!(got.len(), 2);
        assert_eq!(
            got[0],
            Commit {
                hash: "abc123".into(),
                short_hash: "abc".into(),
                author: "Ada".into(),
                date: "2026-05-31T10:00:00+00:00".into(),
                subject: "Add feature".into(),
            }
        );
        assert_eq!(got[1].subject, "Fix bug");
    }

    #[test]
    fn log_tolerates_empty_subject() {
        let got = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(got[0].subject, "");
    }

    #[test]
    fn log_skips_records_with_missing_fields() {
        // Fewer than four fields cannot form a commit and are dropped.
        assert!(parse_log("abc123\u{1f}abc\0").is_empty());
        assert!(parse_log("").is_empty());
    }

    #[test]
    fn branches_marks_current_and_skips_detached() {
        let got = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "main".into(),
                    current: true
                },
                Branch {
                    name: "feature".into(),
                    current: false
                },
            ]
        );
    }

    #[test]
    fn branches_accepts_plus_marker_and_blank_lines() {
        // `+` is the third marker the parser documents; it is not "current".
        let got = parse_branches("+ linked\n\n* main\n");
        assert_eq!(
            got,
            vec![
                Branch {
                    name: "linked".into(),
                    current: false
                },
                Branch {
                    name: "main".into(),
                    current: true
                },
            ]
        );
    }

    #[test]
    fn worktrees_parse_branch_detached_and_bare() {
        let input = "worktree /repo\nHEAD abc123\nbranch refs/heads/main\n\
                     \nworktree /repo/wt\nHEAD def456\ndetached\n\
                     \nworktree /repo/bare\nbare\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 3);
        assert_eq!(got[0].path, PathBuf::from("/repo"));
        assert_eq!(got[0].branch.as_deref(), Some("main"));
        assert_eq!(got[0].head.as_deref(), Some("abc123"));
        assert!(got[1].detached && got[1].branch.is_none());
        assert!(got[2].bare && got[2].head.is_none());
    }

    #[test]
    fn worktrees_parse_last_record_without_trailing_blank() {
        // The final record may not be followed by a blank line.
        let got = parse_worktree_porcelain("worktree /only\nHEAD aaa\nbranch refs/heads/x\n");
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
    }

    #[test]
    fn worktrees_parse_locked_with_reason_and_ignore_unknown_labels() {
        // `locked` may carry a reason after the label; `prunable` is unknown.
        let input = "worktree /repo/my wt\nHEAD abc\nbranch refs/heads/x\n\
                     locked in use\nprunable gitdir file points to non-existent location\n";
        let got = parse_worktree_porcelain(input);
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].path, PathBuf::from("/repo/my wt"));
        assert!(got[0].locked);
        assert!(!got[0].bare && !got[0].detached);
        assert_eq!(got[0].branch.as_deref(), Some("x"));
    }

    #[test]
    fn shortstat_parses_all_clauses() {
        let got = parse_shortstat(" 3 files changed, 12 insertions(+), 4 deletions(-)\n");
        assert_eq!(
            got,
            DiffStat {
                files_changed: 3,
                insertions: 12,
                deletions: 4
            }
        );
    }

    #[test]
    fn shortstat_tolerates_missing_clauses_and_empty() {
        // Pure-insertion diff omits deletions; no changes yields all zeros.
        let only_ins = parse_shortstat(" 1 file changed, 2 insertions(+)\n");
        assert_eq!(only_ins.insertions, 2);
        assert_eq!(only_ins.deletions, 0);
        assert_eq!(parse_shortstat(""), DiffStat::default());
    }

    #[test]
    fn shortstat_parses_deletions_only() {
        // Pure-deletion diff omits the insertions clause.
        let only_del = parse_shortstat(" 1 file changed, 3 deletions(-)\n");
        assert_eq!(
            only_del,
            DiffStat {
                files_changed: 1,
                insertions: 0,
                deletions: 3
            }
        );
    }
}