Skip to main content

vcs_git/
parse.rs

1//! Pure parsers for git's machine-readable output. No process execution, so the
2//! tests here are hermetic and run on CI.
3
/// A single record of `git status --porcelain=v1 -z` output.
///
/// On the wire each record has the shape `XY <path>` and is terminated by a
/// NUL byte.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StatusEntry {
    /// The two-character `XY` status code, for example `" M"`, `"??"`, `"A "`
    /// or `"R "`.
    pub code: String,
    /// The path this status refers to; for a rename/copy this is the *new*
    /// path. Because `-z` is used the text is raw: there is no C-style quoting
    /// or escaping to reverse, even when the path contains spaces.
    pub path: String,
    /// The original (source) path of a rename/copy, `None` for every other
    /// kind of entry.
    pub orig_path: Option<String>,
}
16
/// One commit, as parsed from a `\x1f`-delimited `git log` record.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Commit {
    /// The full commit hash (`%H`).
    pub hash: String,
    /// The abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// The author's name (`%an`).
    pub author: String,
    /// The author date in strict ISO-8601 form (`%aI`), for example
    /// `2026-05-31T10:00:00+00:00`.
    pub date: String,
    /// The commit subject line (`%s`).
    pub subject: String,
}
32
/// A local branch, as listed by `git branch`.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Branch {
    /// The branch name.
    pub name: String,
    /// `true` when this is the checked-out branch, i.e. the line carried the
    /// `*` marker.
    pub current: bool,
}
42
43/// Parse `git status --porcelain=v1 -z` output: NUL-delimited records, raw
44/// (unquoted) paths. A rename/copy entry is followed by its source path as the
45/// next NUL record (e.g. `R  new\0old\0`).
46pub(crate) fn parse_porcelain(output: &str) -> Vec<StatusEntry> {
47    let mut entries = Vec::new();
48    let mut records = output.split('\0').filter(|rec| !rec.is_empty());
49    while let Some(rec) = records.next() {
50        // "XY path": two ASCII code chars (always ASCII → byte-slicing is safe),
51        // a space, then a non-empty path.
52        if rec.len() < 4 {
53            continue;
54        }
55        // A rename/copy (R/C in the index column) carries its source path as the
56        // immediately following NUL record; consume it.
57        let orig_path = if matches!(rec.as_bytes()[0], b'R' | b'C') {
58            records.next().map(str::to_string)
59        } else {
60            None
61        };
62        entries.push(StatusEntry {
63            code: rec[..2].to_string(),
64            path: rec[3..].to_string(),
65            orig_path,
66        });
67    }
68    entries
69}
70
71/// Parse `git log -z --format=%H%x1f%h%x1f%an%x1f%aI%x1f%s` output: commits are
72/// NUL-separated (robust to multi-line fields), fields split on the ASCII unit
73/// separator.
74pub(crate) fn parse_log(output: &str) -> Vec<Commit> {
75    output
76        .split('\0')
77        .filter(|rec| !rec.is_empty())
78        .filter_map(|rec| {
79            let mut fields = rec.split('\u{1f}');
80            Some(Commit {
81                hash: fields.next()?.to_string(),
82                short_hash: fields.next()?.to_string(),
83                author: fields.next()?.to_string(),
84                date: fields.next()?.to_string(),
85                subject: fields.next().unwrap_or("").to_string(),
86            })
87        })
88        .collect()
89}
90
91/// Parse `git branch` output. The first column is the `* `/`  `/`+ ` marker.
92pub(crate) fn parse_branches(output: &str) -> Vec<Branch> {
93    output
94        .lines()
95        .filter(|line| !line.trim().is_empty())
96        .filter_map(|line| {
97            let current = line.starts_with('*');
98            let name = line.get(1..).unwrap_or("").trim();
99            // Skip the detached-HEAD pseudo-entry, e.g. "* (HEAD detached at …)".
100            if name.is_empty() || name.starts_with('(') {
101                return None;
102            }
103            Some(Branch {
104                name: name.to_string(),
105                current,
106            })
107        })
108        .collect()
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `StatusEntry` for a comparison.
    fn status(code: &str, path: &str, orig_path: Option<&str>) -> StatusEntry {
        StatusEntry {
            code: code.to_owned(),
            path: path.to_owned(),
            orig_path: orig_path.map(str::to_owned),
        }
    }

    /// Builds the expected `Branch` for a comparison.
    fn branch(name: &str, current: bool) -> Branch {
        Branch {
            name: name.to_owned(),
            current,
        }
    }

    #[test]
    fn porcelain_parses_codes_and_paths() {
        // Three NUL-terminated records; the space inside the second path must
        // come through untouched because `-z` output is never quoted.
        let parsed = parse_porcelain(" M src/lib.rs\0?? new file.txt\0A  added.rs\0");
        let expected = vec![
            status(" M", "src/lib.rs", None),
            status("??", "new file.txt", None),
            status("A ", "added.rs", None),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn porcelain_parses_rename_with_orig_path() {
        // In `R  new\0old\0` the record after the rename is its source path.
        let parsed = parse_porcelain("R  new.rs\0old.rs\0 M other.rs\0");
        let expected = vec![
            status("R ", "new.rs", Some("old.rs")),
            status(" M", "other.rs", None),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn porcelain_ignores_blank_and_short_records() {
        let parsed = parse_porcelain("\0  \0X\0");
        assert!(parsed.is_empty());
    }

    #[test]
    fn log_splits_unit_separated_fields() {
        let input = concat!(
            "abc123\u{1f}abc\u{1f}Ada\u{1f}2026-05-31T10:00:00+00:00\u{1f}Add feature\0",
            "def456\u{1f}def\u{1f}Linus\u{1f}2026-05-30T09:00:00+00:00\u{1f}Fix bug\0",
        );
        let commits = parse_log(input);
        assert_eq!(commits.len(), 2);

        let first = Commit {
            hash: "abc123".to_owned(),
            short_hash: "abc".to_owned(),
            author: "Ada".to_owned(),
            date: "2026-05-31T10:00:00+00:00".to_owned(),
            subject: "Add feature".to_owned(),
        };
        assert_eq!(commits[0], first);
        assert_eq!(commits[1].subject, "Fix bug");
    }

    #[test]
    fn log_tolerates_empty_subject() {
        let commits = parse_log("h\u{1f}h\u{1f}A\u{1f}2026-05-31T10:00:00+00:00\u{1f}\0");
        assert_eq!(commits[0].subject, "");
    }

    #[test]
    fn branches_marks_current_and_skips_detached() {
        let parsed = parse_branches("* main\n  feature\n  (HEAD detached at abc123)\n");
        let expected = vec![branch("main", true), branch("feature", false)];
        assert_eq!(parsed, expected);
    }
}