Skip to main content

git_spawn/parse/
log.rs

1//! Parser for `git log` output using a fixed token format.
2//!
3//! Use [`LOG_FORMAT`] as `--format=<fmt>` so `git log` emits stable, parseable
4//! records: fields separated by `\x1f` (unit separator) and entries
5//! terminated by `\x1e` (record separator).
6
7use crate::error::{Error, Result};
8
/// Pretty-format string to hand to `git log` as `--format=<fmt>`.
///
/// Every commit is emitted as ten fields. Consecutive fields are separated by
/// `\x1f` (unit separator) and the record is closed with `\x1e` (record
/// separator). The placeholders, in order:
///
/// 1. `%H`  - full SHA
/// 2. `%h`  - short SHA
/// 3. `%an` - author name
/// 4. `%ae` - author email
/// 5. `%aI` - author date (ISO 8601 strict)
/// 6. `%cn` - committer name
/// 7. `%ce` - committer email
/// 8. `%cI` - committer date
/// 9. `%s`  - subject
/// 10. `%b` - body
///
/// The body is placed last because it may contain newlines.
pub const LOG_FORMAT: &str = concat!(
    "%H\x1f",
    "%h\x1f",
    "%an\x1f",
    "%ae\x1f",
    "%aI\x1f",
    "%cn\x1f",
    "%ce\x1f",
    "%cI\x1f",
    "%s\x1f",
    "%b\x1e",
);
16
/// One commit as decoded from a single [`LOG_FORMAT`] record.
///
/// Fields are declared in the same order in which [`LOG_FORMAT`] emits them.
/// All values are stored as owned strings exactly as they were split out of
/// the record; dates are not parsed further.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CommitEntry {
    /// Full commit SHA (`%H`).
    pub sha: String,
    /// Abbreviated commit SHA (`%h`).
    pub short_sha: String,
    /// Name of the author (`%an`).
    pub author_name: String,
    /// Email address of the author (`%ae`).
    pub author_email: String,
    /// Author date in strict ISO 8601 / RFC 3339 form (`%aI`).
    pub author_date: String,
    /// Name of the committer (`%cn`).
    pub committer_name: String,
    /// Email address of the committer (`%ce`).
    pub committer_email: String,
    /// Committer date in strict ISO 8601 / RFC 3339 form (`%cI`).
    pub committer_date: String,
    /// Subject line of the commit message (`%s`).
    pub subject: String,
    /// Body of the commit message (`%b`); may be empty or span several lines.
    pub body: String,
}
42
43/// Parse the output of `git log --format=<LOG_FORMAT>`.
44///
45/// # Errors
46/// Returns [`Error::ParseError`] if any record has fewer fields than expected.
47///
48/// # Example
49/// ```
50/// use git_spawn::parse::{parse_log, LOG_FORMAT};
51/// # let _ = LOG_FORMAT; // silence unused if not used at doc-time
52/// let input = "abc\x1fabc\x1fA\x1fa@x\x1f2024-01-01T00:00:00Z\x1fB\x1fb@y\x1f2024-01-02T00:00:00Z\x1fhi\x1fbody\x1e";
53/// let commits = parse_log(input).unwrap();
54/// assert_eq!(commits.len(), 1);
55/// assert_eq!(commits[0].subject, "hi");
56/// assert_eq!(commits[0].author_name, "A");
57/// ```
58pub fn parse_log(input: &str) -> Result<Vec<CommitEntry>> {
59    let mut out = Vec::new();
60    for record in input.split('\x1e') {
61        let trimmed = record.trim_matches('\n');
62        if trimmed.is_empty() {
63            continue;
64        }
65        let fields: Vec<&str> = trimmed.split('\x1f').collect();
66        if fields.len() < 10 {
67            return Err(Error::parse_error(format!(
68                "expected 10 fields, got {}",
69                fields.len()
70            )));
71        }
72        out.push(CommitEntry {
73            sha: fields[0].to_string(),
74            short_sha: fields[1].to_string(),
75            author_name: fields[2].to_string(),
76            author_email: fields[3].to_string(),
77            author_date: fields[4].to_string(),
78            committer_name: fields[5].to_string(),
79            committer_email: fields[6].to_string(),
80            committer_date: fields[7].to_string(),
81            subject: fields[8].to_string(),
82            body: fields[9..].join("\x1f"),
83        });
84    }
85    Ok(out)
86}
87
/// Unit tests for [`parse_log`], driven by hand-written records in the
/// `\x1f`-separated / `\x1e`-terminated layout described by [`LOG_FORMAT`].
#[cfg(test)]
mod tests {
    use super::*;

    /// A single well-formed record yields one entry with fields in order.
    #[test]
    fn parses_single_commit() {
        let input = "sha1\x1fsh\x1fAlice\x1fa@x\x1f2024-01-01T00:00:00Z\x1fBob\x1fb@y\x1f2024-01-02T00:00:00Z\x1fhello\x1fbody text\x1e";
        let out = parse_log(input).unwrap();
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].sha, "sha1");
        assert_eq!(out[0].author_name, "Alice");
        assert_eq!(out[0].body, "body text");
    }

    /// Back-to-back records (with empty bodies) are returned in input order.
    #[test]
    fn parses_multiple_commits() {
        let input = concat!(
            "a\x1fa\x1fA\x1fa@x\x1fd1\x1fB\x1fb@y\x1fd2\x1fone\x1f\x1e",
            "b\x1fb\x1fA\x1fa@x\x1fd3\x1fB\x1fb@y\x1fd4\x1ftwo\x1f\x1e",
        );
        let out = parse_log(input).unwrap();
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].subject, "one");
        assert_eq!(out[1].subject, "two");
    }

    /// Empty input is not an error; it simply contains no records.
    #[test]
    fn empty_yields_no_commits() {
        assert!(parse_log("").unwrap().is_empty());
    }

    /// A record with fewer than ten fields is rejected.
    #[test]
    fn too_few_fields_errors() {
        assert!(parse_log("a\x1fb\x1e").is_err());
    }

    /// `--format=<fmt>` has terminator semantics, so git writes a newline
    /// after each record. Those newlines must neither leak into the next
    /// record's SHA nor produce a spurious trailing record.
    #[test]
    fn tolerates_newline_between_records() {
        let input = concat!(
            "a\x1fa\x1fA\x1fa@x\x1fd1\x1fB\x1fb@y\x1fd2\x1fone\x1fbody one\n\x1e\n",
            "b\x1fb\x1fA\x1fa@x\x1fd3\x1fB\x1fb@y\x1fd4\x1ftwo\x1f\x1e\n",
        );
        let out = parse_log(input).unwrap();
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].sha, "a");
        assert_eq!(out[0].body, "body one");
        assert_eq!(out[1].sha, "b");
        assert_eq!(out[1].body, "");
    }

    /// Newlines inside the body are kept; only those at the record's edges
    /// are stripped.
    #[test]
    fn keeps_interior_newlines_in_body() {
        let input =
            "a\x1fa\x1fA\x1fa@x\x1fd1\x1fB\x1fb@y\x1fd2\x1fsubj\x1fline one\n\nline two\n\x1e";
        let out = parse_log(input).unwrap();
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].subject, "subj");
        assert_eq!(out[0].body, "line one\n\nline two");
    }

    /// A unit separator inside the body does not split it: everything after
    /// the ninth separator belongs to the body.
    #[test]
    fn body_may_contain_unit_separator() {
        let input = "a\x1fa\x1fA\x1fa@x\x1fd1\x1fB\x1fb@y\x1fd2\x1fsubj\x1fleft\x1fright\x1e";
        let out = parse_log(input).unwrap();
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].subject, "subj");
        assert_eq!(out[0].body, "left\x1fright");
    }
}