Skip to main content

radicle_git_metadata/commit/
parse.rs

1#[cfg(test)]
2mod test;
3
4use std::borrow::Cow;
5
6use crate::author::Author;
7
8use super::{
9    headers::Headers,
10    trailers::{OwnedTrailer, Token, Trailer},
11    CommitData,
12};
13
14#[derive(Debug, thiserror::Error)]
15pub enum ParseError {
16    #[error("the provided commit data contained invalid UTF-8")]
17    Utf8(#[source] std::str::Utf8Error),
18    #[error("the commit header is missing the 'tree' entry")]
19    MissingTree,
20    #[error("failed to parse 'tree' value: {0}")]
21    InvalidTree(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
22    #[error("invalid format: {reason}")]
23    InvalidFormat { reason: &'static str },
24    #[error("failed to parse 'parent' value: {0}")]
25    InvalidParent(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
26    #[error("invalid header")]
27    InvalidHeader,
28    #[error("failed to parse 'author' value: {0}")]
29    InvalidAuthor(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
30    #[error("the commit header is missing the 'author' entry")]
31    MissingAuthor,
32    #[error("failed to parse 'committer' value: {0}")]
33    InvalidCommitter(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
34    #[error("the commit header is missing the 'committer' entry")]
35    MissingCommitter,
36}
37
38pub(super) fn parse<Tree: std::str::FromStr, Parent: std::str::FromStr>(
39    commit: &str,
40) -> Result<CommitData<Tree, Parent>, ParseError>
41where
42    Tree::Err: std::error::Error + Send + Sync + 'static,
43    Parent::Err: std::error::Error + Send + Sync + 'static,
44{
45    // The header and body are separated by the first blank line.
46    let (header, body) = commit.split_once("\n\n").ok_or(ParseError::InvalidFormat {
47        reason: "commit headers and body must be separated by a blank line",
48    })?;
49
50    let (tree, parents, author, committer, headers) =
51        parse_headers::<Tree, Parent, Author>(header)?;
52
53    let (message, trailers) = parse_body(body);
54
55    Ok(CommitData {
56        tree,
57        parents,
58        author,
59        committer,
60        headers,
61        message,
62        trailers,
63    })
64}
65
66fn parse_headers<Tree: std::str::FromStr, Parent: std::str::FromStr, Signature: std::str::FromStr>(
67    header: &str,
68) -> Result<(Tree, Vec<Parent>, Signature, Signature, Headers), ParseError>
69where
70    Tree::Err: std::error::Error + Send + Sync + 'static,
71    Parent::Err: std::error::Error + Send + Sync + 'static,
72    Signature::Err: std::error::Error + Send + Sync + 'static,
73{
74    let mut lines = header.lines();
75
76    let tree = lines
77        .next()
78        .ok_or(ParseError::MissingTree)?
79        .strip_prefix("tree ")
80        .map(Tree::from_str)
81        .transpose()
82        .map_err(|err| ParseError::InvalidTree(Box::new(err)))?
83        .ok_or(ParseError::MissingTree)?;
84
85    let mut parents = Vec::new();
86    let mut author: Option<Signature> = None;
87    let mut committer: Option<Signature> = None;
88    let mut headers = Headers::new();
89
90    for line in lines {
91        // Check if a signature is still being parsed
92        if let Some(rest) = line.strip_prefix(' ') {
93            let value: &mut String =
94                headers
95                    .0
96                    .last_mut()
97                    .map(|(_, v)| v)
98                    .ok_or(ParseError::InvalidFormat {
99                        reason: "failed to parse extra header",
100                    })?;
101            value.push('\n');
102            value.push_str(rest);
103            continue;
104        }
105
106        if let Some((name, value)) = line.split_once(' ') {
107            match name {
108                "parent" => parents.push(
109                    value
110                        .parse::<Parent>()
111                        .map_err(|err| ParseError::InvalidParent(Box::new(err)))?,
112                ),
113                "author" => {
114                    author = Some(
115                        value
116                            .parse::<Signature>()
117                            .map_err(|err| ParseError::InvalidAuthor(Box::new(err)))?,
118                    )
119                }
120                "committer" => {
121                    committer = Some(
122                        value
123                            .parse::<Signature>()
124                            .map_err(|err| ParseError::InvalidCommitter(Box::new(err)))?,
125                    )
126                }
127                _ => headers.push(name, value),
128            }
129            continue;
130        }
131    }
132
133    Ok((
134        tree,
135        parents,
136        author.ok_or(ParseError::MissingAuthor)?,
137        committer.ok_or(ParseError::MissingCommitter)?,
138        headers,
139    ))
140}
141
142/// Split the commit body (the portion after the first `\n\n` in the object)
143/// into a message string and a list of trailers.
144///
145/// Trailers are only separated out when the last paragraph of the body
146/// consists entirely of valid `Token: value` lines. If parsing the last
147/// paragraph as trailers fails for any line, the whole body is returned as
148/// the message with an empty trailer list.
149fn parse_body(body: &str) -> (String, Vec<OwnedTrailer>) {
150    // Strip the single trailing newline that Display always writes after the
151    // message, so that rfind("\n\n") reliably finds the trailer separator
152    // rather than a spurious match at the very end.
153    let body = body.trim_end_matches('\n');
154
155    if let Some(split) = body.rfind("\n\n") {
156        let candidate = &body[split + 2..];
157        // Only treat non-empty paragraphs as trailers.
158        if !candidate.trim().is_empty() {
159            if let Some(trailers) = try_parse_trailers(candidate) {
160                return (body[..split].to_string(), trailers);
161            }
162        }
163    }
164
165    (body.to_string(), Vec::new())
166}
167
168/// Attempt to parse every non-empty line in `s` as a `Token: value` trailer.
169///
170/// Returns `None` if any line is not a valid trailer, so that the caller can
171/// fall back to treating the whole paragraph as part of the message.
172fn try_parse_trailers(s: &str) -> Option<Vec<OwnedTrailer>> {
173    s.lines()
174        .filter(|l| !l.is_empty())
175        .map(|line| {
176            let (token_str, value) = line.split_once(": ")?;
177            let token = Token::try_from(token_str).ok()?;
178            // Round-trip through Trailer so that OwnedToken construction
179            // stays inside the trailers module where the private field lives.
180            Some(
181                Trailer {
182                    token,
183                    value: Cow::Borrowed(value),
184                }
185                .to_owned(),
186            )
187        })
188        .collect()
189}