git_object/commit/
ref_iter.rs

1use std::borrow::Cow;
2
3use bstr::BStr;
4use git_hash::{oid, ObjectId};
5use nom::{
6    branch::alt,
7    bytes::complete::is_not,
8    combinator::{all_consuming, opt},
9    error::context,
10};
11
12use crate::{bstr::ByteSlice, commit::decode, parse, parse::NL, CommitRefIter};
13
/// Selects which of the two signature header fields the iterator handles next.
#[derive(Copy, Clone)]
pub(crate) enum SignatureKind {
    /// The `author` field.
    Author,
    /// The `committer` field.
    Committer,
}

/// The position of the commit iterator within the ordered sequence of commit fields.
#[derive(Copy, Clone)]
pub(crate) enum State {
    /// The `tree` field is up next. This is the initial state.
    Tree,
    /// Zero or more `parent` fields are up next.
    Parents,
    /// A signature field is up next, `of` telling which one.
    Signature { of: SignatureKind },
    /// The optional `encoding` field is up next.
    Encoding,
    /// Zero or more additional header fields are up next.
    ExtraHeaders,
    /// Only the commit message remains.
    Message,
}

impl Default for State {
    /// Iteration always begins with the `tree` field.
    fn default() -> Self {
        Self::Tree
    }
}
35
36impl<'a> CommitRefIter<'a> {
37    /// Create a commit iterator from data.
38    pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> {
39        CommitRefIter {
40            data,
41            state: State::default(),
42        }
43    }
44
45    /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding
46    /// the data.
47    ///
48    /// Note that this method must only be called once or else will always return None while consuming a single token.
49    /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they
50    /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again.
51    /// `next()`.
52    pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> {
53        let tree_id = self.next().ok_or_else(missing_field)??;
54        Token::try_into_id(tree_id).ok_or_else(missing_field)
55    }
56
57    /// Return all parent_ids as iterator.
58    ///
59    /// Parsing errors are ignored quietly.
60    pub fn parent_ids(self) -> impl Iterator<Item = git_hash::ObjectId> + 'a {
61        self.filter_map(|t| match t {
62            Ok(Token::Parent { id }) => Some(id),
63            _ => None,
64        })
65    }
66
67    /// Returns all signatures, first the author, then the committer, if there is no decoding error.
68    ///
69    /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not
70    /// if not exactly two signatures were iterable.
71    /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator.
72    pub fn signatures(self) -> impl Iterator<Item = git_actor::SignatureRef<'a>> + 'a {
73        self.filter_map(|t| match t {
74            Ok(Token::Author { signature }) | Ok(Token::Committer { signature }) => Some(signature),
75            _ => None,
76        })
77    }
78
79    /// Returns the committer signature if there is no decoding error.
80    pub fn committer(mut self) -> Result<git_actor::SignatureRef<'a>, crate::decode::Error> {
81        self.find_map(|t| match t {
82            Ok(Token::Committer { signature }) => Some(Ok(signature)),
83            Err(err) => Some(Err(err)),
84            _ => None,
85        })
86        .ok_or_else(missing_field)?
87    }
88
89    /// Returns the author signature if there is no decoding error.
90    ///
91    /// It may contain white space surrounding it, and is exactly as parsed.
92    pub fn author(mut self) -> Result<git_actor::SignatureRef<'a>, crate::decode::Error> {
93        self.find_map(|t| match t {
94            Ok(Token::Author { signature }) => Some(Ok(signature)),
95            Err(err) => Some(Err(err)),
96            _ => None,
97        })
98        .ok_or_else(missing_field)?
99    }
100
101    /// Returns the message if there is no decoding error.
102    ///
103    /// It may contain white space surrounding it, and is exactly as
104    //  parsed.
105    pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> {
106        self.find_map(|t| match t {
107            Ok(Token::Message(msg)) => Some(Ok(msg)),
108            Err(err) => Some(Err(err)),
109            _ => None,
110        })
111        .transpose()
112        .map(|msg| msg.unwrap_or_default())
113    }
114}
115
/// Produce the error used by the accessor methods when an expected token is absent.
///
/// This merely forwards to `crate::decode::empty_error()`, so the error carries no further detail.
fn missing_field() -> crate::decode::Error {
    crate::decode::empty_error()
}
119
impl<'a> CommitRefIter<'a> {
    /// Parse the next token from `i` according to `state`, returning the remaining input along with the token.
    ///
    /// `state` is advanced as a side effect so that the following call parses the next kind of field.
    /// Optional fields that are absent do not produce a token: the state is advanced and the function
    /// calls itself to try the following field on the same input.
    ///
    /// Any parse error is propagated to the caller via `?`.
    fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
        use State::*;
        Ok(match state {
            // The mandatory `tree` field, always followed by the parents.
            Tree => {
                let (i, tree) = context("tree <40 lowercase hex char>", |i| {
                    parse::header_field(i, b"tree", parse::hex_hash)
                })(i)?;
                *state = State::Parents;
                (
                    i,
                    Token::Tree {
                        // presumably `parse::hex_hash` only accepts valid hex, making this infallible - verify in `parse`
                        id: ObjectId::from_hex(tree).expect("parsing validation"),
                    },
                )
            }
            // Any number of `parent` fields: the state is left unchanged while parents keep coming.
            Parents => {
                // NOTE(review): the context string says "commit" while the parsed field is named "parent" - confirm intent.
                let (i, parent) = context(
                    "commit <40 lowercase hex char>",
                    opt(|i| parse::header_field(i, b"parent", parse::hex_hash)),
                )(i)?;
                match parent {
                    Some(parent) => (
                        i,
                        Token::Parent {
                            id: ObjectId::from_hex(parent).expect("parsing validation"),
                        },
                    ),
                    None => {
                        // No further parent, continue with the author signature on the same input.
                        *state = State::Signature {
                            of: SignatureKind::Author,
                        };
                        return Self::next_inner(i, state);
                    }
                }
            }
            // The author signature first, then the committer signature.
            Signature { ref mut of } => {
                // Remember which signature is being parsed, as `of`/`state` are advanced before parsing.
                let who = *of;
                let (field_name, err_msg) = match of {
                    SignatureKind::Author => {
                        *of = SignatureKind::Committer;
                        (&b"author"[..], "author <signature>")
                    }
                    SignatureKind::Committer => {
                        *state = State::Encoding;
                        (&b"committer"[..], "committer <signature>")
                    }
                };
                let (i, signature) = context(err_msg, |i| parse::header_field(i, field_name, parse::signature))(i)?;
                (
                    i,
                    match who {
                        SignatureKind::Author => Token::Author { signature },
                        SignatureKind::Committer => Token::Committer { signature },
                    },
                )
            }
            // The optional `encoding` field; extra headers follow whether or not it is present.
            Encoding => {
                let (i, encoding) = context(
                    "encoding <encoding>",
                    opt(|i| parse::header_field(i, b"encoding", is_not(NL))),
                )(i)?;
                *state = State::ExtraHeaders;
                match encoding {
                    Some(encoding) => (i, Token::Encoding(encoding.as_bstr())),
                    None => return Self::next_inner(i, state),
                }
            }
            // Any number of extra headers: the multi-line form is tried first, then the single-line form.
            ExtraHeaders => {
                let (i, extra_header) = context(
                    "<field> <single-line|multi-line>",
                    opt(alt((
                        // Multi-line values are handed out as owned data.
                        |i| parse::any_header_field_multi_line(i).map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Owned(o)))),
                        // Single-line values borrow from the input.
                        |i| {
                            parse::any_header_field(i, is_not(NL))
                                .map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr()))))
                        },
                    ))),
                )(i)?;
                match extra_header {
                    Some(extra_header) => (i, Token::ExtraHeader(extra_header)),
                    None => {
                        // No further header, what remains is the message.
                        *state = State::Message;
                        return Self::next_inner(i, state);
                    }
                }
            }
            // The message must consume all remaining input, which is what ends the iteration.
            Message => {
                let (i, message) = all_consuming(decode::message)(i)?;
                debug_assert!(
                    i.is_empty(),
                    "we should have consumed all data - otherwise iter may go forever"
                );
                return Ok((i, Token::Message(message)));
            }
        })
    }
}
218
219impl<'a> Iterator for CommitRefIter<'a> {
220    type Item = Result<Token<'a>, crate::decode::Error>;
221
222    fn next(&mut self) -> Option<Self::Item> {
223        if self.data.is_empty() {
224            return None;
225        }
226        match Self::next_inner(self.data, &mut self.state) {
227            Ok((data, token)) => {
228                self.data = data;
229                Some(Ok(token))
230            }
231            Err(err) => {
232                self.data = &[];
233                Some(Err(err))
234            }
235        }
236    }
237}
238
/// A token returned by the [commit iterator][CommitRefIter].
///
/// Each variant corresponds to one field of the commit, or to its message.
#[allow(missing_docs)]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
pub enum Token<'a> {
    /// The id parsed from the `tree` field.
    Tree {
        id: ObjectId,
    },
    /// The id parsed from one `parent` field.
    Parent {
        id: ObjectId,
    },
    /// A person who authored the content of the commit.
    Author {
        signature: git_actor::SignatureRef<'a>,
    },
    /// A person who committed the author's work to the repository.
    Committer {
        signature: git_actor::SignatureRef<'a>,
    },
    /// The value of the `encoding` field.
    Encoding(&'a BStr),
    /// An additional header as a pair of field name and value.
    ///
    /// The value is owned if it was parsed from a multi-line field, and borrowed otherwise.
    ExtraHeader((&'a BStr, Cow<'a, BStr>)),
    /// The commit message.
    Message(&'a BStr),
}
261
262impl<'a> Token<'a> {
263    /// Return the object id of this token if its a [tree][Token::Tree] or a [parent commit][Token::Parent].
264    pub fn id(&self) -> Option<&oid> {
265        match self {
266            Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()),
267            _ => None,
268        }
269    }
270
271    /// Return the owned object id of this token if its a [tree][Token::Tree] or a [parent commit][Token::Parent].
272    pub fn try_into_id(self) -> Option<ObjectId> {
273        match self {
274            Token::Tree { id } | Token::Parent { id } => Some(id),
275            _ => None,
276        }
277    }
278}