gix_object/commit/
ref_iter.rs

1use std::{borrow::Cow, ops::Range};
2
3use bstr::BStr;
4use gix_hash::{oid, ObjectId};
5use winnow::{
6    combinator::{alt, eof, opt, terminated},
7    error::StrContext,
8    prelude::*,
9    token::take_till,
10};
11
12use crate::{
13    bstr::ByteSlice,
14    commit::{decode, SignedData, SIGNATURE_FIELD_NAME},
15    parse::{self, NL},
16    CommitRefIter,
17};
18
/// Selects which of the two signature header fields of a commit is parsed next.
#[derive(Clone, Copy)]
pub(crate) enum SignatureKind {
    /// The `author` field.
    Author,
    /// The `committer` field.
    Committer,
}
24
25#[derive(Default, Copy, Clone)]
26pub(crate) enum State {
27    #[default]
28    Tree,
29    Parents,
30    Signature {
31        of: SignatureKind,
32    },
33    Encoding,
34    ExtraHeaders,
35    Message,
36}
37
38/// Lifecycle
39impl<'a> CommitRefIter<'a> {
40    /// Create a commit iterator from data.
41    pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> {
42        CommitRefIter {
43            data,
44            state: State::default(),
45        }
46    }
47}
48
49/// Access
50impl<'a> CommitRefIter<'a> {
51    /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None`
52    /// if the commit isn't signed.
53    ///
54    /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program
55    /// that created it.
56    pub fn signature(data: &'a [u8]) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
57        let mut signature_and_range = None;
58
59        let raw_tokens = CommitRefIterRaw {
60            data,
61            state: State::default(),
62            offset: 0,
63        };
64        for token in raw_tokens {
65            let token = token?;
66            if let Token::ExtraHeader((name, value)) = &token.token {
67                if *name == SIGNATURE_FIELD_NAME {
68                    // keep track of the signature range alongside the signature data,
69                    // because all but the signature is the signed data.
70                    signature_and_range = Some((value.clone(), token.token_range));
71                    break;
72                }
73            }
74        }
75
76        Ok(signature_and_range.map(|(sig, signature_range)| (sig, SignedData { data, signature_range })))
77    }
78
79    /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding
80    /// the data.
81    ///
82    /// Note that this method must only be called once or else will always return None while consuming a single token.
83    /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they
84    /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again.
85    /// `next()`.
86    pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> {
87        let tree_id = self.next().ok_or_else(missing_field)??;
88        Token::try_into_id(tree_id).ok_or_else(missing_field)
89    }
90
91    /// Return all `parent_ids` as iterator.
92    ///
93    /// Parsing errors are ignored quietly.
94    pub fn parent_ids(self) -> impl Iterator<Item = gix_hash::ObjectId> + 'a {
95        self.filter_map(|t| match t {
96            Ok(Token::Parent { id }) => Some(id),
97            _ => None,
98        })
99    }
100
101    /// Returns all signatures, first the author, then the committer, if there is no decoding error.
102    ///
103    /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not
104    /// if not exactly two signatures were iterable.
105    /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator.
106    pub fn signatures(self) -> impl Iterator<Item = gix_actor::SignatureRef<'a>> + 'a {
107        self.filter_map(|t| match t {
108            Ok(Token::Author { signature } | Token::Committer { signature }) => Some(signature),
109            _ => None,
110        })
111    }
112
113    /// Returns the committer signature if there is no decoding error.
114    pub fn committer(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
115        self.find_map(|t| match t {
116            Ok(Token::Committer { signature }) => Some(Ok(signature)),
117            Err(err) => Some(Err(err)),
118            _ => None,
119        })
120        .ok_or_else(missing_field)?
121    }
122
123    /// Returns the author signature if there is no decoding error.
124    ///
125    /// It may contain white space surrounding it, and is exactly as parsed.
126    pub fn author(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
127        self.find_map(|t| match t {
128            Ok(Token::Author { signature }) => Some(Ok(signature)),
129            Err(err) => Some(Err(err)),
130            _ => None,
131        })
132        .ok_or_else(missing_field)?
133    }
134
135    /// Returns the message if there is no decoding error.
136    ///
137    /// It may contain white space surrounding it, and is exactly as
138    //  parsed.
139    pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> {
140        self.find_map(|t| match t {
141            Ok(Token::Message(msg)) => Some(Ok(msg)),
142            Err(err) => Some(Err(err)),
143            _ => None,
144        })
145        .transpose()
146        .map(Option::unwrap_or_default)
147    }
148}
149
150fn missing_field() -> crate::decode::Error {
151    crate::decode::empty_error()
152}
153
154impl<'a> CommitRefIter<'a> {
155    #[inline]
156    fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
157        let input = &mut i;
158        match Self::next_inner_(input, state) {
159            Ok(token) => Ok((*input, token)),
160            Err(err) => Err(crate::decode::Error::with_err(err, input)),
161        }
162    }
163
164    fn next_inner_(
165        input: &mut &'a [u8],
166        state: &mut State,
167    ) -> Result<Token<'a>, winnow::error::ErrMode<crate::decode::ParseError>> {
168        use State::*;
169        Ok(match state {
170            Tree => {
171                let tree = (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash))
172                    .context(StrContext::Expected("tree <40 lowercase hex char>".into()))
173                    .parse_next(input)?;
174                *state = State::Parents;
175                Token::Tree {
176                    id: ObjectId::from_hex(tree).expect("parsing validation"),
177                }
178            }
179            Parents => {
180                let parent = opt(|i: &mut _| parse::header_field(i, b"parent", parse::hex_hash))
181                    .context(StrContext::Expected("commit <40 lowercase hex char>".into()))
182                    .parse_next(input)?;
183                match parent {
184                    Some(parent) => Token::Parent {
185                        id: ObjectId::from_hex(parent).expect("parsing validation"),
186                    },
187                    None => {
188                        *state = State::Signature {
189                            of: SignatureKind::Author,
190                        };
191                        Self::next_inner_(input, state)?
192                    }
193                }
194            }
195            Signature { ref mut of } => {
196                let who = *of;
197                let (field_name, err_msg) = match of {
198                    SignatureKind::Author => {
199                        *of = SignatureKind::Committer;
200                        (&b"author"[..], "author <signature>")
201                    }
202                    SignatureKind::Committer => {
203                        *state = State::Encoding;
204                        (&b"committer"[..], "committer <signature>")
205                    }
206                };
207                let signature = (|i: &mut _| parse::header_field(i, field_name, parse::signature))
208                    .context(StrContext::Expected(err_msg.into()))
209                    .parse_next(input)?;
210                match who {
211                    SignatureKind::Author => Token::Author { signature },
212                    SignatureKind::Committer => Token::Committer { signature },
213                }
214            }
215            Encoding => {
216                let encoding = opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(0.., NL)))
217                    .context(StrContext::Expected("encoding <encoding>".into()))
218                    .parse_next(input)?;
219                *state = State::ExtraHeaders;
220                match encoding {
221                    Some(encoding) => Token::Encoding(encoding.as_bstr()),
222                    None => Self::next_inner_(input, state)?,
223                }
224            }
225            ExtraHeaders => {
226                let extra_header = opt(alt((
227                    |i: &mut _| parse::any_header_field_multi_line(i).map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
228                    |i: &mut _| {
229                        parse::any_header_field(i, take_till(0.., NL))
230                            .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr())))
231                    },
232                )))
233                .context(StrContext::Expected("<field> <single-line|multi-line>".into()))
234                .parse_next(input)?;
235                match extra_header {
236                    Some(extra_header) => Token::ExtraHeader(extra_header),
237                    None => {
238                        *state = State::Message;
239                        Self::next_inner_(input, state)?
240                    }
241                }
242            }
243            Message => {
244                let message = terminated(decode::message, eof).parse_next(input)?;
245                debug_assert!(
246                    input.is_empty(),
247                    "we should have consumed all data - otherwise iter may go forever"
248                );
249                Token::Message(message)
250            }
251        })
252    }
253}
254
255impl<'a> Iterator for CommitRefIter<'a> {
256    type Item = Result<Token<'a>, crate::decode::Error>;
257
258    fn next(&mut self) -> Option<Self::Item> {
259        if self.data.is_empty() {
260            return None;
261        }
262        match Self::next_inner(self.data, &mut self.state) {
263            Ok((data, token)) => {
264                self.data = data;
265                Some(Ok(token))
266            }
267            Err(err) => {
268                self.data = &[];
269                Some(Err(err))
270            }
271        }
272    }
273}
274
275/// A variation of [`CommitRefIter`] that return's [`RawToken`]s instead.
276struct CommitRefIterRaw<'a> {
277    data: &'a [u8],
278    state: State,
279    offset: usize,
280}
281
282impl<'a> Iterator for CommitRefIterRaw<'a> {
283    type Item = Result<RawToken<'a>, crate::decode::Error>;
284
285    fn next(&mut self) -> Option<Self::Item> {
286        if self.data.is_empty() {
287            return None;
288        }
289        match CommitRefIter::next_inner(self.data, &mut self.state) {
290            Ok((remaining, token)) => {
291                let consumed = self.data.len() - remaining.len();
292                let start = self.offset;
293                let end = start + consumed;
294                self.offset = end;
295
296                self.data = remaining;
297                Some(Ok(RawToken {
298                    token,
299                    token_range: start..end,
300                }))
301            }
302            Err(err) => {
303                self.data = &[];
304                Some(Err(err))
305            }
306        }
307    }
308}
309
310/// A combination of a parsed [`Token`] as well as the range of bytes that were consumed to parse it.
311struct RawToken<'a> {
312    /// The parsed token.
313    token: Token<'a>,
314    token_range: Range<usize>,
315}
316
317/// A token returned by the [commit iterator][CommitRefIter].
318#[allow(missing_docs)]
319#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
320pub enum Token<'a> {
321    Tree {
322        id: ObjectId,
323    },
324    Parent {
325        id: ObjectId,
326    },
327    /// A person who authored the content of the commit.
328    Author {
329        signature: gix_actor::SignatureRef<'a>,
330    },
331    /// A person who committed the authors work to the repository.
332    Committer {
333        signature: gix_actor::SignatureRef<'a>,
334    },
335    Encoding(&'a BStr),
336    ExtraHeader((&'a BStr, Cow<'a, BStr>)),
337    Message(&'a BStr),
338}
339
340impl Token<'_> {
341    /// Return the object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
342    pub fn id(&self) -> Option<&oid> {
343        match self {
344            Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()),
345            _ => None,
346        }
347    }
348
349    /// Return the owned object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
350    pub fn try_into_id(self) -> Option<ObjectId> {
351        match self {
352            Token::Tree { id } | Token::Parent { id } => Some(id),
353            _ => None,
354        }
355    }
356}