Skip to main content

gix_object/commit/
ref_iter.rs

1use std::{borrow::Cow, ops::Range};
2
3use bstr::BStr;
4use gix_hash::{ObjectId, oid};
5
6use crate::{
7    CommitRefIter,
8    bstr::ByteSlice,
9    commit::{SIGNATURE_FIELD_NAME, SignedData, decode},
10    parse,
11};
12
/// Identifies which of the two actor header fields of a commit is being parsed.
#[derive(Copy, Clone)]
pub(crate) enum SignatureKind {
    /// The `author` header field.
    Author,
    /// The `committer` header field.
    Committer,
}
18
19#[derive(Default, Copy, Clone)]
20pub(crate) enum State {
21    #[default]
22    Tree,
23    Parents,
24    Signature {
25        of: SignatureKind,
26    },
27    Encoding,
28    ExtraHeaders,
29    Message,
30}
31
32/// Lifecycle
33impl<'a> CommitRefIter<'a> {
34    /// Create a commit iterator from the given `data`, using `object_hash` to know
35    /// what kind of hash to expect for validation.
36    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> CommitRefIter<'a> {
37        CommitRefIter {
38            data,
39            state: State::default(),
40            hash_kind,
41        }
42    }
43}
44
45/// Access
46impl<'a> CommitRefIter<'a> {
47    /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None`
48    /// if the commit isn't signed. All hashes in `data` are parsed as `object_hash`.
49    ///
50    /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program
51    /// that created it.
52    pub fn signature(
53        data: &'a [u8],
54        hash_kind: gix_hash::Kind,
55    ) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
56        let mut signature_and_range = None;
57
58        let raw_tokens = CommitRefIterRaw {
59            data,
60            state: State::default(),
61            offset: 0,
62            hash_kind,
63        };
64        for token in raw_tokens {
65            let token = token?;
66            if let Token::ExtraHeader((name, value)) = &token.token {
67                if *name == SIGNATURE_FIELD_NAME {
68                    // keep track of the signature range alongside the signature data,
69                    // because all but the signature is the signed data.
70                    signature_and_range = Some((value.clone(), token.token_range));
71                    break;
72                }
73            }
74        }
75
76        Ok(signature_and_range.map(|(sig, signature_range)| (sig, SignedData { data, signature_range })))
77    }
78
79    /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding
80    /// the data.
81    ///
82    /// Note that this method must only be called once or else will always return None while consuming a single token.
83    /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they
84    /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again.
85    /// `next()`.
86    pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> {
87        let tree_id = self.next().ok_or_else(missing_field)??;
88        Token::try_into_id(tree_id).ok_or_else(missing_field)
89    }
90
91    /// Return all `parent_ids` as iterator.
92    ///
93    /// Parsing errors are ignored quietly.
94    pub fn parent_ids(self) -> impl Iterator<Item = gix_hash::ObjectId> + 'a {
95        self.filter_map(|t| match t {
96            Ok(Token::Parent { id }) => Some(id),
97            _ => None,
98        })
99    }
100
101    /// Returns all signatures, first the author, then the committer, if there is no decoding error.
102    ///
103    /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not
104    /// if not exactly two signatures were iterable.
105    /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator.
106    pub fn signatures(self) -> impl Iterator<Item = gix_actor::SignatureRef<'a>> + 'a {
107        self.filter_map(|t| match t {
108            Ok(Token::Author { signature } | Token::Committer { signature }) => Some(signature),
109            _ => None,
110        })
111    }
112
113    /// Returns the committer signature if there is no decoding error.
114    pub fn committer(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
115        self.find_map(|t| match t {
116            Ok(Token::Committer { signature }) => Some(Ok(signature)),
117            Err(err) => Some(Err(err)),
118            _ => None,
119        })
120        .ok_or_else(missing_field)?
121    }
122
123    /// Returns the author signature if there is no decoding error.
124    ///
125    /// It may contain white space surrounding it, and is exactly as parsed.
126    pub fn author(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
127        self.find_map(|t| match t {
128            Ok(Token::Author { signature }) => Some(Ok(signature)),
129            Err(err) => Some(Err(err)),
130            _ => None,
131        })
132        .ok_or_else(missing_field)?
133    }
134
135    /// Returns the message if there is no decoding error.
136    ///
137    /// It may contain white space surrounding it, and is exactly as
138    //  parsed.
139    pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> {
140        self.find_map(|t| match t {
141            Ok(Token::Message(msg)) => Some(Ok(msg)),
142            Err(err) => Some(Err(err)),
143            _ => None,
144        })
145        .transpose()
146        .map(Option::unwrap_or_default)
147    }
148}
149
150fn missing_field() -> crate::decode::Error {
151    crate::decode::empty_error()
152}
153
154impl<'a> CommitRefIter<'a> {
155    #[inline]
156    fn next_inner(
157        mut i: &'a [u8],
158        state: &mut State,
159        hash_kind: gix_hash::Kind,
160    ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
161        let input = &mut i;
162        match Self::next_inner_(input, state, hash_kind) {
163            Ok(token) => Ok((*input, token)),
164            Err(err) => Err(err),
165        }
166    }
167
168    fn next_inner_(
169        input: &mut &'a [u8],
170        state: &mut State,
171        hash_kind: gix_hash::Kind,
172    ) -> Result<Token<'a>, crate::decode::Error> {
173        use State::*;
174        Ok(match state {
175            Tree => {
176                let tree = parse::header_field(input, b"tree", |value| parse::hex_hash(value, hash_kind))?;
177                *state = State::Parents;
178                Token::Tree {
179                    id: ObjectId::from_hex(tree).expect("parsing validation"),
180                }
181            }
182            Parents => {
183                if input.starts_with(b"parent ") {
184                    let parent = parse::header_field(input, b"parent", |value| parse::hex_hash(value, hash_kind))?;
185                    Token::Parent {
186                        id: ObjectId::from_hex(parent).expect("parsing validation"),
187                    }
188                } else {
189                    *state = State::Signature {
190                        of: SignatureKind::Author,
191                    };
192                    Self::next_inner_(input, state, hash_kind)?
193                }
194            }
195            Signature { of } => {
196                let who = *of;
197                let field_name = match of {
198                    SignatureKind::Author => {
199                        *of = SignatureKind::Committer;
200                        &b"author"[..]
201                    }
202                    SignatureKind::Committer => {
203                        *state = State::Encoding;
204                        &b"committer"[..]
205                    }
206                };
207                let signature = parse::header_field(input, field_name, parse::signature)?;
208                match who {
209                    SignatureKind::Author => Token::Author { signature },
210                    SignatureKind::Committer => Token::Committer { signature },
211                }
212            }
213            Encoding => {
214                *state = State::ExtraHeaders;
215                if input.starts_with(b"encoding ") {
216                    let encoding = parse::header_field(input, b"encoding", Ok)?;
217                    Token::Encoding(encoding.as_bstr())
218                } else {
219                    Self::next_inner_(input, state, hash_kind)?
220                }
221            }
222            ExtraHeaders => {
223                if input.starts_with(b"\n") {
224                    *state = State::Message;
225                    Self::next_inner_(input, state, hash_kind)?
226                } else {
227                    let before = *input;
228                    match parse::any_header_field_multi_line(input)
229                        .map(|(k, o)| (k.as_bstr(), Cow::Owned(o)))
230                        .or_else(|_| {
231                            *input = before;
232                            parse::any_header_field(input).map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr())))
233                        }) {
234                        Ok(extra_header) => Token::ExtraHeader(extra_header),
235                        Err(err) => return Err(err),
236                    }
237                }
238            }
239            Message => {
240                let message = decode::message(input)?;
241                debug_assert!(
242                    input.is_empty(),
243                    "we should have consumed all data - otherwise iter may go forever"
244                );
245                Token::Message(message)
246            }
247        })
248    }
249}
250
251impl<'a> Iterator for CommitRefIter<'a> {
252    type Item = Result<Token<'a>, crate::decode::Error>;
253
254    fn next(&mut self) -> Option<Self::Item> {
255        if self.data.is_empty() {
256            return None;
257        }
258        match Self::next_inner(self.data, &mut self.state, self.hash_kind) {
259            Ok((data, token)) => {
260                self.data = data;
261                Some(Ok(token))
262            }
263            Err(err) => {
264                self.data = &[];
265                Some(Err(err))
266            }
267        }
268    }
269}
270
271/// A variation of [`CommitRefIter`] that return's [`RawToken`]s instead.
272struct CommitRefIterRaw<'a> {
273    data: &'a [u8],
274    state: State,
275    offset: usize,
276    hash_kind: gix_hash::Kind,
277}
278
279impl<'a> Iterator for CommitRefIterRaw<'a> {
280    type Item = Result<RawToken<'a>, crate::decode::Error>;
281
282    fn next(&mut self) -> Option<Self::Item> {
283        if self.data.is_empty() {
284            return None;
285        }
286        match CommitRefIter::next_inner(self.data, &mut self.state, self.hash_kind) {
287            Ok((remaining, token)) => {
288                let consumed = self.data.len() - remaining.len();
289                let start = self.offset;
290                let end = start + consumed;
291                self.offset = end;
292
293                self.data = remaining;
294                Some(Ok(RawToken {
295                    token,
296                    token_range: start..end,
297                }))
298            }
299            Err(err) => {
300                self.data = &[];
301                Some(Err(err))
302            }
303        }
304    }
305}
306
307/// A combination of a parsed [`Token`] as well as the range of bytes that were consumed to parse it.
308struct RawToken<'a> {
309    /// The parsed token.
310    token: Token<'a>,
311    token_range: Range<usize>,
312}
313
314/// A token returned by the [commit iterator][CommitRefIter].
315#[allow(missing_docs)]
316#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
317pub enum Token<'a> {
318    Tree {
319        id: ObjectId,
320    },
321    Parent {
322        id: ObjectId,
323    },
324    /// A person who authored the content of the commit.
325    Author {
326        signature: gix_actor::SignatureRef<'a>,
327    },
328    /// A person who committed the authors work to the repository.
329    Committer {
330        signature: gix_actor::SignatureRef<'a>,
331    },
332    Encoding(&'a BStr),
333    ExtraHeader((&'a BStr, Cow<'a, BStr>)),
334    Message(&'a BStr),
335}
336
337impl Token<'_> {
338    /// Return the object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
339    pub fn id(&self) -> Option<&oid> {
340        match self {
341            Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()),
342            _ => None,
343        }
344    }
345
346    /// Return the owned object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
347    pub fn try_into_id(self) -> Option<ObjectId> {
348        match self {
349            Token::Tree { id } | Token::Parent { id } => Some(id),
350            _ => None,
351        }
352    }
353}