Skip to main content

gix_object/commit/
ref_iter.rs

1use std::{borrow::Cow, ops::Range};
2
3use bstr::BStr;
4use gix_hash::{oid, ObjectId};
5
6use crate::{
7    bstr::ByteSlice,
8    commit::{decode, SignedData, SIGNATURE_FIELD_NAME},
9    parse, CommitRefIter,
10};
11
/// Identifies which of the two signature headers of a commit is to be parsed next.
#[derive(Clone, Copy)]
pub(crate) enum SignatureKind {
    /// The `author` header, which is parsed first.
    Author,
    /// The `committer` header, which is parsed after the author.
    Committer,
}
17
18#[derive(Default, Copy, Clone)]
19pub(crate) enum State {
20    #[default]
21    Tree,
22    Parents,
23    Signature {
24        of: SignatureKind,
25    },
26    Encoding,
27    ExtraHeaders,
28    Message,
29}
30
31/// Lifecycle
32impl<'a> CommitRefIter<'a> {
33    /// Create a commit iterator from the given `data`, using `hash_kind` to know
34    /// what kind of hash to expect for validation.
35    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> CommitRefIter<'a> {
36        CommitRefIter {
37            data,
38            state: State::default(),
39            hash_kind,
40        }
41    }
42}
43
44/// Access
45impl<'a> CommitRefIter<'a> {
46    /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None`
47    /// if the commit isn't signed. All hashes in `data` are parsed as `hash_kind`.
48    ///
49    /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program
50    /// that created it.
51    pub fn signature(
52        data: &'a [u8],
53        hash_kind: gix_hash::Kind,
54    ) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
55        let mut signature_and_range = None;
56
57        let raw_tokens = CommitRefIterRaw {
58            data,
59            state: State::default(),
60            offset: 0,
61            hash_kind,
62        };
63        for token in raw_tokens {
64            let token = token?;
65            if let Token::ExtraHeader((name, value)) = &token.token {
66                if *name == SIGNATURE_FIELD_NAME {
67                    // keep track of the signature range alongside the signature data,
68                    // because all but the signature is the signed data.
69                    signature_and_range = Some((value.clone(), token.token_range));
70                    break;
71                }
72            }
73        }
74
75        Ok(signature_and_range.map(|(sig, signature_range)| (sig, SignedData { data, signature_range })))
76    }
77
78    /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding
79    /// the data.
80    ///
81    /// Note that this method must only be called once or else will always return None while consuming a single token.
82    /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they
83    /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again.
84    /// `next()`.
85    pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> {
86        let tree_id = self.next().ok_or_else(missing_field)??;
87        Token::try_into_id(tree_id).ok_or_else(missing_field)
88    }
89
90    /// Return all `parent_ids` as iterator.
91    ///
92    /// Parsing errors are ignored quietly.
93    pub fn parent_ids(self) -> impl Iterator<Item = gix_hash::ObjectId> + 'a {
94        self.filter_map(|t| match t {
95            Ok(Token::Parent { id }) => Some(id),
96            _ => None,
97        })
98    }
99
100    /// Returns all signatures, first the author, then the committer, if there is no decoding error.
101    ///
102    /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not
103    /// if not exactly two signatures were iterable.
104    /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator.
105    pub fn signatures(self) -> impl Iterator<Item = gix_actor::SignatureRef<'a>> + 'a {
106        self.filter_map(|t| match t {
107            Ok(Token::Author { signature } | Token::Committer { signature }) => Some(signature),
108            _ => None,
109        })
110    }
111
112    /// Returns the committer signature if there is no decoding error.
113    pub fn committer(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
114        self.find_map(|t| match t {
115            Ok(Token::Committer { signature }) => Some(Ok(signature)),
116            Err(err) => Some(Err(err)),
117            _ => None,
118        })
119        .ok_or_else(missing_field)?
120    }
121
122    /// Returns the author signature if there is no decoding error.
123    ///
124    /// It may contain white space surrounding it, and is exactly as parsed.
125    pub fn author(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
126        self.find_map(|t| match t {
127            Ok(Token::Author { signature }) => Some(Ok(signature)),
128            Err(err) => Some(Err(err)),
129            _ => None,
130        })
131        .ok_or_else(missing_field)?
132    }
133
134    /// Returns the message if there is no decoding error.
135    ///
136    /// It may contain white space surrounding it, and is exactly as
137    //  parsed.
138    pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> {
139        self.find_map(|t| match t {
140            Ok(Token::Message(msg)) => Some(Ok(msg)),
141            Err(err) => Some(Err(err)),
142            _ => None,
143        })
144        .transpose()
145        .map(Option::unwrap_or_default)
146    }
147}
148
149fn missing_field() -> crate::decode::Error {
150    crate::decode::empty_error()
151}
152
153impl<'a> CommitRefIter<'a> {
154    #[inline]
155    fn next_inner(
156        mut i: &'a [u8],
157        state: &mut State,
158        hash_kind: gix_hash::Kind,
159    ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
160        let input = &mut i;
161        match Self::next_inner_(input, state, hash_kind) {
162            Ok(token) => Ok((*input, token)),
163            Err(err) => Err(err),
164        }
165    }
166
167    fn next_inner_(
168        input: &mut &'a [u8],
169        state: &mut State,
170        hash_kind: gix_hash::Kind,
171    ) -> Result<Token<'a>, crate::decode::Error> {
172        use State::*;
173        Ok(match state {
174            Tree => {
175                let tree = parse::header_field(input, b"tree", |value| parse::hex_hash(value, hash_kind))?;
176                *state = State::Parents;
177                Token::Tree {
178                    id: ObjectId::from_hex(tree).expect("parsing validation"),
179                }
180            }
181            Parents => {
182                if input.starts_with(b"parent ") {
183                    let parent = parse::header_field(input, b"parent", |value| parse::hex_hash(value, hash_kind))?;
184                    Token::Parent {
185                        id: ObjectId::from_hex(parent).expect("parsing validation"),
186                    }
187                } else {
188                    *state = State::Signature {
189                        of: SignatureKind::Author,
190                    };
191                    Self::next_inner_(input, state, hash_kind)?
192                }
193            }
194            Signature { ref mut of } => {
195                let who = *of;
196                let field_name = match of {
197                    SignatureKind::Author => {
198                        *of = SignatureKind::Committer;
199                        &b"author"[..]
200                    }
201                    SignatureKind::Committer => {
202                        *state = State::Encoding;
203                        &b"committer"[..]
204                    }
205                };
206                let signature = parse::header_field(input, field_name, parse::signature)?;
207                match who {
208                    SignatureKind::Author => Token::Author { signature },
209                    SignatureKind::Committer => Token::Committer { signature },
210                }
211            }
212            Encoding => {
213                *state = State::ExtraHeaders;
214                if input.starts_with(b"encoding ") {
215                    let encoding = parse::header_field(input, b"encoding", Ok)?;
216                    Token::Encoding(encoding.as_bstr())
217                } else {
218                    Self::next_inner_(input, state, hash_kind)?
219                }
220            }
221            ExtraHeaders => {
222                if input.starts_with(b"\n") {
223                    *state = State::Message;
224                    Self::next_inner_(input, state, hash_kind)?
225                } else {
226                    let before = *input;
227                    match parse::any_header_field_multi_line(input)
228                        .map(|(k, o)| (k.as_bstr(), Cow::Owned(o)))
229                        .or_else(|_| {
230                            *input = before;
231                            parse::any_header_field(input).map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr())))
232                        }) {
233                        Ok(extra_header) => Token::ExtraHeader(extra_header),
234                        Err(err) => return Err(err),
235                    }
236                }
237            }
238            Message => {
239                let message = decode::message(input)?;
240                debug_assert!(
241                    input.is_empty(),
242                    "we should have consumed all data - otherwise iter may go forever"
243                );
244                Token::Message(message)
245            }
246        })
247    }
248}
249
250impl<'a> Iterator for CommitRefIter<'a> {
251    type Item = Result<Token<'a>, crate::decode::Error>;
252
253    fn next(&mut self) -> Option<Self::Item> {
254        if self.data.is_empty() {
255            return None;
256        }
257        match Self::next_inner(self.data, &mut self.state, self.hash_kind) {
258            Ok((data, token)) => {
259                self.data = data;
260                Some(Ok(token))
261            }
262            Err(err) => {
263                self.data = &[];
264                Some(Err(err))
265            }
266        }
267    }
268}
269
270/// A variation of [`CommitRefIter`] that return's [`RawToken`]s instead.
271struct CommitRefIterRaw<'a> {
272    data: &'a [u8],
273    state: State,
274    offset: usize,
275    hash_kind: gix_hash::Kind,
276}
277
278impl<'a> Iterator for CommitRefIterRaw<'a> {
279    type Item = Result<RawToken<'a>, crate::decode::Error>;
280
281    fn next(&mut self) -> Option<Self::Item> {
282        if self.data.is_empty() {
283            return None;
284        }
285        match CommitRefIter::next_inner(self.data, &mut self.state, self.hash_kind) {
286            Ok((remaining, token)) => {
287                let consumed = self.data.len() - remaining.len();
288                let start = self.offset;
289                let end = start + consumed;
290                self.offset = end;
291
292                self.data = remaining;
293                Some(Ok(RawToken {
294                    token,
295                    token_range: start..end,
296                }))
297            }
298            Err(err) => {
299                self.data = &[];
300                Some(Err(err))
301            }
302        }
303    }
304}
305
306/// A combination of a parsed [`Token`] as well as the range of bytes that were consumed to parse it.
307struct RawToken<'a> {
308    /// The parsed token.
309    token: Token<'a>,
310    token_range: Range<usize>,
311}
312
313/// A token returned by the [commit iterator][CommitRefIter].
314#[allow(missing_docs)]
315#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
316pub enum Token<'a> {
317    Tree {
318        id: ObjectId,
319    },
320    Parent {
321        id: ObjectId,
322    },
323    /// A person who authored the content of the commit.
324    Author {
325        signature: gix_actor::SignatureRef<'a>,
326    },
327    /// A person who committed the authors work to the repository.
328    Committer {
329        signature: gix_actor::SignatureRef<'a>,
330    },
331    Encoding(&'a BStr),
332    ExtraHeader((&'a BStr, Cow<'a, BStr>)),
333    Message(&'a BStr),
334}
335
336impl Token<'_> {
337    /// Return the object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
338    pub fn id(&self) -> Option<&oid> {
339        match self {
340            Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()),
341            _ => None,
342        }
343    }
344
345    /// Return the owned object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
346    pub fn try_into_id(self) -> Option<ObjectId> {
347        match self {
348            Token::Tree { id } | Token::Parent { id } => Some(id),
349            _ => None,
350        }
351    }
352}