gix_object/
lib.rs

1//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format
2//! as well as [mutable versions][Object] of these. Both types of objects can be encoded.
3//! ## Feature Flags
4#![cfg_attr(
5    all(doc, feature = "document-features"),
6    doc = ::document_features::document_features!()
7)]
8#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
9#![deny(missing_docs, rust_2018_idioms)]
10#![forbid(unsafe_code)]
11
12use std::borrow::Cow;
13
14/// For convenience to allow using `bstr` without adding it to own cargo manifest.
15pub use bstr;
16use bstr::{BStr, BString, ByteSlice};
17/// For convenience to allow using `gix-date` without adding it to own cargo manifest.
18pub use gix_date as date;
19use smallvec::SmallVec;
20
21///
22pub mod commit;
23mod object;
24///
25pub mod tag;
26///
27pub mod tree;
28
29mod blob;
30///
31pub mod data;
32
33///
34pub mod find;
35
/// Types related to writing objects, currently only the error type of the [`Write`](crate::Write) trait.
pub mod write {
    /// The error type returned by the [`Write`](crate::Write) trait.
    ///
    /// It is a boxed trait object, so it can carry any error that is `Send + Sync + 'static`.
    pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
}
41
42mod traits;
43pub use traits::{Exists, Find, FindExt, FindObjectOrHeader, Header as FindHeader, HeaderExt, Write, WriteTo};
44
45pub mod encode;
46pub(crate) mod parse;
47
48///
49pub mod kind;
50
/// The four types of objects that git differentiates.
///
/// Note that the derived `Ord` and `PartialOrd` implementations compare variants by their declaration order,
/// i.e. `Tree < Blob < Commit < Tag`, so reordering the variants changes how values of this type sort.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[allow(missing_docs)]
pub enum Kind {
    Tree,
    Blob,
    Commit,
    Tag,
}
/// A chunk of any [`data`](BlobRef::data), borrowed for the lifetime `'a`.
///
/// See [`Blob`] for the counterpart that owns its data.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BlobRef<'a> {
    /// The bytes themselves.
    pub data: &'a [u8],
}
68
/// A mutable chunk of any [`data`](Blob::data).
///
/// Unlike [`BlobRef`], it owns its bytes in a `Vec<u8>`.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Blob {
    /// The data itself.
    pub data: Vec<u8>,
}
76
/// A git commit parsed using [`from_bytes()`](CommitRef::from_bytes()).
///
/// A commit encapsulates information about a point in time at which the state of the repository is recorded, usually after a
/// change which is documented in the commit `message`.
///
/// Most fields borrow from a buffer with lifetime `'a` and keep header values in their raw form.
/// See [`Commit`] for the mutable counterpart that owns all of its fields.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CommitRef<'a> {
    /// HEX hash of tree object we point to. Usually 40 bytes long.
    ///
    /// Use [`tree()`](CommitRef::tree()) to obtain a decoded version of it.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub tree: &'a BStr,
    /// HEX hash of each parent commit. Empty for first commit in repository.
    ///
    /// The `SmallVec` has inline space for a single parent.
    pub parents: SmallVec<[&'a BStr; 1]>,
    /// The raw author header value as encountered during parsing.
    ///
    /// Use the [`author()`](CommitRef::author()) method to obtain a parsed version of it.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub author: &'a BStr,
    /// The raw committer header value as encountered during parsing.
    ///
    /// Use the [`committer()`](CommitRef::committer()) method to obtain a parsed version of it.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub committer: &'a BStr,
    /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
    pub encoding: Option<&'a BStr>,
    /// The commit message documenting the change.
    pub message: &'a BStr,
    /// Extra header fields, in order of them being encountered, made accessible with the iterator returned by [`extra_headers()`](CommitRef::extra_headers()).
    ///
    /// Each entry is a pair, presumably the header name followed by its value. The value is a `Cow` and
    /// thus may either borrow from the backing buffer or be owned.
    pub extra_headers: Vec<(&'a BStr, Cow<'a, BStr>)>,
}
108
/// Like [`CommitRef`], but as `Iterator` to support (up to) entirely allocation free parsing.
/// It's particularly useful to traverse the commit graph without ever allocating arrays for parents.
///
/// The type is `Copy`, as it consists only of a borrowed slice and the iteration state.
#[derive(Copy, Clone)]
pub struct CommitRefIter<'a> {
    /// The borrowed bytes this iterator operates on, presumably the serialized commit.
    data: &'a [u8],
    /// The current state of the iteration, as defined in `commit::ref_iter`.
    state: commit::ref_iter::State,
}
116
/// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits.
///
/// In contrast to [`CommitRef`], all fields are owned, and hashes and signatures are stored in their typed form.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Commit {
    /// The hash of recorded working tree state.
    pub tree: gix_hash::ObjectId,
    /// Hash of each parent commit. Empty for the first commit in repository.
    ///
    /// The `SmallVec` has inline space for a single parent.
    pub parents: SmallVec<[gix_hash::ObjectId; 1]>,
    /// Who wrote this commit.
    pub author: gix_actor::Signature,
    /// Who committed this commit.
    ///
    /// This may be different from the `author` in case the author couldn't write to the repository themselves and
    /// is commonly encountered with contributed commits.
    pub committer: gix_actor::Signature,
    /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
    pub encoding: Option<BString>,
    /// The commit message documenting the change.
    pub message: BString,
    /// Extra header fields, in order of them being encountered, made accessible with the iterator returned
    /// by [`extra_headers()`](Commit::extra_headers()).
    ///
    /// Each entry is a pair, presumably the header name followed by its value.
    pub extra_headers: Vec<(BString, BString)>,
}
140
/// Represents a git tag, commonly indicating a software release.
///
/// All fields are either borrowed for the lifetime `'a` or `Copy`, which is why the type itself is `Copy`.
/// See [`Tag`] for the mutable counterpart that owns its fields.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TagRef<'a> {
    /// The hash in hexadecimal being the object this tag points to. Use [`target()`](TagRef::target()) to obtain a byte representation.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub target: &'a BStr,
    /// The kind of object that `target` points to.
    pub target_kind: Kind,
    /// The name of the tag, e.g. "v1.0".
    pub name: &'a BStr,
    /// The raw tagger header value as encountered during parsing, or `None` if there is no tagger.
    ///
    /// Use the [`tagger()`](TagRef::tagger()) method to obtain a parsed version of it.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub tagger: Option<&'a BStr>,
    /// The message describing this release.
    pub message: &'a BStr,
    /// A cryptographic signature over the entire content of the serialized tag object thus far, if present.
    pub pgp_signature: Option<&'a BStr>,
}
162
/// Like [`TagRef`], but as `Iterator` to support entirely allocation free parsing.
/// It's particularly useful to dereference only the target chain.
///
/// The type is `Copy`, as it consists only of a borrowed slice and the iteration state.
#[derive(Copy, Clone)]
pub struct TagRefIter<'a> {
    /// The borrowed bytes this iterator operates on, presumably the serialized tag.
    data: &'a [u8],
    /// The current state of the iteration, as defined in `tag::ref_iter`.
    state: tag::ref_iter::State,
}
170
/// A mutable git tag.
///
/// In contrast to [`TagRef`], all fields are owned, and the target hash and tagger are stored in their typed form.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Tag {
    /// The hash this tag is pointing to.
    pub target: gix_hash::ObjectId,
    /// The kind of object this tag is pointing to.
    pub target_kind: Kind,
    /// The name of the tag, e.g. "v1.0".
    pub name: BString,
    /// The tag's author, or `None` if the tag has no tagger.
    pub tagger: Option<gix_actor::Signature>,
    /// The message describing the tag.
    pub message: BString,
    /// A pgp signature over all bytes of the encoded tag, excluding the pgp signature itself.
    pub pgp_signature: Option<BString>,
}
188
/// Immutable objects are read-only structures referencing most data from [a byte slice](ObjectRef::from_bytes()).
///
/// Immutable objects are expected to be deserialized from bytes that act as backing store, and they
/// cannot be mutated or serialized. Instead, one will [convert](ObjectRef::into_owned()) them into their [`mutable`](Object) counterparts
/// which support mutation and serialization.
///
/// An `ObjectRef` is representing [`Trees`](TreeRef), [`Blobs`](BlobRef), [`Commits`](CommitRef), or [`Tags`](TagRef).
// NOTE(review): the crate-level documentation states that both read-only and mutable objects can be encoded,
// while the text above says that immutable objects cannot be serialized - confirm which one is accurate.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(missing_docs)]
pub enum ObjectRef<'a> {
    #[cfg_attr(feature = "serde", serde(borrow))]
    Tree(TreeRef<'a>),
    Blob(BlobRef<'a>),
    Commit(CommitRef<'a>),
    Tag(TagRef<'a>),
}
206
/// Mutable objects with each field being separately allocated and changeable.
///
/// Mutable objects are Commits, Trees, Blobs and Tags that can be changed and serialized.
///
/// They are either created using object [construction](Object) or by [deserializing existing objects](ObjectRef::from_bytes())
/// and converting these [into mutable copies](ObjectRef::into_owned()) for adjustments.
///
/// An `Object` is representing [`Trees`](Tree), [`Blobs`](Blob), [`Commits`](Commit), or [`Tags`](Tag).
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
// The variants are stored inline without boxing, hence the lint about differently sized variants is silenced.
#[allow(clippy::large_enum_variant, missing_docs)]
pub enum Object {
    Tree(Tree),
    Blob(Blob),
    Commit(Commit),
    Tag(Tag),
}
/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits).
///
/// Its entries borrow from a buffer with lifetime `'a`. See [`Tree`] for the mutable counterpart
/// and [`TreeRefIter`] for a lazily evaluated version.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TreeRef<'a> {
    /// The directories and files contained in this tree.
    ///
    /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::EntryRef`] to handle ordering correctly.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub entries: Vec<tree::EntryRef<'a>>,
}
234
/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated.
///
/// The `Default` instance holds an empty slice.
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
pub struct TreeRefIter<'a> {
    /// The directories and files contained in this tree, as borrowed bytes that are yet to be evaluated.
    data: &'a [u8],
}
241
/// A mutable Tree, containing other trees, blobs or commits.
///
/// The `Default` instance has no entries.
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Tree {
    /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename].
    ///
    /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::Entry`] to handle ordering correctly.
    pub entries: Vec<tree::Entry>,
}
251
252impl Tree {
253    /// Return an empty tree which serializes to a well-known hash
254    pub fn empty() -> Self {
255        Tree { entries: Vec::new() }
256    }
257}
258
/// A borrowed object using a slice as backing buffer, or in other words a bytes buffer that knows the kind of object it represents.
///
/// It is `Copy`, as it only consists of the object kind and a borrowed slice.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
pub struct Data<'a> {
    /// The kind of object that `data` represents.
    pub kind: Kind,
    /// The decoded and decompressed data, owned by a backing store.
    pub data: &'a [u8],
}
267
/// Information about an object, which includes its kind and the amount of bytes it would have when obtained.
///
/// It does not carry the object's data itself.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
pub struct Header {
    /// The kind of object.
    pub kind: Kind,
    /// The object's size in bytes, or the size of the buffer when it's retrieved in full.
    pub size: u64,
}
276
277///
278pub mod decode {
279    #[cfg(feature = "verbose-object-parsing-errors")]
280    mod _decode {
281        /// The type to be used for parse errors.
282        pub type ParseError = winnow::error::ContextError<winnow::error::StrContext>;
283
284        pub(crate) fn empty_error() -> Error {
285            Error {
286                inner: winnow::error::ContextError::new(),
287                remaining: Default::default(),
288            }
289        }
290
291        /// A type to indicate errors during parsing and to abstract away details related to `nom`.
292        #[derive(Debug, Clone)]
293        pub struct Error {
294            /// The actual error
295            pub inner: ParseError,
296            /// Where the error occurred
297            pub remaining: Vec<u8>,
298        }
299
300        impl Error {
301            pub(crate) fn with_err(err: winnow::error::ErrMode<ParseError>, remaining: &[u8]) -> Self {
302                Self {
303                    inner: err.into_inner().expect("we don't have streaming parsers"),
304                    remaining: remaining.to_owned(),
305                }
306            }
307        }
308
309        impl std::fmt::Display for Error {
310            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311                write!(f, "object parsing failed at `{}`", bstr::BStr::new(&self.remaining))?;
312                if self.inner.context().next().is_some() {
313                    writeln!(f)?;
314                    self.inner.fmt(f)?;
315                }
316                Ok(())
317            }
318        }
319
320        impl std::error::Error for Error {
321            fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
322                self.inner.cause().map(|v| v as &(dyn std::error::Error + 'static))
323            }
324        }
325    }
326
327    ///
328    #[cfg(not(feature = "verbose-object-parsing-errors"))]
329    mod _decode {
330        /// The type to be used for parse errors, discards everything and is zero size
331        pub type ParseError = ();
332
333        pub(crate) fn empty_error() -> Error {
334            Error { inner: () }
335        }
336
337        /// A type to indicate errors during parsing and to abstract away details related to `nom`.
338        #[derive(Debug, Clone)]
339        pub struct Error {
340            /// The actual error
341            pub inner: ParseError,
342        }
343
344        impl Error {
345            pub(crate) fn with_err(err: winnow::error::ErrMode<ParseError>, _remaining: &[u8]) -> Self {
346                Self {
347                    inner: err.into_inner().expect("we don't have streaming parsers"),
348                }
349            }
350        }
351
352        impl std::fmt::Display for Error {
353            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
354                f.write_str("object parsing failed")
355            }
356        }
357
358        impl std::error::Error for Error {}
359    }
360    pub(crate) use _decode::empty_error;
361    pub use _decode::{Error, ParseError};
362
363    /// Returned by [`loose_header()`]
364    #[derive(Debug, thiserror::Error)]
365    #[allow(missing_docs)]
366    pub enum LooseHeaderDecodeError {
367        #[error("{message}: {number:?}")]
368        ParseIntegerError {
369            source: gix_utils::btoi::ParseIntegerError,
370            message: &'static str,
371            number: bstr::BString,
372        },
373        #[error("{message}")]
374        InvalidHeader { message: &'static str },
375        #[error("The object header contained an unknown object kind.")]
376        ObjectHeader(#[from] super::kind::Error),
377    }
378
379    use bstr::ByteSlice;
380    /// Decode a loose object header, being `<kind> <size>\0`, returns
381    /// ([`kind`](super::Kind), `size`, `consumed bytes`).
382    ///
383    /// `size` is the uncompressed size of the payload in bytes.
384    pub fn loose_header(input: &[u8]) -> Result<(super::Kind, u64, usize), LooseHeaderDecodeError> {
385        use LooseHeaderDecodeError::*;
386        let kind_end = input.find_byte(0x20).ok_or(InvalidHeader {
387            message: "Expected '<type> <size>'",
388        })?;
389        let kind = super::Kind::from_bytes(&input[..kind_end])?;
390        let size_end = input.find_byte(0x0).ok_or(InvalidHeader {
391            message: "Did not find 0 byte in header",
392        })?;
393        let size_bytes = &input[kind_end + 1..size_end];
394        let size = gix_utils::btoi::to_signed(size_bytes).map_err(|source| ParseIntegerError {
395            source,
396            message: "Object size in header could not be parsed",
397            number: size_bytes.into(),
398        })?;
399        Ok((kind, size, size_end + 1))
400    }
401}
402
403fn object_hasher(hash_kind: gix_hash::Kind, object_kind: Kind, object_size: u64) -> gix_hash::Hasher {
404    let mut hasher = gix_hash::hasher(hash_kind);
405    hasher.update(&encode::loose_header(object_kind, object_size));
406    hasher
407}
408
409/// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`.
410#[doc(alias = "hash_object", alias = "git2")]
411pub fn compute_hash(
412    hash_kind: gix_hash::Kind,
413    object_kind: Kind,
414    data: &[u8],
415) -> Result<gix_hash::ObjectId, gix_hash::hasher::Error> {
416    let mut hasher = object_hasher(hash_kind, object_kind, data.len() as u64);
417    hasher.update(data);
418    hasher.try_finalize()
419}
420
421/// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its data read from `stream`
422/// which has to yield exactly `stream_len` bytes.
423/// Use `progress` to learn about progress in bytes processed and `should_interrupt` to be able to abort the operation
424/// if set to `true`.
425#[doc(alias = "hash_file", alias = "git2")]
426pub fn compute_stream_hash(
427    hash_kind: gix_hash::Kind,
428    object_kind: Kind,
429    stream: &mut dyn std::io::Read,
430    stream_len: u64,
431    progress: &mut dyn gix_features::progress::Progress,
432    should_interrupt: &std::sync::atomic::AtomicBool,
433) -> Result<gix_hash::ObjectId, gix_hash::io::Error> {
434    let hasher = object_hasher(hash_kind, object_kind, stream_len);
435    gix_hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt)
436}