// gix_object/lib.rs

//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format
//! as well as [mutable versions][Object] of these. Both types of objects can be encoded.
//!
//! ## Decode Borrowed Objects
//!
//! ```
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1).unwrap();
//! let blob = object.as_blob().unwrap();
//!
//! assert_eq!(blob.data, b"hello");
//! assert_eq!(object.kind(), gix_object::Kind::Blob);
//! ```
//!
//! ## Mutate And Encode Owned Objects
//!
//! ```
//! use gix_object::WriteTo;
//!
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1)
//!     .unwrap()
//!     .into_owned()
//!     .unwrap();
//! let mut blob = object.into_blob();
//! blob.data.extend_from_slice(b" world");
//!
//! let mut out = Vec::new();
//! blob.write_to(&mut out).unwrap();
//! assert_eq!(out, b"hello world");
//! assert_eq!(blob.loose_header().as_slice(), b"blob 11\0");
//! ```
//!
//! ## Feature Flags
32#![cfg_attr(
33    all(doc, feature = "document-features"),
34    doc = ::document_features::document_features!()
35)]
36#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
37#![deny(missing_docs, rust_2018_idioms)]
38#![forbid(unsafe_code)]
39
use std::borrow::Cow;

/// For convenience to allow using `bstr` without adding it to own cargo manifest.
pub use bstr;
use bstr::{BStr, BString, ByteSlice};
/// For convenience to allow using `gix-date` without adding it to own cargo manifest.
pub use gix_date as date;
use smallvec::SmallVec;
48
49///
50pub mod commit;
51mod object;
52///
53pub mod tag;
54///
55pub mod tree;
56
57mod blob;
58///
59pub mod data;
60
61///
62pub mod find;
63
/// Types supporting the [`Write`](crate::Write) trait.
pub mod write {
    /// The error type returned by the [`Write`](crate::Write) trait.
    ///
    /// It is a boxed, thread-safe trait object, so any error type can be converted into it.
    pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
}
69
70mod traits;
71pub use traits::{Exists, Find, FindExt, FindObjectOrHeader, Header as FindHeader, HeaderExt, Write, WriteTo};
72
73pub mod encode;
74pub(crate) mod parse;
75
76///
77pub mod kind;
78
/// The four types of objects that git differentiates.
///
/// The derived ordering follows the declaration order of the variants.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
pub enum Kind {
    /// A tree object.
    Tree,
    /// A blob object.
    Blob,
    /// A commit object.
    Commit,
    /// A tag object.
    Tag,
}
/// A chunk of any [`data`](BlobRef::data).
///
/// This is the borrowed variant: it only references bytes owned elsewhere and is cheap to copy.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BlobRef<'a> {
    /// The bytes themselves.
    pub data: &'a [u8],
}
96
/// A mutable chunk of any [`data`](Blob::data).
///
/// This is the owned variant: the bytes live in the blob itself and can be changed freely.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Blob {
    /// The data itself.
    pub data: Vec<u8>,
}
104
105/// A git commit parsed using [`from_bytes()`](CommitRef::from_bytes()).
106///
107/// A commit encapsulates information about a point in time at which the state of the repository is recorded, usually after a
108/// change which is documented in the commit `message`.
109#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
110#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
111pub struct CommitRef<'a> {
112    /// HEX hash of tree object we point to. Usually 40 bytes long.
113    ///
114    /// Use [`tree()`](CommitRef::tree()) to obtain a decoded version of it.
115    #[cfg_attr(feature = "serde", serde(borrow))]
116    pub tree: &'a BStr,
117    /// HEX hash of each parent commit. Empty for first commit in repository.
118    pub parents: SmallVec<[&'a BStr; 1]>,
119    /// The raw author header value as encountered during parsing.
120    ///
121    /// Use the [`author()`](CommitRef::author()) method to obtain a parsed version of it.
122    #[cfg_attr(feature = "serde", serde(borrow))]
123    pub author: &'a BStr,
124    /// The raw committer header value as encountered during parsing.
125    ///
126    /// Use the [`committer()`](CommitRef::committer()) method to obtain a parsed version of it.
127    #[cfg_attr(feature = "serde", serde(borrow))]
128    pub committer: &'a BStr,
129    /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
130    pub encoding: Option<&'a BStr>,
131    /// The commit message documenting the change.
132    pub message: &'a BStr,
133    /// Extra header fields, in order of them being encountered, made accessible with the iterator returned by [`extra_headers()`](CommitRef::extra_headers()).
134    pub extra_headers: Vec<(&'a BStr, Cow<'a, BStr>)>,
135}
136
137/// Like [`CommitRef`], but as `Iterator` to support (up to) entirely allocation free parsing.
138/// It's particularly useful to traverse the commit graph without ever allocating arrays for parents.
139#[derive(Copy, Clone)]
140pub struct CommitRefIter<'a> {
141    data: &'a [u8],
142    state: commit::ref_iter::State,
143}
144
145/// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits.
146#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
147#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
148pub struct Commit {
149    /// The hash of recorded working tree state.
150    pub tree: gix_hash::ObjectId,
151    /// Hash of each parent commit. Empty for the first commit in repository.
152    pub parents: SmallVec<[gix_hash::ObjectId; 1]>,
153    /// Who wrote this commit.
154    pub author: gix_actor::Signature,
155    /// Who committed this commit.
156    ///
157    /// This may be different from the `author` in case the author couldn't write to the repository themselves and
158    /// is commonly encountered with contributed commits.
159    pub committer: gix_actor::Signature,
160    /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
161    pub encoding: Option<BString>,
162    /// The commit message documenting the change.
163    pub message: BString,
164    /// Extra header fields, in order of them being encountered, made accessible with the iterator returned
165    /// by [`extra_headers()`](Commit::extra_headers()).
166    pub extra_headers: Vec<(BString, BString)>,
167}
168
169/// Represents a git tag, commonly indicating a software release.
170#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
171#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
172pub struct TagRef<'a> {
173    /// The hash in hexadecimal being the object this tag points to. Use [`target()`](TagRef::target()) to obtain a byte representation.
174    #[cfg_attr(feature = "serde", serde(borrow))]
175    pub target: &'a BStr,
176    /// The kind of object that `target` points to.
177    pub target_kind: Kind,
178    /// The name of the tag, e.g. "v1.0".
179    pub name: &'a BStr,
180    /// The raw tagger header value as encountered during parsing.
181    ///
182    /// Use the [`tagger()`](TagRef::tagger()) method to obtain a parsed version of it.
183    #[cfg_attr(feature = "serde", serde(borrow))]
184    pub tagger: Option<&'a BStr>,
185    /// The message describing this release.
186    pub message: &'a BStr,
187    /// A cryptographic signature over the entire content of the serialized tag object thus far.
188    pub pgp_signature: Option<&'a BStr>,
189}
190
191/// Like [`TagRef`], but as `Iterator` to support entirely allocation free parsing.
192/// It's particularly useful to dereference only the target chain.
193#[derive(Copy, Clone)]
194pub struct TagRefIter<'a> {
195    data: &'a [u8],
196    state: tag::ref_iter::State,
197}
198
199/// A mutable git tag.
200#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
201#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
202pub struct Tag {
203    /// The hash this tag is pointing to.
204    pub target: gix_hash::ObjectId,
205    /// The kind of object this tag is pointing to.
206    pub target_kind: Kind,
207    /// The name of the tag, e.g. "v1.0".
208    pub name: BString,
209    /// The tags author.
210    pub tagger: Option<gix_actor::Signature>,
211    /// The message describing the tag.
212    pub message: BString,
213    /// A pgp signature over all bytes of the encoded tag, excluding the pgp signature itself.
214    pub pgp_signature: Option<BString>,
215}
216
217/// Immutable objects are read-only structures referencing most data from [a byte slice](ObjectRef::from_bytes()).
218///
219/// Immutable objects are expected to be deserialized from bytes that acts as backing store, and they
220/// cannot be mutated or serialized. Instead, one will [convert](ObjectRef::into_owned()) them into their [`mutable`](Object) counterparts
221/// which support mutation and serialization.
222///
223/// An `ObjectRef` is representing [`Trees`](TreeRef), [`Blobs`](BlobRef), [`Commits`](CommitRef), or [`Tags`](TagRef).
224#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
225#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
226#[allow(missing_docs)]
227pub enum ObjectRef<'a> {
228    #[cfg_attr(feature = "serde", serde(borrow))]
229    Tree(TreeRef<'a>),
230    Blob(BlobRef<'a>),
231    Commit(CommitRef<'a>),
232    Tag(TagRef<'a>),
233}
234
235/// Mutable objects with each field being separately allocated and changeable.
236///
237/// Mutable objects are Commits, Trees, Blobs and Tags that can be changed and serialized.
238///
239/// They either created using object [construction](Object) or by [deserializing existing objects](ObjectRef::from_bytes())
240/// and converting these [into mutable copies](ObjectRef::into_owned()) for adjustments.
241///
242/// An `Object` is representing [`Trees`](Tree), [`Blobs`](Blob), [`Commits`](Commit), or [`Tags`](Tag).
243#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
244#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
245#[allow(clippy::large_enum_variant, missing_docs)]
246pub enum Object {
247    Tree(Tree),
248    Blob(Blob),
249    Commit(Commit),
250    Tag(Tag),
251}
252/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits).
253#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
254#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
255pub struct TreeRef<'a> {
256    /// The directories and files contained in this tree.
257    ///
258    /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::EntryRef`] to handle ordering correctly.
259    #[cfg_attr(feature = "serde", serde(borrow))]
260    pub entries: Vec<tree::EntryRef<'a>>,
261}
262
263/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated.
264#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
265pub struct TreeRefIter<'a> {
266    /// The hash kind to use for parsing this tree.
267    hash_kind: gix_hash::Kind,
268    /// The directories and files contained in this tree.
269    data: &'a [u8],
270}
271
272/// A mutable Tree, containing other trees, blobs or commits.
273#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
274#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
275pub struct Tree {
276    /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename].
277    ///
278    /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::Entry`] to handle ordering correctly.
279    pub entries: Vec<tree::Entry>,
280}
281
282impl Tree {
283    /// Return an empty tree which serializes to a well-known hash
284    pub fn empty() -> Self {
285        Tree { entries: Vec::new() }
286    }
287}
288
289/// A borrowed object using a slice as backing buffer, or in other words a bytes buffer that knows the kind of object it represents.
290#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
291pub struct Data<'a> {
292    /// kind of object
293    pub kind: Kind,
294    /// The hash kind to use for parsing this data.
295    pub hash_kind: gix_hash::Kind,
296    /// decoded, decompressed data, owned by a backing store.
297    pub data: &'a [u8],
298}
299
300/// Information about an object, which includes its kind and the amount of bytes it would have when obtained.
301#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
302pub struct Header {
303    /// The kind of object.
304    pub kind: Kind,
305    /// The object's size in bytes, or the size of the buffer when it's retrieved in full.
306    pub size: u64,
307}
308
309///
310pub mod decode {
311    #[cfg(feature = "verbose-object-parsing-errors")]
312    mod _decode {
313        /// The type to be used for parse errors.
314        pub type ParseError = winnow::error::ContextError<winnow::error::StrContext>;
315
316        pub(crate) fn empty_error() -> Error {
317            Error {
318                inner: winnow::error::ContextError::new(),
319                remaining: Default::default(),
320            }
321        }
322
323        /// A type to indicate errors during parsing and to abstract away details related to `nom`.
324        #[derive(Debug, Clone)]
325        pub struct Error {
326            /// The actual error
327            pub inner: ParseError,
328            /// Where the error occurred
329            pub remaining: Vec<u8>,
330        }
331
332        impl Error {
333            pub(crate) fn with_err(err: winnow::error::ErrMode<ParseError>, remaining: &[u8]) -> Self {
334                Self {
335                    inner: err.into_inner().expect("we don't have streaming parsers"),
336                    remaining: remaining.to_owned(),
337                }
338            }
339        }
340
341        impl std::fmt::Display for Error {
342            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
343                write!(f, "object parsing failed at `{}`", bstr::BStr::new(&self.remaining))?;
344                if self.inner.context().next().is_some() {
345                    writeln!(f)?;
346                    self.inner.fmt(f)?;
347                }
348                Ok(())
349            }
350        }
351
352        impl std::error::Error for Error {
353            fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
354                self.inner.cause().map(|v| v as &(dyn std::error::Error + 'static))
355            }
356        }
357    }
358
359    ///
360    #[cfg(not(feature = "verbose-object-parsing-errors"))]
361    mod _decode {
362        /// The type to be used for parse errors, discards everything and is zero size
363        pub type ParseError = ();
364
365        pub(crate) fn empty_error() -> Error {
366            Error { inner: () }
367        }
368
369        /// A type to indicate errors during parsing and to abstract away details related to `nom`.
370        #[derive(Debug, Clone)]
371        pub struct Error {
372            /// The actual error
373            pub inner: ParseError,
374        }
375
376        impl Error {
377            pub(crate) fn with_err(err: winnow::error::ErrMode<ParseError>, _remaining: &[u8]) -> Self {
378                Self {
379                    inner: err.into_inner().expect("we don't have streaming parsers"),
380                }
381            }
382        }
383
384        impl std::fmt::Display for Error {
385            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386                f.write_str("object parsing failed")
387            }
388        }
389
390        impl std::error::Error for Error {}
391    }
392    pub(crate) use _decode::empty_error;
393    pub use _decode::{Error, ParseError};
394
395    /// Returned by [`loose_header()`]
396    #[derive(Debug, thiserror::Error)]
397    #[allow(missing_docs)]
398    pub enum LooseHeaderDecodeError {
399        #[error("{message}: {number:?}")]
400        ParseIntegerError {
401            source: gix_utils::btoi::ParseIntegerError,
402            message: &'static str,
403            number: bstr::BString,
404        },
405        #[error("{message}")]
406        InvalidHeader { message: &'static str },
407        #[error("The object header contained an unknown object kind.")]
408        ObjectHeader(#[from] super::kind::Error),
409    }
410
411    use bstr::ByteSlice;
412    /// Decode a loose object header, being `<kind> <size>\0`, returns
413    /// ([`kind`](super::Kind), `size`, `consumed bytes`).
414    ///
415    /// `size` is the uncompressed size of the payload in bytes.
416    pub fn loose_header(input: &[u8]) -> Result<(super::Kind, u64, usize), LooseHeaderDecodeError> {
417        use LooseHeaderDecodeError::*;
418        let kind_end = input.find_byte(0x20).ok_or(InvalidHeader {
419            message: "Expected '<type> <size>'",
420        })?;
421        let kind = super::Kind::from_bytes(&input[..kind_end])?;
422        let size_end = input.find_byte(0x0).ok_or(InvalidHeader {
423            message: "Did not find 0 byte in header",
424        })?;
425        let size_bytes = &input[kind_end + 1..size_end];
426        let size = gix_utils::btoi::to_signed(size_bytes).map_err(|source| ParseIntegerError {
427            source,
428            message: "Object size in header could not be parsed",
429            number: size_bytes.into(),
430        })?;
431        Ok((kind, size, size_end + 1))
432    }
433}
434
435fn object_hasher(hash_kind: gix_hash::Kind, object_kind: Kind, object_size: u64) -> gix_hash::Hasher {
436    let mut hasher = gix_hash::hasher(hash_kind);
437    hasher.update(&encode::loose_header(object_kind, object_size));
438    hasher
439}
440
441/// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`.
442#[doc(alias = "hash_object", alias = "git2")]
443pub fn compute_hash(
444    hash_kind: gix_hash::Kind,
445    object_kind: Kind,
446    data: &[u8],
447) -> Result<gix_hash::ObjectId, gix_hash::hasher::Error> {
448    let mut hasher = object_hasher(hash_kind, object_kind, data.len() as u64);
449    hasher.update(data);
450    hasher.try_finalize()
451}
452
453/// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its data read from `stream`
454/// which has to yield exactly `stream_len` bytes.
455/// Use `progress` to learn about progress in bytes processed and `should_interrupt` to be able to abort the operation
456/// if set to `true`.
457#[doc(alias = "hash_file", alias = "git2")]
458pub fn compute_stream_hash(
459    hash_kind: gix_hash::Kind,
460    object_kind: Kind,
461    stream: &mut dyn std::io::Read,
462    stream_len: u64,
463    progress: &mut dyn gix_features::progress::Progress,
464    should_interrupt: &std::sync::atomic::AtomicBool,
465) -> Result<gix_hash::ObjectId, gix_hash::io::Error> {
466    let hasher = object_hasher(hash_kind, object_kind, stream_len);
467    gix_hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt)
468}