git_object/lib.rs
1//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format
2//! as well as [mutable versions][Object] of these. Both types of objects can be encoded.
3//! ## Feature Flags
4#![cfg_attr(
5 feature = "document-features",
6 cfg_attr(doc, doc = ::document_features::document_features!())
7)]
8#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
9#![deny(missing_docs, rust_2018_idioms)]
10#![forbid(unsafe_code)]
11
12use std::borrow::Cow;
13
14/// For convenience to allow using `bstr` without adding it to own cargo manifest.
15pub use bstr;
16use bstr::{BStr, BString, ByteSlice};
17use smallvec::SmallVec;
18
19///
20pub mod commit;
21mod object;
22///
23pub mod tag;
24///
25pub mod tree;
26
27mod blob;
28///
29pub mod data;
30
31mod traits;
32pub use traits::WriteTo;
33
34pub mod encode;
35pub(crate) mod parse;
36
37///
38pub mod kind;
39
/// The four types of objects that git differentiates.
///
/// The derived ordering follows the declaration order of the variants,
/// i.e. `Tree < Blob < Commit < Tag`, so reordering them changes comparison results.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum Kind {
    /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits).
    Tree,
    /// A chunk of arbitrary data.
    Blob,
    /// A recorded state of the repository, usually accompanied by a message documenting the change.
    Commit,
    /// A git tag, commonly indicating a software release.
    Tag,
}
/// A read-only blob, being a chunk of arbitrary [`data`][BlobRef::data] borrowed from elsewhere.
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BlobRef<'a> {
    /// The borrowed bytes that make up the blob.
    pub data: &'a [u8],
}
57
/// A mutable blob which owns a chunk of arbitrary [`data`][Blob::data].
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Blob {
    /// The owned bytes that make up the blob.
    pub data: Vec<u8>,
}
65
66/// A git commit parsed using [`from_bytes()`][CommitRef::from_bytes()].
67///
68/// A commit encapsulates information about a point in time at which the state of the repository is recorded, usually after a
69/// change which is documented in the commit `message`.
70#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
71#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
72pub struct CommitRef<'a> {
73 /// HEX hash of tree object we point to. Usually 40 bytes long.
74 ///
75 /// Use [`tree()`][CommitRef::tree()] to obtain a decoded version of it.
76 #[cfg_attr(feature = "serde1", serde(borrow))]
77 pub tree: &'a BStr,
78 /// HEX hash of each parent commit. Empty for first commit in repository.
79 pub parents: SmallVec<[&'a BStr; 1]>,
80 /// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
81 ///
82 /// Use the [`author()`][CommitRef::author()] method to received a trimmed version of it.
83 pub author: git_actor::SignatureRef<'a>,
84 /// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
85 ///
86 /// Use the [`committer()`][CommitRef::committer()] method to received a trimmed version of it.
87 ///
88 /// This may be different from the `author` in case the author couldn't write to the repository themselves and
89 /// is commonly encountered with contributed commits.
90 pub committer: git_actor::SignatureRef<'a>,
91 /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
92 pub encoding: Option<&'a BStr>,
93 /// The commit message documenting the change.
94 pub message: &'a BStr,
95 /// Extra header fields, in order of them being encountered, made accessible with the iterator returned by [`extra_headers()`][CommitRef::extra_headers()].
96 pub extra_headers: Vec<(&'a BStr, Cow<'a, BStr>)>,
97}
98
99/// Like [`CommitRef`][crate::CommitRef], but as `Iterator` to support (up to) entirely allocation free parsing.
100/// It's particularly useful to traverse the commit graph without ever allocating arrays for parents.
101#[derive(Copy, Clone)]
102pub struct CommitRefIter<'a> {
103 data: &'a [u8],
104 state: commit::ref_iter::State,
105}
106
107/// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits.
108#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
109#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
110pub struct Commit {
111 /// The hash of recorded working tree state.
112 pub tree: git_hash::ObjectId,
113 /// Hash of each parent commit. Empty for the first commit in repository.
114 pub parents: SmallVec<[git_hash::ObjectId; 1]>,
115 /// Who wrote this commit.
116 pub author: git_actor::Signature,
117 /// Who committed this commit.
118 ///
119 /// This may be different from the `author` in case the author couldn't write to the repository themselves and
120 /// is commonly encountered with contributed commits.
121 pub committer: git_actor::Signature,
122 /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
123 pub encoding: Option<BString>,
124 /// The commit message documenting the change.
125 pub message: BString,
126 /// Extra header fields, in order of them being encountered, made accessible with the iterator returned
127 /// by [`extra_headers()`][Commit::extra_headers()].
128 pub extra_headers: Vec<(BString, BString)>,
129}
130
131/// Represents a git tag, commonly indicating a software release.
132#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
133#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
134pub struct TagRef<'a> {
135 /// The hash in hexadecimal being the object this tag points to. Use [`target()`][TagRef::target()] to obtain a byte representation.
136 #[cfg_attr(feature = "serde1", serde(borrow))]
137 pub target: &'a BStr,
138 /// The kind of object that `target` points to.
139 pub target_kind: Kind,
140 /// The name of the tag, e.g. "v1.0".
141 pub name: &'a BStr,
142 /// The author of the tag.
143 pub tagger: Option<git_actor::SignatureRef<'a>>,
144 /// The message describing this release.
145 pub message: &'a BStr,
146 /// A cryptographic signature over the entire content of the serialized tag object thus far.
147 pub pgp_signature: Option<&'a BStr>,
148}
149
150/// Like [`TagRef`], but as `Iterator` to support entirely allocation free parsing.
151/// It's particularly useful to dereference only the target chain.
152#[derive(Copy, Clone)]
153pub struct TagRefIter<'a> {
154 data: &'a [u8],
155 state: tag::ref_iter::State,
156}
157
158/// A mutable git tag.
159#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
160#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
161pub struct Tag {
162 /// The hash this tag is pointing to.
163 pub target: git_hash::ObjectId,
164 /// The kind of object this tag is pointing to.
165 pub target_kind: Kind,
166 /// The name of the tag, e.g. "v1.0".
167 pub name: BString,
168 /// The tags author.
169 pub tagger: Option<git_actor::Signature>,
170 /// The message describing the tag.
171 pub message: BString,
172 /// A pgp signature over all bytes of the encoded tag, excluding the pgp signature itself.
173 pub pgp_signature: Option<BString>,
174}
175
176/// Immutable objects are read-only structures referencing most data from [a byte slice][crate::ObjectRef::from_bytes()].
177///
178/// Immutable objects are expected to be deserialized from bytes that acts as backing store, and they
179/// cannot be mutated or serialized. Instead, one will [convert][crate::ObjectRef::into_owned()] them into their [`mutable`][Object] counterparts
180/// which support mutation and serialization.
181///
182/// An `ObjectRef` is representing [`Trees`][TreeRef], [`Blobs`][BlobRef], [`Commits`][CommitRef], or [`Tags`][TagRef].
183#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
184#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
185#[allow(missing_docs)]
186pub enum ObjectRef<'a> {
187 #[cfg_attr(feature = "serde1", serde(borrow))]
188 Tree(TreeRef<'a>),
189 Blob(BlobRef<'a>),
190 Commit(CommitRef<'a>),
191 Tag(TagRef<'a>),
192}
193
194/// Mutable objects with each field being separately allocated and changeable.
195///
196/// Mutable objects are Commits, Trees, Blobs and Tags that can be changed and serialized.
197///
198/// They either created using object [construction][Object] or by [deserializing existing objects][ObjectRef::from_bytes()]
199/// and converting these [into mutable copies][ObjectRef::into_owned()] for adjustments.
200///
201/// An `Object` is representing [`Trees`][Tree], [`Blobs`][Blob], [`Commits`][Commit] or [`Tags`][Tag].
202#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
203#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
204#[allow(clippy::large_enum_variant, missing_docs)]
205pub enum Object {
206 Tree(Tree),
207 Blob(Blob),
208 Commit(Commit),
209 Tag(Tag),
210}
211/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits).
212#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
213#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
214pub struct TreeRef<'a> {
215 /// The directories and files contained in this tree.
216 #[cfg_attr(feature = "serde1", serde(borrow))]
217 pub entries: Vec<tree::EntryRef<'a>>,
218}
219
/// A lazily evaluated directory snapshot which contains files (blobs), directories (trees) and submodules (commits).
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TreeRefIter<'a> {
    /// The bytes backing this tree.
    // NOTE(review): presumably the tree in git's serialization format, decoded entry by entry; confirm with the `Iterator` impl.
    data: &'a [u8],
}
226
227/// A mutable Tree, containing other trees, blobs or commits.
228#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
229#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
230pub struct Tree {
231 /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename].
232 pub entries: Vec<tree::Entry>,
233}
234
235impl Tree {
236 /// Return an empty tree which serializes to a well-known hash
237 pub fn empty() -> Self {
238 Tree { entries: Vec::new() }
239 }
240}
241
242/// A borrowed object using a slice as backing buffer, or in other words a bytes buffer that knows the kind of object it represents.
243#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
244pub struct Data<'a> {
245 /// kind of object
246 pub kind: Kind,
247 /// decoded, decompressed data, owned by a backing store.
248 pub data: &'a [u8],
249}
250
251///
252pub mod decode {
253 #[cfg(feature = "verbose-object-parsing-errors")]
254 mod _decode {
255 use crate::bstr::{BString, ByteSlice};
256
257 /// The type to be used for parse errors.
258 pub type ParseError<'a> = nom::error::VerboseError<&'a [u8]>;
259 /// The owned type to be used for parse errors.
260 pub type ParseErrorOwned = nom::error::VerboseError<BString>;
261
262 pub(crate) fn empty_error() -> Error {
263 Error {
264 inner: nom::error::VerboseError::<BString> { errors: Vec::new() },
265 }
266 }
267
268 /// A type to indicate errors during parsing and to abstract away details related to `nom`.
269 #[derive(Debug, Clone)]
270 pub struct Error {
271 /// The actual error
272 pub inner: ParseErrorOwned,
273 }
274
275 impl<'a> From<nom::Err<ParseError<'a>>> for Error {
276 fn from(v: nom::Err<ParseError<'a>>) -> Self {
277 Error {
278 inner: match v {
279 nom::Err::Error(err) | nom::Err::Failure(err) => nom::error::VerboseError {
280 errors: err
281 .errors
282 .into_iter()
283 .map(|(i, v)| (i.as_bstr().to_owned(), v))
284 .collect(),
285 },
286 nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"),
287 },
288 }
289 }
290 }
291
292 impl std::fmt::Display for Error {
293 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294 self.inner.fmt(f)
295 }
296 }
297 }
298
299 ///
300 #[cfg(not(feature = "verbose-object-parsing-errors"))]
301 mod _decode {
302 /// The type to be used for parse errors, discards everything and is zero size
303 pub type ParseError<'a> = ();
304 /// The owned type to be used for parse errors, discards everything and is zero size
305 pub type ParseErrorOwned = ();
306
307 pub(crate) fn empty_error() -> Error {
308 Error { inner: () }
309 }
310
311 /// A type to indicate errors during parsing and to abstract away details related to `nom`.
312 #[derive(Debug, Clone)]
313 pub struct Error {
314 /// The actual error
315 pub inner: ParseErrorOwned,
316 }
317
318 impl<'a> From<nom::Err<ParseError<'a>>> for Error {
319 fn from(v: nom::Err<ParseError<'a>>) -> Self {
320 Error {
321 inner: match v {
322 nom::Err::Error(err) | nom::Err::Failure(err) => err,
323 nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"),
324 },
325 }
326 }
327 }
328
329 impl std::fmt::Display for Error {
330 fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331 Ok(())
332 }
333 }
334 }
335 pub(crate) use _decode::empty_error;
336 pub use _decode::{Error, ParseError, ParseErrorOwned};
337 impl std::error::Error for Error {}
338
339 /// Returned by [`loose_header()`]
340 #[derive(Debug, thiserror::Error)]
341 #[allow(missing_docs)]
342 pub enum LooseHeaderDecodeError {
343 #[error("{message}: {number:?}")]
344 ParseIntegerError {
345 source: btoi::ParseIntegerError,
346 message: &'static str,
347 number: bstr::BString,
348 },
349 #[error("{message}")]
350 InvalidHeader { message: &'static str },
351 #[error("The object header contained an unknown object kind.")]
352 ObjectHeader(#[from] super::kind::Error),
353 }
354
355 use bstr::ByteSlice;
356 /// Decode a loose object header, being `<kind> <size>\0`, returns
357 /// ([`kind`](super::Kind), `size`, `consumed bytes`).
358 ///
359 /// `size` is the uncompressed size of the payload in bytes.
360 pub fn loose_header(input: &[u8]) -> Result<(super::Kind, usize, usize), LooseHeaderDecodeError> {
361 use LooseHeaderDecodeError::*;
362 let kind_end = input.find_byte(0x20).ok_or(InvalidHeader {
363 message: "Expected '<type> <size>'",
364 })?;
365 let kind = super::Kind::from_bytes(&input[..kind_end])?;
366 let size_end = input.find_byte(0x0).ok_or(InvalidHeader {
367 message: "Did not find 0 byte in header",
368 })?;
369 let size_bytes = &input[kind_end + 1..size_end];
370 let size = btoi::btoi(size_bytes).map_err(|source| ParseIntegerError {
371 source,
372 message: "Object size in header could not be parsed",
373 number: size_bytes.into(),
374 })?;
375 Ok((kind, size, size_end + 1))
376 }
377}