git_pack/data/
mod.rs

1//! a pack data file
2use std::{convert::TryInto, path::Path};
3
/// A position within a pack data file, counted in bytes from the beginning of the file,
/// at which an entry can be found.
pub type Offset = u64;
6
/// A number that uniquely identifies a pack among all packs loaded within a known context or namespace.
pub type Id = u32;
9
10use memmap2::Mmap;
11
12/// An representing an full- or delta-object within a pack
13#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
14#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
15pub struct Entry {
16    /// The entry's header
17    pub header: entry::Header,
18    /// The decompressed size of the entry in bytes.
19    ///
20    /// Note that for non-delta entries this will be the size of the object itself.
21    pub decompressed_size: u64,
22    /// absolute offset to compressed object data in the pack, just behind the entry's header
23    pub data_offset: Offset,
24}
25
26mod file;
27pub use file::{decode, verify, Header};
28///
29pub mod header;
30
31///
32pub mod init {
33    pub use super::header::decode::Error;
34}
35
36///
37pub mod entry;
38
39///
40pub mod input;
41
42/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
43pub mod output;
44
45/// A slice into a pack file denoting a pack entry.
46///
47/// An entry can be decoded into an object.
48pub type EntryRange = std::ops::Range<Offset>;
49
/// The versions of a pack data file that are supported.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
#[allow(missing_docs)]
pub enum Version {
    V2,
    V3,
}

/// `V2` is the version used when none is chosen explicitly.
impl Default for Version {
    fn default() -> Self {
        Self::V2
    }
}
64
65/// A pack data file
66pub struct File {
67    data: Mmap,
68    path: std::path::PathBuf,
69    /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
70    /// The same location on disk should yield the same id.
71    ///
72    /// These must be unique per pack and must be stable, that is they don't change if the pack doesn't change.
73    /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways.
74    ///
75    /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met.
76    pub id: Id,
77    version: Version,
78    num_objects: u32,
79    /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use
80    /// based on their configuration.
81    hash_len: usize,
82    object_hash: git_hash::Kind,
83}
84
85/// Information about the pack data file itself
86impl File {
87    /// The pack data version of this file
88    pub fn version(&self) -> Version {
89        self.version
90    }
91    /// The number of objects stored in this pack data file
92    pub fn num_objects(&self) -> u32 {
93        self.num_objects
94    }
95    /// The length of all mapped data, including the pack header and the pack trailer
96    pub fn data_len(&self) -> usize {
97        self.data.len()
98    }
99    /// The kind of hash we use internally.
100    pub fn object_hash(&self) -> git_hash::Kind {
101        self.object_hash
102    }
103    /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash.
104    pub fn pack_end(&self) -> usize {
105        self.data.len() - self.hash_len
106    }
107
108    /// The path to the pack data file on disk
109    pub fn path(&self) -> &Path {
110        &self.path
111    }
112
113    /// Returns the pack data at the given slice if its range is contained in the mapped pack data
114    pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> {
115        let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize");
116        let entry_start = slice.start as usize;
117        self.data.get(entry_start..entry_end)
118    }
119
120    /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data.
121    ///
122    /// _Note:_ finding the right size is only possible by decompressing
123    /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file.
124    ///
125    /// # Panics
126    ///
127    /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data.
128    pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 {
129        let pack_offset: usize = pack_offset.try_into().expect("pack_size fits into usize");
130        git_features::hash::crc32(&self.data[pack_offset..pack_offset + size])
131    }
132}
133
134pub(crate) mod delta;