Skip to main content

gix_pack/data/
mod.rs

1//! a pack data file
2use crate::MMap;
3use std::path::Path;
4
5/// The byte offset of an entry within the pack data file, relative to the file's beginning.
6pub type Offset = u64;
7
8/// An identifier to uniquely identify all packs loaded within a known context or namespace.
9pub type Id = u32;
10
11/// An entry representing a full- or delta-object within a pack.
12#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
13#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
14pub struct Entry {
15    /// The entry's header.
16    pub header: entry::Header,
17    /// The decompressed size of the entry in bytes.
18    ///
19    /// Note that for non-delta entries this will be the size of the object itself.
20    pub decompressed_size: u64,
21    /// The absolute offset to the compressed object data in the pack, just behind the entry's header.
22    pub data_offset: Offset,
23}
24
25mod file;
26pub use file::{Header, decode, verify};
27///
28pub mod header;
29
30///
31pub mod init {
32    pub use super::header::decode::Error;
33}
34
35///
36pub mod entry;
37
38///
39#[cfg(feature = "streaming-input")]
40pub mod input;
41
42/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
43#[cfg(feature = "generate")]
44pub mod output;
45
46/// A half-open range of offsets (`start..end`) into a pack file denoting a pack entry.
47///
48/// An entry can be decoded into an object.
49pub type EntryRange = std::ops::Range<Offset>;
50
51/// Supported versions of a pack data file.
52#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
53#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54pub enum Version {
55    /// The default pack data version, which is also what `Version::default()` yields.
56    ///
57    /// This is the version generated by Git and by `gix-pack` writers.
58    #[default]
59    V2,
60    /// A pack data version accepted by Git and recognized by `gix-pack` readers.
61    ///
62    /// Git does not generate this version, and `gix-pack` writers currently reject it.
63    /// Entries are decoded with the same layout as [`V2`](Version::V2); the difference
64    /// visible to this crate is the version number stored in the pack header.
65    V3,
66}
67
68/// A pack data file whose contents are held by `T`, which is [`MMap`] unless specified otherwise.
69pub struct File<T = MMap> {
70    data: T,
71    path: std::path::PathBuf,
72    /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
73    /// The same location on disk should yield the same id.
74    ///
75    /// Ids must be unique per pack and must be stable, that is they don't change if the pack doesn't change.
76    /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways.
77    ///
78    /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met.
79    pub id: Id,
80    version: Version,
81    num_objects: u32,
82    /// The size in bytes of the trailing hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use
83    /// based on their configuration.
84    hash_len: usize,
85    object_hash: gix_hash::Kind,
86    /// The maximum size of a single allocation caused by user-controlled on-disk pack data.
87    ///
88    /// If `None`, no additional limit is enforced.
89    alloc_limit_bytes: Option<usize>,
90}
91
92/// Information about the pack data file itself
93impl<T> File<T>
94where
95    T: crate::FileData,
96{
97    /// The pack data version of this file
98    pub fn version(&self) -> Version {
99        self.version
100    }
101    /// The number of objects stored in this pack data file
102    pub fn num_objects(&self) -> u32 {
103        self.num_objects
104    }
105    /// The length of all mapped data, including the pack header and the pack trailer
106    pub fn data_len(&self) -> usize {
107        self.data.len()
108    }
109    /// The kind of hash we use internally.
110    pub fn object_hash(&self) -> gix_hash::Kind {
111        self.object_hash
112    }
113    /// The maximum size of a single allocation caused by user-controlled on-disk pack data.
114    ///
115    /// A value of `None` means no additional limit is enforced.
116    pub fn alloc_limit_bytes(&self) -> Option<usize> {
117        self.alloc_limit_bytes
118    }
119    /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash.
120    pub fn pack_end(&self) -> usize {
121        self.data.len() - self.hash_len
122    }
123
124    /// The path to the pack data file on disk
125    pub fn path(&self) -> &Path {
126        &self.path
127    }
128
129    /// Returns the pack data at the given slice if its range is contained in the mapped pack data
130    pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> {
131        let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize");
132        let entry_start = slice.start as usize;
133        self.data.get(entry_start..entry_end)
134    }
135
136    /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data.
137    ///
138    /// _Note:_ finding the right size is only possible by decompressing
139    /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file.
140    ///
141    /// # Panics
142    ///
143    /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data.
144    pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 {
145        let pack_offset: usize = pack_offset.try_into().expect("pack_size fits into usize");
146        gix_features::hash::crc32(&self.data[pack_offset..pack_offset + size])
147    }
148}
149
150///
151pub mod delta;