//! Blob handling: blob types, blob ids, and locations of blobs within pack files.
1pub(crate) mod packer;
2pub(crate) mod tree;
3
4use std::{cmp::Ordering, num::NonZeroU32};
5
6use derive_more::Constructor;
7use enum_map::{Enum, EnumMap};
8use serde_derive::{Deserialize, Serialize};
9
10use crate::define_new_id_struct;
11
/// Constants controlling how packed blobs are coalesced into single backend reads.
pub(super) mod constants {
    /// The maximum size of pack-part which is read at once from the backend.
    /// (needed to limit the memory size used for large backends)
    pub(crate) const LIMIT_PACK_READ: u32 = 40 * 1024 * 1024; // 40 MiB
    /// The maximum size of holes which are still read when repacking
    /// (reading a small hole is cheaper than issuing a second request)
    pub(crate) const MAX_HOLESIZE: u32 = 256 * 1024; // 256 kiB
}
19
/// All [`BlobType`]s which are supported by the repository,
/// listed in the same order as the enum variants.
pub const ALL_BLOB_TYPES: [BlobType; 2] = [BlobType::Tree, BlobType::Data];
22
23#[derive(
24    Serialize,
25    Deserialize,
26    Clone,
27    Copy,
28    Debug,
29    PartialEq,
30    Eq,
31    PartialOrd,
32    Ord,
33    Hash,
34    Enum,
35    derive_more::Display,
36)]
37/// The type a `blob` or a `packfile` can have
38pub enum BlobType {
39    #[serde(rename = "tree")]
40    /// This is a tree blob
41    Tree,
42    #[serde(rename = "data")]
43    /// This is a data blob
44    Data,
45}
46
47impl BlobType {
48    /// Defines the cacheability of a [`BlobType`]
49    ///
50    /// # Returns
51    ///
52    /// `true` if the [`BlobType`] is cacheable, `false` otherwise
53    #[must_use]
54    pub(crate) const fn is_cacheable(self) -> bool {
55        match self {
56            Self::Tree => true,
57            Self::Data => false,
58        }
59    }
60}
61
62pub type BlobTypeMap<T> = EnumMap<BlobType, T>;
63
/// Initialize is a new trait to define the method `init()` for a [`BlobTypeMap`]
pub trait Initialize<T: Default + Sized> {
    /// Initialize a [`BlobTypeMap`] by processing a given function for each [`BlobType`]
    ///
    /// # Arguments
    ///
    /// * `init` - Called once per [`BlobType`]; its result becomes that type's entry.
    fn init<F: FnMut(BlobType) -> T>(init: F) -> BlobTypeMap<T>;
}
69
70impl<T: Default> Initialize<T> for BlobTypeMap<T> {
71    /// Initialize a [`BlobTypeMap`] by processing a given function for each [`BlobType`]
72    ///
73    /// # Arguments
74    ///
75    /// * `init` - The function to process for each [`BlobType`]
76    ///
77    /// # Returns
78    ///
79    /// A [`BlobTypeMap`] with the result of the function for each [`BlobType`]
80    fn init<F: FnMut(BlobType) -> T>(mut init: F) -> Self {
81        let mut btm = Self::default();
82        for i in 0..BlobType::LENGTH {
83            let bt = BlobType::from_usize(i);
84            btm[bt] = init(bt);
85        }
86        btm
87    }
88}
89
90define_new_id_struct!(BlobId, "blob");
91
/// A marker trait for Ids which identify Blobs in pack files
///
/// Implementors are interconvertible with the generic [`BlobId`].
pub trait PackedId: Copy + Into<BlobId> + From<BlobId> {
    /// The `BlobType` of the blob identified by the Id
    const TYPE: BlobType;
}
97
#[macro_export]
/// Generate newtypes for `Id`s identifying packed blobs
///
/// # Arguments
///
/// * `$a` - name of the newtype to generate
/// * `$b` - the [`BlobType`] of the blobs identified by this Id
macro_rules! impl_blobid {
    ($a:ident, $b: expr) => {
        // Trailing space added so the generated doc string reads
        // e.g. "blob of type BlobType::Data" instead of "blob of typeBlobType::Data".
        $crate::define_new_id_struct!($a, concat!("blob of type ", stringify!($b)));
        impl From<$crate::blob::BlobId> for $a {
            fn from(id: $crate::blob::BlobId) -> Self {
                (*id).into()
            }
        }
        impl From<$a> for $crate::blob::BlobId {
            fn from(id: $a) -> Self {
                (*id).into()
            }
        }
        impl $crate::blob::PackedId for $a {
            const TYPE: $crate::blob::BlobType = $b;
        }
    };
}
118
119impl_blobid!(DataId, BlobType::Data);
120
/// A `Blob` is a unit of content stored in the backend,
/// identified by its type and id.
///
/// It can be a `tree` or a `data` blob.
///
/// A `tree` blob contains a list of other blobs.
/// A `data` blob contains the actual data.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Constructor)]
pub(crate) struct Blob {
    /// The type of the blob
    tpe: BlobType,

    /// The id of the blob
    id: BlobId,
}
135
/// `BlobLocation` contains information about a blob within a pack
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlobLocation {
    /// The offset of the blob within the pack
    pub offset: u32,
    /// The (packed) length of the blob within the pack
    pub length: u32,
    /// The uncompressed length of the blob, if the blob is stored compressed
    /// (`None` means the blob is uncompressed — see [`BlobLocation::data_length`])
    pub uncompressed_length: Option<NonZeroU32>,
}
146
147impl PartialOrd<Self> for BlobLocation {
148    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
149        Some(self.cmp(other))
150    }
151}
152
153impl Ord for BlobLocation {
154    fn cmp(&self, other: &Self) -> Ordering {
155        self.offset.cmp(&other.offset)
156    }
157}
158
159impl BlobLocation {
160    /// Get the length of the data contained in this blob
161    pub const fn data_length(&self) -> u32 {
162        match self.uncompressed_length {
163            None => self.length - 32,
164            Some(length) => NonZeroU32::get(length),
165        }
166    }
167}
168
/// A contiguous read range within a pack, covering one or more blobs
/// (possibly separated by small holes), each paired with a target `T`.
#[derive(Debug, PartialEq, Eq)]
pub struct BlobLocations<T> {
    /// The offset of the read range within the pack
    pub offset: u32,
    /// The total length of the read range
    /// (may exceed the sum of blob lengths when holes are included)
    pub length: u32,
    /// The contained blob locations, each with its associated target
    pub blobs: Vec<(BlobLocation, T)>,
}
175
176impl<T: Eq + PartialEq> PartialOrd<Self> for BlobLocations<T> {
177    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
178        Some(self.cmp(other))
179    }
180}
181
impl<T: Eq> Ord for BlobLocations<T> {
    // Orders read ranges by their start offset within the pack.
    // NOTE(review): only `offset` is compared, while the derived `PartialEq`
    // compares all fields — two values with equal offsets but different blobs
    // compare `Equal` without being `==`, which violates the consistency
    // requirement documented for `Ord`. A full fix would need `T: Ord`;
    // confirm no ordered collection (e.g. `BTreeSet`) relies on this impl.
    fn cmp(&self, other: &Self) -> Ordering {
        self.offset.cmp(&other.offset)
    }
}
187
188impl<T> BlobLocations<T> {
189    pub fn length(&self) -> u32 {
190        self.blobs.iter().map(|bl| bl.0.length).sum()
191    }
192
193    pub fn data_length(&self) -> u32 {
194        self.blobs.iter().map(|bl| bl.0.data_length()).sum()
195    }
196
197    pub fn from_blob_location(location: BlobLocation, target: T) -> Self {
198        Self {
199            offset: location.offset,
200            length: location.length,
201            blobs: vec![(location, target)],
202        }
203    }
204    pub fn can_coalesce(&self, other: &Self) -> bool {
205        // if the blobs are (almost) contiguous and we don't trespass the limit, blobs can be read in one partial read
206        other.offset <= self.offset + self.length + constants::MAX_HOLESIZE
207            && other.offset >= self.offset + self.length
208            && other.offset + other.length - self.offset <= constants::LIMIT_PACK_READ
209    }
210
211    pub fn append(mut self, mut other: Self) -> Self {
212        self.length = other.offset + other.length - self.offset; // read till the end of other
213        self.blobs.append(&mut other.blobs);
214        self
215    }
216
217    #[allow(clippy::result_large_err)]
218    /// coalesce two `BlobLocations` if possible
219    pub fn coalesce(self, other: Self) -> Result<Self, (Self, Self)> {
220        if self.can_coalesce(&other) {
221            Ok(self.append(other))
222        } else {
223            Err((self, other))
224        }
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Table-driven check of `coalesce`: `expected` is the merged read
    /// length on success, or `None` when coalescing must fail.
    #[rstest]
    #[case(12, 123, 0, 123, None)] // second before first
    #[case(12, 123, 12, 123, None)] // second overlaps
    #[case(12, 123, 134, 123, None)] // second still overlaps
    #[case(12, 123, 135, 123, Some(246))] // second contiguous to first => OK
    #[case(12, 123, 136, 123, Some(247))] // small hole => OK
    #[case(12, 123, 135 + constants::MAX_HOLESIZE, 123, Some(246 + constants::MAX_HOLESIZE))] // maximum hole => OK
    #[case(12, 123, 136 + constants::MAX_HOLESIZE, 123, None)] // hole too large
    #[case(12, constants::LIMIT_PACK_READ - 15, constants::LIMIT_PACK_READ - 3, 15, Some(constants::LIMIT_PACK_READ))] // maximum length
    #[case(12, constants::LIMIT_PACK_READ - 15, constants::LIMIT_PACK_READ - 3, 16, None)] // exceeds limit to read
    #[case(12, constants::LIMIT_PACK_READ - 15, constants::LIMIT_PACK_READ, 12, Some(constants::LIMIT_PACK_READ))] // maximum length with hole
    #[case(12, constants::LIMIT_PACK_READ - 15, constants::LIMIT_PACK_READ + 1, 12, None)] // exceeds limit
    fn test_coalesce(
        #[case] offset1: u32,
        #[case] length1: u32,
        #[case] offset2: u32,
        #[case] length2: u32,
        #[case] expected: Option<u32>,
    ) {
        // build a single-blob `BlobLocations` (uncompressed, unit target)
        let make = |offset, length| {
            let location = BlobLocation {
                offset,
                length,
                uncompressed_length: None,
            };
            BlobLocations::from_blob_location(location, ())
        };

        let first = make(offset1, length1);
        let second = make(offset2, length2);
        let merged = first.coalesce(second);
        assert_eq!(merged.ok().map(|m| m.length), expected);
    }
}
270}