Skip to main content

gix_pack/index/traverse/
mod.rs

1use std::sync::atomic::AtomicBool;
2
3use gix_features::{parallel, progress::Progress, zlib};
4
5use crate::index;
6
7mod reduce;
8///
9pub mod with_index;
10///
11pub mod with_lookup;
12use reduce::Reducer;
13
14mod error;
15pub use error::Error;
16use gix_features::progress::DynNestedProgress;
17
18mod types;
19pub use types::{Algorithm, ProgressId, SafetyCheck, Statistics};
20
/// Traversal options for [`index::File::traverse()`].
///
/// `F` is the type of the factory stored in [`make_pack_lookup_cache`][Options::make_pack_lookup_cache].
#[derive(Debug, Clone)]
pub struct Options<F> {
    /// The algorithm to employ.
    pub traversal: Algorithm,
    /// If `Some`, only use the given number of threads. Otherwise, the number of threads to use will be selected based on
    /// the number of available logical cores.
    pub thread_limit: Option<usize>,
    /// The kinds of safety checks to perform.
    pub check: SafetyCheck,
    /// A function to create a pack cache.
    ///
    /// [`index::File::traverse()`] only forwards it when [`Algorithm::Lookup`] is selected; it is not passed on
    /// for [`Algorithm::DeltaTreeLookup`].
    pub make_pack_lookup_cache: F,
}
34
35impl Default for Options<fn() -> crate::cache::Never> {
36    fn default() -> Self {
37        Options {
38            check: Default::default(),
39            traversal: Default::default(),
40            thread_limit: None,
41            make_pack_lookup_cache: || crate::cache::Never,
42        }
43    }
44}
45
/// The outcome of the [`traverse()`][index::File::traverse()] method.
pub struct Outcome {
    /// The checksum obtained when hashing the file, which matched the checksum contained within the file.
    // NOTE(review): `possibly_verify()` returns the checksum stored in the index without hashing anything when
    // file checksum checks are disabled. Presumably that value ends up in this field as well - confirm in the
    // traversal implementations and adjust the description above if so.
    pub actual_index_checksum: gix_hash::ObjectId,
    /// The statistics obtained during traversal.
    pub statistics: Statistics,
}
53
54/// Traversal of pack data files using an index file
55impl<T> index::File<T>
56where
57    T: crate::FileData + Sync,
58{
59    /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`.
60    /// The return value is (pack-checksum, [`Outcome`], `progress`), thus the pack traversal will always verify
61    /// the whole packs checksum to assure it was correct. In case of bit-rod, the operation will abort early without
62    /// verifying all objects using the [interrupt mechanism][gix_features::interrupt] mechanism.
63    ///
64    /// # Algorithms
65    ///
66    /// Using the [`Options::traversal`] field one can chose between two algorithms providing different tradeoffs. Both invoke
67    /// `new_processor()` to create functions receiving decoded objects, their object kind, index entry and a progress instance to provide
68    /// progress information.
69    ///
70    /// * [`Algorithm::DeltaTreeLookup`] builds an index to avoid any unnecessary computation while resolving objects, avoiding
71    ///   the need for a cache entirely, rendering `new_cache()` unused.
72    ///   One could also call [`traverse_with_index()`][index::File::traverse_with_index()] directly.
73    /// * [`Algorithm::Lookup`] uses a cache created by `new_cache()` to avoid having to re-compute all bases of a delta-chain while
74    ///   decoding objects.
75    ///   One could also call [`traverse_with_lookup()`][index::File::traverse_with_lookup()] directly.
76    ///
77    /// Use [`thread_limit`][Options::thread_limit] to further control parallelism and [`check`][SafetyCheck] to define how much the passed
78    /// objects shall be verified beforehand.
79    pub fn traverse<C, Processor, E, F, D>(
80        &self,
81        pack: &crate::data::File<D>,
82        progress: &mut dyn DynNestedProgress,
83        should_interrupt: &AtomicBool,
84        processor: Processor,
85        Options {
86            traversal,
87            thread_limit,
88            check,
89            make_pack_lookup_cache,
90        }: Options<F>,
91    ) -> Result<Outcome, Error<E>>
92    where
93        C: crate::cache::DecodeEntry,
94        E: std::error::Error + Send + Sync + 'static,
95        Processor: FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E> + Send + Clone,
96        F: Fn() -> C + Send + Clone,
97        D: crate::FileData + Send + Sync,
98    {
99        match traversal {
100            Algorithm::Lookup => self.traverse_with_lookup(
101                processor,
102                pack,
103                progress,
104                should_interrupt,
105                with_lookup::Options {
106                    thread_limit,
107                    check,
108                    make_pack_lookup_cache,
109                },
110            ),
111            Algorithm::DeltaTreeLookup => self.traverse_with_index(
112                pack,
113                processor,
114                progress,
115                should_interrupt,
116                with_index::Options { check, thread_limit },
117            ),
118        }
119    }
120
121    fn possibly_verify<E, D>(
122        &self,
123        pack: &crate::data::File<D>,
124        check: SafetyCheck,
125        pack_progress: &mut dyn Progress,
126        index_progress: &mut dyn Progress,
127        should_interrupt: &AtomicBool,
128    ) -> Result<gix_hash::ObjectId, Error<E>>
129    where
130        E: std::error::Error + Send + Sync + 'static,
131        D: crate::FileData + Send + Sync,
132    {
133        Ok(if check.file_checksum() {
134            pack.checksum()
135                .verify(&self.pack_checksum())
136                .map_err(Error::PackMismatch)?;
137            let (pack_res, id) = parallel::join(
138                move || pack.verify_checksum(pack_progress, should_interrupt),
139                move || self.verify_checksum(index_progress, should_interrupt),
140            );
141            pack_res.map_err(Error::PackVerify)?;
142            id.map_err(Error::IndexVerify)?
143        } else {
144            self.index_checksum()
145        })
146    }
147
148    #[allow(clippy::too_many_arguments)]
149    fn decode_and_process_entry<C, E, D>(
150        &self,
151        check: SafetyCheck,
152        pack: &crate::data::File<D>,
153        cache: &mut C,
154        buf: &mut Vec<u8>,
155        inflate: &mut zlib::Inflate,
156        progress: &mut dyn Progress,
157        index_entry: &index::Entry,
158        processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E>,
159    ) -> Result<crate::data::decode::entry::Outcome, Error<E>>
160    where
161        C: crate::cache::DecodeEntry,
162        E: std::error::Error + Send + Sync + 'static,
163        D: crate::FileData + Send + Sync,
164    {
165        let pack_entry = pack.entry(index_entry.pack_offset)?;
166        let pack_entry_data_offset = pack_entry.data_offset;
167        let entry_stats = pack
168            .decode_entry(
169                pack_entry,
170                buf,
171                inflate,
172                &|id, _| {
173                    let index = self.lookup(id)?;
174                    pack.entry(self.pack_offset_at_index(index))
175                        .ok()
176                        .map(crate::data::decode::entry::ResolvedBase::InPack)
177                },
178                cache,
179            )
180            .map_err(|e| Error::PackDecode {
181                source: e,
182                id: index_entry.oid,
183                offset: index_entry.pack_offset,
184            })?;
185        let object_kind = entry_stats.kind;
186        let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
187        let entry_len = header_size + entry_stats.compressed_size;
188
189        process_entry(
190            check,
191            object_kind,
192            buf,
193            index_entry,
194            || pack.entry_crc32(index_entry.pack_offset, entry_len),
195            progress,
196            processor,
197        )?;
198        Ok(entry_stats)
199    }
200}
201
202#[allow(clippy::too_many_arguments)]
203fn process_entry<E>(
204    check: SafetyCheck,
205    object_kind: gix_object::Kind,
206    decompressed: &[u8],
207    index_entry: &index::Entry,
208    pack_entry_crc32: impl FnOnce() -> u32,
209    progress: &dyn Progress,
210    processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E>,
211) -> Result<(), Error<E>>
212where
213    E: std::error::Error + Send + Sync + 'static,
214{
215    if check.object_checksum() {
216        gix_object::Data::new(decompressed, object_kind, index_entry.oid.kind())
217            .verify_checksum(&index_entry.oid)
218            .map_err(|source| Error::PackObjectVerify {
219                offset: index_entry.pack_offset,
220                source,
221            })?;
222        if let Some(desired_crc32) = index_entry.crc32 {
223            let actual_crc32 = pack_entry_crc32();
224            if actual_crc32 != desired_crc32 {
225                return Err(Error::Crc32Mismatch {
226                    actual: actual_crc32,
227                    expected: desired_crc32,
228                    offset: index_entry.pack_offset,
229                    kind: object_kind,
230                });
231            }
232        }
233    }
234    processor(object_kind, decompressed, index_entry, progress).map_err(Error::Processor)
235}