Skip to main content

gix_pack/multi_index/
verify.rs

1use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant};
2
3use gix_features::progress::{Count, DynNestedProgress, Progress};
4
5use crate::{exact_vec, index, multi_index::File};
6
7///
8pub mod integrity {
9    use crate::multi_index::EntryIndex;
10
11    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
12    #[derive(thiserror::Error, Debug)]
13    #[allow(missing_docs)]
14    pub enum Error {
15        #[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")]
16        PackOffsetMismatch {
17            id: gix_hash::ObjectId,
18            expected_pack_offset: u64,
19            actual_pack_offset: u64,
20        },
21        #[error(transparent)]
22        MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error),
23        #[error(transparent)]
24        IndexIntegrity(#[from] crate::index::verify::integrity::Error),
25        #[error(transparent)]
26        BundleInit(#[from] crate::bundle::init::Error),
27        #[error("Counted {actual} objects, but expected {expected} as per multi-index")]
28        UnexpectedObjectCount { actual: usize, expected: usize },
29        #[error("{id} wasn't found in the index referenced in the multi-pack index")]
30        OidNotFound { id: gix_hash::ObjectId },
31        #[error("The object id at multi-index entry {index} wasn't in order")]
32        OutOfOrder { index: EntryIndex },
33        #[error("The fan at index {index} is out of order as it's larger then the following value.")]
34        Fan { index: usize },
35        #[error("The multi-index claims to have no objects")]
36        Empty,
37        #[error("The multi-index path '{path}' has no parent directory")]
38        InvalidPath { path: std::path::PathBuf },
39        #[error("Interrupted")]
40        Interrupted,
41    }
42
43    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
44    pub struct Outcome {
45        /// The computed checksum of the multi-index which matched the stored one.
46        pub actual_index_checksum: gix_hash::ObjectId,
47        /// The for each entry in [`index_names()`][super::File::index_names()] provide the corresponding pack traversal outcome.
48        pub pack_traverse_statistics: Vec<crate::index::traverse::Statistics>,
49    }
50
51    /// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
52    ///
53    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
54    #[derive(Debug, Copy, Clone)]
55    pub enum ProgressId {
56        /// The amount of bytes read to verify the multi-index checksum.
57        ChecksumBytes,
58        /// The amount of objects whose offset has been checked.
59        ObjectOffsets,
60    }
61
62    impl From<ProgressId> for gix_features::progress::Id {
63        fn from(v: ProgressId) -> Self {
64            match v {
65                ProgressId::ChecksumBytes => *b"MVCK",
66                ProgressId::ObjectOffsets => *b"MVOF",
67            }
68        }
69    }
70}
71
72///
73pub mod checksum {
74    /// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()].
75    pub type Error = crate::verify::checksum::Error;
76}
77
78impl<T> File<T>
79where
80    T: crate::FileData,
81{
82    /// Validate that our [`checksum()`][File::checksum()] matches the actual contents
83    /// of this index file, and return it if it does.
84    pub fn verify_checksum(
85        &self,
86        progress: &mut dyn Progress,
87        should_interrupt: &AtomicBool,
88    ) -> Result<gix_hash::ObjectId, checksum::Error> {
89        crate::verify::checksum_on_disk_or_mmap(
90            self.path(),
91            &self.data,
92            self.checksum(),
93            self.object_hash,
94            progress,
95            should_interrupt,
96        )
97    }
98
99    /// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects.
100    ///
101    /// Instead we only validate the contents of the multi-index itself.
102    pub fn verify_integrity_fast(
103        &self,
104        progress: &mut dyn DynNestedProgress,
105        should_interrupt: &AtomicBool,
106    ) -> Result<gix_hash::ObjectId, integrity::Error> {
107        self.verify_integrity_inner(
108            progress,
109            should_interrupt,
110            false,
111            index::verify::integrity::Options::default(),
112        )
113        .map_err(|err| match err {
114            index::traverse::Error::Processor(err) => err,
115            _ => unreachable!("BUG: no other error type is possible"),
116        })
117        .map(|o| o.actual_index_checksum)
118    }
119
120    /// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs.
121    ///
122    /// Note that it's considered a failure if an index doesn't have a corresponding pack.
123    pub fn verify_integrity<C, F>(
124        &self,
125        progress: &mut dyn DynNestedProgress,
126        should_interrupt: &AtomicBool,
127        options: index::verify::integrity::Options<F>,
128    ) -> Result<integrity::Outcome, index::traverse::Error<integrity::Error>>
129    where
130        C: crate::cache::DecodeEntry,
131        F: Fn() -> C + Send + Clone,
132    {
133        self.verify_integrity_inner(progress, should_interrupt, true, options)
134    }
135
136    fn verify_integrity_inner<C, F>(
137        &self,
138        progress: &mut dyn DynNestedProgress,
139        should_interrupt: &AtomicBool,
140        deep_check: bool,
141        options: index::verify::integrity::Options<F>,
142    ) -> Result<integrity::Outcome, index::traverse::Error<integrity::Error>>
143    where
144        C: crate::cache::DecodeEntry,
145        F: Fn() -> C + Send + Clone,
146    {
147        let parent = self.path.parent().ok_or_else(|| {
148            index::traverse::Error::Processor(integrity::Error::InvalidPath {
149                path: self.path.clone(),
150            })
151        })?;
152
153        let actual_index_checksum = self
154            .verify_checksum(
155                &mut progress.add_child_with_id(
156                    format!("{}: checksum", self.path.display()),
157                    integrity::ProgressId::ChecksumBytes.into(),
158                ),
159                should_interrupt,
160            )
161            .map_err(integrity::Error::from)
162            .map_err(index::traverse::Error::Processor)?;
163
164        if let Some(first_invalid) = crate::verify::fan(&self.fan) {
165            return Err(index::traverse::Error::Processor(integrity::Error::Fan {
166                index: first_invalid,
167            }));
168        }
169
170        if self.num_objects == 0 {
171            return Err(index::traverse::Error::Processor(integrity::Error::Empty));
172        }
173
174        let mut pack_traverse_statistics = Vec::new();
175
176        let operation_start = Instant::now();
177        let mut total_objects_checked = 0;
178        let mut pack_ids_and_offsets = exact_vec(self.num_objects as usize);
179        {
180            let order_start = Instant::now();
181            let mut progress = progress.add_child_with_id("checking oid order".into(), gix_features::progress::UNKNOWN);
182            progress.init(
183                Some(self.num_objects as usize),
184                gix_features::progress::count("objects"),
185            );
186
187            for entry_index in 0..(self.num_objects - 1) {
188                let lhs = self.oid_at_index(entry_index);
189                let rhs = self.oid_at_index(entry_index + 1);
190
191                if rhs.cmp(lhs) != Ordering::Greater {
192                    return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder {
193                        index: entry_index,
194                    }));
195                }
196                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
197                pack_ids_and_offsets.push((pack_id, entry_index));
198                progress.inc();
199            }
200            {
201                let entry_index = self.num_objects - 1;
202                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
203                pack_ids_and_offsets.push((pack_id, entry_index));
204            }
205            // sort by pack-id to allow handling all indices matching a pack while its open.
206            pack_ids_and_offsets.sort_by_key(|l| l.0);
207            progress.show_throughput(order_start);
208        };
209
210        progress.init(
211            Some(self.num_indices as usize),
212            gix_features::progress::count("indices"),
213        );
214
215        let mut pack_ids_slice = pack_ids_and_offsets.as_slice();
216
217        for (pack_id, index_file_name) in self.index_names.iter().enumerate() {
218            progress.set_name(index_file_name.display().to_string());
219            progress.inc();
220
221            let mut bundle = None;
222            let index;
223            let index_path = parent.join(index_file_name);
224            let index = if deep_check {
225                bundle = crate::Bundle::at(index_path, self.object_hash)
226                    .map_err(integrity::Error::from)
227                    .map_err(index::traverse::Error::Processor)?
228                    .into();
229                bundle.as_ref().map(|b| &b.index).expect("just set")
230            } else {
231                index = Some(
232                    index::File::at(index_path, self.object_hash)
233                        .map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err)))
234                        .map_err(index::traverse::Error::Processor)?,
235                );
236                index.as_ref().expect("just set")
237            };
238
239            let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id);
240            let multi_index_entries_to_check = &pack_ids_slice[..slice_end];
241            {
242                let offset_start = Instant::now();
243                let mut offsets_progress = progress.add_child_with_id(
244                    "verify object offsets".into(),
245                    integrity::ProgressId::ObjectOffsets.into(),
246                );
247                offsets_progress.init(
248                    Some(pack_ids_and_offsets.len()),
249                    gix_features::progress::count("objects"),
250                );
251                pack_ids_slice = &pack_ids_slice[slice_end..];
252
253                for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) {
254                    let oid = self.oid_at_index(entry_id);
255                    let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id);
256                    let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| {
257                        index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() })
258                    })?;
259                    let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index);
260                    if actual_pack_offset != expected_pack_offset {
261                        return Err(index::traverse::Error::Processor(
262                            integrity::Error::PackOffsetMismatch {
263                                id: oid.to_owned(),
264                                expected_pack_offset,
265                                actual_pack_offset,
266                            },
267                        ));
268                    }
269                    offsets_progress.inc();
270                }
271
272                if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) {
273                    return Err(index::traverse::Error::Processor(integrity::Error::Interrupted));
274                }
275                offsets_progress.show_throughput(offset_start);
276            }
277
278            total_objects_checked += multi_index_entries_to_check.len();
279
280            if let Some(bundle) = bundle {
281                progress.set_name(format!("Validating {}", index_file_name.display()));
282                let crate::bundle::verify::integrity::Outcome {
283                    actual_index_checksum: _,
284                    pack_traverse_outcome,
285                } = bundle
286                    .verify_integrity(progress, should_interrupt, options.clone())
287                    .map_err(|err| {
288                        use index::traverse::Error::*;
289                        match err {
290                            Processor(err) => Processor(integrity::Error::IndexIntegrity(err)),
291                            IndexVerify(err) => IndexVerify(err),
292                            Tree(err) => Tree(err),
293                            TreeTraversal(err) => TreeTraversal(err),
294                            PackVerify(err) => PackVerify(err),
295                            PackDecode { id, offset, source } => PackDecode { id, offset, source },
296                            PackMismatch(err) => PackMismatch(err),
297                            EntryType(err) => EntryType(err),
298                            PackObjectVerify { offset, source } => PackObjectVerify { offset, source },
299                            Crc32Mismatch {
300                                expected,
301                                actual,
302                                offset,
303                                kind,
304                            } => Crc32Mismatch {
305                                expected,
306                                actual,
307                                offset,
308                                kind,
309                            },
310                            Interrupted => Interrupted,
311                        }
312                    })?;
313                pack_traverse_statistics.push(pack_traverse_outcome);
314            }
315        }
316
317        assert_eq!(
318            self.num_objects as usize, total_objects_checked,
319            "BUG: our slicing should allow to visit all objects"
320        );
321
322        progress.set_name("Validating multi-pack".into());
323        progress.show_throughput(operation_start);
324
325        Ok(integrity::Outcome {
326            actual_index_checksum,
327            pack_traverse_statistics,
328        })
329    }
330}