git_pack/multi_index/
verify.rs

1use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant};
2
3use git_features::progress::Progress;
4
5use crate::{index, multi_index::File};
6
7///
8pub mod integrity {
9    use crate::multi_index::EntryIndex;
10
11    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
12    #[derive(thiserror::Error, Debug)]
13    #[allow(missing_docs)]
14    pub enum Error {
15        #[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")]
16        PackOffsetMismatch {
17            id: git_hash::ObjectId,
18            expected_pack_offset: u64,
19            actual_pack_offset: u64,
20        },
21        #[error(transparent)]
22        MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error),
23        #[error(transparent)]
24        IndexIntegrity(#[from] crate::index::verify::integrity::Error),
25        #[error(transparent)]
26        BundleInit(#[from] crate::bundle::init::Error),
27        #[error("Counted {actual} objects, but expected {expected} as per multi-index")]
28        UnexpectedObjectCount { actual: usize, expected: usize },
29        #[error("{id} wasn't found in the index referenced in the multi-pack index")]
30        OidNotFound { id: git_hash::ObjectId },
31        #[error("The object id at multi-index entry {index} wasn't in order")]
32        OutOfOrder { index: EntryIndex },
33        #[error("The fan at index {index} is out of order as it's larger then the following value.")]
34        Fan { index: usize },
35        #[error("The multi-index claims to have no objects")]
36        Empty,
37        #[error("Interrupted")]
38        Interrupted,
39    }
40
41    /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
42    pub struct Outcome<P> {
43        /// The computed checksum of the multi-index which matched the stored one.
44        pub actual_index_checksum: git_hash::ObjectId,
45        /// The for each entry in [`index_names()`][super::File::index_names()] provide the corresponding pack traversal outcome.
46        pub pack_traverse_statistics: Vec<crate::index::traverse::Statistics>,
47        /// The provided progress instance.
48        pub progress: P,
49    }
50
51    /// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()].
52    ///
53    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
54    #[derive(Debug, Copy, Clone)]
55    pub enum ProgressId {
56        /// The amount of bytes read to verify the multi-index checksum.
57        ChecksumBytes,
58        /// The amount of objects whose offset has been checked.
59        ObjectOffsets,
60    }
61
62    impl From<ProgressId> for git_features::progress::Id {
63        fn from(v: ProgressId) -> Self {
64            match v {
65                ProgressId::ChecksumBytes => *b"MVCK",
66                ProgressId::ObjectOffsets => *b"MVOF",
67            }
68        }
69    }
70}
71
72///
73pub mod checksum {
74    /// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()].
75    pub type Error = crate::verify::checksum::Error;
76}
77
78impl File {
79    /// Validate that our [`checksum()`][File::checksum()] matches the actual contents
80    /// of this index file, and return it if it does.
81    pub fn verify_checksum(
82        &self,
83        progress: impl Progress,
84        should_interrupt: &AtomicBool,
85    ) -> Result<git_hash::ObjectId, checksum::Error> {
86        crate::verify::checksum_on_disk_or_mmap(
87            self.path(),
88            &self.data,
89            self.checksum(),
90            self.object_hash,
91            progress,
92            should_interrupt,
93        )
94    }
95
96    /// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects.
97    ///
98    /// Instead we only validate the contents of the multi-index itself.
99    pub fn verify_integrity_fast<P>(
100        &self,
101        progress: P,
102        should_interrupt: &AtomicBool,
103    ) -> Result<(git_hash::ObjectId, P), integrity::Error>
104    where
105        P: Progress,
106    {
107        self.verify_integrity_inner(
108            progress,
109            should_interrupt,
110            false,
111            index::verify::integrity::Options::default(),
112        )
113        .map_err(|err| match err {
114            index::traverse::Error::Processor(err) => err,
115            _ => unreachable!("BUG: no other error type is possible"),
116        })
117        .map(|o| (o.actual_index_checksum, o.progress))
118    }
119
120    /// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs.
121    ///
122    /// Note that it's considered a failure if an index doesn't have a corresponding pack.
123    pub fn verify_integrity<C, P, F>(
124        &self,
125        progress: P,
126        should_interrupt: &AtomicBool,
127        options: index::verify::integrity::Options<F>,
128    ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>>
129    where
130        P: Progress,
131        C: crate::cache::DecodeEntry,
132        F: Fn() -> C + Send + Clone,
133    {
134        self.verify_integrity_inner(progress, should_interrupt, true, options)
135    }
136
137    fn verify_integrity_inner<C, P, F>(
138        &self,
139        mut progress: P,
140        should_interrupt: &AtomicBool,
141        deep_check: bool,
142        options: index::verify::integrity::Options<F>,
143    ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>>
144    where
145        P: Progress,
146        C: crate::cache::DecodeEntry,
147        F: Fn() -> C + Send + Clone,
148    {
149        let parent = self.path.parent().expect("must be in a directory");
150
151        let actual_index_checksum = self
152            .verify_checksum(
153                progress.add_child_with_id(
154                    format!("{}: checksum", self.path.display()),
155                    integrity::ProgressId::ChecksumBytes.into(),
156                ),
157                should_interrupt,
158            )
159            .map_err(integrity::Error::from)
160            .map_err(index::traverse::Error::Processor)?;
161
162        if let Some(first_invalid) = crate::verify::fan(&self.fan) {
163            return Err(index::traverse::Error::Processor(integrity::Error::Fan {
164                index: first_invalid,
165            }));
166        }
167
168        if self.num_objects == 0 {
169            return Err(index::traverse::Error::Processor(integrity::Error::Empty));
170        }
171
172        let mut pack_traverse_statistics = Vec::new();
173
174        let operation_start = Instant::now();
175        let mut total_objects_checked = 0;
176        let mut pack_ids_and_offsets = Vec::with_capacity(self.num_objects as usize);
177        {
178            let order_start = Instant::now();
179            let mut progress = progress.add_child_with_id("checking oid order", git_features::progress::UNKNOWN);
180            progress.init(
181                Some(self.num_objects as usize),
182                git_features::progress::count("objects"),
183            );
184
185            for entry_index in 0..(self.num_objects - 1) {
186                let lhs = self.oid_at_index(entry_index);
187                let rhs = self.oid_at_index(entry_index + 1);
188
189                if rhs.cmp(lhs) != Ordering::Greater {
190                    return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder {
191                        index: entry_index,
192                    }));
193                }
194                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
195                pack_ids_and_offsets.push((pack_id, entry_index));
196                progress.inc();
197            }
198            {
199                let entry_index = self.num_objects - 1;
200                let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index);
201                pack_ids_and_offsets.push((pack_id, entry_index));
202            }
203            // sort by pack-id to allow handling all indices matching a pack while its open.
204            pack_ids_and_offsets.sort_by(|l, r| l.0.cmp(&r.0));
205            progress.show_throughput(order_start);
206        };
207
208        progress.init(
209            Some(self.num_indices as usize),
210            git_features::progress::count("indices"),
211        );
212
213        let mut pack_ids_slice = pack_ids_and_offsets.as_slice();
214
215        for (pack_id, index_file_name) in self.index_names.iter().enumerate() {
216            progress.set_name(index_file_name.display().to_string());
217            progress.inc();
218
219            let mut bundle = None;
220            let index;
221            let index_path = parent.join(index_file_name);
222            let index = if deep_check {
223                bundle = crate::Bundle::at(index_path, self.object_hash)
224                    .map_err(integrity::Error::from)
225                    .map_err(index::traverse::Error::Processor)?
226                    .into();
227                bundle.as_ref().map(|b| &b.index).expect("just set")
228            } else {
229                index = Some(
230                    index::File::at(index_path, self.object_hash)
231                        .map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err)))
232                        .map_err(index::traverse::Error::Processor)?,
233                );
234                index.as_ref().expect("just set")
235            };
236
237            let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id);
238            let multi_index_entries_to_check = &pack_ids_slice[..slice_end];
239            {
240                let offset_start = Instant::now();
241                let mut offsets_progress =
242                    progress.add_child_with_id("verify object offsets", integrity::ProgressId::ObjectOffsets.into());
243                offsets_progress.init(
244                    Some(pack_ids_and_offsets.len()),
245                    git_features::progress::count("objects"),
246                );
247                pack_ids_slice = &pack_ids_slice[slice_end..];
248
249                for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) {
250                    let oid = self.oid_at_index(entry_id);
251                    let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id);
252                    let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| {
253                        index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() })
254                    })?;
255                    let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index);
256                    if actual_pack_offset != expected_pack_offset {
257                        return Err(index::traverse::Error::Processor(
258                            integrity::Error::PackOffsetMismatch {
259                                id: oid.to_owned(),
260                                expected_pack_offset,
261                                actual_pack_offset,
262                            },
263                        ));
264                    }
265                    offsets_progress.inc();
266                }
267
268                if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) {
269                    return Err(index::traverse::Error::Processor(integrity::Error::Interrupted));
270                }
271                offsets_progress.show_throughput(offset_start);
272            }
273
274            total_objects_checked += multi_index_entries_to_check.len();
275
276            if let Some(bundle) = bundle {
277                progress.set_name(format!("Validating {}", index_file_name.display()));
278                let crate::bundle::verify::integrity::Outcome {
279                    actual_index_checksum: _,
280                    pack_traverse_outcome,
281                    progress: returned_progress,
282                } = bundle
283                    .verify_integrity(progress, should_interrupt, options.clone())
284                    .map_err(|err| {
285                        use index::traverse::Error::*;
286                        match err {
287                            Processor(err) => Processor(integrity::Error::IndexIntegrity(err)),
288                            VerifyChecksum(err) => VerifyChecksum(err),
289                            Tree(err) => Tree(err),
290                            TreeTraversal(err) => TreeTraversal(err),
291                            PackDecode { id, offset, source } => PackDecode { id, offset, source },
292                            PackMismatch { expected, actual } => PackMismatch { expected, actual },
293                            PackObjectMismatch {
294                                expected,
295                                actual,
296                                offset,
297                                kind,
298                            } => PackObjectMismatch {
299                                expected,
300                                actual,
301                                offset,
302                                kind,
303                            },
304                            Crc32Mismatch {
305                                expected,
306                                actual,
307                                offset,
308                                kind,
309                            } => Crc32Mismatch {
310                                expected,
311                                actual,
312                                offset,
313                                kind,
314                            },
315                            Interrupted => Interrupted,
316                        }
317                    })?;
318                progress = returned_progress;
319                pack_traverse_statistics.push(pack_traverse_outcome);
320            }
321        }
322
323        assert_eq!(
324            self.num_objects as usize, total_objects_checked,
325            "BUG: our slicing should allow to visit all objects"
326        );
327
328        progress.set_name("Validating multi-pack");
329        progress.show_throughput(operation_start);
330
331        Ok(integrity::Outcome {
332            actual_index_checksum,
333            pack_traverse_statistics,
334            progress,
335        })
336    }
337}