git_odb/store_impls/dynamic/
verify.rs

1use std::{
2    ops::Deref,
3    sync::atomic::{AtomicBool, Ordering},
4    time::Instant,
5};
6
7use git_features::progress::{MessageLevel, Progress};
8
9use crate::{
10    pack,
11    store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics},
12    types::IndexAndPacks,
13};
14
15///
16pub mod integrity {
17    use std::{marker::PhantomData, path::PathBuf};
18
19    use crate::pack;
20
21    /// Options for use in [`Store::verify_integrity()`][crate::Store::verify_integrity()].
22    pub type Options<F> = pack::index::verify::integrity::Options<F>;
23
24    /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
25    #[derive(Debug, thiserror::Error)]
26    #[allow(missing_docs)]
27    pub enum Error {
28        #[error(transparent)]
29        MultiIndexIntegrity(#[from] pack::index::traverse::Error<pack::multi_index::verify::integrity::Error>),
30        #[error(transparent)]
31        IndexIntegrity(#[from] pack::index::traverse::Error<pack::index::verify::integrity::Error>),
32        #[error(transparent)]
33        IndexOpen(#[from] pack::index::init::Error),
34        #[error(transparent)]
35        LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error),
36        #[error(transparent)]
37        MultiIndexOpen(#[from] pack::multi_index::init::Error),
38        #[error(transparent)]
39        PackOpen(#[from] pack::data::init::Error),
40        #[error(transparent)]
41        InitializeODB(#[from] crate::store::load_index::Error),
42        #[error("The disk on state changed while performing the operation, and we observed the change.")]
43        NeedsRetryDueToChangeOnDisk,
44    }
45
46    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
47    #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
48    /// Integrity information about loose object databases
49    pub struct LooseObjectStatistics {
50        /// The path to the root directory of the loose objects database
51        pub path: PathBuf,
52        /// The statistics created after verifying the loose object database.
53        pub statistics: crate::loose::verify::integrity::Statistics,
54    }
55
56    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
57    #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
58    /// Traversal statistics of packs governed by single indices or multi-pack indices.
59    #[allow(missing_docs)]
60    pub enum SingleOrMultiStatistics {
61        Single(pack::index::traverse::Statistics),
62        Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>),
63    }
64
65    /// Statistics gathered when traversing packs of various kinds of indices.
66    #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
67    #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
68    pub struct IndexStatistics {
69        /// The path to the index or multi-pack index for which statics were gathered.
70        pub path: PathBuf,
71        /// The actual statistics for the index at `path`.
72        pub statistics: SingleOrMultiStatistics,
73    }
74
75    /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
76    pub struct Outcome<P> {
77        /// Statistics for validated loose object stores.
78        pub loose_object_stores: Vec<LooseObjectStatistics>,
79        /// Pack traversal statistics for each index and their pack(s)
80        pub index_statistics: Vec<IndexStatistics>,
81        /// The provided progress instance.
82        pub progress: P,
83    }
84
85    /// The progress ids used in [`Store::verify_integrity()`][crate::Store::verify_integrity()].
86    ///
87    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
88    #[derive(Debug, Copy, Clone)]
89    pub enum ProgressId {
90        /// Contains the path of the currently validated loose object database.
91        VerifyLooseObjectDbPath,
92        /// The root progress for all verification of an index. It doesn't contain any useful information itself.
93        VerifyIndex(PhantomData<git_pack::index::verify::integrity::ProgressId>),
94        /// The root progress for all verification of a multi-index. It doesn't contain any useful information itself.
95        VerifyMultiIndex(PhantomData<git_pack::multi_index::verify::integrity::ProgressId>),
96    }
97
98    impl From<ProgressId> for git_features::progress::Id {
99        fn from(v: ProgressId) -> Self {
100            match v {
101                ProgressId::VerifyLooseObjectDbPath => *b"VISP",
102                ProgressId::VerifyMultiIndex(_) => *b"VIMI",
103                ProgressId::VerifyIndex(_) => *b"VISI",
104            }
105        }
106    }
107}
108
109impl super::Store {
110    /// Check the integrity of all objects as per the given `options`.
111    ///
112    /// Note that this will not not force loading all indices or packs permanently, as we will only use the momentarily loaded disk state.
113    /// This does, however, include all alternates.
114    pub fn verify_integrity<C, P, F>(
115        &self,
116        mut progress: P,
117        should_interrupt: &AtomicBool,
118        options: integrity::Options<F>,
119    ) -> Result<integrity::Outcome<P>, integrity::Error>
120    where
121        P: Progress,
122        C: pack::cache::DecodeEntry,
123        F: Fn() -> C + Send + Clone,
124    {
125        let mut index = self.index.load();
126        if !index.is_initialized() {
127            self.consolidate_with_disk_state(true, false)?;
128            index = self.index.load();
129            assert!(
130                index.is_initialized(),
131                "BUG: after consolidating successfully, we have an initialized index"
132            )
133        }
134
135        progress.init(
136            Some(index.slot_indices.len()),
137            git_features::progress::count("pack indices"),
138        );
139        let mut statistics = Vec::new();
140        let index_check_message = |path: &std::path::Path| {
141            format!(
142                "Checking integrity: {}",
143                path.file_name()
144                    .map(|f| f.to_string_lossy())
145                    .unwrap_or_else(std::borrow::Cow::default)
146            )
147        };
148        for slot_index in &index.slot_indices {
149            let slot = &self.files[*slot_index];
150            if slot.generation.load(Ordering::SeqCst) != index.generation {
151                return Err(integrity::Error::NeedsRetryDueToChangeOnDisk);
152            }
153            let files = slot.files.load();
154            let files = Option::as_ref(&files).ok_or(integrity::Error::NeedsRetryDueToChangeOnDisk)?;
155
156            let start = Instant::now();
157            let (mut child_progress, num_objects, index_path) = match files {
158                IndexAndPacks::Index(bundle) => {
159                    let index;
160                    let index = match bundle.index.loaded() {
161                        Some(index) => index.deref(),
162                        None => {
163                            index = pack::index::File::at(bundle.index.path(), self.object_hash)?;
164                            &index
165                        }
166                    };
167                    let pack;
168                    let data = match bundle.data.loaded() {
169                        Some(pack) => pack.deref(),
170                        None => {
171                            pack = pack::data::File::at(bundle.data.path(), self.object_hash)?;
172                            &pack
173                        }
174                    };
175                    let outcome = index.verify_integrity(
176                        Some(pack::index::verify::PackContext {
177                            data,
178                            options: options.clone(),
179                        }),
180                        progress.add_child_with_id(
181                            "verify index",
182                            integrity::ProgressId::VerifyIndex(Default::default()).into(),
183                        ),
184                        should_interrupt,
185                    )?;
186                    statistics.push(IndexStatistics {
187                        path: bundle.index.path().to_owned(),
188                        statistics: SingleOrMultiStatistics::Single(
189                            outcome
190                                .pack_traverse_statistics
191                                .expect("pack provided so there are stats"),
192                        ),
193                    });
194                    (outcome.progress, index.num_objects(), index.path().to_owned())
195                }
196                IndexAndPacks::MultiIndex(bundle) => {
197                    let index;
198                    let index = match bundle.multi_index.loaded() {
199                        Some(index) => index.deref(),
200                        None => {
201                            index = pack::multi_index::File::at(bundle.multi_index.path())?;
202                            &index
203                        }
204                    };
205                    let outcome = index.verify_integrity(
206                        progress.add_child_with_id(
207                            "verify multi-index",
208                            integrity::ProgressId::VerifyMultiIndex(Default::default()).into(),
209                        ),
210                        should_interrupt,
211                        options.clone(),
212                    )?;
213
214                    let index_dir = bundle.multi_index.path().parent().expect("file in a directory");
215                    statistics.push(IndexStatistics {
216                        path: Default::default(),
217                        statistics: SingleOrMultiStatistics::Multi(
218                            outcome
219                                .pack_traverse_statistics
220                                .into_iter()
221                                .zip(index.index_names())
222                                .map(|(statistics, index_name)| (index_dir.join(index_name), statistics))
223                                .collect(),
224                        ),
225                    });
226                    (outcome.progress, index.num_objects(), index.path().to_owned())
227                }
228            };
229
230            child_progress.set_name(index_check_message(&index_path));
231            child_progress.show_throughput_with(
232                start,
233                num_objects as usize,
234                git_features::progress::count("objects").expect("set"),
235                MessageLevel::Success,
236            );
237            progress.inc();
238        }
239
240        progress.init(
241            Some(index.loose_dbs.len()),
242            git_features::progress::count("loose object stores"),
243        );
244        let mut loose_object_stores = Vec::new();
245        for loose_db in &*index.loose_dbs {
246            let out = loose_db
247                .verify_integrity(
248                    progress.add_child_with_id(
249                        loose_db.path().display().to_string(),
250                        integrity::ProgressId::VerifyLooseObjectDbPath.into(),
251                    ),
252                    should_interrupt,
253                )
254                .map(|statistics| integrity::LooseObjectStatistics {
255                    path: loose_db.path().to_owned(),
256                    statistics,
257                })?;
258            loose_object_stores.push(out);
259        }
260
261        Ok(integrity::Outcome {
262            loose_object_stores,
263            index_statistics: statistics,
264            progress,
265        })
266    }
267}