// git_pack/index/verify.rs

1use std::sync::atomic::AtomicBool;
2
3use git_features::progress::Progress;
4use git_object::{bstr::ByteSlice, WriteTo};
5
6use crate::index;
7
8///
9pub mod integrity {
10    use std::marker::PhantomData;
11
12    use git_object::bstr::BString;
13
14    /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
15    #[derive(thiserror::Error, Debug)]
16    #[allow(missing_docs)]
17    pub enum Error {
18        #[error("The fan at index {index} is out of order as it's larger then the following value.")]
19        Fan { index: usize },
20        #[error("{kind} object {id} could not be decoded")]
21        ObjectDecode {
22            source: git_object::decode::Error,
23            kind: git_object::Kind,
24            id: git_hash::ObjectId,
25        },
26        #[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")]
27        ObjectEncodeMismatch {
28            kind: git_object::Kind,
29            id: git_hash::ObjectId,
30            expected: BString,
31            actual: BString,
32        },
33    }
34
35    /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
36    pub struct Outcome<P> {
37        /// The computed checksum of the index which matched the stored one.
38        pub actual_index_checksum: git_hash::ObjectId,
39        /// The packs traversal outcome, if one was provided
40        pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>,
41        /// The provided progress instance.
42        pub progress: P,
43    }
44
45    /// Additional options to define how the integrity should be verified.
46    #[derive(Clone)]
47    pub struct Options<F> {
48        /// The thoroughness of the verification
49        pub verify_mode: crate::index::verify::Mode,
50        /// The way to traverse packs
51        pub traversal: crate::index::traverse::Algorithm,
52        /// The amount of threads to use of `Some(N)`, with `None|Some(0)` using all available cores are used.
53        pub thread_limit: Option<usize>,
54        /// A function to create a pack cache
55        pub make_pack_lookup_cache: F,
56    }
57
58    impl Default for Options<fn() -> crate::cache::Never> {
59        fn default() -> Self {
60            Options {
61                verify_mode: Default::default(),
62                traversal: Default::default(),
63                thread_limit: None,
64                make_pack_lookup_cache: || crate::cache::Never,
65            }
66        }
67    }
68
69    /// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
70    ///
71    /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
72    #[derive(Debug, Copy, Clone)]
73    pub enum ProgressId {
74        /// The amount of bytes read to verify the index checksum.
75        ChecksumBytes,
76        /// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types.
77        Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>),
78    }
79
80    impl From<ProgressId> for git_features::progress::Id {
81        fn from(v: ProgressId) -> Self {
82            match v {
83                ProgressId::ChecksumBytes => *b"PTHI",
84                ProgressId::Traverse(_) => git_features::progress::UNKNOWN,
85            }
86        }
87    }
88}
89
90///
91pub mod checksum {
92    /// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
93    pub type Error = crate::verify::checksum::Error;
94}
95
/// The level of thoroughness with which a pack and its index are verified.
///
/// Each variant includes all checks of the variants listed before it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Mode {
    /// Validate the object hash and CRC32.
    HashCrc32,
    /// Validate hash and CRC32, and decode each non-Blob object.
    /// An object that cannot be decoded is considered invalid.
    HashCrc32Decode,
    /// Validate hash and CRC32, and decode each non-Blob object, then encode it again.
    /// The re-encoded bytes are expected to be exactly the bytes that were decoded.
    HashCrc32DecodeEncode,
}
108
109impl Default for Mode {
110    fn default() -> Self {
111        Mode::HashCrc32DecodeEncode
112    }
113}
114
115/// Information to allow verifying the integrity of an index with the help of its corresponding pack.
116pub struct PackContext<'a, F> {
117    /// The pack data file itself.
118    pub data: &'a crate::data::File,
119    /// The options further configuring the pack traversal and verification
120    pub options: integrity::Options<F>,
121}
122
123/// Verify and validate the content of the index file
124impl index::File {
125    /// Returns the trailing hash stored at the end of this index file.
126    ///
127    /// It's a hash over all bytes of the index.
128    pub fn index_checksum(&self) -> git_hash::ObjectId {
129        git_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
130    }
131
132    /// Returns the hash of the pack data file that this index file corresponds to.
133    ///
134    /// It should [`crate::data::File::checksum()`] of the corresponding pack data file.
135    pub fn pack_checksum(&self) -> git_hash::ObjectId {
136        let from = self.data.len() - self.hash_len * 2;
137        git_hash::ObjectId::from(&self.data[from..][..self.hash_len])
138    }
139
140    /// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents
141    /// of this index file, and return it if it does.
142    pub fn verify_checksum(
143        &self,
144        progress: impl Progress,
145        should_interrupt: &AtomicBool,
146    ) -> Result<git_hash::ObjectId, checksum::Error> {
147        crate::verify::checksum_on_disk_or_mmap(
148            self.path(),
149            &self.data,
150            self.index_checksum(),
151            self.object_hash,
152            progress,
153            should_interrupt,
154        )
155    }
156
157    /// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided.
158    /// Returns the checksum of the index file, the traversal outcome and the given progress if the integrity check is successful.
159    ///
160    /// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
161    /// It will be used to validate internal integrity of the pack before checking each objects integrity
162    /// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
163    /// The last member of the Option is a function returning an implementation of [`crate::cache::DecodeEntry`] to be used if
164    /// the [`index::traverse::Algorithm`] is `Lookup`.
165    /// To set this to `None`, use `None::<(_, _, _, fn() -> crate::cache::Never)>`.
166    ///
167    /// The `thread_limit` optionally specifies the amount of threads to be used for the [pack traversal][index::File::traverse()].
168    /// `make_cache` is only used in case a `pack` is specified, use existing implementations in the [`crate::cache`] module.
169    ///
170    /// # Tradeoffs
171    ///
172    /// The given `progress` is inevitably consumed if there is an error, which is a tradeoff chosen to easily allow using `?` in the
173    /// error case.
174    pub fn verify_integrity<P, C, F>(
175        &self,
176        pack: Option<PackContext<'_, F>>,
177        mut progress: P,
178        should_interrupt: &AtomicBool,
179    ) -> Result<integrity::Outcome<P>, index::traverse::Error<index::verify::integrity::Error>>
180    where
181        P: Progress,
182        C: crate::cache::DecodeEntry,
183        F: Fn() -> C + Send + Clone,
184    {
185        if let Some(first_invalid) = crate::verify::fan(&self.fan) {
186            return Err(index::traverse::Error::Processor(integrity::Error::Fan {
187                index: first_invalid,
188            }));
189        }
190
191        match pack {
192            Some(PackContext {
193                data: pack,
194                options:
195                    integrity::Options {
196                        verify_mode,
197                        traversal,
198                        thread_limit,
199                        make_pack_lookup_cache,
200                    },
201            }) => self
202                .traverse(
203                    pack,
204                    progress,
205                    should_interrupt,
206                    || {
207                        let mut encode_buf = Vec::with_capacity(2048);
208                        move |kind, data, index_entry, progress| {
209                            Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress)
210                        }
211                    },
212                    index::traverse::Options {
213                        traversal,
214                        thread_limit,
215                        check: index::traverse::SafetyCheck::All,
216                        make_pack_lookup_cache,
217                    },
218                )
219                .map(|o| integrity::Outcome {
220                    actual_index_checksum: o.actual_index_checksum,
221                    pack_traverse_statistics: Some(o.statistics),
222                    progress: o.progress,
223                }),
224            None => self
225                .verify_checksum(
226                    progress.add_child_with_id("Sha1 of index", integrity::ProgressId::ChecksumBytes.into()),
227                    should_interrupt,
228                )
229                .map_err(Into::into)
230                .map(|id| integrity::Outcome {
231                    actual_index_checksum: id,
232                    pack_traverse_statistics: None,
233                    progress,
234                }),
235        }
236    }
237
238    #[allow(clippy::too_many_arguments)]
239    fn verify_entry<P>(
240        verify_mode: Mode,
241        encode_buf: &mut Vec<u8>,
242        object_kind: git_object::Kind,
243        buf: &[u8],
244        index_entry: &index::Entry,
245        progress: &mut P,
246    ) -> Result<(), integrity::Error>
247    where
248        P: Progress,
249    {
250        if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode {
251            use git_object::Kind::*;
252            match object_kind {
253                Tree | Commit | Tag => {
254                    let object = git_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| {
255                        integrity::Error::ObjectDecode {
256                            source: err,
257                            kind: object_kind,
258                            id: index_entry.oid,
259                        }
260                    })?;
261                    if let Mode::HashCrc32DecodeEncode = verify_mode {
262                        encode_buf.clear();
263                        object
264                            .write_to(&mut *encode_buf)
265                            .expect("writing to a memory buffer never fails");
266                        if encode_buf.as_slice() != buf {
267                            let mut should_return_error = true;
268                            if let git_object::Kind::Tree = object_kind {
269                                if buf.as_bstr().find(b"100664").is_some() || buf.as_bstr().find(b"100640").is_some() {
270                                    progress.info(format!("Tree object {} would be cleaned up during re-serialization, replacing mode '100664|100640' with '100644'", index_entry.oid));
271                                    should_return_error = false
272                                }
273                            }
274                            if should_return_error {
275                                return Err(integrity::Error::ObjectEncodeMismatch {
276                                    kind: object_kind,
277                                    id: index_entry.oid,
278                                    expected: buf.into(),
279                                    actual: encode_buf.clone().into(),
280                                });
281                            }
282                        }
283                    }
284                }
285                Blob => {}
286            };
287        }
288        Ok(())
289    }
290}