gix_pack/index/verify.rs
1use std::sync::atomic::AtomicBool;
2
3use gix_features::progress::{DynNestedProgress, Progress};
4use gix_object::WriteTo;
5
6use crate::index;
7
8///
9pub mod integrity {
10 use std::marker::PhantomData;
11
12 use gix_object::bstr::BString;
13
14 /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
15 #[derive(thiserror::Error, Debug)]
16 #[allow(missing_docs)]
17 pub enum Error {
18 #[error("Reserialization of an object failed")]
19 Io(#[from] std::io::Error),
20 #[error("The fan at index {index} is out of order as it's larger then the following value.")]
21 Fan { index: usize },
22 #[error("{kind} object {id} could not be decoded")]
23 ObjectDecode {
24 source: gix_object::decode::Error,
25 kind: gix_object::Kind,
26 id: gix_hash::ObjectId,
27 },
28 #[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")]
29 ObjectEncodeMismatch {
30 kind: gix_object::Kind,
31 id: gix_hash::ObjectId,
32 expected: BString,
33 actual: BString,
34 },
35 }
36
37 /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
38 pub struct Outcome {
39 /// The computed checksum of the index which matched the stored one.
40 pub actual_index_checksum: gix_hash::ObjectId,
41 /// The packs traversal outcome, if one was provided
42 pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>,
43 }
44
45 /// Additional options to define how the integrity should be verified.
46 #[derive(Clone)]
47 pub struct Options<F> {
48 /// The thoroughness of the verification
49 pub verify_mode: crate::index::verify::Mode,
50 /// The way to traverse packs
51 pub traversal: crate::index::traverse::Algorithm,
52 /// The amount of threads to use of `Some(N)`, with `None|Some(0)` using all available cores are used.
53 pub thread_limit: Option<usize>,
54 /// A function to create a pack cache
55 pub make_pack_lookup_cache: F,
56 }
57
58 impl Default for Options<fn() -> crate::cache::Never> {
59 fn default() -> Self {
60 Options {
61 verify_mode: Default::default(),
62 traversal: Default::default(),
63 thread_limit: None,
64 make_pack_lookup_cache: || crate::cache::Never,
65 }
66 }
67 }
68
69 /// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
70 ///
71 /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
72 #[derive(Debug, Copy, Clone)]
73 pub enum ProgressId {
74 /// The amount of bytes read to verify the index checksum.
75 ChecksumBytes,
76 /// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types.
77 Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>),
78 }
79
80 impl From<ProgressId> for gix_features::progress::Id {
81 fn from(v: ProgressId) -> Self {
82 match v {
83 ProgressId::ChecksumBytes => *b"PTHI",
84 ProgressId::Traverse(_) => gix_features::progress::UNKNOWN,
85 }
86 }
87 }
88}
89
90///
91pub mod checksum {
92 /// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
93 pub type Error = crate::verify::checksum::Error;
94}
95
/// The levels of thoroughness with which a pack and its index can be verified.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Mode {
    /// Only validate the object hash and the CRC32.
    HashCrc32,
    /// Validate hash and CRC32, and additionally decode every object that isn't a blob.
    /// An object is considered valid if it can be decoded.
    HashCrc32Decode,
    /// Validate hash and CRC32, then decode and re-encode every object that isn't a blob.
    /// The re-encoded bytes must be exactly the bytes that were decoded.
    ///
    /// This is the default.
    #[default]
    HashCrc32DecodeEncode,
}
109
110/// Information to allow verifying the integrity of an index with the help of its corresponding pack.
111pub struct PackContext<'a, F, D = crate::MMap> {
112 /// The pack data file itself.
113 pub data: &'a crate::data::File<D>,
114 /// The options further configuring the pack traversal and verification
115 pub options: integrity::Options<F>,
116}
117
118/// Verify and validate the content of the index file
119impl<T> index::File<T>
120where
121 T: crate::FileData + Sync,
122{
123 /// Returns the trailing hash stored at the end of this index file.
124 ///
125 /// It's a hash over all bytes of the index.
126 pub fn index_checksum(&self) -> gix_hash::ObjectId {
127 gix_hash::ObjectId::from_bytes_or_panic(&self.data[self.data.len() - self.hash_len..])
128 }
129
130 /// Returns the hash of the pack data file that this index file corresponds to.
131 ///
132 /// It should [`crate::data::File::checksum()`] of the corresponding pack data file.
133 pub fn pack_checksum(&self) -> gix_hash::ObjectId {
134 let from = self.data.len() - self.hash_len * 2;
135 gix_hash::ObjectId::from_bytes_or_panic(&self.data[from..][..self.hash_len])
136 }
137
138 /// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents
139 /// of this index file, and return it if it does.
140 pub fn verify_checksum(
141 &self,
142 progress: &mut dyn Progress,
143 should_interrupt: &AtomicBool,
144 ) -> Result<gix_hash::ObjectId, checksum::Error> {
145 crate::verify::checksum_on_disk_or_mmap(
146 self.path(),
147 &self.data,
148 self.index_checksum(),
149 self.object_hash,
150 progress,
151 should_interrupt,
152 )
153 }
154
155 /// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided.
156 /// Returns the checksum of the index file, the traversal outcome and the given progress if the integrity check is successful.
157 ///
158 /// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
159 /// It will be used to validate internal integrity of the pack before checking each objects integrity
160 /// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
161 /// The last member of the Option is a function returning an implementation of [`crate::cache::DecodeEntry`] to be used if
162 /// the [`index::traverse::Algorithm`] is `Lookup`.
163 /// To set this to `None`, use `None::<(_, _, _, fn() -> crate::cache::Never)>`.
164 ///
165 /// The `thread_limit` optionally specifies the amount of threads to be used for the [pack traversal][index::File::traverse()].
166 /// `make_cache` is only used in case a `pack` is specified, use existing implementations in the [`crate::cache`] module.
167 ///
168 /// # Tradeoffs
169 ///
170 /// The given `progress` is inevitably consumed if there is an error, which is a tradeoff chosen to easily allow using `?` in the
171 /// error case.
172 pub fn verify_integrity<C, F, D>(
173 &self,
174 pack: Option<PackContext<'_, F, D>>,
175 progress: &mut dyn DynNestedProgress,
176 should_interrupt: &AtomicBool,
177 ) -> Result<integrity::Outcome, index::traverse::Error<index::verify::integrity::Error>>
178 where
179 C: crate::cache::DecodeEntry,
180 F: Fn() -> C + Send + Clone,
181 D: crate::FileData + Send + Sync,
182 {
183 if let Some(first_invalid) = crate::verify::fan(&self.fan) {
184 return Err(index::traverse::Error::Processor(integrity::Error::Fan {
185 index: first_invalid,
186 }));
187 }
188
189 match pack {
190 Some(PackContext {
191 data: pack,
192 options:
193 integrity::Options {
194 verify_mode,
195 traversal,
196 thread_limit,
197 make_pack_lookup_cache,
198 },
199 }) => self
200 .traverse(
201 pack,
202 progress,
203 should_interrupt,
204 {
205 let mut encode_buf = Vec::with_capacity(2048);
206 move |kind, data, index_entry, progress| {
207 Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress)
208 }
209 },
210 index::traverse::Options {
211 traversal,
212 thread_limit,
213 check: index::traverse::SafetyCheck::All,
214 make_pack_lookup_cache,
215 },
216 )
217 .map(|o| integrity::Outcome {
218 actual_index_checksum: o.actual_index_checksum,
219 pack_traverse_statistics: Some(o.statistics),
220 }),
221 None => self
222 .verify_checksum(
223 &mut progress
224 .add_child_with_id("Sha1 of index".into(), integrity::ProgressId::ChecksumBytes.into()),
225 should_interrupt,
226 )
227 .map_err(index::traverse::Error::IndexVerify)
228 .map(|id| integrity::Outcome {
229 actual_index_checksum: id,
230 pack_traverse_statistics: None,
231 }),
232 }
233 }
234
235 #[allow(clippy::too_many_arguments)]
236 fn verify_entry(
237 verify_mode: Mode,
238 encode_buf: &mut Vec<u8>,
239 object_kind: gix_object::Kind,
240 buf: &[u8],
241 index_entry: &index::Entry,
242 _progress: &dyn gix_features::progress::Progress,
243 ) -> Result<(), integrity::Error> {
244 if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode {
245 use gix_object::Kind::*;
246 match object_kind {
247 Tree | Commit | Tag => {
248 let object =
249 gix_object::ObjectRef::from_bytes(buf, object_kind, index_entry.oid.kind()).map_err(|err| {
250 integrity::Error::ObjectDecode {
251 source: err,
252 kind: object_kind,
253 id: index_entry.oid,
254 }
255 })?;
256 if let Mode::HashCrc32DecodeEncode = verify_mode {
257 encode_buf.clear();
258 object.write_to(&mut *encode_buf)?;
259 if encode_buf.as_slice() != buf {
260 return Err(integrity::Error::ObjectEncodeMismatch {
261 kind: object_kind,
262 id: index_entry.oid,
263 expected: buf.into(),
264 actual: encode_buf.clone().into(),
265 });
266 }
267 }
268 }
269 Blob => {}
270 }
271 }
272 Ok(())
273 }
274}