git_pack/index/verify.rs
1use std::sync::atomic::AtomicBool;
2
3use git_features::progress::Progress;
4use git_object::{bstr::ByteSlice, WriteTo};
5
6use crate::index;
7
8///
9pub mod integrity {
10 use std::marker::PhantomData;
11
12 use git_object::bstr::BString;
13
14 /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
15 #[derive(thiserror::Error, Debug)]
16 #[allow(missing_docs)]
17 pub enum Error {
18 #[error("The fan at index {index} is out of order as it's larger then the following value.")]
19 Fan { index: usize },
20 #[error("{kind} object {id} could not be decoded")]
21 ObjectDecode {
22 source: git_object::decode::Error,
23 kind: git_object::Kind,
24 id: git_hash::ObjectId,
25 },
26 #[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")]
27 ObjectEncodeMismatch {
28 kind: git_object::Kind,
29 id: git_hash::ObjectId,
30 expected: BString,
31 actual: BString,
32 },
33 }
34
35 /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
36 pub struct Outcome<P> {
37 /// The computed checksum of the index which matched the stored one.
38 pub actual_index_checksum: git_hash::ObjectId,
39 /// The packs traversal outcome, if one was provided
40 pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>,
41 /// The provided progress instance.
42 pub progress: P,
43 }
44
45 /// Additional options to define how the integrity should be verified.
46 #[derive(Clone)]
47 pub struct Options<F> {
48 /// The thoroughness of the verification
49 pub verify_mode: crate::index::verify::Mode,
50 /// The way to traverse packs
51 pub traversal: crate::index::traverse::Algorithm,
52 /// The amount of threads to use of `Some(N)`, with `None|Some(0)` using all available cores are used.
53 pub thread_limit: Option<usize>,
54 /// A function to create a pack cache
55 pub make_pack_lookup_cache: F,
56 }
57
58 impl Default for Options<fn() -> crate::cache::Never> {
59 fn default() -> Self {
60 Options {
61 verify_mode: Default::default(),
62 traversal: Default::default(),
63 thread_limit: None,
64 make_pack_lookup_cache: || crate::cache::Never,
65 }
66 }
67 }
68
69 /// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
70 ///
71 /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
72 #[derive(Debug, Copy, Clone)]
73 pub enum ProgressId {
74 /// The amount of bytes read to verify the index checksum.
75 ChecksumBytes,
76 /// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types.
77 Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>),
78 }
79
80 impl From<ProgressId> for git_features::progress::Id {
81 fn from(v: ProgressId) -> Self {
82 match v {
83 ProgressId::ChecksumBytes => *b"PTHI",
84 ProgressId::Traverse(_) => git_features::progress::UNKNOWN,
85 }
86 }
87 }
88}
89
90///
91pub mod checksum {
92 /// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
93 pub type Error = crate::verify::checksum::Error;
94}
95
/// The levels of thoroughness with which a pack and its index can be verified.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Mode {
    /// Validate the object hash and CRC32.
    HashCrc32,
    /// Validate hash and CRC32, and additionally decode each non-Blob object.
    /// Each object should be valid, i.e. be decodable.
    HashCrc32Decode,
    /// Validate hash and CRC32, and additionally decode and re-encode each non-Blob object.
    /// Each object should yield exactly the same hash when re-encoded.
    HashCrc32DecodeEncode,
}
108
109impl Default for Mode {
110 fn default() -> Self {
111 Mode::HashCrc32DecodeEncode
112 }
113}
114
115/// Information to allow verifying the integrity of an index with the help of its corresponding pack.
116pub struct PackContext<'a, F> {
117 /// The pack data file itself.
118 pub data: &'a crate::data::File,
119 /// The options further configuring the pack traversal and verification
120 pub options: integrity::Options<F>,
121}
122
123/// Verify and validate the content of the index file
124impl index::File {
125 /// Returns the trailing hash stored at the end of this index file.
126 ///
127 /// It's a hash over all bytes of the index.
128 pub fn index_checksum(&self) -> git_hash::ObjectId {
129 git_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..])
130 }
131
132 /// Returns the hash of the pack data file that this index file corresponds to.
133 ///
134 /// It should [`crate::data::File::checksum()`] of the corresponding pack data file.
135 pub fn pack_checksum(&self) -> git_hash::ObjectId {
136 let from = self.data.len() - self.hash_len * 2;
137 git_hash::ObjectId::from(&self.data[from..][..self.hash_len])
138 }
139
140 /// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents
141 /// of this index file, and return it if it does.
142 pub fn verify_checksum(
143 &self,
144 progress: impl Progress,
145 should_interrupt: &AtomicBool,
146 ) -> Result<git_hash::ObjectId, checksum::Error> {
147 crate::verify::checksum_on_disk_or_mmap(
148 self.path(),
149 &self.data,
150 self.index_checksum(),
151 self.object_hash,
152 progress,
153 should_interrupt,
154 )
155 }
156
157 /// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided.
158 /// Returns the checksum of the index file, the traversal outcome and the given progress if the integrity check is successful.
159 ///
160 /// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
161 /// It will be used to validate internal integrity of the pack before checking each objects integrity
162 /// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
163 /// The last member of the Option is a function returning an implementation of [`crate::cache::DecodeEntry`] to be used if
164 /// the [`index::traverse::Algorithm`] is `Lookup`.
165 /// To set this to `None`, use `None::<(_, _, _, fn() -> crate::cache::Never)>`.
166 ///
167 /// The `thread_limit` optionally specifies the amount of threads to be used for the [pack traversal][index::File::traverse()].
168 /// `make_cache` is only used in case a `pack` is specified, use existing implementations in the [`crate::cache`] module.
169 ///
170 /// # Tradeoffs
171 ///
172 /// The given `progress` is inevitably consumed if there is an error, which is a tradeoff chosen to easily allow using `?` in the
173 /// error case.
174 pub fn verify_integrity<P, C, F>(
175 &self,
176 pack: Option<PackContext<'_, F>>,
177 mut progress: P,
178 should_interrupt: &AtomicBool,
179 ) -> Result<integrity::Outcome<P>, index::traverse::Error<index::verify::integrity::Error>>
180 where
181 P: Progress,
182 C: crate::cache::DecodeEntry,
183 F: Fn() -> C + Send + Clone,
184 {
185 if let Some(first_invalid) = crate::verify::fan(&self.fan) {
186 return Err(index::traverse::Error::Processor(integrity::Error::Fan {
187 index: first_invalid,
188 }));
189 }
190
191 match pack {
192 Some(PackContext {
193 data: pack,
194 options:
195 integrity::Options {
196 verify_mode,
197 traversal,
198 thread_limit,
199 make_pack_lookup_cache,
200 },
201 }) => self
202 .traverse(
203 pack,
204 progress,
205 should_interrupt,
206 || {
207 let mut encode_buf = Vec::with_capacity(2048);
208 move |kind, data, index_entry, progress| {
209 Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress)
210 }
211 },
212 index::traverse::Options {
213 traversal,
214 thread_limit,
215 check: index::traverse::SafetyCheck::All,
216 make_pack_lookup_cache,
217 },
218 )
219 .map(|o| integrity::Outcome {
220 actual_index_checksum: o.actual_index_checksum,
221 pack_traverse_statistics: Some(o.statistics),
222 progress: o.progress,
223 }),
224 None => self
225 .verify_checksum(
226 progress.add_child_with_id("Sha1 of index", integrity::ProgressId::ChecksumBytes.into()),
227 should_interrupt,
228 )
229 .map_err(Into::into)
230 .map(|id| integrity::Outcome {
231 actual_index_checksum: id,
232 pack_traverse_statistics: None,
233 progress,
234 }),
235 }
236 }
237
238 #[allow(clippy::too_many_arguments)]
239 fn verify_entry<P>(
240 verify_mode: Mode,
241 encode_buf: &mut Vec<u8>,
242 object_kind: git_object::Kind,
243 buf: &[u8],
244 index_entry: &index::Entry,
245 progress: &mut P,
246 ) -> Result<(), integrity::Error>
247 where
248 P: Progress,
249 {
250 if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode {
251 use git_object::Kind::*;
252 match object_kind {
253 Tree | Commit | Tag => {
254 let object = git_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| {
255 integrity::Error::ObjectDecode {
256 source: err,
257 kind: object_kind,
258 id: index_entry.oid,
259 }
260 })?;
261 if let Mode::HashCrc32DecodeEncode = verify_mode {
262 encode_buf.clear();
263 object
264 .write_to(&mut *encode_buf)
265 .expect("writing to a memory buffer never fails");
266 if encode_buf.as_slice() != buf {
267 let mut should_return_error = true;
268 if let git_object::Kind::Tree = object_kind {
269 if buf.as_bstr().find(b"100664").is_some() || buf.as_bstr().find(b"100640").is_some() {
270 progress.info(format!("Tree object {} would be cleaned up during re-serialization, replacing mode '100664|100640' with '100644'", index_entry.oid));
271 should_return_error = false
272 }
273 }
274 if should_return_error {
275 return Err(integrity::Error::ObjectEncodeMismatch {
276 kind: object_kind,
277 id: index_entry.oid,
278 expected: buf.into(),
279 actual: encode_buf.clone().into(),
280 });
281 }
282 }
283 }
284 }
285 Blob => {}
286 };
287 }
288 Ok(())
289 }
290}