use std::sync::atomic::{AtomicBool, Ordering};

use git_features::{parallel, progress::Progress};

use super::Error;
use crate::{
    cache::delta::traverse,
    index::{self, traverse::Outcome, util::index_entries_sorted_by_offset_ascending},
};

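/// Traversal options for [`traverse_with_index()`][index::File::traverse_with_index()].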
#[derive(Default)]
pub struct Options {
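    /// If `Some`, use at most the given number of threads, otherwise determine a suitable default automatically.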
    pub thread_limit: Option<usize>,
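    /// The kinds of safety checks to perform.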
    pub check: crate::index::traverse::SafetyCheck,
}

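/// The progress ids used in [`traverse_with_index()`][index::File::traverse_with_index()].
///
/// Use them to identify the respective sub-progress by id when progress is consumed programmatically.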
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
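    /// The amount of bytes of the pack data file that were hashed so far.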
    HashPackDataBytes,
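    /// The amount of bytes of the pack index file that were hashed so far.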
    HashPackIndexBytes,
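    /// The progress of collecting all index entries, sorted by their offset in the pack.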
    CollectSortedIndexEntries,
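    /// The progress of building the delta-tree from the sorted index entries.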
    TreeFromOffsetsObjects,
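    /// The amount of objects that were decoded so far.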
    DecodedObjects,
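    /// The amount of bytes that were decoded so far.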
    DecodedBytes,
}

impl From<ProgressId> for git_features::progress::Id {
    fn from(v: ProgressId) -> Self {
        match v {
            ProgressId::HashPackDataBytes => *b"PTHP",
            ProgressId::HashPackIndexBytes => *b"PTHI",
            ProgressId::CollectSortedIndexEntries => *b"PTCE",
            ProgressId::TreeFromOffsetsObjects => *b"PTDI",
            ProgressId::DecodedObjects => *b"PTRO",
            ProgressId::DecodedBytes => *b"PTDB",
        }
    }
}

impl index::File {
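    /// Iterate through all _decoded objects_ in the given `pack`, verifying the pack and index
    /// checksums in one thread while traversing the delta-tree of objects in others.
    /// Each decoded object is handed to a `Processor` created by `new_processor`, and
    /// `should_interrupt` can be set from the outside to abort the operation early.
    ///
    /// ### Example
    ///
    /// A minimal sketch of accepting every decoded object, assuming this crate is consumed as
    /// `git_pack` with its `index::File::at()` and `data::File::at()` constructors and discarded
    /// progress reporting; it is illustrative rather than a verified doctest (hence `ignore`).
    ///
    /// ```ignore
    /// use std::sync::atomic::AtomicBool;
    ///
    /// use git_features::progress::Discard;
    ///
    /// let index = git_pack::index::File::at("pack-1234.idx", git_hash::Kind::Sha1)?;
    /// let pack = git_pack::data::File::at("pack-1234.pack", git_hash::Kind::Sha1)?;
    /// let should_interrupt = AtomicBool::new(false);
    /// let outcome = index.traverse_with_index(
    ///     &pack,
    ///     // a new processor is created per thread; this one accepts every object as-is
    ///     || |_kind, _data, _entry, _progress| Ok::<_, std::io::Error>(()),
    ///     Discard,
    ///     &should_interrupt,
    ///     Default::default(),
    /// )?;
    /// println!("decoded {} blobs", outcome.statistics.num_blobs);
    /// ```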
    pub fn traverse_with_index<P, Processor, E>(
        &self,
        pack: &crate::data::File,
        new_processor: impl Fn() -> Processor + Send + Clone,
        mut progress: P,
        should_interrupt: &AtomicBool,
        Options { check, thread_limit }: Options,
    ) -> Result<Outcome<P>, Error<E>>
    where
        P: Progress,
        Processor: FnMut(
            git_object::Kind,
            &[u8],
            &index::Entry,
            &mut <P::SubProgress as Progress>::SubProgress,
        ) -> Result<(), E>,
        E: std::error::Error + Send + Sync + 'static,
    {
        let (verify_result, traversal_result) = parallel::join(
            {
                let pack_progress = progress.add_child_with_id(
                    format!(
                        "Hash of pack '{}'",
                        pack.path().file_name().expect("pack has filename").to_string_lossy()
                    ),
                    ProgressId::HashPackDataBytes.into(),
                );
                let index_progress = progress.add_child_with_id(
                    format!(
                        "Hash of index '{}'",
                        self.path.file_name().expect("index has filename").to_string_lossy()
                    ),
                    ProgressId::HashPackIndexBytes.into(),
                );
                move || {
                    let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt);
                    if res.is_err() {
                        should_interrupt.store(true, Ordering::SeqCst);
                    }
                    res
                }
            },
            || -> Result<_, Error<_>> {
                let sorted_entries = index_entries_sorted_by_offset_ascending(
                    self,
                    progress.add_child_with_id("collecting sorted index", ProgressId::CollectSortedIndexEntries.into()),
                );
                let tree = crate::cache::delta::Tree::from_offsets_in_pack(
                    pack.path(),
                    sorted_entries.into_iter().map(Entry::from),
                    |e| e.index_entry.pack_offset,
                    |id| self.lookup(id).map(|idx| self.pack_offset_at_index(idx)),
                    progress.add_child_with_id("indexing", ProgressId::TreeFromOffsetsObjects.into()),
                    should_interrupt,
                    self.object_hash,
                )?;
                let mut outcome = digest_statistics(tree.traverse(
                    |slice, out| pack.entry_slice(slice).map(|entry| out.copy_from_slice(entry)),
                    pack.pack_end() as u64,
                    new_processor,
                    |data,
                     progress,
                     traverse::Context {
                         entry: pack_entry,
                         entry_end,
                         decompressed: bytes,
                         state: ref mut processor,
                         level,
                     }| {
                        let object_kind = pack_entry.header.as_kind().expect("non-delta object");
                        data.level = level;
                        data.decompressed_size = pack_entry.decompressed_size;
                        data.object_kind = object_kind;
                        data.compressed_size = entry_end - pack_entry.data_offset;
                        data.object_size = bytes.len() as u64;
                        let result = crate::index::traverse::process_entry(
                            check,
                            object_kind,
                            bytes,
                            progress,
                            &data.index_entry,
                            || {
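                                // Lazily compute the CRC32 over this entry's compressed bytes in the pack
                                // so it can be compared to the one recorded in the index.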
                                git_features::hash::crc32(
                                    pack.entry_slice(data.index_entry.pack_offset..entry_end)
                                        .expect("slice pointing into the pack (by now data is verified)"),
                                )
                            },
                            processor,
                        );
                        match result {
                            Err(err @ Error::PackDecode { .. }) if !check.fatal_decode_error() => {
                                progress.info(format!("Ignoring decode error: {err}"));
                                Ok(())
                            }
                            res => res,
                        }
                    },
                    crate::cache::delta::traverse::Options {
                        object_progress: progress.add_child_with_id("Resolving", ProgressId::DecodedObjects.into()),
                        size_progress: progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()),
                        thread_limit,
                        should_interrupt,
                        object_hash: self.object_hash,
                    },
                )?);
                outcome.pack_size = pack.data_len() as u64;
                Ok(outcome)
            },
        );
        Ok(Outcome {
            actual_index_checksum: verify_result?,
            statistics: traversal_result?,
            progress,
        })
    }
}

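/// A node in the delta-tree, pairing the index entry it stands for with statistics gathered while traversing it.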
struct Entry {
    index_entry: crate::index::Entry,
    object_kind: git_object::Kind,
    object_size: u64,
    decompressed_size: u64,
    compressed_size: u64,
    level: u16,
}

impl From<crate::index::Entry> for Entry {
    fn from(index_entry: crate::index::Entry) -> Self {
        Entry {
            index_entry,
            // the remaining fields are placeholders which are overwritten during traversal
            level: 0,
            object_kind: git_object::Kind::Tree,
            object_size: 0,
            decompressed_size: 0,
            compressed_size: 0,
        }
    }
}

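/// Aggregate the per-object data of all nodes in the traversed tree into overall traversal statistics.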
fn digest_statistics(traverse::Outcome { roots, children }: traverse::Outcome<Entry>) -> index::traverse::Statistics {
    let mut res = index::traverse::Statistics::default();
    let average = &mut res.average;
    for item in roots.iter().chain(children.iter()) {
        res.total_compressed_entries_size += item.data.compressed_size;
        res.total_decompressed_entries_size += item.data.decompressed_size;
        res.total_object_size += item.data.object_size;
        *res.objects_per_chain_length.entry(item.data.level as u32).or_insert(0) += 1;

        average.decompressed_size += item.data.decompressed_size;
        average.compressed_size += item.data.compressed_size as usize;
        average.object_size += item.data.object_size;
        average.num_deltas += item.data.level as u32;
        use git_object::Kind::*;
        match item.data.object_kind {
            Blob => res.num_blobs += 1,
            Tree => res.num_trees += 1,
            Tag => res.num_tags += 1,
            Commit => res.num_commits += 1,
        }
    }

    // Use at least 1 to avoid a division by zero if the pack contains no objects at all.
    let num_nodes = (roots.len() + children.len()).max(1);
    average.decompressed_size /= num_nodes as u64;
    average.compressed_size /= num_nodes;
    average.object_size /= num_nodes as u64;
    average.num_deltas /= num_nodes as u32;

    res
}