use std::{convert::TryInto, io, sync::atomic::AtomicBool};

pub use error::Error;
use git_features::progress::{self, Progress};

use crate::cache::delta::{traverse, Tree};

pub(crate) mod encode;
mod error;

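/// The data kept for each pack entry in the delta [`Tree`]: the object id, filled in once the entry
/// has been resolved, and the crc32 recorded for the entry.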
pub(crate) struct TreeEntry {
    pub id: git_hash::ObjectId,
    pub crc32: u32,
}

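/// The successful result of [`crate::index::File::write_data_iter_to_stream()`], describing the index that was written.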
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The version of the index that was written.
    pub index_version: crate::index::Version,
    /// The hash over the contents of the index file that was written.
    pub index_hash: git_hash::ObjectId,

    /// The hash of the pack data the index refers to.
    pub data_hash: git_hash::ObjectId,
    /// The number of objects recorded in the index.
    pub num_objects: u32,
}

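/// The progress ids used in [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream()].
///
/// Use these to identify the child progress instances created by that method.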
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
    /// Counts the objects seen while indexing the incoming pack entries.
    IndexObjects,
    /// The amount of bytes decompressed while reading pack entries.
    DecompressedBytes,
    /// Counts the objects whose ids are computed during delta-tree traversal.
    ResolveObjects,
    /// The amount of decoded object bytes produced while resolving objects.
    DecodedBytes,
    /// The amount of bytes written to the index file.
    IndexBytesWritten,
}

impl From<ProgressId> for git_features::progress::Id {
    fn from(v: ProgressId) -> Self {
        match v {
            ProgressId::IndexObjects => *b"IWIO",
            ProgressId::DecompressedBytes => *b"IWDB",
            ProgressId::ResolveObjects => *b"IWRO",
            ProgressId::DecodedBytes => *b"IWDB", // Note: shares its four-byte id with `DecompressedBytes`.
            ProgressId::IndexBytesWritten => *b"IWBW",
        }
    }
}

impl crate::index::File {
    #[allow(clippy::too_many_arguments)]
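    /// Write information about all pack `entries` to `out`, creating an index of the given `version`,
    /// and return an [`Outcome`] with the hashes of the written index and of the pack data it refers to.
    ///
    /// `make_resolver` is called once to produce the function used to obtain pack data for a given
    /// [`EntryRange`][crate::data::EntryRange] during delta resolution, with `thread_limit` optionally
    /// capping the amount of threads used for it and `should_interrupt` allowing the operation to be
    /// aborted. `object_hash` determines the kind of hash used for object ids, while `pack_version` is
    /// only used to synthesize a pack checksum if `entries` is empty.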
    pub fn write_data_iter_to_stream<F, F2>(
        version: crate::index::Version,
        make_resolver: F,
        entries: impl Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
        thread_limit: Option<usize>,
        mut root_progress: impl Progress,
        out: impl io::Write,
        should_interrupt: &AtomicBool,
        object_hash: git_hash::Kind,
        pack_version: crate::data::Version,
    ) -> Result<Outcome, Error>
    where
        F: FnOnce() -> io::Result<F2>,
        F2: for<'r> Fn(crate::data::EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
    {
        if version != crate::index::Version::default() {
            return Err(Error::Unsupported(version));
        }
        let mut num_objects: usize = 0;
        let mut last_seen_trailer = None;
        let anticipated_num_objects = entries.size_hint().1.unwrap_or_else(|| entries.size_hint().0);
        let mut tree = Tree::with_capacity(anticipated_num_objects)?;
        let indexing_start = std::time::Instant::now();

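        // Set up the root progress with its four steps and the children used during the indexing phase.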
        root_progress.init(Some(4), progress::steps());
        let mut objects_progress = root_progress.add_child_with_id("indexing", ProgressId::IndexObjects.into());
        objects_progress.init(entries.size_hint().1, progress::count("objects"));
        let mut decompressed_progress =
            root_progress.add_child_with_id("decompressing", ProgressId::DecompressedBytes.into());
        decompressed_progress.init(None, progress::bytes());
        let mut pack_entries_end: u64 = 0;

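        // First pass: walk all entries, adding base objects as roots and offset-deltas as children of
        // the delta tree, while tracking where the pack's entry data ends.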
        for entry in entries {
            let crate::data::input::Entry {
                header,
                pack_offset,
                crc32,
                header_size,
                compressed: _,
                compressed_size,
                decompressed_size,
                trailer,
            } = entry?;

            decompressed_progress.inc_by(decompressed_size as usize);

            let entry_len = header_size as u64 + compressed_size;
            pack_entries_end = pack_offset + entry_len;

            let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");

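            // Base objects become roots of the delta tree and offset-deltas become children of their
            // base entry; ref-deltas violate an invariant of the input iterator.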
            use crate::data::entry::Header::*;
            match header {
                Tree | Blob | Commit | Tag => {
                    tree.add_root(
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
                RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
                OfsDelta { base_distance } => {
                    let base_pack_offset =
                        crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
                            Error::IteratorInvariantBaseOffset {
                                pack_offset,
                                distance: base_distance,
                            },
                        )?;
                    tree.add_child(
                        base_pack_offset,
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
            };
            last_seen_trailer = trailer;
            num_objects += 1;
            objects_progress.inc();
        }
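        // The iterator's size hint is only an estimate, so the actual object count may differ from
        // what was anticipated, e.g. after thin-pack resolution.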
        if num_objects != anticipated_num_objects {
            objects_progress.info(format!(
                "{anticipated_num_objects} objects were resolved into {num_objects} objects during thin-pack resolution"
            ));
        }
        let num_objects: u32 = num_objects
            .try_into()
            .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;

        objects_progress.show_throughput(indexing_start);
        decompressed_progress.show_throughput(indexing_start);
        drop(objects_progress);
        drop(decompressed_progress);

        root_progress.inc();

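        // Second pass: traverse the delta tree to decode each object and compute its id, then sort
        // all entries by object id as the index format requires.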
        let resolver = make_resolver()?;
        let sorted_pack_offsets_by_oid = {
            let traverse::Outcome { roots, children } = tree.traverse(
                resolver,
                pack_entries_end,
                || (),
                |data,
                 _progress,
                 traverse::Context {
                     entry,
                     decompressed: bytes,
                     ..
                 }| {
                    modify_base(data, entry, bytes, version.hash());
                    Ok::<_, Error>(())
                },
                traverse::Options {
                    object_progress: root_progress.add_child_with_id("Resolving", ProgressId::ResolveObjects.into()),
                    size_progress: root_progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()),
                    thread_limit,
                    should_interrupt,
                    object_hash,
                },
            )?;
            root_progress.inc();

            let mut items = roots;
            items.extend(children);
            {
                let _progress = root_progress.add_child_with_id("sorting by id", git_features::progress::UNKNOWN);
                items.sort_by_key(|e| e.data.id);
            }

            root_progress.inc();
            items
        };

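        // The pack checksum is the trailer of the last entry seen; an empty pack gets a checksum
        // computed over a freshly encoded pack header instead.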
        let pack_hash = match last_seen_trailer {
            Some(ph) => ph,
            None if num_objects == 0 => {
                let header = crate::data::header::encode(pack_version, 0);
                let mut hasher = git_features::hash::hasher(object_hash);
                hasher.update(&header);
                git_hash::ObjectId::from(hasher.digest())
            }
            None => return Err(Error::IteratorInvariantTrailer),
        };
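        // Write the index file itself and obtain the hash over its contents.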
        let index_hash = encode::write_to(
            out,
            sorted_pack_offsets_by_oid,
            &pack_hash,
            version,
            root_progress.add_child_with_id("writing index file", ProgressId::IndexBytesWritten.into()),
        )?;
        root_progress.show_throughput_with(
            indexing_start,
            num_objects as usize,
            progress::count("objects").expect("unit always set"),
            progress::MessageLevel::Success,
        );
        Ok(Outcome {
            index_version: version,
            index_hash,
            data_hash: pack_hash,
            num_objects,
        })
    }
}

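/// Compute the hash of the given base object from its `decompressed` data and store it in `entry`.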
fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: git_hash::Kind) {
    fn compute_hash(kind: git_object::Kind, bytes: &[u8], object_hash: git_hash::Kind) -> git_hash::ObjectId {
        let mut hasher = git_features::hash::hasher(object_hash);
        hasher.update(&git_object::encode::loose_header(kind, bytes.len()));
        hasher.update(bytes);
        git_hash::ObjectId::from(hasher.digest())
    }

    let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration");
    let id = compute_hash(object_kind, decompressed, hash);
    entry.id = id;
}