use std::{
    cell::RefCell,
    sync::{atomic::AtomicBool, Arc},
};

use git_features::{parallel, progress::Progress};
use git_hash::ObjectId;

use crate::{data::output, find};

pub(in crate::data::output::count::objects_impl) mod reduce;
mod util;

mod types;
pub use types::{Error, ObjectExpansion, Options, Outcome};

mod tree;

pub type Result<E1, E2> = std::result::Result<(Vec<output::Count>, Outcome), Error<E1, E2>>;

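/// Produce a [`Count`][output::Count] for each object yielded by `objects_ids`, optionally expanding
/// commits, tags and trees into further objects as configured by [`Options`], while de-duplicating
/// objects across all threads.
///
/// The returned [`Outcome`] carries statistics about how many objects were received, decoded and expanded.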
pub fn objects<Find, Iter, IterErr, Oid>(
    db: Find,
    objects_ids: Iter,
    progress: impl Progress,
    should_interrupt: &AtomicBool,
    Options {
        thread_limit,
        input_object_expansion,
        chunk_size,
    }: Options,
) -> Result<find::existing::Error<Find::Error>, IterErr>
where
    Find: crate::Find + Send + Clone,
    <Find as crate::Find>::Error: Send,
    Iter: Iterator<Item = std::result::Result<Oid, IterErr>> + Send,
    Oid: Into<ObjectId> + Send,
    IterErr: std::error::Error + Send,
{
    let lower_bound = objects_ids.size_hint().0;
    let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
        chunk_size,
        if lower_bound == 0 { None } else { Some(lower_bound) },
        thread_limit,
        None,
    );
    let chunks = git_features::iter::Chunks {
        inner: objects_ids,
        size: chunk_size,
    };
    let seen_objs = dashmap::DashSet::<ObjectId, git_hashtable::hash::Builder>::default();
    let progress = Arc::new(parking_lot::Mutex::new(progress));

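    // Process chunks of input ids in parallel: each worker thread gets two reusable object buffers and
    // its own progress child, while per-chunk results are merged by the `Statistics` reducer.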
    parallel::in_parallel(
        chunks,
        thread_limit,
        {
            let progress = Arc::clone(&progress);
            move |n| {
                (
                    Vec::new(), // reusable buffer for object data
                    Vec::new(), // second reusable buffer so two objects can be held at once
                    {
                        let mut p = progress
                            .lock()
                            .add_child_with_id(format!("thread {n}"), git_features::progress::UNKNOWN);
                        p.init(None, git_features::progress::count("objects"));
                        p
                    },
                )
            }
        },
        {
            let seen_objs = &seen_objs;
            move |oids: Vec<std::result::Result<Oid, IterErr>>, (buf1, buf2, progress)| {
                expand::this(
                    &db,
                    input_object_expansion,
                    seen_objs,
                    oids,
                    buf1,
                    buf2,
                    progress,
                    should_interrupt,
                    true, // allow looking up pack locations of expanded objects
                )
            }
        },
        reduce::Statistics::new(progress),
    )
}

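/// Like [`objects()`], but runs entirely on the calling thread, trading parallelism for less coordination overhead.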
pub fn objects_unthreaded<Find, IterErr, Oid>(
    db: Find,
    object_ids: impl Iterator<Item = std::result::Result<Oid, IterErr>>,
    mut progress: impl Progress,
    should_interrupt: &AtomicBool,
    input_object_expansion: ObjectExpansion,
) -> Result<find::existing::Error<Find::Error>, IterErr>
where
    Find: crate::Find,
    Oid: Into<ObjectId>,
    IterErr: std::error::Error,
{
    let seen_objs = RefCell::new(git_hashtable::HashSet::default());

    let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
    expand::this(
        &db,
        input_object_expansion,
        &seen_objs,
        object_ids,
        &mut buf1,
        &mut buf2,
        &mut progress,
        should_interrupt,
        false, // do not look up pack locations of expanded objects
    )
}

mod expand {
    use std::sync::atomic::{AtomicBool, Ordering};

    use git_features::progress::Progress;
    use git_hash::{oid, ObjectId};
    use git_object::{CommitRefIter, TagRefIter};

    use super::{
        tree,
        types::{Error, ObjectExpansion, Outcome},
        util,
    };
    use crate::{
        data::{output, output::count::PackLocation},
        find, FindExt,
    };

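    /// Expand each id in `oids` according to `input_object_expansion` and return the resulting
    /// de-duplicated counts along with statistics. `seen_objs` is the shared set used for de-duplication,
    /// and `allow_pack_lookups` controls whether pack locations of expanded objects are looked up eagerly.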
    #[allow(clippy::too_many_arguments)]
    pub fn this<Find, IterErr, Oid>(
        db: &Find,
        input_object_expansion: ObjectExpansion,
        seen_objs: &impl util::InsertImmutable<ObjectId>,
        oids: impl IntoIterator<Item = std::result::Result<Oid, IterErr>>,
        buf1: &mut Vec<u8>,
        #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
        progress: &mut impl Progress,
        should_interrupt: &AtomicBool,
        allow_pack_lookups: bool,
    ) -> super::Result<find::existing::Error<Find::Error>, IterErr>
    where
        Find: crate::Find,
        Oid: Into<ObjectId>,
        IterErr: std::error::Error,
    {
        use ObjectExpansion::*;

        let mut out = Vec::new();
        let mut tree_traversal_state = git_traverse::tree::breadthfirst::State::default();
        let mut tree_diff_state = git_diff::tree::State::default();
        let mut parent_commit_ids = Vec::new();
        let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
        let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
        let mut outcome = Outcome::default();

        let stats = &mut outcome;
        for id in oids.into_iter() {
            if should_interrupt.load(Ordering::Relaxed) {
                return Err(Error::Interrupted);
            }

            let id = id.map(|oid| oid.into()).map_err(Error::InputIteration)?;
            let (obj, location) = db.find(id, buf1)?;
            stats.input_objects += 1;
            match input_object_expansion {
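                // Peel tags to their target; count commits along with their tree plus whatever their tree
                // adds compared to the trees of their parent commits; count trees and blobs as-is.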
                TreeAdditionsComparedToAncestor => {
                    use git_object::Kind::*;
                    let mut obj = obj;
                    let mut location = location;
                    let mut id = id.to_owned();

                    loop {
                        push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false);
                        match obj.kind {
                            Tree | Blob => break,
                            Tag => {
                                id = TagRefIter::from_bytes(obj.data)
                                    .target_id()
                                    .expect("every tag has a target");
                                let tmp = db.find(id, buf1)?;

                                obj = tmp.0;
                                location = tmp.1;

                                stats.expanded_objects += 1;
                                continue;
                            }
                            Commit => {
                                let current_tree_iter = {
                                    let mut commit_iter = CommitRefIter::from_bytes(obj.data);
                                    let tree_id = commit_iter.tree_id().expect("every commit has a tree");
                                    parent_commit_ids.clear();
                                    for token in commit_iter {
                                        match token {
                                            Ok(git_object::commit::ref_iter::Token::Parent { id }) => {
                                                parent_commit_ids.push(id)
                                            }
                                            Ok(_) => break,
                                            Err(err) => return Err(Error::CommitDecode(err)),
                                        }
                                    }
                                    let (obj, location) = db.find(tree_id, buf1)?;
                                    push_obj_count_unique(
                                        &mut out, seen_objs, &tree_id, location, progress, stats, true,
                                    );
                                    git_object::TreeRefIter::from_bytes(obj.data)
                                };

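                                // A commit without parents gets its entire tree traversed; otherwise only
                                // the objects needed to obtain this tree from each parent tree are collected.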
                                let objects = if parent_commit_ids.is_empty() {
                                    traverse_delegate.clear();
                                    git_traverse::tree::breadthfirst(
                                        current_tree_iter,
                                        &mut tree_traversal_state,
                                        |oid, buf| {
                                            stats.decoded_objects += 1;
                                            match db.find(oid, buf).ok() {
                                                Some((obj, location)) => {
                                                    progress.inc();
                                                    stats.expanded_objects += 1;
                                                    out.push(output::Count::from_data(oid, location));
                                                    obj.try_into_tree_iter()
                                                }
                                                None => None,
                                            }
                                        },
                                        &mut traverse_delegate,
                                    )
                                    .map_err(Error::TreeTraverse)?;
                                    &traverse_delegate.non_trees
                                } else {
                                    for commit_id in &parent_commit_ids {
                                        let parent_tree_id = {
                                            let (parent_commit_obj, location) = db.find(commit_id, buf2)?;

                                            push_obj_count_unique(
                                                &mut out, seen_objs, commit_id, location, progress, stats, true,
                                            );
                                            CommitRefIter::from_bytes(parent_commit_obj.data)
                                                .tree_id()
                                                .expect("every commit has a tree")
                                        };
                                        let parent_tree = {
                                            let (parent_tree_obj, location) = db.find(parent_tree_id, buf2)?;
                                            push_obj_count_unique(
                                                &mut out,
                                                seen_objs,
                                                &parent_tree_id,
                                                location,
                                                progress,
                                                stats,
                                                true,
                                            );
                                            git_object::TreeRefIter::from_bytes(parent_tree_obj.data)
                                        };

                                        changes_delegate.clear();
                                        git_diff::tree::Changes::from(Some(parent_tree))
                                            .needed_to_obtain(
                                                current_tree_iter.clone(),
                                                &mut tree_diff_state,
                                                |oid, buf| {
                                                    stats.decoded_objects += 1;
                                                    db.find_tree_iter(oid, buf).map(|t| t.0)
                                                },
                                                &mut changes_delegate,
                                            )
                                            .map_err(Error::TreeChanges)?;
                                    }
                                    &changes_delegate.objects
                                };
                                for id in objects.iter() {
                                    out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups));
                                }
                                break;
                            }
                        }
                    }
                }
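                // Peel tags to their target and commits to their tree, then count the entire tree contents;
                // blobs are counted as-is.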
                TreeContents => {
                    use git_object::Kind::*;
                    let mut id = id;
                    let mut obj = (obj, location);
                    loop {
                        push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), progress, stats, false);
                        match obj.0.kind {
                            Tree => {
                                traverse_delegate.clear();
                                git_traverse::tree::breadthfirst(
                                    git_object::TreeRefIter::from_bytes(obj.0.data),
                                    &mut tree_traversal_state,
                                    |oid, buf| {
                                        stats.decoded_objects += 1;
                                        match db.find(oid, buf).ok() {
                                            Some((obj, location)) => {
                                                progress.inc();
                                                stats.expanded_objects += 1;
                                                out.push(output::Count::from_data(oid, location));
                                                obj.try_into_tree_iter()
                                            }
                                            None => None,
                                        }
                                    },
                                    &mut traverse_delegate,
                                )
                                .map_err(Error::TreeTraverse)?;
                                for id in traverse_delegate.non_trees.iter() {
                                    out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups));
                                }
                                break;
                            }
                            Commit => {
                                id = CommitRefIter::from_bytes(obj.0.data)
                                    .tree_id()
                                    .expect("every commit has a tree");
                                stats.expanded_objects += 1;
                                obj = db.find(id, buf1)?;
                                continue;
                            }
                            Blob => break,
                            Tag => {
                                id = TagRefIter::from_bytes(obj.0.data)
                                    .target_id()
                                    .expect("every tag has a target");
                                stats.expanded_objects += 1;
                                obj = db.find(id, buf1)?;
                                continue;
                            }
                        }
                    }
                }
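                // Count the input object itself without any expansion.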
                AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false),
            }
        }
        outcome.total_objects = out.len();
        Ok((out, outcome))
    }

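    /// Add a count for `id` to `out` unless it was seen before, updating progress and statistics accordingly.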
    #[inline]
    fn push_obj_count_unique(
        out: &mut Vec<output::Count>,
        all_seen: &impl util::InsertImmutable<ObjectId>,
        id: &oid,
        location: Option<crate::data::entry::Location>,
        progress: &mut impl Progress,
        statistics: &mut Outcome,
        count_expanded: bool,
    ) {
        let inserted = all_seen.insert(id.to_owned());
        if inserted {
            progress.inc();
            statistics.decoded_objects += 1;
            if count_expanded {
                statistics.expanded_objects += 1;
            }
            out.push(output::Count::from_data(id, location));
        }
    }

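    /// Create an `output::Count` for `id`, optionally looking up its pack location right away.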
    #[inline]
    fn id_to_count<Find: crate::Find>(
        db: &Find,
        buf: &mut Vec<u8>,
        id: &oid,
        progress: &mut impl Progress,
        statistics: &mut Outcome,
        allow_pack_lookups: bool,
    ) -> output::Count {
        progress.inc();
        statistics.expanded_objects += 1;
        output::Count {
            id: id.to_owned(),
            entry_pack_location: if allow_pack_lookups {
                PackLocation::LookedUp(db.location_by_oid(id, buf))
            } else {
                PackLocation::NotLookedUp
            },
        }
    }
}