git_pack/data/output/count/objects/
mod.rs

1use std::{
2    cell::RefCell,
3    sync::{atomic::AtomicBool, Arc},
4};
5
6use git_features::{parallel, progress::Progress};
7use git_hash::ObjectId;
8
9use crate::{data::output, find};
10
11pub(in crate::data::output::count::objects_impl) mod reduce;
12mod util;
13
14mod types;
15pub use types::{Error, ObjectExpansion, Options, Outcome};
16
17mod tree;
18
/// The return type used by [`objects()`].
///
/// On success it holds the generated [`Count`][output::Count]s together with the [`Outcome`] describing
/// the work that was done. On failure it holds an [`Error`] parameterized over `E1` and `E2`; in this
/// module `E1` is instantiated with the object-lookup error and `E2` with the error type of the input iterator.
pub type Result<E1, E2> = std::result::Result<(Vec<output::Count>, Outcome), Error<E1, E2>>;
21
22/// Generate [`Count`][output::Count]s from input `objects` with object expansion based on [`options`][Options]
23/// to learn which objects would would constitute a pack. This step is required to know exactly how many objects would
24/// be in a pack while keeping data around to avoid minimize object database access.
25///
26/// A [`Count`][output::Count] object maintains enough state to greatly accelerate future access of packed objects.
27///
28/// * `db` - the object store to use for accessing objects.
29/// * `objects_ids`
30///   * A list of objects ids to add to the pack. Duplication checks are performed so no object is ever added to a pack twice.
31///   * Objects may be expanded based on the provided [`options`][Options]
32/// * `progress`
33///   * a way to obtain progress information
34/// * `should_interrupt`
35///  * A flag that is set to true if the operation should stop
36/// * `options`
37///   * more configuration
38pub fn objects<Find, Iter, IterErr, Oid>(
39    db: Find,
40    objects_ids: Iter,
41    progress: impl Progress,
42    should_interrupt: &AtomicBool,
43    Options {
44        thread_limit,
45        input_object_expansion,
46        chunk_size,
47    }: Options,
48) -> Result<find::existing::Error<Find::Error>, IterErr>
49where
50    Find: crate::Find + Send + Clone,
51    <Find as crate::Find>::Error: Send,
52    Iter: Iterator<Item = std::result::Result<Oid, IterErr>> + Send,
53    Oid: Into<ObjectId> + Send,
54    IterErr: std::error::Error + Send,
55{
56    let lower_bound = objects_ids.size_hint().0;
57    let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit(
58        chunk_size,
59        if lower_bound == 0 { None } else { Some(lower_bound) },
60        thread_limit,
61        None,
62    );
63    let chunks = git_features::iter::Chunks {
64        inner: objects_ids,
65        size: chunk_size,
66    };
67    let seen_objs = dashmap::DashSet::<ObjectId, git_hashtable::hash::Builder>::default();
68    let progress = Arc::new(parking_lot::Mutex::new(progress));
69
70    parallel::in_parallel(
71        chunks,
72        thread_limit,
73        {
74            let progress = Arc::clone(&progress);
75            move |n| {
76                (
77                    Vec::new(), // object data buffer
78                    Vec::new(), // object data buffer 2 to hold two objects at a time
79                    {
80                        let mut p = progress
81                            .lock()
82                            .add_child_with_id(format!("thread {n}"), git_features::progress::UNKNOWN);
83                        p.init(None, git_features::progress::count("objects"));
84                        p
85                    },
86                )
87            }
88        },
89        {
90            let seen_objs = &seen_objs;
91            move |oids: Vec<std::result::Result<Oid, IterErr>>, (buf1, buf2, progress)| {
92                expand::this(
93                    &db,
94                    input_object_expansion,
95                    seen_objs,
96                    oids,
97                    buf1,
98                    buf2,
99                    progress,
100                    should_interrupt,
101                    true, /*allow pack lookups*/
102                )
103            }
104        },
105        reduce::Statistics::new(progress),
106    )
107}
108
109/// Like [`objects()`] but using a single thread only to mostly save on the otherwise required overhead.
110pub fn objects_unthreaded<Find, IterErr, Oid>(
111    db: Find,
112    object_ids: impl Iterator<Item = std::result::Result<Oid, IterErr>>,
113    mut progress: impl Progress,
114    should_interrupt: &AtomicBool,
115    input_object_expansion: ObjectExpansion,
116) -> Result<find::existing::Error<Find::Error>, IterErr>
117where
118    Find: crate::Find,
119    Oid: Into<ObjectId>,
120    IterErr: std::error::Error,
121{
122    let seen_objs = RefCell::new(git_hashtable::HashSet::default());
123
124    let (mut buf1, mut buf2) = (Vec::new(), Vec::new());
125    expand::this(
126        &db,
127        input_object_expansion,
128        &seen_objs,
129        object_ids,
130        &mut buf1,
131        &mut buf2,
132        &mut progress,
133        should_interrupt,
134        false, /*allow pack lookups*/
135    )
136}
137
138mod expand {
139    use std::sync::atomic::{AtomicBool, Ordering};
140
141    use git_features::progress::Progress;
142    use git_hash::{oid, ObjectId};
143    use git_object::{CommitRefIter, TagRefIter};
144
145    use super::{
146        tree,
147        types::{Error, ObjectExpansion, Outcome},
148        util,
149    };
150    use crate::{
151        data::{output, output::count::PackLocation},
152        find, FindExt,
153    };
154
155    #[allow(clippy::too_many_arguments)]
156    pub fn this<Find, IterErr, Oid>(
157        db: &Find,
158        input_object_expansion: ObjectExpansion,
159        seen_objs: &impl util::InsertImmutable<ObjectId>,
160        oids: impl IntoIterator<Item = std::result::Result<Oid, IterErr>>,
161        buf1: &mut Vec<u8>,
162        #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>,
163        progress: &mut impl Progress,
164        should_interrupt: &AtomicBool,
165        allow_pack_lookups: bool,
166    ) -> super::Result<find::existing::Error<Find::Error>, IterErr>
167    where
168        Find: crate::Find,
169        Oid: Into<ObjectId>,
170        IterErr: std::error::Error,
171    {
172        use ObjectExpansion::*;
173
174        let mut out = Vec::new();
175        let mut tree_traversal_state = git_traverse::tree::breadthfirst::State::default();
176        let mut tree_diff_state = git_diff::tree::State::default();
177        let mut parent_commit_ids = Vec::new();
178        let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
179        let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
180        let mut outcome = Outcome::default();
181
182        let stats = &mut outcome;
183        for id in oids.into_iter() {
184            if should_interrupt.load(Ordering::Relaxed) {
185                return Err(Error::Interrupted);
186            }
187
188            let id = id.map(|oid| oid.into()).map_err(Error::InputIteration)?;
189            let (obj, location) = db.find(id, buf1)?;
190            stats.input_objects += 1;
191            match input_object_expansion {
192                TreeAdditionsComparedToAncestor => {
193                    use git_object::Kind::*;
194                    let mut obj = obj;
195                    let mut location = location;
196                    let mut id = id.to_owned();
197
198                    loop {
199                        push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false);
200                        match obj.kind {
201                            Tree | Blob => break,
202                            Tag => {
203                                id = TagRefIter::from_bytes(obj.data)
204                                    .target_id()
205                                    .expect("every tag has a target");
206                                let tmp = db.find(id, buf1)?;
207
208                                obj = tmp.0;
209                                location = tmp.1;
210
211                                stats.expanded_objects += 1;
212                                continue;
213                            }
214                            Commit => {
215                                let current_tree_iter = {
216                                    let mut commit_iter = CommitRefIter::from_bytes(obj.data);
217                                    let tree_id = commit_iter.tree_id().expect("every commit has a tree");
218                                    parent_commit_ids.clear();
219                                    for token in commit_iter {
220                                        match token {
221                                            Ok(git_object::commit::ref_iter::Token::Parent { id }) => {
222                                                parent_commit_ids.push(id)
223                                            }
224                                            Ok(_) => break,
225                                            Err(err) => return Err(Error::CommitDecode(err)),
226                                        }
227                                    }
228                                    let (obj, location) = db.find(tree_id, buf1)?;
229                                    push_obj_count_unique(
230                                        &mut out, seen_objs, &tree_id, location, progress, stats, true,
231                                    );
232                                    git_object::TreeRefIter::from_bytes(obj.data)
233                                };
234
235                                let objects = if parent_commit_ids.is_empty() {
236                                    traverse_delegate.clear();
237                                    git_traverse::tree::breadthfirst(
238                                        current_tree_iter,
239                                        &mut tree_traversal_state,
240                                        |oid, buf| {
241                                            stats.decoded_objects += 1;
242                                            match db.find(oid, buf).ok() {
243                                                Some((obj, location)) => {
244                                                    progress.inc();
245                                                    stats.expanded_objects += 1;
246                                                    out.push(output::Count::from_data(oid, location));
247                                                    obj.try_into_tree_iter()
248                                                }
249                                                None => None,
250                                            }
251                                        },
252                                        &mut traverse_delegate,
253                                    )
254                                    .map_err(Error::TreeTraverse)?;
255                                    &traverse_delegate.non_trees
256                                } else {
257                                    for commit_id in &parent_commit_ids {
258                                        let parent_tree_id = {
259                                            let (parent_commit_obj, location) = db.find(commit_id, buf2)?;
260
261                                            push_obj_count_unique(
262                                                &mut out, seen_objs, commit_id, location, progress, stats, true,
263                                            );
264                                            CommitRefIter::from_bytes(parent_commit_obj.data)
265                                                .tree_id()
266                                                .expect("every commit has a tree")
267                                        };
268                                        let parent_tree = {
269                                            let (parent_tree_obj, location) = db.find(parent_tree_id, buf2)?;
270                                            push_obj_count_unique(
271                                                &mut out,
272                                                seen_objs,
273                                                &parent_tree_id,
274                                                location,
275                                                progress,
276                                                stats,
277                                                true,
278                                            );
279                                            git_object::TreeRefIter::from_bytes(parent_tree_obj.data)
280                                        };
281
282                                        changes_delegate.clear();
283                                        git_diff::tree::Changes::from(Some(parent_tree))
284                                            .needed_to_obtain(
285                                                current_tree_iter.clone(),
286                                                &mut tree_diff_state,
287                                                |oid, buf| {
288                                                    stats.decoded_objects += 1;
289                                                    db.find_tree_iter(oid, buf).map(|t| t.0)
290                                                },
291                                                &mut changes_delegate,
292                                            )
293                                            .map_err(Error::TreeChanges)?;
294                                    }
295                                    &changes_delegate.objects
296                                };
297                                for id in objects.iter() {
298                                    out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups));
299                                }
300                                break;
301                            }
302                        }
303                    }
304                }
305                TreeContents => {
306                    use git_object::Kind::*;
307                    let mut id = id;
308                    let mut obj = (obj, location);
309                    loop {
310                        push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), progress, stats, false);
311                        match obj.0.kind {
312                            Tree => {
313                                traverse_delegate.clear();
314                                git_traverse::tree::breadthfirst(
315                                    git_object::TreeRefIter::from_bytes(obj.0.data),
316                                    &mut tree_traversal_state,
317                                    |oid, buf| {
318                                        stats.decoded_objects += 1;
319                                        match db.find(oid, buf).ok() {
320                                            Some((obj, location)) => {
321                                                progress.inc();
322                                                stats.expanded_objects += 1;
323                                                out.push(output::Count::from_data(oid, location));
324                                                obj.try_into_tree_iter()
325                                            }
326                                            None => None,
327                                        }
328                                    },
329                                    &mut traverse_delegate,
330                                )
331                                .map_err(Error::TreeTraverse)?;
332                                for id in traverse_delegate.non_trees.iter() {
333                                    out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups));
334                                }
335                                break;
336                            }
337                            Commit => {
338                                id = CommitRefIter::from_bytes(obj.0.data)
339                                    .tree_id()
340                                    .expect("every commit has a tree");
341                                stats.expanded_objects += 1;
342                                obj = db.find(id, buf1)?;
343                                continue;
344                            }
345                            Blob => break,
346                            Tag => {
347                                id = TagRefIter::from_bytes(obj.0.data)
348                                    .target_id()
349                                    .expect("every tag has a target");
350                                stats.expanded_objects += 1;
351                                obj = db.find(id, buf1)?;
352                                continue;
353                            }
354                        }
355                    }
356                }
357                AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false),
358            }
359        }
360        outcome.total_objects = out.len();
361        Ok((out, outcome))
362    }
363
364    #[inline]
365    fn push_obj_count_unique(
366        out: &mut Vec<output::Count>,
367        all_seen: &impl util::InsertImmutable<ObjectId>,
368        id: &oid,
369        location: Option<crate::data::entry::Location>,
370        progress: &mut impl Progress,
371        statistics: &mut Outcome,
372        count_expanded: bool,
373    ) {
374        let inserted = all_seen.insert(id.to_owned());
375        if inserted {
376            progress.inc();
377            statistics.decoded_objects += 1;
378            if count_expanded {
379                statistics.expanded_objects += 1;
380            }
381            out.push(output::Count::from_data(id, location));
382        }
383    }
384
385    #[inline]
386    fn id_to_count<Find: crate::Find>(
387        db: &Find,
388        buf: &mut Vec<u8>,
389        id: &oid,
390        progress: &mut impl Progress,
391        statistics: &mut Outcome,
392        allow_pack_lookups: bool,
393    ) -> output::Count {
394        progress.inc();
395        statistics.expanded_objects += 1;
396        output::Count {
397            id: id.to_owned(),
398            entry_pack_location: if allow_pack_lookups {
399                PackLocation::LookedUp(db.location_by_oid(id, buf))
400            } else {
401                PackLocation::NotLookedUp
402            },
403        }
404    }
405}