git_odb/store_impls/dynamic/
iter.rs

1use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter};
2
3use git_hash::ObjectId;
4
5use crate::{
6    loose,
7    store::{handle, handle::SingleOrMultiIndex, types::PackId},
8    store_impls::dynamic,
9};
10
11struct EntryForOrdering {
12    pack_offset: u64,
13    entry_index: u32,
14    pack_index: u16,
15}
16
17enum State {
18    Pack {
19        index_iter: IntoIter<handle::IndexLookup>,
20        index: handle::IndexLookup,
21        ordered_entries: Option<Vec<EntryForOrdering>>,
22        entry_index: u32,
23        num_objects: u32,
24    },
25    Loose {
26        iter: loose::Iter,
27        index: usize,
28    },
29    Depleted,
30}
31
32/// Define the order in which objects are returned.
33#[derive(Debug, Copy, Clone)]
34pub enum Ordering {
35    /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
36    /// as sorted by their names as well.
37    ///
38    /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible,
39    /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this
40    /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order.
41    PackLexicographicalThenLooseLexicographical,
42    /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
43    /// Then follow loose objects sorted by their names.
44    ///
45    /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
46    /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches
47    /// are more efficiently used that way.
48    PackAscendingOffsetThenLooseLexicographical,
49}
50
51impl Default for Ordering {
52    fn default() -> Self {
53        Ordering::PackLexicographicalThenLooseLexicographical
54    }
55}
56
57/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
58/// order that is costly to traverse when querying object information or decoding them.
59///
60/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
61pub struct AllObjects {
62    state: State,
63    num_objects: usize,
64    loose_dbs: Arc<Vec<loose::Store>>,
65    order: Ordering,
66}
67
68/// Builder
69impl AllObjects {
70    /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
71    pub fn with_ordering(mut self, order: Ordering) -> Self {
72        self.order = order;
73        self
74    }
75}
76
77impl AllObjects {
78    /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
79    pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> {
80        let snapshot = db.load_all_indices()?;
81
82        let packed_objects = snapshot
83            .indices
84            .iter()
85            .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize));
86        let mut index_iter = snapshot.indices.into_iter();
87        let loose_dbs = snapshot.loose_dbs;
88        let order = Default::default();
89        let state = match index_iter.next() {
90            Some(index) => {
91                let num_objects = index.num_objects();
92                State::Pack {
93                    index_iter,
94                    ordered_entries: maybe_sort_entries(&index, order),
95                    index,
96                    entry_index: 0,
97                    num_objects,
98                }
99            }
100            None => {
101                let index = 0;
102                State::Loose {
103                    iter: loose_dbs.get(index).expect("at least one loose db").iter(),
104                    index,
105                }
106            }
107        };
108        Ok(AllObjects {
109            state,
110            loose_dbs,
111            num_objects: packed_objects,
112            order,
113        })
114    }
115}
116
117fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> {
118    let mut order: Vec<_> = match order {
119        Ordering::PackLexicographicalThenLooseLexicographical => return None,
120        Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file {
121            // We know that we cannot have more than u32 entry indices per pack.
122            SingleOrMultiIndex::Single { index, .. } => index
123                .iter()
124                .enumerate()
125                .map(|(idx, e)| EntryForOrdering {
126                    pack_offset: e.pack_offset,
127                    entry_index: idx as u32,
128                    pack_index: 0,
129                })
130                .collect(),
131            SingleOrMultiIndex::Multi { index, .. } => index
132                .iter()
133                .enumerate()
134                .map(|(idx, e)| EntryForOrdering {
135                    pack_offset: e.pack_offset,
136                    entry_index: idx as u32,
137                    pack_index: {
138                        debug_assert!(
139                            e.pack_index < PackId::max_packs_in_multi_index(),
140                            "this shows the relation between u16 and pack_index (u32) and why this is OK"
141                        );
142                        e.pack_index as u16
143                    },
144                })
145                .collect(),
146        },
147    };
148    order.sort_by(|a, b| {
149        a.pack_index
150            .cmp(&b.pack_index)
151            .then_with(|| a.pack_offset.cmp(&b.pack_offset))
152    });
153    Some(order)
154}
155
156impl Iterator for AllObjects {
157    type Item = Result<ObjectId, loose::iter::Error>;
158
159    fn next(&mut self) -> Option<Self::Item> {
160        match &mut self.state {
161            State::Depleted => None,
162            State::Pack {
163                index_iter,
164                ordered_entries,
165                index,
166                entry_index,
167                num_objects,
168            } => {
169                if *entry_index < *num_objects {
170                    let oid = match ordered_entries {
171                        Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index),
172                        None => index.oid_at_index(*entry_index),
173                    }
174                    .to_owned();
175                    *entry_index += 1;
176                    Some(Ok(oid))
177                } else {
178                    match index_iter.next() {
179                        Some(new_index) => {
180                            *ordered_entries = maybe_sort_entries(&new_index, self.order);
181                            *index = new_index;
182                            *entry_index = 0;
183                            *num_objects = index.num_objects();
184                        }
185                        None => {
186                            let index = 0;
187                            self.state = State::Loose {
188                                iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(),
189                                index,
190                            }
191                        }
192                    }
193                    self.next()
194                }
195            }
196            State::Loose { iter, index } => match iter.next() {
197                Some(id) => Some(id),
198                None => {
199                    *index += 1;
200                    match self.loose_dbs.get(*index).map(|ldb| ldb.iter()) {
201                        Some(new_iter) => {
202                            *iter = new_iter;
203                            self.next()
204                        }
205                        None => {
206                            self.state = State::Depleted;
207                            None
208                        }
209                    }
210                }
211            },
212        }
213    }
214
215    fn size_hint(&self) -> (usize, Option<usize>) {
216        (self.num_objects, None)
217    }
218}
219
220impl<S> super::Handle<S>
221where
222    S: Deref<Target = super::Store> + Clone,
223{
224    /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
225    /// followed by all loose objects.
226    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
227        AllObjects::new(self.store_ref())
228    }
229}
230
231impl dynamic::Store {
232    /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
233    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
234        AllObjects::new(self)
235    }
236}