git_odb/store_impls/dynamic/find.rs

use std::{convert::TryInto, ops::Deref};

use git_pack::cache::DecodeEntry;

use crate::store::{handle, load_index};

pub(crate) mod error {
    use crate::{loose, pack};

    /// Returned by [`Handle::try_find()`][git_pack::Find::try_find()]
    #[derive(thiserror::Error, Debug)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error("An error occurred while obtaining an object from the loose object store")]
        Loose(#[from] loose::find::Error),
        #[error("An error occurred while obtaining an object from the packed object store")]
        Pack(#[from] pack::data::decode::Error),
        #[error(transparent)]
        LoadIndex(#[from] crate::store::load_index::Error),
        #[error(transparent)]
        LoadPack(#[from] std::io::Error),
        #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)]
        DeltaBaseRecursionLimit {
            /// The maximum recursion depth that was reached.
            max_depth: usize,
            /// The original object to look up.
            id: git_hash::ObjectId,
        },
        #[error("The base object {} could not be found but is required to decode {}", .base_id, .id)]
        DeltaBaseMissing {
            /// The id of the base object which could not be found.
            base_id: git_hash::ObjectId,
            /// The original object to look up.
            id: git_hash::ObjectId,
        },
        #[error("An error occurred when looking up a ref delta base object {} to decode {}", .base_id, .id)]
        DeltaBaseLookup {
            #[source]
            err: Box<Self>,
            /// The id of the base object whose lookup failed.
            base_id: git_hash::ObjectId,
            /// The original object to look up.
            id: git_hash::ObjectId,
        },
    }

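    /// Tracks the current recursion depth and the originally requested object while ref-delta bases are resolved recursively.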
    #[derive(Copy, Clone)]
    pub(crate) struct DeltaBaseRecursion<'a> {
        pub depth: usize,
        pub original_id: &'a git_hash::oid,
    }

    impl<'a> DeltaBaseRecursion<'a> {
        pub fn new(id: &'a git_hash::oid) -> Self {
            Self {
                original_id: id,
                depth: 0,
            }
        }
        pub fn inc_depth(mut self) -> Self {
            self.depth += 1;
            self
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn error_size() {
            let actual = std::mem::size_of::<Error>();
            assert!(actual <= 88, "{actual} <= 88: should not grow without us noticing");
        }
    }
}
pub use error::Error;

use crate::{store::types::PackId, Find};

impl<S> super::Handle<S>
where
    S: Deref<Target = super::Store> + Clone,
{
    fn try_find_cached_inner<'a, 'b>(
        &'b self,
        mut id: &'b git_hash::oid,
        buffer: &'a mut Vec<u8>,
        pack_cache: &mut impl DecodeEntry,
        snapshot: &mut load_index::Snapshot,
        recursion: Option<error::DeltaBaseRecursion<'_>>,
    ) -> Result<Option<(git_object::Data<'a>, Option<git_pack::data::entry::Location>)>, Error> {
        if let Some(r) = recursion {
            if r.depth >= self.max_recursion_depth {
                return Err(Error::DeltaBaseRecursionLimit {
                    max_depth: self.max_recursion_depth,
                    id: r.original_id.to_owned(),
                });
            }
        } else if !self.ignore_replacements {
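            // Apply object replacements: if `id` appears in the sorted `(original, replacement)` pairs,
            // substitute the replacement id before looking the object up.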
            if let Ok(pos) = self
                .store
                .replacements
                .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id))
            {
                id = self.store.replacements[pos].1.as_ref();
            }
        }

        'outer: loop {
            {
                let marker = snapshot.marker;
                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
                    if let Some(handle::index_lookup::Outcome {
                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
                        index_file,
                        pack: possibly_pack,
                    }) = index.lookup(id)
                    {
                        let pack = match possibly_pack {
                            Some(pack) => pack,
                            None => match self.store.load_pack(pack_id, marker)? {
                                Some(pack) => {
                                    *possibly_pack = Some(pack);
                                    possibly_pack.as_deref().expect("just put it in")
                                }
                                None => {
                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
                                    match self.store.load_one_index(self.refresh, snapshot.marker)? {
                                        Some(new_snapshot) => {
                                            *snapshot = new_snapshot;
                                            self.clear_cache();
                                            continue 'outer;
                                        }
                                        None => {
                                            // Nothing new in the index. It's somewhat unexpected to have no pack but also
                                            // no new index yet. We set the new index before removing any slots, so
                                            // this should be observable.
                                            return Ok(None);
                                        }
                                    }
                                }
                            },
                        };
                        let entry = pack.entry(pack_offset);
                        let header_size = entry.header_size();
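                        // Decode the object, resolving delta bases by their offset within the same pack via its index,
                        // and record where the entry lives so callers can locate it again.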
                        let res = match pack.decode_entry(
                            entry,
                            buffer,
                            |id, _out| {
                                index_file.pack_offset_by_id(id).map(|pack_offset| {
                                    git_pack::data::decode::entry::ResolvedBase::InPack(pack.entry(pack_offset))
                                })
                            },
                            pack_cache,
                        ) {
                            Ok(r) => Ok((
                                git_object::Data {
                                    kind: r.kind,
                                    data: buffer.as_slice(),
                                },
                                Some(git_pack::data::entry::Location {
                                    pack_id: pack.id,
                                    pack_offset,
                                    entry_size: r.compressed_size + header_size,
                                }),
                            )),
                            Err(git_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => {
                                // Only with a multi-pack index is it allowed to refer to bases in other packs of the same
                                // multi-pack. Anything else would constitute a thin pack, which is only allowed in transit.
                                // However, if we somehow end up with one, we resolve it safely, even though we could
                                // refuse to handle this case and error out instead.

                                // Since this is a special case, we just allocate here to make it work. It's an actual ref-delta object,
                                // as sent by some servers, that points to an object outside of the pack we are looking
                                // at right now. With the complexities of loading packs, we go into recursion here. Git itself
                                // doesn't do a cycle check, and we won't either, but we limit the recursion depth.
                                // The whole ordeal isn't as efficient as it could be due to the memory allocation and
                                // the later mem-copy when trying again.
                                let mut buf = Vec::new();
                                let obj_kind = self
                                    .try_find_cached_inner(
                                        &base_id,
                                        &mut buf,
                                        pack_cache,
                                        snapshot,
                                        recursion
                                            .map(|r| r.inc_depth())
                                            .or_else(|| error::DeltaBaseRecursion::new(id).into()),
                                    )
                                    .map_err(|err| Error::DeltaBaseLookup {
                                        err: Box::new(err),
                                        base_id,
                                        id: id.to_owned(),
                                    })?
                                    .ok_or_else(|| Error::DeltaBaseMissing {
                                        base_id,
                                        id: id.to_owned(),
                                    })?
                                    .0
                                    .kind;
                                let handle::index_lookup::Outcome {
                                    object_index:
                                        handle::IndexForObjectInPack {
                                            pack_id: _,
                                            pack_offset,
                                        },
                                    index_file,
                                    pack: possibly_pack,
                                } = match snapshot.indices[idx].lookup(id) {
                                    Some(res) => res,
                                    None => {
                                        let mut out = None;
                                        for index in snapshot.indices.iter_mut() {
                                            out = index.lookup(id);
                                            if out.is_some() {
                                                break;
                                            }
                                        }

                                        out.unwrap_or_else(|| {
                                            panic!("could not find object {id} in any index after looking up one of its base objects {base_id}")
                                        })
                                    }
                                };
                                let pack = possibly_pack
                                    .as_ref()
                                    .expect("pack to still be available like just now");
                                let entry = pack.entry(pack_offset);
                                let header_size = entry.header_size();
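                                // Decode again; this time the resolver also serves the previously missing base by copying
                                // the bytes just decoded into `buf` whenever it is asked for `base_id`.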
                                pack.decode_entry(
                                    entry,
                                    buffer,
                                    |id, out| {
                                        index_file
                                            .pack_offset_by_id(id)
                                            .map(|pack_offset| {
                                                git_pack::data::decode::entry::ResolvedBase::InPack(
                                                    pack.entry(pack_offset),
                                                )
                                            })
                                            .or_else(|| {
                                                (id == base_id).then(|| {
                                                    out.resize(buf.len(), 0);
                                                    out.copy_from_slice(buf.as_slice());
                                                    git_pack::data::decode::entry::ResolvedBase::OutOfPack {
                                                        kind: obj_kind,
                                                        end: out.len(),
                                                    }
                                                })
                                            })
                                    },
                                    pack_cache,
                                )
                                .map(move |r| {
                                    (
                                        git_object::Data {
                                            kind: r.kind,
                                            data: buffer.as_slice(),
                                        },
                                        Some(git_pack::data::entry::Location {
                                            pack_id: pack.id,
                                            pack_offset,
                                            entry_size: r.compressed_size + header_size,
                                        }),
                                    )
                                })
                            }
                            Err(err) => Err(err),
                        }?;

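                        // Move the index that contained the object to the front so future lookups try it first.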
                        if idx != 0 {
                            snapshot.indices.swap(0, idx);
                        }
                        return Ok(Some(res));
                    }
                }
            }

            for lodb in snapshot.loose_dbs.iter() {
                // TODO: remove this double-lookup once the borrow checker allows it.
                if lodb.contains(id) {
                    return lodb
                        .try_find(id, buffer)
                        .map(|obj| obj.map(|obj| (obj, None)))
                        .map_err(Into::into);
                }
            }

            match self.store.load_one_index(self.refresh, snapshot.marker)? {
                Some(new_snapshot) => {
                    *snapshot = new_snapshot;
                    self.clear_cache();
                }
                None => return Ok(None),
            }
        }
    }

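    /// Discard the memoized count of packed objects, as a new snapshot may invalidate it.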
    pub(crate) fn clear_cache(&self) {
        self.packed_object_count.borrow_mut().take();
    }
}

impl<S> git_pack::Find for super::Handle<S>
where
    S: Deref<Target = super::Store> + Clone,
{
    type Error = Error;

    // TODO: probably make this method fallible, but that would mean its own error type.
    fn contains(&self, id: impl AsRef<git_hash::oid>) -> bool {
        let id = id.as_ref();
        let mut snapshot = self.snapshot.borrow_mut();
        loop {
            for (idx, index) in snapshot.indices.iter().enumerate() {
                if index.contains(id) {
                    if idx != 0 {
                        snapshot.indices.swap(0, idx);
                    }
                    return true;
                }
            }

            for lodb in snapshot.loose_dbs.iter() {
                if lodb.contains(id) {
                    return true;
                }
            }

            match self.store.load_one_index(self.refresh, snapshot.marker) {
                Ok(Some(new_snapshot)) => {
                    *snapshot = new_snapshot;
                    self.clear_cache();
                }
                Ok(None) => return false, // nothing more to load, or our refresh mode doesn't allow disk refreshes
                Err(_) => return false, // something went wrong, nothing we can communicate here with this trait. TODO: Maybe that should change?
            }
        }
    }

    fn try_find_cached<'a>(
        &self,
        id: impl AsRef<git_hash::oid>,
        buffer: &'a mut Vec<u8>,
        pack_cache: &mut impl DecodeEntry,
    ) -> Result<Option<(git_object::Data<'a>, Option<git_pack::data::entry::Location>)>, Self::Error> {
        let id = id.as_ref();
        let mut snapshot = self.snapshot.borrow_mut();
        self.try_find_cached_inner(id, buffer, pack_cache, &mut snapshot, None)
    }

    fn location_by_oid(
        &self,
        id: impl AsRef<git_hash::oid>,
        buf: &mut Vec<u8>,
    ) -> Option<git_pack::data::entry::Location> {
        assert!(
            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
        );

        assert!(
            self.store_ref().replacements.is_empty() || self.ignore_replacements,
            "Anything related to packing must not use replacements. They are not used here, but they should be turned off for good measure."
        );

        let id = id.as_ref();
        let mut snapshot = self.snapshot.borrow_mut();
        'outer: loop {
            {
                let marker = snapshot.marker;
                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
                    if let Some(handle::index_lookup::Outcome {
                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
                        index_file: _,
                        pack: possibly_pack,
                    }) = index.lookup(id)
                    {
                        let pack = match possibly_pack {
                            Some(pack) => pack,
                            None => match self.store.load_pack(pack_id, marker).ok()? {
                                Some(pack) => {
                                    *possibly_pack = Some(pack);
                                    possibly_pack.as_deref().expect("just put it in")
                                }
                                None => {
                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
                                    match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
                                        Some(new_snapshot) => {
                                            *snapshot = new_snapshot;
                                            self.clear_cache();
                                            continue 'outer;
                                        }
                                        None => {
                                            // Nothing new in the index. It's somewhat unexpected to have no pack but also
                                            // no new index yet. We set the new index before removing any slots, so
                                            // this should be observable.
                                            return None;
                                        }
                                    }
                                }
                            },
                        };
                        let entry = pack.entry(pack_offset);

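                        // Decompressing the entry yields its size past the header; together with the header size this
                        // gives the full entry size stored in the returned `Location`.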
                        buf.resize(entry.decompressed_size.try_into().expect("representable size"), 0);
                        assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match");

                        let res = pack.decompress_entry(&entry, buf).ok().map(|entry_size_past_header| {
                            git_pack::data::entry::Location {
                                pack_id: pack.id,
                                pack_offset,
                                entry_size: entry.header_size() + entry_size_past_header,
                            }
                        });

                        if idx != 0 {
                            snapshot.indices.swap(0, idx);
                        }
                        return res;
                    }
                }
            }

            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
                Some(new_snapshot) => {
                    *snapshot = new_snapshot;
                    self.clear_cache();
                }
                None => return None,
            }
        }
    }

    fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(u64, git_hash::ObjectId)>> {
        assert!(
            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
        );
        let pack_id = PackId::from_intrinsic_pack_id(pack_id);
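        // Find an index that covers the requested pack and collect (pack-offset, object-id) pairs for all of
        // its entries, refreshing the snapshot until such an index appears or nothing more can be loaded.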
        loop {
            let snapshot = self.snapshot.borrow();
            {
                for index in snapshot.indices.iter() {
                    if let Some(iter) = index.iter(pack_id) {
                        return Some(iter.map(|e| (e.pack_offset, e.oid)).collect());
                    }
                }
            }

            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
                Some(new_snapshot) => {
                    drop(snapshot);
                    *self.snapshot.borrow_mut() = new_snapshot;
                }
                None => return None,
            }
        }
    }

    fn entry_by_location(&self, location: &git_pack::data::entry::Location) -> Option<git_pack::find::Entry> {
        assert!(
            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
        );
        let pack_id = PackId::from_intrinsic_pack_id(location.pack_id);
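        // Locate the index owning the pack referenced by `location`, loading the pack on demand, then return
        // the raw entry bytes for the given offset range.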
        let mut snapshot = self.snapshot.borrow_mut();
        let marker = snapshot.marker;
        loop {
            {
                for index in snapshot.indices.iter_mut() {
                    if let Some(possibly_pack) = index.pack(pack_id) {
                        let pack = match possibly_pack {
                            Some(pack) => pack,
                            None => {
                                let pack = self.store.load_pack(pack_id, marker).ok()?.expect(
                                    "BUG: pack must exist from previous call to location_by_oid() and must not be unloaded",
                                );
                                *possibly_pack = Some(pack);
                                possibly_pack.as_deref().expect("just put it in")
                            }
                        };
                        return pack
                            .entry_slice(location.entry_range(location.pack_offset))
                            .map(|data| git_pack::find::Entry {
                                data: data.to_owned(),
                                version: pack.version(),
                            });
                    }
                }
            }

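            // The current snapshot doesn't know this pack's index yet: load it by id and keep it at the front
            // for the next iteration of the loop.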
            snapshot.indices.insert(
                0,
                self.store
                    .index_by_id(pack_id, marker)
                    .expect("BUG: index must always be present, must not be unloaded or overwritten"),
            );
        }
    }
}

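// Usage sketch (illustrative only; assumes a `handle` created from this store and a known object `id`):
//
//     let mut buffer = Vec::new();
//     match handle.try_find(&id, &mut buffer) {
//         Ok(Some(obj)) => { /* `obj.kind` and `obj.data` describe the decoded object */ }
//         Ok(None) => { /* the object is in no pack and no loose object database */ }
//         Err(err) => { /* see `error::Error` for the possible failure modes */ }
//     }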
impl<S> Find for super::Handle<S>
where
    S: Deref<Target = super::Store> + Clone,
    Self: git_pack::Find,
{
    type Error = <Self as git_pack::Find>::Error;

    fn contains(&self, id: impl AsRef<git_hash::oid>) -> bool {
        git_pack::Find::contains(self, id)
    }

    fn try_find<'a>(
        &self,
        id: impl AsRef<git_hash::oid>,
        buffer: &'a mut Vec<u8>,
    ) -> Result<Option<git_object::Data<'a>>, Self::Error> {
        git_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0))
    }
}