Skip to main content

gix_odb/store_impls/dynamic/
find.rs

1use std::ops::Deref;
2
3use gix_pack::cache::DecodeEntry;
4
5use crate::store::{handle, load_index};
6
7pub(crate) mod error {
8    use crate::{loose, pack};
9
10    /// Returned by [`Handle::try_find()`][gix_pack::Find::try_find()]
11    #[derive(thiserror::Error, Debug)]
12    #[allow(missing_docs)]
13    pub enum Error {
14        #[error("An error occurred while obtaining an object from the loose object store")]
15        Loose(#[from] loose::find::Error),
16        #[error("An error occurred while obtaining an object from the packed object store")]
17        Pack(#[from] pack::data::decode::Error),
18        #[error(transparent)]
19        LoadIndex(#[from] crate::store::load_index::Error),
20        #[error(transparent)]
21        LoadPack(#[from] std::io::Error),
22        #[error(transparent)]
23        EntryType(#[from] gix_pack::data::entry::decode::Error),
24        #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)]
25        DeltaBaseRecursionLimit {
26            /// the maximum recursion depth we encountered.
27            max_depth: usize,
28            /// The original object to lookup
29            id: gix_hash::ObjectId,
30        },
31        #[error("The base object {} could not be found but is required to decode {}", .base_id, .id)]
32        DeltaBaseMissing {
33            /// the id of the base object which failed to lookup
34            base_id: gix_hash::ObjectId,
35            /// The original object to lookup
36            id: gix_hash::ObjectId,
37        },
38        #[error("An error occurred when looking up a ref delta base object {} to decode {}", .base_id, .id)]
39        DeltaBaseLookup {
40            #[source]
41            err: Box<Self>,
42            /// the id of the base object which failed to lookup
43            base_id: gix_hash::ObjectId,
44            /// The original object to lookup
45            id: gix_hash::ObjectId,
46        },
47    }
48
49    #[derive(Copy, Clone)]
50    pub(crate) struct DeltaBaseRecursion<'a> {
51        pub depth: usize,
52        pub original_id: &'a gix_hash::oid,
53    }
54
55    impl<'a> DeltaBaseRecursion<'a> {
56        pub fn new(id: &'a gix_hash::oid) -> Self {
57            Self {
58                original_id: id,
59                depth: 0,
60            }
61        }
62        pub fn inc_depth(mut self) -> Self {
63            self.depth += 1;
64            self
65        }
66    }
67
68    #[cfg(test)]
69    mod tests {
70        use super::*;
71
72        #[test]
73        fn error_size() {
74            let actual = std::mem::size_of::<Error>();
75            assert!(actual <= 88, "{actual} <= 88: should not grow without us noticing");
76        }
77    }
78}
79pub use error::Error;
80use gix_features::zlib;
81
82use crate::store::types::PackId;
83
84impl<S> super::Handle<S>
85where
86    S: Deref<Target = super::Store> + Clone,
87{
    /// Find `id` in the packs known to `snapshot` first and in its loose object databases second,
    /// decoding the object into `buffer`.
    ///
    /// * `inflate` and `pack_cache` are passed on to the pack decoder.
    /// * `snapshot` is replaced in place whenever a fresher one has to be loaded, which also clears
    ///   the handle's cache via `clear_cache()`.
    /// * `recursion` is `None` for top-level calls. Nested calls made to resolve a ref-delta base that lives
    ///   outside of the current pack pass `Some(..)`, which enforces `self.max_recursion_depth` and skips the
    ///   lookup of object replacements.
    ///
    /// Returns `Ok(None)` if the object could not be found and there was nothing more to load.
    /// Objects from a pack are returned along with their location in the pack, loose objects come with `None`.
    ///
    /// # Panics
    ///
    /// If, after resolving an out-of-pack delta base, `id` can't be found in any index anymore.
    fn try_find_cached_inner<'a, 'b>(
        &'b self,
        mut id: &'b gix_hash::oid,
        buffer: &'a mut Vec<u8>,
        inflate: &mut zlib::Inflate,
        pack_cache: &mut dyn DecodeEntry,
        snapshot: &mut load_index::Snapshot,
        recursion: Option<error::DeltaBaseRecursion<'_>>,
    ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, Error> {
        if let Some(r) = recursion {
            // Nested call: only enforce the depth limit, replacements are never applied to delta bases.
            if r.depth >= self.max_recursion_depth {
                return Err(Error::DeltaBaseRecursionLimit {
                    max_depth: self.max_recursion_depth,
                    id: r.original_id.to_owned(),
                });
            }
        } else if !self.ignore_replacements {
            // Top-level call: redirect `id` to its replacement if there is one.
            // The binary search relies on `replacements` being sorted by the id to replace.
            if let Ok(pos) = self
                .store
                .replacements
                .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id))
            {
                id = self.store.replacements[pos].1.as_ref();
            }
        }

        // Each iteration searches the current snapshot; it's repeated for as long as a fresher snapshot can be loaded.
        'outer: loop {
            {
                let marker = snapshot.marker;
                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
                    if let Some(handle::index_lookup::Outcome {
                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
                        index_file,
                        pack: possibly_pack,
                    }) = index.lookup(id)
                    {
                        // The index knows the object, but its pack may still have to be loaded.
                        let pack = match possibly_pack {
                            Some(pack) => pack,
                            None => match self.store.load_pack(pack_id, marker)? {
                                Some(pack) => {
                                    *possibly_pack = Some(pack);
                                    possibly_pack.as_deref().expect("just put it in")
                                }
                                None => {
                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
                                    match self.store.load_one_index(self.refresh, snapshot.marker)? {
                                        Some(new_snapshot) => {
                                            *snapshot = new_snapshot;
                                            self.clear_cache();
                                            continue 'outer;
                                        }
                                        None => {
                                            // nothing new in the index, kind of unexpected to not have a pack but to also
                                            // to have no new index yet. We set the new index before removing any slots, so
                                            // this should be observable.
                                            return Ok(None);
                                        }
                                    }
                                }
                            },
                        };
                        let entry = pack.entry(pack_offset)?;
                        let header_size = entry.header_size();
                        // First attempt: resolve ref-delta bases only through the index that found the object.
                        let res = pack.decode_entry(
                            entry,
                            buffer,
                            inflate,
                            &|id, _out| {
                                let pack_offset = index_file.pack_offset_by_id(id)?;
                                pack.entry(pack_offset)
                                    .ok()
                                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
                            },
                            pack_cache,
                        );
                        let res = match res {
                            Ok(r) => Ok((
                                gix_object::Data {
                                    kind: r.kind,
                                    object_hash: pack.object_hash(),
                                    data: buffer.as_slice(),
                                },
                                Some(gix_pack::data::entry::Location {
                                    pack_id: pack.id,
                                    pack_offset,
                                    entry_size: r.compressed_size + header_size,
                                }),
                            )),
                            Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => {
                                // Only with multi-pack indices it's allowed to jump to refer to other packs within this
                                // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit.
                                // However, if we somehow end up with that, we will resolve it safely, even though we could
                                // avoid handling this case and error instead.

                                // Since this is a special case, we just allocate here to make it work. It's an actual delta-ref object
                                // which is sent by some servers that points to an object outside of the pack we are looking
                                // at right now. With the complexities of loading packs, we go into recursion here. Git itself
                                // doesn't do a cycle check, and we won't either but limit the recursive depth.
                                // The whole ordeal isn't as efficient as it could be due to memory allocation and
                                // later mem-copying when trying again.
                                let mut buf = Vec::new();
                                // Decode the base into `buf` and remember its kind. The recursion state is created
                                // on the first nested call and deepened on each further one.
                                let obj_kind = self
                                    .try_find_cached_inner(
                                        &base_id,
                                        &mut buf,
                                        inflate,
                                        pack_cache,
                                        snapshot,
                                        recursion
                                            .map(error::DeltaBaseRecursion::inc_depth)
                                            .or_else(|| error::DeltaBaseRecursion::new(id).into()),
                                    )
                                    .map_err(|err| Error::DeltaBaseLookup {
                                        err: Box::new(err),
                                        base_id,
                                        id: id.to_owned(),
                                    })?
                                    .ok_or_else(|| Error::DeltaBaseMissing {
                                        base_id,
                                        id: id.to_owned(),
                                    })?
                                    .0
                                    .kind;
                                // The nested call had mutable access to `snapshot`, so look `id` up once more: first
                                // at the previous position, then in all indices.
                                // NOTE(review): if the nested call replaced the snapshot with one that has fewer
                                // indices, `snapshot.indices[idx]` would panic - TODO confirm whether that can happen.
                                let handle::index_lookup::Outcome {
                                    object_index:
                                        handle::IndexForObjectInPack {
                                            pack_id: _,
                                            pack_offset,
                                        },
                                    index_file,
                                    pack: possibly_pack,
                                } = match snapshot.indices[idx].lookup(id) {
                                    Some(res) => res,
                                    None => {
                                        let mut out = None;
                                        for index in &mut snapshot.indices {
                                            out = index.lookup(id);
                                            if out.is_some() {
                                                break;
                                            }
                                        }

                                        out.unwrap_or_else(|| {
                                           panic!("could not find object {id} in any index after looking up one of its base objects {base_id}" )
                                       })
                                    }
                                };
                                let pack = possibly_pack
                                    .as_ref()
                                    .expect("pack to still be available like just now");
                                let entry = pack.entry(pack_offset)?;
                                let header_size = entry.header_size();
                                // Second attempt: bases are still looked up in the pack first, but the one that was
                                // missing is now served from `buf`.
                                pack.decode_entry(
                                    entry,
                                    buffer,
                                    inflate,
                                    &|id, out| {
                                        index_file
                                            .pack_offset_by_id(id)
                                            .and_then(|pack_offset| {
                                                pack.entry(pack_offset)
                                                    .ok()
                                                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
                                            })
                                            .or_else(|| {
                                                (id == base_id).then(|| {
                                                    out.resize(buf.len(), 0);
                                                    out.copy_from_slice(buf.as_slice());
                                                    gix_pack::data::decode::entry::ResolvedBase::OutOfPack {
                                                        kind: obj_kind,
                                                        end: out.len(),
                                                    }
                                                })
                                            })
                                    },
                                    pack_cache,
                                )
                                .map(move |r| {
                                    (
                                        gix_object::Data {
                                            kind: r.kind,
                                            object_hash: pack.object_hash(),
                                            data: buffer.as_slice(),
                                        },
                                        Some(gix_pack::data::entry::Location {
                                            pack_id: pack.id,
                                            pack_offset,
                                            entry_size: r.compressed_size + header_size,
                                        }),
                                    )
                                })
                            }
                            Err(err) => Err(err),
                        }?;

                        // Put the index that had the object first, presumably so it's tried first next time.
                        if idx != 0 {
                            snapshot.indices.swap(0, idx);
                        }
                        return Ok(Some(res));
                    }
                }
            }

            // No index knows the object, try the loose object databases next.
            for lodb in snapshot.loose_dbs.iter() {
                // TODO: remove this double-lookup once the borrow checker allows it.
                if lodb.contains(id) {
                    return lodb
                        .try_find(id, buffer)
                        .map(|obj| obj.map(|obj| (obj, None)))
                        .map_err(Into::into);
                }
            }

            // Not found at all: search again if there is something new to load, or give up.
            match self.store.load_one_index(self.refresh, snapshot.marker)? {
                Some(new_snapshot) => {
                    *snapshot = new_snapshot;
                    self.clear_cache();
                }
                None => return Ok(None),
            }
        }
    }
310
311    pub(crate) fn clear_cache(&self) {
312        self.packed_object_count.borrow_mut().take();
313    }
314}
315
316impl<S> gix_pack::Find for super::Handle<S>
317where
318    S: Deref<Target = super::Store> + Clone,
319{
320    // TODO: probably make this method fallible, but that would mean its own error type.
321    fn contains(&self, id: &gix_hash::oid) -> bool {
322        let mut snapshot = self.snapshot.borrow_mut();
323        loop {
324            for (idx, index) in snapshot.indices.iter().enumerate() {
325                if index.contains(id) {
326                    if idx != 0 {
327                        snapshot.indices.swap(0, idx);
328                    }
329                    return true;
330                }
331            }
332
333            for lodb in snapshot.loose_dbs.iter() {
334                if lodb.contains(id) {
335                    return true;
336                }
337            }
338
339            match self.store.load_one_index(self.refresh, snapshot.marker) {
340                Ok(Some(new_snapshot)) => {
341                    *snapshot = new_snapshot;
342                    self.clear_cache();
343                }
344                Ok(None) => return false, // nothing more to load, or our refresh mode doesn't allow disk refreshes
345                Err(_) => return false, // something went wrong, nothing we can communicate here with this trait. TODO: Maybe that should change?
346            }
347        }
348    }
349
350    fn try_find_cached<'a>(
351        &self,
352        id: &gix_hash::oid,
353        buffer: &'a mut Vec<u8>,
354        pack_cache: &mut dyn DecodeEntry,
355    ) -> Result<Option<(gix_object::Data<'a>, Option<gix_pack::data::entry::Location>)>, gix_object::find::Error> {
356        let mut snapshot = self.snapshot.borrow_mut();
357        let mut inflate = self.inflate.borrow_mut();
358        self.try_find_cached_inner(id, buffer, &mut inflate, pack_cache, &mut snapshot, None)
359            .map_err(|err| Box::new(err) as _)
360    }
361
    /// Return the location of the pack entry for `id`, or `None` if no index knows the object.
    ///
    /// `buf` is resized to the entry's decompressed size and handed to `decompress_entry()` as output.
    /// Loose object databases are not consulted.
    ///
    /// Every error along the way (loading a pack or an index, reading or decompressing the entry) is
    /// discarded and turns into `None`, as does an entry whose decompressed size doesn't fit `usize`
    /// or exceeds the pack's allocation limit.
    ///
    /// # Panics
    ///
    /// * If the handle's token isn't `handle::Mode::KeepDeletedPacksAvailable`.
    /// * If the store has replacements and they are not ignored.
    /// * If the id of the pack doesn't match the pack id obtained from the index.
    fn location_by_oid(&self, id: &gix_hash::oid, buf: &mut Vec<u8>) -> Option<gix_pack::data::entry::Location> {
        assert!(
            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
        );

        assert!(
            self.store_ref().replacements.is_empty() || self.ignore_replacements,
            "Everything related to packing must not use replacements. These are not used here, but it should be turned off for good measure."
        );

        let mut snapshot = self.snapshot.borrow_mut();
        let mut inflate = self.inflate.borrow_mut();
        // Each iteration searches the current snapshot; it's repeated for as long as a fresher snapshot can be loaded.
        'outer: loop {
            {
                let marker = snapshot.marker;
                for (idx, index) in snapshot.indices.iter_mut().enumerate() {
                    if let Some(handle::index_lookup::Outcome {
                        object_index: handle::IndexForObjectInPack { pack_id, pack_offset },
                        index_file: _,
                        pack: possibly_pack,
                    }) = index.lookup(id)
                    {
                        // The index knows the object, but its pack may still have to be loaded.
                        let pack = match possibly_pack {
                            Some(pack) => pack,
                            None => match self.store.load_pack(pack_id, marker).ok()? {
                                Some(pack) => {
                                    *possibly_pack = Some(pack);
                                    possibly_pack.as_deref().expect("just put it in")
                                }
                                None => {
                                    // The pack wasn't available anymore so we are supposed to try another round with a fresh index
                                    match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
                                        Some(new_snapshot) => {
                                            *snapshot = new_snapshot;
                                            self.clear_cache();
                                            continue 'outer;
                                        }
                                        None => {
                                            // nothing new in the index, kind of unexpected to not have a pack but to also
                                            // to have no new index yet. We set the new index before removing any slots, so
                                            // this should be observable.
                                            return None;
                                        }
                                    }
                                }
                            },
                        };
                        let entry = pack.entry(pack_offset).ok()?;
                        // This allocation is driven by on-disk pack metadata, so keep it aligned with
                        // `gix_pack::data::File::with_alloc_limit_bytes()`.
                        let size: usize = entry.decompressed_size.try_into().ok()?;
                        if pack.alloc_limit_bytes().is_some_and(|limit| size > limit) {
                            return None;
                        }
                        buf.resize(size, 0);
                        assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match");

                        // The entry size is the size of its header plus what `decompress_entry()` reports.
                        let res = pack
                            .decompress_entry(&entry, &mut inflate, buf)
                            .ok()
                            .map(|entry_size_past_header| gix_pack::data::entry::Location {
                                pack_id: pack.id,
                                pack_offset,
                                entry_size: entry.header_size() + entry_size_past_header,
                            });

                        // Put the index that had the object first, presumably so it's tried first next time.
                        if idx != 0 {
                            snapshot.indices.swap(0, idx);
                        }
                        return res;
                    }
                }
            }

            // Not found in any index: search again if there is something new to load, or give up.
            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
                Some(new_snapshot) => {
                    *snapshot = new_snapshot;
                    self.clear_cache();
                }
                None => return None,
            }
        }
    }
446
447    fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(u64, gix_hash::ObjectId)>> {
448        assert!(
449            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
450            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
451        );
452        let pack_id = PackId::from_intrinsic_pack_id(pack_id);
453        loop {
454            let snapshot = self.snapshot.borrow();
455            {
456                for index in &snapshot.indices {
457                    if let Some(iter) = index.iter(pack_id) {
458                        return Some(iter.map(|e| (e.pack_offset, e.oid)).collect());
459                    }
460                }
461            }
462
463            match self.store.load_one_index(self.refresh, snapshot.marker).ok()? {
464                Some(new_snapshot) => {
465                    drop(snapshot);
466                    *self.snapshot.borrow_mut() = new_snapshot;
467                }
468                None => return None,
469            }
470        }
471    }
472
473    fn entry_by_location(&self, location: &gix_pack::data::entry::Location) -> Option<gix_pack::find::Entry> {
474        assert!(
475            matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)),
476            "BUG: handle must be configured to `prevent_pack_unload()` before using this method"
477        );
478        let pack_id = PackId::from_intrinsic_pack_id(location.pack_id);
479        let mut snapshot = self.snapshot.borrow_mut();
480        let marker = snapshot.marker;
481        loop {
482            {
483                for index in &mut snapshot.indices {
484                    if let Some(possibly_pack) = index.pack(pack_id) {
485                        let pack = match possibly_pack {
486                            Some(pack) => pack,
487                            None => {
488                                let pack = self.store.load_pack(pack_id, marker).ok()?.expect(
489                                "BUG: pack must exist from previous call to location_by_oid() and must not be unloaded",
490                            );
491                                *possibly_pack = Some(pack);
492                                possibly_pack.as_deref().expect("just put it in")
493                            }
494                        };
495                        return pack
496                            .entry_slice(location.entry_range(location.pack_offset))
497                            .map(|data| gix_pack::find::Entry {
498                                data: data.to_owned(),
499                                version: pack.version(),
500                            });
501                    }
502                }
503            }
504
505            snapshot.indices.insert(
506                0,
507                self.store
508                    .index_by_id(pack_id, marker)
509                    .expect("BUG: index must always be present, must not be unloaded or overwritten"),
510            );
511        }
512    }
513}
514
515impl<S> gix_object::Find for super::Handle<S>
516where
517    S: Deref<Target = super::Store> + Clone,
518    Self: gix_pack::Find,
519{
520    fn try_find<'a>(
521        &self,
522        id: &gix_hash::oid,
523        buffer: &'a mut Vec<u8>,
524    ) -> Result<Option<gix_object::Data<'a>>, gix_object::find::Error> {
525        gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0))
526    }
527}
528
529impl<S> gix_object::FindHeader for super::Handle<S>
530where
531    S: Deref<Target = super::Store> + Clone,
532{
533    fn try_header(&self, id: &gix_hash::oid) -> Result<Option<gix_object::Header>, gix_object::find::Error> {
534        let mut snapshot = self.snapshot.borrow_mut();
535        let mut inflate = self.inflate.borrow_mut();
536        self.try_header_inner(id, &mut inflate, &mut snapshot, None)
537            .map(|maybe_header| {
538                maybe_header.map(|hdr| gix_object::Header {
539                    kind: hdr.kind(),
540                    size: hdr.size(),
541                })
542            })
543            .map_err(|err| Box::new(err) as _)
544    }
545}
546
547impl<S> gix_object::Exists for super::Handle<S>
548where
549    S: Deref<Target = super::Store> + Clone,
550    Self: gix_pack::Find,
551{
552    fn exists(&self, id: &gix_hash::oid) -> bool {
553        gix_pack::Find::contains(self, id)
554    }
555}