git_pack/data/input/
lookup_ref_delta_objects.rs

1use std::convert::TryInto;
2
3use git_hash::ObjectId;
4
5use crate::data::{entry::Header, input};
6
7/// An iterator to resolve thin packs on the fly.
8pub struct LookupRefDeltaObjectsIter<I, LFn> {
9    /// The inner iterator whose entries we will resolve.
10    pub inner: I,
11    lookup: LFn,
12    /// The cached delta to provide next time we are called, it's the delta to go with the base we just resolved in its place.
13    next_delta: Option<input::Entry>,
14    /// Fuse to stop iteration after first missing object.
15    error: bool,
16    /// The overall pack-offset we accumulated thus far. Each inserted entry offsets all following
17    /// objects by its length. We need to determine exactly where the object was inserted to see if its affected at all.
18    inserted_entry_length_at_offset: Vec<Change>,
19    /// The sum of all entries added so far, as a cache to avoid recomputation
20    inserted_entries_length_in_bytes: i64,
21    buf: Vec<u8>,
22}
23
24impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn>
25where
26    I: Iterator<Item = Result<input::Entry, input::Error>>,
27    LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<git_object::Data<'a>>,
28{
29    /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases
30    /// for ref deltas seen while traversing `iter`.
31    pub fn new(iter: I, lookup: LFn) -> Self {
32        LookupRefDeltaObjectsIter {
33            inner: iter,
34            lookup,
35            error: false,
36            inserted_entry_length_at_offset: Vec::new(),
37            inserted_entries_length_in_bytes: 0,
38            next_delta: None,
39            buf: Vec::new(),
40        }
41    }
42
43    fn shifted_pack_offset(&self, pack_offset: u64) -> u64 {
44        let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes;
45        new_ofs.try_into().expect("offset value is never becomes negative")
46    }
47
48    /// positive `size_change` values mean an object grew or was more commonly, was inserted. Negative values
49    /// mean the object shrunk, usually because there header changed from ref-deltas to ofs deltas.
50    fn track_change(
51        &mut self,
52        shifted_pack_offset: u64,
53        pack_offset: u64,
54        size_change: i64,
55        oid: impl Into<Option<ObjectId>>,
56    ) {
57        if size_change == 0 {
58            return;
59        }
60        self.inserted_entry_length_at_offset.push(Change {
61            shifted_pack_offset,
62            pack_offset,
63            size_change_in_bytes: size_change,
64            oid: oid.into().unwrap_or_else(||
65                // NOTE: this value acts as sentinel and the actual hash kind doesn't matter.
66                git_hash::Kind::Sha1.null()),
67        });
68        self.inserted_entries_length_in_bytes += size_change;
69    }
70
71    fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) {
72        let pack_offset = entry.pack_offset;
73        entry.pack_offset = self.shifted_pack_offset(pack_offset);
74        entry.header = Header::OfsDelta { base_distance };
75        let previous_header_size = entry.header_size;
76        entry.header_size = entry.header.size(entry.decompressed_size) as u16;
77
78        let change = entry.header_size as i64 - previous_header_size as i64;
79        entry.crc32 = Some(entry.compute_crc32());
80        self.track_change(entry.pack_offset, pack_offset, change, None);
81    }
82}
83
84impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn>
85where
86    I: Iterator<Item = Result<input::Entry, input::Error>>,
87    LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<git_object::Data<'a>>,
88{
89    type Item = Result<input::Entry, input::Error>;
90
91    fn next(&mut self) -> Option<Self::Item> {
92        if self.error {
93            return None;
94        }
95        if let Some(delta) = self.next_delta.take() {
96            return Some(Ok(delta));
97        }
98        match self.inner.next() {
99            Some(Ok(mut entry)) => match entry.header {
100                Header::RefDelta { base_id } => {
101                    match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) {
102                        None => {
103                            let base_entry = match (self.lookup)(base_id, &mut self.buf) {
104                                Some(obj) => {
105                                    let current_pack_offset = entry.pack_offset;
106                                    let mut entry = match input::Entry::from_data_obj(&obj, 0) {
107                                        Ok(e) => e,
108                                        Err(err) => return Some(Err(err)),
109                                    };
110                                    entry.pack_offset = self.shifted_pack_offset(current_pack_offset);
111                                    self.track_change(
112                                        entry.pack_offset,
113                                        current_pack_offset,
114                                        entry.bytes_in_pack() as i64,
115                                        base_id,
116                                    );
117                                    entry
118                                }
119                                None => {
120                                    self.error = true;
121                                    return Some(Err(input::Error::NotFound { object_id: base_id }));
122                                }
123                            };
124
125                            {
126                                self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack());
127                                self.next_delta = Some(entry);
128                            }
129                            Some(Ok(base_entry))
130                        }
131                        Some(base_entry) => {
132                            let base_distance =
133                                self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset;
134                            self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance);
135                            Some(Ok(entry))
136                        }
137                    }
138                }
139                _ => {
140                    if self.inserted_entries_length_in_bytes != 0 {
141                        if let Header::OfsDelta { base_distance } = entry.header {
142                            // We have to find the new distance based on the previous distance to the base, using the absolute
143                            // pack offset computed from it as stored in `base_pack_offset`.
144                            let base_pack_offset = entry
145                                .pack_offset
146                                .checked_sub(base_distance)
147                                .expect("distance to be in range of pack");
148                            match self
149                                .inserted_entry_length_at_offset
150                                .binary_search_by_key(&base_pack_offset, |c| c.pack_offset)
151                            {
152                                Ok(index) => {
153                                    let index = {
154                                        let maybe_index_of_actual_entry = index + 1;
155                                        self.inserted_entry_length_at_offset
156                                            .get(maybe_index_of_actual_entry)
157                                            .and_then(|c| {
158                                                (c.pack_offset == base_pack_offset)
159                                                    .then_some(maybe_index_of_actual_entry)
160                                            })
161                                            .unwrap_or(index)
162                                    };
163                                    let new_distance = self
164                                        .shifted_pack_offset(entry.pack_offset)
165                                        .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset)
166                                        .expect("a base that is behind us in the pack");
167                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
168                                }
169                                Err(index) => {
170                                    let change_since_offset = self.inserted_entry_length_at_offset[index..]
171                                        .iter()
172                                        .map(|c| c.size_change_in_bytes)
173                                        .sum::<i64>();
174                                    let new_distance: u64 = {
175                                        (base_distance as i64 + change_since_offset)
176                                            .try_into()
177                                            .expect("it still points behind us")
178                                    };
179                                    self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance);
180                                }
181                            }
182                        } else {
183                            // Offset this entry by all changes (positive or negative) that we saw thus far.
184                            entry.pack_offset = self.shifted_pack_offset(entry.pack_offset);
185                        }
186                    }
187                    Some(Ok(entry))
188                }
189            },
190            other => other,
191        }
192    }
193
194    fn size_hint(&self) -> (usize, Option<usize>) {
195        let (min, max) = self.inner.size_hint();
196        max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None))
197    }
198}
199
200#[derive(Debug)]
201struct Change {
202    /// The original pack offset as mentioned in the entry we saw. This is used to find this as base object if deltas refer to it by
203    /// old offset.
204    pack_offset: u64,
205    /// The new pack offset that is the shifted location of the pack entry in the pack.
206    shifted_pack_offset: u64,
207    /// The size change of the entry header, negative values denote shrinking, positive denote growing.
208    size_change_in_bytes: i64,
209    /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion.
210    oid: ObjectId,
211}