Skip to main content

gix_pack/index/
access.rs

1use std::{mem::size_of, ops::Range};
2
3use crate::{
4    data,
5    index::{self, EntryIndex, FAN_LEN, PrefixLookupResult},
6};
7
8const N32_SIZE: usize = size_of::<u32>();
9const N64_SIZE: usize = size_of::<u64>();
10const V1_HEADER_SIZE: usize = FAN_LEN * N32_SIZE;
11const V2_HEADER_SIZE: usize = N32_SIZE * 2 + FAN_LEN * N32_SIZE;
12const N32_HIGH_BIT: u32 = 1 << 31;
13
14/// Represents an entry within a pack index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data file locations.
15#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
16#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
17pub struct Entry {
18    /// The ID of the object
19    pub oid: gix_hash::ObjectId,
20    /// The offset to the object's header in the pack data file
21    pub pack_offset: data::Offset,
22    /// The CRC32 hash over all bytes of the pack data entry.
23    ///
24    /// This can be useful for direct copies of pack data entries from one pack to another with insurance there was no bit rot.
25    /// _Note_: Only available in index version 2 or newer
26    pub crc32: Option<u32>,
27}
28
29/// Iteration and access
30impl<T> index::File<T>
31where
32    T: crate::FileData,
33{
34    fn iter_v1(&self) -> impl Iterator<Item = Entry> + '_ {
35        match self.version {
36            index::Version::V1 => self.data[V1_HEADER_SIZE..]
37                .chunks_exact(N32_SIZE + self.hash_len)
38                .take(self.num_objects as usize)
39                .map(|c| {
40                    let (ofs, oid) = c.split_at(N32_SIZE);
41                    Entry {
42                        oid: gix_hash::ObjectId::from_bytes_or_panic(oid),
43                        pack_offset: u64::from(crate::read_u32(ofs)),
44                        crc32: None,
45                    }
46                }),
47            _ => panic!("Cannot use iter_v1() on index of type {:?}", self.version),
48        }
49    }
50
51    fn iter_v2(&self) -> impl Iterator<Item = Entry> + '_ {
52        let pack64_offset = self.offset_pack_offset64_v2();
53        let oids = self.data[V2_HEADER_SIZE..]
54            .chunks_exact(self.hash_len)
55            .take(self.num_objects as usize);
56        let crcs = self.data[self.offset_crc32_v2()..]
57            .chunks_exact(N32_SIZE)
58            .take(self.num_objects as usize);
59        let offsets = self.data[self.offset_pack_offset_v2()..]
60            .chunks_exact(N32_SIZE)
61            .take(self.num_objects as usize);
62        assert_eq!(oids.len(), crcs.len());
63        assert_eq!(crcs.len(), offsets.len());
64        match self.version {
65            index::Version::V2 => izip!(oids, crcs, offsets).map(move |(oid, crc32, ofs32)| Entry {
66                oid: gix_hash::ObjectId::from_bytes_or_panic(oid),
67                pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset),
68                crc32: Some(crate::read_u32(crc32)),
69            }),
70            _ => panic!("Cannot use iter_v2() on index of type {:?}", self.version),
71        }
72    }
73
74    /// Returns the object hash at the given index in our list of (sorted) sha1 hashes.
75    /// The index ranges from 0 to `self.num_objects()`
76    ///
77    /// # Panics
78    ///
79    /// If `index` is out of bounds.
80    pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid {
81        let index = index as usize;
82        let start = match self.version {
83            index::Version::V2 => V2_HEADER_SIZE + index * self.hash_len,
84            index::Version::V1 => V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len) + N32_SIZE,
85        };
86        gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
87    }
88
89    /// Returns the offset into our pack data file at which to start reading the object at `index`.
90    ///
91    /// # Panics
92    ///
93    /// If `index` is out of bounds.
94    pub fn pack_offset_at_index(&self, index: EntryIndex) -> data::Offset {
95        let index = index as usize;
96        match self.version {
97            index::Version::V2 => {
98                let start = self.offset_pack_offset_v2() + index * N32_SIZE;
99                self.pack_offset_from_offset_v2(&self.data[start..][..N32_SIZE], self.offset_pack_offset64_v2())
100            }
101            index::Version::V1 => {
102                let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len);
103                u64::from(crate::read_u32(&self.data[start..][..N32_SIZE]))
104            }
105        }
106    }
107
108    /// Returns the CRC32 of the object at the given `index`.
109    ///
110    /// _Note_: These are always present for index version 2 or higher.
111    /// # Panics
112    ///
113    /// If `index` is out of bounds.
114    pub fn crc32_at_index(&self, index: EntryIndex) -> Option<u32> {
115        let index = index as usize;
116        match self.version {
117            index::Version::V2 => {
118                let start = self.offset_crc32_v2() + index * N32_SIZE;
119                Some(crate::read_u32(&self.data[start..start + N32_SIZE]))
120            }
121            index::Version::V1 => None,
122        }
123    }
124
125    /// Returns the `index` of the given hash for use with the [`oid_at_index()`][index::File::oid_at_index()],
126    /// [`pack_offset_at_index()`][index::File::pack_offset_at_index()] or [`crc32_at_index()`][index::File::crc32_at_index()].
127    // NOTE: pretty much the same things as in `multi_index::File::lookup`, change things there
128    //       as well.
129    pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> {
130        lookup(id.as_ref(), &self.fan, &|idx| self.oid_at_index(idx))
131    }
132
133    /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`.
134    /// If there is more than one object matching the object `Some(Err(())` is returned.
135    ///
136    /// Finally, if no object matches the index, the return value is `None`.
137    ///
138    /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as
139    /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`.
140    ///
141    // NOTE: pretty much the same things as in `index::File::lookup`, change things there
142    //       as well.
143    pub fn lookup_prefix(
144        &self,
145        prefix: gix_hash::Prefix,
146        candidates: Option<&mut Range<EntryIndex>>,
147    ) -> Option<PrefixLookupResult> {
148        lookup_prefix(
149            prefix,
150            candidates,
151            &self.fan,
152            &|idx| self.oid_at_index(idx),
153            self.num_objects,
154        )
155    }
156
157    /// An iterator over all [`Entries`][Entry] of this index file.
158    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
159        match self.version {
160            index::Version::V2 => Box::new(self.iter_v2()),
161            index::Version::V1 => Box::new(self.iter_v1()),
162        }
163    }
164
165    /// Return a vector of ascending offsets into our respective pack data file.
166    ///
167    /// Useful to control an iteration over all pack entries in a cache-friendly way.
168    pub fn sorted_offsets(&self) -> Vec<data::Offset> {
169        let mut ofs: Vec<_> = match self.version {
170            index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(),
171            index::Version::V2 => {
172                let offset32_start = &self.data[self.offset_pack_offset_v2()..];
173                let offsets32 = offset32_start.chunks_exact(N32_SIZE).take(self.num_objects as usize);
174                assert_eq!(self.num_objects as usize, offsets32.len());
175                let pack_offset_64_start = self.offset_pack_offset64_v2();
176                offsets32
177                    .map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start))
178                    .collect()
179            }
180        };
181        ofs.sort_unstable();
182        ofs
183    }
184
185    #[inline]
186    fn offset_crc32_v2(&self) -> usize {
187        V2_HEADER_SIZE + self.num_objects as usize * self.hash_len
188    }
189
190    #[inline]
191    fn offset_pack_offset_v2(&self) -> usize {
192        self.offset_crc32_v2() + self.num_objects as usize * N32_SIZE
193    }
194
195    #[inline]
196    fn offset_pack_offset64_v2(&self) -> usize {
197        self.offset_pack_offset_v2() + self.num_objects as usize * N32_SIZE
198    }
199
200    #[inline]
201    fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset {
202        debug_assert_eq!(self.version, index::Version::V2);
203        let ofs32 = crate::read_u32(offset);
204        if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT {
205            let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE;
206            crate::read_u64(&self.data[from..][..N64_SIZE])
207        } else {
208            u64::from(ofs32)
209        }
210    }
211}
212
213pub(crate) fn lookup_prefix<'a>(
214    prefix: gix_hash::Prefix,
215    candidates: Option<&mut Range<EntryIndex>>,
216    fan: &[u32; FAN_LEN],
217    oid_at_index: &dyn Fn(EntryIndex) -> &'a gix_hash::oid,
218    num_objects: u32,
219) -> Option<PrefixLookupResult> {
220    let first_byte = prefix.as_oid().first_byte() as usize;
221    let mut upper_bound = fan[first_byte];
222    let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
223
224    // Bisect using indices
225    while lower_bound < upper_bound {
226        let mid = u32::midpoint(lower_bound, upper_bound);
227        let mid_sha = oid_at_index(mid);
228
229        use std::cmp::Ordering::*;
230        match prefix.cmp_oid(mid_sha) {
231            Less => upper_bound = mid,
232            Equal => match candidates {
233                Some(candidates) => {
234                    let first_past_entry = ((0..mid).rev())
235                        .take_while(|prev| prefix.cmp_oid(oid_at_index(*prev)) == Equal)
236                        .last();
237
238                    let last_future_entry = ((mid + 1)..num_objects)
239                        .take_while(|next| prefix.cmp_oid(oid_at_index(*next)) == Equal)
240                        .last();
241
242                    *candidates = match (first_past_entry, last_future_entry) {
243                        (Some(first), Some(last)) => first..last + 1,
244                        (Some(first), None) => first..mid + 1,
245                        (None, Some(last)) => mid..last + 1,
246                        (None, None) => mid..mid + 1,
247                    };
248
249                    return if candidates.len() > 1 {
250                        Some(Err(()))
251                    } else {
252                        Some(Ok(mid))
253                    };
254                }
255                None => {
256                    let next = mid + 1;
257                    if next < num_objects && prefix.cmp_oid(oid_at_index(next)) == Equal {
258                        return Some(Err(()));
259                    }
260                    if mid != 0 && prefix.cmp_oid(oid_at_index(mid - 1)) == Equal {
261                        return Some(Err(()));
262                    }
263                    return Some(Ok(mid));
264                }
265            },
266            Greater => lower_bound = mid + 1,
267        }
268    }
269
270    if let Some(candidates) = candidates {
271        *candidates = 0..0;
272    }
273    None
274}
275
276pub(crate) fn lookup<'a>(
277    id: &gix_hash::oid,
278    fan: &[u32; FAN_LEN],
279    oid_at_index: &dyn Fn(EntryIndex) -> &'a gix_hash::oid,
280) -> Option<EntryIndex> {
281    let first_byte = id.first_byte() as usize;
282    let mut upper_bound = fan[first_byte];
283    let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
284
285    while lower_bound < upper_bound {
286        let mid = u32::midpoint(lower_bound, upper_bound);
287        let mid_sha = oid_at_index(mid);
288
289        use std::cmp::Ordering::*;
290        match id.cmp(mid_sha) {
291            Less => upper_bound = mid,
292            Equal => return Some(mid),
293            Greater => lower_bound = mid + 1,
294        }
295    }
296    None
297}