git_pack/index/
access.rs

1use std::{mem::size_of, ops::Range};
2
3use crate::{
4    data,
5    index::{self, EntryIndex, PrefixLookupResult, FAN_LEN},
6};
7
8const N32_SIZE: usize = size_of::<u32>();
9const N64_SIZE: usize = size_of::<u64>();
10const V1_HEADER_SIZE: usize = FAN_LEN * N32_SIZE;
11const V2_HEADER_SIZE: usize = N32_SIZE * 2 + FAN_LEN * N32_SIZE;
12const N32_HIGH_BIT: u32 = 1 << 31;
13
14/// Represents an entry within a pack index file, effectively mapping object [`IDs`][git_hash::ObjectId] to pack data file locations.
15#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
16#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
17pub struct Entry {
18    /// The ID of the object
19    pub oid: git_hash::ObjectId,
20    /// The offset to the object's header in the pack data file
21    pub pack_offset: data::Offset,
22    /// The CRC32 hash over all bytes of the pack data entry.
23    ///
24    /// This can be useful for direct copies of pack data entries from one pack to another with insurance there was no bit rot.
25    /// _Note_: Only available in index version 2 or newer
26    pub crc32: Option<u32>,
27}
28
29/// Iteration and access
30impl index::File {
31    fn iter_v1(&self) -> impl Iterator<Item = Entry> + '_ {
32        match self.version {
33            index::Version::V1 => self.data[V1_HEADER_SIZE..]
34                .chunks(N32_SIZE + self.hash_len)
35                .take(self.num_objects as usize)
36                .map(|c| {
37                    let (ofs, oid) = c.split_at(N32_SIZE);
38                    Entry {
39                        oid: git_hash::ObjectId::from(oid),
40                        pack_offset: crate::read_u32(ofs) as u64,
41                        crc32: None,
42                    }
43                }),
44            _ => panic!("Cannot use iter_v1() on index of type {:?}", self.version),
45        }
46    }
47
48    fn iter_v2(&self) -> impl Iterator<Item = Entry> + '_ {
49        let pack64_offset = self.offset_pack_offset64_v2();
50        match self.version {
51            index::Version::V2 => izip!(
52                self.data[V2_HEADER_SIZE..].chunks(self.hash_len),
53                self.data[self.offset_crc32_v2()..].chunks(N32_SIZE),
54                self.data[self.offset_pack_offset_v2()..].chunks(N32_SIZE)
55            )
56            .take(self.num_objects as usize)
57            .map(move |(oid, crc32, ofs32)| Entry {
58                oid: git_hash::ObjectId::from(oid),
59                pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset),
60                crc32: Some(crate::read_u32(crc32)),
61            }),
62            _ => panic!("Cannot use iter_v2() on index of type {:?}", self.version),
63        }
64    }
65
66    /// Returns the object hash at the given index in our list of (sorted) sha1 hashes.
67    /// The index ranges from 0 to self.num_objects()
68    ///
69    /// # Panics
70    ///
71    /// If `index` is out of bounds.
72    pub fn oid_at_index(&self, index: EntryIndex) -> &git_hash::oid {
73        let index = index as usize;
74        let start = match self.version {
75            index::Version::V2 => V2_HEADER_SIZE + index * self.hash_len,
76            index::Version::V1 => V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len) + N32_SIZE,
77        };
78        git_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
79    }
80
81    /// Returns the offset into our pack data file at which to start reading the object at `index`.
82    ///
83    /// # Panics
84    ///
85    /// If `index` is out of bounds.
86    pub fn pack_offset_at_index(&self, index: EntryIndex) -> data::Offset {
87        let index = index as usize;
88        match self.version {
89            index::Version::V2 => {
90                let start = self.offset_pack_offset_v2() + index * N32_SIZE;
91                self.pack_offset_from_offset_v2(&self.data[start..][..N32_SIZE], self.offset_pack_offset64_v2())
92            }
93            index::Version::V1 => {
94                let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len);
95                crate::read_u32(&self.data[start..][..N32_SIZE]) as u64
96            }
97        }
98    }
99
100    /// Returns the CRC32 of the object at the given `index`.
101    ///
102    /// _Note_: These are always present for index version 2 or higher.
103    /// # Panics
104    ///
105    /// If `index` is out of bounds.
106    pub fn crc32_at_index(&self, index: EntryIndex) -> Option<u32> {
107        let index = index as usize;
108        match self.version {
109            index::Version::V2 => {
110                let start = self.offset_crc32_v2() + index * N32_SIZE;
111                Some(crate::read_u32(&self.data[start..start + N32_SIZE]))
112            }
113            index::Version::V1 => None,
114        }
115    }
116
117    /// Returns the `index` of the given hash for use with the [`oid_at_index()`][index::File::oid_at_index()],
118    /// [`pack_offset_at_index()`][index::File::pack_offset_at_index()] or [`crc32_at_index()`][index::File::crc32_at_index()].
119    // NOTE: pretty much the same things as in `multi_index::File::lookup`, change things there
120    //       as well.
121    pub fn lookup(&self, id: impl AsRef<git_hash::oid>) -> Option<EntryIndex> {
122        lookup(id, &self.fan, |idx| self.oid_at_index(idx))
123    }
124
125    /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`.
126    /// If there is more than one object matching the object `Some(Err(())` is returned.
127    ///
128    /// Finally, if no object matches the index, the return value is `None`.
129    ///
130    /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as
131    /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`.
132    ///
133    // NOTE: pretty much the same things as in `index::File::lookup`, change things there
134    //       as well.
135    pub fn lookup_prefix(
136        &self,
137        prefix: git_hash::Prefix,
138        candidates: Option<&mut Range<EntryIndex>>,
139    ) -> Option<PrefixLookupResult> {
140        lookup_prefix(
141            prefix,
142            candidates,
143            &self.fan,
144            |idx| self.oid_at_index(idx),
145            self.num_objects,
146        )
147    }
148
149    /// An iterator over all [`Entries`][Entry] of this index file.
150    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
151        match self.version {
152            index::Version::V2 => Box::new(self.iter_v2()),
153            index::Version::V1 => Box::new(self.iter_v1()),
154        }
155    }
156
157    /// Return a vector of ascending offsets into our respective pack data file.
158    ///
159    /// Useful to control an iteration over all pack entries in a cache-friendly way.
160    pub fn sorted_offsets(&self) -> Vec<data::Offset> {
161        let mut ofs: Vec<_> = match self.version {
162            index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(),
163            index::Version::V2 => {
164                let offset32_start = &self.data[self.offset_pack_offset_v2()..];
165                let pack_offset_64_start = self.offset_pack_offset64_v2();
166                offset32_start
167                    .chunks(N32_SIZE)
168                    .take(self.num_objects as usize)
169                    .map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start))
170                    .collect()
171            }
172        };
173        ofs.sort_unstable();
174        ofs
175    }
176
177    #[inline]
178    fn offset_crc32_v2(&self) -> usize {
179        V2_HEADER_SIZE + self.num_objects as usize * self.hash_len
180    }
181
182    #[inline]
183    fn offset_pack_offset_v2(&self) -> usize {
184        self.offset_crc32_v2() + self.num_objects as usize * N32_SIZE
185    }
186
187    #[inline]
188    fn offset_pack_offset64_v2(&self) -> usize {
189        self.offset_pack_offset_v2() + self.num_objects as usize * N32_SIZE
190    }
191
192    #[inline]
193    fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset {
194        debug_assert_eq!(self.version, index::Version::V2);
195        let ofs32 = crate::read_u32(offset);
196        if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT {
197            let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE;
198            crate::read_u64(&self.data[from..][..N64_SIZE])
199        } else {
200            ofs32 as u64
201        }
202    }
203}
204
205pub(crate) fn lookup_prefix<'a>(
206    prefix: git_hash::Prefix,
207    candidates: Option<&mut Range<EntryIndex>>,
208    fan: &[u32; FAN_LEN],
209    oid_at_index: impl Fn(EntryIndex) -> &'a git_hash::oid,
210    num_objects: u32,
211) -> Option<PrefixLookupResult> {
212    let first_byte = prefix.as_oid().first_byte() as usize;
213    let mut upper_bound = fan[first_byte];
214    let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
215
216    // Bisect using indices
217    while lower_bound < upper_bound {
218        let mid = (lower_bound + upper_bound) / 2;
219        let mid_sha = oid_at_index(mid);
220
221        use std::cmp::Ordering::*;
222        match prefix.cmp_oid(mid_sha) {
223            Less => upper_bound = mid,
224            Equal => match candidates {
225                Some(candidates) => {
226                    let first_past_entry = ((0..mid).rev())
227                        .take_while(|prev| prefix.cmp_oid(oid_at_index(*prev)) == Equal)
228                        .last();
229
230                    let last_future_entry = ((mid + 1)..num_objects)
231                        .take_while(|next| prefix.cmp_oid(oid_at_index(*next)) == Equal)
232                        .last();
233
234                    *candidates = match (first_past_entry, last_future_entry) {
235                        (Some(first), Some(last)) => first..last + 1,
236                        (Some(first), None) => first..mid + 1,
237                        (None, Some(last)) => mid..last + 1,
238                        (None, None) => mid..mid + 1,
239                    };
240
241                    return if candidates.len() > 1 {
242                        Some(Err(()))
243                    } else {
244                        Some(Ok(mid))
245                    };
246                }
247                None => {
248                    let next = mid + 1;
249                    if next < num_objects && prefix.cmp_oid(oid_at_index(next)) == Equal {
250                        return Some(Err(()));
251                    }
252                    if mid != 0 && prefix.cmp_oid(oid_at_index(mid - 1)) == Equal {
253                        return Some(Err(()));
254                    }
255                    return Some(Ok(mid));
256                }
257            },
258            Greater => lower_bound = mid + 1,
259        }
260    }
261
262    if let Some(candidates) = candidates {
263        *candidates = 0..0;
264    }
265    None
266}
267
268pub(crate) fn lookup<'a>(
269    id: impl AsRef<git_hash::oid>,
270    fan: &[u32; FAN_LEN],
271    oid_at_index: impl Fn(EntryIndex) -> &'a git_hash::oid,
272) -> Option<EntryIndex> {
273    let id = id.as_ref();
274    let first_byte = id.first_byte() as usize;
275    let mut upper_bound = fan[first_byte];
276    let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
277
278    while lower_bound < upper_bound {
279        let mid = (lower_bound + upper_bound) / 2;
280        let mid_sha = oid_at_index(mid);
281
282        use std::cmp::Ordering::*;
283        match id.cmp(mid_sha) {
284            Less => upper_bound = mid,
285            Equal => return Some(mid),
286            Greater => lower_bound = mid + 1,
287        }
288    }
289    None
290}