Skip to main content

objects/store/pack/
pack_index.rs

// SPDX-License-Identifier: Apache-2.0
//! Pack index for fast object lookup within packfiles.
3
4use crate::store::{
5    Result,
6    pack::{
7        PackObjectId,
8        versioned_header::{HeaderChecksum, VersionedHeader},
9    },
10};
11
/// Magic bytes placed in the header of a serialized pack index.
pub(super) const INDEX_MAGIC: &[u8; 4] = b"LMI\0";
/// Format version placed in the header of a serialized pack index.
pub(super) const INDEX_VERSION: u32 = 2;
/// Smallest number of bytes a single serialized entry is assumed to occupy.
///
/// `PackIndex::from_bytes` divides the payload length by this value to bound
/// the entry count declared in the header before allocating.
// NOTE(review): the `8` matches the big-endian `u64` offset written per entry;
// the `17` is presumably the minimum tagged-id encoding — confirm against
// `PackObjectId::encode_tagged`.
const MIN_INDEX_ENTRY_LEN: usize = 17 + 8;
15
/// Entry in the pack index: one object id paired with its recorded offset.
#[derive(Debug, Clone, Copy)]
pub struct IndexEntry {
    /// Object identifier; the key by which index entries are sorted and searched.
    pub id: PackObjectId,
    /// Offset recorded for the object; serialized as 8 big-endian bytes.
    // NOTE(review): presumably a byte offset into the packfile — confirm with the writer.
    pub offset: u64,
}
22
/// Pack index for fast object lookup.
///
/// Lookups use a binary search over `entries`, so the entries must be ordered
/// by id (see `sort`) before `find` is called.
#[derive(Debug)]
pub struct PackIndex {
    /// Index entries, kept in insertion order until `sort` reorders them by id.
    entries: Vec<IndexEntry>,
}
28
29impl PackIndex {
30    /// Create a new empty index.
31    pub fn new() -> Self {
32        Self {
33            entries: Vec::new(),
34        }
35    }
36
37    /// Add an entry.
38    pub fn add(&mut self, id: PackObjectId, offset: u64) {
39        self.entries.push(IndexEntry { id, offset });
40    }
41
42    /// Sort entries by hash for binary search.
43    pub fn sort(&mut self) {
44        self.entries.sort_by_key(|e| e.id);
45    }
46
47    /// Find an entry by hash.
48    pub fn find(&self, id: &PackObjectId) -> Option<u64> {
49        self.entries
50            .binary_search_by_key(id, |e| e.id)
51            .ok()
52            .map(|idx| self.entries[idx].offset)
53    }
54
55    /// Serialize to bytes.
56    pub fn to_bytes(&self) -> Vec<u8> {
57        let mut result = Vec::new();
58        index_header().write_vec(&mut result, self.entries.len() as u64);
59        for entry in &self.entries {
60            entry.id.encode_tagged(&mut result);
61            result.extend_from_slice(&entry.offset.to_be_bytes());
62        }
63        result
64    }
65
66    /// Deserialize from bytes.
67    pub fn from_bytes(data: &[u8]) -> Result<Self> {
68        let header = index_header().verify(data)?;
69        let count = header.count;
70        let max_entries = ((data.len() - header.header_len) / MIN_INDEX_ENTRY_LEN) as u64;
71        if count > max_entries {
72            return Err(crate::store::StoreError::InvalidObject(format!(
73                "Index entry count {} exceeds available data capacity {}",
74                count, max_entries
75            )));
76        }
77        let count = usize::try_from(count).map_err(|_| {
78            crate::store::StoreError::InvalidObject(
79                "Index entry count exceeds platform limits".to_string(),
80            )
81        })?;
82        let mut entries = Vec::with_capacity(count);
83        let mut pos = header.header_len;
84        for _ in 0..count {
85            let (id, id_len) = PackObjectId::decode_tagged(&data[pos..])?;
86            pos += id_len;
87            if pos + 8 > data.len() {
88                return Err(crate::store::StoreError::InvalidObject(
89                    "Index data truncated".to_string(),
90                ));
91            }
92            let offset = u64::from_be_bytes(data[pos..pos + 8].try_into().map_err(|_| {
93                crate::store::StoreError::InvalidObject("Invalid offset length".to_string())
94            })?);
95            entries.push(IndexEntry { id, offset });
96            pos += 8;
97        }
98        Ok(Self { entries })
99    }
100}
101
102impl PackIndex {
103    /// Return all ids in this index.
104    pub fn ids(&self) -> Vec<PackObjectId> {
105        self.entries.iter().map(|e| e.id).collect()
106    }
107}
108
109impl Default for PackIndex {
110    fn default() -> Self {
111        Self::new()
112    }
113}
114
115pub(super) fn index_header() -> VersionedHeader {
116    VersionedHeader {
117        magic: INDEX_MAGIC,
118        version: INDEX_VERSION,
119        checksum: HeaderChecksum::None,
120        too_short: "Index too short",
121        invalid_magic: "Invalid index magic",
122        unsupported_version: "Unsupported index version",
123        checksum_mismatch: "",
124    }
125}