Skip to main content

objects/store/pack/
manager.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Pack file manager for coordinating multiple pack files.
3
4use std::{
5    fs,
6    path::{Path, PathBuf},
7};
8
9use tracing::{debug, instrument, trace};
10
11use crate::{
12    object::ContentHash,
13    store::{
14        Result,
15        pack::{ObjectType, PackObjectId, PackReader},
16    },
17};
18
/// Coordinates object lookups across the pack files found in a single
/// directory.
///
/// Packs are opened when the manager is built (and again on reload) and
/// are consulted in the order they are stored in `packs`.
pub struct PackManager {
    /// Directory that is scanned for `*.pack` / `*.idx` pairs.
    packs_dir: PathBuf,
    /// Packs that were successfully opened by the most recent load.
    packs: Vec<CachedPack>,
}
23
/// One opened pack: the on-disk locations of the pack and its index,
/// plus the reader that was opened from that pair of paths.
struct CachedPack {
    /// Path of the `.pack` file.
    pack_path: PathBuf,
    /// Path of the sibling `.idx` file.
    index_path: PathBuf,
    /// Reader opened from `pack_path` and `index_path`.
    reader: PackReader<'static>,
}
29
30impl PackManager {
31    pub fn new(packs_dir: PathBuf) -> Self {
32        let packs = Self::load_packs(&packs_dir).unwrap_or_default();
33        Self { packs_dir, packs }
34    }
35
36    fn discover_pack_paths(packs_dir: &Path) -> Result<Vec<(PathBuf, PathBuf)>> {
37        let mut packs = Vec::new();
38
39        if !packs_dir.exists() {
40            return Ok(packs);
41        }
42
43        for entry in fs::read_dir(packs_dir)? {
44            let entry = entry?;
45            let path = entry.path();
46
47            if path.extension().map(|e| e == "pack").unwrap_or(false) {
48                let index_path = path.with_extension("idx");
49                if index_path.exists() {
50                    packs.push((path, index_path));
51                }
52            }
53        }
54
55        debug!(count = packs.len(), "Discovered pack files");
56        Ok(packs)
57    }
58
59    fn load_packs(packs_dir: &Path) -> Result<Vec<CachedPack>> {
60        let mut cached_packs = Vec::new();
61
62        for (pack_path, index_path) in Self::discover_pack_paths(packs_dir)? {
63            match PackReader::open(&pack_path, &index_path) {
64                Ok(reader) => cached_packs.push(CachedPack {
65                    pack_path,
66                    index_path,
67                    reader,
68                }),
69                Err(error) => {
70                    debug!("Failed to open pack {:?}: {}", pack_path, error);
71                }
72            }
73        }
74
75        Ok(cached_packs)
76    }
77
78    pub fn reload(&mut self) -> Result<()> {
79        self.packs = Self::load_packs(&self.packs_dir)?;
80        Ok(())
81    }
82
83    /// Cheap check: does the packs directory hold more pack/index
84    /// pairs than we have loaded? Reuses `discover_pack_paths` so
85    /// half-installed packs (a `.pack` whose `.idx` sibling hasn't
86    /// landed yet) are filtered out — otherwise we'd loop forever
87    /// reloading a count we can never match.
88    pub(crate) fn needs_reload(&self) -> Result<bool> {
89        Ok(Self::discover_pack_paths(&self.packs_dir)?.len() > self.packs.len())
90    }
91
92    /// Reload the pack list only if the packs directory has more
93    /// pack/index pairs on disk than we know about in memory.
94    ///
95    /// Catches the multi-instance case: two `FsStore`s back the same
96    /// shared object dir (typical for lightweight thread worktrees,
97    /// where the worktree's repo opens its own store but points at
98    /// the main repo's `.heddle/`). When the worktree's store installs
99    /// a new pack, the main repo's already-open `pack_manager`
100    /// doesn't know about it; without this `get_blob`/`has_blob`
101    /// from the main repo would surface "object not found".
102    pub(crate) fn reload_if_disk_grew(&mut self) -> Result<bool> {
103        if !self.needs_reload()? {
104            return Ok(false);
105        }
106        debug!("PackManager: pack dir grew under us, reloading");
107        self.reload()?;
108        Ok(true)
109    }
110
111    pub fn get_object(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Vec<u8>)>> {
112        for pack in &self.packs {
113            if let Some((obj_type, data)) = pack.reader.get_object(id)? {
114                trace!("Found object in pack");
115                return Ok(Some((obj_type, data)));
116            }
117        }
118
119        trace!("Object not found in any pack");
120        Ok(None)
121    }
122
123    #[instrument(skip(self), fields(hash = %hash.short()))]
124    pub fn get_hashed_object(&self, hash: &ContentHash) -> Result<Option<(ObjectType, Vec<u8>)>> {
125        self.get_object(&PackObjectId::Hash(*hash))
126    }
127
128    /// Zero-copy variant of `get_hashed_object`. Returns
129    /// [`bytes::Bytes`] views into the underlying pack mmap when
130    /// the entry is non-delta and stored uncompressed; falls back
131    /// to the standard decompress-into-Vec path otherwise.
132    pub fn get_hashed_object_bytes(
133        &self,
134        hash: &ContentHash,
135    ) -> Result<Option<(ObjectType, bytes::Bytes)>> {
136        let id = PackObjectId::Hash(*hash);
137        for pack in &self.packs {
138            if let Some((obj_type, data)) = pack.reader.get_object_bytes(&id)? {
139                return Ok(Some((obj_type, data)));
140            }
141        }
142        Ok(None)
143    }
144
145    pub fn has_object(&self, hash: &ContentHash) -> bool {
146        self.packs
147            .iter()
148            .any(|pack| pack.reader.has_object(&PackObjectId::Hash(*hash)))
149    }
150
151    /// Look up the uncompressed size of `hash` across all loaded
152    /// packs without decompressing the payload. Returns `Ok(None)`
153    /// when the object isn't in any loaded pack.
154    pub fn get_hashed_object_size(&self, hash: &ContentHash) -> Result<Option<u64>> {
155        for pack in &self.packs {
156            if let Some(size) = pack.reader.get_hashed_object_size(hash)? {
157                return Ok(Some(size));
158            }
159        }
160        Ok(None)
161    }
162
163    pub fn has_object_id(&self, id: &PackObjectId) -> bool {
164        self.packs.iter().any(|pack| pack.reader.has_object(id))
165    }
166
167    /// List all object hashes across all packs.
168    pub fn list_all_hashes(&self) -> Result<Vec<ContentHash>> {
169        let mut hashes = Vec::new();
170        for pack in &self.packs {
171            hashes.extend(pack.reader.list_hashes());
172        }
173        Ok(hashes)
174    }
175
176    pub fn list_all_ids(&self) -> Result<Vec<PackObjectId>> {
177        let mut ids = Vec::new();
178        for pack in &self.packs {
179            ids.extend(pack.reader.list_ids());
180        }
181        Ok(ids)
182    }
183
184    /// Return paths of all pack files (for deletion during aggressive repack).
185    pub fn pack_file_paths(&self) -> Vec<(&Path, &Path)> {
186        self.packs
187            .iter()
188            .map(|pack| (pack.pack_path.as_path(), pack.index_path.as_path()))
189            .collect()
190    }
191
192    pub fn pack_count(&self) -> usize {
193        self.packs.len()
194    }
195
196    pub fn packs_dir(&self) -> &Path {
197        &self.packs_dir
198    }
199}