Skip to main content

vanta_store/
lib.rs

1//! `vanta-store` — the content-addressed store and download cache.
2//!
3//! Materialized tools live in immutable, content-addressed directories
4//! (`store/blake3-<hex>/`); publication is atomic (stage → rename); identical
5//! content is deduplicated; GC is reachability from roots. See `docs/09-store.md`.
6#![forbid(unsafe_code)]
7
8pub mod hash;
9pub mod link;
10
11pub use hash::{hash_bytes, hash_tree};
12pub use link::link_best;
13
14use std::collections::HashSet;
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicU64, Ordering};
18use std::time::{SystemTime, UNIX_EPOCH};
19use vanta_core::{Area, StoreKey, VtaError, VtaResult};
20
/// Process-wide sequence number mixed into the names produced by `unique()`,
/// so that two names generated by the same process are always distinct even
/// when the clock reading is identical.
static COUNTER: AtomicU64 = AtomicU64::new(0);

/// The content-addressed store rooted at `$VANTA_HOME`.
///
/// The handle only carries the root path, so it is cheap to clone and safe to
/// print in diagnostics.
#[derive(Debug, Clone)]
pub struct Store {
    /// Root directory; `store/`, `cache/downloads/` and `envs/` live beneath it.
    home: PathBuf,
}
27
28impl Store {
29    /// Open (creating the directory skeleton if needed) a store at `home`.
30    pub fn open(home: impl AsRef<Path>) -> VtaResult<Store> {
31        let store = Store {
32            home: home.as_ref().to_path_buf(),
33        };
34        for dir in [store.store_dir(), store.downloads_dir(), store.envs_dir()] {
35            fs::create_dir_all(&dir).map_err(|e| io(&dir, e))?;
36        }
37        Ok(store)
38    }
39
40    pub fn store_dir(&self) -> PathBuf {
41        self.home.join("store")
42    }
43    pub fn downloads_dir(&self) -> PathBuf {
44        self.home.join("cache").join("downloads")
45    }
46    pub fn envs_dir(&self) -> PathBuf {
47        self.home.join("envs")
48    }
49
50    /// The directory a store key occupies.
51    pub fn entry_path(&self, key: &StoreKey) -> PathBuf {
52        self.store_dir().join(key.as_str())
53    }
54
55    /// Whether the store already contains an entry for `key`.
56    pub fn has(&self, key: &StoreKey) -> bool {
57        self.entry_path(key).is_dir()
58    }
59
60    /// Create a fresh, unique staging directory under the store. The installer
61    /// unpacks into it, then calls [`Store::publish_tree`].
62    pub fn new_staging(&self) -> VtaResult<PathBuf> {
63        let path = self.store_dir().join(format!(".tmp-{}", unique()));
64        fs::create_dir_all(&path).map_err(|e| io(&path, e))?;
65        Ok(path)
66    }
67
68    /// Publish a staged tree into the store, content-addressed and atomically.
69    /// If an identical entry already exists, the staged copy is discarded
70    /// (dedup) and the existing key returned.
71    pub fn publish_tree(&self, staged: &Path) -> VtaResult<StoreKey> {
72        let key = StoreKey::new(hash_tree(staged)?)?;
73        let dest = self.entry_path(&key);
74        if dest.exists() {
75            let _ = fs::remove_dir_all(staged);
76            return Ok(key);
77        }
78        fs::create_dir_all(self.store_dir()).map_err(|e| io(&self.store_dir(), e))?;
79        // Same-filesystem rename is atomic: a reader sees all-or-nothing.
80        fs::rename(staged, &dest)
81            .map_err(|e| VtaError::new(Area::Store, 4, format!("publishing store entry: {e}")))?;
82        let _ = set_readonly_recursive(&dest);
83        Ok(key)
84    }
85
86    /// Re-hash an entry and confirm it still matches its key (integrity check).
87    pub fn verify_entry(&self, key: &StoreKey) -> VtaResult<bool> {
88        let path = self.entry_path(key);
89        if !path.is_dir() {
90            return Ok(false);
91        }
92        Ok(hash_tree(&path)? == key.as_str())
93    }
94
95    /// Store a downloaded blob in the content-addressed download cache, returning
96    /// its `blake3-<hex>` cache key. Idempotent.
97    pub fn cache_put_blob(&self, bytes: &[u8]) -> VtaResult<String> {
98        let key = hash_bytes(bytes);
99        let path = self.downloads_dir().join(&key);
100        if !path.exists() {
101            let tmp = self.downloads_dir().join(format!(".tmp-{}", unique()));
102            fs::write(&tmp, bytes).map_err(|e| io(&tmp, e))?;
103            fs::rename(&tmp, &path).map_err(|e| io(&path, e))?;
104        }
105        Ok(key)
106    }
107
108    /// The path of a cached blob, if present.
109    pub fn cache_get_path(&self, cache_key: &str) -> Option<PathBuf> {
110        let p = self.downloads_dir().join(cache_key);
111        p.exists().then_some(p)
112    }
113
114    /// Garbage-collect: remove every store entry not reachable from `roots`, plus
115    /// stale staging dirs. Returns the number of entries removed.
116    pub fn gc(&self, roots: &HashSet<StoreKey>) -> VtaResult<usize> {
117        let mut removed = 0;
118        let dir = self.store_dir();
119        for entry in fs::read_dir(&dir).map_err(|e| io(&dir, e))? {
120            let entry = entry.map_err(|e| io(&dir, e))?;
121            let name = entry.file_name().to_string_lossy().into_owned();
122            let path = entry.path();
123            if name.starts_with(".tmp-") {
124                let _ = fs::remove_dir_all(&path);
125                continue;
126            }
127            if name.starts_with("blake3-") {
128                if let Ok(key) = StoreKey::new(name) {
129                    if !roots.contains(&key) {
130                        let _ = make_writable_recursive(&path);
131                        if fs::remove_dir_all(&path).is_ok() {
132                            removed += 1;
133                        }
134                    }
135                }
136            }
137        }
138        Ok(removed)
139    }
140}
141
142fn unique() -> String {
143    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
144    let nanos = SystemTime::now()
145        .duration_since(UNIX_EPOCH)
146        .map(|d| d.as_nanos())
147        .unwrap_or(0);
148    format!("{}-{}-{}", std::process::id(), nanos, n)
149}
150
/// Recursively mark `path` and everything beneath it read-only.
///
/// Children are processed before their parent directory. Symbolic links are
/// neither followed nor modified: following them would change permissions on
/// whatever they point at (possibly outside the tree), a dangling link would
/// abort the walk, and a link cycle would recurse without end.
///
/// # Errors
/// Returns the first I/O error hit while reading metadata, listing a
/// directory, or setting permissions.
fn set_readonly_recursive(path: &Path) -> std::io::Result<()> {
    // `symlink_metadata` describes the link itself rather than its target.
    let meta = fs::symlink_metadata(path)?;
    if meta.file_type().is_symlink() {
        return Ok(());
    }
    if meta.is_dir() {
        for entry in fs::read_dir(path)? {
            set_readonly_recursive(&entry?.path())?;
        }
    }
    let mut perms = meta.permissions();
    perms.set_readonly(true);
    fs::set_permissions(path, perms)
}
161
/// Recursively add write permission (used before moving a restored entry into
/// place; the executable bit is preserved so the content hash still verifies).
///
/// Symbolic links inside the tree are left untouched and are not followed.
///
/// # Errors
/// Returns the first I/O error hit while walking the tree.
pub fn ensure_writable(path: &Path) -> std::io::Result<()> {
    make_writable_recursive(path)
}

/// Clear the read-only flag on `path`, then on everything beneath it.
///
/// A directory is made writable before its children are visited. Symbolic
/// links are skipped: following them would alter permissions on their targets
/// (possibly outside the tree), and a dangling link would otherwise abort the
/// walk and leave the rest of the tree read-only.
#[allow(clippy::permissions_set_readonly_false)] // intentional: make a GC target deletable
fn make_writable_recursive(path: &Path) -> std::io::Result<()> {
    // `symlink_metadata` describes the link itself rather than its target.
    let meta = fs::symlink_metadata(path)?;
    if meta.file_type().is_symlink() {
        return Ok(());
    }
    let mut perms = meta.permissions();
    perms.set_readonly(false);
    fs::set_permissions(path, perms)?;
    if meta.is_dir() {
        for entry in fs::read_dir(path)? {
            make_writable_recursive(&entry?.path())?;
        }
    }
    Ok(())
}
180
181fn io(path: &Path, e: std::io::Error) -> VtaError {
182    VtaError::new(Area::Store, 2, format!("{}: {e}", path.display()))
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Best-effort removal of a test home directory.
    ///
    /// Published entries are marked read-only, so write access is restored
    /// first; otherwise the removal can fail and leave the directory behind.
    fn cleanup(home: &Path) {
        let _ = ensure_writable(home);
        let _ = fs::remove_dir_all(home);
    }

    /// A fresh per-test, per-process home path under the system temp dir.
    /// Any leftover from an earlier run is removed first.
    fn home(tag: &str) -> PathBuf {
        let p = std::env::temp_dir().join(format!("vanta-store-{}-{}", tag, std::process::id()));
        cleanup(&p);
        p
    }

    #[test]
    fn publish_dedup_and_verify() {
        let h = home("publish");
        let store = Store::open(&h).unwrap();

        let staged = store.new_staging().unwrap();
        fs::write(staged.join("tool"), b"binary-bytes").unwrap();
        let key = store.publish_tree(&staged).unwrap();
        assert!(key.as_str().starts_with("blake3-"));
        assert!(store.has(&key));
        assert!(store.verify_entry(&key).unwrap());

        // Identical content publishes to the same key (dedup); staged dir consumed.
        let staged2 = store.new_staging().unwrap();
        fs::write(staged2.join("tool"), b"binary-bytes").unwrap();
        let key2 = store.publish_tree(&staged2).unwrap();
        assert_eq!(key, key2);
        assert!(!staged2.exists());

        cleanup(&h);
    }

    #[test]
    fn cache_blob_roundtrip() {
        let h = home("cache");
        let store = Store::open(&h).unwrap();
        let k = store.cache_put_blob(b"download").unwrap();
        assert!(store.cache_get_path(&k).is_some());
        assert!(store.cache_get_path("blake3-absent").is_none());
        cleanup(&h);
    }

    #[test]
    fn gc_removes_unreachable() {
        let h = home("gc");
        let store = Store::open(&h).unwrap();
        let s = store.new_staging().unwrap();
        fs::write(s.join("f"), b"keepme").unwrap();
        let keep = store.publish_tree(&s).unwrap();
        let s2 = store.new_staging().unwrap();
        fs::write(s2.join("f"), b"dropme").unwrap();
        // Not named `drop`, to avoid shadowing the prelude function.
        let unreachable_key = store.publish_tree(&s2).unwrap();

        let mut roots = HashSet::new();
        roots.insert(keep.clone());
        let removed = store.gc(&roots).unwrap();
        assert_eq!(removed, 1);
        assert!(store.has(&keep));
        assert!(!store.has(&unreachable_key));
        cleanup(&h);
    }
}