Skip to main content

vanta_store/
lib.rs

1//! `vanta-store` — the content-addressed store and download cache.
2//!
3//! Materialized tools live in immutable, content-addressed directories
4//! (`store/blake3-<hex>/`); publication is atomic (stage → rename); identical
5//! content is deduplicated; GC is reachability from roots. See `docs/09-store.md`.
6#![forbid(unsafe_code)]
7
8pub mod hash;
9pub mod link;
10
11pub use hash::{hash_bytes, hash_tree};
12pub use link::link_best;
13
14use std::collections::HashSet;
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicU64, Ordering};
18use std::time::{SystemTime, UNIX_EPOCH};
19use vanta_core::{Area, StoreKey, VtaError, VtaResult};
20
21static COUNTER: AtomicU64 = AtomicU64::new(0);
22
/// The content-addressed store rooted at `$VANTA_HOME`.
///
/// The handle only records the root path; every other location (`store/`,
/// `cache/downloads/`, `envs/`) is derived from it on demand, so cloning a
/// `Store` is cheap and both clones address the same on-disk state.
#[derive(Debug, Clone)]
pub struct Store {
    /// Root directory under which all store-managed directories live.
    home: PathBuf,
}
27
28impl Store {
29    /// Open (creating the directory skeleton if needed) a store at `home`.
30    pub fn open(home: impl AsRef<Path>) -> VtaResult<Store> {
31        let store = Store {
32            home: home.as_ref().to_path_buf(),
33        };
34        for dir in [store.store_dir(), store.downloads_dir(), store.envs_dir()] {
35            fs::create_dir_all(&dir).map_err(|e| io(&dir, e))?;
36        }
37        Ok(store)
38    }
39
40    pub fn store_dir(&self) -> PathBuf {
41        self.home.join("store")
42    }
43    pub fn downloads_dir(&self) -> PathBuf {
44        self.home.join("cache").join("downloads")
45    }
46    pub fn envs_dir(&self) -> PathBuf {
47        self.home.join("envs")
48    }
49
50    /// The directory a store key occupies.
51    pub fn entry_path(&self, key: &StoreKey) -> PathBuf {
52        self.store_dir().join(key.as_str())
53    }
54
55    /// Whether the store already contains an entry for `key`.
56    pub fn has(&self, key: &StoreKey) -> bool {
57        self.entry_path(key).is_dir()
58    }
59
60    /// Create a fresh, unique staging directory under the store. The installer
61    /// unpacks into it, then calls [`Store::publish_tree`].
62    pub fn new_staging(&self) -> VtaResult<PathBuf> {
63        let path = self.store_dir().join(format!(".tmp-{}", unique()));
64        fs::create_dir_all(&path).map_err(|e| io(&path, e))?;
65        Ok(path)
66    }
67
68    /// Publish a staged tree into the store, content-addressed and atomically.
69    /// If an identical entry already exists, the staged copy is discarded
70    /// (dedup) and the existing key returned.
71    pub fn publish_tree(&self, staged: &Path) -> VtaResult<StoreKey> {
72        let key = StoreKey::new(hash_tree(staged)?)?;
73        let dest = self.entry_path(&key);
74        if dest.exists() {
75            let _ = fs::remove_dir_all(staged);
76            return Ok(key);
77        }
78        fs::create_dir_all(self.store_dir()).map_err(|e| io(&self.store_dir(), e))?;
79        // Same-filesystem rename is atomic: a reader sees all-or-nothing.
80        fs::rename(staged, &dest)
81            .map_err(|e| VtaError::new(Area::Store, 4, format!("publishing store entry: {e}")))?;
82        let _ = set_readonly_recursive(&dest);
83        Ok(key)
84    }
85
86    /// Remove a store entry (used when a store hit fails re-verification, audit
87    /// H4). No-op if the entry is absent.
88    pub fn remove_entry(&self, key: &StoreKey) -> VtaResult<()> {
89        let path = self.entry_path(key);
90        if path.exists() {
91            let _ = make_writable_recursive(&path);
92            fs::remove_dir_all(&path).map_err(|e| io(&path, e))?;
93        }
94        Ok(())
95    }
96
97    /// Re-hash an entry and confirm it still matches its key (integrity check).
98    pub fn verify_entry(&self, key: &StoreKey) -> VtaResult<bool> {
99        let path = self.entry_path(key);
100        if !path.is_dir() {
101            return Ok(false);
102        }
103        Ok(hash_tree(&path)? == key.as_str())
104    }
105
106    /// Store a downloaded blob in the content-addressed download cache, returning
107    /// its `blake3-<hex>` cache key. Idempotent.
108    pub fn cache_put_blob(&self, bytes: &[u8]) -> VtaResult<String> {
109        let key = hash_bytes(bytes);
110        let path = self.downloads_dir().join(&key);
111        if !path.exists() {
112            let tmp = self.downloads_dir().join(format!(".tmp-{}", unique()));
113            fs::write(&tmp, bytes).map_err(|e| io(&tmp, e))?;
114            fs::rename(&tmp, &path).map_err(|e| io(&path, e))?;
115        }
116        Ok(key)
117    }
118
119    /// The path of a cached blob, if present.
120    pub fn cache_get_path(&self, cache_key: &str) -> Option<PathBuf> {
121        let p = self.downloads_dir().join(cache_key);
122        p.exists().then_some(p)
123    }
124
125    /// Garbage-collect: remove every store entry not reachable from `roots`, plus
126    /// stale staging dirs. Returns the number of entries removed.
127    pub fn gc(&self, roots: &HashSet<StoreKey>) -> VtaResult<usize> {
128        let mut removed = 0;
129        let dir = self.store_dir();
130        for entry in fs::read_dir(&dir).map_err(|e| io(&dir, e))? {
131            let entry = entry.map_err(|e| io(&dir, e))?;
132            let name = entry.file_name().to_string_lossy().into_owned();
133            let path = entry.path();
134            if name.starts_with(".tmp-") {
135                let _ = fs::remove_dir_all(&path);
136                continue;
137            }
138            if name.starts_with("blake3-") {
139                if let Ok(key) = StoreKey::new(name) {
140                    if !roots.contains(&key) {
141                        let _ = make_writable_recursive(&path);
142                        if fs::remove_dir_all(&path).is_ok() {
143                            removed += 1;
144                        }
145                    }
146                }
147            }
148        }
149        Ok(removed)
150    }
151}
152
153fn unique() -> String {
154    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
155    let nanos = SystemTime::now()
156        .duration_since(UNIX_EPOCH)
157        .map(|d| d.as_nanos())
158        .unwrap_or(0);
159    format!("{}-{}-{}", std::process::id(), nanos, n)
160}
161
/// Mark `path` and everything beneath it read-only.
///
/// Children are processed before their parent directory. Symbolic links found
/// while walking are skipped: changing permissions through a link would act
/// on the link's target, which may live outside the tree being sealed, and a
/// dangling link would otherwise abort the whole pass. The link targets that
/// are inside the tree are still reached through their real paths.
///
/// # Errors
/// Stops at, and returns, the first I/O error encountered.
fn set_readonly_recursive(path: &Path) -> std::io::Result<()> {
    if path.is_dir() {
        for entry in fs::read_dir(path)? {
            let entry = entry?;
            // `DirEntry::file_type` does not traverse symlinks.
            if entry.file_type()?.is_symlink() {
                continue;
            }
            set_readonly_recursive(&entry.path())?;
        }
    }
    let mut perms = fs::metadata(path)?.permissions();
    perms.set_readonly(true);
    fs::set_permissions(path, perms)
}
172
/// Recursively add write permission (used before moving a restored entry into
/// place; the executable bit is preserved so the content hash still verifies).
///
/// On Unix only the owner's write bit is added; all other mode bits are left
/// exactly as they were. Symbolic links inside the tree are skipped.
///
/// # Errors
/// Stops at, and returns, the first I/O error encountered.
pub fn ensure_writable(path: &Path) -> std::io::Result<()> {
    make_writable_recursive(path)
}

/// Walk `path` top-down, granting write access to it and everything beneath.
///
/// Symbolic links found while walking are skipped: changing permissions
/// through a link would act on the link's target, which may live outside the
/// tree, and a dangling link would otherwise abort the walk.
fn make_writable_recursive(path: &Path) -> std::io::Result<()> {
    add_owner_write(path)?;
    if path.is_dir() {
        for entry in fs::read_dir(path)? {
            let entry = entry?;
            // `DirEntry::file_type` does not traverse symlinks.
            if entry.file_type()?.is_symlink() {
                continue;
            }
            make_writable_recursive(&entry.path())?;
        }
    }
    Ok(())
}

/// Grant the owner write access to `path`, leaving every other mode bit alone.
///
/// `Permissions::set_readonly(false)` is deliberately avoided here: on Unix it
/// sets the write bit for user, group and other, making the file
/// world-writable.
#[cfg(unix)]
fn add_owner_write(path: &Path) -> std::io::Result<()> {
    use std::os::unix::fs::PermissionsExt;

    let mut perms = fs::metadata(path)?.permissions();
    // Keep only the permission bits; `mode()` may also carry file-type bits.
    let mode = perms.mode() & 0o7777;
    if mode & 0o200 == 0 {
        perms.set_mode(mode | 0o200);
        fs::set_permissions(path, perms)?;
    }
    Ok(())
}

/// Clear the read-only attribute on `path`.
#[cfg(not(unix))]
#[allow(clippy::permissions_set_readonly_false)] // intentional: off Unix this only clears the read-only attribute
fn add_owner_write(path: &Path) -> std::io::Result<()> {
    let mut perms = fs::metadata(path)?.permissions();
    perms.set_readonly(false);
    fs::set_permissions(path, perms)
}
191
192fn io(path: &Path, e: std::io::Error) -> VtaError {
193    VtaError::new(Area::Store, 2, format!("{}: {e}", path.display()))
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// A per-test home directory under the system temp dir, keyed by `tag` and
    /// the current process id. Any leftover from an earlier run is removed.
    fn home(tag: &str) -> PathBuf {
        let p = std::env::temp_dir().join(format!("vanta-store-{}-{}", tag, std::process::id()));
        scrub(&p);
        p
    }

    /// Delete a test home. Published entries are read-only, so write access
    /// is restored first; otherwise the removal fails and the tree is leaked.
    fn scrub(dir: &Path) {
        let _ = ensure_writable(dir);
        let _ = fs::remove_dir_all(dir);
    }

    #[test]
    fn publish_dedup_and_verify() {
        let h = home("publish");
        let store = Store::open(&h).unwrap();

        let staged = store.new_staging().unwrap();
        fs::write(staged.join("tool"), b"binary-bytes").unwrap();
        let key = store.publish_tree(&staged).unwrap();
        assert!(key.as_str().starts_with("blake3-"));
        assert!(store.has(&key));
        assert!(store.verify_entry(&key).unwrap());

        // Identical content publishes to the same key (dedup); staged dir consumed.
        let staged2 = store.new_staging().unwrap();
        fs::write(staged2.join("tool"), b"binary-bytes").unwrap();
        let key2 = store.publish_tree(&staged2).unwrap();
        assert_eq!(key, key2);
        assert!(!staged2.exists());

        scrub(&h);
    }

    #[test]
    fn cache_blob_roundtrip() {
        let h = home("cache");
        let store = Store::open(&h).unwrap();
        let k = store.cache_put_blob(b"download").unwrap();
        assert!(store.cache_get_path(&k).is_some());
        assert!(store.cache_get_path("blake3-absent").is_none());
        // Putting the same bytes again is a no-op that yields the same key.
        assert_eq!(store.cache_put_blob(b"download").unwrap(), k);
        scrub(&h);
    }

    #[test]
    fn remove_entry_is_idempotent() {
        let h = home("remove");
        let store = Store::open(&h).unwrap();
        let s = store.new_staging().unwrap();
        fs::write(s.join("f"), b"gone").unwrap();
        let key = store.publish_tree(&s).unwrap();

        store.remove_entry(&key).unwrap();
        assert!(!store.has(&key));
        assert!(!store.verify_entry(&key).unwrap());
        // Removing an absent entry is a no-op, not an error.
        store.remove_entry(&key).unwrap();

        scrub(&h);
    }

    #[test]
    fn gc_removes_unreachable() {
        let h = home("gc");
        let store = Store::open(&h).unwrap();
        let s = store.new_staging().unwrap();
        fs::write(s.join("f"), b"keepme").unwrap();
        let keep = store.publish_tree(&s).unwrap();
        let s2 = store.new_staging().unwrap();
        fs::write(s2.join("f"), b"dropme").unwrap();
        let drop = store.publish_tree(&s2).unwrap();

        let mut roots = HashSet::new();
        roots.insert(keep.clone());
        let removed = store.gc(&roots).unwrap();
        assert_eq!(removed, 1);
        assert!(store.has(&keep));
        assert!(!store.has(&drop));
        scrub(&h);
    }
}