Skip to main content

ripvec_core/cache/
store.rs

1//! Content-addressed object store with git-style sharding.
2//!
3//! Objects are stored at `<root>/xx/yyyyyyyy...` where `xx` is the first
4//! two hex chars of the blake3 hash and `yyyy...` is the remainder. This
5//! prevents too many files in a single directory.
6
7use std::path::{Path, PathBuf};
8
/// Content-addressed object store with git-style `xx/hash` sharding.
///
/// The store is only a handle on a root directory: cloning it is cheap and
/// both clones refer to the same on-disk objects.
#[derive(Debug, Clone)]
pub struct ObjectStore {
    /// Root directory of the store (e.g., `~/.cache/ripvec/<project>/objects/`).
    root: PathBuf,
}
14
15impl ObjectStore {
16    /// Create a new store rooted at the given directory.
17    ///
18    /// The directory is created on first write, not at construction time.
19    #[must_use]
20    pub fn new(root: &Path) -> Self {
21        Self {
22            root: root.to_path_buf(),
23        }
24    }
25
26    /// Write data to the store under the given hash.
27    ///
28    /// Creates the `xx/` prefix directory if it doesn't exist.
29    ///
30    /// # Errors
31    ///
32    /// Returns an error if the directory cannot be created or the file
33    /// cannot be written.
34    pub fn write(&self, hash: &str, data: &[u8]) -> crate::Result<()> {
35        let path = self.object_path(hash);
36        if let Some(parent) = path.parent() {
37            std::fs::create_dir_all(parent).map_err(|e| crate::Error::Io {
38                path: parent.display().to_string(),
39                source: e,
40            })?;
41        }
42        std::fs::write(&path, data).map_err(|e| crate::Error::Io {
43            path: path.display().to_string(),
44            source: e,
45        })
46    }
47
48    /// Read data from the store for the given hash.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if the object file does not exist or cannot be read.
53    pub fn read(&self, hash: &str) -> crate::Result<Vec<u8>> {
54        let path = self.object_path(hash);
55        std::fs::read(&path).map_err(|e| crate::Error::Io {
56            path: path.display().to_string(),
57            source: e,
58        })
59    }
60
61    /// Check whether an object exists in the store.
62    #[must_use]
63    pub fn exists(&self, hash: &str) -> bool {
64        self.object_path(hash).exists()
65    }
66
67    /// Remove objects not in the `keep` set. Returns the number of removed objects.
68    ///
69    /// Also removes empty `xx/` prefix directories after cleanup.
70    ///
71    /// # Errors
72    ///
73    /// Returns an error if the store directory cannot be read.
74    pub fn gc(&self, keep: &[String]) -> crate::Result<usize> {
75        let keep_set: std::collections::HashSet<&str> = keep.iter().map(String::as_str).collect();
76        let mut removed = 0;
77
78        let Ok(entries) = std::fs::read_dir(&self.root) else {
79            return Ok(0); // store doesn't exist yet
80        };
81
82        for prefix_entry in entries.flatten() {
83            let prefix_path = prefix_entry.path();
84            if !prefix_path.is_dir() {
85                continue;
86            }
87            let prefix = prefix_entry.file_name();
88            let prefix_str = prefix.to_string_lossy();
89
90            if let Ok(files) = std::fs::read_dir(&prefix_path) {
91                for file_entry in files.flatten() {
92                    let file_name = file_entry.file_name();
93                    let hash = format!("{}{}", prefix_str, file_name.to_string_lossy());
94                    if !keep_set.contains(hash.as_str())
95                        && std::fs::remove_file(file_entry.path()).is_ok()
96                    {
97                        removed += 1;
98                    }
99                }
100            }
101
102            // Remove empty prefix directory
103            let _ = std::fs::remove_dir(&prefix_path); // fails silently if not empty
104        }
105
106        Ok(removed)
107    }
108
109    /// Resolve the filesystem path for an object hash.
110    fn object_path(&self, hash: &str) -> PathBuf {
111        debug_assert!(
112            hash.len() >= 3,
113            "hash must be at least 3 chars for sharding"
114        );
115        self.root.join(&hash[..2]).join(&hash[2..])
116    }
117}
118
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// 64-character hex key shared by the round-trip and sharding tests.
    const SAMPLE_HASH: &str = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";

    /// Build a store rooted in a fresh temporary directory.
    ///
    /// The `TempDir` handle is returned alongside the store so it stays alive
    /// for the duration of the calling test.
    fn temp_store() -> (TempDir, ObjectStore) {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::new(dir.path());
        (dir, store)
    }

    /// Bytes written under a hash come back unchanged.
    #[test]
    fn write_and_read_round_trip() {
        let (_dir, store) = temp_store();
        let payload = b"hello world";
        store.write(SAMPLE_HASH, payload).unwrap();
        assert_eq!(store.read(SAMPLE_HASH).unwrap(), payload);
    }

    /// Objects land at `<root>/<first two chars>/<remaining chars>`.
    #[test]
    fn git_style_sharding() {
        let (dir, store) = temp_store();
        store.write(SAMPLE_HASH, b"data").unwrap();
        let expected = dir.path().join("ab").join(&SAMPLE_HASH[2..]);
        assert!(expected.exists());
    }

    /// `gc` deletes exactly the objects missing from the keep list.
    #[test]
    fn gc_removes_unreferenced() {
        let (_dir, store) = temp_store();
        let kept = "aaaa0000111122223333444455556666aaaa0000111122223333444455556666";
        let dropped = "bbbb0000111122223333444455556666bbbb0000111122223333444455556666";
        store.write(kept, b"keep").unwrap();
        store.write(dropped, b"delete").unwrap();

        let keep_list = [kept.to_string()];
        assert_eq!(store.gc(&keep_list).unwrap(), 1);
        assert!(store.exists(kept));
        assert!(!store.exists(dropped));
    }

    /// Collecting a store with no objects removes nothing.
    #[test]
    fn gc_empty_store_returns_zero() {
        let (_dir, store) = temp_store();
        let removed = store.gc(&[]).unwrap();
        assert_eq!(removed, 0);
    }

    /// Reading a hash that was never written is an error.
    #[test]
    fn read_nonexistent_returns_error() {
        let (_dir, store) = temp_store();
        let outcome = store.read("abc123");
        assert!(outcome.is_err());
    }
}