Skip to main content

ripvec_core/cache/
store.rs

1//! Content-addressed object store with git-style sharding.
2//!
3//! Objects are stored at `<root>/xx/yyyyyyyy...` where `xx` is the first
4//! two hex chars of the blake3 hash and `yyyy...` is the remainder. This
5//! prevents too many files in a single directory.
6
7use std::path::{Path, PathBuf};
8
/// Content-addressed object store with git-style `xx/hash` sharding.
///
/// The store keeps no in-memory state besides its root path; every
/// operation goes straight to the filesystem. Cloning therefore only
/// copies the path and yields a handle to the same on-disk store.
#[derive(Debug, Clone)]
pub struct ObjectStore {
    /// Root directory of the store (e.g., `~/.cache/ripvec/<project>/objects/`).
    root: PathBuf,
}
14
15impl ObjectStore {
16    /// Create a new store rooted at the given directory.
17    ///
18    /// The directory is created on first write, not at construction time.
19    #[must_use]
20    pub fn new(root: &Path) -> Self {
21        Self {
22            root: root.to_path_buf(),
23        }
24    }
25
26    /// Write data to the store under the given hash.
27    ///
28    /// Creates the `xx/` prefix directory if it doesn't exist.
29    ///
30    /// # Errors
31    ///
32    /// Returns an error if the directory cannot be created or the file
33    /// cannot be written.
34    pub fn write(&self, hash: &str, data: &[u8]) -> crate::Result<()> {
35        let path = self.object_path(hash);
36        if let Some(parent) = path.parent() {
37            std::fs::create_dir_all(parent).map_err(|e| crate::Error::Io {
38                path: parent.display().to_string(),
39                source: e,
40            })?;
41        }
42        std::fs::write(&path, data).map_err(|e| crate::Error::Io {
43            path: path.display().to_string(),
44            source: e,
45        })
46    }
47
48    /// Read data from the store for the given hash.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if the object file does not exist or cannot be read.
53    pub fn read(&self, hash: &str) -> crate::Result<Vec<u8>> {
54        let path = self.object_path(hash);
55        std::fs::read(&path).map_err(|e| crate::Error::Io {
56            path: path.display().to_string(),
57            source: e,
58        })
59    }
60
61    /// Check whether an object exists in the store.
62    #[must_use]
63    pub fn exists(&self, hash: &str) -> bool {
64        self.object_path(hash).exists()
65    }
66
67    /// Remove objects not in the `keep` set. Returns the number of removed objects.
68    ///
69    /// Also removes empty `xx/` prefix directories after cleanup.
70    ///
71    /// # Errors
72    ///
73    /// Returns an error if the store directory cannot be read.
74    pub fn gc(&self, keep: &[String]) -> crate::Result<usize> {
75        let keep_set: std::collections::HashSet<&str> = keep.iter().map(String::as_str).collect();
76        let mut removed = 0;
77
78        let Ok(entries) = std::fs::read_dir(&self.root) else {
79            return Ok(0); // store doesn't exist yet
80        };
81
82        for prefix_entry in entries.flatten() {
83            let prefix_path = prefix_entry.path();
84            if !prefix_path.is_dir() {
85                continue;
86            }
87            let prefix = prefix_entry.file_name();
88            let prefix_str = prefix.to_string_lossy();
89
90            if let Ok(files) = std::fs::read_dir(&prefix_path) {
91                for file_entry in files.flatten() {
92                    let file_name = file_entry.file_name();
93                    let hash = format!("{}{}", prefix_str, file_name.to_string_lossy());
94                    if !keep_set.contains(hash.as_str())
95                        && std::fs::remove_file(file_entry.path()).is_ok()
96                    {
97                        removed += 1;
98                    }
99                }
100            }
101
102            // Remove empty prefix directory
103            let _ = std::fs::remove_dir(&prefix_path); // fails silently if not empty
104        }
105
106        Ok(removed)
107    }
108
109    /// List all object hashes in the store.
110    ///
111    /// Scans all `xx/` prefix directories and reconstructs the full hash
112    /// from `prefix + filename`.
113    #[must_use]
114    pub fn list_hashes(&self) -> Vec<String> {
115        let mut hashes = Vec::new();
116        let Ok(entries) = std::fs::read_dir(&self.root) else {
117            return hashes;
118        };
119        for prefix_entry in entries.flatten() {
120            let prefix_path = prefix_entry.path();
121            if !prefix_path.is_dir() {
122                continue;
123            }
124            let prefix = prefix_entry.file_name();
125            let prefix_str = prefix.to_string_lossy();
126            if let Ok(files) = std::fs::read_dir(&prefix_path) {
127                for file_entry in files.flatten() {
128                    let file_name = file_entry.file_name();
129                    hashes.push(format!("{}{}", prefix_str, file_name.to_string_lossy()));
130                }
131            }
132        }
133        hashes
134    }
135
136    /// Resolve the filesystem path for an object hash.
137    fn object_path(&self, hash: &str) -> PathBuf {
138        debug_assert!(
139            hash.len() >= 3,
140            "hash must be at least 3 chars for sharding"
141        );
142        self.root.join(&hash[..2]).join(&hash[2..])
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A 64-character hex string shaped like a blake3 digest.
    const HASH: &str = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";

    /// Create a store inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the store so the directory stays
    /// alive for the duration of the test.
    fn temp_store() -> (TempDir, ObjectStore) {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::new(dir.path());
        (dir, store)
    }

    #[test]
    fn write_and_read_round_trip() {
        let (_dir, store) = temp_store();
        let data = b"hello world";
        store.write(HASH, data).unwrap();
        let read = store.read(HASH).unwrap();
        assert_eq!(read, data);
    }

    #[test]
    fn git_style_sharding() {
        let (dir, store) = temp_store();
        store.write(HASH, b"data").unwrap();
        assert!(dir.path().join("ab").join(&HASH[2..]).exists());
    }

    #[test]
    fn exists_reports_presence() {
        let (_dir, store) = temp_store();
        assert!(!store.exists(HASH));
        store.write(HASH, b"data").unwrap();
        assert!(store.exists(HASH));
    }

    #[test]
    fn gc_removes_unreferenced() {
        let (_dir, store) = temp_store();
        let h1 = "aaaa0000111122223333444455556666aaaa0000111122223333444455556666";
        let h2 = "bbbb0000111122223333444455556666bbbb0000111122223333444455556666";
        store.write(h1, b"keep").unwrap();
        store.write(h2, b"delete").unwrap();
        let removed = store.gc(&[h1.to_string()]).unwrap();
        assert_eq!(removed, 1);
        assert!(store.exists(h1));
        assert!(!store.exists(h2));
    }

    #[test]
    fn gc_removes_empty_prefix_dirs() {
        let (dir, store) = temp_store();
        let kept = "aaaa0000111122223333444455556666aaaa0000111122223333444455556666";
        let dropped = "bbbb0000111122223333444455556666bbbb0000111122223333444455556666";
        store.write(kept, b"keep").unwrap();
        store.write(dropped, b"delete").unwrap();
        store.gc(&[kept.to_string()]).unwrap();
        // The shard that still holds an object survives; the emptied one is pruned.
        assert!(dir.path().join("aa").is_dir());
        assert!(!dir.path().join("bb").exists());
    }

    #[test]
    fn gc_empty_store_returns_zero() {
        let (_dir, store) = temp_store();
        assert_eq!(store.gc(&[]).unwrap(), 0);
    }

    #[test]
    fn list_hashes_returns_all_objects() {
        let (_dir, store) = temp_store();
        let h1 = "aaaa0000111122223333444455556666aaaa0000111122223333444455556666";
        let h2 = "bbbb0000111122223333444455556666bbbb0000111122223333444455556666";
        store.write(h1, b"one").unwrap();
        store.write(h2, b"two").unwrap();
        // Directory iteration order is unspecified, so compare sorted.
        let mut listed = store.list_hashes();
        listed.sort();
        assert_eq!(listed, vec![h1.to_string(), h2.to_string()]);
    }

    #[test]
    fn list_hashes_missing_root_is_empty() {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::new(&dir.path().join("never-created"));
        assert!(store.list_hashes().is_empty());
    }

    #[test]
    fn read_nonexistent_returns_error() {
        let (_dir, store) = temp_store();
        assert!(store.read("abc123").is_err());
    }
}