hashtree_fs/
lib.rs

1//! Filesystem-based content-addressed blob storage.
2//!
3//! Stores blobs in a directory structure similar to git:
4//! `{base_path}/{first 2 chars of hash}/{remaining hash chars}`
5//!
6//! For example, a blob with hash `abcdef123...` would be stored at:
7//! `~/.hashtree/blobs/ab/cdef123...`
8
9use async_trait::async_trait;
10use hashtree_core::store::{Store, StoreError};
11use hashtree_core::types::Hash;
12use std::fs;
13use std::path::{Path, PathBuf};
14
/// Filesystem-backed blob store implementing hashtree's `Store` trait.
///
/// Blobs live under `base_path` in a sharded layout: the first two hex
/// characters of a blob's hash name the shard directory (256 possible
/// shards) and the remaining hex characters name the file inside it.
///
/// Cloning copies only the stored path, so a clone is a second handle onto
/// the same directory on disk.
#[derive(Debug, Clone)]
pub struct FsBlobStore {
    /// Root directory under which the shard directories are created.
    base_path: PathBuf,
}
22
23impl FsBlobStore {
24    /// Create a new filesystem blob store at the given path.
25    ///
26    /// Creates the directory if it doesn't exist.
27    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, StoreError> {
28        let base_path = path.as_ref().to_path_buf();
29        fs::create_dir_all(&base_path)?;
30        Ok(Self { base_path })
31    }
32
33    /// Get the file path for a given hash.
34    ///
35    /// Format: `{base_path}/{first 2 hex chars}/{remaining 62 hex chars}`
36    fn blob_path(&self, hash: &Hash) -> PathBuf {
37        let hex = hex::encode(hash);
38        let (prefix, rest) = hex.split_at(2);
39        self.base_path.join(prefix).join(rest)
40    }
41
42    /// Sync put operation.
43    pub fn put_sync(&self, hash: Hash, data: &[u8]) -> Result<bool, StoreError> {
44        let path = self.blob_path(&hash);
45
46        // Check if already exists
47        if path.exists() {
48            return Ok(false);
49        }
50
51        // Create parent directory if needed
52        if let Some(parent) = path.parent() {
53            fs::create_dir_all(parent)?;
54        }
55
56        // Write atomically using temp file + rename
57        let temp_path = path.with_extension("tmp");
58        fs::write(&temp_path, data)?;
59        fs::rename(&temp_path, &path)?;
60
61        Ok(true)
62    }
63
64    /// Sync get operation.
65    pub fn get_sync(&self, hash: &Hash) -> Result<Option<Vec<u8>>, StoreError> {
66        let path = self.blob_path(hash);
67        if path.exists() {
68            Ok(Some(fs::read(&path)?))
69        } else {
70            Ok(None)
71        }
72    }
73
74    /// Check if a hash exists.
75    pub fn exists(&self, hash: &Hash) -> bool {
76        self.blob_path(hash).exists()
77    }
78
79    /// Sync delete operation.
80    pub fn delete_sync(&self, hash: &Hash) -> Result<bool, StoreError> {
81        let path = self.blob_path(hash);
82        if path.exists() {
83            fs::remove_file(&path)?;
84            Ok(true)
85        } else {
86            Ok(false)
87        }
88    }
89
90    /// List all hashes in the store.
91    pub fn list(&self) -> Result<Vec<Hash>, StoreError> {
92        let mut hashes = Vec::new();
93
94        // Iterate over prefix directories (00-ff)
95        let entries = match fs::read_dir(&self.base_path) {
96            Ok(e) => e,
97            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(hashes),
98            Err(e) => return Err(e.into()),
99        };
100
101        for prefix_entry in entries {
102            let prefix_entry = prefix_entry?;
103            let prefix_path = prefix_entry.path();
104
105            if !prefix_path.is_dir() {
106                continue;
107            }
108
109            let prefix = match prefix_path.file_name().and_then(|n| n.to_str()) {
110                Some(p) if p.len() == 2 => p.to_string(),
111                _ => continue,
112            };
113
114            // Iterate over blobs in this prefix directory
115            for blob_entry in fs::read_dir(&prefix_path)? {
116                let blob_entry = blob_entry?;
117                let rest = match blob_entry.file_name().to_str() {
118                    Some(r) if r.len() == 62 => r.to_string(),
119                    _ => continue,
120                };
121
122                // Reconstruct full hash hex
123                let full_hex = format!("{}{}", prefix, rest);
124                if let Ok(bytes) = hex::decode(&full_hex) {
125                    if bytes.len() == 32 {
126                        let mut hash = [0u8; 32];
127                        hash.copy_from_slice(&bytes);
128                        hashes.push(hash);
129                    }
130                }
131            }
132        }
133
134        Ok(hashes)
135    }
136
137    /// Get storage statistics.
138    pub fn stats(&self) -> Result<FsStats, StoreError> {
139        let mut count = 0usize;
140        let mut total_bytes = 0u64;
141
142        let entries = match fs::read_dir(&self.base_path) {
143            Ok(e) => e,
144            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
145                return Ok(FsStats { count, total_bytes })
146            }
147            Err(e) => return Err(e.into()),
148        };
149
150        for prefix_entry in entries {
151            let prefix_entry = prefix_entry?;
152            let prefix_path = prefix_entry.path();
153
154            if !prefix_path.is_dir() {
155                continue;
156            }
157
158            for blob_entry in fs::read_dir(&prefix_path)? {
159                let blob_entry = blob_entry?;
160                if blob_entry.path().is_file() {
161                    count += 1;
162                    total_bytes += blob_entry.metadata()?.len();
163                }
164            }
165        }
166
167        Ok(FsStats { count, total_bytes })
168    }
169}
170
/// Storage statistics reported by `FsBlobStore::stats`.
///
/// The `Default` value has both fields set to zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FsStats {
    /// Number of files counted.
    pub count: usize,
    /// Sum of the counted files' sizes, in bytes.
    pub total_bytes: u64,
}
177
178#[async_trait]
179impl Store for FsBlobStore {
180    async fn put(&self, hash: Hash, data: Vec<u8>) -> Result<bool, StoreError> {
181        self.put_sync(hash, &data)
182    }
183
184    async fn get(&self, hash: &Hash) -> Result<Option<Vec<u8>>, StoreError> {
185        self.get_sync(hash)
186    }
187
188    async fn has(&self, hash: &Hash) -> Result<bool, StoreError> {
189        Ok(self.exists(hash))
190    }
191
192    async fn delete(&self, hash: &Hash) -> Result<bool, StoreError> {
193        self.delete_sync(hash)
194    }
195}
196
197#[cfg(test)]
198mod tests {
199    use super::*;
200    use hashtree_core::sha256;
201    use tempfile::TempDir;
202
203    #[tokio::test]
204    async fn test_put_get() {
205        let temp = TempDir::new().unwrap();
206        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
207
208        let data = b"hello filesystem";
209        let hash = sha256(data);
210        store.put(hash, data.to_vec()).await.unwrap();
211
212        assert!(store.has(&hash).await.unwrap());
213        assert_eq!(store.get(&hash).await.unwrap(), Some(data.to_vec()));
214    }
215
216    #[tokio::test]
217    async fn test_get_missing() {
218        let temp = TempDir::new().unwrap();
219        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
220
221        let hash = [0u8; 32];
222        assert!(!store.has(&hash).await.unwrap());
223        assert_eq!(store.get(&hash).await.unwrap(), None);
224    }
225
226    #[tokio::test]
227    async fn test_delete() {
228        let temp = TempDir::new().unwrap();
229        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
230
231        let data = b"delete me";
232        let hash = sha256(data);
233        store.put(hash, data.to_vec()).await.unwrap();
234        assert!(store.has(&hash).await.unwrap());
235
236        assert!(store.delete(&hash).await.unwrap());
237        assert!(!store.has(&hash).await.unwrap());
238        assert!(!store.delete(&hash).await.unwrap());
239    }
240
241    #[tokio::test]
242    async fn test_deduplication() {
243        let temp = TempDir::new().unwrap();
244        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
245
246        let data = b"same content";
247        let hash = sha256(data);
248
249        // First put returns true (newly stored)
250        assert!(store.put(hash, data.to_vec()).await.unwrap());
251        // Second put returns false (already existed)
252        assert!(!store.put(hash, data.to_vec()).await.unwrap());
253
254        assert_eq!(store.list().unwrap().len(), 1);
255    }
256
257    #[tokio::test]
258    async fn test_list() {
259        let temp = TempDir::new().unwrap();
260        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
261
262        let d1 = b"one";
263        let d2 = b"two";
264        let d3 = b"three";
265        let h1 = sha256(d1);
266        let h2 = sha256(d2);
267        let h3 = sha256(d3);
268
269        store.put(h1, d1.to_vec()).await.unwrap();
270        store.put(h2, d2.to_vec()).await.unwrap();
271        store.put(h3, d3.to_vec()).await.unwrap();
272
273        let hashes = store.list().unwrap();
274        assert_eq!(hashes.len(), 3);
275        assert!(hashes.contains(&h1));
276        assert!(hashes.contains(&h2));
277        assert!(hashes.contains(&h3));
278    }
279
280    #[tokio::test]
281    async fn test_stats() {
282        let temp = TempDir::new().unwrap();
283        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
284
285        let d1 = b"hello";
286        let d2 = b"world";
287        store.put(sha256(d1), d1.to_vec()).await.unwrap();
288        store.put(sha256(d2), d2.to_vec()).await.unwrap();
289
290        let stats = store.stats().unwrap();
291        assert_eq!(stats.count, 2);
292        assert_eq!(stats.total_bytes, 10);
293    }
294
295    #[tokio::test]
296    async fn test_directory_structure() {
297        let temp = TempDir::new().unwrap();
298        let blobs_path = temp.path().join("blobs");
299        let store = FsBlobStore::new(&blobs_path).unwrap();
300
301        let data = b"test data";
302        let hash = sha256(data);
303        let hex = hex::encode(hash);
304
305        store.put(hash, data.to_vec()).await.unwrap();
306
307        // Verify the file exists at the correct path
308        let prefix = &hex[..2];
309        let rest = &hex[2..];
310        let expected_path = blobs_path.join(prefix).join(rest);
311
312        assert!(expected_path.exists(), "Blob should be at {:?}", expected_path);
313        assert_eq!(fs::read(&expected_path).unwrap(), data);
314    }
315
316    #[test]
317    fn test_blob_path_format() {
318        let temp = TempDir::new().unwrap();
319        let store = FsBlobStore::new(temp.path()).unwrap();
320
321        // Hash: 0x00112233...
322        let mut hash = [0u8; 32];
323        hash[0] = 0x00;
324        hash[1] = 0x11;
325        hash[2] = 0x22;
326
327        let path = store.blob_path(&hash);
328        let path_str = path.to_string_lossy();
329
330        // Should have "00" as directory prefix
331        assert!(path_str.contains("/00/"), "Path should contain /00/ directory: {}", path_str);
332        // File name should be remaining 62 chars
333        assert!(path.file_name().unwrap().len() == 62);
334    }
335
336    #[tokio::test]
337    async fn test_empty_store_stats() {
338        let temp = TempDir::new().unwrap();
339        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
340
341        let stats = store.stats().unwrap();
342        assert_eq!(stats.count, 0);
343        assert_eq!(stats.total_bytes, 0);
344    }
345
346    #[tokio::test]
347    async fn test_empty_store_list() {
348        let temp = TempDir::new().unwrap();
349        let store = FsBlobStore::new(temp.path().join("blobs")).unwrap();
350
351        let hashes = store.list().unwrap();
352        assert!(hashes.is_empty());
353    }
354}