// suture_core/cas/store.rs
//! Blob Store — the primary CAS interface for storing and retrieving blobs.
//!
//! Blobs are stored on disk using a content-addressed scheme:
//! - Hash is split into a 2-char prefix directory and 62-char filename
//! - This creates 256 buckets, avoiding any single directory having too many files
//! - Blobs are optionally Zstd-compressed
//!
//! # Thread Safety
//!
//! `BlobStore` is `Send + Sync` and can be shared across threads via `Arc`.
//! File operations are the primary bottleneck; the store itself holds no mutable
//! state beyond the root path.

use crate::cas::compressor::{self, is_zstd_compressed};
use crate::cas::hasher;
use crate::cas::pack::{PackCache, PackError, PackFile, PackIndex};
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use suture_common::Hash;
use thiserror::Error;
23
/// Errors that can occur during CAS operations.
#[derive(Error, Debug)]
pub enum CasError {
    /// The requested blob exists neither as a loose object nor in any pack file.
    #[error("blob not found: {0}")]
    BlobNotFound(String),

    /// Data read back did not hash to the expected value (corruption or tampering).
    #[error("hash mismatch: expected {expected}, got {actual}")]
    HashMismatch { expected: String, actual: String },

    /// Underlying filesystem error.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// Zstd compression failed.
    #[error("compression error: {0}")]
    CompressionError(String),

    /// Zstd decompression failed.
    #[error("decompression error: {0}")]
    DecompressionError(String),

    /// Decompressed output would exceed the configured size cap.
    #[error("decompressed data too large: {max} bytes max")]
    DecompressionTooLarge { max: usize },

    /// Insert-only operation (`put_blob_new`) found the blob already stored.
    #[error("blob already exists: {0}")]
    AlreadyExists(String),

    /// A path could not be interpreted or constructed.
    #[error("invalid path: {0}")]
    InvalidPath(String),

    /// Error from the pack-file layer.
    #[error("pack error: {0}")]
    Pack(#[from] PackError),
}
54
/// The Content Addressable Storage blob store.
///
/// Stores blobs indexed by BLAKE3 hash on the local filesystem.
/// Provides deduplication, optional compression, and integrity verification.
///
/// # Thread Safety
///
/// `BlobStore` is `Send + Sync` and can be shared across threads via `Arc`.
/// The pack index cache uses `Mutex` for interior mutability.
pub struct BlobStore {
    /// Root directory containing the `objects/` subdirectory.
    root: PathBuf,
    /// Whether to compress blobs with Zstd. Defaults to true;
    /// disabled by `new_uncompressed`.
    compress: bool,
    /// Zstd compression level (1-22).
    compression_level: i32,
    /// Whether to verify blob hashes on read. Default: true.
    /// Set to false for hot paths where performance matters more than
    /// per-read integrity verification (content addressing already
    /// provides correctness by construction).
    verify_on_read: bool,
    /// Cached pack indices, loaded lazily on first pack access.
    /// Invalidated when `repack()` creates new pack files.
    pack_cache: Mutex<Option<PackCache>>,
}
80
81impl BlobStore {
82    /// Create a new BlobStore rooted at the given directory.
83    ///
84    /// Creates the `objects/` subdirectory if it doesn't exist.
85    pub fn new(root: impl Into<PathBuf>) -> Result<Self, CasError> {
86        let root = root.into();
87        let objects_dir = root.join("objects");
88        fs::create_dir_all(&objects_dir)?;
89        Ok(Self {
90            root,
91            compress: true,
92            compression_level: compressor::DEFAULT_COMPRESSION_LEVEL,
93            verify_on_read: true,
94            pack_cache: Mutex::new(None),
95        })
96    }
97
98    /// Create a BlobStore backed by a temporary directory.
99    ///
100    /// Useful for testing and in-memory repository usage. The temporary
101    /// directory is cleaned up when the BlobStore is dropped.
102    pub fn open_in_memory() -> Result<Self, CasError> {
103        let root = tempfile::tempdir().map_err(CasError::Io)?.keep();
104        let objects_dir = root.join("objects");
105        fs::create_dir_all(&objects_dir)?;
106        Ok(Self {
107            root,
108            compress: true,
109            compression_level: compressor::DEFAULT_COMPRESSION_LEVEL,
110            verify_on_read: true,
111            pack_cache: Mutex::new(None),
112        })
113    }
114
115    /// Create a BlobStore with compression disabled (for testing).
116    pub fn new_uncompressed(root: impl Into<PathBuf>) -> Result<Self, CasError> {
117        let mut store = Self::new(root)?;
118        store.compress = false;
119        Ok(store)
120    }
121
122    /// Set whether to verify blob hashes on read.
123    ///
124    /// When disabled, `get_blob()` skips the BLAKE3 hash verification
125    /// step, saving O(n) computation per read. The content-addressed
126    /// storage scheme already provides correctness by construction
127    /// (the filename is the hash), so this is safe for performance-critical
128    /// paths like `snapshot_head()` which may read many blobs in sequence.
129    pub fn set_verify_on_read(&mut self, verify: bool) {
130        self.verify_on_read = verify;
131    }
132
133    /// Check whether hash verification is enabled on read.
134    pub fn verify_on_read(&self) -> bool {
135        self.verify_on_read
136    }
137
138    /// Store a blob, returning its BLAKE3 hash.
139    ///
140    /// If a blob with the same hash already exists, this is a no-op
141    /// (deduplication). Returns the hash either way.
142    pub fn put_blob(&self, data: &[u8]) -> Result<Hash, CasError> {
143        let hash = hasher::hash_bytes(data);
144        let blob_path = self.blob_path(&hash);
145
146        // Deduplication: if blob already exists, return immediately
147        if blob_path.exists() {
148            return Ok(hash);
149        }
150
151        // Ensure the prefix directory exists
152        if let Some(parent) = blob_path.parent() {
153            fs::create_dir_all(parent)?;
154        }
155
156        // Write blob (optionally compressed)
157        if self.compress {
158            let compressed = compressor::compress(data, self.compression_level)?;
159            fs::write(&blob_path, &compressed)?;
160        } else {
161            fs::write(&blob_path, data)?;
162        }
163
164        Ok(hash)
165    }
166
167    /// Store a blob, returning an error if it already exists.
168    pub fn put_blob_new(&self, data: &[u8]) -> Result<Hash, CasError> {
169        let hash = hasher::hash_bytes(data);
170        let blob_path = self.blob_path(&hash);
171
172        if blob_path.exists() {
173            return Err(CasError::AlreadyExists(hash.to_hex()));
174        }
175
176        if let Some(parent) = blob_path.parent() {
177            fs::create_dir_all(parent)?;
178        }
179
180        if self.compress {
181            let compressed = compressor::compress(data, self.compression_level)?;
182            fs::write(&blob_path, &compressed)?;
183        } else {
184            fs::write(&blob_path, data)?;
185        }
186
187        Ok(hash)
188    }
189
190    /// Store a blob with an explicit hash (used when receiving blobs from a remote).
191    ///
192    /// Verifies the data matches the expected hash before storing.
193    pub fn put_blob_with_hash(&self, data: &[u8], expected_hash: &Hash) -> Result<(), CasError> {
194        let blob_path = self.blob_path(expected_hash);
195
196        if blob_path.exists() {
197            return Ok(());
198        }
199
200        hasher::verify_hash(data, expected_hash)?;
201
202        if let Some(parent) = blob_path.parent() {
203            fs::create_dir_all(parent)?;
204        }
205
206        if self.compress {
207            let compressed = compressor::compress(data, self.compression_level)?;
208            fs::write(&blob_path, &compressed)?;
209        } else {
210            fs::write(&blob_path, data)?;
211        }
212
213        Ok(())
214    }
215
216    /// Retrieve a blob by its BLAKE3 hash.
217    ///
218    /// Tries loose objects first, then pack files.
219    /// Decompresses if necessary and verifies the hash of the result
220    /// (unless verification was disabled via `set_verify_on_read(false)`).
221    pub fn get_blob(&self, hash: &Hash) -> Result<Vec<u8>, CasError> {
222        // Try loose blob first
223        let blob_path = self.blob_path(hash);
224        if blob_path.exists() {
225            let raw = fs::read(&blob_path)?;
226            let data = if is_zstd_compressed(&raw) {
227                compressor::decompress(&raw)?
228            } else {
229                raw
230            };
231            if self.verify_on_read {
232                hasher::verify_hash(&data, hash)?;
233            }
234            return Ok(data);
235        }
236
237        // Fall back to pack files
238        if let Ok(data) = self.get_blob_packed(hash) {
239            return Ok(data);
240        }
241
242        Err(CasError::BlobNotFound(hash.to_hex()))
243    }
244
245    /// Check if a blob exists in the store.
246    ///
247    /// Checks loose objects first, then pack files.
248    /// This does NOT verify the blob's integrity — it only checks for existence.
249    pub fn has_blob(&self, hash: &Hash) -> bool {
250        self.blob_path(hash).exists() || self.has_blob_packed(hash)
251    }
252
253    /// Delete a blob from the store.
254    ///
255    /// The caller is responsible for ensuring no patches reference this blob.
256    pub fn delete_blob(&self, hash: &Hash) -> Result<(), CasError> {
257        let blob_path = self.blob_path(hash);
258        fs::remove_file(&blob_path).map_err(|e| {
259            if e.kind() == io::ErrorKind::NotFound {
260                CasError::BlobNotFound(hash.to_hex())
261            } else {
262                CasError::Io(e)
263            }
264        })
265    }
266
267    /// Get the total number of blobs in the store.
268    pub fn blob_count(&self) -> Result<u64, CasError> {
269        let objects_dir = self.root.join("objects");
270        let mut count = 0u64;
271        if objects_dir.exists() {
272            for entry in fs::read_dir(&objects_dir)? {
273                let entry = entry?;
274                if entry.file_type()?.is_dir() {
275                    let dir_name = entry.file_name();
276                    if dir_name == "pack" {
277                        continue;
278                    }
279                    for sub_entry in fs::read_dir(entry.path())? {
280                        let sub_entry = sub_entry?;
281                        if sub_entry.file_type()?.is_file() {
282                            count += 1;
283                        }
284                    }
285                }
286            }
287        }
288        Ok(count)
289    }
290
291    /// Get the total size of all blobs in the store (compressed).
292    pub fn total_size(&self) -> Result<u64, CasError> {
293        let objects_dir = self.root.join("objects");
294        let mut total = 0u64;
295        if objects_dir.exists() {
296            for entry in fs::read_dir(&objects_dir)? {
297                let entry = entry?;
298                if entry.file_type()?.is_dir() {
299                    let dir_name = entry.file_name();
300                    if dir_name == "pack" {
301                        continue;
302                    }
303                    for sub_entry in fs::read_dir(entry.path())? {
304                        let sub_entry = sub_entry?;
305                        if sub_entry.file_type()?.is_file() {
306                            total += sub_entry.metadata()?.len();
307                        }
308                    }
309                }
310            }
311        }
312        Ok(total)
313    }
314
315    /// List all blob hashes in the store.
316    pub fn list_blobs(&self) -> Result<Vec<Hash>, CasError> {
317        let objects_dir = self.root.join("objects");
318        let mut hashes = Vec::new();
319        if !objects_dir.exists() {
320            return Ok(hashes);
321        }
322        for entry in fs::read_dir(&objects_dir)? {
323            let entry = entry?;
324            if entry.file_type()?.is_dir() {
325                let dir_name = entry.file_name();
326                if dir_name == "pack" {
327                    continue;
328                }
329                let prefix = dir_name.to_string_lossy().to_string();
330                for sub_entry in fs::read_dir(entry.path())? {
331                    let sub_entry = sub_entry?;
332                    if sub_entry.file_type()?.is_file() {
333                        let suffix = sub_entry.file_name().to_string_lossy().to_string();
334                        let hex = format!("{prefix}{suffix}");
335                        if let Ok(hash) = Hash::from_hex(&hex) {
336                            hashes.push(hash);
337                        }
338                    }
339                }
340            }
341        }
342        Ok(hashes)
343    }
344
345    /// Get the path to the objects directory.
346    pub fn objects_dir(&self) -> PathBuf {
347        self.root.join("objects")
348    }
349
350    /// Get the path to the pack directory.
351    pub fn pack_dir(&self) -> PathBuf {
352        self.root.join("objects").join("pack")
353    }
354
355    /// Ensure pack cache is loaded, then call `f` with a reference to it.
356    ///
357    /// On first access, reads all `.idx` files from the pack directory.
358    /// Subsequent calls return the cached data without disk I/O.
359    /// Call `invalidate_pack_cache()` after `repack()` to force a reload.
360    fn with_pack_cache<F, R>(&self, f: F) -> Result<R, CasError>
361    where
362        F: FnOnce(&PackCache) -> R,
363    {
364        let mut guard = self
365            .pack_cache
366            .lock()
367            .map_err(|e| CasError::CompressionError(format!("pack cache lock poisoned: {e}")))?;
368        if guard.is_none() {
369            *guard = Some(PackCache::load_all(&self.pack_dir()).map_err(CasError::Pack)?);
370        }
371        // Guard was just set to Some(...) on the line above if it was None.
372        let cache = guard.as_ref().ok_or_else(|| {
373            CasError::Pack(PackError::BlobNotFound("pack cache not loaded".into()))
374        })?;
375        Ok(f(cache))
376    }
377
378    /// Invalidate the pack cache (call after repack or external pack changes).
379    pub fn invalidate_pack_cache(&self) {
380        if let Ok(mut guard) = self.pack_cache.lock() {
381            *guard = None;
382        }
383    }
384
385    /// Retrieve a blob from pack files only (not loose objects).
386    pub fn get_blob_packed(&self, hash: &Hash) -> Result<Vec<u8>, CasError> {
387        // Find which pack file contains this blob
388        let pack_path = self.with_pack_cache(|cache| cache.find(hash).map(|(p, _)| p.clone()))?;
389        let pack_path = pack_path.ok_or_else(|| CasError::BlobNotFound(hash.to_hex()))?;
390
391        let idx_path = pack_path.with_extension("idx");
392        let index = PackIndex::load(&idx_path).map_err(CasError::Pack)?;
393        let data = PackFile::read_blob(&pack_path, &index, hash).map_err(CasError::Pack)?;
394        Ok(data)
395    }
396
397    /// Check if a blob exists in any pack file.
398    pub fn has_blob_packed(&self, hash: &Hash) -> bool {
399        self.with_pack_cache(|cache| cache.find(hash).is_some())
400            .unwrap_or(false)
401    }
402
403    /// List all blob hashes stored in pack files.
404    pub fn list_blobs_packed(&self) -> Result<Vec<Hash>, CasError> {
405        self.with_pack_cache(|cache| cache.all_hashes())
406    }
407
408    /// Repack loose blobs into a pack file if the count exceeds the threshold.
409    ///
410    /// Returns the number of blobs that were packed. If the loose blob count
411    /// is at or below the threshold, no packing occurs and 0 is returned.
412    /// After successful packing, the loose blobs are removed.
413    pub fn repack(&self, threshold: usize) -> Result<usize, CasError> {
414        let loose_hashes = self.list_blobs()?;
415        if loose_hashes.len() <= threshold {
416            return Ok(0);
417        }
418
419        let mut objects = Vec::with_capacity(loose_hashes.len());
420        for hash in &loose_hashes {
421            let data = self.get_blob(hash)?;
422            objects.push((*hash, data));
423        }
424
425        let (pack_path, _idx_path) = PackFile::create(&self.pack_dir(), &objects)?;
426        let _ = pack_path;
427
428        for hash in &loose_hashes {
429            let _ = self.delete_blob(hash);
430        }
431
432        // Invalidate pack cache since we created new pack files
433        self.invalidate_pack_cache();
434
435        Ok(loose_hashes.len())
436    }
437
438    /// Get the on-disk path for a given hash.
439    fn blob_path(&self, hash: &Hash) -> PathBuf {
440        let hex = hash.to_hex();
441        let prefix = &hex[..2];
442        let suffix = &hex[2..];
443        self.root.join("objects").join(prefix).join(suffix)
444    }
445}
446
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Shared fixture: an uncompressed store in a fresh temp dir. The TempDir
    // guard is returned so the directory outlives the store under test.
    fn make_store() -> (TempDir, BlobStore) {
        let dir = tempfile::tempdir().unwrap();
        let store = BlobStore::new_uncompressed(dir.path()).unwrap();
        (dir, store)
    }

    #[test]
    fn test_put_and_get_blob() {
        let (_dir, store) = make_store();
        let data = b"hello, suture!";
        let hash = store.put_blob(data).unwrap();

        let retrieved = store.get_blob(&hash).unwrap();
        assert_eq!(data.as_slice(), retrieved.as_slice());
    }

    #[test]
    fn test_deduplication() {
        let (_dir, store) = make_store();
        let data = b"deduplicate me";

        let h1 = store.put_blob(data).unwrap();
        let h2 = store.put_blob(data).unwrap();
        assert_eq!(h1, h2);

        assert_eq!(store.blob_count().unwrap(), 1, "Only one copy should exist");
    }

    #[test]
    fn test_has_blob() {
        let (_dir, store) = make_store();
        let hash = store.put_blob(b"exists").unwrap();

        assert!(store.has_blob(&hash));
        // A syntactically valid hash that was never stored.
        let missing = Hash::from_hex(&"f".repeat(64)).unwrap();
        assert!(!store.has_blob(&missing));
    }

    #[test]
    fn test_get_nonexistent_blob() {
        let (_dir, store) = make_store();
        let missing = Hash::from_hex(&"a".repeat(64)).unwrap();
        let result = store.get_blob(&missing);
        assert!(matches!(result, Err(CasError::BlobNotFound(_))));
    }

    #[test]
    fn test_delete_blob() {
        let (_dir, store) = make_store();
        let hash = store.put_blob(b"delete me").unwrap();
        assert!(store.has_blob(&hash));

        store.delete_blob(&hash).unwrap();
        assert!(!store.has_blob(&hash));
    }

    #[test]
    fn test_delete_nonexistent_blob() {
        let (_dir, store) = make_store();
        let missing = Hash::from_hex(&"b".repeat(64)).unwrap();
        let result = store.delete_blob(&missing);
        assert!(matches!(result, Err(CasError::BlobNotFound(_))));
    }

    #[test]
    fn test_put_blob_new_rejects_duplicate() {
        let (_dir, store) = make_store();
        let data = b"duplicate";
        store.put_blob(data).unwrap();
        let result = store.put_blob_new(data);
        assert!(matches!(result, Err(CasError::AlreadyExists(_))));
    }

    #[test]
    fn test_blob_count_and_list() {
        let (_dir, store) = make_store();
        store.put_blob(b"one").unwrap();
        store.put_blob(b"two").unwrap();
        store.put_blob(b"three").unwrap();

        assert_eq!(store.blob_count().unwrap(), 3);
        assert_eq!(store.list_blobs().unwrap().len(), 3);
    }

    #[test]
    fn test_large_blob() {
        let (_dir, store) = make_store();
        // 10 MB blob
        let data: Vec<u8> = (0..10_000_000).map(|i| (i % 256) as u8).collect();
        let hash = store.put_blob(&data).unwrap();

        let retrieved = store.get_blob(&hash).unwrap();
        assert_eq!(data.len(), retrieved.len());
        assert_eq!(data, retrieved);
    }

    #[test]
    fn test_hash_integrity() {
        let (_dir, store) = make_store();
        let data = b"integrity check";
        let hash = store.put_blob(data).unwrap();

        // Manually corrupt the stored blob
        let blob_path = store.blob_path(&hash);
        let mut corrupted = fs::read(&blob_path).unwrap();
        corrupted[0] = corrupted[0].wrapping_add(1);
        fs::write(&blob_path, &corrupted).unwrap();

        // Getting the corrupted blob should fail integrity check
        // (verify_on_read defaults to true).
        let result = store.get_blob(&hash);
        assert!(matches!(result, Err(CasError::HashMismatch { .. })));
    }

    #[test]
    fn test_compressed_store() {
        let dir = tempfile::tempdir().unwrap();
        // `new` (unlike `new_uncompressed`) enables Zstd compression.
        let store = BlobStore::new(dir.path()).unwrap();

        let data = b"this will be compressed";
        let hash = store.put_blob(data).unwrap();

        // Verify the stored file is actually compressed
        let blob_path = store.blob_path(&hash);
        let raw = fs::read(&blob_path).unwrap();
        assert!(is_zstd_compressed(&raw), "Blob should be Zstd-compressed");

        // Verify round-trip
        let retrieved = store.get_blob(&hash).unwrap();
        assert_eq!(data.as_slice(), retrieved.as_slice());
    }

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #[test]
            fn put_get_roundtrip(data in proptest::collection::vec(proptest::num::u8::ANY, 0..1024)) {
                let dir = tempfile::tempdir().unwrap();
                let store = BlobStore::new_uncompressed(dir.path()).unwrap();
                let hash = store.put_blob(&data).unwrap();
                let retrieved = store.get_blob(&hash).unwrap();
                prop_assert_eq!(data, retrieved);
            }

            #[test]
            fn content_addressing(
                data1 in proptest::collection::vec(proptest::num::u8::ANY, 0..512),
                data2 in proptest::collection::vec(proptest::num::u8::ANY, 0..512)
            ) {
                let dir = tempfile::tempdir().unwrap();
                let store = BlobStore::new_uncompressed(dir.path()).unwrap();

                let hash1 = store.put_blob(&data1).unwrap();
                let hash2 = store.put_blob(&data2).unwrap();

                if data1 == data2 {
                    prop_assert_eq!(hash1, hash2, "same data must produce same hash");
                } else {
                    prop_assert_ne!(hash1, hash2, "different data must produce different hashes");
                }
            }

            #[test]
            fn put_twice_idempotent(data in proptest::collection::vec(proptest::num::u8::ANY, 0..1024)) {
                let dir = tempfile::tempdir().unwrap();
                let store = BlobStore::new_uncompressed(dir.path()).unwrap();

                let hash1 = store.put_blob(&data).unwrap();
                let hash2 = store.put_blob(&data).unwrap();
                prop_assert_eq!(hash1, hash2);
                prop_assert_eq!(store.blob_count().unwrap(), 1);
            }
        }
    }

    mod pack_tests {
        use super::*;

        #[test]
        fn test_get_blob_from_pack() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash1 = store.put_blob(b"packed blob one").unwrap();
            let hash2 = store.put_blob(b"packed blob two").unwrap();

            // Threshold 0 forces both loose blobs into a pack.
            let packed = store.repack(0).unwrap();
            assert_eq!(packed, 2);

            assert_eq!(store.blob_count().unwrap(), 0);

            let data1 = store.get_blob(&hash1).unwrap();
            assert_eq!(data1, b"packed blob one".to_vec());

            let data2 = store.get_blob(&hash2).unwrap();
            assert_eq!(data2, b"packed blob two".to_vec());
        }

        #[test]
        fn test_has_blob_checks_packs() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash = store.put_blob(b"check me in packs").unwrap();
            store.repack(0).unwrap();

            assert!(store.has_blob(&hash));
            assert!(!store.has_blob(&Hash::from_hex(&"c".repeat(64)).unwrap()));
        }

        #[test]
        fn test_get_blob_packed_not_found() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let missing = Hash::from_hex(&"d".repeat(64)).unwrap();
            let result = store.get_blob_packed(&missing);
            assert!(matches!(result, Err(CasError::BlobNotFound(_))));
        }

        #[test]
        fn test_list_blobs_packed() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            store.put_blob(b"alpha").unwrap();
            store.put_blob(b"beta").unwrap();
            store.repack(0).unwrap();

            let packed = store.list_blobs_packed().unwrap();
            assert_eq!(packed.len(), 2);
        }

        #[test]
        fn test_repack_below_threshold() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            store.put_blob(b"only one").unwrap();

            let packed = store.repack(10).unwrap();
            assert_eq!(packed, 0);
            assert_eq!(store.blob_count().unwrap(), 1);
        }

        #[test]
        fn test_repack_at_threshold() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            store.put_blob(b"one").unwrap();
            store.put_blob(b"two").unwrap();

            // Count equal to threshold must NOT trigger packing (`<=` check).
            let packed = store.repack(2).unwrap();
            assert_eq!(packed, 0);
            assert_eq!(store.blob_count().unwrap(), 2);

            let packed = store.repack(1).unwrap();
            assert_eq!(packed, 2);
            assert_eq!(store.blob_count().unwrap(), 0);
        }

        #[test]
        fn test_loose_priority_over_packed() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash = store.put_blob(b"original data").unwrap();
            store.repack(0).unwrap();

            // Re-store the same hash as a loose blob
            let blob_path = store.blob_path(&hash);
            if let Some(parent) = blob_path.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&blob_path, b"original data").unwrap();

            let data = store.get_blob(&hash).unwrap();
            assert_eq!(data, b"original data".to_vec());

            // Delete the loose blob; should still find in pack
            store.delete_blob(&hash).unwrap();
            let data = store.get_blob(&hash).unwrap();
            assert_eq!(data, b"original data".to_vec());
        }

        #[test]
        fn test_has_blob_packed() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash = store.put_blob(b"packed check").unwrap();
            assert!(!store.has_blob_packed(&hash));

            store.repack(0).unwrap();
            assert!(store.has_blob_packed(&hash));
        }

        #[test]
        fn test_repack_multiple_times() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            store.put_blob(b"first batch one").unwrap();
            store.put_blob(b"first batch two").unwrap();
            store.repack(0).unwrap();

            store.put_blob(b"second batch").unwrap();
            store.repack(0).unwrap();

            // Both packs' contents must be visible together.
            let all = store.list_blobs_packed().unwrap();
            assert_eq!(all.len(), 3);
        }

        #[test]
        fn test_pack_cache_avoids_repeated_disk_reads() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash = store.put_blob(b"cache me").unwrap();
            store.repack(0).unwrap();

            // First access: loads cache from disk
            assert!(store.has_blob_packed(&hash));
            // Cache should now be populated
            {
                let guard = store.pack_cache.lock().unwrap();
                assert!(
                    guard.is_some(),
                    "pack cache should be populated after first access"
                );
            }

            // Second access: uses cached data (no disk I/O)
            assert!(store.has_blob_packed(&hash));

            // Third access: also cached
            let data = store.get_blob_packed(&hash).unwrap();
            assert_eq!(data, b"cache me".to_vec());
        }

        #[test]
        fn test_invalidate_pack_cache() {
            let dir = tempfile::tempdir().unwrap();
            let store = BlobStore::new_uncompressed(dir.path()).unwrap();

            let hash = store.put_blob(b"invalidate test").unwrap();
            store.repack(0).unwrap();

            // Populate cache
            assert!(store.has_blob_packed(&hash));
            assert!(store.pack_cache.lock().unwrap().is_some());

            // Invalidate
            store.invalidate_pack_cache();
            assert!(store.pack_cache.lock().unwrap().is_none());

            // Next access reloads from disk
            assert!(store.has_blob_packed(&hash));
            assert!(store.pack_cache.lock().unwrap().is_some());
        }
    }
}