Skip to main content

ripvec_core/cache/
file_cache.rs

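//! Per-file cache entry storing chunks and their embeddings.
//!
//! Entries are serialized with `rkyv` and then wrapped in a zstd frame
//! (see `FileCache::to_bytes`). Because of the compression layer, a stored
//! object is decompressed and deserialized into an owned `FileCache` on load
//! (see `FileCache::from_bytes`) rather than being memory-mapped and accessed
//! in place.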
5
6use crate::chunk::CodeChunk;
7
/// Cached chunks and embeddings for a single source file.
///
/// Stored as an rkyv archive in the object store, keyed by the blake3
/// hash of the source file content. `FileCache::to_bytes` produces the
/// stored byte representation and `FileCache::from_bytes` reads it back.
///
/// Nothing in this type enforces that `embeddings.len()` equals
/// `chunks.len() * hidden_dim`; whoever builds a `FileCache` is responsible
/// for keeping the three fields consistent.
///
/// NOTE(review): adding, removing, or reordering fields presumably changes
/// the archived layout and would make previously written cache objects
/// unreadable — confirm before altering this struct.
#[derive(Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub struct FileCache {
    /// The semantic chunks extracted from this file.
    pub chunks: Vec<CodeChunk>,
    /// Flat embedding data: `[n_chunks × hidden_dim]` contiguous f32 values.
    ///
    /// Presumably laid out one `hidden_dim`-long row per chunk, in the same
    /// order as `chunks` — TODO confirm against the code that fills this in.
    pub embeddings: Vec<f32>,
    /// The embedding dimension (e.g., 384 for BGE-small, 768 for ModernBERT).
    pub hidden_dim: usize,
}
21
/// Leading four bytes of a Zstandard frame, used to recognise compressed
/// cache objects.
///
/// The Zstandard magic number is `0xFD2FB528`, written little-endian, which
/// yields the byte sequence `28 B5 2F FD`.
///
/// Objects written before compression was introduced hold a bare rkyv
/// archive. Detection relies on such archives not starting with this
/// sequence (NOTE(review): not provable from this file — confirm).
const ZSTD_MAGIC: [u8; 4] = 0xFD2F_B528_u32.to_le_bytes();
26
27impl FileCache {
28    /// Serialize to zstd-compressed rkyv bytes.
29    ///
30    /// Embedding vectors compress ~8:1 with zstd because most values cluster
31    /// near zero. Level 1 matches level 3 ratio on this data with faster compression.
32    ///
33    /// # Panics
34    ///
35    /// Panics if serialization fails (should not happen for valid data).
36    #[must_use]
37    pub fn to_bytes(&self) -> Vec<u8> {
38        let raw = rkyv::to_bytes::<rkyv::rancor::Error>(self)
39            .expect("FileCache serialization should never fail");
40        zstd::encode_all(raw.as_slice(), 1)
41            .expect("zstd compression should never fail on valid data")
42    }
43
44    /// Deserialize from (optionally zstd-compressed) rkyv bytes.
45    ///
46    /// Transparently handles both compressed and legacy uncompressed objects
47    /// by checking for the zstd magic number.
48    ///
49    /// # Errors
50    ///
51    /// Returns an error if the bytes are not a valid archive.
52    pub fn from_bytes(bytes: &[u8]) -> crate::Result<Self> {
53        let raw = if bytes.len() >= 4 && bytes[..4] == ZSTD_MAGIC {
54            zstd::decode_all(bytes).map_err(|e| {
55                crate::Error::Other(anyhow::anyhow!("zstd decompression failed: {e}"))
56            })?
57        } else {
58            // Legacy uncompressed format — pass through.
59            bytes.to_vec()
60        };
61        rkyv::from_bytes::<Self, rkyv::rancor::Error>(&raw)
62            .map_err(|e| crate::Error::Other(anyhow::anyhow!("rkyv deserialization failed: {e}")))
63    }
64}
65
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a cache holding one chunk and a single 4-dimensional embedding.
    fn sample_cache() -> FileCache {
        FileCache {
            chunks: vec![CodeChunk {
                file_path: "test.rs".into(),
                name: "foo".into(),
                kind: "function".into(),
                start_line: 1,
                end_line: 10,
                enriched_content: "fn foo() {}".into(),
                content: "fn foo() {}".into(),
            }],
            embeddings: vec![1.0, 2.0, 3.0, 4.0],
            hidden_dim: 4,
        }
    }

    /// Check that `loaded` carries the same data as `sample_cache()`.
    fn assert_matches_sample(loaded: &FileCache) {
        assert_eq!(loaded.chunks.len(), 1);
        assert_eq!(loaded.chunks[0].name, "foo");
        assert_eq!(loaded.chunks[0].start_line, 1);
        assert_eq!(loaded.chunks[0].end_line, 10);
        assert_eq!(loaded.embeddings, vec![1.0_f32, 2.0, 3.0, 4.0]);
        assert_eq!(loaded.hidden_dim, 4);
    }

    #[test]
    fn round_trip() {
        let bytes = sample_cache().to_bytes();
        let loaded = FileCache::from_bytes(&bytes).unwrap();
        assert_matches_sample(&loaded);
    }

    #[test]
    fn to_bytes_emits_zstd_frame() {
        // `from_bytes` picks the compressed path based on this prefix, so the
        // writer must always produce it.
        let bytes = sample_cache().to_bytes();
        assert!(bytes.starts_with(&ZSTD_MAGIC));
    }

    #[test]
    fn legacy_uncompressed_round_trip() {
        // Objects written before compression was added are bare rkyv archives.
        let raw = rkyv::to_bytes::<rkyv::rancor::Error>(&sample_cache()).unwrap();
        assert!(!raw.as_slice().starts_with(&ZSTD_MAGIC));

        let loaded = FileCache::from_bytes(raw.as_slice()).unwrap();
        assert_matches_sample(&loaded);
    }

    #[test]
    fn empty_cache() {
        let fc = FileCache {
            chunks: vec![],
            embeddings: vec![],
            hidden_dim: 384,
        };
        let bytes = fc.to_bytes();
        let loaded = FileCache::from_bytes(&bytes).unwrap();
        assert_eq!(loaded.chunks.len(), 0);
        assert_eq!(loaded.embeddings.len(), 0);
        assert_eq!(loaded.hidden_dim, 384);
    }

    #[test]
    fn invalid_bytes_returns_error() {
        let result = FileCache::from_bytes(b"garbage data");
        assert!(result.is_err());
    }

    #[test]
    fn corrupt_zstd_payload_returns_error() {
        // Valid magic followed by bytes that do not form a zstd frame.
        let mut bytes = ZSTD_MAGIC.to_vec();
        bytes.extend_from_slice(&[0xFF; 8]);
        assert!(FileCache::from_bytes(&bytes).is_err());
    }
}