Skip to main content

ripvec_core/cache/
file_cache.rs

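//! Per-file cache entry storing chunks and their embeddings.
//!
//! Entries are archived with `rkyv` and then compressed with zstd before
//! being written out. Because of the compression step the on-disk bytes are
//! not directly memory-mappable: loading decompresses the object and
//! deserializes the archive into an owned `FileCache`.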
5
6use crate::chunk::CodeChunk;
7
8/// Cached chunks and embeddings for a single source file.
9///
10/// Stored as an rkyv archive in the object store, keyed by the blake3
11/// hash of the source file content.
12#[derive(Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
13pub struct FileCache {
14    /// The semantic chunks extracted from this file.
15    pub chunks: Vec<CodeChunk>,
16    /// Flat embedding data: `[n_chunks × hidden_dim]` contiguous f32 values.
17    pub embeddings: Vec<f32>,
18    /// The embedding dimension (e.g., 384 for BGE-small, 768 for ModernBERT).
19    pub hidden_dim: usize,
20}
21
/// Leading bytes that mark a cache object as zstd-compressed.
///
/// This is the zstd frame magic number `0xFD2FB528` in its on-wire
/// (little-endian) byte order, i.e. `28 B5 2F FD`. Legacy objects are raw,
/// uncompressed rkyv archives, which are expected never to start with this
/// sequence, so the prefix alone distinguishes the two formats.
const ZSTD_MAGIC: [u8; 4] = 0xFD2F_B528_u32.to_le_bytes();
26
27impl FileCache {
28    /// Serialize to zstd-compressed rkyv bytes.
29    ///
30    /// Embedding vectors compress ~8:1 with zstd because most values cluster
31    /// near zero. Level 1 matches level 3 ratio on this data with faster compression.
32    ///
33    /// # Panics
34    ///
35    /// Panics if serialization fails (should not happen for valid data).
36    #[must_use]
37    pub fn to_bytes(&self) -> Vec<u8> {
38        let raw = rkyv::to_bytes::<rkyv::rancor::Error>(self)
39            .expect("FileCache serialization should never fail");
40        zstd::encode_all(raw.as_slice(), 1)
41            .expect("zstd compression should never fail on valid data")
42    }
43
44    /// Deserialize from (optionally zstd-compressed) rkyv bytes.
45    ///
46    /// Transparently handles both compressed and legacy uncompressed objects
47    /// by checking for the zstd magic number.
48    ///
49    /// # Errors
50    ///
51    /// Returns an error if the bytes are not a valid archive.
52    pub fn from_bytes(bytes: &[u8]) -> crate::Result<Self> {
53        let raw = if bytes.len() >= 4 && bytes[..4] == ZSTD_MAGIC {
54            zstd::decode_all(bytes)
55                .map_err(|e| crate::Error::Other(anyhow::anyhow!("zstd decompression failed: {e}")))?
56        } else {
57            // Legacy uncompressed format — pass through.
58            bytes.to_vec()
59        };
60        rkyv::from_bytes::<Self, rkyv::rancor::Error>(&raw)
61            .map_err(|e| crate::Error::Other(anyhow::anyhow!("rkyv deserialization failed: {e}")))
62    }
63}
64
#[cfg(test)]
mod tests {
    use super::*;

    /// A populated entry survives a `to_bytes` / `from_bytes` cycle.
    #[test]
    fn round_trip() {
        let chunk = CodeChunk {
            file_path: "test.rs".into(),
            name: "foo".into(),
            kind: "function".into(),
            start_line: 1,
            end_line: 10,
            enriched_content: "fn foo() {}".into(),
            content: "fn foo() {}".into(),
        };
        let original = FileCache {
            chunks: vec![chunk],
            embeddings: vec![1.0, 2.0, 3.0, 4.0],
            hidden_dim: 4,
        };

        let encoded = original.to_bytes();
        let restored = FileCache::from_bytes(&encoded).unwrap();

        assert_eq!(restored.chunks.len(), 1);
        assert_eq!(restored.chunks[0].name, "foo");
        assert_eq!(restored.embeddings.len(), 4);
        assert_eq!(restored.hidden_dim, 4);
    }

    /// An entry with no chunks and no embeddings still round-trips and
    /// keeps its `hidden_dim`.
    #[test]
    fn empty_cache() {
        let original = FileCache {
            chunks: Vec::new(),
            embeddings: Vec::new(),
            hidden_dim: 384,
        };

        let restored = FileCache::from_bytes(&original.to_bytes()).unwrap();

        assert!(restored.chunks.is_empty());
        assert!(restored.embeddings.is_empty());
        assert_eq!(restored.hidden_dim, 384);
    }

    /// Bytes that are neither zstd nor a valid archive are rejected with an
    /// error rather than a panic.
    #[test]
    fn invalid_bytes_returns_error() {
        assert!(FileCache::from_bytes(b"garbage data").is_err());
    }
}
111}