Skip to main content

fabryk_vector/
persistence.rs

//! Persistence and freshness checking for vector indices.
//!
//! Provides content-hash-based staleness detection so vector indices
//! can persist across restarts. When the content hash matches, the
//! existing index is still valid and doesn't need rebuilding.
6
7use fabryk_core::Result;
8use serde::{Deserialize, Serialize};
9use std::path::Path;
10
11/// Metadata stored alongside a vector index for freshness checking.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct IndexMetadata {
14    /// Content hash at build time.
15    pub content_hash: String,
16
17    /// Number of documents indexed.
18    pub document_count: usize,
19
20    /// Embedding dimension.
21    pub embedding_dimension: usize,
22
23    /// Build timestamp (ISO 8601).
24    pub built_at: String,
25
26    /// Embedding provider name.
27    pub provider: String,
28
29    /// Model name used for embeddings.
30    pub model: String,
31}
32
33/// Check if an existing vector index is fresh.
34///
35/// Compares the stored content hash with a freshly computed one.
36/// Returns `true` if the index exists and the hashes match.
37///
38/// # Arguments
39///
40/// * `metadata_path` - Path to the index metadata JSON file
41/// * `current_hash` - Freshly computed content hash
42pub fn is_index_fresh(metadata_path: &Path, current_hash: &str) -> bool {
43    match load_metadata(metadata_path) {
44        Ok(metadata) => metadata.content_hash == current_hash,
45        Err(_) => false,
46    }
47}
48
49/// Save index metadata to a JSON file.
50pub fn save_metadata(metadata_path: &Path, metadata: &IndexMetadata) -> Result<()> {
51    let json = serde_json::to_string_pretty(metadata)?;
52    std::fs::write(metadata_path, json)
53        .map_err(|e| fabryk_core::Error::io_with_path(e, metadata_path))?;
54    Ok(())
55}
56
57/// Load index metadata from a JSON file.
58pub fn load_metadata(metadata_path: &Path) -> Result<IndexMetadata> {
59    let json = std::fs::read_to_string(metadata_path)
60        .map_err(|e| fabryk_core::Error::io_with_path(e, metadata_path))?;
61    let metadata: IndexMetadata = serde_json::from_str(&json)?;
62    Ok(metadata)
63}
64
65// ============================================================================
66// Tests
67// ============================================================================
68
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Build the fixed metadata value shared by the tests below.
    fn sample_metadata() -> IndexMetadata {
        IndexMetadata {
            content_hash: "abc123def456".to_string(),
            document_count: 42,
            embedding_dimension: 384,
            built_at: "2025-01-15T12:00:00Z".to_string(),
            provider: "fastembed".to_string(),
            model: "bge-small-en-v1.5".to_string(),
        }
    }

    /// Assert that every field of `actual` equals the matching field of
    /// `expected`, so no field can silently fall out of a round-trip check.
    fn assert_same_metadata(actual: &IndexMetadata, expected: &IndexMetadata) {
        assert_eq!(actual.content_hash, expected.content_hash);
        assert_eq!(actual.document_count, expected.document_count);
        assert_eq!(actual.embedding_dimension, expected.embedding_dimension);
        assert_eq!(actual.built_at, expected.built_at);
        assert_eq!(actual.provider, expected.provider);
        assert_eq!(actual.model, expected.model);
    }

    #[test]
    fn test_metadata_serialization() {
        let metadata = sample_metadata();
        let json = serde_json::to_string(&metadata).unwrap();

        assert!(json.contains("abc123def456"));
        assert!(json.contains("42"));
        assert!(json.contains("384"));
        assert!(json.contains("fastembed"));

        let deserialized: IndexMetadata = serde_json::from_str(&json).unwrap();
        assert_same_metadata(&deserialized, &metadata);
    }

    #[test]
    fn test_save_and_load_metadata() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("index_metadata.json");

        let metadata = sample_metadata();
        save_metadata(&path, &metadata).unwrap();

        let loaded = load_metadata(&path).unwrap();
        assert_same_metadata(&loaded, &metadata);
    }

    #[test]
    fn test_is_index_fresh_matching_hash() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("metadata.json");

        let metadata = sample_metadata();
        save_metadata(&path, &metadata).unwrap();

        assert!(is_index_fresh(&path, "abc123def456"));
    }

    #[test]
    fn test_is_index_fresh_different_hash() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("metadata.json");

        let metadata = sample_metadata();
        save_metadata(&path, &metadata).unwrap();

        assert!(!is_index_fresh(&path, "different_hash"));
    }

    #[test]
    fn test_is_index_fresh_missing_file() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("nonexistent.json");

        assert!(!is_index_fresh(&path, "any_hash"));
    }

    #[test]
    fn test_is_index_fresh_invalid_json() {
        // A metadata file that exists but cannot be parsed must be reported
        // as stale, not as fresh.
        let dir = tempdir().unwrap();
        let path = dir.path().join("bad.json");
        std::fs::write(&path, "not valid json").unwrap();

        assert!(!is_index_fresh(&path, "abc123def456"));
    }

    #[test]
    fn test_load_metadata_invalid_json() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("bad.json");
        std::fs::write(&path, "not valid json").unwrap();

        assert!(load_metadata(&path).is_err());
    }

    #[test]
    fn test_save_metadata_invalid_path() {
        // The parent directory is never created inside the fresh temp dir,
        // so the write fails without depending on a hard-coded absolute path.
        let dir = tempdir().unwrap();
        let path = dir.path().join("missing_subdir").join("metadata.json");

        let metadata = sample_metadata();
        assert!(save_metadata(&path, &metadata).is_err());
    }
}