Skip to main content

fabryk_graph/
persistence.rs

1//! Graph persistence and caching.
2//!
3//! This module provides functions for saving and loading graph data:
4//!
5//! - JSON format for human-readable storage
6//! - Optional rkyv binary format for fast loading (feature-gated)
7//! - Freshness checking to avoid unnecessary rebuilds
8//!
9//! # Feature Flags
10//!
11//! - `graph-rkyv-cache`: Enables binary caching with rkyv and Blake3 hashing
12
13use crate::{Edge, GraphData, Node};
14use fabryk_core::{Error, Result};
15use petgraph::graph::DiGraph;
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18use std::path::Path;
19
20// ============================================================================
21// Serializable types
22// ============================================================================
23
24/// Serializable representation of graph data.
25///
26/// Used for JSON persistence. The petgraph `DiGraph` is rebuilt on load.
27#[derive(Clone, Debug, Serialize, Deserialize)]
28pub struct SerializableGraph {
29    /// All nodes in the graph.
30    pub nodes: Vec<Node>,
31    /// All edges in the graph.
32    pub edges: Vec<Edge>,
33    /// Optional metadata about the graph.
34    pub metadata: Option<GraphMetadata>,
35}
36
37/// Metadata about a persisted graph.
38#[derive(Clone, Debug, Serialize, Deserialize)]
39pub struct GraphMetadata {
40    /// When the graph was built (unix timestamp).
41    #[serde(default)]
42    pub built_at: String,
43    /// Version of the builder.
44    #[serde(default)]
45    pub builder_version: String,
46    /// Content hash for freshness checking.
47    pub content_hash: Option<String>,
48    /// Number of source files processed.
49    pub source_file_count: Option<usize>,
50}
51
52impl Default for GraphMetadata {
53    fn default() -> Self {
54        Self {
55            built_at: timestamp_now(),
56            builder_version: env!("CARGO_PKG_VERSION").to_string(),
57            content_hash: None,
58            source_file_count: None,
59        }
60    }
61}
62
/// Current time as whole seconds since the Unix epoch, rendered as a decimal
/// string.
///
/// If the system clock reports a time before the epoch, the duration falls
/// back to its default (zero) and `"0"` is returned instead of failing.
fn timestamp_now() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
        .to_string()
}
71
72// ============================================================================
73// Save / Load
74// ============================================================================
75
76/// Save a graph to a JSON file.
77pub fn save_graph(
78    graph: &GraphData,
79    path: impl AsRef<Path>,
80    metadata: Option<GraphMetadata>,
81) -> Result<()> {
82    let serializable = SerializableGraph {
83        nodes: graph.nodes.values().cloned().collect(),
84        edges: graph.edges.clone(),
85        metadata,
86    };
87
88    let json = serde_json::to_string_pretty(&serializable)
89        .map_err(|e| Error::operation(format!("Failed to serialize graph: {e}")))?;
90
91    std::fs::write(path.as_ref(), json).map_err(|e| Error::io_with_path(e, path.as_ref()))?;
92
93    Ok(())
94}
95
96/// Load a graph from a JSON file.
97///
98/// Rebuilds the petgraph `DiGraph` from the serialized nodes and edges.
99pub fn load_graph(path: impl AsRef<Path>) -> Result<GraphData> {
100    let json = std::fs::read_to_string(path.as_ref())
101        .map_err(|e| Error::io_with_path(e, path.as_ref()))?;
102
103    load_graph_from_str(&json)
104}
105
106/// Load a graph from a JSON string.
107///
108/// Useful for testing or loading from non-file sources.
109pub fn load_graph_from_str(json: &str) -> Result<GraphData> {
110    let serializable: SerializableGraph = serde_json::from_str(json)
111        .map_err(|e| Error::parse(format!("Failed to parse graph JSON: {e}")))?;
112
113    to_graph_data(serializable)
114}
115
116/// Convert serializable format to GraphData.
117fn to_graph_data(serializable: SerializableGraph) -> Result<GraphData> {
118    let mut graph = DiGraph::new();
119    let mut node_indices = HashMap::new();
120    let mut nodes = HashMap::new();
121
122    // Add nodes
123    for node in &serializable.nodes {
124        let idx = graph.add_node(node.clone());
125        node_indices.insert(node.id.clone(), idx);
126        nodes.insert(node.id.clone(), node.clone());
127    }
128
129    // Add edges (skip edges referencing missing nodes)
130    let mut valid_edges = Vec::new();
131    for edge in &serializable.edges {
132        if let (Some(&from_idx), Some(&to_idx)) =
133            (node_indices.get(&edge.from), node_indices.get(&edge.to))
134        {
135            graph.add_edge(from_idx, to_idx, edge.clone());
136            valid_edges.push(edge.clone());
137        }
138    }
139
140    Ok(GraphData {
141        graph,
142        node_indices,
143        nodes,
144        edges: valid_edges,
145    })
146}
147
148/// Check if a cached graph is fresh compared to source content.
149///
150/// Returns `true` if the cache file exists and its content hash matches.
151pub fn is_cache_fresh(cache_path: impl AsRef<Path>, content_hash: &str) -> bool {
152    let path = cache_path.as_ref();
153    if !path.exists() {
154        return false;
155    }
156
157    if let Ok(json) = std::fs::read_to_string(path)
158        && let Ok(serializable) = serde_json::from_str::<SerializableGraph>(&json)
159        && let Some(metadata) = serializable.metadata
160        && let Some(cached_hash) = metadata.content_hash
161    {
162        return cached_hash == content_hash;
163    }
164
165    false
166}
167
168// ============================================================================
169// rkyv Cache Support (feature-gated)
170// ============================================================================
171
#[cfg(feature = "graph-rkyv-cache")]
pub mod rkyv_cache {
    //! Binary caching with Blake3 content hashing.
    //!
    //! Enabled with the `graph-rkyv-cache` feature flag.
    //!
    //! Each file is fed to the hasher as its byte length followed by its
    //! bytes, so the digest depends on where one file ends and the next
    //! begins, not just on the concatenated content. File paths themselves
    //! are not hashed.

    use super::*;

    /// Feed one file into `hasher`, prefixed with its length.
    ///
    /// The length prefix keeps `["ab", "c"]` and `["a", "bc"]` (or an added
    /// empty file) from producing the same digest.
    fn hash_file(hasher: &mut blake3::Hasher, path: &Path) -> Result<()> {
        let content = std::fs::read(path).map_err(|e| Error::io_with_path(e, path))?;
        // usize -> u64 is a widening conversion on supported targets.
        hasher.update(&(content.len() as u64).to_le_bytes());
        hasher.update(&content);
        Ok(())
    }

    /// Compute a Blake3 hash of content files.
    ///
    /// Files are hashed in the order given; reordering `paths` changes the
    /// result. Returns the digest as a hex string.
    ///
    /// # Errors
    ///
    /// Returns an I/O error (carrying the offending path) if any file cannot
    /// be read.
    pub fn compute_content_hash(paths: &[impl AsRef<Path>]) -> Result<String> {
        let mut hasher = blake3::Hasher::new();

        for path in paths {
            hash_file(&mut hasher, path.as_ref())?;
        }

        Ok(hasher.finalize().to_hex().to_string())
    }

    /// Compute a Blake3 hash of a directory's markdown files.
    ///
    /// Walks `dir` recursively, collects every file whose extension is `md`,
    /// sorts the paths so the result does not depend on directory listing
    /// order, and hashes them with [`compute_content_hash`].
    ///
    /// # Errors
    ///
    /// Returns an I/O error if a directory cannot be listed or a file cannot
    /// be read.
    pub fn compute_directory_hash(dir: impl AsRef<Path>) -> Result<String> {
        /// Recursively gather `.md` files under `dir` into `paths`.
        fn collect_files(dir: &Path, paths: &mut Vec<std::path::PathBuf>) -> Result<()> {
            for entry in std::fs::read_dir(dir).map_err(|e| Error::io_with_path(e, dir))? {
                let entry = entry.map_err(Error::io)?;
                let path = entry.path();
                if path.is_dir() {
                    collect_files(&path, paths)?;
                } else if path.extension().is_some_and(|e| e == "md") {
                    paths.push(path);
                }
            }
            Ok(())
        }

        let mut paths: Vec<std::path::PathBuf> = Vec::new();
        collect_files(dir.as_ref(), &mut paths)?;
        paths.sort();

        compute_content_hash(paths.as_slice())
    }
}
222
223// ============================================================================
224// Tests
225// ============================================================================
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;
    use tempfile::tempdir;

    /// Two categorized nodes joined by a single prerequisite edge.
    fn create_test_graph() -> GraphData {
        let mut graph = GraphData::new();

        graph.add_node(Node::new("a", "Node A").with_category("cat1"));
        graph.add_node(Node::new("b", "Node B").with_category("cat2"));

        graph
            .add_edge(Edge::new("a", "b", Relationship::Prerequisite))
            .unwrap();

        graph
    }

    #[test]
    fn test_save_and_load_graph() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test_graph.json");

        let original = create_test_graph();
        save_graph(&original, &path, None).unwrap();

        let loaded = load_graph(&path).unwrap();

        assert_eq!(loaded.node_count(), original.node_count());
        assert_eq!(loaded.edge_count(), original.edge_count());
        assert!(loaded.contains_node("a"));
        assert!(loaded.contains_node("b"));
    }

    #[test]
    fn test_save_with_metadata() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test_graph.json");

        let graph = create_test_graph();
        let metadata = GraphMetadata {
            content_hash: Some("abc123".to_string()),
            source_file_count: Some(10),
            ..Default::default()
        };

        save_graph(&graph, &path, Some(metadata)).unwrap();

        // Parse the file back instead of substring-matching the raw JSON: a
        // bare "10" can also appear inside the `built_at` timestamp.
        let json = std::fs::read_to_string(&path).unwrap();
        let parsed: SerializableGraph = serde_json::from_str(&json).unwrap();
        let saved = parsed.metadata.expect("metadata should be persisted");

        assert_eq!(saved.content_hash.as_deref(), Some("abc123"));
        assert_eq!(saved.source_file_count, Some(10));
    }

    #[test]
    fn test_load_graph_from_str() {
        let json = r#"{
            "nodes": [
                {"id": "x", "title": "X", "category": null, "source_id": null, "is_canonical": true, "canonical_id": null, "metadata": {}}
            ],
            "edges": [],
            "metadata": null
        }"#;

        let graph = load_graph_from_str(json).unwrap();
        assert_eq!(graph.node_count(), 1);
        assert!(graph.contains_node("x"));
    }

    #[test]
    fn test_load_round_trip_preserves_data() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("round_trip.json");

        let mut original = GraphData::new();
        original.add_node(
            Node::new("a", "A")
                .with_category("cat")
                .with_source("src")
                .with_metadata("key", "value"),
        );
        original.add_node(Node::new("b", "B").as_variant_of("canonical-b"));

        original
            .add_edge(
                Edge::new("a", "b", Relationship::Custom("test-rel".to_string()))
                    .with_weight(0.42)
                    .with_origin(EdgeOrigin::Manual),
            )
            .unwrap();

        save_graph(&original, &path, None).unwrap();
        let loaded = load_graph(&path).unwrap();

        let node_a = loaded.get_node("a").unwrap();
        assert_eq!(node_a.category, Some("cat".to_string()));
        assert_eq!(node_a.source_id, Some("src".to_string()));

        let node_b = loaded.get_node("b").unwrap();
        assert!(!node_b.is_canonical);
        assert_eq!(node_b.canonical_id, Some("canonical-b".to_string()));

        assert_eq!(loaded.edges.len(), 1);
        assert_eq!(loaded.edges[0].weight, 0.42);
        assert_eq!(loaded.edges[0].origin, EdgeOrigin::Manual);
    }

    #[test]
    fn test_is_cache_fresh() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("cache.json");

        let graph = create_test_graph();
        let metadata = GraphMetadata {
            content_hash: Some("hash123".to_string()),
            ..Default::default()
        };

        save_graph(&graph, &path, Some(metadata)).unwrap();

        assert!(is_cache_fresh(&path, "hash123"));
        assert!(!is_cache_fresh(&path, "different_hash"));
        assert!(!is_cache_fresh(dir.path().join("missing.json"), "hash123"));

        // A cache written without metadata has no hash to compare against.
        let bare_path = dir.path().join("no_metadata.json");
        save_graph(&graph, &bare_path, None).unwrap();
        assert!(!is_cache_fresh(&bare_path, "hash123"));
    }

    #[test]
    fn test_load_graph_invalid_json() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("invalid.json");
        std::fs::write(&path, "not valid json").unwrap();

        let result = load_graph(&path);
        assert!(result.is_err());
    }

    #[test]
    fn test_edges_with_missing_nodes() {
        let json = r#"{
            "nodes": [
                {"id": "a", "title": "A", "category": null, "source_id": null, "is_canonical": true, "canonical_id": null, "metadata": {}}
            ],
            "edges": [
                {"from": "a", "to": "missing", "relationship": "Prerequisite", "weight": 1.0, "origin": "Frontmatter"}
            ],
            "metadata": null
        }"#;

        let graph = load_graph_from_str(json).unwrap();
        assert_eq!(graph.node_count(), 1);
        assert_eq!(graph.graph.edge_count(), 0);
    }

    #[test]
    fn test_metadata_default() {
        let meta = GraphMetadata::default();
        assert!(!meta.built_at.is_empty());
        assert!(!meta.builder_version.is_empty());
        assert!(meta.content_hash.is_none());
        assert!(meta.source_file_count.is_none());
    }

    #[test]
    fn test_serializable_graph_round_trip() {
        let sg = SerializableGraph {
            nodes: vec![Node::new("test", "Test")],
            edges: vec![],
            metadata: Some(GraphMetadata::default()),
        };

        let json = serde_json::to_string(&sg).unwrap();
        let parsed: SerializableGraph = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.nodes.len(), 1);
        assert!(parsed.metadata.is_some());
    }
}
403
#[cfg(all(test, feature = "graph-rkyv-cache"))]
mod rkyv_tests {
    use super::rkyv_cache::*;
    use tempfile::tempdir;

    #[test]
    fn test_compute_content_hash() {
        let dir = tempdir().unwrap();
        let file1 = dir.path().join("a.md");
        let file2 = dir.path().join("b.md");

        std::fs::write(&file1, "content a").unwrap();
        std::fs::write(&file2, "content b").unwrap();

        // Same inputs hash to the same value.
        let hash1 = compute_content_hash(&[&file1, &file2]).unwrap();
        let hash2 = compute_content_hash(&[&file1, &file2]).unwrap();
        assert_eq!(hash1, hash2);

        // Changing one file's content changes the hash.
        std::fs::write(&file2, "different").unwrap();
        let hash3 = compute_content_hash(&[&file1, &file2]).unwrap();
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_compute_directory_hash() {
        let dir = tempdir().unwrap();
        let sub = dir.path().join("subdir");
        std::fs::create_dir(&sub).unwrap();

        std::fs::write(dir.path().join("a.md"), "a").unwrap();
        std::fs::write(sub.join("b.md"), "b").unwrap();

        let hash = compute_directory_hash(dir.path()).unwrap();
        assert!(!hash.is_empty());

        // Hashing an unchanged tree is deterministic.
        assert_eq!(hash, compute_directory_hash(dir.path()).unwrap());

        // Only `.md` files take part in the hash.
        std::fs::write(dir.path().join("notes.txt"), "ignored").unwrap();
        assert_eq!(hash, compute_directory_hash(dir.path()).unwrap());

        // A change in a nested markdown file is picked up.
        std::fs::write(sub.join("b.md"), "changed").unwrap();
        assert_ne!(hash, compute_directory_hash(dir.path()).unwrap());
    }
}
439}