Skip to main content

nodex_core/builder/
cache.rs

1use serde::{Deserialize, Serialize};
2use sha2::{Digest, Sha256};
3use std::collections::BTreeMap;
4use std::path::{Path, PathBuf};
5
6use crate::error::{Error, Result};
7use crate::model::{Confidence, Node, RawEdge};
8
9/// Cached parse result for a single document.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct CacheEntry {
12    pub content_hash: String,
13    pub node: Node,
14    pub raw_edges: Vec<CachedRawEdge>,
15}
16
17/// Serializable version of RawEdge (RawEdge itself doesn't derive Serialize).
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct CachedRawEdge {
20    pub target_path: String,
21    pub relation: String,
22    pub confidence: Confidence,
23    pub location: String,
24}
25
26impl From<&RawEdge> for CachedRawEdge {
27    fn from(e: &RawEdge) -> Self {
28        Self {
29            target_path: e.target_path.clone(),
30            relation: e.relation.clone(),
31            confidence: e.confidence,
32            location: e.location.clone(),
33        }
34    }
35}
36
37impl From<CachedRawEdge> for RawEdge {
38    fn from(e: CachedRawEdge) -> Self {
39        Self {
40            target_path: e.target_path,
41            relation: e.relation,
42            confidence: e.confidence,
43            location: e.location,
44        }
45    }
46}
47
48/// Incremental build cache. Maps relative path → CacheEntry.
49///
50/// `config_hash` auto-invalidates entries whenever either the project
51/// config or the nodex binary version changes — the latter guards
52/// against struct-shape drift in `Node` / `Edge` / `RawEdge` after an
53/// upgrade. The hash is computed by `builder::build`; this struct
54/// only stores it for comparison on the next load.
55#[derive(Debug, Default, Serialize, Deserialize)]
56pub struct BuildCache {
57    #[serde(default)]
58    pub config_hash: String,
59    pub entries: BTreeMap<PathBuf, CacheEntry>,
60}
61
62impl BuildCache {
63    /// Load cache from disk. Returns empty cache when the file is
64    /// absent, unreadable, corrupt, or was produced under a different
65    /// config hash. The second return value is an optional warning
66    /// string explaining why — callers surface it so users see why
67    /// an unexpectedly-slow rebuild is happening.
68    pub fn load(cache_path: &Path, current_config_hash: &str) -> (Self, Option<String>) {
69        if !cache_path.exists() {
70            return (Self::default(), None);
71        }
72
73        let raw = match std::fs::read_to_string(cache_path) {
74            Ok(s) => s,
75            Err(e) => {
76                return (
77                    Self::default(),
78                    Some(format!(
79                        "cache unreadable at {}: {e}; rebuilding from scratch",
80                        cache_path.display()
81                    )),
82                );
83            }
84        };
85
86        let cache: Self = match serde_json::from_str(&raw) {
87            Ok(c) => c,
88            Err(e) => {
89                return (
90                    Self::default(),
91                    Some(format!(
92                        "cache corrupt at {}: {e}; rebuilding from scratch",
93                        cache_path.display()
94                    )),
95                );
96            }
97        };
98
99        if cache.config_hash != current_config_hash {
100            return (Self::default(), None); // config changed — expected invalidation, no warning
101        }
102
103        (cache, None)
104    }
105
106    /// Save cache to disk.
107    pub fn save(&self, cache_path: &Path) -> Result<()> {
108        if let Some(parent) = cache_path.parent() {
109            std::fs::create_dir_all(parent).map_err(|e| Error::Io {
110                path: parent.to_path_buf(),
111                source: e,
112            })?;
113        }
114        let json = serde_json::to_string(self)
115            .map_err(|e| Error::Other(format!("cache serialization error: {e}")))?;
116        std::fs::write(cache_path, json).map_err(|e| Error::Io {
117            path: cache_path.to_path_buf(),
118            source: e,
119        })
120    }
121
122    /// Get cached parse result if fresh.
123    pub fn get(&self, rel_path: &Path, content: &str) -> Option<&CacheEntry> {
124        let entry = self.entries.get(rel_path)?;
125        if entry.content_hash == compute_hash(content) {
126            Some(entry)
127        } else {
128            None
129        }
130    }
131
132    /// Store a parse result.
133    pub fn insert(&mut self, rel_path: PathBuf, content: &str, node: Node, raw_edges: &[RawEdge]) {
134        self.entries.insert(
135            rel_path,
136            CacheEntry {
137                content_hash: compute_hash(content),
138                node,
139                raw_edges: raw_edges.iter().map(CachedRawEdge::from).collect(),
140            },
141        );
142    }
143
144    /// Remove entries for paths no longer in scope.
145    pub fn retain_paths(&mut self, valid_paths: &[PathBuf]) {
146        let valid: std::collections::HashSet<&PathBuf> = valid_paths.iter().collect();
147        self.entries.retain(|k, _| valid.contains(k));
148    }
149}
150
151pub fn compute_hash(content: &str) -> String {
152    let mut hasher = Sha256::new();
153    hasher.update(content.as_bytes());
154    hasher.finalize().iter().fold(String::new(), |mut acc, b| {
155        std::fmt::Write::write_fmt(&mut acc, format_args!("{b:02x}")).unwrap();
156        acc
157    })
158}