Skip to main content

sqry_core/graph/unified/analysis/
persistence.rs

1//! Binary persistence for analysis files
2//!
3//! Uses postcard for fast serialization with `AnalysisIdentity` validation.
4
5use super::condensation::CondensationDag;
6use super::csr::CsrAdjacency;
7use super::scc::SccData;
8use crate::graph::unified::concurrent::GraphSnapshot;
9use crate::graph::unified::persistence::GraphStorage;
10use anyhow::Result;
11use sha2::{Digest, Sha256};
12use std::path::Path;
13
14/// Identity metadata used to validate analysis files against the current graph.
15#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
16pub struct AnalysisIdentity {
17    /// SHA-256 hash of the manifest.json contents.
18    pub manifest_hash: String,
19    /// SHA-256 hash of node ordering and identity.
20    pub node_id_hash: [u8; 32],
21}
22
23impl AnalysisIdentity {
24    /// Create a new analysis identity.
25    #[must_use]
26    pub fn new(manifest_hash: String, node_id_hash: [u8; 32]) -> Self {
27        Self {
28            manifest_hash,
29            node_id_hash,
30        }
31    }
32
33    /// Ensure this identity matches the expected value (full validation).
34    /// Returns an error if the operation fails.
35    ///
36    /// # Errors
37    ///
38    pub fn ensure_matches(&self, expected: &AnalysisIdentity) -> Result<()> {
39        if self.manifest_hash != expected.manifest_hash {
40            anyhow::bail!(
41                "analysis manifest hash mismatch: expected {}, got {}",
42                expected.manifest_hash,
43                self.manifest_hash
44            );
45        }
46        if self.node_id_hash != expected.node_id_hash {
47            anyhow::bail!(
48                "analysis node_id_hash mismatch: expected {}, got {}",
49                hex::encode(expected.node_id_hash),
50                hex::encode(self.node_id_hash)
51            );
52        }
53        Ok(())
54    }
55
56    /// Ensure the manifest hash matches (lightweight validation).
57    ///
58    /// This skips the expensive `node_id_hash` comparison and only validates
59    /// the `manifest_hash`. Since the manifest contains `snapshot_sha256`,
60    /// this transitively validates graph identity without the O(N) node hash
61    /// recomputation needed for full validation.
62    ///
63    /// # Errors
64    ///
65    /// Returns an error if the manifest hash does not match.
66    pub fn ensure_manifest_matches(&self, expected_manifest_hash: &str) -> Result<()> {
67        if self.manifest_hash != expected_manifest_hash {
68            anyhow::bail!(
69                "analysis manifest hash mismatch: expected {}, got {}",
70                expected_manifest_hash,
71                self.manifest_hash
72            );
73        }
74        Ok(())
75    }
76}
77
78/// Compute a SHA-256 hash of the manifest file contents.
79/// Returns an error if the operation fails.
80///
81/// # Errors
82///
83pub fn compute_manifest_hash(path: &Path) -> Result<String> {
84    let data = std::fs::read(path)?;
85    let mut hasher = Sha256::new();
86    hasher.update(&data);
87    Ok(hex::encode(hasher.finalize()))
88}
89
90/// Compute a stable hash of node ID ordering and identity for validation.
91#[must_use]
92pub fn compute_node_id_hash(snapshot: &GraphSnapshot) -> [u8; 32] {
93    let mut hasher = Sha256::new();
94    let strings = snapshot.strings();
95    let files = snapshot.files();
96
97    let mut nodes: Vec<_> = snapshot.nodes().iter().collect();
98    nodes.sort_by_key(|(node_id, _)| node_id.index());
99
100    for (node_id, entry) in nodes {
101        hasher.update(node_id.index().to_le_bytes());
102        hasher.update(node_id.generation().to_le_bytes());
103
104        let kind_str = format!("{:?}", entry.kind);
105        hash_str(&mut hasher, Some(kind_str.as_str()));
106        let name = strings.resolve(entry.name);
107        hash_str(&mut hasher, name.as_deref());
108
109        let qualified = entry.qualified_name.and_then(|id| strings.resolve(id));
110        hash_str(&mut hasher, qualified.as_deref());
111
112        let file_path = files
113            .resolve(entry.file)
114            .map(|path| path.to_string_lossy().into_owned());
115        hash_str(&mut hasher, file_path.as_deref());
116    }
117
118    let digest = hasher.finalize();
119    let mut output = [0u8; 32];
120    output.copy_from_slice(&digest);
121    output
122}
123
124#[allow(clippy::cast_possible_truncation)] // String lengths in practice won't exceed u32::MAX
125fn hash_str(hasher: &mut Sha256, value: Option<&str>) {
126    let len = value.map_or(0u32, |s| s.len() as u32);
127    hasher.update(len.to_le_bytes());
128    if let Some(s) = value {
129        hasher.update(s.as_bytes());
130    }
131}
132
133/// Persist CSR adjacency to disk.
134/// Returns an error if the operation fails.
135///
136/// # Errors
137///
138pub fn persist_csr(csr: &CsrAdjacency, identity: &AnalysisIdentity, path: &Path) -> Result<()> {
139    let encoded = postcard::to_allocvec(&(identity, csr))?;
140    std::fs::write(path, encoded)?;
141    Ok(())
142}
143
144/// Load CSR adjacency from disk.
145/// Returns an error if the operation fails.
146///
147/// # Errors
148///
149pub fn load_csr(path: &Path) -> Result<(CsrAdjacency, AnalysisIdentity)> {
150    let data = std::fs::read(path)?;
151    let (identity, csr) = postcard::from_bytes(&data)?;
152    Ok((csr, identity))
153}
154
155/// Persist SCC data to disk.
156/// Returns an error if the operation fails.
157///
158/// # Errors
159///
160pub fn persist_scc(scc: &SccData, identity: &AnalysisIdentity, path: &Path) -> Result<()> {
161    let encoded = postcard::to_allocvec(&(identity, scc))?;
162    std::fs::write(path, encoded)?;
163    Ok(())
164}
165
166/// Load SCC data from disk.
167/// Returns an error if the operation fails.
168///
169/// # Errors
170///
171pub fn load_scc(path: &Path) -> Result<(SccData, AnalysisIdentity)> {
172    let data = std::fs::read(path)?;
173    let (identity, scc) = postcard::from_bytes(&data)?;
174    Ok((scc, identity))
175}
176
177/// Persist condensation DAG to disk.
178/// Returns an error if the operation fails.
179///
180/// # Errors
181///
182pub fn persist_condensation(
183    dag: &CondensationDag,
184    identity: &AnalysisIdentity,
185    path: &Path,
186) -> Result<()> {
187    let encoded = postcard::to_allocvec(&(identity, dag))?;
188    std::fs::write(path, encoded)?;
189    Ok(())
190}
191
192/// Load condensation DAG from disk.
193/// Returns an error if the operation fails.
194///
195/// # Errors
196///
197pub fn load_condensation(path: &Path) -> Result<(CondensationDag, AnalysisIdentity)> {
198    let data = std::fs::read(path)?;
199    let (identity, mut dag): (AnalysisIdentity, CondensationDag) = postcard::from_bytes(&data)?;
200    dag.fixup_after_load();
201    Ok((dag, identity))
202}
203
204/// Load CSR adjacency and validate against expected analysis identity.
205/// Returns an error if the operation fails.
206///
207/// # Errors
208///
209pub fn load_csr_checked(path: &Path, expected: &AnalysisIdentity) -> Result<CsrAdjacency> {
210    let (csr, identity) = load_csr(path)?;
211    identity.ensure_matches(expected)?;
212    Ok(csr)
213}
214
215/// Load SCC data and validate against expected analysis identity.
216/// Returns an error if the operation fails.
217///
218/// # Errors
219///
220pub fn load_scc_checked(path: &Path, expected: &AnalysisIdentity) -> Result<SccData> {
221    let (scc, identity) = load_scc(path)?;
222    identity.ensure_matches(expected)?;
223    Ok(scc)
224}
225
226/// Load condensation DAG and validate against expected analysis identity.
227/// Returns an error if the operation fails.
228///
229/// # Errors
230///
231pub fn load_condensation_checked(
232    path: &Path,
233    expected: &AnalysisIdentity,
234) -> Result<CondensationDag> {
235    let (dag, identity) = load_condensation(path)?;
236    identity.ensure_matches(expected)?;
237    Ok(dag)
238}
239
240// ============================================================================
241// Manifest-only validated loaders (fast path for runtime loading)
242// ============================================================================
243
244/// Load SCC data with manifest-hash-only validation.
245///
246/// This is the fast-path loader that avoids the O(N) `compute_node_id_hash()`
247/// recomputation. Since the manifest contains `snapshot_sha256`, manifest-hash
248/// comparison transitively validates graph identity.
249///
250/// # Errors
251///
252/// Returns an error if the file cannot be read, deserialized, or the manifest
253/// hash does not match.
254pub fn load_scc_manifest_checked(path: &Path, expected_manifest_hash: &str) -> Result<SccData> {
255    let (scc, identity) = load_scc(path)?;
256    identity.ensure_manifest_matches(expected_manifest_hash)?;
257    Ok(scc)
258}
259
260/// Load condensation DAG with manifest-hash-only validation.
261///
262/// This is the fast-path loader that avoids the O(N) `compute_node_id_hash()`
263/// recomputation.
264///
265/// # Errors
266///
267/// Returns an error if the file cannot be read, deserialized, or the manifest
268/// hash does not match.
269pub fn load_condensation_manifest_checked(
270    path: &Path,
271    expected_manifest_hash: &str,
272) -> Result<CondensationDag> {
273    let (dag, identity) = load_condensation(path)?;
274    identity.ensure_manifest_matches(expected_manifest_hash)?;
275    Ok(dag)
276}
277
278// ============================================================================
279// High-level validated loaders (DRY wrappers used by MCP, LSP, CLI)
280// ============================================================================
281
282/// Load SCC data with automatic identity validation.
283///
284/// Validates the analysis file against the current manifest hash.
285/// This uses manifest-hash-only validation to avoid the expensive O(N)
286/// `compute_node_id_hash()` recomputation. Since the manifest includes
287/// `snapshot_sha256`, this transitively ensures graph identity.
288///
289/// Returns `None` if analysis files don't exist or validation fails.
290/// The caller should fall back to query-time computation in that case.
291#[must_use]
292pub fn try_load_scc(
293    storage: &GraphStorage,
294    _snapshot: &GraphSnapshot,
295    edge_kind: &str,
296) -> Option<SccData> {
297    let scc_file = storage.analysis_scc_path(edge_kind);
298    if !scc_file.exists() {
299        return None;
300    }
301
302    let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok()?;
303
304    load_scc_manifest_checked(&scc_file, &manifest_hash).ok()
305}
306
307/// Load SCC and condensation DAG with automatic identity validation.
308///
309/// Validates analysis files against the current manifest hash.
310/// This uses manifest-hash-only validation to avoid the expensive O(N)
311/// `compute_node_id_hash()` recomputation.
312///
313/// Returns `None` if either analysis file doesn't exist or validation fails.
314/// The caller should fall back to query-time computation in that case.
315#[must_use]
316pub fn try_load_scc_and_condensation(
317    storage: &GraphStorage,
318    _snapshot: &GraphSnapshot,
319    edge_kind: &str,
320) -> Option<(SccData, CondensationDag)> {
321    let scc_file = storage.analysis_scc_path(edge_kind);
322    let cond_file = storage.analysis_cond_path(edge_kind);
323
324    if !scc_file.exists() || !cond_file.exists() {
325        return None;
326    }
327
328    let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok()?;
329
330    let scc_data = load_scc_manifest_checked(&scc_file, &manifest_hash).ok()?;
331    let cond_dag = load_condensation_manifest_checked(&cond_file, &manifest_hash).ok()?;
332
333    Some((scc_data, cond_dag))
334}
335
336/// Load CSR + SCC + condensation DAG for path reconstruction.
337///
338/// Validates all analysis files against the current manifest hash.
339/// Returns `None` if any file is missing or stale. The caller should
340/// fall back to graph-level BFS in that case.
341#[must_use]
342pub fn try_load_path_analysis(
343    storage: &GraphStorage,
344    edge_kind: &str,
345) -> Option<(CsrAdjacency, SccData, CondensationDag)> {
346    let csr_file = storage.analysis_csr_path();
347    let scc_file = storage.analysis_scc_path(edge_kind);
348    let cond_file = storage.analysis_cond_path(edge_kind);
349
350    if !csr_file.exists() || !scc_file.exists() || !cond_file.exists() {
351        log::debug!("Analysis files not found for edge kind '{edge_kind}', skipping fast path");
352        return None;
353    }
354
355    let manifest_hash = match compute_manifest_hash(storage.manifest_path()) {
356        Ok(h) => h,
357        Err(e) => {
358            log::debug!("Cannot compute manifest hash: {e}, skipping analysis fast path");
359            return None;
360        }
361    };
362
363    let csr = match load_csr(&csr_file) {
364        Ok((csr, identity)) => {
365            if identity.ensure_manifest_matches(&manifest_hash).is_err() {
366                log::info!("Analysis CSR is stale (manifest hash mismatch), falling back to BFS");
367                return None;
368            }
369            csr
370        }
371        Err(e) => {
372            log::info!("Failed to load CSR: {e}, skipping analysis fast path");
373            return None;
374        }
375    };
376
377    let scc_data = match load_scc_manifest_checked(&scc_file, &manifest_hash) {
378        Ok(scc) => scc,
379        Err(e) => {
380            log::info!("Analysis SCC is stale or corrupt: {e}, falling back to BFS");
381            return None;
382        }
383    };
384
385    let cond_dag = match load_condensation_manifest_checked(&cond_file, &manifest_hash) {
386        Ok(dag) => dag,
387        Err(e) => {
388            log::info!("Analysis condensation is stale or corrupt: {e}, falling back to BFS");
389            return None;
390        }
391    };
392
393    log::info!("Loaded precomputed analysis for edge kind '{edge_kind}'");
394    Some((csr, scc_data, cond_dag))
395}