sqry_core/graph/unified/analysis/
persistence.rs1use super::condensation::CondensationDag;
6use super::csr::CsrAdjacency;
7use super::scc::SccData;
8use crate::graph::unified::concurrent::GraphSnapshot;
9use crate::graph::unified::persistence::GraphStorage;
10use anyhow::Result;
11use sha2::{Digest, Sha256};
12use std::path::Path;
13
14#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
16pub struct AnalysisIdentity {
17 pub manifest_hash: String,
19 pub node_id_hash: [u8; 32],
21}
22
23impl AnalysisIdentity {
24 #[must_use]
26 pub fn new(manifest_hash: String, node_id_hash: [u8; 32]) -> Self {
27 Self {
28 manifest_hash,
29 node_id_hash,
30 }
31 }
32
33 pub fn ensure_matches(&self, expected: &AnalysisIdentity) -> Result<()> {
39 if self.manifest_hash != expected.manifest_hash {
40 anyhow::bail!(
41 "analysis manifest hash mismatch: expected {}, got {}",
42 expected.manifest_hash,
43 self.manifest_hash
44 );
45 }
46 if self.node_id_hash != expected.node_id_hash {
47 anyhow::bail!(
48 "analysis node_id_hash mismatch: expected {}, got {}",
49 hex::encode(expected.node_id_hash),
50 hex::encode(self.node_id_hash)
51 );
52 }
53 Ok(())
54 }
55
56 pub fn ensure_manifest_matches(&self, expected_manifest_hash: &str) -> Result<()> {
67 if self.manifest_hash != expected_manifest_hash {
68 anyhow::bail!(
69 "analysis manifest hash mismatch: expected {}, got {}",
70 expected_manifest_hash,
71 self.manifest_hash
72 );
73 }
74 Ok(())
75 }
76}
77
78pub fn compute_manifest_hash(path: &Path) -> Result<String> {
84 let data = std::fs::read(path)?;
85 let mut hasher = Sha256::new();
86 hasher.update(&data);
87 Ok(hex::encode(hasher.finalize()))
88}
89
90#[must_use]
92pub fn compute_node_id_hash(snapshot: &GraphSnapshot) -> [u8; 32] {
93 let mut hasher = Sha256::new();
94 let strings = snapshot.strings();
95 let files = snapshot.files();
96
97 let mut nodes: Vec<_> = snapshot.nodes().iter().collect();
98 nodes.sort_by_key(|(node_id, _)| node_id.index());
99
100 for (node_id, entry) in nodes {
101 hasher.update(node_id.index().to_le_bytes());
102 hasher.update(node_id.generation().to_le_bytes());
103
104 let kind_str = format!("{:?}", entry.kind);
105 hash_str(&mut hasher, Some(kind_str.as_str()));
106 let name = strings.resolve(entry.name);
107 hash_str(&mut hasher, name.as_deref());
108
109 let qualified = entry.qualified_name.and_then(|id| strings.resolve(id));
110 hash_str(&mut hasher, qualified.as_deref());
111
112 let file_path = files
113 .resolve(entry.file)
114 .map(|path| path.to_string_lossy().into_owned());
115 hash_str(&mut hasher, file_path.as_deref());
116 }
117
118 let digest = hasher.finalize();
119 let mut output = [0u8; 32];
120 output.copy_from_slice(&digest);
121 output
122}
123
124#[allow(clippy::cast_possible_truncation)] fn hash_str(hasher: &mut Sha256, value: Option<&str>) {
126 let len = value.map_or(0u32, |s| s.len() as u32);
127 hasher.update(len.to_le_bytes());
128 if let Some(s) = value {
129 hasher.update(s.as_bytes());
130 }
131}
132
133pub fn persist_csr(csr: &CsrAdjacency, identity: &AnalysisIdentity, path: &Path) -> Result<()> {
139 let encoded = postcard::to_allocvec(&(identity, csr))?;
140 std::fs::write(path, encoded)?;
141 Ok(())
142}
143
144pub fn load_csr(path: &Path) -> Result<(CsrAdjacency, AnalysisIdentity)> {
150 let data = std::fs::read(path)?;
151 let (identity, csr) = postcard::from_bytes(&data)?;
152 Ok((csr, identity))
153}
154
155pub fn persist_scc(scc: &SccData, identity: &AnalysisIdentity, path: &Path) -> Result<()> {
161 let encoded = postcard::to_allocvec(&(identity, scc))?;
162 std::fs::write(path, encoded)?;
163 Ok(())
164}
165
166pub fn load_scc(path: &Path) -> Result<(SccData, AnalysisIdentity)> {
172 let data = std::fs::read(path)?;
173 let (identity, scc) = postcard::from_bytes(&data)?;
174 Ok((scc, identity))
175}
176
177pub fn persist_condensation(
183 dag: &CondensationDag,
184 identity: &AnalysisIdentity,
185 path: &Path,
186) -> Result<()> {
187 let encoded = postcard::to_allocvec(&(identity, dag))?;
188 std::fs::write(path, encoded)?;
189 Ok(())
190}
191
192pub fn load_condensation(path: &Path) -> Result<(CondensationDag, AnalysisIdentity)> {
198 let data = std::fs::read(path)?;
199 let (identity, mut dag): (AnalysisIdentity, CondensationDag) = postcard::from_bytes(&data)?;
200 dag.fixup_after_load();
201 Ok((dag, identity))
202}
203
204pub fn load_csr_checked(path: &Path, expected: &AnalysisIdentity) -> Result<CsrAdjacency> {
210 let (csr, identity) = load_csr(path)?;
211 identity.ensure_matches(expected)?;
212 Ok(csr)
213}
214
215pub fn load_scc_checked(path: &Path, expected: &AnalysisIdentity) -> Result<SccData> {
221 let (scc, identity) = load_scc(path)?;
222 identity.ensure_matches(expected)?;
223 Ok(scc)
224}
225
226pub fn load_condensation_checked(
232 path: &Path,
233 expected: &AnalysisIdentity,
234) -> Result<CondensationDag> {
235 let (dag, identity) = load_condensation(path)?;
236 identity.ensure_matches(expected)?;
237 Ok(dag)
238}
239
240pub fn load_scc_manifest_checked(path: &Path, expected_manifest_hash: &str) -> Result<SccData> {
255 let (scc, identity) = load_scc(path)?;
256 identity.ensure_manifest_matches(expected_manifest_hash)?;
257 Ok(scc)
258}
259
260pub fn load_condensation_manifest_checked(
270 path: &Path,
271 expected_manifest_hash: &str,
272) -> Result<CondensationDag> {
273 let (dag, identity) = load_condensation(path)?;
274 identity.ensure_manifest_matches(expected_manifest_hash)?;
275 Ok(dag)
276}
277
278#[must_use]
292pub fn try_load_scc(
293 storage: &GraphStorage,
294 _snapshot: &GraphSnapshot,
295 edge_kind: &str,
296) -> Option<SccData> {
297 let scc_file = storage.analysis_scc_path(edge_kind);
298 if !scc_file.exists() {
299 return None;
300 }
301
302 let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok()?;
303
304 load_scc_manifest_checked(&scc_file, &manifest_hash).ok()
305}
306
307#[must_use]
316pub fn try_load_scc_and_condensation(
317 storage: &GraphStorage,
318 _snapshot: &GraphSnapshot,
319 edge_kind: &str,
320) -> Option<(SccData, CondensationDag)> {
321 let scc_file = storage.analysis_scc_path(edge_kind);
322 let cond_file = storage.analysis_cond_path(edge_kind);
323
324 if !scc_file.exists() || !cond_file.exists() {
325 return None;
326 }
327
328 let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok()?;
329
330 let scc_data = load_scc_manifest_checked(&scc_file, &manifest_hash).ok()?;
331 let cond_dag = load_condensation_manifest_checked(&cond_file, &manifest_hash).ok()?;
332
333 Some((scc_data, cond_dag))
334}
335
336#[must_use]
342pub fn try_load_path_analysis(
343 storage: &GraphStorage,
344 edge_kind: &str,
345) -> Option<(CsrAdjacency, SccData, CondensationDag)> {
346 let csr_file = storage.analysis_csr_path();
347 let scc_file = storage.analysis_scc_path(edge_kind);
348 let cond_file = storage.analysis_cond_path(edge_kind);
349
350 if !csr_file.exists() || !scc_file.exists() || !cond_file.exists() {
351 log::debug!("Analysis files not found for edge kind '{edge_kind}', skipping fast path");
352 return None;
353 }
354
355 let manifest_hash = match compute_manifest_hash(storage.manifest_path()) {
356 Ok(h) => h,
357 Err(e) => {
358 log::debug!("Cannot compute manifest hash: {e}, skipping analysis fast path");
359 return None;
360 }
361 };
362
363 let csr = match load_csr(&csr_file) {
364 Ok((csr, identity)) => {
365 if identity.ensure_manifest_matches(&manifest_hash).is_err() {
366 log::info!("Analysis CSR is stale (manifest hash mismatch), falling back to BFS");
367 return None;
368 }
369 csr
370 }
371 Err(e) => {
372 log::info!("Failed to load CSR: {e}, skipping analysis fast path");
373 return None;
374 }
375 };
376
377 let scc_data = match load_scc_manifest_checked(&scc_file, &manifest_hash) {
378 Ok(scc) => scc,
379 Err(e) => {
380 log::info!("Analysis SCC is stale or corrupt: {e}, falling back to BFS");
381 return None;
382 }
383 };
384
385 let cond_dag = match load_condensation_manifest_checked(&cond_file, &manifest_hash) {
386 Ok(dag) => dag,
387 Err(e) => {
388 log::info!("Analysis condensation is stale or corrupt: {e}, falling back to BFS");
389 return None;
390 }
391 };
392
393 log::info!("Loaded precomputed analysis for edge kind '{edge_kind}'");
394 Some((csr, scc_data, cond_dag))
395}