sqry_core/graph/unified/persistence/mod.rs
1//! Persistence layer for the unified graph architecture.
2//!
3//! This module provides save/load functionality for the unified graph,
4//! enabling efficient serialization and deserialization of the complete
5//! graph state including nodes, edges, strings, files, and indices.
6//!
7//! # Format
8//!
9//! The persistence format is a binary format using postcard serialization:
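//! - Magic bytes: a version-tagged marker such as `SQRY_GRAPH_V5` (13 bytes);
//!   see [`MAGIC_BYTES`] and the versioned `MAGIC_BYTES_V*` constants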
11//! - Version header with counts and config provenance
12//! - Serialized components in order
13//!
14//! # Config Provenance
15//!
16//! Starting with V2, the graph header includes `config_provenance` which
17//! records which configuration was used when building the graph. This enables:
18//! - Detecting config drift (config changed since graph was built)
19//! - Tracking CLI/env overrides used during build
20//! - Reproducibility analysis
21//!
22//! # Storage Layout
23//!
24//! The unified graph is stored in the `.sqry/graph/` directory:
//! ```text
//! .sqry/graph/
//! ├── manifest.json       # Metadata and checksums
//! ├── snapshot.sqry       # Binary graph snapshot
//! └── config/             # Configuration files
//!     └── config.json     # Build configuration
//! ```
32//!
33//! # Usage
34//!
//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage, Manifest};
//! use sqry_core::graph::unified::CodeGraph;
//! use std::path::Path;
//!
//! // Create storage for a project
//! let storage = GraphStorage::new(Path::new("/path/to/project"));
//!
//! // Check if graph exists
//! if storage.exists() {
//!     let manifest = storage.load_manifest()?;
//!     println!("Graph has {} nodes", manifest.node_count);
//! }
//!
//! // Save graph to disk
//! let graph = CodeGraph::new();
//! persistence::save_to_path(&graph, storage.snapshot_path())?;
//! ```
53
54pub mod format;
55pub mod manifest;
56pub mod snapshot;
57
58use std::path::{Path, PathBuf};
59use std::time::Duration;
60
61pub use format::{
62 FormatVersion, GraphHeader, MAGIC_BYTES, MAGIC_BYTES_V7, MAGIC_BYTES_V8, MAGIC_BYTES_V9,
63 MAGIC_BYTES_V10, VERSION,
64};
65pub use manifest::{
66 BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
67 ManifestCheck, OverrideEntry, OverrideSource, PluginSelectionManifest, SNAPSHOT_FORMAT_VERSION,
68 compute_config_checksum, default_provenance, try_load_manifest,
69};
70pub use snapshot::{
71 PersistenceError, check_config_drift, load_from_bytes, load_from_path, load_header_from_path,
72 save_to_path, save_to_path_with_provenance, validate_snapshot, verify_snapshot_bytes,
73};
74
75// ============================================================================
76// Graph Storage (directory-based storage manager)
77// ============================================================================
78
/// Directory, relative to the project root, that holds the unified graph files.
const GRAPH_DIR_NAME: &str = ".sqry/graph";

/// Directory, relative to the project root, that holds analysis artifacts.
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// File name of the graph manifest inside the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// File name of the binary graph snapshot inside the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";
90
/// Storage manager for unified graph and analysis files.
///
/// `GraphStorage` holds the pre-computed paths of the `.sqry/` directory
/// layout for one project root: the graph files (manifest, snapshot) and
/// the directory for analysis artifacts (CSR, SCC, condensation DAGs).
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json      # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry      # Binary graph snapshot
/// │   └── config/            # Build configuration
/// │       └── config.json    # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr      # CSR adjacency matrix
///     ├── scc_calls.scc      # SCC data for call edges
///     ├── scc_imports.scc    # SCC data for import edges
///     ├── cond_calls.dag     # Condensation DAG for call edges
///     └── ...                # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Debug, Clone)]
pub struct GraphStorage {
    /// Path to the `.sqry/graph/` directory.
    graph_dir: PathBuf,
    /// Path to the `.sqry/analysis/` directory.
    analysis_dir: PathBuf,
    /// Path to the manifest file inside the graph directory.
    manifest_path: PathBuf,
    /// Path to the snapshot file inside the graph directory.
    snapshot_path: PathBuf,
}
139
140impl GraphStorage {
141 /// Creates a new storage manager for the given project root.
142 ///
143 /// # Arguments
144 ///
145 /// * `root_path` - Root directory of the project
146 ///
147 /// # Returns
148 ///
149 /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
150 #[must_use]
151 pub fn new(root_path: &Path) -> Self {
152 let graph_dir = root_path.join(GRAPH_DIR_NAME);
153 let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
154 Self {
155 manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
156 snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
157 graph_dir,
158 analysis_dir,
159 }
160 }
161
162 /// Returns the path to the `.sqry/graph/` directory.
163 #[must_use]
164 pub fn graph_dir(&self) -> &Path {
165 &self.graph_dir
166 }
167
168 /// Returns the path to the `.sqry/analysis/` directory.
169 #[must_use]
170 pub fn analysis_dir(&self) -> &Path {
171 &self.analysis_dir
172 }
173
174 /// Returns the path to an SCC artifact file for a given edge kind.
175 ///
176 /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
177 #[must_use]
178 pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
179 self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
180 }
181
182 /// Returns the path to a condensation DAG artifact file for a given edge kind.
183 ///
184 /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
185 #[must_use]
186 pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
187 self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
188 }
189
190 /// Returns the path to the CSR adjacency artifact file.
191 #[must_use]
192 pub fn analysis_csr_path(&self) -> PathBuf {
193 self.analysis_dir.join("adjacency.csr")
194 }
195
196 /// Returns the path to the manifest file.
197 #[must_use]
198 pub fn manifest_path(&self) -> &Path {
199 &self.manifest_path
200 }
201
202 /// Returns the path to the snapshot file.
203 #[must_use]
204 pub fn snapshot_path(&self) -> &Path {
205 &self.snapshot_path
206 }
207
208 /// Checks if a unified graph exists (manifest file exists).
209 #[must_use]
210 pub fn exists(&self) -> bool {
211 self.manifest_path.exists()
212 }
213
214 /// Checks if the snapshot file exists.
215 #[must_use]
216 pub fn snapshot_exists(&self) -> bool {
217 self.snapshot_path.exists()
218 }
219
220 /// Loads the graph manifest from disk.
221 ///
222 /// # Errors
223 ///
224 /// Returns an error if the manifest file cannot be read or parsed.
225 pub fn load_manifest(&self) -> std::io::Result<Manifest> {
226 Manifest::load(&self.manifest_path)
227 }
228
229 /// Attempts to load the graph manifest from disk, returning a typed
230 /// [`ManifestCheck`] instead of propagating `ENOENT` as a hard error.
231 ///
232 /// This is the non-panicking, policy-neutral variant of
233 /// [`Self::load_manifest`]. Use it in freshness checks, serve-path
234 /// guards, and any code that must distinguish "missing" from "corrupt"
235 /// to apply the correct policy:
236 ///
237 /// - `ManifestCheck::Present(m)` — manifest exists and parsed.
238 /// - `ManifestCheck::Missing` — file not on disk (e.g. during rebuild
239 /// window). Callers should treat the graph as stale and either wait,
240 /// trigger a rebuild, or refuse to serve unverified snapshots.
241 /// - `ManifestCheck::Corrupt(e)` — file exists but is unreadable or
242 /// invalid JSON; same policy as Missing (rebuild).
243 ///
244 /// The SHA-256 integrity contract is preserved: a `Missing` or `Corrupt`
245 /// result means no snapshot is served without verification.
246 #[must_use]
247 pub fn try_load_manifest(&self) -> ManifestCheck {
248 manifest::try_load_manifest(&self.manifest_path)
249 }
250
251 /// Computes the age of the snapshot based on the manifest timestamp.
252 ///
253 /// # Arguments
254 ///
255 /// * `manifest` - The loaded manifest containing the build timestamp
256 ///
257 /// # Errors
258 ///
259 /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
260 pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
261 // Parse the RFC3339 timestamp from the manifest
262 let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
263 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
264
265 let now = chrono::Utc::now();
266 let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
267
268 // Convert to std::time::Duration (clamped to non-negative)
269 let seconds = duration.num_seconds().max(0);
270 let seconds = u64::try_from(seconds).unwrap_or(0);
271 Ok(Duration::from_secs(seconds))
272 }
273
274 /// Returns the path to the config directory.
275 #[must_use]
276 pub fn config_dir(&self) -> PathBuf {
277 self.graph_dir.join("config")
278 }
279
280 /// Returns the path to the config file.
281 #[must_use]
282 pub fn config_path(&self) -> PathBuf {
283 self.config_dir().join("config.json")
284 }
285}
286
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Graph, manifest and snapshot paths follow the `.sqry/graph/` layout,
    /// and nothing exists on disk for a fresh project root.
    #[test]
    fn test_graph_storage_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.graph_dir(), tmp.path().join(".sqry/graph"));
        assert_eq!(
            storage.manifest_path(),
            tmp.path().join(".sqry/graph/manifest.json")
        );
        assert_eq!(
            storage.snapshot_path(),
            tmp.path().join(".sqry/graph/snapshot.sqry")
        );
        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    /// Analysis artifact and config paths follow the `.sqry/` layout.
    #[test]
    fn test_graph_storage_analysis_and_config_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.analysis_dir(), tmp.path().join(".sqry/analysis"));
        assert_eq!(
            storage.analysis_csr_path(),
            tmp.path().join(".sqry/analysis/adjacency.csr")
        );
        assert_eq!(
            storage.analysis_scc_path("calls"),
            tmp.path().join(".sqry/analysis/scc_calls.scc")
        );
        assert_eq!(
            storage.analysis_cond_path("calls"),
            tmp.path().join(".sqry/analysis/cond_calls.dag")
        );
        assert_eq!(storage.config_dir(), tmp.path().join(".sqry/graph/config"));
        assert_eq!(
            storage.config_path(),
            tmp.path().join(".sqry/graph/config/config.json")
        );
    }

    /// `exists()` flips to true once a manifest file is on disk.
    #[test]
    fn test_graph_storage_exists() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Initially doesn't exist
        assert!(!storage.exists());

        // Create the directory and manifest
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), "{}").unwrap();

        // Now exists
        assert!(storage.exists());
    }

    /// A saved manifest loads back with the same fields.
    #[test]
    fn test_manifest_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create directory
        std::fs::create_dir_all(storage.graph_dir()).unwrap();

        // Create and save manifest
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Load and verify
        let loaded = storage.load_manifest().unwrap();
        assert_eq!(loaded.node_count, 100);
        assert_eq!(loaded.edge_count, 200);
        assert_eq!(loaded.snapshot_sha256, "abc123");
        assert_eq!(loaded.build_provenance.sqry_version, "0.15.0");
    }

    /// A freshly created manifest has an age close to zero.
    #[test]
    fn test_snapshot_age() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create manifest with current timestamp
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);

        // Age should be very small (just created)
        let age = storage.snapshot_age(&manifest).unwrap();
        assert!(age.as_secs() < 2, "Age should be less than 2 seconds");
    }

    /// A `built_at` value that is not RFC 3339 is reported as `InvalidData`.
    #[test]
    fn test_snapshot_age_invalid_timestamp_is_error() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let mut manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        manifest.built_at = "not-a-timestamp".into();

        let err = storage.snapshot_age(&manifest).unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
    }

    /// A `built_at` value in the future clamps to a zero age instead of failing.
    #[test]
    fn test_snapshot_age_future_timestamp_clamps_to_zero() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let mut manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        manifest.built_at = "2999-01-01T00:00:00Z".into();

        let age = storage.snapshot_age(&manifest).unwrap();
        assert_eq!(age.as_secs(), 0, "Future timestamps should clamp to zero");
    }

    /// Regression test (Step 10, #10): Snapshot without manifest → not ready.
    ///
    /// Under manifest-last persistence, a snapshot file without manifest means
    /// the build was interrupted. `storage.exists()` must return false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and snapshot (but no manifest)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // snapshot_exists() should be true (file exists)
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // exists() should be false (no manifest → not ready)
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): Manifest without snapshot → `exists()` true, load fails gracefully.
    ///
    /// Manifest present but snapshot missing indicates corruption. `storage.exists()`
    /// returns true (manifest present), but `load_from_path()` must fail gracefully
    /// (error, not panic), so auto-index paths can trigger rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and manifest (but no snapshot)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("3.6.0", "test");
        let manifest = Manifest::new(
            tmp.path().display().to_string(),
            100,
            200,
            "sha256",
            provenance,
        );
        manifest.save(storage.manifest_path()).unwrap();

        // exists() should be true (manifest present)
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // snapshot_exists() should be false (no snapshot file)
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // load_from_path should fail gracefully (error, not panic)
        let result = load_from_path(storage.snapshot_path(), None);
        assert!(
            result.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }

    // ====================================================================
    // ManifestCheck / try_load_manifest tests (MANIFEST_1)
    // ====================================================================

    /// A manifest that was never written returns `ManifestCheck::Missing` (not `Err`).
    #[test]
    fn test_try_load_manifest_missing_returns_missing() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // No manifest exists at all — directory not even created.
        let result = storage.try_load_manifest();
        assert!(
            result.is_missing(),
            "Missing manifest file should return ManifestCheck::Missing, not Err"
        );
        assert!(!result.is_present());
        assert!(!result.is_corrupt());
    }

    /// Removing the manifest after it was present returns `ManifestCheck::Missing`.
    #[test]
    fn test_try_load_manifest_removed_after_creation() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create directory + manifest
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 10, 20, "sha_initial", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Confirm it loads correctly first
        assert!(storage.try_load_manifest().is_present());

        // Remove the manifest — simulates rebuild window where manifest is absent
        std::fs::remove_file(storage.manifest_path()).unwrap();

        let result = storage.try_load_manifest();
        assert!(
            result.is_missing(),
            "Freshness check on removed manifest must return Missing, not Err/Corrupt"
        );
    }

    /// A corrupt manifest (invalid JSON) returns `ManifestCheck::Corrupt`.
    #[test]
    fn test_try_load_manifest_corrupt_returns_corrupt() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        // Write invalid JSON
        std::fs::write(storage.manifest_path(), b"not valid json {{{{").unwrap();

        let result = storage.try_load_manifest();
        assert!(
            result.is_corrupt(),
            "Invalid JSON in manifest should return ManifestCheck::Corrupt"
        );
        assert!(!result.is_present());
        assert!(!result.is_missing());
    }

    /// A valid manifest returns `ManifestCheck::Present` with correct fields.
    #[test]
    fn test_try_load_manifest_valid_returns_present() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let original = Manifest::new("/workspace/root", 42, 99, "sha256_test", provenance);
        original.save(storage.manifest_path()).unwrap();

        match storage.try_load_manifest() {
            ManifestCheck::Present(m) => {
                assert_eq!(m.node_count, 42);
                assert_eq!(m.edge_count, 99);
                assert_eq!(m.snapshot_sha256, "sha256_test");
                assert_eq!(m.root_path, "/workspace/root");
            }
            ManifestCheck::Missing => panic!("Expected Present, got Missing"),
            ManifestCheck::Corrupt(e) => panic!("Expected Present, got Corrupt: {e}"),
        }
    }

    /// `ManifestCheck::into_manifest()` converts `Present` to `Some`, `Missing`/`Corrupt` to `None`.
    #[test]
    fn test_manifest_check_into_manifest() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Missing → None
        let missing = storage.try_load_manifest();
        assert!(missing.into_manifest().is_none());

        // Corrupt → None
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), b"bad json").unwrap();
        let corrupt = storage.try_load_manifest();
        assert!(corrupt.into_manifest().is_none());

        // Present → Some
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let manifest = Manifest::new("/path", 1, 2, "sha", provenance);
        manifest.save(storage.manifest_path()).unwrap();
        let present = storage.try_load_manifest();
        assert!(present.into_manifest().is_some());
    }
}