Skip to main content

sqry_core/graph/unified/persistence/
mod.rs

//! Persistence layer for the unified graph architecture.
//!
//! This module provides save/load functionality for the unified graph,
//! enabling efficient serialization and deserialization of the complete
//! graph state including nodes, edges, strings, files, and indices.
//!
//! # Format
//!
//! The persistence format is a binary format using postcard serialization:
//! - Magic bytes: `SQRY_GRAPH_V5` (13 bytes)
//! - Version header with counts and config provenance
//! - Serialized components in order
//!
//! # Config Provenance
//!
//! Starting with V2, the graph header includes `config_provenance`, which
//! records which configuration was used when building the graph. This enables:
//! - Detecting config drift (config changed since graph was built)
//! - Tracking CLI/env overrides used during build
//! - Reproducibility analysis
//!
//! # Storage Layout
//!
//! The unified graph is stored in the `.sqry/graph/` directory:
//! ```text
//! .sqry/graph/
//! ├── manifest.json     # Metadata and checksums
//! ├── snapshot.sqry     # Binary graph snapshot
//! └── config/           # Configuration files
//!     └── config.json   # Build configuration
//! ```
//!
//! # Usage
//!
//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage};
//! use sqry_core::graph::unified::CodeGraph;
//! use std::path::Path;
//!
//! // Create storage for a project
//! let storage = GraphStorage::new(Path::new("/path/to/project"));
//!
//! // Check if graph exists
//! if storage.exists() {
//!     let manifest = storage.load_manifest()?;
//!     println!("Graph has {} nodes", manifest.node_count);
//! }
//!
//! // Save graph to disk
//! let graph = CodeGraph::new();
//! persistence::save_to_path(&graph, storage.snapshot_path())?;
//! ```

54pub mod format;
55pub mod manifest;
56pub mod snapshot;
57
58use std::path::{Path, PathBuf};
59use std::time::Duration;
60
61pub use format::{
62    FormatVersion, GraphHeader, MAGIC_BYTES, MAGIC_BYTES_V7, MAGIC_BYTES_V8, MAGIC_BYTES_V9,
63    MAGIC_BYTES_V10, VERSION,
64};
65pub use manifest::{
66    BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
67    ManifestCheck, OverrideEntry, OverrideSource, PluginSelectionManifest, SNAPSHOT_FORMAT_VERSION,
68    compute_config_checksum, default_provenance, try_load_manifest,
69};
70pub use snapshot::{
71    PersistenceError, check_config_drift, load_from_bytes, load_from_path, load_header_from_path,
72    save_to_path, save_to_path_with_provenance, validate_snapshot, verify_snapshot_bytes,
73};
74
// ============================================================================
// Graph Storage (directory-based storage manager)
// ============================================================================

/// Directory, relative to the project root, that holds the unified graph
/// files (manifest, snapshot, and the `config/` subdirectory).
const GRAPH_DIR_NAME: &str = ".sqry/graph";

/// Directory, relative to the project root, that holds analysis artifacts.
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// File name of the graph manifest inside the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// File name of the binary graph snapshot inside the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";

/// Storage manager for unified graph and analysis files.
///
/// `GraphStorage` holds the paths that make up a project's `.sqry/`
/// directory: the graph files (manifest, snapshot, build config) and the
/// directory for analysis artifacts (CSR, SCC, condensation DAGs). All paths
/// are derived once, at construction time, from the project root.
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json     # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry     # Binary graph snapshot
/// │   └── config/           # Build configuration
/// │       └── config.json   # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr     # CSR adjacency matrix
///     ├── scc_calls.scc     # SCC data for call edges
///     ├── scc_imports.scc   # SCC data for import edges
///     ├── cond_calls.dag    # Condensation DAG for call edges
///     └── ...               # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Debug, Clone)]
pub struct GraphStorage {
    /// Path to the `.sqry/graph/` directory.
    graph_dir: PathBuf,
    /// Path to the `.sqry/analysis/` directory.
    analysis_dir: PathBuf,
    /// Path to the manifest file inside the graph directory.
    manifest_path: PathBuf,
    /// Path to the snapshot file inside the graph directory.
    snapshot_path: PathBuf,
}

140impl GraphStorage {
141    /// Creates a new storage manager for the given project root.
142    ///
143    /// # Arguments
144    ///
145    /// * `root_path` - Root directory of the project
146    ///
147    /// # Returns
148    ///
149    /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
150    #[must_use]
151    pub fn new(root_path: &Path) -> Self {
152        let graph_dir = root_path.join(GRAPH_DIR_NAME);
153        let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
154        Self {
155            manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
156            snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
157            graph_dir,
158            analysis_dir,
159        }
160    }
161
162    /// Returns the path to the `.sqry/graph/` directory.
163    #[must_use]
164    pub fn graph_dir(&self) -> &Path {
165        &self.graph_dir
166    }
167
168    /// Returns the path to the `.sqry/analysis/` directory.
169    #[must_use]
170    pub fn analysis_dir(&self) -> &Path {
171        &self.analysis_dir
172    }
173
174    /// Returns the path to an SCC artifact file for a given edge kind.
175    ///
176    /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
177    #[must_use]
178    pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
179        self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
180    }
181
182    /// Returns the path to a condensation DAG artifact file for a given edge kind.
183    ///
184    /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
185    #[must_use]
186    pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
187        self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
188    }
189
190    /// Returns the path to the CSR adjacency artifact file.
191    #[must_use]
192    pub fn analysis_csr_path(&self) -> PathBuf {
193        self.analysis_dir.join("adjacency.csr")
194    }
195
196    /// Returns the path to the manifest file.
197    #[must_use]
198    pub fn manifest_path(&self) -> &Path {
199        &self.manifest_path
200    }
201
202    /// Returns the path to the snapshot file.
203    #[must_use]
204    pub fn snapshot_path(&self) -> &Path {
205        &self.snapshot_path
206    }
207
208    /// Checks if a unified graph exists (manifest file exists).
209    #[must_use]
210    pub fn exists(&self) -> bool {
211        self.manifest_path.exists()
212    }
213
214    /// Checks if the snapshot file exists.
215    #[must_use]
216    pub fn snapshot_exists(&self) -> bool {
217        self.snapshot_path.exists()
218    }
219
220    /// Loads the graph manifest from disk.
221    ///
222    /// # Errors
223    ///
224    /// Returns an error if the manifest file cannot be read or parsed.
225    pub fn load_manifest(&self) -> std::io::Result<Manifest> {
226        Manifest::load(&self.manifest_path)
227    }
228
229    /// Attempts to load the graph manifest from disk, returning a typed
230    /// [`ManifestCheck`] instead of propagating `ENOENT` as a hard error.
231    ///
232    /// This is the non-panicking, policy-neutral variant of
233    /// [`Self::load_manifest`]. Use it in freshness checks, serve-path
234    /// guards, and any code that must distinguish "missing" from "corrupt"
235    /// to apply the correct policy:
236    ///
237    /// - `ManifestCheck::Present(m)` — manifest exists and parsed.
238    /// - `ManifestCheck::Missing` — file not on disk (e.g. during rebuild
239    ///   window). Callers should treat the graph as stale and either wait,
240    ///   trigger a rebuild, or refuse to serve unverified snapshots.
241    /// - `ManifestCheck::Corrupt(e)` — file exists but is unreadable or
242    ///   invalid JSON; same policy as Missing (rebuild).
243    ///
244    /// The SHA-256 integrity contract is preserved: a `Missing` or `Corrupt`
245    /// result means no snapshot is served without verification.
246    #[must_use]
247    pub fn try_load_manifest(&self) -> ManifestCheck {
248        manifest::try_load_manifest(&self.manifest_path)
249    }
250
251    /// Computes the age of the snapshot based on the manifest timestamp.
252    ///
253    /// # Arguments
254    ///
255    /// * `manifest` - The loaded manifest containing the build timestamp
256    ///
257    /// # Errors
258    ///
259    /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
260    pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
261        // Parse the RFC3339 timestamp from the manifest
262        let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
263            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
264
265        let now = chrono::Utc::now();
266        let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
267
268        // Convert to std::time::Duration (clamped to non-negative)
269        let seconds = duration.num_seconds().max(0);
270        let seconds = u64::try_from(seconds).unwrap_or(0);
271        Ok(Duration::from_secs(seconds))
272    }
273
274    /// Returns the path to the config directory.
275    #[must_use]
276    pub fn config_dir(&self) -> PathBuf {
277        self.graph_dir.join("config")
278    }
279
280    /// Returns the path to the config file.
281    #[must_use]
282    pub fn config_path(&self) -> PathBuf {
283        self.config_dir().join("config.json")
284    }
285}
286
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Graph-directory paths are derived from the root and nothing exists yet.
    #[test]
    fn test_graph_storage_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.graph_dir(), tmp.path().join(".sqry/graph"));
        assert_eq!(
            storage.manifest_path(),
            tmp.path().join(".sqry/graph/manifest.json")
        );
        assert_eq!(
            storage.snapshot_path(),
            tmp.path().join(".sqry/graph/snapshot.sqry")
        );
        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    /// Analysis-artifact and config paths are derived from the same root.
    #[test]
    fn test_graph_storage_analysis_and_config_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.analysis_dir(), tmp.path().join(".sqry/analysis"));
        assert_eq!(
            storage.analysis_scc_path("calls"),
            tmp.path().join(".sqry/analysis/scc_calls.scc")
        );
        assert_eq!(
            storage.analysis_cond_path("calls"),
            tmp.path().join(".sqry/analysis/cond_calls.dag")
        );
        assert_eq!(
            storage.analysis_csr_path(),
            tmp.path().join(".sqry/analysis/adjacency.csr")
        );
        assert_eq!(storage.config_dir(), tmp.path().join(".sqry/graph/config"));
        assert_eq!(
            storage.config_path(),
            tmp.path().join(".sqry/graph/config/config.json")
        );
    }

    #[test]
    fn test_graph_storage_exists() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Initially doesn't exist
        assert!(!storage.exists());

        // Create the directory and manifest
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), "{}").unwrap();

        // Now exists
        assert!(storage.exists());
    }

    #[test]
    fn test_manifest_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create directory
        std::fs::create_dir_all(storage.graph_dir()).unwrap();

        // Create and save manifest
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Load and verify
        let loaded = storage.load_manifest().unwrap();
        assert_eq!(loaded.node_count, 100);
        assert_eq!(loaded.edge_count, 200);
        assert_eq!(loaded.snapshot_sha256, "abc123");
        assert_eq!(loaded.build_provenance.sqry_version, "0.15.0");
    }

    #[test]
    fn test_snapshot_age() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create manifest with current timestamp
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);

        // Age should be very small (just created)
        let age = storage.snapshot_age(&manifest).unwrap();
        assert!(age.as_secs() < 2, "Age should be less than 2 seconds");
    }

    /// Regression test (Step 10, #10): Snapshot without manifest → not ready.
    ///
    /// Under manifest-last persistence, a snapshot file without manifest means
    /// the build was interrupted. `storage.exists()` must return false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and snapshot (but no manifest)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // snapshot_exists() should be true (file exists)
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // exists() should be false (no manifest → not ready)
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): Manifest without snapshot → `exists()` true, load fails gracefully.
    ///
    /// Manifest present but snapshot missing indicates corruption. `storage.exists()`
    /// returns true (manifest present), but `load_from_path()` must fail gracefully
    /// (error, not panic), so auto-index paths can trigger rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and manifest (but no snapshot)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("3.6.0", "test");
        let manifest = Manifest::new(
            tmp.path().display().to_string(),
            100,
            200,
            "sha256",
            provenance,
        );
        manifest.save(storage.manifest_path()).unwrap();

        // exists() should be true (manifest present)
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // snapshot_exists() should be false (no snapshot file)
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // load_from_path should fail gracefully (error, not panic)
        let result = load_from_path(storage.snapshot_path(), None);
        assert!(
            result.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }

    // ====================================================================
    // ManifestCheck / try_load_manifest tests (MANIFEST_1)
    // ====================================================================

    /// A manifest that was never written returns `ManifestCheck::Missing` (not `Err`).
    #[test]
    fn test_try_load_manifest_missing_returns_missing() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // No manifest exists at all — directory not even created.
        let result = storage.try_load_manifest();
        assert!(
            result.is_missing(),
            "Missing manifest file should return ManifestCheck::Missing, not Err"
        );
        assert!(!result.is_present());
        assert!(!result.is_corrupt());
    }

    /// Removing the manifest after it was present returns `ManifestCheck::Missing`.
    #[test]
    fn test_try_load_manifest_removed_after_creation() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create directory + manifest
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 10, 20, "sha_initial", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Confirm it loads correctly first
        assert!(storage.try_load_manifest().is_present());

        // Remove the manifest — simulates rebuild window where manifest is absent
        std::fs::remove_file(storage.manifest_path()).unwrap();

        let result = storage.try_load_manifest();
        assert!(
            result.is_missing(),
            "Freshness check on removed manifest must return Missing, not Err/Corrupt"
        );
    }

    /// A corrupt manifest (invalid JSON) returns `ManifestCheck::Corrupt`.
    #[test]
    fn test_try_load_manifest_corrupt_returns_corrupt() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        // Write invalid JSON
        std::fs::write(storage.manifest_path(), b"not valid json {{{{").unwrap();

        let result = storage.try_load_manifest();
        assert!(
            result.is_corrupt(),
            "Invalid JSON in manifest should return ManifestCheck::Corrupt"
        );
        assert!(!result.is_present());
        assert!(!result.is_missing());
    }

    /// A valid manifest returns `ManifestCheck::Present` with correct fields.
    #[test]
    fn test_try_load_manifest_valid_returns_present() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let original = Manifest::new("/workspace/root", 42, 99, "sha256_test", provenance);
        original.save(storage.manifest_path()).unwrap();

        match storage.try_load_manifest() {
            ManifestCheck::Present(m) => {
                assert_eq!(m.node_count, 42);
                assert_eq!(m.edge_count, 99);
                assert_eq!(m.snapshot_sha256, "sha256_test");
                assert_eq!(m.root_path, "/workspace/root");
            }
            ManifestCheck::Missing => panic!("Expected Present, got Missing"),
            ManifestCheck::Corrupt(e) => panic!("Expected Present, got Corrupt: {e}"),
        }
    }

    /// `ManifestCheck::into_manifest()` converts `Present` to `Some`, `Missing`/`Corrupt` to `None`.
    #[test]
    fn test_manifest_check_into_manifest() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Missing → None
        let missing = storage.try_load_manifest();
        assert!(missing.into_manifest().is_none());

        // Corrupt → None
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), b"bad json").unwrap();
        let corrupt = storage.try_load_manifest();
        assert!(corrupt.into_manifest().is_none());

        // Present → Some
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let manifest = Manifest::new("/path", 1, 2, "sha", provenance);
        manifest.save(storage.manifest_path()).unwrap();
        let present = storage.try_load_manifest();
        assert!(present.into_manifest().is_some());
    }
}