Skip to main content

sqry_core/graph/unified/persistence/
mod.rs

1//! Persistence layer for the unified graph architecture.
2//!
3//! This module provides save/load functionality for the unified graph,
4//! enabling efficient serialization and deserialization of the complete
5//! graph state including nodes, edges, strings, files, and indices.
6//!
7//! # Format
8//!
9//! The persistence format is a binary format using postcard serialization:
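//! - Magic bytes: a versioned tag such as `SQRY_GRAPH_V5` (13 bytes); see the
//!   re-exported [`MAGIC_BYTES`] / `MAGIC_BYTES_V*` constants for the actual tags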
11//! - Version header with counts and config provenance
12//! - Serialized components in order
13//!
14//! # Config Provenance
15//!
16//! Starting with V2, the graph header includes `config_provenance` which
17//! records which configuration was used when building the graph. This enables:
18//! - Detecting config drift (config changed since graph was built)
19//! - Tracking CLI/env overrides used during build
20//! - Reproducibility analysis
21//!
22//! # Storage Layout
23//!
24//! The unified graph is stored in the `.sqry/graph/` directory:
25//! ```text
26//! .sqry/graph/
27//! ├── manifest.json     # Metadata and checksums
28//! ├── snapshot.sqry     # Binary graph snapshot
29//! └── config/           # Configuration files
30//!     └── config.json   # Build configuration
31//! ```
32//!
33//! # Usage
34//!
35//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage};
37//! use sqry_core::graph::unified::CodeGraph;
38//! use std::path::Path;
39//!
40//! // Create storage for a project
41//! let storage = GraphStorage::new(Path::new("/path/to/project"));
42//!
43//! // Check if graph exists
44//! if storage.exists() {
45//!     let manifest = storage.load_manifest()?;
46//!     println!("Graph has {} nodes", manifest.node_count);
47//! }
48//!
49//! // Save graph to disk
50//! let graph = CodeGraph::new();
51//! persistence::save_to_path(&graph, storage.snapshot_path())?;
52//! ```
53
54pub mod format;
55pub(crate) mod legacy_v10;
56pub(crate) mod legacy_v13;
57pub mod manifest;
58pub mod snapshot;
59
60use std::path::{Path, PathBuf};
61use std::time::Duration;
62
63pub use format::{
64    FormatVersion, GraphHeader, MAGIC_BYTES, MAGIC_BYTES_V7, MAGIC_BYTES_V8, MAGIC_BYTES_V9,
65    MAGIC_BYTES_V10, MAGIC_BYTES_V11, MAGIC_BYTES_V12, MAGIC_BYTES_V13, MAGIC_BYTES_V14,
66    MAGIC_BYTES_V15, MAGIC_BYTES_V16, MAGIC_BYTES_V17, VERSION,
67};
68pub use manifest::{
69    BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
70    ManifestCheck, OverrideEntry, OverrideSource, PluginSelectionManifest, SNAPSHOT_FORMAT_VERSION,
71    compute_config_checksum, default_provenance, try_load_manifest,
72};
73pub use snapshot::{
74    PersistenceError, check_config_drift, load_from_bytes, load_from_path, load_header_from_path,
75    save_to_path, save_to_path_with_provenance, validate_snapshot, verify_snapshot_bytes,
76};
77
78// ============================================================================
79// Graph Storage (directory-based storage manager)
80// ============================================================================
81
/// Directory for unified graph storage, relative to the project root.
///
/// Joined onto the root path in `GraphStorage::new`; the manifest and
/// snapshot files live directly inside it.
const GRAPH_DIR_NAME: &str = ".sqry/graph";

/// Directory for analysis artifacts, relative to the project root.
///
/// Joined onto the root path in `GraphStorage::new`.
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// Filename of the graph manifest inside the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Filename of the binary graph snapshot inside the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";
93
/// Storage manager for unified graph and analysis files.
///
/// `GraphStorage` manages the `.sqry/` directory structure, providing
/// access to graph files (manifest, snapshot) and analysis artifacts
/// (CSR, SCC, condensation DAGs).
///
/// Every path is derived from the project root once, in
/// [`GraphStorage::new`]. Constructing a `GraphStorage` is pure path
/// arithmetic and does not read from or create anything on disk.
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json     # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry     # Binary graph snapshot
/// │   └── config/           # Build configuration
/// │       └── config.json   # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr     # CSR adjacency matrix
///     ├── scc_calls.scc     # SCC data for call edges
///     ├── scc_imports.scc   # SCC data for import edges
///     ├── cond_calls.dag    # Condensation DAG for call edges
///     └── ...               # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Debug, Clone)]
pub struct GraphStorage {
    /// Path to the `.sqry/graph/` directory (`{root}/.sqry/graph`).
    graph_dir: PathBuf,
    /// Path to the `.sqry/analysis/` directory (`{root}/.sqry/analysis`).
    analysis_dir: PathBuf,
    /// Path to the manifest file (`{root}/.sqry/graph/manifest.json`).
    manifest_path: PathBuf,
    /// Path to the snapshot file (`{root}/.sqry/graph/snapshot.sqry`).
    snapshot_path: PathBuf,
}
142
143impl GraphStorage {
144    /// Creates a new storage manager for the given project root.
145    ///
146    /// # Arguments
147    ///
148    /// * `root_path` - Root directory of the project
149    ///
150    /// # Returns
151    ///
152    /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
153    #[must_use]
154    pub fn new(root_path: &Path) -> Self {
155        let graph_dir = root_path.join(GRAPH_DIR_NAME);
156        let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
157        Self {
158            manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
159            snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
160            graph_dir,
161            analysis_dir,
162        }
163    }
164
165    /// Returns the path to the `.sqry/graph/` directory.
166    #[must_use]
167    pub fn graph_dir(&self) -> &Path {
168        &self.graph_dir
169    }
170
171    /// Returns the path to the `.sqry/analysis/` directory.
172    #[must_use]
173    pub fn analysis_dir(&self) -> &Path {
174        &self.analysis_dir
175    }
176
177    /// Returns the path to an SCC artifact file for a given edge kind.
178    ///
179    /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
180    #[must_use]
181    pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
182        self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
183    }
184
185    /// Returns the path to a condensation DAG artifact file for a given edge kind.
186    ///
187    /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
188    #[must_use]
189    pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
190        self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
191    }
192
193    /// Returns the path to the CSR adjacency artifact file.
194    #[must_use]
195    pub fn analysis_csr_path(&self) -> PathBuf {
196        self.analysis_dir.join("adjacency.csr")
197    }
198
199    /// Returns the path to the manifest file.
200    #[must_use]
201    pub fn manifest_path(&self) -> &Path {
202        &self.manifest_path
203    }
204
205    /// Returns the path to the snapshot file.
206    #[must_use]
207    pub fn snapshot_path(&self) -> &Path {
208        &self.snapshot_path
209    }
210
211    /// Checks if a unified graph exists (manifest file exists).
212    #[must_use]
213    pub fn exists(&self) -> bool {
214        self.manifest_path.exists()
215    }
216
217    /// Checks if the snapshot file exists.
218    #[must_use]
219    pub fn snapshot_exists(&self) -> bool {
220        self.snapshot_path.exists()
221    }
222
223    /// Loads the graph manifest from disk.
224    ///
225    /// # Errors
226    ///
227    /// Returns an error if the manifest file cannot be read or parsed.
228    pub fn load_manifest(&self) -> std::io::Result<Manifest> {
229        Manifest::load(&self.manifest_path)
230    }
231
232    /// Attempts to load the graph manifest from disk, returning a typed
233    /// [`ManifestCheck`] instead of propagating `ENOENT` as a hard error.
234    ///
235    /// This is the non-panicking, policy-neutral variant of
236    /// [`Self::load_manifest`]. Use it in freshness checks, serve-path
237    /// guards, and any code that must distinguish "missing" from "corrupt"
238    /// to apply the correct policy:
239    ///
240    /// - `ManifestCheck::Present(m)` — manifest exists and parsed.
241    /// - `ManifestCheck::Missing` — file not on disk (e.g. during rebuild
242    ///   window). Callers should treat the graph as stale and either wait,
243    ///   trigger a rebuild, or refuse to serve unverified snapshots.
244    /// - `ManifestCheck::Corrupt(e)` — file exists but is unreadable or
245    ///   invalid JSON; same policy as Missing (rebuild).
246    ///
247    /// The SHA-256 integrity contract is preserved: a `Missing` or `Corrupt`
248    /// result means no snapshot is served without verification.
249    #[must_use]
250    pub fn try_load_manifest(&self) -> ManifestCheck {
251        manifest::try_load_manifest(&self.manifest_path)
252    }
253
254    /// Computes the age of the snapshot based on the manifest timestamp.
255    ///
256    /// # Arguments
257    ///
258    /// * `manifest` - The loaded manifest containing the build timestamp
259    ///
260    /// # Errors
261    ///
262    /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
263    pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
264        // Parse the RFC3339 timestamp from the manifest
265        let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
266            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
267
268        let now = chrono::Utc::now();
269        let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
270
271        // Convert to std::time::Duration (clamped to non-negative)
272        let seconds = duration.num_seconds().max(0);
273        let seconds = u64::try_from(seconds).unwrap_or(0);
274        Ok(Duration::from_secs(seconds))
275    }
276
277    /// Returns the path to the config directory.
278    #[must_use]
279    pub fn config_dir(&self) -> PathBuf {
280        self.graph_dir.join("config")
281    }
282
283    /// Returns the path to the config file.
284    #[must_use]
285    pub fn config_path(&self) -> PathBuf {
286        self.config_dir().join("config.json")
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary project root plus a `GraphStorage` pointing at it.
    ///
    /// The `TempDir` is handed back so the caller keeps the directory alive
    /// for the duration of the test.
    fn fresh_storage() -> (TempDir, GraphStorage) {
        let root = TempDir::new().unwrap();
        let storage = GraphStorage::new(root.path());
        (root, storage)
    }

    /// Creates the `.sqry/graph/` directory for the given storage.
    fn create_graph_dir(storage: &GraphStorage) {
        fs::create_dir_all(storage.graph_dir()).unwrap();
    }

    /// Derived paths land under `.sqry/graph`, and a fresh root has neither
    /// a manifest nor a snapshot.
    #[test]
    fn test_graph_storage_paths() {
        let (root, storage) = fresh_storage();

        let expected_graph_dir = root.path().join(".sqry/graph");
        let expected_manifest = root.path().join(".sqry/graph/manifest.json");
        let expected_snapshot = root.path().join(".sqry/graph/snapshot.sqry");

        assert_eq!(storage.graph_dir(), expected_graph_dir);
        assert_eq!(storage.manifest_path(), expected_manifest);
        assert_eq!(storage.snapshot_path(), expected_snapshot);
        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    /// `exists()` flips to true once a manifest file is written.
    #[test]
    fn test_graph_storage_exists() {
        let (_root, storage) = fresh_storage();

        // Nothing on disk yet.
        assert!(!storage.exists());

        // Materialize the directory and a manifest file.
        create_graph_dir(&storage);
        fs::write(storage.manifest_path(), "{}").unwrap();

        // The manifest is now visible.
        assert!(storage.exists());
    }

    /// A manifest saved to the storage's manifest path loads back with the
    /// same field values.
    #[test]
    fn test_manifest_roundtrip() {
        let (_root, storage) = fresh_storage();
        create_graph_dir(&storage);

        // Write a manifest to disk.
        let written = Manifest::new(
            "/test/path",
            100,
            200,
            "abc123",
            BuildProvenance::new("0.15.0", "sqry index"),
        );
        written.save(storage.manifest_path()).unwrap();

        // Read it back through the storage manager.
        let read_back = storage.load_manifest().unwrap();
        assert_eq!(read_back.node_count, 100);
        assert_eq!(read_back.edge_count, 200);
        assert_eq!(read_back.snapshot_sha256, "abc123");
        assert_eq!(read_back.build_provenance.sqry_version, "0.15.0");
    }

    /// A just-created manifest reports an age close to zero.
    #[test]
    fn test_snapshot_age() {
        let (_root, storage) = fresh_storage();

        // The manifest is stamped at creation time, so its age is tiny.
        let manifest = Manifest::new(
            "/test/path",
            100,
            200,
            "abc123",
            BuildProvenance::new("0.15.0", "sqry index"),
        );

        let age = storage.snapshot_age(&manifest).unwrap();
        assert!(age.as_secs() < 2, "Age should be less than 2 seconds");
    }

    /// Regression test (Step 10, #10): Snapshot without manifest → not ready.
    ///
    /// With manifest-last persistence, a snapshot that has no manifest next
    /// to it means the build was interrupted, so `storage.exists()` must
    /// report false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let (_root, storage) = fresh_storage();

        // Only the snapshot is written; the manifest is deliberately absent.
        create_graph_dir(&storage);
        fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // The snapshot file itself is visible...
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // ...but readiness is keyed on the manifest.
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): Manifest without snapshot → `exists()` true, load fails gracefully.
    ///
    /// A manifest with no snapshot indicates corruption. `storage.exists()`
    /// still reports true (the manifest is there), while `load_from_path()`
    /// has to return an error instead of panicking so that auto-index paths
    /// can trigger a rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let (root, storage) = fresh_storage();

        // Only the manifest is written; the snapshot is deliberately absent.
        create_graph_dir(&storage);
        let manifest = Manifest::new(
            root.path().display().to_string(),
            100,
            200,
            "sha256",
            BuildProvenance::new("3.6.0", "test"),
        );
        manifest.save(storage.manifest_path()).unwrap();

        // Readiness follows the manifest.
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // No snapshot file was ever created.
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // Loading must surface an error rather than panic.
        let outcome = load_from_path(storage.snapshot_path(), None);
        assert!(
            outcome.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }

    // ====================================================================
    // ManifestCheck / try_load_manifest tests (MANIFEST_1)
    // ====================================================================

    /// With no manifest on disk the check is `ManifestCheck::Missing` (not `Err`).
    #[test]
    fn test_try_load_manifest_missing_returns_missing() {
        let (_root, storage) = fresh_storage();

        // Not even the graph directory exists at this point.
        let check = storage.try_load_manifest();
        assert!(
            check.is_missing(),
            "Missing manifest file should return ManifestCheck::Missing, not Err"
        );
        assert!(!check.is_present());
        assert!(!check.is_corrupt());
    }

    /// Deleting a previously valid manifest yields `ManifestCheck::Missing`.
    #[test]
    fn test_try_load_manifest_removed_after_creation() {
        let (_root, storage) = fresh_storage();

        // Start from a valid manifest on disk.
        create_graph_dir(&storage);
        let manifest = Manifest::new(
            "/test/path",
            10,
            20,
            "sha_initial",
            BuildProvenance::new("0.15.0", "sqry index"),
        );
        manifest.save(storage.manifest_path()).unwrap();

        // Sanity check: it is readable before removal.
        assert!(storage.try_load_manifest().is_present());

        // Delete it, mimicking the rebuild window where no manifest exists.
        fs::remove_file(storage.manifest_path()).unwrap();

        let check = storage.try_load_manifest();
        assert!(
            check.is_missing(),
            "Freshness check on removed manifest must return Missing, not Err/Corrupt"
        );
    }

    /// A manifest holding invalid JSON yields `ManifestCheck::Corrupt`.
    #[test]
    fn test_try_load_manifest_corrupt_returns_corrupt() {
        let (_root, storage) = fresh_storage();

        create_graph_dir(&storage);
        // Content that is not parseable as JSON.
        fs::write(storage.manifest_path(), b"not valid json {{{{").unwrap();

        let check = storage.try_load_manifest();
        assert!(
            check.is_corrupt(),
            "Invalid JSON in manifest should return ManifestCheck::Corrupt"
        );
        assert!(!check.is_present());
        assert!(!check.is_missing());
    }

    /// A valid manifest yields `ManifestCheck::Present` carrying the saved fields.
    #[test]
    fn test_try_load_manifest_valid_returns_present() {
        let (_root, storage) = fresh_storage();

        create_graph_dir(&storage);
        let saved = Manifest::new(
            "/workspace/root",
            42,
            99,
            "sha256_test",
            BuildProvenance::new("9.0.0", "sqry index"),
        );
        saved.save(storage.manifest_path()).unwrap();

        match storage.try_load_manifest() {
            ManifestCheck::Missing => panic!("Expected Present, got Missing"),
            ManifestCheck::Corrupt(e) => panic!("Expected Present, got Corrupt: {e}"),
            ManifestCheck::Present(m) => {
                assert_eq!(m.node_count, 42);
                assert_eq!(m.edge_count, 99);
                assert_eq!(m.snapshot_sha256, "sha256_test");
                assert_eq!(m.root_path, "/workspace/root");
            }
        }
    }

    /// `ManifestCheck::into_manifest()` maps `Present` to `Some` and both
    /// `Missing` and `Corrupt` to `None`.
    #[test]
    fn test_manifest_check_into_manifest() {
        let (_root, storage) = fresh_storage();

        // Missing → None
        let when_missing = storage.try_load_manifest();
        assert!(when_missing.into_manifest().is_none());

        // Corrupt → None
        create_graph_dir(&storage);
        fs::write(storage.manifest_path(), b"bad json").unwrap();
        let when_corrupt = storage.try_load_manifest();
        assert!(when_corrupt.into_manifest().is_none());

        // Present → Some
        let manifest = Manifest::new(
            "/path",
            1,
            2,
            "sha",
            BuildProvenance::new("9.0.0", "sqry index"),
        );
        manifest.save(storage.manifest_path()).unwrap();
        let when_present = storage.try_load_manifest();
        assert!(when_present.into_manifest().is_some());
    }
}