Skip to main content

sqry_core/graph/unified/persistence/
mod.rs

1//! Persistence layer for the unified graph architecture.
2//!
3//! This module provides save/load functionality for the unified graph,
4//! enabling efficient serialization and deserialization of the complete
5//! graph state including nodes, edges, strings, files, and indices.
6//!
7//! # Format
8//!
9//! The persistence format is a binary format using postcard serialization:
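//! - Magic bytes: the format-version prefix exported as [`MAGIC_BYTES`]
//!   (NOTE(review): this line previously cited `SQRY_GRAPH_V5` (13 bytes), but
//!   this module re-exports `MAGIC_BYTES_V7`..`MAGIC_BYTES_V11` — confirm the
//!   current value and length against `format.rs`)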
11//! - Version header with counts and config provenance
12//! - Serialized components in order
13//!
14//! # Config Provenance
15//!
16//! Starting with V2, the graph header includes `config_provenance` which
17//! records which configuration was used when building the graph. This enables:
18//! - Detecting config drift (config changed since graph was built)
19//! - Tracking CLI/env overrides used during build
20//! - Reproducibility analysis
21//!
22//! # Storage Layout
23//!
24//! The unified graph is stored in the `.sqry/graph/` directory:
25//! ```text
26//! .sqry/graph/
27//! ├── manifest.json     # Metadata and checksums
28//! ├── snapshot.sqry     # Binary graph snapshot
29//! └── config/           # Configuration files
30//!     └── config.json   # Build configuration
31//! ```
32//!
33//! # Usage
34//!
35//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage, Manifest};
37//! use sqry_core::graph::unified::CodeGraph;
38//! use std::path::Path;
39//!
40//! // Create storage for a project
41//! let storage = GraphStorage::new(Path::new("/path/to/project"));
42//!
43//! // Check if graph exists
44//! if storage.exists() {
45//!     let manifest = storage.load_manifest()?;
46//!     println!("Graph has {} nodes", manifest.node_count);
47//! }
48//!
49//! // Save graph to disk
50//! let graph = CodeGraph::new();
51//! persistence::save_to_path(&graph, storage.snapshot_path())?;
52//! ```
53
54pub mod format;
55pub(crate) mod legacy_v10;
56pub mod manifest;
57pub mod snapshot;
58
59use std::path::{Path, PathBuf};
60use std::time::Duration;
61
62pub use format::{
63    FormatVersion, GraphHeader, MAGIC_BYTES, MAGIC_BYTES_V7, MAGIC_BYTES_V8, MAGIC_BYTES_V9,
64    MAGIC_BYTES_V10, MAGIC_BYTES_V11, VERSION,
65};
66pub use manifest::{
67    BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
68    ManifestCheck, OverrideEntry, OverrideSource, PluginSelectionManifest, SNAPSHOT_FORMAT_VERSION,
69    compute_config_checksum, default_provenance, try_load_manifest,
70};
71pub use snapshot::{
72    PersistenceError, check_config_drift, load_from_bytes, load_from_path, load_header_from_path,
73    save_to_path, save_to_path_with_provenance, validate_snapshot, verify_snapshot_bytes,
74};
75
76// ============================================================================
77// Graph Storage (directory-based storage manager)
78// ============================================================================
79
/// File name of the binary graph snapshot stored in the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";

/// File name of the JSON manifest stored in the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Project-relative directory holding analysis artifacts.
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// Project-relative directory holding the unified graph files.
const GRAPH_DIR_NAME: &str = ".sqry/graph";
91
/// Path bookkeeping for a project's `.sqry/` directory.
///
/// A `GraphStorage` value remembers where the unified graph files
/// (manifest and snapshot) and the analysis artifacts (CSR, SCC,
/// condensation DAGs) live for one project root.
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json     # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry     # Binary graph snapshot
/// │   └── config/           # Build configuration
/// │       └── config.json   # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr     # CSR adjacency matrix
///     ├── scc_calls.scc     # SCC data for call edges
///     ├── scc_imports.scc   # SCC data for import edges
///     ├── cond_calls.dag    # Condensation DAG for call edges
///     └── ...               # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Clone, Debug)]
pub struct GraphStorage {
    /// Location of the `.sqry/graph/` directory.
    graph_dir: PathBuf,
    /// Location of the `.sqry/analysis/` directory.
    analysis_dir: PathBuf,
    /// Location of the manifest file inside the graph directory.
    manifest_path: PathBuf,
    /// Location of the snapshot file inside the graph directory.
    snapshot_path: PathBuf,
}
140
141impl GraphStorage {
142    /// Creates a new storage manager for the given project root.
143    ///
144    /// # Arguments
145    ///
146    /// * `root_path` - Root directory of the project
147    ///
148    /// # Returns
149    ///
150    /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
151    #[must_use]
152    pub fn new(root_path: &Path) -> Self {
153        let graph_dir = root_path.join(GRAPH_DIR_NAME);
154        let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
155        Self {
156            manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
157            snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
158            graph_dir,
159            analysis_dir,
160        }
161    }
162
163    /// Returns the path to the `.sqry/graph/` directory.
164    #[must_use]
165    pub fn graph_dir(&self) -> &Path {
166        &self.graph_dir
167    }
168
169    /// Returns the path to the `.sqry/analysis/` directory.
170    #[must_use]
171    pub fn analysis_dir(&self) -> &Path {
172        &self.analysis_dir
173    }
174
175    /// Returns the path to an SCC artifact file for a given edge kind.
176    ///
177    /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
178    #[must_use]
179    pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
180        self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
181    }
182
183    /// Returns the path to a condensation DAG artifact file for a given edge kind.
184    ///
185    /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
186    #[must_use]
187    pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
188        self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
189    }
190
191    /// Returns the path to the CSR adjacency artifact file.
192    #[must_use]
193    pub fn analysis_csr_path(&self) -> PathBuf {
194        self.analysis_dir.join("adjacency.csr")
195    }
196
197    /// Returns the path to the manifest file.
198    #[must_use]
199    pub fn manifest_path(&self) -> &Path {
200        &self.manifest_path
201    }
202
203    /// Returns the path to the snapshot file.
204    #[must_use]
205    pub fn snapshot_path(&self) -> &Path {
206        &self.snapshot_path
207    }
208
209    /// Checks if a unified graph exists (manifest file exists).
210    #[must_use]
211    pub fn exists(&self) -> bool {
212        self.manifest_path.exists()
213    }
214
215    /// Checks if the snapshot file exists.
216    #[must_use]
217    pub fn snapshot_exists(&self) -> bool {
218        self.snapshot_path.exists()
219    }
220
221    /// Loads the graph manifest from disk.
222    ///
223    /// # Errors
224    ///
225    /// Returns an error if the manifest file cannot be read or parsed.
226    pub fn load_manifest(&self) -> std::io::Result<Manifest> {
227        Manifest::load(&self.manifest_path)
228    }
229
230    /// Attempts to load the graph manifest from disk, returning a typed
231    /// [`ManifestCheck`] instead of propagating `ENOENT` as a hard error.
232    ///
233    /// This is the non-panicking, policy-neutral variant of
234    /// [`Self::load_manifest`]. Use it in freshness checks, serve-path
235    /// guards, and any code that must distinguish "missing" from "corrupt"
236    /// to apply the correct policy:
237    ///
238    /// - `ManifestCheck::Present(m)` — manifest exists and parsed.
239    /// - `ManifestCheck::Missing` — file not on disk (e.g. during rebuild
240    ///   window). Callers should treat the graph as stale and either wait,
241    ///   trigger a rebuild, or refuse to serve unverified snapshots.
242    /// - `ManifestCheck::Corrupt(e)` — file exists but is unreadable or
243    ///   invalid JSON; same policy as Missing (rebuild).
244    ///
245    /// The SHA-256 integrity contract is preserved: a `Missing` or `Corrupt`
246    /// result means no snapshot is served without verification.
247    #[must_use]
248    pub fn try_load_manifest(&self) -> ManifestCheck {
249        manifest::try_load_manifest(&self.manifest_path)
250    }
251
252    /// Computes the age of the snapshot based on the manifest timestamp.
253    ///
254    /// # Arguments
255    ///
256    /// * `manifest` - The loaded manifest containing the build timestamp
257    ///
258    /// # Errors
259    ///
260    /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
261    pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
262        // Parse the RFC3339 timestamp from the manifest
263        let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
264            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
265
266        let now = chrono::Utc::now();
267        let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
268
269        // Convert to std::time::Duration (clamped to non-negative)
270        let seconds = duration.num_seconds().max(0);
271        let seconds = u64::try_from(seconds).unwrap_or(0);
272        Ok(Duration::from_secs(seconds))
273    }
274
275    /// Returns the path to the config directory.
276    #[must_use]
277    pub fn config_dir(&self) -> PathBuf {
278        self.graph_dir.join("config")
279    }
280
281    /// Returns the path to the config file.
282    #[must_use]
283    pub fn config_path(&self) -> PathBuf {
284        self.config_dir().join("config.json")
285    }
286}
287
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a scratch project root plus a `GraphStorage` pointing at it.
    ///
    /// The `TempDir` is handed back so the directory stays alive for the
    /// duration of the calling test.
    fn scratch_storage() -> (TempDir, GraphStorage) {
        let root = TempDir::new().unwrap();
        let storage = GraphStorage::new(root.path());
        (root, storage)
    }

    /// Creates the `.sqry/graph/` directory for `storage` on disk.
    fn create_graph_dir(storage: &GraphStorage) {
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
    }

    #[test]
    fn test_graph_storage_paths() {
        let (root, storage) = scratch_storage();
        let base = root.path();

        assert_eq!(storage.graph_dir(), base.join(".sqry/graph"));
        assert_eq!(
            storage.manifest_path(),
            base.join(".sqry/graph/manifest.json")
        );
        assert_eq!(
            storage.snapshot_path(),
            base.join(".sqry/graph/snapshot.sqry")
        );

        // Nothing has been written yet.
        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    #[test]
    fn test_graph_storage_exists() {
        let (_root, storage) = scratch_storage();

        // A fresh root has no manifest.
        assert!(!storage.exists());

        // Writing any manifest file flips `exists()`.
        create_graph_dir(&storage);
        std::fs::write(storage.manifest_path(), "{}").unwrap();

        assert!(storage.exists());
    }

    #[test]
    fn test_manifest_roundtrip() {
        let (_root, storage) = scratch_storage();
        create_graph_dir(&storage);

        // Persist a manifest through the path the storage reports.
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let written = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        written.save(storage.manifest_path()).unwrap();

        // Read it back through the storage and compare fields.
        let read_back = storage.load_manifest().unwrap();
        assert_eq!(read_back.node_count, 100);
        assert_eq!(read_back.edge_count, 200);
        assert_eq!(read_back.snapshot_sha256, "abc123");
        assert_eq!(read_back.build_provenance.sqry_version, "0.15.0");
    }

    #[test]
    fn test_snapshot_age() {
        let (_root, storage) = scratch_storage();

        // A manifest constructed right now carries a current timestamp.
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);

        // So its age must be close to zero.
        let age = storage.snapshot_age(&manifest).unwrap();
        assert!(age.as_secs() < 2, "Age should be less than 2 seconds");
    }

    /// Regression test (Step 10, #10): Snapshot without manifest → not ready.
    ///
    /// With manifest-last persistence, a snapshot lacking its manifest means
    /// the build was interrupted, so `storage.exists()` must be false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let (_root, storage) = scratch_storage();

        // Write only the snapshot; deliberately skip the manifest.
        create_graph_dir(&storage);
        std::fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // The snapshot file itself is visible...
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // ...but the index is not considered ready.
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): Manifest without snapshot → `exists()` true, load fails gracefully.
    ///
    /// A manifest whose snapshot is missing indicates corruption.
    /// `storage.exists()` is true because the manifest is there, while
    /// `load_from_path()` has to return an error instead of panicking so
    /// auto-index paths can trigger a rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let (root, storage) = scratch_storage();

        // Write only the manifest; deliberately skip the snapshot.
        create_graph_dir(&storage);
        let provenance = BuildProvenance::new("3.6.0", "test");
        let manifest = Manifest::new(
            root.path().display().to_string(),
            100,
            200,
            "sha256",
            provenance,
        );
        manifest.save(storage.manifest_path()).unwrap();

        // The manifest alone makes `exists()` true.
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // There is no snapshot file.
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // Loading the absent snapshot yields an error rather than a panic.
        let outcome = load_from_path(storage.snapshot_path(), None);
        assert!(
            outcome.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }

    // ====================================================================
    // ManifestCheck / try_load_manifest tests (MANIFEST_1)
    // ====================================================================

    /// An absent manifest file yields `ManifestCheck::Missing` (not `Err`).
    #[test]
    fn test_try_load_manifest_missing_returns_missing() {
        let (_root, storage) = scratch_storage();

        // Nothing was created, not even the graph directory.
        let check = storage.try_load_manifest();
        assert!(
            check.is_missing(),
            "Missing manifest file should return ManifestCheck::Missing, not Err"
        );
        assert!(!check.is_present());
        assert!(!check.is_corrupt());
    }

    /// Deleting a previously present manifest yields `ManifestCheck::Missing`.
    #[test]
    fn test_try_load_manifest_removed_after_creation() {
        let (_root, storage) = scratch_storage();

        // Start from a valid manifest on disk.
        create_graph_dir(&storage);
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 10, 20, "sha_initial", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Sanity check: it is loadable before removal.
        assert!(storage.try_load_manifest().is_present());

        // Delete it, mimicking the rebuild window with no manifest.
        std::fs::remove_file(storage.manifest_path()).unwrap();

        let check = storage.try_load_manifest();
        assert!(
            check.is_missing(),
            "Freshness check on removed manifest must return Missing, not Err/Corrupt"
        );
    }

    /// A manifest holding invalid JSON yields `ManifestCheck::Corrupt`.
    #[test]
    fn test_try_load_manifest_corrupt_returns_corrupt() {
        let (_root, storage) = scratch_storage();

        create_graph_dir(&storage);
        // Content that is not parseable as JSON.
        std::fs::write(storage.manifest_path(), b"not valid json {{{{").unwrap();

        let check = storage.try_load_manifest();
        assert!(
            check.is_corrupt(),
            "Invalid JSON in manifest should return ManifestCheck::Corrupt"
        );
        assert!(!check.is_present());
        assert!(!check.is_missing());
    }

    /// A well-formed manifest yields `ManifestCheck::Present` with the saved fields.
    #[test]
    fn test_try_load_manifest_valid_returns_present() {
        let (_root, storage) = scratch_storage();

        create_graph_dir(&storage);
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let saved = Manifest::new("/workspace/root", 42, 99, "sha256_test", provenance);
        saved.save(storage.manifest_path()).unwrap();

        match storage.try_load_manifest() {
            ManifestCheck::Missing => panic!("Expected Present, got Missing"),
            ManifestCheck::Corrupt(e) => panic!("Expected Present, got Corrupt: {e}"),
            ManifestCheck::Present(loaded) => {
                assert_eq!(loaded.node_count, 42);
                assert_eq!(loaded.edge_count, 99);
                assert_eq!(loaded.snapshot_sha256, "sha256_test");
                assert_eq!(loaded.root_path, "/workspace/root");
            }
        }
    }

    /// `ManifestCheck::into_manifest()` maps `Present` to `Some` and `Missing`/`Corrupt` to `None`.
    #[test]
    fn test_manifest_check_into_manifest() {
        let (_root, storage) = scratch_storage();

        // Missing → None (nothing on disk yet)
        assert!(storage.try_load_manifest().into_manifest().is_none());

        // Corrupt → None (unparseable file in place)
        create_graph_dir(&storage);
        std::fs::write(storage.manifest_path(), b"bad json").unwrap();
        assert!(storage.try_load_manifest().into_manifest().is_none());

        // Present → Some (valid manifest overwrites the bad file)
        let provenance = BuildProvenance::new("9.0.0", "sqry index");
        let manifest = Manifest::new("/path", 1, 2, "sha", provenance);
        manifest.save(storage.manifest_path()).unwrap();
        assert!(storage.try_load_manifest().into_manifest().is_some());
    }
}