sqry_core/graph/unified/persistence/mod.rs
1//! Persistence layer for the unified graph architecture.
2//!
3//! This module provides save/load functionality for the unified graph,
4//! enabling efficient serialization and deserialization of the complete
5//! graph state including nodes, edges, strings, files, and indices.
6//!
7//! # Format
8//!
9//! The persistence format is a binary format using postcard serialization:
10//! - Magic bytes: `SQRY_GRAPH_V5` (13 bytes)
11//! - Version header with counts and config provenance
12//! - Serialized components in order
13//!
14//! # Config Provenance
15//!
16//! Starting with V2, the graph header includes `config_provenance` which
17//! records which configuration was used when building the graph. This enables:
18//! - Detecting config drift (config changed since graph was built)
19//! - Tracking CLI/env overrides used during build
20//! - Reproducibility analysis
21//!
22//! # Storage Layout
23//!
24//! The unified graph is stored in the `.sqry/graph/` directory:
//! ```text
//! .sqry/graph/
//! ├── manifest.json      # Metadata and checksums
//! ├── snapshot.sqry      # Binary graph snapshot
//! └── config/            # Configuration files
//!     └── config.json    # Build configuration
//! ```
32//!
33//! # Usage
34//!
//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage, Manifest};
//! use sqry_core::graph::unified::CodeGraph;
//! use std::path::Path;
//!
//! // Create storage for a project
//! let storage = GraphStorage::new(Path::new("/path/to/project"));
//!
//! // Check if graph exists
//! if storage.exists() {
//!     let manifest = storage.load_manifest()?;
//!     println!("Graph has {} nodes", manifest.node_count);
//! }
//!
//! // Save graph to disk
//! let graph = CodeGraph::new();
//! persistence::save_to_path(&graph, storage.snapshot_path())?;
//! ```
53
54pub mod format;
55pub mod manifest;
56pub mod snapshot;
57
58use std::path::{Path, PathBuf};
59use std::time::Duration;
60
61pub use format::{
62 FormatVersion, GraphHeader, MAGIC_BYTES, MAGIC_BYTES_V7, MAGIC_BYTES_V8, VERSION,
63};
64pub use manifest::{
65 BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
66 OverrideEntry, OverrideSource, PluginSelectionManifest, SNAPSHOT_FORMAT_VERSION,
67 compute_config_checksum, default_provenance,
68};
69pub use snapshot::{
70 PersistenceError, check_config_drift, load_from_bytes, load_from_path, load_header_from_path,
71 save_to_path, save_to_path_with_provenance, validate_snapshot, verify_snapshot_bytes,
72};
73
74// ============================================================================
75// Graph Storage (directory-based storage manager)
76// ============================================================================
77
/// Location of the unified graph directory, relative to the project root.
const GRAPH_DIR_NAME: &str = ".sqry/graph";

/// Location of the analysis artifact directory, relative to the project root.
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// Name of the manifest file inside the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Name of the binary snapshot file inside the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";
89
/// Path resolver for the unified graph and its analysis artifacts.
///
/// A `GraphStorage` holds the locations of everything kept under a project's
/// `.sqry/` directory: the graph files (manifest, snapshot) and the analysis
/// artifacts (CSR, SCC, condensation DAGs).
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json    # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry    # Binary graph snapshot
/// │   └── config/          # Build configuration
/// │       └── config.json  # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr    # CSR adjacency matrix
///     ├── scc_calls.scc    # SCC data for call edges
///     ├── scc_imports.scc  # SCC data for import edges
///     ├── cond_calls.dag   # Condensation DAG for call edges
///     └── ...              # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Clone, Debug)]
pub struct GraphStorage {
    /// The `.sqry/graph/` directory.
    graph_dir: PathBuf,
    /// The `.sqry/analysis/` directory.
    analysis_dir: PathBuf,
    /// The manifest file inside the graph directory.
    manifest_path: PathBuf,
    /// The snapshot file inside the graph directory.
    snapshot_path: PathBuf,
}
138
139impl GraphStorage {
140 /// Creates a new storage manager for the given project root.
141 ///
142 /// # Arguments
143 ///
144 /// * `root_path` - Root directory of the project
145 ///
146 /// # Returns
147 ///
148 /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
149 #[must_use]
150 pub fn new(root_path: &Path) -> Self {
151 let graph_dir = root_path.join(GRAPH_DIR_NAME);
152 let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
153 Self {
154 manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
155 snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
156 graph_dir,
157 analysis_dir,
158 }
159 }
160
161 /// Returns the path to the `.sqry/graph/` directory.
162 #[must_use]
163 pub fn graph_dir(&self) -> &Path {
164 &self.graph_dir
165 }
166
167 /// Returns the path to the `.sqry/analysis/` directory.
168 #[must_use]
169 pub fn analysis_dir(&self) -> &Path {
170 &self.analysis_dir
171 }
172
173 /// Returns the path to an SCC artifact file for a given edge kind.
174 ///
175 /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
176 #[must_use]
177 pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
178 self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
179 }
180
181 /// Returns the path to a condensation DAG artifact file for a given edge kind.
182 ///
183 /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
184 #[must_use]
185 pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
186 self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
187 }
188
189 /// Returns the path to the CSR adjacency artifact file.
190 #[must_use]
191 pub fn analysis_csr_path(&self) -> PathBuf {
192 self.analysis_dir.join("adjacency.csr")
193 }
194
195 /// Returns the path to the manifest file.
196 #[must_use]
197 pub fn manifest_path(&self) -> &Path {
198 &self.manifest_path
199 }
200
201 /// Returns the path to the snapshot file.
202 #[must_use]
203 pub fn snapshot_path(&self) -> &Path {
204 &self.snapshot_path
205 }
206
207 /// Checks if a unified graph exists (manifest file exists).
208 #[must_use]
209 pub fn exists(&self) -> bool {
210 self.manifest_path.exists()
211 }
212
213 /// Checks if the snapshot file exists.
214 #[must_use]
215 pub fn snapshot_exists(&self) -> bool {
216 self.snapshot_path.exists()
217 }
218
219 /// Loads the graph manifest from disk.
220 ///
221 /// # Errors
222 ///
223 /// Returns an error if the manifest file cannot be read or parsed.
224 pub fn load_manifest(&self) -> std::io::Result<Manifest> {
225 Manifest::load(&self.manifest_path)
226 }
227
228 /// Computes the age of the snapshot based on the manifest timestamp.
229 ///
230 /// # Arguments
231 ///
232 /// * `manifest` - The loaded manifest containing the build timestamp
233 ///
234 /// # Errors
235 ///
236 /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
237 pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
238 // Parse the RFC3339 timestamp from the manifest
239 let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
240 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
241
242 let now = chrono::Utc::now();
243 let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
244
245 // Convert to std::time::Duration (clamped to non-negative)
246 let seconds = duration.num_seconds().max(0);
247 let seconds = u64::try_from(seconds).unwrap_or(0);
248 Ok(Duration::from_secs(seconds))
249 }
250
251 /// Returns the path to the config directory.
252 #[must_use]
253 pub fn config_dir(&self) -> PathBuf {
254 self.graph_dir.join("config")
255 }
256
257 /// Returns the path to the config file.
258 #[must_use]
259 pub fn config_path(&self) -> PathBuf {
260 self.config_dir().join("config.json")
261 }
262}
263
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a throwaway project root and a `GraphStorage` pointing at it.
    ///
    /// The `TempDir` is handed back so the directory stays alive for as long
    /// as the caller holds it.
    fn scratch_storage() -> (TempDir, GraphStorage) {
        let root = TempDir::new().unwrap();
        let storage = GraphStorage::new(root.path());
        (root, storage)
    }

    /// Paths are derived from the project root and nothing exists up front.
    #[test]
    fn test_graph_storage_paths() {
        let (root, storage) = scratch_storage();
        let base = root.path();

        assert_eq!(storage.graph_dir(), base.join(".sqry/graph"));
        assert_eq!(
            storage.manifest_path(),
            base.join(".sqry/graph/manifest.json")
        );
        assert_eq!(
            storage.snapshot_path(),
            base.join(".sqry/graph/snapshot.sqry")
        );

        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    /// `exists()` flips to true once a manifest file has been written.
    #[test]
    fn test_graph_storage_exists() {
        let (_root, storage) = scratch_storage();

        // Nothing on disk yet.
        assert!(!storage.exists());

        // Write a manifest file into a freshly created graph directory.
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), "{}").unwrap();

        // The manifest is now visible.
        assert!(storage.exists());
    }

    /// A manifest saved to the manifest path can be loaded back unchanged.
    #[test]
    fn test_manifest_roundtrip() {
        let (_root, storage) = scratch_storage();
        std::fs::create_dir_all(storage.graph_dir()).unwrap();

        // Save a manifest with known contents.
        let original = Manifest::new(
            "/test/path",
            100,
            200,
            "abc123",
            BuildProvenance::new("0.15.0", "sqry index"),
        );
        original.save(storage.manifest_path()).unwrap();

        // Read it back through the storage manager and compare.
        let reloaded = storage.load_manifest().unwrap();
        assert_eq!(reloaded.node_count, 100);
        assert_eq!(reloaded.edge_count, 200);
        assert_eq!(reloaded.snapshot_sha256, "abc123");
        assert_eq!(reloaded.build_provenance.sqry_version, "0.15.0");
    }

    /// A manifest created just now reports an age of under two seconds.
    #[test]
    fn test_snapshot_age() {
        let (_root, storage) = scratch_storage();

        // The manifest is created right before its age is measured.
        let fresh = Manifest::new(
            "/test/path",
            100,
            200,
            "abc123",
            BuildProvenance::new("0.15.0", "sqry index"),
        );

        let age_secs = storage.snapshot_age(&fresh).unwrap().as_secs();
        assert!(age_secs < 2, "Age should be less than 2 seconds");
    }

    /// Regression test (Step 10, #10): snapshot present, manifest absent → not ready.
    ///
    /// With manifest-last persistence, a snapshot file lacking a manifest
    /// indicates an interrupted build, so `storage.exists()` must be false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let (_root, storage) = scratch_storage();

        // Only the snapshot is written; the manifest is deliberately left out.
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // The snapshot file itself is detectable.
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // Readiness is keyed on the manifest, which is missing.
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): manifest present, snapshot absent →
    /// `exists()` is true and loading fails gracefully.
    ///
    /// A manifest whose snapshot is missing indicates corruption.
    /// `storage.exists()` still reports true because the manifest is there,
    /// but `load_from_path()` must return an error rather than panic so that
    /// auto-index paths can trigger a rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let (root, storage) = scratch_storage();

        // Only the manifest is written; the snapshot is deliberately left out.
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let manifest = Manifest::new(
            root.path().display().to_string(),
            100,
            200,
            "sha256",
            BuildProvenance::new("3.6.0", "test"),
        );
        manifest.save(storage.manifest_path()).unwrap();

        // The manifest alone makes the index report as existing.
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // No snapshot file was ever created.
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // Loading the missing snapshot must surface an error, not a panic.
        let outcome = load_from_path(storage.snapshot_path(), None);
        assert!(
            outcome.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }
}
399}