sqry_core/graph/unified/persistence/mod.rs
1//! Persistence layer for the unified graph architecture.
2//!
3//! This module provides save/load functionality for the unified graph,
4//! enabling efficient serialization and deserialization of the complete
5//! graph state including nodes, edges, strings, files, and indices.
6//!
7//! # Format
8//!
9//! The persistence format is a binary format using postcard serialization:
10//! - Magic bytes: `SQRY_GRAPH_V5` (13 bytes)
11//! - Version header with counts and config provenance
12//! - Serialized components in order
13//!
14//! # Config Provenance
15//!
16//! Starting with V2, the graph header includes `config_provenance` which
17//! records which configuration was used when building the graph. This enables:
18//! - Detecting config drift (config changed since graph was built)
19//! - Tracking CLI/env overrides used during build
20//! - Reproducibility analysis
21//!
22//! # Storage Layout
23//!
24//! The unified graph is stored in the `.sqry/graph/` directory:
//! ```text
//! .sqry/graph/
//! ├── manifest.json       # Metadata and checksums
//! ├── snapshot.sqry       # Binary graph snapshot
//! └── config/             # Configuration files
//!     └── config.json     # Build configuration
//! ```
32//!
33//! # Usage
34//!
//! ```rust,ignore
//! use sqry_core::graph::unified::persistence::{self, GraphStorage, Manifest};
//! use sqry_core::graph::unified::CodeGraph;
//! use std::path::Path;
//!
//! // Create storage for a project
//! let storage = GraphStorage::new(Path::new("/path/to/project"));
//!
//! // Check if graph exists
//! if storage.exists() {
//!     let manifest: Manifest = storage.load_manifest()?;
//!     println!("Graph has {} nodes", manifest.node_count);
//! }
//!
//! // Save graph to disk
//! let graph = CodeGraph::new();
//! persistence::save_to_path(&graph, storage.snapshot_path())?;
//! ```
53
54pub mod format;
55pub mod manifest;
56pub mod snapshot;
57
58use std::path::{Path, PathBuf};
59use std::time::Duration;
60
61pub use format::{GraphHeader, MAGIC_BYTES, VERSION};
62pub use manifest::{
63 BuildProvenance, ConfigProvenance, ConfigProvenanceBuilder, MANIFEST_SCHEMA_VERSION, Manifest,
64 OverrideEntry, OverrideSource, SNAPSHOT_FORMAT_VERSION, compute_config_checksum,
65 default_provenance,
66};
67pub use snapshot::{
68 PersistenceError, check_config_drift, load_from_path, load_header_from_path, save_to_path,
69 save_to_path_with_provenance, validate_snapshot,
70};
71
72// ============================================================================
73// Graph Storage (directory-based storage manager)
74// ============================================================================
75
/// Directory name for unified graph storage, relative to the project root.
///
/// Joined onto the root path in [`GraphStorage::new`].
const GRAPH_DIR_NAME: &str = ".sqry/graph";

/// Directory name for analysis artifacts, relative to the project root.
///
/// Joined onto the root path in [`GraphStorage::new`].
const ANALYSIS_DIR_NAME: &str = ".sqry/analysis";

/// Filename for the graph manifest, located inside the graph directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Filename for the graph snapshot, located inside the graph directory.
const SNAPSHOT_FILE_NAME: &str = "snapshot.sqry";
87
/// Storage manager for unified graph and analysis files.
///
/// `GraphStorage` manages the `.sqry/` directory structure, providing
/// access to graph files (manifest, snapshot) and analysis artifacts
/// (CSR, SCC, condensation DAGs).
///
/// All paths are computed once in [`GraphStorage::new`] and stored; the
/// struct itself holds no open file handles.
///
/// # Directory Structure
///
/// ```text
/// .sqry/
/// ├── graph/
/// │   ├── manifest.json       # Graph metadata (node/edge counts, checksums)
/// │   ├── snapshot.sqry       # Binary graph snapshot
/// │   └── config/             # Build configuration
/// │       └── config.json     # Configuration used during build
/// └── analysis/
///     ├── adjacency.csr       # CSR adjacency matrix
///     ├── scc_calls.scc       # SCC data for call edges
///     ├── scc_imports.scc     # SCC data for import edges
///     ├── cond_calls.dag      # Condensation DAG for call edges
///     └── ...                 # Other edge-kind artifacts
/// ```
///
/// # Example
///
/// ```rust,ignore
/// use sqry_core::graph::unified::persistence::GraphStorage;
/// use std::path::Path;
///
/// let storage = GraphStorage::new(Path::new("/path/to/project"));
///
/// if storage.exists() {
///     let manifest = storage.load_manifest()?;
///     let age = storage.snapshot_age(&manifest)?;
///     println!("Graph built {} seconds ago", age.as_secs());
/// }
/// ```
#[derive(Debug, Clone)]
pub struct GraphStorage {
    /// Path to the `.sqry/graph/` directory.
    graph_dir: PathBuf,
    /// Path to the `.sqry/analysis/` directory.
    analysis_dir: PathBuf,
    /// Path to the manifest file (`manifest.json` inside `graph_dir`).
    manifest_path: PathBuf,
    /// Path to the snapshot file (`snapshot.sqry` inside `graph_dir`).
    snapshot_path: PathBuf,
}
136
137impl GraphStorage {
138 /// Creates a new storage manager for the given project root.
139 ///
140 /// # Arguments
141 ///
142 /// * `root_path` - Root directory of the project
143 ///
144 /// # Returns
145 ///
146 /// A `GraphStorage` instance configured for `{root_path}/.sqry/`
147 #[must_use]
148 pub fn new(root_path: &Path) -> Self {
149 let graph_dir = root_path.join(GRAPH_DIR_NAME);
150 let analysis_dir = root_path.join(ANALYSIS_DIR_NAME);
151 Self {
152 manifest_path: graph_dir.join(MANIFEST_FILE_NAME),
153 snapshot_path: graph_dir.join(SNAPSHOT_FILE_NAME),
154 graph_dir,
155 analysis_dir,
156 }
157 }
158
159 /// Returns the path to the `.sqry/graph/` directory.
160 #[must_use]
161 pub fn graph_dir(&self) -> &Path {
162 &self.graph_dir
163 }
164
165 /// Returns the path to the `.sqry/analysis/` directory.
166 #[must_use]
167 pub fn analysis_dir(&self) -> &Path {
168 &self.analysis_dir
169 }
170
171 /// Returns the path to an SCC artifact file for a given edge kind.
172 ///
173 /// Example: `analysis_scc_path("calls")` returns `.sqry/analysis/scc_calls.scc`
174 #[must_use]
175 pub fn analysis_scc_path(&self, edge_kind: &str) -> PathBuf {
176 self.analysis_dir.join(format!("scc_{edge_kind}.scc"))
177 }
178
179 /// Returns the path to a condensation DAG artifact file for a given edge kind.
180 ///
181 /// Example: `analysis_cond_path("calls")` returns `.sqry/analysis/cond_calls.dag`
182 #[must_use]
183 pub fn analysis_cond_path(&self, edge_kind: &str) -> PathBuf {
184 self.analysis_dir.join(format!("cond_{edge_kind}.dag"))
185 }
186
187 /// Returns the path to the CSR adjacency artifact file.
188 #[must_use]
189 pub fn analysis_csr_path(&self) -> PathBuf {
190 self.analysis_dir.join("adjacency.csr")
191 }
192
193 /// Returns the path to the manifest file.
194 #[must_use]
195 pub fn manifest_path(&self) -> &Path {
196 &self.manifest_path
197 }
198
199 /// Returns the path to the snapshot file.
200 #[must_use]
201 pub fn snapshot_path(&self) -> &Path {
202 &self.snapshot_path
203 }
204
205 /// Checks if a unified graph exists (manifest file exists).
206 #[must_use]
207 pub fn exists(&self) -> bool {
208 self.manifest_path.exists()
209 }
210
211 /// Checks if the snapshot file exists.
212 #[must_use]
213 pub fn snapshot_exists(&self) -> bool {
214 self.snapshot_path.exists()
215 }
216
217 /// Loads the graph manifest from disk.
218 ///
219 /// # Errors
220 ///
221 /// Returns an error if the manifest file cannot be read or parsed.
222 pub fn load_manifest(&self) -> std::io::Result<Manifest> {
223 Manifest::load(&self.manifest_path)
224 }
225
226 /// Computes the age of the snapshot based on the manifest timestamp.
227 ///
228 /// # Arguments
229 ///
230 /// * `manifest` - The loaded manifest containing the build timestamp
231 ///
232 /// # Errors
233 ///
234 /// Returns an error if the timestamp cannot be parsed or if system time is invalid.
235 pub fn snapshot_age(&self, manifest: &Manifest) -> std::io::Result<Duration> {
236 // Parse the RFC3339 timestamp from the manifest
237 let built_at = chrono::DateTime::parse_from_rfc3339(&manifest.built_at)
238 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
239
240 let now = chrono::Utc::now();
241 let duration = now.signed_duration_since(built_at.with_timezone(&chrono::Utc));
242
243 // Convert to std::time::Duration (clamped to non-negative)
244 let seconds = duration.num_seconds().max(0);
245 let seconds = u64::try_from(seconds).unwrap_or(0);
246 Ok(Duration::from_secs(seconds))
247 }
248
249 /// Returns the path to the config directory.
250 #[must_use]
251 pub fn config_dir(&self) -> PathBuf {
252 self.graph_dir.join("config")
253 }
254
255 /// Returns the path to the config file.
256 #[must_use]
257 pub fn config_path(&self) -> PathBuf {
258 self.config_dir().join("config.json")
259 }
260}
261
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Graph-directory paths are derived from the project root, and nothing
    /// exists on disk for a fresh temporary directory.
    #[test]
    fn test_graph_storage_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.graph_dir(), tmp.path().join(".sqry/graph"));
        assert_eq!(
            storage.manifest_path(),
            tmp.path().join(".sqry/graph/manifest.json")
        );
        assert_eq!(
            storage.snapshot_path(),
            tmp.path().join(".sqry/graph/snapshot.sqry")
        );
        assert!(!storage.exists());
        assert!(!storage.snapshot_exists());
    }

    /// Analysis-artifact and config paths are derived from the project root.
    ///
    /// Covers the accessors that the graph-path test does not touch.
    #[test]
    fn test_analysis_and_config_paths() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        assert_eq!(storage.analysis_dir(), tmp.path().join(".sqry/analysis"));
        assert_eq!(
            storage.analysis_scc_path("calls"),
            tmp.path().join(".sqry/analysis/scc_calls.scc")
        );
        assert_eq!(
            storage.analysis_cond_path("calls"),
            tmp.path().join(".sqry/analysis/cond_calls.dag")
        );
        assert_eq!(
            storage.analysis_csr_path(),
            tmp.path().join(".sqry/analysis/adjacency.csr")
        );
        assert_eq!(
            storage.config_dir(),
            tmp.path().join(".sqry/graph/config")
        );
        assert_eq!(
            storage.config_path(),
            tmp.path().join(".sqry/graph/config/config.json")
        );
    }

    /// `exists()` flips to true once the manifest file is written.
    #[test]
    fn test_graph_storage_exists() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Initially doesn't exist
        assert!(!storage.exists());

        // Create the directory and manifest
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), "{}").unwrap();

        // Now exists
        assert!(storage.exists());
    }

    /// A manifest saved to the storage's manifest path loads back with the
    /// same counts, checksum and provenance version.
    #[test]
    fn test_manifest_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create directory
        std::fs::create_dir_all(storage.graph_dir()).unwrap();

        // Create and save manifest
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);
        manifest.save(storage.manifest_path()).unwrap();

        // Load and verify
        let loaded = storage.load_manifest().unwrap();
        assert_eq!(loaded.node_count, 100);
        assert_eq!(loaded.edge_count, 200);
        assert_eq!(loaded.snapshot_sha256, "abc123");
        assert_eq!(loaded.build_provenance.sqry_version, "0.15.0");
    }

    /// A freshly created manifest reports an age of under two seconds.
    #[test]
    fn test_snapshot_age() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create manifest with current timestamp
        let provenance = BuildProvenance::new("0.15.0", "sqry index");
        let manifest = Manifest::new("/test/path", 100, 200, "abc123", provenance);

        // Age should be very small (just created)
        let age = storage.snapshot_age(&manifest).unwrap();
        assert!(age.as_secs() < 2, "Age should be less than 2 seconds");
    }

    /// Regression test (Step 10, #10): Snapshot without manifest → not ready.
    ///
    /// Under manifest-last persistence, a snapshot file without manifest means
    /// the build was interrupted. `storage.exists()` must return false.
    #[test]
    fn test_reader_readiness_snapshot_without_manifest() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and snapshot (but no manifest)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.snapshot_path(), b"fake snapshot data").unwrap();

        // snapshot_exists() should be true (file exists)
        assert!(storage.snapshot_exists(), "Snapshot file should exist");

        // exists() should be false (no manifest → not ready)
        assert!(
            !storage.exists(),
            "Index should NOT be ready without manifest (manifest-last ordering)"
        );
    }

    /// Regression test (Step 10, #11): Manifest without snapshot → exists() true, load fails gracefully.
    ///
    /// Manifest present but snapshot missing indicates corruption. `storage.exists()`
    /// returns true (manifest present), but `load_from_path()` must fail gracefully
    /// (error, not panic), so auto-index paths can trigger rebuild.
    #[test]
    fn test_reader_readiness_manifest_without_snapshot() {
        let tmp = TempDir::new().unwrap();
        let storage = GraphStorage::new(tmp.path());

        // Create graph directory and manifest (but no snapshot)
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        let provenance = BuildProvenance::new("3.6.0", "test");
        let manifest = Manifest::new(
            tmp.path().display().to_string(),
            100,
            200,
            "sha256",
            provenance,
        );
        manifest.save(storage.manifest_path()).unwrap();

        // exists() should be true (manifest present)
        assert!(
            storage.exists(),
            "Index should report exists (manifest present)"
        );

        // snapshot_exists() should be false (no snapshot file)
        assert!(!storage.snapshot_exists(), "Snapshot should not exist");

        // load_from_path should fail gracefully (error, not panic)
        let result = load_from_path(storage.snapshot_path(), None);
        assert!(
            result.is_err(),
            "Loading from missing snapshot should return error, not panic"
        );
    }
}
397}