// sqry_core/graph/unified/persistence/format.rs

//! Binary format definition for graph persistence.
//!
//! This module defines the on-disk format for persisted graphs.

use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};

use serde::{Deserialize, Serialize};

use super::manifest::ConfigProvenance;

/// Magic bytes identifying a sqry graph file (legacy alias for V7).
///
/// Defined as an alias of [`MAGIC_BYTES_V7`] so the two names can never drift apart.
///
/// Version history:
/// - V1: Initial format (bincode)
/// - V2: Added config provenance support (bincode)
/// - V3: Added plugin version tracking (bincode)
/// - V4: Migrated to postcard serialization with length-prefixed framing
/// - V5: Added `HttpMethod::All` variant for wildcard endpoint matching
/// - V6: Added `NodeMetadataStore` for macro boundary analysis + `CfgGate` edge kind
/// - V7: Added classpath NodeKind/EdgeKind variants, `NodeMetadata` enum, `FileEntry.is_external`
/// - V8 (Phase 1 fact-layer hardening): Adds `GraphHeader.fact_epoch`, dense `NodeProvenanceStore`,
///   dense `EdgeProvenanceStore`, and `FileEntry` attribution fields (`content_hash`, `indexed_at`,
///   `source_uri`). The legacy `MAGIC_BYTES` / `VERSION` exports are preserved during Phase 1
///   to keep existing call sites compiling; later units bump the writer to V8 and treat V7 as
///   read-only.
pub const MAGIC_BYTES: &[u8; 13] = MAGIC_BYTES_V7;

/// Legacy V7 format version constant, preserved for existing call sites.
///
/// Defined as an alias of [`LEGACY_VERSION_V7`].
/// See [`CURRENT_VERSION`] / [`FormatVersion`] for the Phase 1+ versioning contract.
pub const VERSION: u32 = LEGACY_VERSION_V7;

/// Phase 1 V7 magic bytes under the versioned name.
///
/// [`MAGIC_BYTES`] is an alias of this constant; the versioned name makes the
/// legacy path explicit in reader dispatch logic (`load_from_path` branching on
/// magic bytes).
pub const MAGIC_BYTES_V7: &[u8; 13] = b"SQRY_GRAPH_V7";

/// Phase 1 V8 magic bytes.
///
/// Emitted by the Phase 1 fact-layer writer (P1U06) and accepted by the Phase 1
/// reader (P1U07). The magic is the sole versioning contract — no in-format
/// revision counter is introduced.
pub const MAGIC_BYTES_V8: &[u8; 13] = b"SQRY_GRAPH_V8";

/// Legacy V7 numeric version, exposed with a versioned name so the Phase 1 reader
/// dispatch can cite it explicitly. [`VERSION`] is an alias of this constant.
pub const LEGACY_VERSION_V7: u32 = 7;

/// Typed snapshot format version.
///
/// Phase 1 introduces V8 as read/write and preserves V7 as a read-only compatibility
/// path. Later format additions bump the magic bytes (V9, V10, …) rather than
/// relying on any in-format revision counter.
///
/// The enum is `#[repr(u32)]` and each discriminant equals the numeric version,
/// so `version as u32` yields `7` or `8` directly.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum FormatVersion {
    /// Legacy V7 — read-only after Phase 1 lands.
    V7 = 7,
    /// V8 — read/write after Phase 1.
    V8 = 8,
}

64impl FormatVersion {
65    /// Returns the magic-byte sequence identifying this format version.
66    #[must_use]
67    pub const fn magic(self) -> &'static [u8; 13] {
68        match self {
69            Self::V7 => MAGIC_BYTES_V7,
70            Self::V8 => MAGIC_BYTES_V8,
71        }
72    }
73
74    /// Returns the numeric version tag (matches the trailing digit of the magic).
75    #[must_use]
76    pub const fn as_u32(self) -> u32 {
77        self as u32
78    }
79
80    /// Parses a magic-byte prefix into a `FormatVersion`.
81    ///
82    /// Returns `None` if the bytes do not match any known format magic.
83    #[must_use]
84    pub fn from_magic(bytes: &[u8]) -> Option<Self> {
85        if bytes.len() < MAGIC_BYTES_V7.len() {
86            return None;
87        }
88        let prefix = &bytes[..MAGIC_BYTES_V7.len()];
89        if prefix == MAGIC_BYTES_V7 {
90            Some(Self::V7)
91        } else if prefix == MAGIC_BYTES_V8 {
92            Some(Self::V8)
93        } else {
94            None
95        }
96    }
97}
98
99/// Current writer format version (Phase 1+: V8).
100pub const CURRENT_VERSION: FormatVersion = FormatVersion::V8;
101
102/// Header for persisted graph files.
103///
104/// The header provides metadata about the graph for validation
105/// and efficient loading.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct GraphHeader {
108    /// Format version (for compatibility checking)
109    pub version: u32,
110
111    /// Number of nodes in the graph
112    pub node_count: usize,
113
114    /// Number of edges in the graph
115    pub edge_count: usize,
116
117    /// Number of interned strings
118    pub string_count: usize,
119
120    /// Number of registered files
121    pub file_count: usize,
122
123    /// Timestamp when graph was saved (unix epoch seconds)
124    pub timestamp: u64,
125
126    /// Configuration provenance - records which config was used to build this graph.
127    #[serde(default)]
128    pub config_provenance: Option<ConfigProvenance>,
129
130    /// Plugin versions used to build this graph (`plugin_id` → version).
131    ///
132    /// Tracks which language plugin versions were active during indexing.
133    /// Used to detect stale indexes when plugin versions change.
134    #[serde(default)]
135    pub plugin_versions: HashMap<String, String>,
136
137    /// Monotonic fact-layer epoch stamped at save time (Phase 1+).
138    ///
139    /// Strictly increases across successive saves of the same snapshot file,
140    /// including across process restarts: the writer reads the existing
141    /// header (if any) before stamping and computes
142    /// `max(prev_epoch + 1, SystemTime::now().as_secs())`.
143    ///
144    /// Defaulted to `0` for V7 snapshots and for `GraphHeader::new` /
145    /// `with_provenance` constructors. The epoch is stamped by the Phase 1
146    /// V8 writer (P1U06); this unit only introduces the field and accessors.
147    ///
148    /// Format: plain `u64`, serde-default `0` so postcard deserialization of
149    /// older headers that did not carry the field continues to succeed.
150    #[serde(default)]
151    pub fact_epoch: u64,
152}
153
154impl GraphHeader {
155    /// Creates a new graph header with the given counts.
156    #[must_use]
157    pub fn new(
158        node_count: usize,
159        edge_count: usize,
160        string_count: usize,
161        file_count: usize,
162    ) -> Self {
163        Self {
164            version: VERSION,
165            node_count,
166            edge_count,
167            string_count,
168            file_count,
169            timestamp: std::time::SystemTime::now()
170                .duration_since(std::time::UNIX_EPOCH)
171                .unwrap_or_default()
172                .as_secs(),
173            config_provenance: None,
174            plugin_versions: HashMap::new(),
175            fact_epoch: 0,
176        }
177    }
178
179    /// Creates a new graph header with config provenance.
180    #[must_use]
181    pub fn with_provenance(
182        node_count: usize,
183        edge_count: usize,
184        string_count: usize,
185        file_count: usize,
186        provenance: ConfigProvenance,
187    ) -> Self {
188        Self {
189            version: VERSION,
190            node_count,
191            edge_count,
192            string_count,
193            file_count,
194            timestamp: std::time::SystemTime::now()
195                .duration_since(std::time::UNIX_EPOCH)
196                .unwrap_or_default()
197                .as_secs(),
198            config_provenance: Some(provenance),
199            plugin_versions: HashMap::new(),
200            fact_epoch: 0,
201        }
202    }
203
204    /// Creates a new graph header with config provenance and plugin versions.
205    #[must_use]
206    pub fn with_provenance_and_plugins(
207        node_count: usize,
208        edge_count: usize,
209        string_count: usize,
210        file_count: usize,
211        provenance: ConfigProvenance,
212        plugin_versions: HashMap<String, String>,
213    ) -> Self {
214        Self {
215            version: VERSION,
216            node_count,
217            edge_count,
218            string_count,
219            file_count,
220            timestamp: std::time::SystemTime::now()
221                .duration_since(std::time::UNIX_EPOCH)
222                .unwrap_or_default()
223                .as_secs(),
224            config_provenance: Some(provenance),
225            plugin_versions,
226            fact_epoch: 0,
227        }
228    }
229
230    /// Returns the config provenance if available.
231    #[must_use]
232    pub fn provenance(&self) -> Option<&ConfigProvenance> {
233        self.config_provenance.as_ref()
234    }
235
236    /// Checks if the graph was built with tracked config provenance.
237    #[must_use]
238    pub fn has_provenance(&self) -> bool {
239        self.config_provenance.is_some()
240    }
241
242    /// Returns the plugin versions used to build this graph.
243    #[must_use]
244    pub fn plugin_versions(&self) -> &HashMap<String, String> {
245        &self.plugin_versions
246    }
247
248    /// Sets the plugin versions for this graph header.
249    pub fn set_plugin_versions(&mut self, versions: HashMap<String, String>) {
250        self.plugin_versions = versions;
251    }
252
253    /// Returns the monotonic fact-layer epoch stamped on this header.
254    ///
255    /// Returns `0` for headers created via `new` / `with_provenance` /
256    /// `with_provenance_and_plugins` before the Phase 1 writer stamps a
257    /// real epoch (P1U06), and for legacy V7 snapshots loaded through the
258    /// backwards-read path (P1U07).
259    #[must_use]
260    pub fn fact_epoch(&self) -> u64 {
261        self.fact_epoch
262    }
263
264    /// Sets the monotonic fact-layer epoch on this header.
265    ///
266    /// Intended for use by the Phase 1 V8 writer (P1U06), which computes
267    /// the epoch via a `FactEpochClock` helper and stamps it immediately
268    /// before serialization. Also used by tests.
269    pub fn set_fact_epoch(&mut self, epoch: u64) {
270        self.fact_epoch = epoch;
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Builds a fixed `ConfigProvenance` fixture; only `build_timestamp` varies per call.
    fn make_test_provenance() -> ConfigProvenance {
        ConfigProvenance {
            config_file: PathBuf::from(".sqry/graph/config/config.json"),
            config_checksum: "abc123def456".to_string(),
            schema_version: 1,
            overrides: HashMap::new(),
            build_timestamp: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            build_host: Some("test-host".to_string()),
        }
    }

    #[test]
    fn test_magic_bytes() {
        assert_eq!(MAGIC_BYTES, b"SQRY_GRAPH_V7");
        assert_eq!(MAGIC_BYTES.len(), 13);
    }

    #[test]
    fn test_version() {
        assert_eq!(VERSION, 7);
    }

    #[test]
    fn test_graph_header_new() {
        let header = GraphHeader::new(100, 50, 200, 10);

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert_eq!(header.edge_count, 50);
        assert_eq!(header.string_count, 200);
        assert_eq!(header.file_count, 10);
        assert!(header.timestamp > 0);
        assert!(header.config_provenance.is_none());
    }

    #[test]
    fn test_graph_header_with_provenance() {
        let provenance = make_test_provenance();
        let header = GraphHeader::with_provenance(100, 50, 200, 10, provenance);

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert_eq!(header.edge_count, 50);
        assert_eq!(header.string_count, 200);
        assert_eq!(header.file_count, 10);
        assert!(header.plugin_versions().is_empty());
        assert_eq!(
            header
                .config_provenance
                .as_ref()
                .map(|p| p.config_checksum.as_str()),
            Some("abc123def456")
        );
    }

    #[test]
    fn test_graph_header_provenance_method() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(header.provenance().is_none());

        let provenance = make_test_provenance();
        let header_with = GraphHeader::with_provenance(10, 5, 20, 2, provenance);
        assert_eq!(
            header_with
                .provenance()
                .map(|p| p.config_checksum.as_str()),
            Some("abc123def456")
        );
    }

    #[test]
    fn test_graph_header_has_provenance() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(!header.has_provenance());

        let provenance = make_test_provenance();
        let header_with = GraphHeader::with_provenance(10, 5, 20, 2, provenance);
        assert!(header_with.has_provenance());
    }

    #[test]
    fn test_graph_header_clone() {
        let header = GraphHeader::new(100, 50, 200, 10);
        let cloned = header.clone();

        assert_eq!(header.version, cloned.version);
        assert_eq!(header.node_count, cloned.node_count);
        assert_eq!(header.edge_count, cloned.edge_count);
        assert_eq!(header.string_count, cloned.string_count);
        assert_eq!(header.file_count, cloned.file_count);
    }

    #[test]
    fn test_graph_header_debug() {
        let header = GraphHeader::new(100, 50, 200, 10);
        let debug_str = format!("{header:?}");

        assert!(debug_str.contains("GraphHeader"));
        assert!(debug_str.contains("version"));
        assert!(debug_str.contains("node_count"));
    }

    #[test]
    fn test_graph_header_timestamp_is_recent() {
        let header = GraphHeader::new(10, 5, 20, 2);
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .expect("system clock should be after the Unix epoch")
            .as_secs();

        // Timestamp should be within 1 second of now
        assert!(header.timestamp <= now);
        assert!(header.timestamp >= now.saturating_sub(1));
    }

    #[test]
    fn test_graph_header_zero_counts() {
        let header = GraphHeader::new(0, 0, 0, 0);

        assert_eq!(header.node_count, 0);
        assert_eq!(header.edge_count, 0);
        assert_eq!(header.string_count, 0);
        assert_eq!(header.file_count, 0);
    }

    #[test]
    fn test_graph_header_large_counts() {
        let header = GraphHeader::new(1_000_000, 5_000_000, 10_000_000, 100_000);

        assert_eq!(header.node_count, 1_000_000);
        assert_eq!(header.edge_count, 5_000_000);
        assert_eq!(header.string_count, 10_000_000);
        assert_eq!(header.file_count, 100_000);
    }

    #[test]
    fn test_graph_header_plugin_versions_empty_by_default() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(header.plugin_versions().is_empty());
    }

    #[test]
    fn test_graph_header_set_plugin_versions() {
        let mut header = GraphHeader::new(10, 5, 20, 2);

        let mut versions = HashMap::new();
        versions.insert("rust".to_string(), "3.3.0".to_string());
        versions.insert("javascript".to_string(), "3.3.0".to_string());

        header.set_plugin_versions(versions);

        assert_eq!(header.plugin_versions().len(), 2);
        assert_eq!(
            header.plugin_versions().get("rust").map(String::as_str),
            Some("3.3.0")
        );
        assert_eq!(
            header
                .plugin_versions()
                .get("javascript")
                .map(String::as_str),
            Some("3.3.0")
        );
    }

    #[test]
    fn test_graph_header_with_provenance_and_plugins() {
        let provenance = make_test_provenance();

        let mut plugin_versions = HashMap::new();
        plugin_versions.insert("rust".to_string(), "3.3.0".to_string());
        plugin_versions.insert("python".to_string(), "3.3.0".to_string());

        let header = GraphHeader::with_provenance_and_plugins(
            100,
            50,
            200,
            10,
            provenance,
            plugin_versions,
        );

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert!(header.config_provenance.is_some());
        assert_eq!(header.plugin_versions().len(), 2);
        assert_eq!(
            header.plugin_versions().get("rust").map(String::as_str),
            Some("3.3.0")
        );
    }

    // ------------------------------------------------------------------
    // Phase 1 P1U02: GraphHeader.fact_epoch (additive u64)
    // ------------------------------------------------------------------

    #[test]
    fn phase1_graph_header_new_defaults_fact_epoch_to_zero() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert_eq!(header.fact_epoch, 0);
        assert_eq!(header.fact_epoch(), 0);
    }

    #[test]
    fn phase1_graph_header_with_provenance_defaults_fact_epoch_to_zero() {
        let header = GraphHeader::with_provenance(10, 5, 20, 2, make_test_provenance());
        assert_eq!(header.fact_epoch, 0);
    }

    #[test]
    fn phase1_graph_header_with_provenance_and_plugins_defaults_fact_epoch_to_zero() {
        let header = GraphHeader::with_provenance_and_plugins(
            10,
            5,
            20,
            2,
            make_test_provenance(),
            HashMap::new(),
        );
        assert_eq!(header.fact_epoch(), 0);
    }

    #[test]
    fn phase1_graph_header_set_fact_epoch_round_trip() {
        let mut header = GraphHeader::new(10, 5, 20, 2);
        header.set_fact_epoch(42);
        assert_eq!(header.fact_epoch(), 42);
    }

    #[test]
    fn phase1_graph_header_postcard_round_trip_with_fact_epoch() {
        let mut header = GraphHeader::new(100, 50, 200, 10);
        header.set_fact_epoch(1_234_567);

        let encoded = postcard::to_allocvec(&header).expect("encode");
        let decoded: GraphHeader = postcard::from_bytes(&encoded).expect("decode");

        assert_eq!(decoded.fact_epoch(), 1_234_567);
        assert_eq!(decoded.node_count, 100);
        assert_eq!(decoded.edge_count, 50);
    }

    #[test]
    fn phase1_graph_header_fact_epoch_preserved_through_clone() {
        let mut header = GraphHeader::new(10, 5, 20, 2);
        header.set_fact_epoch(9_999);
        let cloned = header.clone();
        assert_eq!(cloned.fact_epoch(), 9_999);
    }

    // ------------------------------------------------------------------
    // Phase 1 P1U01: FormatVersion enum + V7/V8 magic constants
    // ------------------------------------------------------------------

    #[test]
    fn phase1_magic_bytes_v7_matches_legacy() {
        assert_eq!(MAGIC_BYTES_V7, b"SQRY_GRAPH_V7");
        assert_eq!(MAGIC_BYTES_V7, MAGIC_BYTES);
        assert_eq!(MAGIC_BYTES_V7.len(), 13);
    }

    #[test]
    fn phase1_magic_bytes_v8_is_distinct_and_13_bytes() {
        assert_eq!(MAGIC_BYTES_V8, b"SQRY_GRAPH_V8");
        assert_eq!(MAGIC_BYTES_V8.len(), 13);
        assert_ne!(MAGIC_BYTES_V8, MAGIC_BYTES_V7);
    }

    #[test]
    fn phase1_legacy_version_v7_equals_seven() {
        assert_eq!(LEGACY_VERSION_V7, 7);
        assert_eq!(LEGACY_VERSION_V7, VERSION);
    }

    #[test]
    fn phase1_format_version_discriminants() {
        assert_eq!(FormatVersion::V7 as u32, 7);
        assert_eq!(FormatVersion::V8 as u32, 8);
        assert_eq!(FormatVersion::V7.as_u32(), 7);
        assert_eq!(FormatVersion::V8.as_u32(), 8);
    }

    #[test]
    fn phase1_current_version_is_v8() {
        assert_eq!(CURRENT_VERSION, FormatVersion::V8);
    }

    #[test]
    fn phase1_format_version_from_magic_v7() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V7),
            Some(FormatVersion::V7),
        );
    }

    #[test]
    fn phase1_format_version_from_magic_v8() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V8),
            Some(FormatVersion::V8),
        );
    }

    #[test]
    fn phase1_format_version_from_magic_unknown() {
        assert_eq!(FormatVersion::from_magic(b"SQRY_GRAPH_V1"), None);
        assert_eq!(FormatVersion::from_magic(b"NOT_A_GRAPH_!"), None);
    }

    #[test]
    fn phase1_format_version_from_magic_short_input() {
        // Inputs shorter than the 13-byte magic must be rejected, not panic.
        assert_eq!(FormatVersion::from_magic(b""), None);
        assert_eq!(FormatVersion::from_magic(b"SQRY_GRAPH_V"), None);
    }

    #[test]
    fn phase1_format_version_from_magic_ignores_trailing_bytes() {
        // Only the leading magic is inspected, so a full file buffer is accepted.
        let mut bytes = MAGIC_BYTES_V8.to_vec();
        bytes.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
        assert_eq!(FormatVersion::from_magic(&bytes), Some(FormatVersion::V8));
    }

    #[test]
    fn phase1_format_version_magic_round_trip() {
        for version in [FormatVersion::V7, FormatVersion::V8] {
            let bytes = version.magic();
            assert_eq!(FormatVersion::from_magic(bytes), Some(version));
        }
    }

    #[test]
    fn phase1_format_version_copy_eq_debug() {
        let v = FormatVersion::V8;
        let copied = v;
        assert_eq!(v, copied);
        assert_eq!(format!("{v:?}"), "V8");
    }
}