// sqry_core/graph/unified/persistence/format.rs

1//! Binary format definition for graph persistence.
2//!
3//! This module defines the on-disk format for persisted graphs.
4
5use std::collections::HashMap;
6
7use serde::{Deserialize, Serialize};
8
9use super::manifest::ConfigProvenance;
10
/// Magic bytes identifying a sqry graph file (legacy alias for V7).
///
/// Defined in terms of [`MAGIC_BYTES_V7`] so the legacy name and the versioned
/// name can never drift apart.
///
/// Version history:
/// - V1: Initial format (bincode)
/// - V2: Added config provenance support (bincode)
/// - V3: Added plugin version tracking (bincode)
/// - V4: Migrated to postcard serialization with length-prefixed framing
/// - V5: Added `HttpMethod::All` variant for wildcard endpoint matching
/// - V6: Added `NodeMetadataStore` for macro boundary analysis + `CfgGate` edge kind
/// - V7: Added classpath NodeKind/EdgeKind variants, `NodeMetadata` enum, `FileEntry.is_external`
/// - V8 (Phase 1 fact-layer hardening): Adds `GraphHeader.fact_epoch`, dense `NodeProvenanceStore`,
///   dense `EdgeProvenanceStore`, and `FileEntry` attribution fields (`content_hash`, `indexed_at`,
///   `source_uri`). The legacy `MAGIC_BYTES` / `VERSION` exports are preserved during Phase 1
///   to keep existing call sites compiling; later units bump the writer to V8 and treat V7 as
///   read-only.
/// - V9 (Phase 2 binding plane): Adds `ScopeArena`, `AliasTable`, `ShadowTable`, and
///   `ScopeProvenanceStore`; see [`MAGIC_BYTES_V9`].
/// - V10 (Phase 3 derived DB): Adds `FileSegmentTable`; see [`MAGIC_BYTES_V10`].
pub const MAGIC_BYTES: &[u8; 13] = MAGIC_BYTES_V7;

/// Legacy V7 format version constant, preserved for existing call sites.
///
/// Defined in terms of [`LEGACY_VERSION_V7`] so both names always agree.
/// See [`CURRENT_VERSION`] / [`FormatVersion`] for the Phase 1+ versioning contract.
pub const VERSION: u32 = LEGACY_VERSION_V7;

/// Phase 1 V7 magic bytes (the versioned name for the legacy magic).
///
/// [`MAGIC_BYTES`] is an alias for this constant; the versioned name makes the
/// legacy path explicit in reader dispatch logic (`load_from_path` branching on
/// magic bytes).
pub const MAGIC_BYTES_V7: &[u8; 13] = b"SQRY_GRAPH_V7";

/// Phase 1 V8 magic bytes.
///
/// Emitted by the Phase 1 fact-layer writer (P1U06) and accepted by the Phase 1
/// reader (P1U07). The magic is the sole versioning contract — no in-format
/// revision counter is introduced.
pub const MAGIC_BYTES_V8: &[u8; 13] = b"SQRY_GRAPH_V8";

/// Phase 2 V9 magic bytes.
///
/// Emitted by the Phase 2 binding-plane writer (P2U12) and accepted by the V9
/// reader. V9 extends V8 with `ScopeArena`, `AliasTable`, `ShadowTable`, and
/// `ScopeProvenanceStore` fields. V8 snapshots are upconverted to V9 inline on
/// load by running `derive_binding_plane`.
pub const MAGIC_BYTES_V9: &[u8; 13] = b"SQRY_GRAPH_V9";

/// Phase 3 V10 magic bytes.
///
/// Emitted by the Phase 3 derived-db writer (DB03) and accepted by the V10
/// reader. V10 extends V9 with `FileSegmentTable`. V9 snapshots are
/// upconverted to V10 inline on load by rebuilding the segment table from
/// the node arena.
///
/// Unlike the V7–V9 magics this one is 14 bytes long, so readers must not
/// assume a fixed 13-byte magic.
pub const MAGIC_BYTES_V10: &[u8; 14] = b"SQRY_GRAPH_V10";

/// Legacy V7 numeric version, exposed with a versioned name so the Phase 1 reader
/// dispatch can cite it explicitly. [`VERSION`] is an alias for this constant.
pub const LEGACY_VERSION_V7: u32 = 7;
65
/// Strongly typed identifier for an on-disk snapshot format revision.
///
/// Each phase adds one revision and keeps its predecessor loadable:
/// Phase 1 makes V8 the read/write format and keeps V7 as a read-only
/// compatibility path; Phase 2 makes V9 read/write and keeps V8 as an
/// upconvert source; Phase 3 makes V10 read/write and keeps V9 as an
/// upconvert source. Format additions are signalled by bumping the magic
/// bytes rather than by any in-format revision counter.
///
/// The explicit `u32` discriminants equal the revision numbers, so
/// `FormatVersion::V9 as u32 == 9`.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FormatVersion {
    /// Legacy V7 — read-only once Phase 1 has landed.
    V7 = 7,
    /// V8 — read/write after Phase 1; an upconvert source after Phase 2.
    V8 = 8,
    /// V9 — read/write after Phase 2. Carries the binding plane
    /// (`ScopeArena`, `AliasTable`, `ShadowTable`, `ScopeProvenanceStore`).
    /// V8 snapshots are upconverted to V9 inline on load by running
    /// `derive_binding_plane`.
    V9 = 9,
    /// V10 — read/write after Phase 3. Carries the derived DB
    /// (`FileSegmentTable`). V9 snapshots are upconverted to V10 inline on
    /// load by rebuilding the segment table from the node arena.
    V10 = 10,
}
88
89impl FormatVersion {
90    /// Returns the magic-byte sequence identifying this format version.
91    #[must_use]
92    pub const fn magic(self) -> &'static [u8] {
93        match self {
94            Self::V7 => MAGIC_BYTES_V7.as_slice(),
95            Self::V8 => MAGIC_BYTES_V8.as_slice(),
96            Self::V9 => MAGIC_BYTES_V9.as_slice(),
97            Self::V10 => MAGIC_BYTES_V10.as_slice(),
98        }
99    }
100
101    /// Returns the numeric version tag (matches the trailing digit of the magic).
102    #[must_use]
103    pub const fn as_u32(self) -> u32 {
104        self as u32
105    }
106
107    /// Parses a magic-byte prefix into a `FormatVersion`.
108    ///
109    /// Returns `None` if the bytes do not match any known format magic.
110    #[must_use]
111    pub fn from_magic(bytes: &[u8]) -> Option<Self> {
112        // V10 magic is 14 bytes; check it first since it's the longest.
113        if bytes.len() >= MAGIC_BYTES_V10.len()
114            && bytes[..MAGIC_BYTES_V10.len()] == *MAGIC_BYTES_V10
115        {
116            return Some(Self::V10);
117        }
118        if bytes.len() < MAGIC_BYTES_V7.len() {
119            return None;
120        }
121        let prefix = &bytes[..MAGIC_BYTES_V7.len()];
122        if prefix == MAGIC_BYTES_V7 {
123            Some(Self::V7)
124        } else if prefix == MAGIC_BYTES_V8 {
125            Some(Self::V8)
126        } else if prefix == MAGIC_BYTES_V9 {
127            Some(Self::V9)
128        } else {
129            None
130        }
131    }
132}
133
/// Current writer format version (Phase 3+: V10).
///
/// This is a [`FormatVersion`], not a bare integer; use
/// [`FormatVersion::magic`] / [`FormatVersion::as_u32`] to obtain the magic
/// bytes or numeric tag for it.
pub const CURRENT_VERSION: FormatVersion = FormatVersion::V10;
136
137/// Header for persisted graph files.
138///
139/// The header provides metadata about the graph for validation
140/// and efficient loading.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct GraphHeader {
143    /// Format version (for compatibility checking)
144    pub version: u32,
145
146    /// Number of nodes in the graph
147    pub node_count: usize,
148
149    /// Number of edges in the graph
150    pub edge_count: usize,
151
152    /// Number of interned strings
153    pub string_count: usize,
154
155    /// Number of registered files
156    pub file_count: usize,
157
158    /// Timestamp when graph was saved (unix epoch seconds)
159    pub timestamp: u64,
160
161    /// Configuration provenance - records which config was used to build this graph.
162    #[serde(default)]
163    pub config_provenance: Option<ConfigProvenance>,
164
165    /// Plugin versions used to build this graph (`plugin_id` → version).
166    ///
167    /// Tracks which language plugin versions were active during indexing.
168    /// Used to detect stale indexes when plugin versions change.
169    #[serde(default)]
170    pub plugin_versions: HashMap<String, String>,
171
172    /// Monotonic fact-layer epoch stamped at save time (Phase 1+).
173    ///
174    /// Strictly increases across successive saves of the same snapshot file,
175    /// including across process restarts: the writer reads the existing
176    /// header (if any) before stamping and computes
177    /// `max(prev_epoch + 1, SystemTime::now().as_secs())`.
178    ///
179    /// Defaulted to `0` for V7 snapshots and for `GraphHeader::new` /
180    /// `with_provenance` constructors. The epoch is stamped by the Phase 1
181    /// V8 writer (P1U06); this unit only introduces the field and accessors.
182    ///
183    /// Format: plain `u64`, serde-default `0` so postcard deserialization of
184    /// older headers that did not carry the field continues to succeed.
185    #[serde(default)]
186    pub fact_epoch: u64,
187}
188
189impl GraphHeader {
190    /// Creates a new graph header with the given counts.
191    #[must_use]
192    pub fn new(
193        node_count: usize,
194        edge_count: usize,
195        string_count: usize,
196        file_count: usize,
197    ) -> Self {
198        Self {
199            version: VERSION,
200            node_count,
201            edge_count,
202            string_count,
203            file_count,
204            timestamp: std::time::SystemTime::now()
205                .duration_since(std::time::UNIX_EPOCH)
206                .unwrap_or_default()
207                .as_secs(),
208            config_provenance: None,
209            plugin_versions: HashMap::new(),
210            fact_epoch: 0,
211        }
212    }
213
214    /// Creates a new graph header with config provenance.
215    #[must_use]
216    pub fn with_provenance(
217        node_count: usize,
218        edge_count: usize,
219        string_count: usize,
220        file_count: usize,
221        provenance: ConfigProvenance,
222    ) -> Self {
223        Self {
224            version: VERSION,
225            node_count,
226            edge_count,
227            string_count,
228            file_count,
229            timestamp: std::time::SystemTime::now()
230                .duration_since(std::time::UNIX_EPOCH)
231                .unwrap_or_default()
232                .as_secs(),
233            config_provenance: Some(provenance),
234            plugin_versions: HashMap::new(),
235            fact_epoch: 0,
236        }
237    }
238
239    /// Creates a new graph header with config provenance and plugin versions.
240    #[must_use]
241    pub fn with_provenance_and_plugins(
242        node_count: usize,
243        edge_count: usize,
244        string_count: usize,
245        file_count: usize,
246        provenance: ConfigProvenance,
247        plugin_versions: HashMap<String, String>,
248    ) -> Self {
249        Self {
250            version: VERSION,
251            node_count,
252            edge_count,
253            string_count,
254            file_count,
255            timestamp: std::time::SystemTime::now()
256                .duration_since(std::time::UNIX_EPOCH)
257                .unwrap_or_default()
258                .as_secs(),
259            config_provenance: Some(provenance),
260            plugin_versions,
261            fact_epoch: 0,
262        }
263    }
264
265    /// Returns the config provenance if available.
266    #[must_use]
267    pub fn provenance(&self) -> Option<&ConfigProvenance> {
268        self.config_provenance.as_ref()
269    }
270
271    /// Checks if the graph was built with tracked config provenance.
272    #[must_use]
273    pub fn has_provenance(&self) -> bool {
274        self.config_provenance.is_some()
275    }
276
277    /// Returns the plugin versions used to build this graph.
278    #[must_use]
279    pub fn plugin_versions(&self) -> &HashMap<String, String> {
280        &self.plugin_versions
281    }
282
283    /// Sets the plugin versions for this graph header.
284    pub fn set_plugin_versions(&mut self, versions: HashMap<String, String>) {
285        self.plugin_versions = versions;
286    }
287
288    /// Returns the monotonic fact-layer epoch stamped on this header.
289    ///
290    /// Returns `0` for headers created via `new` / `with_provenance` /
291    /// `with_provenance_and_plugins` before the Phase 1 writer stamps a
292    /// real epoch (P1U06), and for legacy V7 snapshots loaded through the
293    /// backwards-read path (P1U07).
294    #[must_use]
295    pub fn fact_epoch(&self) -> u64 {
296        self.fact_epoch
297    }
298
299    /// Sets the monotonic fact-layer epoch on this header.
300    ///
301    /// Intended for use by the Phase 1 V8 writer (P1U06), which computes
302    /// the epoch via a `FactEpochClock` helper and stamps it immediately
303    /// before serialization. Also used by tests.
304    pub fn set_fact_epoch(&mut self, epoch: u64) {
305        self.fact_epoch = epoch;
306    }
307}
308
/// Unit tests for the format constants, [`FormatVersion`], and [`GraphHeader`].
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Builds a fixed provenance record; only `build_timestamp` varies per call.
    fn make_test_provenance() -> ConfigProvenance {
        ConfigProvenance {
            config_file: PathBuf::from(".sqry/graph/config/config.json"),
            config_checksum: "abc123def456".to_string(),
            schema_version: 1,
            overrides: HashMap::new(),
            build_timestamp: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            build_host: Some("test-host".to_string()),
        }
    }

    #[test]
    fn test_magic_bytes() {
        assert_eq!(MAGIC_BYTES, b"SQRY_GRAPH_V7");
        assert_eq!(MAGIC_BYTES.len(), 13);
    }

    #[test]
    fn test_version() {
        assert_eq!(VERSION, 7);
    }

    #[test]
    fn test_graph_header_new() {
        let header = GraphHeader::new(100, 50, 200, 10);

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert_eq!(header.edge_count, 50);
        assert_eq!(header.string_count, 200);
        assert_eq!(header.file_count, 10);
        assert!(header.timestamp > 0);
        assert!(header.config_provenance.is_none());
    }

    #[test]
    fn test_graph_header_with_provenance() {
        let provenance = make_test_provenance();
        let header = GraphHeader::with_provenance(100, 50, 200, 10, provenance);

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert_eq!(header.edge_count, 50);
        assert!(header.config_provenance.is_some());
        assert_eq!(
            header.config_provenance.as_ref().unwrap().config_checksum,
            "abc123def456"
        );
    }

    #[test]
    fn test_graph_header_provenance_method() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(header.provenance().is_none());

        let provenance = make_test_provenance();
        let header_with = GraphHeader::with_provenance(10, 5, 20, 2, provenance);
        assert!(header_with.provenance().is_some());
        assert_eq!(
            header_with.provenance().unwrap().config_checksum,
            "abc123def456"
        );
    }

    #[test]
    fn test_graph_header_has_provenance() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(!header.has_provenance());

        let provenance = make_test_provenance();
        let header_with = GraphHeader::with_provenance(10, 5, 20, 2, provenance);
        assert!(header_with.has_provenance());
    }

    #[test]
    fn test_graph_header_clone() {
        let header = GraphHeader::new(100, 50, 200, 10);
        let cloned = header.clone();

        assert_eq!(header.version, cloned.version);
        assert_eq!(header.node_count, cloned.node_count);
        assert_eq!(header.edge_count, cloned.edge_count);
        assert_eq!(header.string_count, cloned.string_count);
        assert_eq!(header.file_count, cloned.file_count);
    }

    #[test]
    fn test_graph_header_debug() {
        let header = GraphHeader::new(100, 50, 200, 10);
        let debug_str = format!("{header:?}");

        assert!(debug_str.contains("GraphHeader"));
        assert!(debug_str.contains("version"));
        assert!(debug_str.contains("node_count"));
    }

    #[test]
    fn test_graph_header_timestamp_is_recent() {
        let header = GraphHeader::new(10, 5, 20, 2);
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();

        // Timestamp should be within 1 second of now
        assert!(header.timestamp <= now);
        assert!(header.timestamp >= now - 1);
    }

    #[test]
    fn test_graph_header_zero_counts() {
        let header = GraphHeader::new(0, 0, 0, 0);

        assert_eq!(header.node_count, 0);
        assert_eq!(header.edge_count, 0);
        assert_eq!(header.string_count, 0);
        assert_eq!(header.file_count, 0);
    }

    #[test]
    fn test_graph_header_large_counts() {
        let header = GraphHeader::new(1_000_000, 5_000_000, 10_000_000, 100_000);

        assert_eq!(header.node_count, 1_000_000);
        assert_eq!(header.edge_count, 5_000_000);
        assert_eq!(header.string_count, 10_000_000);
        assert_eq!(header.file_count, 100_000);
    }

    #[test]
    fn test_graph_header_plugin_versions_empty_by_default() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert!(header.plugin_versions().is_empty());
    }

    #[test]
    fn test_graph_header_set_plugin_versions() {
        let mut header = GraphHeader::new(10, 5, 20, 2);

        let mut versions = HashMap::new();
        versions.insert("rust".to_string(), "3.3.0".to_string());
        versions.insert("javascript".to_string(), "3.3.0".to_string());

        header.set_plugin_versions(versions.clone());

        assert_eq!(header.plugin_versions().len(), 2);
        assert_eq!(
            header.plugin_versions().get("rust"),
            Some(&"3.3.0".to_string())
        );
        assert_eq!(
            header.plugin_versions().get("javascript"),
            Some(&"3.3.0".to_string())
        );
    }

    // ------------------------------------------------------------------
    // Phase 1 P1U02: GraphHeader.fact_epoch (additive u64)
    // ------------------------------------------------------------------

    #[test]
    fn phase1_graph_header_new_defaults_fact_epoch_to_zero() {
        let header = GraphHeader::new(10, 5, 20, 2);
        assert_eq!(header.fact_epoch, 0);
        assert_eq!(header.fact_epoch(), 0);
    }

    #[test]
    fn phase1_graph_header_with_provenance_defaults_fact_epoch_to_zero() {
        let header = GraphHeader::with_provenance(10, 5, 20, 2, make_test_provenance());
        assert_eq!(header.fact_epoch, 0);
    }

    #[test]
    fn phase1_graph_header_set_fact_epoch_round_trip() {
        let mut header = GraphHeader::new(10, 5, 20, 2);
        header.set_fact_epoch(42);
        assert_eq!(header.fact_epoch(), 42);
    }

    #[test]
    fn phase1_graph_header_postcard_round_trip_with_fact_epoch() {
        let mut header = GraphHeader::new(100, 50, 200, 10);
        header.set_fact_epoch(1_234_567);

        let encoded = postcard::to_allocvec(&header).expect("encode");
        let decoded: GraphHeader = postcard::from_bytes(&encoded).expect("decode");

        assert_eq!(decoded.fact_epoch(), 1_234_567);
        assert_eq!(decoded.node_count, 100);
        assert_eq!(decoded.edge_count, 50);
    }

    #[test]
    fn phase1_graph_header_fact_epoch_preserved_through_clone() {
        let mut header = GraphHeader::new(10, 5, 20, 2);
        header.set_fact_epoch(9_999);
        let cloned = header.clone();
        assert_eq!(cloned.fact_epoch(), 9_999);
    }

    // ------------------------------------------------------------------
    // Phase 1 P1U01: FormatVersion enum + V7/V8 magic constants
    // ------------------------------------------------------------------

    #[test]
    fn phase1_magic_bytes_v7_matches_legacy() {
        assert_eq!(MAGIC_BYTES_V7, b"SQRY_GRAPH_V7");
        assert_eq!(MAGIC_BYTES_V7, MAGIC_BYTES);
        assert_eq!(MAGIC_BYTES_V7.len(), 13);
    }

    #[test]
    fn phase1_magic_bytes_v8_is_distinct_and_13_bytes() {
        assert_eq!(MAGIC_BYTES_V8, b"SQRY_GRAPH_V8");
        assert_eq!(MAGIC_BYTES_V8.len(), 13);
        assert_ne!(MAGIC_BYTES_V8, MAGIC_BYTES_V7);
    }

    #[test]
    fn phase1_legacy_version_v7_equals_seven() {
        assert_eq!(LEGACY_VERSION_V7, 7);
        assert_eq!(LEGACY_VERSION_V7, VERSION);
    }

    #[test]
    fn phase1_format_version_discriminants() {
        assert_eq!(FormatVersion::V7 as u32, 7);
        assert_eq!(FormatVersion::V8 as u32, 8);
        assert_eq!(FormatVersion::V9 as u32, 9);
        assert_eq!(FormatVersion::V10 as u32, 10);
    }

    #[test]
    fn current_version_is_v10() {
        assert_eq!(CURRENT_VERSION, FormatVersion::V10);
    }

    #[test]
    fn phase1_format_version_from_magic_v7() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V7),
            Some(FormatVersion::V7),
        );
    }

    #[test]
    fn phase1_format_version_from_magic_v8() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V8),
            Some(FormatVersion::V8),
        );
    }

    #[test]
    fn phase2_magic_bytes_v9_is_distinct_and_13_bytes() {
        assert_eq!(MAGIC_BYTES_V9, b"SQRY_GRAPH_V9");
        assert_eq!(MAGIC_BYTES_V9.len(), 13);
        assert_ne!(MAGIC_BYTES_V9, MAGIC_BYTES_V7);
        assert_ne!(MAGIC_BYTES_V9, MAGIC_BYTES_V8);
    }

    #[test]
    fn phase2_format_version_from_magic_v9() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V9),
            Some(FormatVersion::V9),
        );
    }

    #[test]
    fn phase1_format_version_from_magic_unknown() {
        assert_eq!(FormatVersion::from_magic(b"SQRY_GRAPH_V1"), None);
        assert_eq!(FormatVersion::from_magic(b"NOT_A_GRAPH_!"), None);
    }

    #[test]
    fn phase1_format_version_magic_round_trip() {
        for version in [
            FormatVersion::V7,
            FormatVersion::V8,
            FormatVersion::V9,
            FormatVersion::V10,
        ] {
            let bytes = version.magic();
            assert_eq!(FormatVersion::from_magic(bytes), Some(version));
        }
    }

    #[test]
    fn phase1_format_version_copy_eq_debug() {
        let v = FormatVersion::V8;
        let copied = v;
        assert_eq!(v, copied);
        assert_eq!(format!("{v:?}"), "V8");
    }

    #[test]
    fn phase2_format_version_v9_copy_eq_debug() {
        let v = FormatVersion::V9;
        let copied = v;
        assert_eq!(v, copied);
        assert_eq!(format!("{v:?}"), "V9");
    }

    // ------------------------------------------------------------------
    // Phase 3: V10 magic (14 bytes) + from_magic edge cases
    // ------------------------------------------------------------------

    #[test]
    fn phase3_magic_bytes_v10_is_distinct_and_14_bytes() {
        assert_eq!(MAGIC_BYTES_V10, b"SQRY_GRAPH_V10");
        assert_eq!(MAGIC_BYTES_V10.len(), 14);
        // Different array lengths, so compare as slices.
        assert_ne!(MAGIC_BYTES_V10.as_slice(), MAGIC_BYTES_V7.as_slice());
        assert_ne!(MAGIC_BYTES_V10.as_slice(), MAGIC_BYTES_V8.as_slice());
        assert_ne!(MAGIC_BYTES_V10.as_slice(), MAGIC_BYTES_V9.as_slice());
    }

    #[test]
    fn phase3_format_version_from_magic_v10() {
        assert_eq!(
            FormatVersion::from_magic(MAGIC_BYTES_V10),
            Some(FormatVersion::V10),
        );
    }

    #[test]
    fn phase3_format_version_magic_matches_constants() {
        assert_eq!(FormatVersion::V7.magic(), MAGIC_BYTES_V7.as_slice());
        assert_eq!(FormatVersion::V8.magic(), MAGIC_BYTES_V8.as_slice());
        assert_eq!(FormatVersion::V9.magic(), MAGIC_BYTES_V9.as_slice());
        assert_eq!(FormatVersion::V10.magic(), MAGIC_BYTES_V10.as_slice());
    }

    #[test]
    fn phase3_format_version_as_u32() {
        assert_eq!(FormatVersion::V7.as_u32(), 7);
        assert_eq!(FormatVersion::V8.as_u32(), 8);
        assert_eq!(FormatVersion::V9.as_u32(), 9);
        assert_eq!(FormatVersion::V10.as_u32(), 10);
        assert_eq!(CURRENT_VERSION.as_u32(), 10);
    }

    #[test]
    fn phase3_format_version_v10_copy_eq_debug() {
        let v = FormatVersion::V10;
        let copied = v;
        assert_eq!(v, copied);
        assert_eq!(format!("{v:?}"), "V10");
    }

    #[test]
    fn format_version_from_magic_rejects_truncated_input() {
        assert_eq!(FormatVersion::from_magic(b""), None);
        assert_eq!(FormatVersion::from_magic(b"SQRY"), None);
        // One byte short of a 13-byte magic.
        assert_eq!(FormatVersion::from_magic(b"SQRY_GRAPH_V"), None);
    }

    #[test]
    fn format_version_from_magic_ignores_trailing_payload() {
        for version in [
            FormatVersion::V7,
            FormatVersion::V8,
            FormatVersion::V9,
            FormatVersion::V10,
        ] {
            let mut bytes = version.magic().to_vec();
            bytes.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
            assert_eq!(FormatVersion::from_magic(&bytes), Some(version));
        }
    }

    #[test]
    fn format_version_from_magic_rejects_unknown_two_digit_version() {
        // Shares the `SQRY_GRAPH_V1` prefix with V10 but is not a known magic.
        assert_eq!(FormatVersion::from_magic(b"SQRY_GRAPH_V11"), None);
    }

    #[test]
    fn test_graph_header_with_provenance_and_plugins() {
        let provenance = make_test_provenance();

        let mut plugin_versions = HashMap::new();
        plugin_versions.insert("rust".to_string(), "3.3.0".to_string());
        plugin_versions.insert("python".to_string(), "3.3.0".to_string());

        let header = GraphHeader::with_provenance_and_plugins(
            100,
            50,
            200,
            10,
            provenance,
            plugin_versions.clone(),
        );

        assert_eq!(header.version, VERSION);
        assert_eq!(header.node_count, 100);
        assert!(header.config_provenance.is_some());
        assert_eq!(header.plugin_versions().len(), 2);
        assert_eq!(
            header.plugin_versions().get("rust"),
            Some(&"3.3.0".to_string())
        );
    }
}