Skip to main content

sqry_core/graph/unified/concurrent/
graph.rs

1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19    ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
25use crate::graph::unified::file::FileId;
26use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
27use crate::graph::unified::storage::arena::NodeArena;
28use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
29use crate::graph::unified::storage::indices::AuxiliaryIndices;
30use crate::graph::unified::storage::interner::StringInterner;
31use crate::graph::unified::storage::metadata::NodeMetadataStore;
32use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
33use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
34use crate::graph::unified::storage::segment::FileSegmentTable;
35use crate::graph::unified::string::id::StringId;
36
/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
///
/// `CodeGraph` uses `Arc` for all internal components, enabling:
/// - O(1) snapshot creation via Arc cloning
/// - Copy-on-write semantics via `Arc::make_mut`
/// - Memory-efficient sharing between snapshots
///
/// # Design
///
/// The Arc wrapping enables the MVCC pattern:
/// - Readers see a consistent snapshot at the time they acquired access
/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
/// - Multiple snapshots can coexist without copying data
///
/// # Performance
///
/// - Snapshot creation: one refcount bump per Arc-wrapped component
///   (13 components today — see `snapshot()`) ≈ <1μs
/// - Read access: Direct Arc dereference, no locking
/// - Write access: `Arc::make_mut` clones only if refcount > 1
///
/// # Phase 2 binding-plane access
///
/// Use the two-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let snapshot = graph.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The two-line form is intentional: `BindingPlane<'g>` borrows from
/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
/// visible at the call site. The full Phase 2 scope/alias/shadow and
/// witness-bearing resolution API is exposed through `BindingPlane`.
// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
// External crates still go through the public accessor methods below.
#[derive(Clone)]
pub struct CodeGraph {
    /// Node storage with generational indices.
    pub(crate) nodes: Arc<NodeArena>,
    /// Bidirectional edge storage (forward + reverse).
    pub(crate) edges: Arc<BidirectionalEdgeStore>,
    /// String interner for symbol names.
    pub(crate) strings: Arc<StringInterner>,
    /// File registry for path deduplication.
    pub(crate) files: Arc<FileRegistry>,
    /// Auxiliary indices for fast lookup.
    pub(crate) indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata (keyed by full `NodeId`).
    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance (Phase 1 fact layer).
    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance (Phase 1 fact layer).
    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
    pub(crate) fact_epoch: u64,
    /// Epoch for version tracking.
    pub(crate) epoch: u64,
    /// Per-language confidence metadata collected during build.
    /// Maps language name (e.g., "rust") to aggregated confidence.
    /// NOTE: not Arc-wrapped — cloned eagerly with the graph and NOT carried
    /// into `GraphSnapshot` (see `snapshot()`).
    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
    pub(crate) scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
    pub(crate) alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
    pub(crate) shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
    pub(crate) file_segments: Arc<FileSegmentTable>,
}
111
112impl CodeGraph {
113    /// Creates a new empty `CodeGraph`.
114    ///
115    /// # Example
116    ///
117    /// ```rust
118    /// use sqry_core::graph::unified::concurrent::CodeGraph;
119    ///
120    /// let graph = CodeGraph::new();
121    /// assert_eq!(graph.epoch(), 0);
122    /// ```
123    #[must_use]
124    pub fn new() -> Self {
125        Self {
126            nodes: Arc::new(NodeArena::new()),
127            edges: Arc::new(BidirectionalEdgeStore::new()),
128            strings: Arc::new(StringInterner::new()),
129            files: Arc::new(FileRegistry::new()),
130            indices: Arc::new(AuxiliaryIndices::new()),
131            macro_metadata: Arc::new(NodeMetadataStore::new()),
132            node_provenance: Arc::new(NodeProvenanceStore::new()),
133            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
134            fact_epoch: 0,
135            epoch: 0,
136            confidence: HashMap::new(),
137            scope_arena: Arc::new(ScopeArena::new()),
138            alias_table: Arc::new(AliasTable::new()),
139            shadow_table: Arc::new(ShadowTable::new()),
140            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
141            file_segments: Arc::new(FileSegmentTable::new()),
142        }
143    }
144
145    /// Creates a `CodeGraph` from existing components.
146    ///
147    /// This is useful when building a graph from external data or
148    /// reconstructing from serialized state.
149    #[must_use]
150    pub fn from_components(
151        nodes: NodeArena,
152        edges: BidirectionalEdgeStore,
153        strings: StringInterner,
154        files: FileRegistry,
155        indices: AuxiliaryIndices,
156        macro_metadata: NodeMetadataStore,
157    ) -> Self {
158        Self {
159            nodes: Arc::new(nodes),
160            edges: Arc::new(edges),
161            strings: Arc::new(strings),
162            files: Arc::new(files),
163            indices: Arc::new(indices),
164            macro_metadata: Arc::new(macro_metadata),
165            node_provenance: Arc::new(NodeProvenanceStore::new()),
166            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
167            fact_epoch: 0,
168            epoch: 0,
169            confidence: HashMap::new(),
170            scope_arena: Arc::new(ScopeArena::new()),
171            alias_table: Arc::new(AliasTable::new()),
172            shadow_table: Arc::new(ShadowTable::new()),
173            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
174            file_segments: Arc::new(FileSegmentTable::new()),
175        }
176    }
177
    /// Creates a cheap snapshot of the graph.
    ///
    /// This operation is a fixed number of Arc refcount bumps (13 component
    /// clones below, no data copied), which completes in <1μs. The snapshot
    /// is isolated from future mutations to the original graph.
    ///
    /// Note: the per-language `confidence` map is not part of the snapshot —
    /// only the Arc-wrapped components and the two epoch values are carried
    /// over.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// // snapshot is independent of future mutations to graph
    /// ```
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        GraphSnapshot {
            nodes: Arc::clone(&self.nodes),
            edges: Arc::clone(&self.edges),
            strings: Arc::clone(&self.strings),
            files: Arc::clone(&self.files),
            indices: Arc::clone(&self.indices),
            macro_metadata: Arc::clone(&self.macro_metadata),
            node_provenance: Arc::clone(&self.node_provenance),
            edge_provenance: Arc::clone(&self.edge_provenance),
            fact_epoch: self.fact_epoch,
            epoch: self.epoch,
            scope_arena: Arc::clone(&self.scope_arena),
            alias_table: Arc::clone(&self.alias_table),
            shadow_table: Arc::clone(&self.shadow_table),
            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
            file_segments: Arc::clone(&self.file_segments),
        }
    }
212
    // ------------------------------------------------------------------
    // Read-only component accessors. Each is a shared borrow of the
    // Arc-wrapped store; no clone, no lock.
    // ------------------------------------------------------------------

    /// Returns a reference to the node arena.
    #[inline]
    #[must_use]
    pub fn nodes(&self) -> &NodeArena {
        &self.nodes
    }

    /// Returns a reference to the bidirectional edge store.
    #[inline]
    #[must_use]
    pub fn edges(&self) -> &BidirectionalEdgeStore {
        &self.edges
    }

    /// Returns a reference to the string interner.
    #[inline]
    #[must_use]
    pub fn strings(&self) -> &StringInterner {
        &self.strings
    }

    /// Returns a reference to the file registry.
    #[inline]
    #[must_use]
    pub fn files(&self) -> &FileRegistry {
        &self.files
    }

    /// Returns a reference to the auxiliary indices.
    #[inline]
    #[must_use]
    pub fn indices(&self) -> &AuxiliaryIndices {
        &self.indices
    }

    /// Returns a reference to the macro boundary metadata store.
    #[inline]
    #[must_use]
    pub fn macro_metadata(&self) -> &NodeMetadataStore {
        &self.macro_metadata
    }
254
    // ------------------------------------------------------------------
    // Phase 1 fact-layer provenance accessors (P1U09). All lookups are
    // delegations to the underlying stores; `None` means "not recorded
    // or stale", never an error.
    // ------------------------------------------------------------------

    /// Returns the monotonic fact-layer epoch stamped on the most recently
    /// saved or loaded snapshot. Returns `0` for graphs that have not been
    /// persisted yet or were loaded from V7 snapshots.
    #[inline]
    #[must_use]
    pub fn fact_epoch(&self) -> u64 {
        self.fact_epoch
    }

    /// Looks up node provenance by `NodeId`.
    ///
    /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
    /// or the stored generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn node_provenance(
        &self,
        id: crate::graph::unified::node::id::NodeId,
    ) -> Option<&NodeProvenance> {
        self.node_provenance.lookup(id)
    }

    /// Looks up edge provenance by `EdgeId`.
    ///
    /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
    /// or the edge is the invalid sentinel.
    #[inline]
    #[must_use]
    pub fn edge_provenance(
        &self,
        id: crate::graph::unified::edge::id::EdgeId,
    ) -> Option<&EdgeProvenance> {
        self.edge_provenance.lookup(id)
    }

    /// Returns a borrowed provenance view for a file.
    ///
    /// Returns `None` for invalid/unregistered `FileId`s.
    #[inline]
    #[must_use]
    pub fn file_provenance(
        &self,
        id: crate::graph::unified::file::id::FileId,
    ) -> Option<FileProvenanceView<'_>> {
        self.files.file_provenance(id)
    }
305
    // ------------------------------------------------------------------
    // Phase 2 binding-plane accessors (P2U03). Getters borrow the shared
    // Arc contents; the `pub(crate)` setters replace the whole Arc so
    // existing snapshots keep the table they were created with.
    // ------------------------------------------------------------------

    /// Returns a reference to the scope arena derived during Phase 4e.
    ///
    /// The arena is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn scope_arena(&self) -> &ScopeArena {
        &self.scope_arena
    }

    /// Installs a freshly-derived scope arena.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
        self.scope_arena = Arc::new(arena);
    }

    /// Returns a reference to the alias table derived during Phase 4e.
    ///
    /// The table is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn alias_table(&self) -> &AliasTable {
        &self.alias_table
    }

    /// Installs a freshly-derived alias table.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
        self.alias_table = Arc::new(table);
    }

    /// Returns a reference to the shadow table derived during Phase 4e.
    ///
    /// The table is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn shadow_table(&self) -> &ShadowTable {
        &self.shadow_table
    }

    /// Installs a freshly-derived shadow table.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
        self.shadow_table = Arc::new(table);
    }

    /// Returns a reference to the scope provenance store derived during Phase 4e.
    ///
    /// The store is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
        &self.scope_provenance_store
    }

    /// Looks up scope provenance by `ScopeId`.
    ///
    /// Returns `None` if the slot is out of range, vacant, or the stored
    /// generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
        self.scope_provenance_store.lookup(id)
    }

    /// Looks up the live `ScopeId` for a stable scope identity.
    ///
    /// Returns `None` if no provenance record is registered for that stable id.
    /// The reverse index is populated by `insert` during Phase 4e and must be
    /// rebuilt after V9 deserialization.
    #[inline]
    #[must_use]
    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
        self.scope_provenance_store.scope_by_stable_id(stable)
    }

    /// Installs a freshly-derived scope provenance store.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
        self.scope_provenance_store = Arc::new(store);
    }
406
    /// Returns a reference to the file segment table.
    #[inline]
    #[must_use]
    pub fn file_segments(&self) -> &FileSegmentTable {
        &self.file_segments
    }

    /// Replaces the file segment table wholesale (new Arc; existing
    /// snapshots keep the table they were created with).
    pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
        self.file_segments = Arc::new(table);
    }

    /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
    ///
    /// Copy-on-write: clones the table first if any snapshot still holds
    /// the Arc.
    pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
        Arc::make_mut(&mut self.file_segments)
    }
423
    /// Test-only helper that records a file segment directly on the
    /// graph, bypassing the full Phase 3 commit pipeline. Only
    /// available under `#[cfg(feature = "rebuild-internals")]` so the
    /// surface is opt-in for rebuild-plane consumers (the feature is
    /// whitelisted to sqry-daemon + sqry-core integration tests; see
    /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
    ///
    /// Integration tests (notably
    /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
    /// to seed the synthetic workspaces they build before exercising
    /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
    /// code paths never touch this method — Phase 3 parallel commit
    /// is the sole production writer, via the crate-internal
    /// `file_segments_mut` accessor above.
    ///
    /// Renamed with `test_only_` prefix so the purpose is unambiguous
    /// at every call site; `#[doc(hidden)]` hides it from rendered
    /// rustdoc so downstream daemon integrations don't discover it by
    /// accident.
    ///
    /// Copy-on-write applies here too: `Arc::make_mut` clones the segment
    /// table first if a live snapshot still shares it.
    #[cfg(feature = "rebuild-internals")]
    #[doc(hidden)]
    pub fn test_only_record_file_segment(
        &mut self,
        file_id: FileId,
        start_slot: u32,
        slot_count: u32,
    ) {
        Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
    }
453
    /// Sets the provenance stores and fact epoch, typically called by the
    /// persistence loader after deserializing a V8 snapshot.
    ///
    /// Replaces both Arcs wholesale; snapshots created before this call
    /// keep the stores they were created with.
    pub(crate) fn set_provenance(
        &mut self,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
    ) {
        self.node_provenance = Arc::new(node_provenance);
        self.edge_provenance = Arc::new(edge_provenance);
        self.fact_epoch = fact_epoch;
    }

    /// Returns the current epoch (version counter bumped on each write).
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch
    }
473
    // ------------------------------------------------------------------
    // Copy-on-write mutable accessors. Each goes through `Arc::make_mut`:
    // cheap (no copy) when this graph is the sole owner, a full clone of
    // that component when a snapshot still shares it.
    // ------------------------------------------------------------------

    /// Returns a mutable reference to the node arena.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics: if other
    /// references exist (e.g., snapshots), the data is cloned.
    #[inline]
    pub fn nodes_mut(&mut self) -> &mut NodeArena {
        Arc::make_mut(&mut self.nodes)
    }

    /// Returns a mutable reference to the bidirectional edge store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
        Arc::make_mut(&mut self.edges)
    }

    /// Returns a mutable reference to the string interner.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn strings_mut(&mut self) -> &mut StringInterner {
        Arc::make_mut(&mut self.strings)
    }

    /// Returns a mutable reference to the file registry.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn files_mut(&mut self) -> &mut FileRegistry {
        Arc::make_mut(&mut self.files)
    }

    /// Returns a mutable reference to the auxiliary indices.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
        Arc::make_mut(&mut self.indices)
    }

    /// Returns a mutable reference to the macro boundary metadata store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
        Arc::make_mut(&mut self.macro_metadata)
    }

    /// Returns mutable references to both the node arena and the string interner.
    ///
    /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
    /// `strings_mut()` separately on `&mut self` (disjoint field borrows are
    /// fine in one expression, but two `&mut self` method calls are not).
    #[inline]
    pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
        (
            Arc::make_mut(&mut self.nodes),
            Arc::make_mut(&mut self.strings),
        )
    }
534
    /// Rebuilds auxiliary indices from the current node arena.
    ///
    /// This avoids the borrow conflict that arises when calling `nodes()` and
    /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
    /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
    /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
    /// existing indices and rebuilds in a single pass without per-element
    /// duplicate checking.
    ///
    /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
    /// generic
    /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
    /// free function so the same implementation serves both the
    /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
    /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
    /// using `graph.rebuild_indices()`; incremental rebuild call sites
    /// should use the free function directly.
    pub fn rebuild_indices(&mut self) {
        crate::graph::unified::build::parallel_commit::rebuild_indices(self);
    }
555
    /// Increments the epoch counter and returns the new value.
    ///
    /// Called automatically by `ConcurrentCodeGraph::write()`.
    /// `wrapping_add` is deliberate: the counter rolls over rather than
    /// panicking on (theoretical) u64 overflow.
    #[inline]
    pub fn bump_epoch(&mut self) -> u64 {
        self.epoch = self.epoch.wrapping_add(1);
        self.epoch
    }

    /// Sets the epoch to a specific value.
    ///
    /// This is primarily for testing or reconstruction from serialized state.
    #[inline]
    pub fn set_epoch(&mut self, epoch: u64) {
        self.epoch = epoch;
    }
572
    /// Returns the number of nodes in the graph.
    ///
    /// This is a convenience method that delegates to `nodes().len()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert_eq!(graph.node_count(), 0);
    /// ```
    #[inline]
    #[must_use]
    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }

    /// Returns the number of edges in the graph (forward direction).
    ///
    /// This counts edges in the forward store, including both CSR and delta
    /// edges (the two tiers of the forward adjacency, per `stats()`).
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert_eq!(graph.edge_count(), 0);
    /// ```
    #[inline]
    #[must_use]
    pub fn edge_count(&self) -> usize {
        let stats = self.edges.stats();
        stats.forward.csr_edge_count + stats.forward.delta_edge_count
    }

    /// Returns true if the graph contains no nodes.
    ///
    /// This is a convenience method that delegates to `nodes().is_empty()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert!(graph.is_empty());
    /// ```
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.nodes.is_empty()
    }
627
    /// Returns an iterator over all indexed file paths.
    ///
    /// This is useful for enumerating all files that have been processed
    /// and added to the graph. Paths are borrowed from the registry's
    /// `Arc<Path>` entries, so iteration allocates nothing.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// for (file_id, path) in graph.indexed_files() {
    ///     println!("File {}: {}", file_id.index(), path.display());
    /// }
    /// ```
    #[inline]
    pub fn indexed_files(
        &self,
    ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
        self.files
            .iter()
            .map(|(id, arc_path)| (id, arc_path.as_ref()))
    }
651
652    /// Returns the set of files that import one or more symbols exported by
653    /// `file_id`, deduplicated and sorted ascending.
654    ///
655    /// For every [`EdgeKind::Imports`] edge whose target node lives in
656    /// `file_id`, the source node's [`FileId`] is added to the result. Files
657    /// are returned sorted by raw index so the result is deterministic across
658    /// runs. The caller's own file is never included — an `Imports` edge
659    /// whose source and target both live in `file_id` is treated as a
660    /// self-import and elided. Edges whose source node is no longer resolvable
661    /// in the arena (tombstoned) are silently skipped.
662    ///
663    /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
664    /// by the incremental rebuild engine to compute reverse-dependency
665    /// closures: "if file X changes its exports, which files need to be
666    /// re-linked?"
667    ///
668    /// # Complexity
669    ///
670    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
671    /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
672    /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
673    /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
674    /// importer set (where `R` is the importer count) is negligible in
675    /// practice since `R ≤ file_count`.
676    ///
677    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
678    #[must_use]
679    pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
680        let mut importers: HashSet<FileId> = HashSet::new();
681        for &target_node in self.indices.by_file(file_id) {
682            for edge_ref in self.edges.edges_to(target_node) {
683                if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
684                    continue;
685                }
686                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
687                    continue;
688                };
689                let source_file = source_entry.file;
690                if source_file != file_id {
691                    importers.insert(source_file);
692                }
693            }
694        }
695        let mut result: Vec<FileId> = importers.into_iter().collect();
696        result.sort();
697        result
698    }
699
700    /// Returns the set of files that hold at least one live inter-file edge
701    /// targeting a node in `file_id`, deduplicated and sorted ascending.
702    ///
703    /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
704    /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
705    /// cross-file edge as a dependency signal: `Calls`, `References`,
706    /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
707    /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
708    /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
709    /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
710    /// cross-file-capable variant. This is the reverse-dependency surface the
711    /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
712    /// `file_id` changes, every file whose committed edges point into
713    /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
714    /// references survive the target-side tombstone-and-reparse cycle.
715    ///
716    /// The caller's own file is never included — an edge whose source and
717    /// target both live in `file_id` is a self-reference and is elided.
718    /// Edges whose source node is no longer resolvable in the arena
719    /// (tombstoned) are silently skipped.
720    ///
721    /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
722    ///
723    /// * [`reverse_import_index`](Self::reverse_import_index) remains the
724    ///   right surface for consumers that specifically need *import*
725    ///   relationships (export surface analysis, module-dependency graphs,
726    ///   etc.).
727    /// * `reverse_dependency_index` is the right surface for incremental
728    ///   rebuild closure computation. Widening past imports is necessary
729    ///   because call sites, type references, trait implementations, FFI
730    ///   declarations, HTTP clients, and every other cross-file edge kind
731    ///   hold a committed edge into the target file that becomes stale the
732    ///   moment `remove_file(target)` tombstones its arena nodes. Leaving
733    ///   those files out of the closure leaves the committed edges pointing
734    ///   at the stale (pre-tombstone) node IDs — Phase 4c-prime only
735    ///   rewrites edges on **re-parsed** files' `PendingEdge` sets, never
736    ///   committed edges owned by files outside the reparse scope.
737    ///
738    /// # Complexity
739    ///
740    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
741    /// same bound as [`reverse_import_index`](Self::reverse_import_index).
742    /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
743    /// lookup and the bidirectional edge store's reverse adjacency; no
744    /// full-graph scan. Final `O(R log R)` sort over the deduplicated
745    /// dependent set is negligible since `R ≤ file_count`.
746    ///
747    /// # Over-widening is expected and acceptable
748    ///
749    /// The closure will include every file that references anything in
750    /// `file_id`, not just files whose exports change. In common codebases
751    /// this widens the reparse set modestly (a 10-file change may expand
752    /// to 20–30 dependent files in a medium crate). Correctness requires
753    /// the widening; minimality is a follow-up optimisation if profiling
754    /// demands it.
755    ///
756    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
757    #[must_use]
758    pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
759        let mut dependents: HashSet<FileId> = HashSet::new();
760        for &target_node in self.indices.by_file(file_id) {
761            for edge_ref in self.edges.edges_to(target_node) {
762                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
763                    continue;
764                };
765                let source_file = source_entry.file;
766                if source_file != file_id {
767                    dependents.insert(source_file);
768                }
769            }
770        }
771        let mut result: Vec<FileId> = dependents.into_iter().collect();
772        result.sort();
773        result
774    }
775
    /// Returns the per-language confidence metadata.
    ///
    /// This contains analysis confidence information collected during graph build,
    /// primarily used by language plugins (e.g., Rust) to track analysis quality.
    /// The map is keyed by language name — the same key
    /// [`merge_confidence`](Self::merge_confidence) writes under. The borrow is
    /// tied to `&self`; clone entries that must outlive the graph borrow.
    #[inline]
    #[must_use]
    pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
        &self.confidence
    }
785
786    /// Merges confidence metadata for a language.
787    ///
788    /// If confidence already exists for the language, this merges the new
789    /// metadata (taking the lower confidence level and combining limitations).
790    /// Otherwise, it inserts the new confidence.
791    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
792        use crate::confidence::ConfidenceLevel;
793
794        self.confidence
795            .entry(language.to_string())
796            .and_modify(|existing| {
797                // Take the lower confidence level (more conservative)
798                let new_level = match (&existing.level, &metadata.level) {
799                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
800                        *other
801                    }
802                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
803                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
804                        ConfidenceLevel::AstOnly
805                    }
806                    (level, _) => *level,
807                };
808                existing.level = new_level;
809
810                // Merge limitations (deduplicated)
811                for limitation in &metadata.limitations {
812                    if !existing.limitations.contains(limitation) {
813                        existing.limitations.push(limitation.clone());
814                    }
815                }
816
817                // Merge unavailable features (deduplicated)
818                for feature in &metadata.unavailable_features {
819                    if !existing.unavailable_features.contains(feature) {
820                        existing.unavailable_features.push(feature.clone());
821                    }
822                }
823            })
824            .or_insert(metadata);
825    }
826
    /// Sets the confidence metadata map directly.
    ///
    /// This is primarily used when loading a graph from serialized state.
    /// The previous map is replaced wholesale — no merging happens here;
    /// use [`merge_confidence`](Self::merge_confidence) for additive updates.
    pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
        self.confidence = confidence;
    }
833
834    // ------------------------------------------------------------------
835    // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
836    //
837    // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
838    // path mutates a live `CodeGraph` in place and is the mechanism
839    // used by the full-rebuild flow when it needs to evict a file's
840    // nodes+edges between compactions. The daemon's incremental
841    // `WorkspaceManager` (Task 6) goes through the rebuild path, which
842    // is why this entry point is `pub(crate)`.
843    // ------------------------------------------------------------------
844
845    /// Tombstone every node that belongs to `file_id`, invalidate every
846    /// edge whose source or target is one of those nodes (across both
847    /// forward and reverse CSR + delta tiers), drop the file's entry
848    /// from the [`FileRegistry`], and return the set of [`NodeId`]s
849    /// that were tombstoned.
850    ///
851    /// This is the §F.2-aware file-removal primitive. Semantically, the
852    /// post-condition matches what a full rebuild of the workspace
853    /// without `file_id` would produce:
854    ///
855    /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
856    ///   been `NodeArena::remove`d, advancing its slot generation so
857    ///   stale [`NodeId`] handles do not alias a later re-allocation.
858    /// * Every CSR edge whose source or target slot matches one of the
859    ///   tombstoned slot indices has its `csr_tombstones` bit set; the
860    ///   read path's merge step already filters tombstoned CSR edges
861    ///   out of every query result.
862    /// * Every delta-buffer edge (Add or Remove, any file) whose source
863    ///   or target matches a tombstoned slot has been dropped from the
864    ///   delta in both directions.
865    /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
866    ///   no longer reference any of the tombstoned `NodeId`s.
867    /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
868    ///   [`AliasTable`], and [`ShadowTable`] have been compacted through
869    ///   the [`NodeIdBearing::retain_nodes`] predicate so no
870    ///   tombstoned NodeId survives in any publish-visible store.
871    /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
872    ///   `file_id`, the lookup slot is recycled, and
873    ///   [`FileRegistry::resolve(file_id)`] returns `None`.
874    ///
875    /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
876    /// is useful for downstream housekeeping (e.g., resetting per-file
877    /// caches keyed by NodeId) and for tests that need to assert on the
878    /// exact membership of the tombstone set.
879    ///
880    /// # Idempotency
881    ///
882    /// Calling `remove_file` twice with the same `file_id` — or calling
883    /// it for a `file_id` that was never registered — is a safe no-op.
884    /// The returned `Vec<NodeId>` is empty on the second call; no
885    /// arena / edge / index state is mutated (the predicate-based
886    /// compaction of `NodeIdBearing` surfaces short-circuits when the
887    /// dead set is empty).
888    ///
889    /// # Visibility
890    ///
891    /// `pub(crate)` because external callers (Task 6's
892    /// `WorkspaceManager` on the sqry-daemon side) route through
893    /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
894    /// instead. This `CodeGraph`-level variant is used by full-rebuild
895    /// housekeeping paths inside sqry-core and by the Task 4 Step 4
896    /// incremental fallback for cases where the caller already has a
897    /// `&mut CodeGraph` and does not need the clone-and-publish
898    /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
899    /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
900    ///
901    /// # Performance
902    ///
903    /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
904    ///   The CSR walk is linear in total edge count (not per-file),
905    ///   which is the dominant cost; each row check is O(1) via the
906    ///   precomputed dead-slot-index `HashSet` in
907    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
908    /// * Delta filtering is `O(|delta|)` per direction.
909    /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
910    ///   because each of the four indices keys its entries by the
911    ///   tombstoned NodeIds directly.
912    ///
913    /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
914    /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
915    /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
916    #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
917    // and the unit tests below; published in this commit so the §F.2
918    // invariant surface can be reviewed in isolation per the Gate 0c
919    // split contract.
920    pub(crate) fn remove_file(
921        &mut self,
922        file_id: FileId,
923    ) -> Vec<crate::graph::unified::node::NodeId> {
924        use crate::graph::unified::node::NodeId;
925        use crate::graph::unified::rebuild::coverage::NodeIdBearing;
926
927        // Drain the per-file bucket. For a file that was never
928        // registered, this returns an empty Vec — the rest of the
929        // method short-circuits on the `if tombstoned.is_empty()` test
930        // below so we still deregister the file on the off chance the
931        // bucket was empty but the file was registered (defensive; the
932        // common case is the bucket existed iff the file was
933        // registered).
934        let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
935        // Always drop the file's path entry + recycle its slot, even
936        // when the bucket was empty, so repeated registrations of the
937        // same path don't resurrect a zombie FileId. `unregister` is
938        // idempotent (returns None for unknown IDs) so the cost is a
939        // single HashMap probe when file_id is already gone.
940        self.files_mut().unregister(file_id);
941        // Clear the file's segment entry unconditionally (idempotent
942        // — `FileSegmentTable::remove` no-ops on unknown ids). This
943        // MUST run before `FileRegistry::unregister` recycles the
944        // FileId slot for reuse, otherwise a later registration of a
945        // different path under the reused FileId would inherit the
946        // previous file's stale node range (see
947        // `sqry-core/src/graph/unified/build/reindex.rs` — which
948        // trusts `file_segments().get(file_id)` to decide which slots
949        // to tombstone). Note: `unregister` above was called first
950        // only to keep the existing bucket-drain ordering; the
951        // segment-clear is order-independent with respect to
952        // `unregister` because neither touches the other's backing
953        // store, and the FileId slot cannot be recycled-and-reissued
954        // across a single `remove_file` call (the registry's slot
955        // recycler is driven by a later `register`, not by
956        // `unregister`).
957        self.file_segments_mut().remove(file_id);
958
959        if tombstoned.is_empty() {
960            return tombstoned;
961        }
962
963        // Dead set keyed on NodeId for NodeIdBearing predicates.
964        // `retain_nodes` uses `HashSet::contains` so membership is O(1).
965        let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
966
967        // 1. Tombstone the arena slots. `NodeArena::remove` is
968        //    idempotent — stale NodeIds that don't match a slot's
969        //    current generation are no-ops, which lets this method be
970        //    safely re-run on the same file.
971        {
972            let arena = self.nodes_mut();
973            for &nid in &tombstoned {
974                let _ = arena.remove(nid);
975            }
976        }
977
978        // 2. Invalidate edges across both CSR + delta in both
979        //    directions. This is the expensive step; the helper uses a
980        //    precomputed dead-slot-index set so the CSR walk is linear
981        //    in total edge count, not quadratic.
982        self.edges_mut().tombstone_edges_for_nodes(&dead);
983
984        // 3. Compact the auxiliary indices so name/kind/qname/file
985        //    lookups do not return tombstoned NodeIds. Using the
986        //    NodeIdBearing surface keeps this step in lockstep with the
987        //    rebuild pipeline's step 4 — any future publish-visible
988        //    NodeId-bearing container added to the K.A/K.B matrix is
989        //    automatically swept here too.
990        {
991            let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
992            self.indices_mut().retain_nodes(&*predicate);
993            self.macro_metadata_mut().retain_nodes(&*predicate);
994            // The remaining K.A rows (node_provenance, scope_arena,
995            // alias_table, shadow_table) need Arc::make_mut accessors —
996            // they are wrapped in Arc at rest so this is where the
997            // CoW clone happens on demand. Inline the Arc::make_mut
998            // calls here (no public mut accessor exists today because
999            // the sole writer has been the rebuild path; extend the
1000            // set by adding a similar inline call plus a K.A/K.B row
1001            // in `super::super::rebuild::coverage`).
1002            Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1003            Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1004            Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1005            Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1006        }
1007
1008        tombstoned
1009    }
1010
1011    // ------------------------------------------------------------------
1012    // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1013    // ------------------------------------------------------------------
1014
1015    /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1016    /// by `RebuildGraph::finalize()` (defined in
1017    /// `super::super::rebuild::rebuild_graph`).
1018    ///
1019    /// This constructor is deliberately `pub(crate)` and named with a
1020    /// leading `__` so it is inaccessible from downstream crates even
1021    /// when the `rebuild-internals` feature is enabled: only code in
1022    /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1023    /// permitted to call it. The trybuild fixture
1024    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1025    /// proves there is no other path from `RebuildGraph` to
1026    /// `Arc<CodeGraph>`.
1027    ///
1028    /// The argument order mirrors the `CodeGraph` struct declaration
1029    /// order exactly; the public `clone_for_rebuild` → `finalize`
1030    /// round-trip uses the same macro-driven field enumeration so any
1031    /// new `CodeGraph` field automatically threads through this
1032    /// constructor as well.
    #[doc(hidden)]
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub(crate) fn __assemble_from_rebuild_parts_internal(
        nodes: NodeArena,
        edges: BidirectionalEdgeStore,
        strings: StringInterner,
        files: FileRegistry,
        indices: AuxiliaryIndices,
        macro_metadata: NodeMetadataStore,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
        epoch: u64,
        confidence: HashMap<String, ConfidenceMetadata>,
        scope_arena: ScopeArena,
        alias_table: AliasTable,
        shadow_table: ShadowTable,
        scope_provenance_store: ScopeProvenanceStore,
        file_segments: FileSegmentTable,
    ) -> Self {
        // Every owned part is Arc-wrapped here so snapshot creation stays
        // O(1): a snapshot only clones these Arcs, never the data behind
        // them. `fact_epoch`, `epoch`, and `confidence` are the only
        // fields stored by value.
        Self {
            nodes: Arc::new(nodes),
            edges: Arc::new(edges),
            strings: Arc::new(strings),
            files: Arc::new(files),
            indices: Arc::new(indices),
            macro_metadata: Arc::new(macro_metadata),
            node_provenance: Arc::new(node_provenance),
            edge_provenance: Arc::new(edge_provenance),
            fact_epoch,
            epoch,
            confidence,
            scope_arena: Arc::new(scope_arena),
            alias_table: Arc::new(alias_table),
            shadow_table: Arc::new(shadow_table),
            scope_provenance_store: Arc::new(scope_provenance_store),
            file_segments: Arc::new(file_segments),
        }
    }
1073
1074    // ------------------------------------------------------------------
1075    // Gate 0c (A2 §F) — publish-boundary debug invariants.
1076    //
1077    // These checks fire only in debug / test builds; release builds
1078    // compile them out. They are called from
1079    // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1080    // truth for the residue check, per §F and §H agreement). Gate 0d
1081    // will additionally wire the bijection check into
1082    // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1083    // every §E equivalence-harness run.
1084    // ------------------------------------------------------------------
1085
1086    /// Assert the bijective bucket-membership invariant (A2 §F.1).
1087    ///
1088    /// Four conditions must hold simultaneously:
1089    /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1090    ///     live arena slot;
1091    /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1092    ///     across buckets, no duplicates within a bucket);
1093    /// (c) the bucket's `FileId` matches the node's own `file` field on
1094    ///     `NodeEntry`;
1095    /// (d) when at least one bucket is populated, every live node in
1096    ///     the arena is accounted for by some bucket.
1097    ///
1098    /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1099    /// (no recorded buckets) publish boundary is vacuously consistent:
1100    /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1101    /// rebuilds on graphs that predate Gate 0c's parallel-commit
1102    /// bucketing must not panic. Once any bucket is populated, every
1103    /// live arena slot must appear in a bucket.
1104    ///
1105    /// Iter-2 B2 (verbatim): this check used to be documented as
1106    /// "vacuous until future `per_file_nodes` work lands". Pulling
1107    /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1108    /// is non-vacuous the moment parallel-parse commits nodes, which
1109    /// happens on every real build. The `!seen.is_empty()` guard now
1110    /// exists solely for the empty-graph corner case, not as a phased-
1111    /// delivery deferral.
1112    ///
1113    /// The check is a no-op in release builds. Panics with a
1114    /// descriptive message on violation. This is intentional: publish-
1115    /// boundary violations are programmer errors that must surface
1116    /// loudly during CI / test runs.
1117    #[cfg(any(debug_assertions, test))]
1118    pub fn assert_bucket_bijection(&self) {
1119        use std::collections::HashMap as StdHashMap;
1120        // (a) + (b) + (c): every bucketed node is live, unique across
1121        // buckets, and the bucket's FileId matches the node's own file.
1122        let mut seen: StdHashMap<
1123            crate::graph::unified::node::NodeId,
1124            crate::graph::unified::file::FileId,
1125        > = StdHashMap::new();
1126        let mut any_bucket_populated = false;
1127        for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1128            if !bucket.is_empty() {
1129                any_bucket_populated = true;
1130            }
1131            // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1132            // dedups during finalize step 6, but we re-check here so the
1133            // invariant covers non-finalize publish paths too (e.g. if a
1134            // future pipeline builds a graph without routing through
1135            // `RebuildGraph::finalize`).
1136            let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1137                std::collections::HashSet::new();
1138            for node_id in bucket {
1139                assert!(
1140                    within_bucket.insert(node_id),
1141                    "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1142                );
1143                assert!(
1144                    self.nodes.get(node_id).is_some(),
1145                    "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1146                );
1147                let prior = seen.insert(node_id, file_id);
1148                assert!(
1149                    prior.is_none(),
1150                    "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1151                     prior={prior:?}, current={file_id:?}"
1152                );
1153                if let Some(entry) = self.nodes.get(node_id) {
1154                    assert_eq!(
1155                        entry.file, file_id,
1156                        "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1157                         {file_id:?}, actual {:?}",
1158                        entry.file
1159                    );
1160                }
1161            }
1162        }
1163        // (d): every live node is accounted for by `seen` once buckets
1164        // are populated. The guard keeps legacy-empty-graph boundaries
1165        // vacuously consistent (see docs above).
1166        if any_bucket_populated {
1167            for (node_id, _entry) in self.nodes.iter() {
1168                assert!(
1169                    seen.contains_key(&node_id),
1170                    "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1171                );
1172            }
1173        }
1174    }
1175
1176    /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1177    ///
1178    /// Iterates every publish-visible NodeId-bearing structure on
1179    /// `self` and panics if any contains a node in `dead`. Called from
1180    /// `RebuildGraph::finalize()` step 14 against the set drained at
1181    /// step 8 — exactly one site per the plan's §F / §H agreement.
1182    ///
1183    /// No-op when `dead` is empty or in release builds.
1184    #[cfg(any(debug_assertions, test))]
1185    pub fn assert_no_tombstone_residue_for(
1186        &self,
1187        dead: &std::collections::HashSet<crate::graph::unified::node::NodeId>,
1188    ) {
1189        use super::super::rebuild::coverage::NodeIdBearing;
1190        if dead.is_empty() {
1191            return;
1192        }
1193        // Every K.A/K.B row must be inspected per §F.2.
1194        for nid in self.nodes.all_node_ids() {
1195            assert!(
1196                !dead.contains(&nid),
1197                "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1198            );
1199        }
1200        for nid in self.indices.all_node_ids() {
1201            assert!(
1202                !dead.contains(&nid),
1203                "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1204            );
1205        }
1206        for nid in self.edges.all_node_ids() {
1207            assert!(
1208                !dead.contains(&nid),
1209                "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1210            );
1211        }
1212        for nid in self.macro_metadata.all_node_ids() {
1213            assert!(
1214                !dead.contains(&nid),
1215                "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1216            );
1217        }
1218        for nid in self.node_provenance.all_node_ids() {
1219            assert!(
1220                !dead.contains(&nid),
1221                "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1222            );
1223        }
1224        for nid in self.scope_arena.all_node_ids() {
1225            assert!(
1226                !dead.contains(&nid),
1227                "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1228            );
1229        }
1230        for nid in self.alias_table.all_node_ids() {
1231            assert!(
1232                !dead.contains(&nid),
1233                "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1234            );
1235        }
1236        for nid in self.shadow_table.all_node_ids() {
1237            assert!(
1238                !dead.contains(&nid),
1239                "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1240            );
1241        }
1242        for nid in self.files.all_node_ids() {
1243            assert!(
1244                !dead.contains(&nid),
1245                "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1246            );
1247        }
1248    }
1249}
1250
impl Default for CodeGraph {
    /// Delegates to [`CodeGraph::new`], so `CodeGraph::default()` and
    /// `CodeGraph::new()` are interchangeable.
    fn default() -> Self {
        Self::new()
    }
}
1256
impl fmt::Debug for CodeGraph {
    /// Compact debug output: node count and epoch only.
    ///
    /// `finish_non_exhaustive` renders a trailing `..` to signal that
    /// the remaining fields are deliberately omitted (they are large
    /// Arc-wrapped stores whose contents would swamp the output).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CodeGraph")
            .field("nodes", &self.nodes.len())
            .field("epoch", &self.epoch)
            .finish_non_exhaustive()
    }
}
1265
1266impl GraphMemorySize for CodeGraph {
1267    /// Estimates the total heap bytes owned by this `CodeGraph`.
1268    ///
1269    /// Sums heap usage across every component the graph owns: node arena,
1270    /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1271    /// buffer), string interner, file registry, auxiliary indices, sparse
1272    /// macro/classpath metadata, provenance stores, and the per-language
1273    /// confidence map. Used by the `sqryd` daemon's admission controller
1274    /// and workspace retention reaper to enforce memory budgets.
1275    fn heap_bytes(&self) -> usize {
1276        let mut confidence_bytes = self.confidence.capacity()
1277            * (std::mem::size_of::<String>()
1278                + std::mem::size_of::<ConfidenceMetadata>()
1279                + HASHMAP_ENTRY_OVERHEAD);
1280        for (key, meta) in &self.confidence {
1281            confidence_bytes += key.capacity();
1282            // `ConfidenceMetadata.limitations` / `unavailable_features` are
1283            // `Vec<String>`; charge their spill and inner String payloads.
1284            confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1285            for s in &meta.limitations {
1286                confidence_bytes += s.capacity();
1287            }
1288            confidence_bytes +=
1289                meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1290            for s in &meta.unavailable_features {
1291                confidence_bytes += s.capacity();
1292            }
1293        }
1294
1295        self.nodes.heap_bytes()
1296            + self.edges.heap_bytes()
1297            + self.strings.heap_bytes()
1298            + self.files.heap_bytes()
1299            + self.indices.heap_bytes()
1300            + self.macro_metadata.heap_bytes()
1301            + self.node_provenance.heap_bytes()
1302            + self.edge_provenance.heap_bytes()
1303            + confidence_bytes
1304            + self.file_segments.capacity()
1305                * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1306                )
1307    }
1308}
1309
1310/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1311///
1312/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1313/// - Multiple readers can access the graph simultaneously
1314/// - Only one writer can hold the lock at a time
1315/// - Each write operation increments the epoch for cursor invalidation
1316///
1317/// # Design
1318///
1319/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1320/// - Fair scheduling prevents writer starvation
1321/// - No poisoning (unlike `std::sync::RwLock`)
1322/// - Faster lock/unlock operations
1323///
1324/// # Phase 2 binding-plane access
1325///
1326/// Use the three-line snapshot pattern to access `BindingPlane`:
1327///
1328/// ```rust,ignore
1329/// let read_guard = concurrent.read();
1330/// let snapshot = read_guard.snapshot();
1331/// let plane = snapshot.binding_plane();
1332/// let resolution = plane.resolve(&query);
1333/// ```
1334///
1335/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1336/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1337/// API is exposed through `BindingPlane`.
1338///
1339/// # Usage
1340///
1341/// ```rust
1342/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1343///
1344/// let graph = ConcurrentCodeGraph::new();
1345///
1346/// // Read access (multiple readers allowed)
1347/// {
1348///     let guard = graph.read();
1349///     let _nodes = guard.nodes();
1350/// }
1351///
1352/// // Write access (exclusive)
1353/// {
1354///     let mut guard = graph.write();
1355///     let _nodes = guard.nodes_mut();
1356/// }
1357///
1358/// // Snapshot for long queries
1359/// let snapshot = graph.snapshot();
1360/// ```
pub struct ConcurrentCodeGraph {
    /// The underlying code graph protected by a read-write lock.
    /// Readers share the lock; `write()` holds it exclusively.
    inner: RwLock<CodeGraph>,
    /// Global epoch counter for cursor validation. Bumped (`SeqCst`)
    /// on every `write()` call and mirrored into the inner graph's
    /// epoch once the lock is held.
    epoch: AtomicU64,
}
1367
1368impl ConcurrentCodeGraph {
    /// Creates a new empty `ConcurrentCodeGraph`.
    ///
    /// The global epoch counter starts at 0; it advances on each
    /// [`write`](Self::write) acquisition.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: RwLock::new(CodeGraph::new()),
            epoch: AtomicU64::new(0),
        }
    }
1377
1378    /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1379    #[must_use]
1380    pub fn from_graph(graph: CodeGraph) -> Self {
1381        let epoch = graph.epoch();
1382        Self {
1383            inner: RwLock::new(graph),
1384            epoch: AtomicU64::new(epoch),
1385        }
1386    }
1387
    /// Acquires a read lock on the graph.
    ///
    /// Multiple readers can hold the lock simultaneously.
    /// This does not increment the epoch, so existing cursors and
    /// snapshots remain valid across read-only access.
    #[inline]
    pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
        self.inner.read()
    }
1396
    /// Acquires a write lock on the graph.
    ///
    /// Only one writer can hold the lock at a time.
    /// This increments the global epoch counter.
    ///
    /// NOTE(review): the epoch is bumped *before* the lock is acquired,
    /// so readers may observe the new epoch while the previous graph
    /// state is still published. This is conservative — cursors
    /// invalidate early, never late. If two writers race, both bumps
    /// can land before either acquires the lock; the post-lock `load`
    /// below then syncs the inner graph to the latest global value, so
    /// the inner epoch stays monotonic even though individual
    /// increments may be coalesced into one observed value.
    #[inline]
    pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
        // Increment the global epoch (before blocking on the lock).
        self.epoch.fetch_add(1, Ordering::SeqCst);
        let mut guard = self.inner.write();
        // Sync the inner graph's epoch with the global epoch. A fresh
        // `load` (rather than the fetch_add result) picks up any bumps
        // from writers that raced with us.
        guard.set_epoch(self.epoch.load(Ordering::SeqCst));
        guard
    }
1410
    /// Returns the current global epoch.
    ///
    /// This can be used to detect if the graph has been modified
    /// since a previous operation (cursor invalidation).
    ///
    /// Uses a `SeqCst` load to pair with the `SeqCst` increment in
    /// [`write`](Self::write).
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch.load(Ordering::SeqCst)
    }
1420
    /// Creates a cheap snapshot of the graph.
    ///
    /// This acquires a brief read lock to clone the Arc references
    /// (O(1) — no data is copied). The snapshot is isolated from future
    /// mutations: the write path clones-on-write via `Arc::make_mut`,
    /// so a snapshot's Arcs keep pointing at the pre-mutation data.
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        self.inner.read().snapshot()
    }
1429
1430    // ------------------------------------------------------------------
1431    // Phase 1 fact-layer provenance convenience accessors.
1432    // These acquire a brief read lock, matching the snapshot() pattern.
1433    // ------------------------------------------------------------------
1434
    /// Returns the monotonic fact-layer epoch from the underlying graph.
    ///
    /// Acquires a brief read lock, matching the `snapshot()` access
    /// pattern; the lock is released before this method returns.
    #[must_use]
    pub fn fact_epoch(&self) -> u64 {
        self.inner.read().fact_epoch()
    }
1440
1441    /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1442    #[must_use]
1443    pub fn node_provenance(
1444        &self,
1445        id: crate::graph::unified::node::id::NodeId,
1446    ) -> Option<NodeProvenance> {
1447        self.inner.read().node_provenance(id).copied()
1448    }
1449
1450    /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1451    #[must_use]
1452    pub fn edge_provenance(
1453        &self,
1454        id: crate::graph::unified::edge::id::EdgeId,
1455    ) -> Option<EdgeProvenance> {
1456        self.inner.read().edge_provenance(id).copied()
1457    }
1458
1459    /// Returns a file provenance view (acquires a brief read lock).
1460    ///
1461    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1462    #[must_use]
1463    pub fn file_provenance(
1464        &self,
1465        id: crate::graph::unified::file::id::FileId,
1466    ) -> Option<OwnedFileProvenanceView> {
1467        let guard = self.inner.read();
1468        guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1469            content_hash: *v.content_hash,
1470            indexed_at: v.indexed_at,
1471            source_uri: v.source_uri,
1472            is_external: v.is_external,
1473        })
1474    }
1475
1476    // ------------------------------------------------------------------
1477    // Phase 2 binding-plane accessors (P2U03).
1478    // ------------------------------------------------------------------
1479
1480    /// Returns the scope arena from the underlying graph (acquires a brief
1481    /// read lock).
1482    ///
1483    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1484    /// this call site.
1485    #[must_use]
1486    pub fn scope_arena(&self) -> Arc<ScopeArena> {
1487        Arc::clone(&self.inner.read().scope_arena)
1488    }
1489
1490    /// Returns the alias table from the underlying graph (acquires a brief
1491    /// read lock).
1492    ///
1493    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1494    /// this call site.
1495    #[must_use]
1496    pub fn alias_table(&self) -> Arc<AliasTable> {
1497        Arc::clone(&self.inner.read().alias_table)
1498    }
1499
1500    /// Returns the shadow table from the underlying graph (acquires a brief
1501    /// read lock).
1502    ///
1503    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1504    /// this call site.
1505    #[must_use]
1506    pub fn shadow_table(&self) -> Arc<ShadowTable> {
1507        Arc::clone(&self.inner.read().shadow_table)
1508    }
1509
1510    /// Returns the scope provenance store from the underlying graph (acquires
1511    /// a brief read lock).
1512    ///
1513    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1514    /// this call site.
1515    #[must_use]
1516    pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1517        Arc::clone(&self.inner.read().scope_provenance_store)
1518    }
1519
1520    /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1521    ///
1522    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1523    #[must_use]
1524    pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1525        self.inner.read().scope_provenance(id).cloned()
1526    }
1527
1528    /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1529    /// brief read lock).
1530    ///
1531    /// Returns `None` if no provenance record is registered for that stable id.
1532    #[must_use]
1533    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1534        self.inner.read().scope_by_stable_id(stable)
1535    }
1536
1537    /// Returns the file segment table from the underlying graph (acquires a
1538    /// brief read lock).
1539    #[must_use]
1540    pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1541        Arc::clone(&self.inner.read().file_segments)
1542    }
1543
1544    /// Attempts to acquire a read lock without blocking.
1545    ///
1546    /// Returns `None` if the lock is currently held exclusively.
1547    #[inline]
1548    #[must_use]
1549    pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1550        self.inner.try_read()
1551    }
1552
1553    /// Attempts to acquire a write lock without blocking.
1554    ///
1555    /// Returns `None` if the lock is currently held by another thread.
1556    /// If successful, increments the epoch.
1557    #[inline]
1558    pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1559        self.inner.try_write().map(|mut guard| {
1560            self.epoch.fetch_add(1, Ordering::SeqCst);
1561            guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1562            guard
1563        })
1564    }
1565}
1566
1567impl Default for ConcurrentCodeGraph {
1568    fn default() -> Self {
1569        Self::new()
1570    }
1571}
1572
1573impl fmt::Debug for ConcurrentCodeGraph {
1574    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1575        f.debug_struct("ConcurrentCodeGraph")
1576            .field("epoch", &self.epoch.load(Ordering::SeqCst))
1577            .finish_non_exhaustive()
1578    }
1579}
1580
1581/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1582/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1583#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1584pub struct OwnedFileProvenanceView {
1585    /// SHA-256 of the on-disk file bytes (owned copy).
1586    pub content_hash: [u8; 32],
1587    /// Unix-epoch seconds at which this file was registered.
1588    pub indexed_at: u64,
1589    /// Optional interned physical origin URI.
1590    pub source_uri: Option<StringId>,
1591    /// Whether this file originates from an external source.
1592    pub is_external: bool,
1593}
1594
1595/// Immutable snapshot of a `CodeGraph` for long-running queries.
1596///
1597/// `GraphSnapshot` holds Arc references to the graph components,
1598/// providing a consistent view that is isolated from concurrent mutations.
1599///
1600/// # Design
1601///
1602/// Snapshots are created via `CodeGraph::snapshot()` or
1603/// `ConcurrentCodeGraph::snapshot()`. They are:
1604///
1605/// - **Immutable**: No mutation methods available
1606/// - **Isolated**: Independent of future graph mutations
1607/// - **Cheap**: Only Arc clones, no data copying
1608/// - **Self-contained**: Can outlive the original graph/lock
1609///
1610/// # Usage
1611///
1612/// ```rust
1613/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1614///
1615/// let graph = ConcurrentCodeGraph::new();
1616///
1617/// // Create snapshot for a long query
1618/// let snapshot: GraphSnapshot = graph.snapshot();
1619///
1620/// // Snapshot can be used independently
1621/// let _epoch = snapshot.epoch();
1622/// ```
1623#[derive(Clone)]
1624pub struct GraphSnapshot {
1625    /// Node storage snapshot.
1626    nodes: Arc<NodeArena>,
1627    /// Edge storage snapshot.
1628    edges: Arc<BidirectionalEdgeStore>,
1629    /// String interner snapshot.
1630    strings: Arc<StringInterner>,
1631    /// File registry snapshot.
1632    files: Arc<FileRegistry>,
1633    /// Auxiliary indices snapshot.
1634    indices: Arc<AuxiliaryIndices>,
1635    /// Sparse macro boundary metadata snapshot.
1636    macro_metadata: Arc<NodeMetadataStore>,
1637    /// Dense node provenance snapshot (Phase 1).
1638    node_provenance: Arc<NodeProvenanceStore>,
1639    /// Dense edge provenance snapshot (Phase 1).
1640    edge_provenance: Arc<EdgeProvenanceStore>,
1641    /// Monotonic fact-layer epoch at snapshot time.
1642    fact_epoch: u64,
1643    /// Epoch at snapshot time (for cursor validation).
1644    epoch: u64,
1645    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1646    scope_arena: Arc<ScopeArena>,
1647    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1648    alias_table: Arc<AliasTable>,
1649    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1650    shadow_table: Arc<ShadowTable>,
1651    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1652    scope_provenance_store: Arc<ScopeProvenanceStore>,
1653    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1654    file_segments: Arc<FileSegmentTable>,
1655}
1656
1657impl GraphSnapshot {
1658    /// Returns a reference to the node arena.
1659    #[inline]
1660    #[must_use]
1661    pub fn nodes(&self) -> &NodeArena {
1662        &self.nodes
1663    }
1664
1665    /// Returns a reference to the bidirectional edge store.
1666    #[inline]
1667    #[must_use]
1668    pub fn edges(&self) -> &BidirectionalEdgeStore {
1669        &self.edges
1670    }
1671
1672    /// Returns a reference to the string interner.
1673    #[inline]
1674    #[must_use]
1675    pub fn strings(&self) -> &StringInterner {
1676        &self.strings
1677    }
1678
1679    /// Returns a reference to the file registry.
1680    #[inline]
1681    #[must_use]
1682    pub fn files(&self) -> &FileRegistry {
1683        &self.files
1684    }
1685
1686    /// Returns a reference to the auxiliary indices.
1687    #[inline]
1688    #[must_use]
1689    pub fn indices(&self) -> &AuxiliaryIndices {
1690        &self.indices
1691    }
1692
1693    /// Returns a reference to the macro boundary metadata store.
1694    #[inline]
1695    #[must_use]
1696    pub fn macro_metadata(&self) -> &NodeMetadataStore {
1697        &self.macro_metadata
1698    }
1699
1700    // ------------------------------------------------------------------
1701    // Phase 1 fact-layer provenance accessors (P1U09).
1702    // ------------------------------------------------------------------
1703
1704    /// Returns the monotonic fact-layer epoch.
1705    #[inline]
1706    #[must_use]
1707    pub fn fact_epoch(&self) -> u64 {
1708        self.fact_epoch
1709    }
1710
1711    /// Looks up node provenance by `NodeId`.
1712    #[inline]
1713    #[must_use]
1714    pub fn node_provenance(
1715        &self,
1716        id: crate::graph::unified::node::id::NodeId,
1717    ) -> Option<&NodeProvenance> {
1718        self.node_provenance.lookup(id)
1719    }
1720
1721    /// Looks up edge provenance by `EdgeId`.
1722    #[inline]
1723    #[must_use]
1724    pub fn edge_provenance(
1725        &self,
1726        id: crate::graph::unified::edge::id::EdgeId,
1727    ) -> Option<&EdgeProvenance> {
1728        self.edge_provenance.lookup(id)
1729    }
1730
1731    /// Returns a borrowed provenance view for a file.
1732    #[inline]
1733    #[must_use]
1734    pub fn file_provenance(
1735        &self,
1736        id: crate::graph::unified::file::id::FileId,
1737    ) -> Option<FileProvenanceView<'_>> {
1738        self.files.file_provenance(id)
1739    }
1740
1741    // ------------------------------------------------------------------
1742    // Phase 2 binding-plane accessors (P2U03).
1743    // ------------------------------------------------------------------
1744
1745    /// Returns a reference to the scope arena at snapshot time.
1746    ///
1747    /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1748    /// as it existed when `snapshot()` was called. Subsequent calls to
1749    /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1750    #[inline]
1751    #[must_use]
1752    pub fn scope_arena(&self) -> &ScopeArena {
1753        &self.scope_arena
1754    }
1755
1756    /// Returns a reference to the alias table at snapshot time.
1757    ///
1758    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1759    /// as it existed when `snapshot()` was called. Subsequent calls to
1760    /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1761    #[inline]
1762    #[must_use]
1763    pub fn alias_table(&self) -> &AliasTable {
1764        &self.alias_table
1765    }
1766
1767    /// Returns a reference to the shadow table at snapshot time.
1768    ///
1769    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1770    /// as it existed when `snapshot()` was called. Subsequent calls to
1771    /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1772    #[inline]
1773    #[must_use]
1774    pub fn shadow_table(&self) -> &ShadowTable {
1775        &self.shadow_table
1776    }
1777
1778    /// Returns a reference to the scope provenance store at snapshot time.
1779    ///
1780    /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1781    /// as it existed when `snapshot()` was called. Subsequent calls to
1782    /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1783    /// this view.
1784    #[inline]
1785    #[must_use]
1786    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1787        &self.scope_provenance_store
1788    }
1789
1790    /// Looks up scope provenance by `ScopeId` at snapshot time.
1791    ///
1792    /// Returns `None` if the slot is out of range, vacant, or the stored
1793    /// generation does not match (stale handle).
1794    #[inline]
1795    #[must_use]
1796    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1797        self.scope_provenance_store.lookup(id)
1798    }
1799
1800    /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1801    ///
1802    /// Returns `None` if no provenance record is registered for that stable id.
1803    #[inline]
1804    #[must_use]
1805    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1806        self.scope_provenance_store.scope_by_stable_id(stable)
1807    }
1808
1809    /// Returns a reference to the file segment table at snapshot time.
1810    #[inline]
1811    #[must_use]
1812    pub fn file_segments(&self) -> &FileSegmentTable {
1813        &self.file_segments
1814    }
1815
1816    /// Returns the epoch at which this snapshot was taken.
1817    ///
1818    /// This can be compared against the current graph epoch to
1819    /// detect if the graph has changed since the snapshot.
1820    #[inline]
1821    #[must_use]
1822    pub fn epoch(&self) -> u64 {
1823        self.epoch
1824    }
1825
1826    /// Returns `true` if this snapshot's epoch matches the given epoch.
1827    ///
1828    /// Use this to validate cursors before continuing pagination.
1829    #[inline]
1830    #[must_use]
1831    pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1832        self.epoch == other_epoch
1833    }
1834
1835    // ------------------------------------------------------------------
1836    // Phase 2 binding-plane facade accessor (P2U07).
1837    // ------------------------------------------------------------------
1838
1839    /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1840    ///
1841    /// The facade is the stable Phase 2 public API for scope/alias/shadow
1842    /// queries and witness-bearing resolution. It provides a single entry
1843    /// point (`resolve`) that returns both a `BindingResult` and an ordered
1844    /// step trace in a `BindingResolution`.
1845    ///
1846    /// # MVCC note
1847    ///
1848    /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1849    /// MVCC-consistent view of the graph at snapshot time. Callers from
1850    /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1851    /// pattern so the snapshot lifetime is explicit:
1852    ///
1853    /// ```rust,ignore
1854    /// // CodeGraph caller:
1855    /// let snapshot = graph.snapshot();
1856    /// let plane = snapshot.binding_plane();
1857    ///
1858    /// // ConcurrentCodeGraph caller:
1859    /// let read_guard = concurrent.read();
1860    /// let snapshot = read_guard.snapshot();
1861    /// let plane = snapshot.binding_plane();
1862    /// ```
1863    #[inline]
1864    #[must_use]
1865    pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
1866        crate::graph::unified::bind::plane::BindingPlane::new(self)
1867    }
1868
1869    // ============================================================================
1870    // Query Methods
1871    // ============================================================================
1872
1873    /// Finds nodes matching a pattern.
1874    ///
1875    /// Performs a simple substring match on node names and qualified names.
1876    /// Returns all matching node IDs.
1877    ///
1878    /// # Performance
1879    ///
1880    /// Optimized to iterate over unique strings in the interner (smaller set)
1881    /// rather than all nodes in the arena.
1882    ///
1883    /// # Arguments
1884    ///
1885    /// * `pattern` - The pattern to match (substring search)
1886    ///
1887    /// # Returns
1888    ///
1889    /// A vector of `NodeIds` for all matching nodes.
1890    #[must_use]
1891    pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
1892        let mut matches = Vec::new();
1893
1894        // 1. Scan unique strings in interner for matches
1895        for (str_id, s) in self.strings.iter() {
1896            if s.contains(pattern) {
1897                // 2. If string matches, look up all nodes with this name
1898                // Check qualified name index
1899                matches.extend_from_slice(self.indices.by_qualified_name(str_id));
1900                // Check simple name index
1901                matches.extend_from_slice(self.indices.by_name(str_id));
1902            }
1903        }
1904
1905        // Deduplicate matches (a node might match both qualified and simple name)
1906        matches.sort_unstable();
1907        matches.dedup();
1908
1909        matches
1910    }
1911
1912    /// Gets all callees of a node (functions called by this node).
1913    ///
1914    /// Queries the forward edge store for all Calls edges from this node.
1915    ///
1916    /// # Arguments
1917    ///
1918    /// * `node` - The node ID to query
1919    ///
1920    /// # Returns
1921    ///
1922    /// A vector of `NodeIds` representing functions called by this node.
1923    #[must_use]
1924    pub fn get_callees(
1925        &self,
1926        node: crate::graph::unified::node::NodeId,
1927    ) -> Vec<crate::graph::unified::node::NodeId> {
1928        use crate::graph::unified::edge::EdgeKind;
1929
1930        self.edges
1931            .edges_from(node)
1932            .into_iter()
1933            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
1934            .map(|edge| edge.target)
1935            .collect()
1936    }
1937
1938    /// Gets all callers of a node (functions that call this node).
1939    ///
1940    /// Queries the reverse edge store for all Calls edges to this node.
1941    ///
1942    /// # Arguments
1943    ///
1944    /// * `node` - The node ID to query
1945    ///
1946    /// # Returns
1947    ///
1948    /// A vector of `NodeIds` representing functions that call this node.
1949    #[must_use]
1950    pub fn get_callers(
1951        &self,
1952        node: crate::graph::unified::node::NodeId,
1953    ) -> Vec<crate::graph::unified::node::NodeId> {
1954        use crate::graph::unified::edge::EdgeKind;
1955
1956        self.edges
1957            .edges_to(node)
1958            .into_iter()
1959            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
1960            .map(|edge| edge.source)
1961            .collect()
1962    }
1963
1964    /// Iterates over all nodes in the graph.
1965    ///
1966    /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
1967    /// occupied slots in the arena.
1968    ///
1969    /// # Returns
1970    ///
1971    /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
1972    pub fn iter_nodes(
1973        &self,
1974    ) -> impl Iterator<
1975        Item = (
1976            crate::graph::unified::node::NodeId,
1977            &crate::graph::unified::storage::arena::NodeEntry,
1978        ),
1979    > {
1980        self.nodes.iter()
1981    }
1982
1983    /// Iterates over all edges in the graph.
1984    ///
1985    /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
1986    /// all edges in the forward edge store.
1987    ///
1988    /// # Returns
1989    ///
1990    /// An iterator over edge tuples.
1991    pub fn iter_edges(
1992        &self,
1993    ) -> impl Iterator<
1994        Item = (
1995            crate::graph::unified::node::NodeId,
1996            crate::graph::unified::node::NodeId,
1997            crate::graph::unified::edge::EdgeKind,
1998        ),
1999    > + '_ {
2000        // Iterate over all nodes in the arena and get their outgoing edges
2001        self.nodes.iter().flat_map(move |(node_id, _entry)| {
2002            // Get all edges from this node
2003            self.edges
2004                .edges_from(node_id)
2005                .into_iter()
2006                .map(move |edge| (node_id, edge.target, edge.kind))
2007        })
2008    }
2009
2010    /// Gets a node entry by ID.
2011    ///
2012    /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2013    /// if the ID is invalid or stale.
2014    ///
2015    /// # Arguments
2016    ///
2017    /// * `id` - The node ID to look up
2018    ///
2019    /// # Returns
2020    ///
2021    /// A reference to the `NodeEntry`, or None if not found.
2022    #[must_use]
2023    pub fn get_node(
2024        &self,
2025        id: crate::graph::unified::node::NodeId,
2026    ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2027        self.nodes.get(id)
2028    }
2029}
2030
2031impl fmt::Debug for GraphSnapshot {
2032    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2033        f.debug_struct("GraphSnapshot")
2034            .field("nodes", &self.nodes.len())
2035            .field("epoch", &self.epoch)
2036            .finish_non_exhaustive()
2037    }
2038}
2039
2040/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2041///
2042/// This lets helpers that only *read* graph state (name-matching, relation
2043/// traversal, reference lookups) be written once and called from both the
2044/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2045/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
2046pub trait GraphAccess {
2047    /// Returns the node arena (read-only).
2048    fn nodes(&self) -> &NodeArena;
2049    /// Returns the bidirectional edge store (read-only).
2050    fn edges(&self) -> &BidirectionalEdgeStore;
2051    /// Returns the string interner (read-only).
2052    fn strings(&self) -> &StringInterner;
2053    /// Returns the file registry (read-only).
2054    fn files(&self) -> &FileRegistry;
2055    /// Returns the auxiliary indices (read-only).
2056    fn indices(&self) -> &AuxiliaryIndices;
2057}
2058
impl GraphAccess for CodeGraph {
    // Each method forwards to the inherent accessor of the same name.
    // Fully-qualified call syntax (`CodeGraph::nodes(self)`) names the
    // inherent method explicitly, keeping the delegation target unambiguous
    // even though the trait method shares its name.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        CodeGraph::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        CodeGraph::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        CodeGraph::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        CodeGraph::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        CodeGraph::indices(self)
    }
}
2081
impl GraphAccess for GraphSnapshot {
    // Mirrors the `CodeGraph` impl: each method forwards to the inherent
    // accessor, spelled with fully-qualified syntax so the delegation
    // target is explicit despite the shared method names.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        GraphSnapshot::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        GraphSnapshot::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        GraphSnapshot::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        GraphSnapshot::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        GraphSnapshot::indices(self)
    }
}
2104
2105#[cfg(test)]
2106mod tests {
2107    use super::*;
2108    use crate::graph::unified::{
2109        FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2110        SymbolResolutionOutcome,
2111    };
2112
2113    fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2114        match snapshot.resolve_symbol(&SymbolQuery {
2115            symbol,
2116            file_scope: FileScope::Any,
2117            mode: ResolutionMode::Strict,
2118        }) {
2119            SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2120            SymbolResolutionOutcome::NotFound
2121            | SymbolResolutionOutcome::FileNotIndexed
2122            | SymbolResolutionOutcome::Ambiguous(_) => None,
2123        }
2124    }
2125
2126    fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2127        match snapshot.find_symbol_candidates(&SymbolQuery {
2128            symbol,
2129            file_scope: FileScope::Any,
2130            mode: ResolutionMode::AllowSuffixCandidates,
2131        }) {
2132            SymbolCandidateOutcome::Candidates(candidates) => candidates,
2133            SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2134        }
2135    }
2136
2137    #[test]
2138    fn test_code_graph_new() {
2139        let graph = CodeGraph::new();
2140        assert_eq!(graph.epoch(), 0);
2141        assert_eq!(graph.nodes().len(), 0);
2142    }
2143
2144    #[test]
2145    fn test_code_graph_default() {
2146        let graph = CodeGraph::default();
2147        assert_eq!(graph.epoch(), 0);
2148    }
2149
2150    #[test]
2151    fn test_code_graph_snapshot() {
2152        let graph = CodeGraph::new();
2153        let snapshot = graph.snapshot();
2154        assert_eq!(snapshot.epoch(), 0);
2155        assert_eq!(snapshot.nodes().len(), 0);
2156    }
2157
2158    #[test]
2159    fn test_code_graph_bump_epoch() {
2160        let mut graph = CodeGraph::new();
2161        assert_eq!(graph.epoch(), 0);
2162        assert_eq!(graph.bump_epoch(), 1);
2163        assert_eq!(graph.epoch(), 1);
2164        assert_eq!(graph.bump_epoch(), 2);
2165        assert_eq!(graph.epoch(), 2);
2166    }
2167
2168    #[test]
2169    fn test_code_graph_set_epoch() {
2170        let mut graph = CodeGraph::new();
2171        graph.set_epoch(42);
2172        assert_eq!(graph.epoch(), 42);
2173    }
2174
2175    #[test]
2176    fn test_code_graph_from_components() {
2177        let nodes = NodeArena::new();
2178        let edges = BidirectionalEdgeStore::new();
2179        let strings = StringInterner::new();
2180        let files = FileRegistry::new();
2181        let indices = AuxiliaryIndices::new();
2182        let macro_metadata = NodeMetadataStore::new();
2183
2184        let graph =
2185            CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2186        assert_eq!(graph.epoch(), 0);
2187    }
2188
2189    #[test]
2190    fn test_code_graph_mut_accessors() {
2191        let mut graph = CodeGraph::new();
2192
2193        // Access mutable references - should not panic
2194        let _nodes = graph.nodes_mut();
2195        let _edges = graph.edges_mut();
2196        let _strings = graph.strings_mut();
2197        let _files = graph.files_mut();
2198        let _indices = graph.indices_mut();
2199    }
2200
2201    #[test]
2202    fn test_code_graph_snapshot_isolation() {
2203        let mut graph = CodeGraph::new();
2204        let snapshot1 = graph.snapshot();
2205
2206        // Mutate the graph
2207        graph.bump_epoch();
2208
2209        let snapshot2 = graph.snapshot();
2210
2211        // Snapshots should have different epochs
2212        assert_eq!(snapshot1.epoch(), 0);
2213        assert_eq!(snapshot2.epoch(), 1);
2214    }
2215
2216    #[test]
2217    fn test_code_graph_debug() {
2218        let graph = CodeGraph::new();
2219        let debug_str = format!("{graph:?}");
2220        assert!(debug_str.contains("CodeGraph"));
2221        assert!(debug_str.contains("epoch"));
2222    }
2223
    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
    /// accounting: adding a ConfidenceMetadata entry with known inner
    /// Vec<String> capacities must increase heap_bytes() by exactly the sum
    /// of those capacities plus Vec<String> slot overhead.
    #[test]
    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
        let mut graph = CodeGraph::new();
        // Seed one confidence entry so the HashMap has non-zero capacity;
        // reserve slack so the next insert cannot rehash.
        graph.set_confidence({
            let mut m = HashMap::with_capacity(8);
            m.insert("seed".to_string(), ConfidenceMetadata::default());
            m
        });
        let before = graph.heap_bytes();
        let before_cap = graph.confidence.capacity();

        // Capture every capacity *before* the Strings and Vecs are moved
        // into the map; they are needed to predict the exact byte delta.
        let lim1 = String::from("no type inference");
        let lim2 = String::from("no generic specialization");
        let feat1 = String::from("rust-analyzer");
        let l1 = lim1.capacity();
        let l2 = lim2.capacity();
        let f1 = feat1.capacity();

        let limitations = vec![lim1, lim2];
        let lim_vec_cap = limitations.capacity();

        let unavailable_features = vec![feat1];
        let feat_vec_cap = unavailable_features.capacity();

        let key = String::from("rust");
        let key_cap = key.capacity();
        graph.confidence.insert(
            key,
            ConfidenceMetadata {
                limitations,
                unavailable_features,
                ..Default::default()
            },
        );
        // If the map rehashed, map-level accounting would change between the
        // two measurements and the exact-byte comparison would be invalid.
        assert_eq!(
            graph.confidence.capacity(),
            before_cap,
            "prerequisite: confidence HashMap must not rehash during the test insert",
        );

        let after = graph.heap_bytes();
        // Expected delta: key bytes + each Vec's slot capacity (in String
        // slots) + the byte capacity of every inner String.
        let expected_inner = key_cap
            + lim_vec_cap * std::mem::size_of::<String>()
            + l1
            + l2
            + feat_vec_cap * std::mem::size_of::<String>()
            + f1;
        assert_eq!(
            after - before,
            expected_inner,
            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
        );
    }
2283
    #[test]
    fn test_codegraph_heap_bytes_grows_with_content() {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // An empty graph reports some heap bytes (FileRegistry seeds index 0
        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
        // etc.) but the value must be finite and well under the 100 MB cap.
        let empty = CodeGraph::new();
        let empty_bytes = empty.heap_bytes();
        assert!(
            empty_bytes < 100 * 1024 * 1024,
            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
        );

        // Populate 32 distinct symbols, each with its own interned name,
        // qualified name, and registered file; unwrap() is acceptable in a
        // test with fresh, non-conflicting inputs.
        let mut graph = CodeGraph::new();
        for i in 0..32u32 {
            let name = format!("sym_{i}");
            let qual = format!("module::sym_{i}");
            let file = format!("file_{i}.rs");

            let name_id = graph.strings_mut().intern(&name).unwrap();
            let qual_id = graph.strings_mut().intern(&qual).unwrap();
            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();

            let entry =
                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
            let node_id = graph.nodes_mut().alloc(entry).unwrap();
            graph
                .indices_mut()
                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
        }

        // Accounting must be monotone in content and stay bounded.
        let populated_bytes = graph.heap_bytes();
        assert!(
            populated_bytes > 0,
            "populated graph should report nonzero heap bytes"
        );
        assert!(
            populated_bytes > empty_bytes,
            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
        );
        assert!(
            populated_bytes < 100 * 1024 * 1024,
            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
        );
    }
2332
2333    #[test]
2334    fn test_concurrent_code_graph_new() {
2335        let graph = ConcurrentCodeGraph::new();
2336        assert_eq!(graph.epoch(), 0);
2337    }
2338
2339    #[test]
2340    fn test_concurrent_code_graph_default() {
2341        let graph = ConcurrentCodeGraph::default();
2342        assert_eq!(graph.epoch(), 0);
2343    }
2344
2345    #[test]
2346    fn test_concurrent_code_graph_from_graph() {
2347        let mut inner = CodeGraph::new();
2348        inner.set_epoch(10);
2349        let graph = ConcurrentCodeGraph::from_graph(inner);
2350        assert_eq!(graph.epoch(), 10);
2351    }
2352
2353    #[test]
2354    fn test_concurrent_code_graph_read() {
2355        let graph = ConcurrentCodeGraph::new();
2356        let guard = graph.read();
2357        assert_eq!(guard.epoch(), 0);
2358        assert_eq!(guard.nodes().len(), 0);
2359    }
2360
2361    #[test]
2362    fn test_concurrent_code_graph_write_increments_epoch() {
2363        let graph = ConcurrentCodeGraph::new();
2364        assert_eq!(graph.epoch(), 0);
2365
2366        {
2367            let guard = graph.write();
2368            assert_eq!(guard.epoch(), 1);
2369        }
2370
2371        assert_eq!(graph.epoch(), 1);
2372
2373        {
2374            let _guard = graph.write();
2375        }
2376
2377        assert_eq!(graph.epoch(), 2);
2378    }
2379
2380    #[test]
2381    fn test_concurrent_code_graph_snapshot() {
2382        let graph = ConcurrentCodeGraph::new();
2383
2384        {
2385            let _guard = graph.write();
2386        }
2387
2388        let snapshot = graph.snapshot();
2389        assert_eq!(snapshot.epoch(), 1);
2390    }
2391
2392    #[test]
2393    fn test_concurrent_code_graph_try_read() {
2394        let graph = ConcurrentCodeGraph::new();
2395        let guard = graph.try_read();
2396        assert!(guard.is_some());
2397    }
2398
2399    #[test]
2400    fn test_concurrent_code_graph_try_write() {
2401        let graph = ConcurrentCodeGraph::new();
2402        let guard = graph.try_write();
2403        assert!(guard.is_some());
2404        assert_eq!(graph.epoch(), 1);
2405    }
2406
2407    #[test]
2408    fn test_concurrent_code_graph_debug() {
2409        let graph = ConcurrentCodeGraph::new();
2410        let debug_str = format!("{graph:?}");
2411        assert!(debug_str.contains("ConcurrentCodeGraph"));
2412        assert!(debug_str.contains("epoch"));
2413    }
2414
2415    #[test]
2416    fn test_graph_snapshot_accessors() {
2417        let graph = CodeGraph::new();
2418        let snapshot = graph.snapshot();
2419
2420        // All accessors should work
2421        let _nodes = snapshot.nodes();
2422        let _edges = snapshot.edges();
2423        let _strings = snapshot.strings();
2424        let _files = snapshot.files();
2425        let _indices = snapshot.indices();
2426        let _epoch = snapshot.epoch();
2427    }
2428
2429    #[test]
2430    fn test_graph_snapshot_epoch_matches() {
2431        let graph = CodeGraph::new();
2432        let snapshot = graph.snapshot();
2433
2434        assert!(snapshot.epoch_matches(0));
2435        assert!(!snapshot.epoch_matches(1));
2436    }
2437
2438    #[test]
2439    fn test_graph_snapshot_clone() {
2440        let graph = CodeGraph::new();
2441        let snapshot1 = graph.snapshot();
2442        let snapshot2 = snapshot1.clone();
2443
2444        assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2445    }
2446
2447    #[test]
2448    fn test_graph_snapshot_debug() {
2449        let graph = CodeGraph::new();
2450        let snapshot = graph.snapshot();
2451        let debug_str = format!("{snapshot:?}");
2452        assert!(debug_str.contains("GraphSnapshot"));
2453        assert!(debug_str.contains("epoch"));
2454    }
2455
2456    #[test]
2457    fn test_multiple_readers() {
2458        let graph = ConcurrentCodeGraph::new();
2459
2460        // Multiple readers should be able to acquire locks simultaneously
2461        let guard1 = graph.read();
2462        let guard2 = graph.read();
2463        let guard3 = graph.read();
2464
2465        assert_eq!(guard1.epoch(), 0);
2466        assert_eq!(guard2.epoch(), 0);
2467        assert_eq!(guard3.epoch(), 0);
2468    }
2469
2470    #[test]
2471    fn test_code_graph_clone() {
2472        let mut graph = CodeGraph::new();
2473        graph.bump_epoch();
2474
2475        let cloned = graph.clone();
2476        assert_eq!(cloned.epoch(), 1);
2477    }
2478
2479    #[test]
2480    fn test_epoch_wrapping() {
2481        let mut graph = CodeGraph::new();
2482        graph.set_epoch(u64::MAX);
2483        let new_epoch = graph.bump_epoch();
2484        assert_eq!(new_epoch, 0); // Should wrap around
2485    }
2486
2487    // ============================================================================
2488    // Query method tests
2489    // ============================================================================
2490
2491    #[test]
2492    fn test_snapshot_resolve_symbol() {
2493        use crate::graph::unified::node::NodeKind;
2494        use crate::graph::unified::storage::arena::NodeEntry;
2495        use std::path::Path;
2496
2497        let mut graph = CodeGraph::new();
2498
2499        // Add some nodes with qualified names
2500        let name_id = graph.strings_mut().intern("test_func").unwrap();
2501        let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2502        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2503
2504        let entry =
2505            NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2506
2507        let node_id = graph.nodes_mut().alloc(entry).unwrap();
2508        graph.indices_mut().add(
2509            node_id,
2510            NodeKind::Function,
2511            name_id,
2512            Some(qual_name_id),
2513            file_id,
2514        );
2515
2516        let snapshot = graph.snapshot();
2517
2518        // Find by qualified name
2519        let found = resolve_symbol_strict(&snapshot, "module::test_func");
2520        assert_eq!(found, Some(node_id));
2521
2522        // Find by exact simple name
2523        let found2 = resolve_symbol_strict(&snapshot, "test_func");
2524        assert_eq!(found2, Some(node_id));
2525
2526        // Not found
2527        assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2528    }
2529
2530    #[test]
2531    fn test_snapshot_find_by_pattern() {
2532        use crate::graph::unified::node::NodeKind;
2533        use crate::graph::unified::storage::arena::NodeEntry;
2534        use std::path::Path;
2535
2536        let mut graph = CodeGraph::new();
2537
2538        // Add nodes with different names
2539        let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2540        let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2541        let name3 = graph.strings_mut().intern("qux_test").unwrap();
2542        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2543
2544        let node1 = graph
2545            .nodes_mut()
2546            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2547            .unwrap();
2548        let node2 = graph
2549            .nodes_mut()
2550            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2551            .unwrap();
2552        let node3 = graph
2553            .nodes_mut()
2554            .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2555            .unwrap();
2556
2557        graph
2558            .indices_mut()
2559            .add(node1, NodeKind::Function, name1, None, file_id);
2560        graph
2561            .indices_mut()
2562            .add(node2, NodeKind::Function, name2, None, file_id);
2563        graph
2564            .indices_mut()
2565            .add(node3, NodeKind::Function, name3, None, file_id);
2566
2567        let snapshot = graph.snapshot();
2568
2569        // Find by pattern
2570        let matches = snapshot.find_by_pattern("bar");
2571        assert_eq!(matches.len(), 2);
2572        assert!(matches.contains(&node1));
2573        assert!(matches.contains(&node2));
2574
2575        // Find single match
2576        let matches = snapshot.find_by_pattern("qux");
2577        assert_eq!(matches.len(), 1);
2578        assert_eq!(matches[0], node3);
2579
2580        // No matches
2581        let matches = snapshot.find_by_pattern("nonexistent");
2582        assert!(matches.is_empty());
2583    }
2584
2585    #[test]
2586    fn test_snapshot_get_callees() {
2587        use crate::graph::unified::edge::EdgeKind;
2588        use crate::graph::unified::node::NodeKind;
2589        use crate::graph::unified::storage::arena::NodeEntry;
2590        use std::path::Path;
2591
2592        let mut graph = CodeGraph::new();
2593
2594        // Create caller and callee nodes
2595        let caller_name = graph.strings_mut().intern("caller").unwrap();
2596        let callee1_name = graph.strings_mut().intern("callee1").unwrap();
2597        let callee2_name = graph.strings_mut().intern("callee2").unwrap();
2598        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2599
2600        let caller_id = graph
2601            .nodes_mut()
2602            .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
2603            .unwrap();
2604        let callee1_id = graph
2605            .nodes_mut()
2606            .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
2607            .unwrap();
2608        let callee2_id = graph
2609            .nodes_mut()
2610            .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
2611            .unwrap();
2612
2613        // Add call edges
2614        graph.edges_mut().add_edge(
2615            caller_id,
2616            callee1_id,
2617            EdgeKind::Calls {
2618                argument_count: 0,
2619                is_async: false,
2620            },
2621            file_id,
2622        );
2623        graph.edges_mut().add_edge(
2624            caller_id,
2625            callee2_id,
2626            EdgeKind::Calls {
2627                argument_count: 0,
2628                is_async: false,
2629            },
2630            file_id,
2631        );
2632
2633        let snapshot = graph.snapshot();
2634
2635        // Query callees
2636        let callees = snapshot.get_callees(caller_id);
2637        assert_eq!(callees.len(), 2);
2638        assert!(callees.contains(&callee1_id));
2639        assert!(callees.contains(&callee2_id));
2640
2641        // Node with no callees
2642        let callees = snapshot.get_callees(callee1_id);
2643        assert!(callees.is_empty());
2644    }
2645
2646    #[test]
2647    fn test_snapshot_get_callers() {
2648        use crate::graph::unified::edge::EdgeKind;
2649        use crate::graph::unified::node::NodeKind;
2650        use crate::graph::unified::storage::arena::NodeEntry;
2651        use std::path::Path;
2652
2653        let mut graph = CodeGraph::new();
2654
2655        // Create caller and callee nodes
2656        let caller1_name = graph.strings_mut().intern("caller1").unwrap();
2657        let caller2_name = graph.strings_mut().intern("caller2").unwrap();
2658        let callee_name = graph.strings_mut().intern("callee").unwrap();
2659        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2660
2661        let caller1_id = graph
2662            .nodes_mut()
2663            .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
2664            .unwrap();
2665        let caller2_id = graph
2666            .nodes_mut()
2667            .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
2668            .unwrap();
2669        let callee_id = graph
2670            .nodes_mut()
2671            .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
2672            .unwrap();
2673
2674        // Add call edges
2675        graph.edges_mut().add_edge(
2676            caller1_id,
2677            callee_id,
2678            EdgeKind::Calls {
2679                argument_count: 0,
2680                is_async: false,
2681            },
2682            file_id,
2683        );
2684        graph.edges_mut().add_edge(
2685            caller2_id,
2686            callee_id,
2687            EdgeKind::Calls {
2688                argument_count: 0,
2689                is_async: false,
2690            },
2691            file_id,
2692        );
2693
2694        let snapshot = graph.snapshot();
2695
2696        // Query callers
2697        let callers = snapshot.get_callers(callee_id);
2698        assert_eq!(callers.len(), 2);
2699        assert!(callers.contains(&caller1_id));
2700        assert!(callers.contains(&caller2_id));
2701
2702        // Node with no callers
2703        let callers = snapshot.get_callers(caller1_id);
2704        assert!(callers.is_empty());
2705    }
2706
2707    #[test]
2708    fn test_snapshot_find_symbol_candidates() {
2709        use crate::graph::unified::node::NodeKind;
2710        use crate::graph::unified::storage::arena::NodeEntry;
2711        use std::path::Path;
2712
2713        let mut graph = CodeGraph::new();
2714
2715        // Add nodes with same symbol name but different qualified names
2716        let symbol_name = graph.strings_mut().intern("test").unwrap();
2717        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2718
2719        let node1 = graph
2720            .nodes_mut()
2721            .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
2722            .unwrap();
2723        let node2 = graph
2724            .nodes_mut()
2725            .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
2726            .unwrap();
2727
2728        // Add a different symbol
2729        let other_name = graph.strings_mut().intern("other").unwrap();
2730        let node3 = graph
2731            .nodes_mut()
2732            .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
2733            .unwrap();
2734
2735        graph
2736            .indices_mut()
2737            .add(node1, NodeKind::Function, symbol_name, None, file_id);
2738        graph
2739            .indices_mut()
2740            .add(node2, NodeKind::Method, symbol_name, None, file_id);
2741        graph
2742            .indices_mut()
2743            .add(node3, NodeKind::Function, other_name, None, file_id);
2744
2745        let snapshot = graph.snapshot();
2746
2747        // Find by symbol
2748        let matches = candidate_nodes(&snapshot, "test");
2749        assert_eq!(matches.len(), 2);
2750        assert!(matches.contains(&node1));
2751        assert!(matches.contains(&node2));
2752
2753        // Find other symbol
2754        let matches = candidate_nodes(&snapshot, "other");
2755        assert_eq!(matches.len(), 1);
2756        assert_eq!(matches[0], node3);
2757
2758        // No matches
2759        let matches = candidate_nodes(&snapshot, "nonexistent");
2760        assert!(matches.is_empty());
2761    }
2762
2763    #[test]
2764    fn test_snapshot_iter_nodes() {
2765        use crate::graph::unified::node::NodeKind;
2766        use crate::graph::unified::storage::arena::NodeEntry;
2767        use std::path::Path;
2768
2769        let mut graph = CodeGraph::new();
2770
2771        // Add some nodes
2772        let name1 = graph.strings_mut().intern("func1").unwrap();
2773        let name2 = graph.strings_mut().intern("func2").unwrap();
2774        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2775
2776        let node1 = graph
2777            .nodes_mut()
2778            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2779            .unwrap();
2780        let node2 = graph
2781            .nodes_mut()
2782            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2783            .unwrap();
2784
2785        let snapshot = graph.snapshot();
2786
2787        // Iterate nodes
2788        let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
2789        assert_eq!(snapshot_nodes.len(), 2);
2790
2791        let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
2792        assert!(node_ids.contains(&node1));
2793        assert!(node_ids.contains(&node2));
2794    }
2795
2796    #[test]
2797    fn test_snapshot_iter_edges() {
2798        use crate::graph::unified::edge::EdgeKind;
2799        use crate::graph::unified::node::NodeKind;
2800        use crate::graph::unified::storage::arena::NodeEntry;
2801        use std::path::Path;
2802
2803        let mut graph = CodeGraph::new();
2804
2805        // Create nodes
2806        let name1 = graph.strings_mut().intern("func1").unwrap();
2807        let name2 = graph.strings_mut().intern("func2").unwrap();
2808        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2809
2810        let node1 = graph
2811            .nodes_mut()
2812            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2813            .unwrap();
2814        let node2 = graph
2815            .nodes_mut()
2816            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2817            .unwrap();
2818
2819        // Add edges
2820        graph.edges_mut().add_edge(
2821            node1,
2822            node2,
2823            EdgeKind::Calls {
2824                argument_count: 0,
2825                is_async: false,
2826            },
2827            file_id,
2828        );
2829
2830        let snapshot = graph.snapshot();
2831
2832        // Iterate edges
2833        let edges: Vec<_> = snapshot.iter_edges().collect();
2834        assert_eq!(edges.len(), 1);
2835
2836        let (src, tgt, kind) = &edges[0];
2837        assert_eq!(*src, node1);
2838        assert_eq!(*tgt, node2);
2839        assert!(matches!(
2840            kind,
2841            EdgeKind::Calls {
2842                argument_count: 0,
2843                is_async: false
2844            }
2845        ));
2846    }
2847
2848    #[test]
2849    fn test_snapshot_get_node() {
2850        use crate::graph::unified::node::NodeId;
2851        use crate::graph::unified::node::NodeKind;
2852        use crate::graph::unified::storage::arena::NodeEntry;
2853        use std::path::Path;
2854
2855        let mut graph = CodeGraph::new();
2856
2857        // Add a node
2858        let name = graph.strings_mut().intern("test_func").unwrap();
2859        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2860
2861        let node_id = graph
2862            .nodes_mut()
2863            .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
2864            .unwrap();
2865
2866        let snapshot = graph.snapshot();
2867
2868        // Get node
2869        let entry = snapshot.get_node(node_id);
2870        assert!(entry.is_some());
2871        assert_eq!(entry.unwrap().kind, NodeKind::Function);
2872
2873        // Invalid node
2874        let invalid_id = NodeId::INVALID;
2875        assert!(snapshot.get_node(invalid_id).is_none());
2876    }
2877
2878    #[test]
2879    fn test_snapshot_query_empty_graph() {
2880        use crate::graph::unified::node::NodeId;
2881
2882        let graph = CodeGraph::new();
2883        let snapshot = graph.snapshot();
2884
2885        // All queries should return empty on empty graph
2886        assert!(resolve_symbol_strict(&snapshot, "test").is_none());
2887        assert!(snapshot.find_by_pattern("test").is_empty());
2888        assert!(candidate_nodes(&snapshot, "test").is_empty());
2889
2890        let dummy_id = NodeId::new(0, 1);
2891        assert!(snapshot.get_callees(dummy_id).is_empty());
2892        assert!(snapshot.get_callers(dummy_id).is_empty());
2893
2894        assert_eq!(snapshot.iter_nodes().count(), 0);
2895        assert_eq!(snapshot.iter_edges().count(), 0);
2896    }
2897
2898    #[test]
2899    fn test_snapshot_edge_filtering_by_kind() {
2900        use crate::graph::unified::edge::EdgeKind;
2901        use crate::graph::unified::node::NodeKind;
2902        use crate::graph::unified::storage::arena::NodeEntry;
2903        use std::path::Path;
2904
2905        let mut graph = CodeGraph::new();
2906
2907        // Create nodes
2908        let name1 = graph.strings_mut().intern("func1").unwrap();
2909        let name2 = graph.strings_mut().intern("func2").unwrap();
2910        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2911
2912        let node1 = graph
2913            .nodes_mut()
2914            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2915            .unwrap();
2916        let node2 = graph
2917            .nodes_mut()
2918            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2919            .unwrap();
2920
2921        // Add different kinds of edges
2922        graph.edges_mut().add_edge(
2923            node1,
2924            node2,
2925            EdgeKind::Calls {
2926                argument_count: 0,
2927                is_async: false,
2928            },
2929            file_id,
2930        );
2931        graph
2932            .edges_mut()
2933            .add_edge(node1, node2, EdgeKind::References, file_id);
2934
2935        let snapshot = graph.snapshot();
2936
2937        // get_callees should only return Calls edges
2938        let callees = snapshot.get_callees(node1);
2939        assert_eq!(callees.len(), 1);
2940        assert_eq!(callees[0], node2);
2941
2942        // iter_edges returns all edges regardless of kind
2943        let edges: Vec<_> = snapshot.iter_edges().collect();
2944        assert_eq!(edges.len(), 2);
2945    }
2946
2947    // -------- reverse_import_index tests --------
2948
2949    /// Helper: build an empty graph with the given file paths registered, a
2950    /// single placeholder node allocated per file, and indices rebuilt so
2951    /// `by_file` returns the per-file node sets. Returns the file IDs in
2952    /// the order passed in and the per-file node ID.
2953    #[cfg(test)]
2954    fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
2955        use crate::graph::unified::node::NodeKind;
2956        use crate::graph::unified::storage::arena::NodeEntry;
2957        use std::path::Path;
2958
2959        let mut graph = CodeGraph::new();
2960        let placeholder_name = graph.strings_mut().intern("sym").unwrap();
2961        let mut file_ids = Vec::with_capacity(files.len());
2962        let mut node_ids = Vec::with_capacity(files.len());
2963        for path in files {
2964            let file_id = graph.files_mut().register(Path::new(path)).unwrap();
2965            let node_id = graph
2966                .nodes_mut()
2967                .alloc(NodeEntry::new(
2968                    NodeKind::Function,
2969                    placeholder_name,
2970                    file_id,
2971                ))
2972                .unwrap();
2973            file_ids.push(file_id);
2974            node_ids.push(node_id);
2975        }
2976        graph.rebuild_indices();
2977        (graph, file_ids, node_ids)
2978    }
2979
2980    /// Helper: add an `Imports` edge from `source_node` (in importer file) to
2981    /// `target_node` (in exporter file). The edge is recorded against the
2982    /// importer file so `clear_file` cleanup would behave identically to
2983    /// production Pass 4 writes.
2984    #[cfg(test)]
2985    fn add_import_edge(
2986        graph: &mut CodeGraph,
2987        source_node: NodeId,
2988        target_node: NodeId,
2989        importer_file: FileId,
2990    ) {
2991        graph.edges_mut().add_edge(
2992            source_node,
2993            target_node,
2994            EdgeKind::Imports {
2995                alias: None,
2996                is_wildcard: false,
2997            },
2998            importer_file,
2999        );
3000    }
3001
3002    #[test]
3003    fn reverse_import_index_empty_graph_returns_empty() {
3004        let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3005        assert!(graph.reverse_import_index(files[0]).is_empty());
3006    }
3007
3008    #[test]
3009    fn reverse_import_index_single_importer() {
3010        // File A imports a symbol exported by file B. Reverse index of B
3011        // should return exactly [A]; reverse index of A should be empty.
3012        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3013        let (a, b) = (files[0], files[1]);
3014        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3015
3016        let importers_of_b = graph.reverse_import_index(b);
3017        assert_eq!(importers_of_b, vec![a]);
3018
3019        let importers_of_a = graph.reverse_import_index(a);
3020        assert!(
3021            importers_of_a.is_empty(),
3022            "A has no inbound Imports edges; reverse index must be empty"
3023        );
3024    }
3025
3026    #[test]
3027    fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3028        // A, B, C all import from D. Reverse index of D must contain each
3029        // importer exactly once, sorted ascending by FileId.
3030        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3031        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3032        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3033        add_import_edge(&mut graph, nodes[1], nodes[3], b);
3034        add_import_edge(&mut graph, nodes[2], nodes[3], c);
3035        // Add a duplicate edge from A to D to confirm dedup behavior.
3036        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3037
3038        let importers_of_d = graph.reverse_import_index(d);
3039        assert_eq!(importers_of_d, vec![a, b, c]);
3040        // Sort invariant: Vec is ascending by raw index.
3041        let mut sorted = importers_of_d.clone();
3042        sorted.sort();
3043        assert_eq!(importers_of_d, sorted);
3044    }
3045
3046    #[test]
3047    fn reverse_import_index_filters_non_import_edges() {
3048        // A `Calls` edge from A into B must not contribute to B's reverse
3049        // import index. Only `EdgeKind::Imports` edges count.
3050        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3051        let (a, b) = (files[0], files[1]);
3052        graph.edges_mut().add_edge(
3053            nodes[0],
3054            nodes[1],
3055            EdgeKind::Calls {
3056                argument_count: 0,
3057                is_async: false,
3058            },
3059            a,
3060        );
3061        graph
3062            .edges_mut()
3063            .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3064
3065        assert!(
3066            graph.reverse_import_index(b).is_empty(),
3067            "non-Imports edges must not register as importers"
3068        );
3069    }
3070
3071    #[test]
3072    fn reverse_import_index_elides_self_imports() {
3073        // An Imports edge whose source and target are both in the same file
3074        // is a self-import; the caller's own file must not appear in its own
3075        // reverse index.
3076        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3077        let a = files[0];
3078        // Add a second node in the same file so we have a distinct source.
3079        let name2 = graph.strings_mut().intern("sym2").unwrap();
3080        let second_in_a = graph
3081            .nodes_mut()
3082            .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3083                crate::graph::unified::node::NodeKind::Function,
3084                name2,
3085                a,
3086            ))
3087            .unwrap();
3088        graph.rebuild_indices();
3089        add_import_edge(&mut graph, second_in_a, nodes[0], a);
3090
3091        assert!(
3092            graph.reverse_import_index(a).is_empty(),
3093            "self-imports must be elided from reverse index"
3094        );
3095    }
3096
3097    #[test]
3098    fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3099        // Two files: A has both Calls and Imports edges into B. Reverse
3100        // index must return exactly [A] — the Calls edge contributes
3101        // nothing.
3102        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3103        let (a, b) = (files[0], files[1]);
3104        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3105        graph.edges_mut().add_edge(
3106            nodes[0],
3107            nodes[1],
3108            EdgeKind::Calls {
3109                argument_count: 0,
3110                is_async: false,
3111            },
3112            a,
3113        );
3114
3115        assert_eq!(graph.reverse_import_index(b), vec![a]);
3116    }
3117
3118    #[test]
3119    fn reverse_import_index_uninitialized_file_returns_empty() {
3120        // Querying a FileId that is not registered in the graph must return
3121        // an empty Vec, not panic.
3122        let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3123        let bogus = FileId::new(9999);
3124        assert!(
3125            graph.reverse_import_index(bogus).is_empty(),
3126            "unknown FileId must return empty Vec without panicking"
3127        );
3128    }
3129
3130    #[test]
3131    fn reverse_import_index_skips_tombstoned_source_nodes() {
3132        // In the incremental rebuild path the prior graph retains tombstoned
3133        // arena slots for nodes belonging to closure files that have been
3134        // removed but not yet fully compacted. reverse_import_index is
3135        // called on that prior graph to widen the closure, so its
3136        // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3137        // is a semantically important branch, not a theoretical one.
3138        // This test exercises it directly by tombstoning the source node of
3139        // an Imports edge and asserting the edge silently disappears from
3140        // the reverse index.
3141        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3142        let (a, b) = (files[0], files[1]);
3143        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3144        // Sanity: before tombstoning, A shows up as the importer of B.
3145        assert_eq!(graph.reverse_import_index(b), vec![a]);
3146        // Tombstone the source node via the arena (generation bump). The
3147        // Imports edge still exists in the edge store but its source NodeId
3148        // is now stale; `nodes.get(edge_ref.source)` returns None and the
3149        // guard skips the edge.
3150        let removed = graph.nodes_mut().remove(nodes[0]);
3151        assert!(
3152            removed.is_some(),
3153            "arena.remove must succeed for a live node"
3154        );
3155        assert!(
3156            graph.nodes().get(nodes[0]).is_none(),
3157            "tombstoned lookup must return None"
3158        );
3159        assert!(
3160            graph.reverse_import_index(b).is_empty(),
3161            "Imports edges whose source is tombstoned must be silently skipped"
3162        );
3163    }
3164
3165    // ------------------------------------------------------------------
3166    // Task 4 Step 2 — CodeGraph::remove_file
3167    // ------------------------------------------------------------------
3168
3169    /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3170    /// of intra- and inter-file edges. Each call site produces a
3171    /// canonical topology so tests below can assert bit-level on edge
3172    /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3173    /// file_b_nodes)`.
3174    fn seed_two_file_graph(
3175        per_file: usize,
3176    ) -> (
3177        CodeGraph,
3178        crate::graph::unified::file::FileId,
3179        crate::graph::unified::file::FileId,
3180        Vec<NodeId>,
3181        Vec<NodeId>,
3182    ) {
3183        use crate::graph::unified::edge::EdgeKind;
3184        use crate::graph::unified::node::NodeKind;
3185        use crate::graph::unified::storage::arena::NodeEntry;
3186        use std::path::Path;
3187
3188        let mut graph = CodeGraph::new();
3189        let sym = graph.strings_mut().intern("sym").expect("intern");
3190        let file_a = graph
3191            .files_mut()
3192            .register(Path::new("/tmp/remove_file_test/a.rs"))
3193            .expect("register a");
3194        let file_b = graph
3195            .files_mut()
3196            .register(Path::new("/tmp/remove_file_test/b.rs"))
3197            .expect("register b");
3198
3199        let mut file_a_nodes = Vec::with_capacity(per_file);
3200        let mut file_b_nodes = Vec::with_capacity(per_file);
3201
3202        for _ in 0..per_file {
3203            let n = graph
3204                .nodes_mut()
3205                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3206                .expect("alloc a-node");
3207            file_a_nodes.push(n);
3208            graph.files_mut().record_node(file_a, n);
3209            graph
3210                .indices_mut()
3211                .add(n, NodeKind::Function, sym, None, file_a);
3212        }
3213        for _ in 0..per_file {
3214            let n = graph
3215                .nodes_mut()
3216                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3217                .expect("alloc b-node");
3218            file_b_nodes.push(n);
3219            graph.files_mut().record_node(file_b, n);
3220            graph
3221                .indices_mut()
3222                .add(n, NodeKind::Function, sym, None, file_b);
3223        }
3224
3225        // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3226        // b[i] -> b[i+1]. These are the ones that must die when the
3227        // corresponding file is removed.
3228        for i in 0..per_file.saturating_sub(1) {
3229            graph.edges_mut().add_edge(
3230                file_a_nodes[i],
3231                file_a_nodes[i + 1],
3232                EdgeKind::Calls {
3233                    argument_count: 0,
3234                    is_async: false,
3235                },
3236                file_a,
3237            );
3238            graph.edges_mut().add_edge(
3239                file_b_nodes[i],
3240                file_b_nodes[i + 1],
3241                EdgeKind::Calls {
3242                    argument_count: 0,
3243                    is_async: false,
3244                },
3245                file_b,
3246            );
3247        }
3248        // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3249        // when *either* endpoint's file is removed (plan §F.2).
3250        graph.edges_mut().add_edge(
3251            file_a_nodes[0],
3252            file_b_nodes[0],
3253            EdgeKind::Calls {
3254                argument_count: 0,
3255                is_async: false,
3256            },
3257            file_a,
3258        );
3259        graph.edges_mut().add_edge(
3260            file_b_nodes[0],
3261            file_a_nodes[0],
3262            EdgeKind::Calls {
3263                argument_count: 0,
3264                is_async: false,
3265            },
3266            file_b,
3267        );
3268
3269        (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3270    }
3271
3272    #[test]
3273    fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3274        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3275
3276        let returned = graph.remove_file(file_a);
3277
3278        // Returned list must equal the original per-file bucket
3279        // membership, deterministically.
3280        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3281        let expected_set: std::collections::HashSet<NodeId> =
3282            file_a_nodes.iter().copied().collect();
3283        assert_eq!(
3284            returned_set, expected_set,
3285            "remove_file must return exactly the file_a nodes drained from the bucket"
3286        );
3287
3288        // Every returned NodeId is gone from the arena.
3289        for nid in &file_a_nodes {
3290            assert!(
3291                graph.nodes().get(*nid).is_none(),
3292                "node {nid:?} from removed file must be tombstoned in arena"
3293            );
3294        }
3295    }
3296
3297    #[test]
3298    fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
3299        use crate::graph::unified::edge::EdgeKind;
3300
3301        let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
3302
3303        // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
3304        // 2 cross-file edges (a0↔b0).
3305        let before_delta = graph.edges().stats().forward.delta_edge_count;
3306        assert_eq!(
3307            before_delta, 6,
3308            "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
3309        );
3310
3311        let _ = graph.remove_file(file_a);
3312
3313        // Forward delta after removal: all 2 intra-A edges are gone,
3314        // both cross edges are gone, only the 2 intra-B edges remain.
3315        let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
3316        assert_eq!(
3317            after_delta_forward, 2,
3318            "only intra-B forward edges must remain after removing file_a"
3319        );
3320        let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
3321        assert_eq!(
3322            after_delta_reverse, 2,
3323            "only intra-B reverse edges must remain after removing file_a"
3324        );
3325
3326        // Cross-file edge from b[0] -> a[0] must no longer be visible
3327        // from either direction, because a[0] is tombstoned.
3328        let b0 = file_b_nodes[0];
3329        let a0 = file_a_nodes[0];
3330        let remaining_from_b0: Vec<_> = graph
3331            .edges()
3332            .edges_from(b0)
3333            .into_iter()
3334            .filter(|e| {
3335                matches!(
3336                    e.kind,
3337                    EdgeKind::Calls {
3338                        argument_count: 0,
3339                        is_async: false
3340                    }
3341                )
3342            })
3343            .collect();
3344        assert!(
3345            !remaining_from_b0.iter().any(|e| e.target == a0),
3346            "edge b0 -> a0 must be gone after remove_file(file_a)"
3347        );
3348        let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
3349        assert!(
3350            remaining_to_a0.is_empty(),
3351            "every edge targeting the tombstoned a0 must be gone"
3352        );
3353    }
3354
3355    #[test]
3356    fn code_graph_remove_file_drops_file_registry_entry() {
3357        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
3358
3359        assert!(
3360            graph.files().resolve(file_a).is_some(),
3361            "seed registered file_a"
3362        );
3363        assert!(
3364            !graph.files().nodes_for_file(file_a).is_empty(),
3365            "seed populated the file_a bucket"
3366        );
3367
3368        let _ = graph.remove_file(file_a);
3369
3370        assert!(
3371            graph.files().resolve(file_a).is_none(),
3372            "FileRegistry::resolve must return None after remove_file"
3373        );
3374        assert!(
3375            graph.files().nodes_for_file(file_a).is_empty(),
3376            "per-file bucket for file_a must be drained"
3377        );
3378    }
3379
3380    #[test]
3381    fn code_graph_remove_file_is_idempotent_on_unknown_file() {
3382        use crate::graph::unified::file::FileId;
3383        let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
3384
3385        // Snapshot state before idempotent no-op.
3386        let nodes_before = graph.nodes().len();
3387        let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
3388        let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
3389        let files_before = graph.files().len();
3390
3391        // A FileId that was never registered: the caller may legitimately
3392        // receive a bogus id from stale indexing state. `remove_file`
3393        // must be a silent no-op.
3394        let bogus = FileId::new(9999);
3395        let returned = graph.remove_file(bogus);
3396        assert!(
3397            returned.is_empty(),
3398            "remove_file on unknown FileId must return an empty Vec"
3399        );
3400
3401        assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
3402        assert_eq!(
3403            graph.edges().stats().forward.delta_edge_count,
3404            delta_fwd_before,
3405            "forward delta unchanged"
3406        );
3407        assert_eq!(
3408            graph.edges().stats().reverse.delta_edge_count,
3409            delta_rev_before,
3410            "reverse delta unchanged"
3411        );
3412        assert_eq!(graph.files().len(), files_before, "file count unchanged");
3413    }
3414
3415    #[test]
3416    fn code_graph_remove_file_clears_file_segments_entry() {
3417        // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
3418        // must be cleared on `remove_file`. Without this, a later
3419        // `FileId` recycle (via `FileRegistry::free_list`) would
3420        // inherit the previous file's stale node-range and
3421        // `reindex_files` (`build/reindex.rs`) would tombstone the
3422        // wrong slots. This unit test seeds a segment entry, removes
3423        // the file, and asserts the entry is gone.
3424        use crate::graph::unified::storage::segment::FileSegmentTable;
3425
3426        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3427
3428        // Seed a segment for file A. Production code sets this via
3429        // Phase 3 parallel commit; here we go through the crate-
3430        // internal accessor because we are a unit test inside the
3431        // crate. (The feature-gated `test_only_record_file_segment`
3432        // public helper exists for the integration tests in
3433        // `sqry-core/tests/incremental_remove_file_scale.rs`.)
3434        let first_index = file_a_nodes
3435            .iter()
3436            .map(|n| n.index())
3437            .min()
3438            .expect("per_file = 3");
3439        let last_index = file_a_nodes
3440            .iter()
3441            .map(|n| n.index())
3442            .max()
3443            .expect("per_file = 3");
3444        let slot_count = last_index - first_index + 1;
3445        let table: &mut FileSegmentTable = graph.file_segments_mut();
3446        table.record_range(file_a, first_index, slot_count);
3447        assert!(
3448            graph.file_segments().get(file_a).is_some(),
3449            "seed must install a segment for file_a before remove_file"
3450        );
3451
3452        // Remove the file.
3453        let _ = graph.remove_file(file_a);
3454
3455        // The segment entry must be gone. `FileSegmentTable::remove`
3456        // is idempotent (a no-op on unknown ids), so this assertion
3457        // holds whether or not the seeded range was contiguous.
3458        assert!(
3459            graph.file_segments().get(file_a).is_none(),
3460            "remove_file must clear the FileSegmentTable entry for file_a"
3461        );
3462    }
3463
3464    #[test]
3465    fn code_graph_remove_file_repeated_calls_are_idempotent() {
3466        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3467
3468        // First call does the work.
3469        let first = graph.remove_file(file_a);
3470        assert_eq!(first.len(), file_a_nodes.len());
3471
3472        // Snapshot post-first-call state.
3473        let nodes_after = graph.nodes().len();
3474        let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
3475        let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
3476        let files_after = graph.files().len();
3477
3478        // Second call must be a silent no-op — the bucket is empty,
3479        // the file is unregistered, and the arena slots are already
3480        // tombstoned (NodeArena::remove ignores stale generations).
3481        let second = graph.remove_file(file_a);
3482        assert!(
3483            second.is_empty(),
3484            "second remove_file on the same file must return an empty Vec"
3485        );
3486
3487        assert_eq!(graph.nodes().len(), nodes_after);
3488        assert_eq!(
3489            graph.edges().stats().forward.delta_edge_count,
3490            delta_fwd_after
3491        );
3492        assert_eq!(
3493            graph.edges().stats().reverse.delta_edge_count,
3494            delta_rev_after
3495        );
3496        assert_eq!(graph.files().len(), files_after);
3497    }
3498}