// sqry_core/graph/unified/concurrent/graph.rs
1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19 ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
25use crate::graph::unified::file::FileId;
26use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
27use crate::graph::unified::resolution::display_graph_qualified_name;
28use crate::graph::unified::storage::arena::NodeArena;
29use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
30use crate::graph::unified::storage::indices::AuxiliaryIndices;
31use crate::graph::unified::storage::interner::StringInterner;
32use crate::graph::unified::storage::metadata::NodeMetadataStore;
33use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
34use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
35use crate::graph::unified::storage::segment::FileSegmentTable;
36use crate::graph::unified::string::id::StringId;
37
/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
///
/// `CodeGraph` uses `Arc` for all internal components, enabling:
/// - O(1) snapshot creation via Arc cloning
/// - Copy-on-write semantics via `Arc::make_mut`
/// - Memory-efficient sharing between snapshots
///
/// # Design
///
/// The Arc wrapping enables the MVCC pattern:
/// - Readers see a consistent snapshot at the time they acquired access
/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
/// - Multiple snapshots can coexist without copying data
///
/// # Performance
///
/// - Snapshot creation: a fixed number of Arc clones (one per Arc-wrapped
///   field, currently 13) ≈ <1μs
/// - Read access: Direct Arc dereference, no locking
/// - Write access: `Arc::make_mut` clones only if refcount > 1
///
/// # Phase 2 binding-plane access
///
/// Use the two-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let snapshot = graph.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The two-line form is intentional: `BindingPlane<'g>` borrows from
/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
/// visible at the call site. The full Phase 2 scope/alias/shadow and
/// witness-bearing resolution API is exposed through `BindingPlane`.
// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
// External crates still go through the public accessor methods below.
#[derive(Clone)]
pub struct CodeGraph {
    /// Node storage with generational indices.
    pub(crate) nodes: Arc<NodeArena>,
    /// Bidirectional edge storage (forward + reverse).
    pub(crate) edges: Arc<BidirectionalEdgeStore>,
    /// String interner for symbol names.
    pub(crate) strings: Arc<StringInterner>,
    /// File registry for path deduplication.
    pub(crate) files: Arc<FileRegistry>,
    /// Auxiliary indices for fast lookup.
    pub(crate) indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata (keyed by full `NodeId`).
    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance (Phase 1 fact layer).
    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance (Phase 1 fact layer).
    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
    pub(crate) fact_epoch: u64,
    /// Epoch for version tracking.
    pub(crate) epoch: u64,
    /// Per-language confidence metadata collected during build.
    /// Maps language name (e.g., "rust") to aggregated confidence.
    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
    pub(crate) scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
    pub(crate) alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
    pub(crate) shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
    pub(crate) file_segments: Arc<FileSegmentTable>,
}
112
113impl CodeGraph {
114 /// Creates a new empty `CodeGraph`.
115 ///
116 /// # Example
117 ///
118 /// ```rust
119 /// use sqry_core::graph::unified::concurrent::CodeGraph;
120 ///
121 /// let graph = CodeGraph::new();
122 /// assert_eq!(graph.epoch(), 0);
123 /// ```
124 #[must_use]
125 pub fn new() -> Self {
126 Self {
127 nodes: Arc::new(NodeArena::new()),
128 edges: Arc::new(BidirectionalEdgeStore::new()),
129 strings: Arc::new(StringInterner::new()),
130 files: Arc::new(FileRegistry::new()),
131 indices: Arc::new(AuxiliaryIndices::new()),
132 macro_metadata: Arc::new(NodeMetadataStore::new()),
133 node_provenance: Arc::new(NodeProvenanceStore::new()),
134 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
135 fact_epoch: 0,
136 epoch: 0,
137 confidence: HashMap::new(),
138 scope_arena: Arc::new(ScopeArena::new()),
139 alias_table: Arc::new(AliasTable::new()),
140 shadow_table: Arc::new(ShadowTable::new()),
141 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
142 file_segments: Arc::new(FileSegmentTable::new()),
143 }
144 }
145
146 /// Creates a `CodeGraph` from existing components.
147 ///
148 /// This is useful when building a graph from external data or
149 /// reconstructing from serialized state.
150 #[must_use]
151 pub fn from_components(
152 nodes: NodeArena,
153 edges: BidirectionalEdgeStore,
154 strings: StringInterner,
155 files: FileRegistry,
156 indices: AuxiliaryIndices,
157 macro_metadata: NodeMetadataStore,
158 ) -> Self {
159 Self {
160 nodes: Arc::new(nodes),
161 edges: Arc::new(edges),
162 strings: Arc::new(strings),
163 files: Arc::new(files),
164 indices: Arc::new(indices),
165 macro_metadata: Arc::new(macro_metadata),
166 node_provenance: Arc::new(NodeProvenanceStore::new()),
167 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
168 fact_epoch: 0,
169 epoch: 0,
170 confidence: HashMap::new(),
171 scope_arena: Arc::new(ScopeArena::new()),
172 alias_table: Arc::new(AliasTable::new()),
173 shadow_table: Arc::new(ShadowTable::new()),
174 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
175 file_segments: Arc::new(FileSegmentTable::new()),
176 }
177 }
178
    /// Creates a cheap snapshot of the graph.
    ///
    /// This operation is a fixed number of Arc clones (one per Arc-wrapped
    /// field, currently 13) plus two `u64` copies, which completes in <1μs.
    /// The snapshot is isolated from future mutations to the original graph:
    /// writers go through `Arc::make_mut`, which clones shared data rather
    /// than mutating it in place.
    ///
    /// Note: the `confidence` map is intentionally not carried into the
    /// snapshot (`GraphSnapshot` has no such field).
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// // snapshot is independent of future mutations to graph
    /// ```
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        GraphSnapshot {
            nodes: Arc::clone(&self.nodes),
            edges: Arc::clone(&self.edges),
            strings: Arc::clone(&self.strings),
            files: Arc::clone(&self.files),
            indices: Arc::clone(&self.indices),
            macro_metadata: Arc::clone(&self.macro_metadata),
            node_provenance: Arc::clone(&self.node_provenance),
            edge_provenance: Arc::clone(&self.edge_provenance),
            fact_epoch: self.fact_epoch,
            epoch: self.epoch,
            scope_arena: Arc::clone(&self.scope_arena),
            alias_table: Arc::clone(&self.alias_table),
            shadow_table: Arc::clone(&self.shadow_table),
            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
            file_segments: Arc::clone(&self.file_segments),
        }
    }
213
214 /// Returns a reference to the node arena.
215 #[inline]
216 #[must_use]
217 pub fn nodes(&self) -> &NodeArena {
218 &self.nodes
219 }
220
221 /// Returns a reference to the bidirectional edge store.
222 #[inline]
223 #[must_use]
224 pub fn edges(&self) -> &BidirectionalEdgeStore {
225 &self.edges
226 }
227
228 /// Returns a reference to the string interner.
229 #[inline]
230 #[must_use]
231 pub fn strings(&self) -> &StringInterner {
232 &self.strings
233 }
234
235 /// Returns a reference to the file registry.
236 #[inline]
237 #[must_use]
238 pub fn files(&self) -> &FileRegistry {
239 &self.files
240 }
241
242 /// Returns a reference to the auxiliary indices.
243 #[inline]
244 #[must_use]
245 pub fn indices(&self) -> &AuxiliaryIndices {
246 &self.indices
247 }
248
249 /// Returns a reference to the macro boundary metadata store.
250 #[inline]
251 #[must_use]
252 pub fn macro_metadata(&self) -> &NodeMetadataStore {
253 &self.macro_metadata
254 }
255
256 // ------------------------------------------------------------------
257 // Phase 1 fact-layer provenance accessors (P1U09).
258 // ------------------------------------------------------------------
259
260 /// Returns the monotonic fact-layer epoch stamped on the most recently
261 /// saved or loaded snapshot. Returns `0` for graphs that have not been
262 /// persisted yet or were loaded from V7 snapshots.
263 #[inline]
264 #[must_use]
265 pub fn fact_epoch(&self) -> u64 {
266 self.fact_epoch
267 }
268
269 /// Looks up node provenance by `NodeId`.
270 ///
271 /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
272 /// or the stored generation does not match (stale handle).
273 #[inline]
274 #[must_use]
275 pub fn node_provenance(
276 &self,
277 id: crate::graph::unified::node::id::NodeId,
278 ) -> Option<&NodeProvenance> {
279 self.node_provenance.lookup(id)
280 }
281
282 /// Looks up edge provenance by `EdgeId`.
283 ///
284 /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
285 /// or the edge is the invalid sentinel.
286 #[inline]
287 #[must_use]
288 pub fn edge_provenance(
289 &self,
290 id: crate::graph::unified::edge::id::EdgeId,
291 ) -> Option<&EdgeProvenance> {
292 self.edge_provenance.lookup(id)
293 }
294
295 /// Returns a borrowed provenance view for a file.
296 ///
297 /// Returns `None` for invalid/unregistered `FileId`s.
298 #[inline]
299 #[must_use]
300 pub fn file_provenance(
301 &self,
302 id: crate::graph::unified::file::id::FileId,
303 ) -> Option<FileProvenanceView<'_>> {
304 self.files.file_provenance(id)
305 }
306
307 // ------------------------------------------------------------------
308 // Phase 2 binding-plane accessors (P2U03).
309 // ------------------------------------------------------------------
310
311 /// Returns a reference to the scope arena derived during Phase 4e.
312 ///
313 /// The arena is empty on freshly-constructed `CodeGraph` instances and is
314 /// populated by calling `phase4e_binding::derive_binding_plane`.
315 #[inline]
316 #[must_use]
317 pub fn scope_arena(&self) -> &ScopeArena {
318 &self.scope_arena
319 }
320
321 /// Installs a freshly-derived scope arena.
322 ///
323 /// Called from `phase4e_binding::derive_binding_plane` during the build
324 /// pipeline. External callers that run Phase 4e manually (e.g., test
325 /// fixture builders) use this to store the result.
326 pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
327 self.scope_arena = Arc::new(arena);
328 }
329
330 /// Returns a reference to the alias table derived during Phase 4e.
331 ///
332 /// The table is empty on freshly-constructed `CodeGraph` instances and is
333 /// populated by calling `phase4e_binding::derive_binding_plane`.
334 #[inline]
335 #[must_use]
336 pub fn alias_table(&self) -> &AliasTable {
337 &self.alias_table
338 }
339
340 /// Installs a freshly-derived alias table.
341 ///
342 /// Called from `phase4e_binding::derive_binding_plane` during the build
343 /// pipeline. External callers that run Phase 4e manually (e.g., test
344 /// fixture builders) use this to store the result.
345 pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
346 self.alias_table = Arc::new(table);
347 }
348
349 /// Returns a reference to the shadow table derived during Phase 4e.
350 ///
351 /// The table is empty on freshly-constructed `CodeGraph` instances and is
352 /// populated by calling `phase4e_binding::derive_binding_plane`.
353 #[inline]
354 #[must_use]
355 pub fn shadow_table(&self) -> &ShadowTable {
356 &self.shadow_table
357 }
358
359 /// Installs a freshly-derived shadow table.
360 ///
361 /// Called from `phase4e_binding::derive_binding_plane` during the build
362 /// pipeline. External callers that run Phase 4e manually (e.g., test
363 /// fixture builders) use this to store the result.
364 pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
365 self.shadow_table = Arc::new(table);
366 }
367
368 /// Returns a reference to the scope provenance store derived during Phase 4e.
369 ///
370 /// The store is empty on freshly-constructed `CodeGraph` instances and is
371 /// populated by calling `phase4e_binding::derive_binding_plane`.
372 #[inline]
373 #[must_use]
374 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
375 &self.scope_provenance_store
376 }
377
378 /// Looks up scope provenance by `ScopeId`.
379 ///
380 /// Returns `None` if the slot is out of range, vacant, or the stored
381 /// generation does not match (stale handle).
382 #[inline]
383 #[must_use]
384 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
385 self.scope_provenance_store.lookup(id)
386 }
387
388 /// Looks up the live `ScopeId` for a stable scope identity.
389 ///
390 /// Returns `None` if no provenance record is registered for that stable id.
391 /// The reverse index is populated by `insert` during Phase 4e and must be
392 /// rebuilt after V9 deserialization.
393 #[inline]
394 #[must_use]
395 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
396 self.scope_provenance_store.scope_by_stable_id(stable)
397 }
398
399 /// Installs a freshly-derived scope provenance store.
400 ///
401 /// Called from `phase4e_binding::derive_binding_plane` during the build
402 /// pipeline. External callers that run Phase 4e manually (e.g., test
403 /// fixture builders) use this to store the result.
404 pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
405 self.scope_provenance_store = Arc::new(store);
406 }
407
408 /// Returns a reference to the file segment table.
409 #[inline]
410 #[must_use]
411 pub fn file_segments(&self) -> &FileSegmentTable {
412 &self.file_segments
413 }
414
415 /// Replaces the file segment table.
416 pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
417 self.file_segments = Arc::new(table);
418 }
419
420 /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
421 pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
422 Arc::make_mut(&mut self.file_segments)
423 }
424
425 /// Test-only helper that records a file segment directly on the
426 /// graph, bypassing the full Phase 3 commit pipeline. Only
427 /// available under `#[cfg(feature = "rebuild-internals")]` so the
428 /// surface is opt-in for rebuild-plane consumers (the feature is
429 /// whitelisted to sqry-daemon + sqry-core integration tests; see
430 /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
431 ///
432 /// Integration tests (notably
433 /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
434 /// to seed the synthetic workspaces they build before exercising
435 /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
436 /// code paths never touch this method — Phase 3 parallel commit
437 /// is the sole production writer, via the crate-internal
438 /// `file_segments_mut` accessor above.
439 ///
440 /// Renamed with `test_only_` prefix so the purpose is unambiguous
441 /// at every call site; `#[doc(hidden)]` hides it from rendered
442 /// rustdoc so downstream daemon integrations don't discover it by
443 /// accident.
444 #[cfg(feature = "rebuild-internals")]
445 #[doc(hidden)]
446 pub fn test_only_record_file_segment(
447 &mut self,
448 file_id: FileId,
449 start_slot: u32,
450 slot_count: u32,
451 ) {
452 Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
453 }
454
455 /// Sets the provenance stores and fact epoch, typically called by the
456 /// persistence loader after deserializing a V8 snapshot.
457 pub(crate) fn set_provenance(
458 &mut self,
459 node_provenance: NodeProvenanceStore,
460 edge_provenance: EdgeProvenanceStore,
461 fact_epoch: u64,
462 ) {
463 self.node_provenance = Arc::new(node_provenance);
464 self.edge_provenance = Arc::new(edge_provenance);
465 self.fact_epoch = fact_epoch;
466 }
467
468 /// Returns the current epoch.
469 #[inline]
470 #[must_use]
471 pub fn epoch(&self) -> u64 {
472 self.epoch
473 }
474
475 /// Returns a mutable reference to the node arena.
476 ///
477 /// Uses `Arc::make_mut` for copy-on-write semantics: if other
478 /// references exist (e.g., snapshots), the data is cloned.
479 #[inline]
480 pub fn nodes_mut(&mut self) -> &mut NodeArena {
481 Arc::make_mut(&mut self.nodes)
482 }
483
484 /// Returns a mutable reference to the bidirectional edge store.
485 ///
486 /// Uses `Arc::make_mut` for copy-on-write semantics.
487 #[inline]
488 pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
489 Arc::make_mut(&mut self.edges)
490 }
491
492 /// Returns a mutable reference to the string interner.
493 ///
494 /// Uses `Arc::make_mut` for copy-on-write semantics.
495 #[inline]
496 pub fn strings_mut(&mut self) -> &mut StringInterner {
497 Arc::make_mut(&mut self.strings)
498 }
499
500 /// Returns a mutable reference to the file registry.
501 ///
502 /// Uses `Arc::make_mut` for copy-on-write semantics.
503 #[inline]
504 pub fn files_mut(&mut self) -> &mut FileRegistry {
505 Arc::make_mut(&mut self.files)
506 }
507
508 /// Returns a mutable reference to the auxiliary indices.
509 ///
510 /// Uses `Arc::make_mut` for copy-on-write semantics.
511 #[inline]
512 pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
513 Arc::make_mut(&mut self.indices)
514 }
515
516 /// Returns a mutable reference to the macro boundary metadata store.
517 ///
518 /// Uses `Arc::make_mut` for copy-on-write semantics.
519 #[inline]
520 pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
521 Arc::make_mut(&mut self.macro_metadata)
522 }
523
524 /// Returns mutable references to both the node arena and the string interner.
525 ///
526 /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
527 /// `strings_mut()` separately on `&mut self`.
528 #[inline]
529 pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
530 (
531 Arc::make_mut(&mut self.nodes),
532 Arc::make_mut(&mut self.strings),
533 )
534 }
535
536 /// Rebuilds auxiliary indices from the current node arena.
537 ///
538 /// This avoids the borrow conflict that arises when calling `nodes()` and
539 /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
540 /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
541 /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
542 /// existing indices and rebuilds in a single pass without per-element
543 /// duplicate checking.
544 ///
545 /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
546 /// generic
547 /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
548 /// free function so the same implementation serves both the
549 /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
550 /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
551 /// using `graph.rebuild_indices()`; incremental rebuild call sites
552 /// should use the free function directly.
553 pub fn rebuild_indices(&mut self) {
554 crate::graph::unified::build::parallel_commit::rebuild_indices(self);
555 }
556
557 /// Increments the epoch counter and returns the new value.
558 ///
559 /// Called automatically by `ConcurrentCodeGraph::write()`.
560 #[inline]
561 pub fn bump_epoch(&mut self) -> u64 {
562 self.epoch = self.epoch.wrapping_add(1);
563 self.epoch
564 }
565
566 /// Sets the epoch to a specific value.
567 ///
568 /// This is primarily for testing or reconstruction from serialized state.
569 #[inline]
570 pub fn set_epoch(&mut self, epoch: u64) {
571 self.epoch = epoch;
572 }
573
574 /// Returns the number of nodes in the graph.
575 ///
576 /// This is a convenience method that delegates to `nodes().len()`.
577 ///
578 /// # Example
579 ///
580 /// ```rust
581 /// use sqry_core::graph::unified::concurrent::CodeGraph;
582 ///
583 /// let graph = CodeGraph::new();
584 /// assert_eq!(graph.node_count(), 0);
585 /// ```
586 #[inline]
587 #[must_use]
588 pub fn node_count(&self) -> usize {
589 self.nodes.len()
590 }
591
592 /// Returns the number of edges in the graph (forward direction).
593 ///
594 /// This counts edges in the forward store, including both CSR and delta edges.
595 ///
596 /// # Example
597 ///
598 /// ```rust
599 /// use sqry_core::graph::unified::concurrent::CodeGraph;
600 ///
601 /// let graph = CodeGraph::new();
602 /// assert_eq!(graph.edge_count(), 0);
603 /// ```
604 #[inline]
605 #[must_use]
606 pub fn edge_count(&self) -> usize {
607 let stats = self.edges.stats();
608 stats.forward.csr_edge_count + stats.forward.delta_edge_count
609 }
610
611 /// Returns true if the graph contains no nodes.
612 ///
613 /// This is a convenience method that delegates to `nodes().is_empty()`.
614 ///
615 /// # Example
616 ///
617 /// ```rust
618 /// use sqry_core::graph::unified::concurrent::CodeGraph;
619 ///
620 /// let graph = CodeGraph::new();
621 /// assert!(graph.is_empty());
622 /// ```
623 #[inline]
624 #[must_use]
625 pub fn is_empty(&self) -> bool {
626 self.nodes.is_empty()
627 }
628
629 /// Returns an iterator over all indexed file paths.
630 ///
631 /// This is useful for enumerating all files that have been processed
632 /// and added to the graph.
633 ///
634 /// # Example
635 ///
636 /// ```rust
637 /// use sqry_core::graph::unified::concurrent::CodeGraph;
638 ///
639 /// let graph = CodeGraph::new();
640 /// for (file_id, path) in graph.indexed_files() {
641 /// println!("File {}: {}", file_id.index(), path.display());
642 /// }
643 /// ```
644 #[inline]
645 pub fn indexed_files(
646 &self,
647 ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
648 self.files
649 .iter()
650 .map(|(id, arc_path)| (id, arc_path.as_ref()))
651 }
652
653 /// Returns the set of files that import one or more symbols exported by
654 /// `file_id`, deduplicated and sorted ascending.
655 ///
656 /// For every [`EdgeKind::Imports`] edge whose target node lives in
657 /// `file_id`, the source node's [`FileId`] is added to the result. Files
658 /// are returned sorted by raw index so the result is deterministic across
659 /// runs. The caller's own file is never included — an `Imports` edge
660 /// whose source and target both live in `file_id` is treated as a
661 /// self-import and elided. Edges whose source node is no longer resolvable
662 /// in the arena (tombstoned) are silently skipped.
663 ///
664 /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
665 /// by the incremental rebuild engine to compute reverse-dependency
666 /// closures: "if file X changes its exports, which files need to be
667 /// re-linked?"
668 ///
669 /// # Complexity
670 ///
671 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
672 /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
673 /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
674 /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
675 /// importer set (where `R` is the importer count) is negligible in
676 /// practice since `R ≤ file_count`.
677 ///
678 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
679 #[must_use]
680 pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
681 let mut importers: HashSet<FileId> = HashSet::new();
682 for &target_node in self.indices.by_file(file_id) {
683 for edge_ref in self.edges.edges_to(target_node) {
684 if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
685 continue;
686 }
687 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
688 continue;
689 };
690 let source_file = source_entry.file;
691 if source_file != file_id {
692 importers.insert(source_file);
693 }
694 }
695 }
696 let mut result: Vec<FileId> = importers.into_iter().collect();
697 result.sort();
698 result
699 }
700
701 /// Returns the set of files that hold at least one live inter-file edge
702 /// targeting a node in `file_id`, deduplicated and sorted ascending.
703 ///
704 /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
705 /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
706 /// cross-file edge as a dependency signal: `Calls`, `References`,
707 /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
708 /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
709 /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
710 /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
711 /// cross-file-capable variant. This is the reverse-dependency surface the
712 /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
713 /// `file_id` changes, every file whose committed edges point into
714 /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
715 /// references survive the target-side tombstone-and-reparse cycle.
716 ///
717 /// The caller's own file is never included — an edge whose source and
718 /// target both live in `file_id` is a self-reference and is elided.
719 /// Edges whose source node is no longer resolvable in the arena
720 /// (tombstoned) are silently skipped.
721 ///
722 /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
723 ///
724 /// * [`reverse_import_index`](Self::reverse_import_index) remains the
725 /// right surface for consumers that specifically need *import*
726 /// relationships (export surface analysis, module-dependency graphs,
727 /// etc.).
728 /// * `reverse_dependency_index` is the right surface for incremental
729 /// rebuild closure computation. Widening past imports is necessary
730 /// because call sites, type references, trait implementations, FFI
731 /// declarations, HTTP clients, and every other cross-file edge kind
732 /// hold a committed edge into the target file that becomes stale the
733 /// moment `remove_file(target)` tombstones its arena nodes. Leaving
734 /// those files out of the closure leaves the committed edges pointing
735 /// at the stale (pre-tombstone) node IDs — Phase 4c-prime only
736 /// rewrites edges on **re-parsed** files' `PendingEdge` sets, never
737 /// committed edges owned by files outside the reparse scope.
738 ///
739 /// # Complexity
740 ///
741 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
742 /// same bound as [`reverse_import_index`](Self::reverse_import_index).
743 /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
744 /// lookup and the bidirectional edge store's reverse adjacency; no
745 /// full-graph scan. Final `O(R log R)` sort over the deduplicated
746 /// dependent set is negligible since `R ≤ file_count`.
747 ///
748 /// # Over-widening is expected and acceptable
749 ///
750 /// The closure will include every file that references anything in
751 /// `file_id`, not just files whose exports change. In common codebases
752 /// this widens the reparse set modestly (a 10-file change may expand
753 /// to 20–30 dependent files in a medium crate). Correctness requires
754 /// the widening; minimality is a follow-up optimisation if profiling
755 /// demands it.
756 ///
757 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
758 #[must_use]
759 pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
760 let mut dependents: HashSet<FileId> = HashSet::new();
761 for &target_node in self.indices.by_file(file_id) {
762 for edge_ref in self.edges.edges_to(target_node) {
763 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
764 continue;
765 };
766 let source_file = source_entry.file;
767 if source_file != file_id {
768 dependents.insert(source_file);
769 }
770 }
771 }
772 let mut result: Vec<FileId> = dependents.into_iter().collect();
773 result.sort();
774 result
775 }
776
777 /// Returns the per-language confidence metadata.
778 ///
779 /// This contains analysis confidence information collected during graph build,
780 /// primarily used by language plugins (e.g., Rust) to track analysis quality.
781 #[inline]
782 #[must_use]
783 pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
784 &self.confidence
785 }
786
    /// Merges confidence metadata for a language.
    ///
    /// If confidence already exists for the language, this merges the new
    /// metadata (taking the lower confidence level and combining limitations).
    /// Otherwise, it inserts the new confidence.
    ///
    /// The level merge treats `Verified` as the most confident level and
    /// `AstOnly` as the least, and always keeps the *less* confident side.
    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
        use crate::confidence::ConfidenceLevel;

        self.confidence
            .entry(language.to_string())
            .and_modify(|existing| {
                // Take the lower confidence level (more conservative)
                let new_level = match (&existing.level, &metadata.level) {
                    // `Verified` is the top of the lattice: whichever side is
                    // not `Verified` wins (if both are, `other` is `Verified`).
                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
                        *other
                    }
                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
                        ConfidenceLevel::AstOnly
                    }
                    // Remaining pairs are equal, so either side is the minimum.
                    // NOTE(review): this catch-all silently keeps the existing
                    // level; if a new `ConfidenceLevel` variant is ever added,
                    // revisit this match so mixed pairs still take the minimum.
                    (level, _) => *level,
                };
                existing.level = new_level;

                // Merge limitations (deduplicated, preserving existing order;
                // linear `contains` scan is fine for these small lists)
                for limitation in &metadata.limitations {
                    if !existing.limitations.contains(limitation) {
                        existing.limitations.push(limitation.clone());
                    }
                }

                // Merge unavailable features (deduplicated, same approach)
                for feature in &metadata.unavailable_features {
                    if !existing.unavailable_features.contains(feature) {
                        existing.unavailable_features.push(feature.clone());
                    }
                }
            })
            .or_insert(metadata);
    }
827
828 /// Sets the confidence metadata map directly.
829 ///
830 /// This is primarily used when loading a graph from serialized state.
831 pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
832 self.confidence = confidence;
833 }
834
835 // ------------------------------------------------------------------
836 // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
837 //
838 // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
839 // path mutates a live `CodeGraph` in place and is the mechanism
840 // used by the full-rebuild flow when it needs to evict a file's
841 // nodes+edges between compactions. The daemon's incremental
842 // `WorkspaceManager` (Task 6) goes through the rebuild path, which
843 // is why this entry point is `pub(crate)`.
844 // ------------------------------------------------------------------
845
846 /// Tombstone every node that belongs to `file_id`, invalidate every
847 /// edge whose source or target is one of those nodes (across both
848 /// forward and reverse CSR + delta tiers), drop the file's entry
849 /// from the [`FileRegistry`], and return the set of [`NodeId`]s
850 /// that were tombstoned.
851 ///
852 /// This is the §F.2-aware file-removal primitive. Semantically, the
853 /// post-condition matches what a full rebuild of the workspace
854 /// without `file_id` would produce:
855 ///
856 /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
857 /// been `NodeArena::remove`d, advancing its slot generation so
858 /// stale [`NodeId`] handles do not alias a later re-allocation.
859 /// * Every CSR edge whose source or target slot matches one of the
860 /// tombstoned slot indices has its `csr_tombstones` bit set; the
861 /// read path's merge step already filters tombstoned CSR edges
862 /// out of every query result.
863 /// * Every delta-buffer edge (Add or Remove, any file) whose source
864 /// or target matches a tombstoned slot has been dropped from the
865 /// delta in both directions.
866 /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
867 /// no longer reference any of the tombstoned `NodeId`s.
868 /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
869 /// [`AliasTable`], and [`ShadowTable`] have been compacted through
870 /// the [`NodeIdBearing::retain_nodes`] predicate so no
871 /// tombstoned `NodeId` survives in any publish-visible store.
872 /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
873 /// `file_id`, the lookup slot is recycled, and
874 /// [`FileRegistry::resolve(file_id)`] returns `None`.
875 ///
876 /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
877 /// is useful for downstream housekeeping (e.g., resetting per-file
878 /// caches keyed by `NodeId`) and for tests that need to assert on the
879 /// exact membership of the tombstone set.
880 ///
881 /// # Idempotency
882 ///
883 /// Calling `remove_file` twice with the same `file_id` — or calling
884 /// it for a `file_id` that was never registered — is a safe no-op.
885 /// The returned `Vec<NodeId>` is empty on the second call; no
886 /// arena / edge / index state is mutated (the predicate-based
887 /// compaction of `NodeIdBearing` surfaces short-circuits when the
888 /// dead set is empty).
889 ///
890 /// # Visibility
891 ///
892 /// `pub(crate)` because external callers (Task 6's
893 /// `WorkspaceManager` on the sqry-daemon side) route through
894 /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
895 /// instead. This `CodeGraph`-level variant is used by full-rebuild
896 /// housekeeping paths inside sqry-core and by the Task 4 Step 4
897 /// incremental fallback for cases where the caller already has a
898 /// `&mut CodeGraph` and does not need the clone-and-publish
899 /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
900 /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
901 ///
902 /// # Performance
903 ///
904 /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
905 /// The CSR walk is linear in total edge count (not per-file),
906 /// which is the dominant cost; each row check is O(1) via the
907 /// precomputed dead-slot-index `HashSet` in
908 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
909 /// * Delta filtering is `O(|delta|)` per direction.
910 /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
911 /// because each of the four indices keys its entries by the
912 /// tombstoned `NodeIds` directly.
913 ///
914 /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
915 /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
916 /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
917 #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
918 // and the unit tests below; published in this commit so the §F.2
919 // invariant surface can be reviewed in isolation per the Gate 0c
920 // split contract.
921 pub(crate) fn remove_file(
922 &mut self,
923 file_id: FileId,
924 ) -> Vec<crate::graph::unified::node::NodeId> {
925 use crate::graph::unified::node::NodeId;
926 use crate::graph::unified::rebuild::coverage::NodeIdBearing;
927
928 // Drain the per-file bucket. For a file that was never
929 // registered, this returns an empty Vec — the rest of the
930 // method short-circuits on the `if tombstoned.is_empty()` test
931 // below so we still deregister the file on the off chance the
932 // bucket was empty but the file was registered (defensive; the
933 // common case is the bucket existed iff the file was
934 // registered).
935 let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
936 // Always drop the file's path entry + recycle its slot, even
937 // when the bucket was empty, so repeated registrations of the
938 // same path don't resurrect a zombie FileId. `unregister` is
939 // idempotent (returns None for unknown IDs) so the cost is a
940 // single HashMap probe when file_id is already gone.
941 self.files_mut().unregister(file_id);
942 // Clear the file's segment entry unconditionally (idempotent
943 // — `FileSegmentTable::remove` no-ops on unknown ids). This
944 // MUST run before `FileRegistry::unregister` recycles the
945 // FileId slot for reuse, otherwise a later registration of a
946 // different path under the reused FileId would inherit the
947 // previous file's stale node range (see
948 // `sqry-core/src/graph/unified/build/reindex.rs` — which
949 // trusts `file_segments().get(file_id)` to decide which slots
950 // to tombstone). Note: `unregister` above was called first
951 // only to keep the existing bucket-drain ordering; the
952 // segment-clear is order-independent with respect to
953 // `unregister` because neither touches the other's backing
954 // store, and the FileId slot cannot be recycled-and-reissued
955 // across a single `remove_file` call (the registry's slot
956 // recycler is driven by a later `register`, not by
957 // `unregister`).
958 self.file_segments_mut().remove(file_id);
959
960 if tombstoned.is_empty() {
961 return tombstoned;
962 }
963
964 // Dead set keyed on NodeId for NodeIdBearing predicates.
965 // `retain_nodes` uses `HashSet::contains` so membership is O(1).
966 let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
967
968 // 1. Tombstone the arena slots. `NodeArena::remove` is
969 // idempotent — stale NodeIds that don't match a slot's
970 // current generation are no-ops, which lets this method be
971 // safely re-run on the same file.
972 {
973 let arena = self.nodes_mut();
974 for &nid in &tombstoned {
975 let _ = arena.remove(nid);
976 }
977 }
978
979 // 2. Invalidate edges across both CSR + delta in both
980 // directions. This is the expensive step; the helper uses a
981 // precomputed dead-slot-index set so the CSR walk is linear
982 // in total edge count, not quadratic.
983 self.edges_mut().tombstone_edges_for_nodes(&dead);
984
985 // 3. Compact the auxiliary indices so name/kind/qname/file
986 // lookups do not return tombstoned NodeIds. Using the
987 // NodeIdBearing surface keeps this step in lockstep with the
988 // rebuild pipeline's step 4 — any future publish-visible
989 // NodeId-bearing container added to the K.A/K.B matrix is
990 // automatically swept here too.
991 {
992 let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
993 self.indices_mut().retain_nodes(&*predicate);
994 self.macro_metadata_mut().retain_nodes(&*predicate);
995 // The remaining K.A rows (node_provenance, scope_arena,
996 // alias_table, shadow_table) need Arc::make_mut accessors —
997 // they are wrapped in Arc at rest so this is where the
998 // CoW clone happens on demand. Inline the Arc::make_mut
999 // calls here (no public mut accessor exists today because
1000 // the sole writer has been the rebuild path; extend the
1001 // set by adding a similar inline call plus a K.A/K.B row
1002 // in `super::super::rebuild::coverage`).
1003 Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1004 Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1005 Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1006 Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1007 }
1008
1009 tombstoned
1010 }
1011
1012 // ------------------------------------------------------------------
1013 // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1014 // ------------------------------------------------------------------
1015
1016 /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1017 /// by `RebuildGraph::finalize()` (defined in
1018 /// `super::super::rebuild::rebuild_graph`).
1019 ///
1020 /// This constructor is deliberately `pub(crate)` and named with a
1021 /// leading `__` so it is inaccessible from downstream crates even
1022 /// when the `rebuild-internals` feature is enabled: only code in
1023 /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1024 /// permitted to call it. The trybuild fixture
1025 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1026 /// proves there is no other path from `RebuildGraph` to
1027 /// `Arc<CodeGraph>`.
1028 ///
1029 /// The argument order mirrors the `CodeGraph` struct declaration
1030 /// order exactly; the public `clone_for_rebuild` → `finalize`
1031 /// round-trip uses the same macro-driven field enumeration so any
1032 /// new `CodeGraph` field automatically threads through this
1033 /// constructor as well.
1034 #[doc(hidden)]
1035 #[allow(clippy::too_many_arguments)]
1036 #[must_use]
1037 pub(crate) fn __assemble_from_rebuild_parts_internal(
1038 nodes: NodeArena,
1039 edges: BidirectionalEdgeStore,
1040 strings: StringInterner,
1041 files: FileRegistry,
1042 indices: AuxiliaryIndices,
1043 macro_metadata: NodeMetadataStore,
1044 node_provenance: NodeProvenanceStore,
1045 edge_provenance: EdgeProvenanceStore,
1046 fact_epoch: u64,
1047 epoch: u64,
1048 confidence: HashMap<String, ConfidenceMetadata>,
1049 scope_arena: ScopeArena,
1050 alias_table: AliasTable,
1051 shadow_table: ShadowTable,
1052 scope_provenance_store: ScopeProvenanceStore,
1053 file_segments: FileSegmentTable,
1054 ) -> Self {
1055 Self {
1056 nodes: Arc::new(nodes),
1057 edges: Arc::new(edges),
1058 strings: Arc::new(strings),
1059 files: Arc::new(files),
1060 indices: Arc::new(indices),
1061 macro_metadata: Arc::new(macro_metadata),
1062 node_provenance: Arc::new(node_provenance),
1063 edge_provenance: Arc::new(edge_provenance),
1064 fact_epoch,
1065 epoch,
1066 confidence,
1067 scope_arena: Arc::new(scope_arena),
1068 alias_table: Arc::new(alias_table),
1069 shadow_table: Arc::new(shadow_table),
1070 scope_provenance_store: Arc::new(scope_provenance_store),
1071 file_segments: Arc::new(file_segments),
1072 }
1073 }
1074
1075 // ------------------------------------------------------------------
1076 // Gate 0c (A2 §F) — publish-boundary debug invariants.
1077 //
1078 // These checks fire only in debug / test builds; release builds
1079 // compile them out. They are called from
1080 // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1081 // truth for the residue check, per §F and §H agreement). Gate 0d
1082 // will additionally wire the bijection check into
1083 // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1084 // every §E equivalence-harness run.
1085 // ------------------------------------------------------------------
1086
1087 /// Assert the bijective bucket-membership invariant (A2 §F.1).
1088 ///
1089 /// Four conditions must hold simultaneously:
1090 /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1091 /// live arena slot;
1092 /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1093 /// across buckets, no duplicates within a bucket);
1094 /// (c) the bucket's `FileId` matches the node's own `file` field on
1095 /// `NodeEntry`;
1096 /// (d) when at least one bucket is populated, every live node in
1097 /// the arena is accounted for by some bucket.
1098 ///
1099 /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1100 /// (no recorded buckets) publish boundary is vacuously consistent:
1101 /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1102 /// rebuilds on graphs that predate Gate 0c's parallel-commit
1103 /// bucketing must not panic. Once any bucket is populated, every
1104 /// live arena slot must appear in a bucket.
1105 ///
1106 /// Iter-2 B2 (verbatim): this check used to be documented as
1107 /// "vacuous until future `per_file_nodes` work lands". Pulling
1108 /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1109 /// is non-vacuous the moment parallel-parse commits nodes, which
1110 /// happens on every real build. The `!seen.is_empty()` guard now
1111 /// exists solely for the empty-graph corner case, not as a phased-
1112 /// delivery deferral.
1113 ///
1114 /// The check is a no-op in release builds. Panics with a
1115 /// descriptive message on violation. This is intentional: publish-
1116 /// boundary violations are programmer errors that must surface
1117 /// loudly during CI / test runs.
1118 #[cfg(any(debug_assertions, test))]
1119 pub fn assert_bucket_bijection(&self) {
1120 use std::collections::HashMap as StdHashMap;
1121 // (a) + (b) + (c): every bucketed node is live, unique across
1122 // buckets, and the bucket's FileId matches the node's own file.
1123 let mut seen: StdHashMap<
1124 crate::graph::unified::node::NodeId,
1125 crate::graph::unified::file::FileId,
1126 > = StdHashMap::new();
1127 let mut any_bucket_populated = false;
1128 for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1129 if !bucket.is_empty() {
1130 any_bucket_populated = true;
1131 }
1132 // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1133 // dedups during finalize step 6, but we re-check here so the
1134 // invariant covers non-finalize publish paths too (e.g. if a
1135 // future pipeline builds a graph without routing through
1136 // `RebuildGraph::finalize`).
1137 let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1138 std::collections::HashSet::new();
1139 for node_id in bucket {
1140 assert!(
1141 within_bucket.insert(node_id),
1142 "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1143 );
1144 assert!(
1145 self.nodes.get(node_id).is_some(),
1146 "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1147 );
1148 let prior = seen.insert(node_id, file_id);
1149 assert!(
1150 prior.is_none(),
1151 "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1152 prior={prior:?}, current={file_id:?}"
1153 );
1154 if let Some(entry) = self.nodes.get(node_id) {
1155 assert_eq!(
1156 entry.file, file_id,
1157 "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1158 {file_id:?}, actual {:?}",
1159 entry.file
1160 );
1161 }
1162 }
1163 }
1164 // (d): every live node is accounted for by `seen` once buckets
1165 // are populated. The guard keeps legacy-empty-graph boundaries
1166 // vacuously consistent (see docs above).
1167 if any_bucket_populated {
1168 for (node_id, _entry) in self.nodes.iter() {
1169 assert!(
1170 seen.contains_key(&node_id),
1171 "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1172 );
1173 }
1174 }
1175 }
1176
1177 /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1178 ///
1179 /// Iterates every publish-visible NodeId-bearing structure on
1180 /// `self` and panics if any contains a node in `dead`. Called from
1181 /// `RebuildGraph::finalize()` step 14 against the set drained at
1182 /// step 8 — exactly one site per the plan's §F / §H agreement.
1183 ///
1184 /// No-op when `dead` is empty or in release builds.
1185 #[cfg(any(debug_assertions, test))]
1186 pub fn assert_no_tombstone_residue_for(
1187 &self,
1188 dead: &std::collections::HashSet<crate::graph::unified::node::NodeId>,
1189 ) {
1190 use super::super::rebuild::coverage::NodeIdBearing;
1191 if dead.is_empty() {
1192 return;
1193 }
1194 // Every K.A/K.B row must be inspected per §F.2.
1195 for nid in self.nodes.all_node_ids() {
1196 assert!(
1197 !dead.contains(&nid),
1198 "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1199 );
1200 }
1201 for nid in self.indices.all_node_ids() {
1202 assert!(
1203 !dead.contains(&nid),
1204 "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1205 );
1206 }
1207 for nid in self.edges.all_node_ids() {
1208 assert!(
1209 !dead.contains(&nid),
1210 "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1211 );
1212 }
1213 for nid in self.macro_metadata.all_node_ids() {
1214 assert!(
1215 !dead.contains(&nid),
1216 "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1217 );
1218 }
1219 for nid in self.node_provenance.all_node_ids() {
1220 assert!(
1221 !dead.contains(&nid),
1222 "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1223 );
1224 }
1225 for nid in self.scope_arena.all_node_ids() {
1226 assert!(
1227 !dead.contains(&nid),
1228 "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1229 );
1230 }
1231 for nid in self.alias_table.all_node_ids() {
1232 assert!(
1233 !dead.contains(&nid),
1234 "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1235 );
1236 }
1237 for nid in self.shadow_table.all_node_ids() {
1238 assert!(
1239 !dead.contains(&nid),
1240 "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1241 );
1242 }
1243 for nid in self.files.all_node_ids() {
1244 assert!(
1245 !dead.contains(&nid),
1246 "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1247 );
1248 }
1249 }
1250}
1251
1252impl Default for CodeGraph {
1253 fn default() -> Self {
1254 Self::new()
1255 }
1256}
1257
1258impl fmt::Debug for CodeGraph {
1259 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1260 f.debug_struct("CodeGraph")
1261 .field("nodes", &self.nodes.len())
1262 .field("epoch", &self.epoch)
1263 .finish_non_exhaustive()
1264 }
1265}
1266
1267impl GraphMemorySize for CodeGraph {
1268 /// Estimates the total heap bytes owned by this `CodeGraph`.
1269 ///
1270 /// Sums heap usage across every component the graph owns: node arena,
1271 /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1272 /// buffer), string interner, file registry, auxiliary indices, sparse
1273 /// macro/classpath metadata, provenance stores, and the per-language
1274 /// confidence map. Used by the `sqryd` daemon's admission controller
1275 /// and workspace retention reaper to enforce memory budgets.
1276 fn heap_bytes(&self) -> usize {
1277 let mut confidence_bytes = self.confidence.capacity()
1278 * (std::mem::size_of::<String>()
1279 + std::mem::size_of::<ConfidenceMetadata>()
1280 + HASHMAP_ENTRY_OVERHEAD);
1281 for (key, meta) in &self.confidence {
1282 confidence_bytes += key.capacity();
1283 // `ConfidenceMetadata.limitations` / `unavailable_features` are
1284 // `Vec<String>`; charge their spill and inner String payloads.
1285 confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1286 for s in &meta.limitations {
1287 confidence_bytes += s.capacity();
1288 }
1289 confidence_bytes +=
1290 meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1291 for s in &meta.unavailable_features {
1292 confidence_bytes += s.capacity();
1293 }
1294 }
1295
1296 self.nodes.heap_bytes()
1297 + self.edges.heap_bytes()
1298 + self.strings.heap_bytes()
1299 + self.files.heap_bytes()
1300 + self.indices.heap_bytes()
1301 + self.macro_metadata.heap_bytes()
1302 + self.node_provenance.heap_bytes()
1303 + self.edge_provenance.heap_bytes()
1304 + confidence_bytes
1305 + self.file_segments.capacity()
1306 * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1307 )
1308 }
1309}
1310
1311/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1312///
1313/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1314/// - Multiple readers can access the graph simultaneously
1315/// - Only one writer can hold the lock at a time
1316/// - Each write operation increments the epoch for cursor invalidation
1317///
1318/// # Design
1319///
1320/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1321/// - Fair scheduling prevents writer starvation
1322/// - No poisoning (unlike `std::sync::RwLock`)
1323/// - Faster lock/unlock operations
1324///
1325/// # Phase 2 binding-plane access
1326///
1327/// Use the three-line snapshot pattern to access `BindingPlane`:
1328///
1329/// ```rust,ignore
1330/// let read_guard = concurrent.read();
1331/// let snapshot = read_guard.snapshot();
1332/// let plane = snapshot.binding_plane();
1333/// let resolution = plane.resolve(&query);
1334/// ```
1335///
1336/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1337/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1338/// API is exposed through `BindingPlane`.
1339///
1340/// # Usage
1341///
1342/// ```rust
1343/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1344///
1345/// let graph = ConcurrentCodeGraph::new();
1346///
1347/// // Read access (multiple readers allowed)
1348/// {
1349/// let guard = graph.read();
1350/// let _nodes = guard.nodes();
1351/// }
1352///
1353/// // Write access (exclusive)
1354/// {
1355/// let mut guard = graph.write();
1356/// let _nodes = guard.nodes_mut();
1357/// }
1358///
1359/// // Snapshot for long queries
1360/// let snapshot = graph.snapshot();
1361/// ```
pub struct ConcurrentCodeGraph {
    /// The underlying code graph protected by a read-write lock.
    inner: RwLock<CodeGraph>,
    /// Global epoch counter for cursor validation; bumped by `write`
    /// and by every successful `try_write`.
    epoch: AtomicU64,
}
1368
1369impl ConcurrentCodeGraph {
1370 /// Creates a new empty `ConcurrentCodeGraph`.
1371 #[must_use]
1372 pub fn new() -> Self {
1373 Self {
1374 inner: RwLock::new(CodeGraph::new()),
1375 epoch: AtomicU64::new(0),
1376 }
1377 }
1378
1379 /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1380 #[must_use]
1381 pub fn from_graph(graph: CodeGraph) -> Self {
1382 let epoch = graph.epoch();
1383 Self {
1384 inner: RwLock::new(graph),
1385 epoch: AtomicU64::new(epoch),
1386 }
1387 }
1388
1389 /// Acquires a read lock on the graph.
1390 ///
1391 /// Multiple readers can hold the lock simultaneously.
1392 /// This does not increment the epoch.
1393 #[inline]
1394 pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1395 self.inner.read()
1396 }
1397
1398 /// Acquires a write lock on the graph.
1399 ///
1400 /// Only one writer can hold the lock at a time.
1401 /// This increments the global epoch counter.
1402 #[inline]
1403 pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1404 // Increment the global epoch
1405 self.epoch.fetch_add(1, Ordering::SeqCst);
1406 let mut guard = self.inner.write();
1407 // Sync the inner graph's epoch with the global epoch
1408 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1409 guard
1410 }
1411
1412 /// Returns the current global epoch.
1413 ///
1414 /// This can be used to detect if the graph has been modified
1415 /// since a previous operation (cursor invalidation).
1416 #[inline]
1417 #[must_use]
1418 pub fn epoch(&self) -> u64 {
1419 self.epoch.load(Ordering::SeqCst)
1420 }
1421
1422 /// Creates a cheap snapshot of the graph.
1423 ///
1424 /// This acquires a brief read lock to clone the Arc references.
1425 /// The snapshot is isolated from future mutations.
1426 #[must_use]
1427 pub fn snapshot(&self) -> GraphSnapshot {
1428 self.inner.read().snapshot()
1429 }
1430
1431 // ------------------------------------------------------------------
1432 // Phase 1 fact-layer provenance convenience accessors.
1433 // These acquire a brief read lock, matching the snapshot() pattern.
1434 // ------------------------------------------------------------------
1435
1436 /// Returns the monotonic fact-layer epoch from the underlying graph.
1437 #[must_use]
1438 pub fn fact_epoch(&self) -> u64 {
1439 self.inner.read().fact_epoch()
1440 }
1441
1442 /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1443 #[must_use]
1444 pub fn node_provenance(
1445 &self,
1446 id: crate::graph::unified::node::id::NodeId,
1447 ) -> Option<NodeProvenance> {
1448 self.inner.read().node_provenance(id).copied()
1449 }
1450
1451 /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1452 #[must_use]
1453 pub fn edge_provenance(
1454 &self,
1455 id: crate::graph::unified::edge::id::EdgeId,
1456 ) -> Option<EdgeProvenance> {
1457 self.inner.read().edge_provenance(id).copied()
1458 }
1459
1460 /// Returns a file provenance view (acquires a brief read lock).
1461 ///
1462 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1463 #[must_use]
1464 pub fn file_provenance(
1465 &self,
1466 id: crate::graph::unified::file::id::FileId,
1467 ) -> Option<OwnedFileProvenanceView> {
1468 let guard = self.inner.read();
1469 guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1470 content_hash: *v.content_hash,
1471 indexed_at: v.indexed_at,
1472 source_uri: v.source_uri,
1473 is_external: v.is_external,
1474 })
1475 }
1476
1477 // ------------------------------------------------------------------
1478 // Phase 2 binding-plane accessors (P2U03).
1479 // ------------------------------------------------------------------
1480
1481 /// Returns the scope arena from the underlying graph (acquires a brief
1482 /// read lock).
1483 ///
1484 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1485 /// this call site.
1486 #[must_use]
1487 pub fn scope_arena(&self) -> Arc<ScopeArena> {
1488 Arc::clone(&self.inner.read().scope_arena)
1489 }
1490
1491 /// Returns the alias table from the underlying graph (acquires a brief
1492 /// read lock).
1493 ///
1494 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1495 /// this call site.
1496 #[must_use]
1497 pub fn alias_table(&self) -> Arc<AliasTable> {
1498 Arc::clone(&self.inner.read().alias_table)
1499 }
1500
1501 /// Returns the shadow table from the underlying graph (acquires a brief
1502 /// read lock).
1503 ///
1504 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1505 /// this call site.
1506 #[must_use]
1507 pub fn shadow_table(&self) -> Arc<ShadowTable> {
1508 Arc::clone(&self.inner.read().shadow_table)
1509 }
1510
1511 /// Returns the scope provenance store from the underlying graph (acquires
1512 /// a brief read lock).
1513 ///
1514 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1515 /// this call site.
1516 #[must_use]
1517 pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1518 Arc::clone(&self.inner.read().scope_provenance_store)
1519 }
1520
1521 /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1522 ///
1523 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1524 #[must_use]
1525 pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1526 self.inner.read().scope_provenance(id).cloned()
1527 }
1528
1529 /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1530 /// brief read lock).
1531 ///
1532 /// Returns `None` if no provenance record is registered for that stable id.
1533 #[must_use]
1534 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1535 self.inner.read().scope_by_stable_id(stable)
1536 }
1537
1538 /// Returns the file segment table from the underlying graph (acquires a
1539 /// brief read lock).
1540 #[must_use]
1541 pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1542 Arc::clone(&self.inner.read().file_segments)
1543 }
1544
1545 /// Attempts to acquire a read lock without blocking.
1546 ///
1547 /// Returns `None` if the lock is currently held exclusively.
1548 #[inline]
1549 #[must_use]
1550 pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1551 self.inner.try_read()
1552 }
1553
1554 /// Attempts to acquire a write lock without blocking.
1555 ///
1556 /// Returns `None` if the lock is currently held by another thread.
1557 /// If successful, increments the epoch.
1558 #[inline]
1559 pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1560 self.inner.try_write().map(|mut guard| {
1561 self.epoch.fetch_add(1, Ordering::SeqCst);
1562 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1563 guard
1564 })
1565 }
1566}
1567
1568impl Default for ConcurrentCodeGraph {
1569 fn default() -> Self {
1570 Self::new()
1571 }
1572}
1573
1574impl fmt::Debug for ConcurrentCodeGraph {
1575 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1576 f.debug_struct("ConcurrentCodeGraph")
1577 .field("epoch", &self.epoch.load(Ordering::SeqCst))
1578 .finish_non_exhaustive()
1579 }
1580}
1581
/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OwnedFileProvenanceView {
    /// SHA-256 of the on-disk file bytes (owned copy, dereferenced from
    /// the borrowed view's `content_hash`).
    pub content_hash: [u8; 32],
    /// Unix-epoch seconds at which this file was registered.
    pub indexed_at: u64,
    /// Optional interned physical origin URI.
    pub source_uri: Option<StringId>,
    /// Whether this file originates from an external source.
    pub is_external: bool,
}
1595
1596/// Immutable snapshot of a `CodeGraph` for long-running queries.
1597///
1598/// `GraphSnapshot` holds Arc references to the graph components,
1599/// providing a consistent view that is isolated from concurrent mutations.
1600///
1601/// # Design
1602///
1603/// Snapshots are created via `CodeGraph::snapshot()` or
1604/// `ConcurrentCodeGraph::snapshot()`. They are:
1605///
1606/// - **Immutable**: No mutation methods available
1607/// - **Isolated**: Independent of future graph mutations
1608/// - **Cheap**: Only Arc clones, no data copying
1609/// - **Self-contained**: Can outlive the original graph/lock
1610///
1611/// # Usage
1612///
1613/// ```rust
1614/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1615///
1616/// let graph = ConcurrentCodeGraph::new();
1617///
1618/// // Create snapshot for a long query
1619/// let snapshot: GraphSnapshot = graph.snapshot();
1620///
1621/// // Snapshot can be used independently
1622/// let _epoch = snapshot.epoch();
1623/// ```
#[derive(Clone)]
pub struct GraphSnapshot {
    /// Node storage snapshot.
    nodes: Arc<NodeArena>,
    /// Edge storage snapshot.
    edges: Arc<BidirectionalEdgeStore>,
    /// String interner snapshot.
    strings: Arc<StringInterner>,
    /// File registry snapshot.
    files: Arc<FileRegistry>,
    /// Auxiliary indices snapshot.
    indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata snapshot.
    macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance snapshot (Phase 1).
    node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance snapshot (Phase 1).
    edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch at snapshot time.
    fact_epoch: u64,
    /// Epoch at snapshot time (for cursor validation).
    epoch: u64,
    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
    scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
    alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
    shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
    scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
    file_segments: Arc<FileSegmentTable>,
}
1657
1658impl GraphSnapshot {
1659 /// Returns a reference to the node arena.
1660 #[inline]
1661 #[must_use]
1662 pub fn nodes(&self) -> &NodeArena {
1663 &self.nodes
1664 }
1665
1666 /// Returns a reference to the bidirectional edge store.
1667 #[inline]
1668 #[must_use]
1669 pub fn edges(&self) -> &BidirectionalEdgeStore {
1670 &self.edges
1671 }
1672
1673 /// Returns a reference to the string interner.
1674 #[inline]
1675 #[must_use]
1676 pub fn strings(&self) -> &StringInterner {
1677 &self.strings
1678 }
1679
1680 /// Returns a reference to the file registry.
1681 #[inline]
1682 #[must_use]
1683 pub fn files(&self) -> &FileRegistry {
1684 &self.files
1685 }
1686
1687 /// Returns a reference to the auxiliary indices.
1688 #[inline]
1689 #[must_use]
1690 pub fn indices(&self) -> &AuxiliaryIndices {
1691 &self.indices
1692 }
1693
1694 /// Returns a reference to the macro boundary metadata store.
1695 #[inline]
1696 #[must_use]
1697 pub fn macro_metadata(&self) -> &NodeMetadataStore {
1698 &self.macro_metadata
1699 }
1700
1701 // ------------------------------------------------------------------
1702 // Phase 1 fact-layer provenance accessors (P1U09).
1703 // ------------------------------------------------------------------
1704
1705 /// Returns the monotonic fact-layer epoch.
1706 #[inline]
1707 #[must_use]
1708 pub fn fact_epoch(&self) -> u64 {
1709 self.fact_epoch
1710 }
1711
1712 /// Looks up node provenance by `NodeId`.
1713 #[inline]
1714 #[must_use]
1715 pub fn node_provenance(
1716 &self,
1717 id: crate::graph::unified::node::id::NodeId,
1718 ) -> Option<&NodeProvenance> {
1719 self.node_provenance.lookup(id)
1720 }
1721
1722 /// Looks up edge provenance by `EdgeId`.
1723 #[inline]
1724 #[must_use]
1725 pub fn edge_provenance(
1726 &self,
1727 id: crate::graph::unified::edge::id::EdgeId,
1728 ) -> Option<&EdgeProvenance> {
1729 self.edge_provenance.lookup(id)
1730 }
1731
1732 /// Returns a borrowed provenance view for a file.
1733 #[inline]
1734 #[must_use]
1735 pub fn file_provenance(
1736 &self,
1737 id: crate::graph::unified::file::id::FileId,
1738 ) -> Option<FileProvenanceView<'_>> {
1739 self.files.file_provenance(id)
1740 }
1741
1742 // ------------------------------------------------------------------
1743 // Phase 2 binding-plane accessors (P2U03).
1744 // ------------------------------------------------------------------
1745
1746 /// Returns a reference to the scope arena at snapshot time.
1747 ///
1748 /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1749 /// as it existed when `snapshot()` was called. Subsequent calls to
1750 /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1751 #[inline]
1752 #[must_use]
1753 pub fn scope_arena(&self) -> &ScopeArena {
1754 &self.scope_arena
1755 }
1756
1757 /// Returns a reference to the alias table at snapshot time.
1758 ///
1759 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1760 /// as it existed when `snapshot()` was called. Subsequent calls to
1761 /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1762 #[inline]
1763 #[must_use]
1764 pub fn alias_table(&self) -> &AliasTable {
1765 &self.alias_table
1766 }
1767
1768 /// Returns a reference to the shadow table at snapshot time.
1769 ///
1770 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1771 /// as it existed when `snapshot()` was called. Subsequent calls to
1772 /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1773 #[inline]
1774 #[must_use]
1775 pub fn shadow_table(&self) -> &ShadowTable {
1776 &self.shadow_table
1777 }
1778
1779 /// Returns a reference to the scope provenance store at snapshot time.
1780 ///
1781 /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1782 /// as it existed when `snapshot()` was called. Subsequent calls to
1783 /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1784 /// this view.
1785 #[inline]
1786 #[must_use]
1787 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1788 &self.scope_provenance_store
1789 }
1790
1791 /// Looks up scope provenance by `ScopeId` at snapshot time.
1792 ///
1793 /// Returns `None` if the slot is out of range, vacant, or the stored
1794 /// generation does not match (stale handle).
1795 #[inline]
1796 #[must_use]
1797 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1798 self.scope_provenance_store.lookup(id)
1799 }
1800
1801 /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1802 ///
1803 /// Returns `None` if no provenance record is registered for that stable id.
1804 #[inline]
1805 #[must_use]
1806 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1807 self.scope_provenance_store.scope_by_stable_id(stable)
1808 }
1809
1810 /// Returns a reference to the file segment table at snapshot time.
1811 #[inline]
1812 #[must_use]
1813 pub fn file_segments(&self) -> &FileSegmentTable {
1814 &self.file_segments
1815 }
1816
1817 /// Returns the epoch at which this snapshot was taken.
1818 ///
1819 /// This can be compared against the current graph epoch to
1820 /// detect if the graph has changed since the snapshot.
1821 #[inline]
1822 #[must_use]
1823 pub fn epoch(&self) -> u64 {
1824 self.epoch
1825 }
1826
1827 /// Returns `true` if this snapshot's epoch matches the given epoch.
1828 ///
1829 /// Use this to validate cursors before continuing pagination.
1830 #[inline]
1831 #[must_use]
1832 pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1833 self.epoch == other_epoch
1834 }
1835
1836 // ------------------------------------------------------------------
1837 // Phase 2 binding-plane facade accessor (P2U07).
1838 // ------------------------------------------------------------------
1839
1840 /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1841 ///
1842 /// The facade is the stable Phase 2 public API for scope/alias/shadow
1843 /// queries and witness-bearing resolution. It provides a single entry
1844 /// point (`resolve`) that returns both a `BindingResult` and an ordered
1845 /// step trace in a `BindingResolution`.
1846 ///
1847 /// # MVCC note
1848 ///
1849 /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1850 /// MVCC-consistent view of the graph at snapshot time. Callers from
1851 /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1852 /// pattern so the snapshot lifetime is explicit:
1853 ///
1854 /// ```rust,ignore
1855 /// // CodeGraph caller:
1856 /// let snapshot = graph.snapshot();
1857 /// let plane = snapshot.binding_plane();
1858 ///
1859 /// // ConcurrentCodeGraph caller:
1860 /// let read_guard = concurrent.read();
1861 /// let snapshot = read_guard.snapshot();
1862 /// let plane = snapshot.binding_plane();
1863 /// ```
1864 #[inline]
1865 #[must_use]
1866 pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
1867 crate::graph::unified::bind::plane::BindingPlane::new(self)
1868 }
1869
1870 // ============================================================================
1871 // Query Methods
1872 // ============================================================================
1873
1874 /// Finds nodes matching a pattern.
1875 ///
1876 /// Performs a simple substring match on node names and qualified names.
1877 /// Returns all matching node IDs.
1878 ///
1879 /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
1880 /// nodes — internal scaffolding the language plugins emit for
1881 /// binding-plane and scope analysis (e.g. the Go plugin's
1882 /// `<field:operand.field>` field-access shadows and the
1883 /// `<ident>@<offset>` per-binding-site Variable nodes from the
1884 /// local-scope resolver) — are filtered out by default. Internal
1885 /// callers that need to reach these nodes (binding plane, scope /
1886 /// alias / shadow analysis) use
1887 /// [`Self::find_by_pattern_with_options`] with
1888 /// `include_synthetic = true`.
1889 ///
1890 /// # Performance
1891 ///
1892 /// Optimized to iterate over unique strings in the interner (smaller set)
1893 /// rather than all nodes in the arena.
1894 ///
1895 /// # Arguments
1896 ///
1897 /// * `pattern` - The pattern to match (substring search)
1898 ///
1899 /// # Returns
1900 ///
1901 /// A vector of `NodeIds` for all matching nodes (synthetic
1902 /// placeholders excluded).
1903 #[must_use]
1904 pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
1905 self.find_by_pattern_with_options(pattern, false)
1906 }
1907
1908 /// Finds nodes matching a pattern with explicit control over synthetic
1909 /// placeholder visibility.
1910 ///
1911 /// `include_synthetic = false` is the default surface used by every
1912 /// user-facing caller (CLI `search`, MCP `semantic_search` /
1913 /// `pattern_search` / `relation_query`, etc.). Synthetic
1914 /// placeholders are suppressed via two parallel checks that must
1915 /// agree:
1916 ///
1917 /// 1. The authoritative `NodeMetadata::Synthetic` bit on the
1918 /// metadata store
1919 /// ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
1920 /// 2. The structural name-shape fallback
1921 /// ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
1922 /// for V10 snapshots written before the synthetic bit existed
1923 /// and for cross-file unification losers that retained their
1924 /// name but lost their metadata entry.
1925 ///
1926 /// Either check matching is sufficient to suppress the node. The
1927 /// design lives in `docs/development/public-issue-triage/`
1928 /// under the `C_SUPPRESS` unit; see also the rationale in
1929 /// [`crate::graph::unified::storage::metadata::NodeMetadata::Synthetic`].
1930 ///
1931 /// `include_synthetic = true` is **internal-only**. The binding
1932 /// plane, scope resolver, and rebuild's coverage gate use this
1933 /// path to reach synthetic nodes for their structural integrity
1934 /// checks. **No CLI / MCP surface should ever pass `true`.**
1935 #[must_use]
1936 pub fn find_by_pattern_with_options(
1937 &self,
1938 pattern: &str,
1939 include_synthetic: bool,
1940 ) -> Vec<crate::graph::unified::node::NodeId> {
1941 let mut matches = Vec::new();
1942
1943 // 1. Scan unique strings in interner for matches
1944 for (str_id, s) in self.strings.iter() {
1945 if s.contains(pattern) {
1946 // 2. If string matches, look up all nodes with this name
1947 // Check qualified name index
1948 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
1949 // Check simple name index
1950 matches.extend_from_slice(self.indices.by_name(str_id));
1951 }
1952 }
1953
1954 // Deduplicate matches (a node might match both qualified and simple name)
1955 matches.sort_unstable();
1956 matches.dedup();
1957
1958 if !include_synthetic {
1959 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
1960 }
1961
1962 matches
1963 }
1964
1965 /// Finds nodes whose interned simple **or** qualified name equals `name`,
1966 /// accepting dot- and Ruby-`#` qualified display form as a fallback for
1967 /// graph-canonical `::` qualified names.
1968 ///
1969 /// This is the canonical surface for **exact-name** lookups —
1970 /// shared by the CLI `--exact <pattern>` shorthand
1971 /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
1972 /// structural query planner's `name:` predicate
1973 /// (`sqry-db/src/planner/parse.rs`,
1974 /// `sqry-db/src/planner/execute.rs`). Both surfaces are
1975 /// contract-bound (DAG `B1_ALIGN`) to return the same set against
1976 /// any fixture: the CLI calls this method directly, while the
1977 /// planner uses the same interner + by-name index pair internally
1978 /// when scanning, then applies the same synthetic filter.
1979 ///
1980 /// **Synthetic suppression.** Synthetic placeholder nodes
1981 /// (Go-plugin `<field:operand.field>` shadows and
1982 /// `<ident>@<offset>` per-binding-site Variables; see
1983 /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
1984 /// are excluded via [`Self::is_node_synthetic`]. There is **no**
1985 /// `include_synthetic = true` variant for the exact-match surface
1986 /// because the synthetic name shapes the structural fallback
1987 /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
1988 /// name byte-for-byte; the metadata-bit channel is the only
1989 /// realistic leak vector and it is suppressed unconditionally.
1990 ///
1991 /// **Display fallback.** Exact lookup checks the literal input. When the
1992 /// input is qualified, language-aware display candidates win over raw
1993 /// canonical matches. This keeps native Rust `identity::T` from also
1994 /// returning a TypeScript type parameter whose internal canonical form is
1995 /// `identity::T` but whose display form is `identity.T`. If neither
1996 /// display nor literal lookup finds candidates, dot- and Ruby-`#`
1997 /// qualified inputs also check the graph-canonical `::` rewrite.
1998 ///
1999 /// # Performance
2000 ///
2001 /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2002 /// interned the resolver still tries a native-display to graph-canonical
2003 /// `::` fallback for user-facing qualified names before returning empty.
2004 ///
2005 /// # Arguments
2006 ///
2007 /// * `name` - The exact name to look up (no glob, no regex).
2008 ///
2009 /// # Returns
2010 ///
2011 /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2012 /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2013 /// name equals `name`, or matches for the graph-canonical `::` rewrite
2014 /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2015 /// literal or display candidates.
2016 #[must_use]
2017 pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2018 let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2019 if !is_qualified {
2020 // Bare name lookup: the interned `by_name` index covers
2021 // every language and is O(1); no display scan is needed.
2022 return self.find_by_exact_interned_name(name);
2023 }
2024 // Qualified name: each plugin stores its native form
2025 // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2026 // Ruby `Ledger#promotedField`). Scan computed display names so
2027 // a single dotted user query resolves to all language matches,
2028 // then fall back to the interned form (and the graph-canonical
2029 // `::` rewrite) when display yields nothing.
2030 let mut exact_matches = self.find_by_exact_display_name(name);
2031 if exact_matches.is_empty() {
2032 exact_matches = self.find_by_exact_interned_name(name);
2033 if exact_matches.is_empty() && !name.contains("::") {
2034 let canonical = name.replace(['.', '#'], "::");
2035 exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2036 exact_matches.sort_unstable();
2037 exact_matches.dedup();
2038 }
2039 }
2040 exact_matches
2041 }
2042
2043 fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2044 let mut matches = self
2045 .iter_nodes()
2046 .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2047 .filter_map(|(node_id, entry)| {
2048 let qualified = entry
2049 .qualified_name
2050 .and_then(|sid| self.strings.resolve(sid))?;
2051 let display = self.files.language_for_file(entry.file).map_or_else(
2052 || qualified.to_string(),
2053 |language| {
2054 display_graph_qualified_name(
2055 language,
2056 qualified.as_ref(),
2057 entry.kind,
2058 entry.is_static,
2059 )
2060 },
2061 );
2062 (display == name).then_some(node_id)
2063 })
2064 .collect::<Vec<_>>();
2065 matches.sort_unstable();
2066 matches.dedup();
2067 matches
2068 }
2069
2070 fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2071 let Some(str_id) = self.strings.get(name) else {
2072 return Vec::new();
2073 };
2074
2075 let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2076 matches.extend_from_slice(self.indices.by_name(str_id));
2077 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2078 matches.sort_unstable();
2079 matches.dedup();
2080 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2081 matches
2082 }
2083
2084 /// Returns `true` if the node should be treated as a synthetic
2085 /// placeholder for user-facing surfaces.
2086 ///
2087 /// Combines the metadata-store flag and the structural name-shape
2088 /// fallback (see [`Self::find_by_pattern_with_options`] for the
2089 /// full rationale). Returns `false` for missing nodes (an unknown
2090 /// `NodeId` is not "synthetic" — it is "not present").
2091 #[must_use]
2092 pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2093 // Authoritative check: metadata-store bit (NodeMetadata::Synthetic).
2094 if self.macro_metadata.is_synthetic(node_id) {
2095 return true;
2096 }
2097 // Structural fallback: name shape recognised as synthetic.
2098 // Required for V10 snapshots written before the bit existed,
2099 // for unification losers that lost their metadata entry, and
2100 // as defence-in-depth against future plugins forgetting to
2101 // flip the bit.
2102 let Some(entry) = self.nodes.get(node_id) else {
2103 return false;
2104 };
2105 if entry.is_unified_loser() {
2106 // Already invisible for other reasons; do not also flag as synthetic.
2107 return false;
2108 }
2109 let Some(name) = self.strings.resolve(entry.name) else {
2110 return false;
2111 };
2112 crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2113 name.as_ref(),
2114 )
2115 }
2116
2117 /// Gets all callees of a node (functions called by this node).
2118 ///
2119 /// Queries the forward edge store for all Calls edges from this node.
2120 ///
2121 /// # Arguments
2122 ///
2123 /// * `node` - The node ID to query
2124 ///
2125 /// # Returns
2126 ///
2127 /// A vector of `NodeIds` representing functions called by this node.
2128 #[must_use]
2129 pub fn get_callees(
2130 &self,
2131 node: crate::graph::unified::node::NodeId,
2132 ) -> Vec<crate::graph::unified::node::NodeId> {
2133 use crate::graph::unified::edge::EdgeKind;
2134
2135 self.edges
2136 .edges_from(node)
2137 .into_iter()
2138 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2139 .map(|edge| edge.target)
2140 .collect()
2141 }
2142
2143 /// Gets all callers of a node (functions that call this node).
2144 ///
2145 /// Queries the reverse edge store for all Calls edges to this node.
2146 ///
2147 /// # Arguments
2148 ///
2149 /// * `node` - The node ID to query
2150 ///
2151 /// # Returns
2152 ///
2153 /// A vector of `NodeIds` representing functions that call this node.
2154 #[must_use]
2155 pub fn get_callers(
2156 &self,
2157 node: crate::graph::unified::node::NodeId,
2158 ) -> Vec<crate::graph::unified::node::NodeId> {
2159 use crate::graph::unified::edge::EdgeKind;
2160
2161 self.edges
2162 .edges_to(node)
2163 .into_iter()
2164 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2165 .map(|edge| edge.source)
2166 .collect()
2167 }
2168
2169 /// Iterates over all nodes in the graph.
2170 ///
2171 /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2172 /// occupied slots in the arena.
2173 ///
2174 /// # Returns
2175 ///
2176 /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2177 pub fn iter_nodes(
2178 &self,
2179 ) -> impl Iterator<
2180 Item = (
2181 crate::graph::unified::node::NodeId,
2182 &crate::graph::unified::storage::arena::NodeEntry,
2183 ),
2184 > {
2185 self.nodes.iter()
2186 }
2187
2188 /// Iterates over all edges in the graph.
2189 ///
2190 /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2191 /// all edges in the forward edge store.
2192 ///
2193 /// # Returns
2194 ///
2195 /// An iterator over edge tuples.
2196 pub fn iter_edges(
2197 &self,
2198 ) -> impl Iterator<
2199 Item = (
2200 crate::graph::unified::node::NodeId,
2201 crate::graph::unified::node::NodeId,
2202 crate::graph::unified::edge::EdgeKind,
2203 ),
2204 > + '_ {
2205 // Iterate over all nodes in the arena and get their outgoing edges
2206 self.nodes.iter().flat_map(move |(node_id, _entry)| {
2207 // Get all edges from this node
2208 self.edges
2209 .edges_from(node_id)
2210 .into_iter()
2211 .map(move |edge| (node_id, edge.target, edge.kind))
2212 })
2213 }
2214
2215 /// Gets a node entry by ID.
2216 ///
2217 /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2218 /// if the ID is invalid or stale.
2219 ///
2220 /// # Arguments
2221 ///
2222 /// * `id` - The node ID to look up
2223 ///
2224 /// # Returns
2225 ///
2226 /// A reference to the `NodeEntry`, or None if not found.
2227 #[must_use]
2228 pub fn get_node(
2229 &self,
2230 id: crate::graph::unified::node::NodeId,
2231 ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2232 self.nodes.get(id)
2233 }
2234}
2235
impl fmt::Debug for GraphSnapshot {
    // Intentionally terse: a snapshot holds large Arc-backed stores, so only
    // the node count and epoch are printed; `finish_non_exhaustive` renders
    // the omitted fields as `..`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("GraphSnapshot")
            .field("nodes", &self.nodes.len())
            .field("epoch", &self.epoch)
            .finish_non_exhaustive()
    }
}
2244
/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
///
/// This lets helpers that only *read* graph state (name-matching, relation
/// traversal, reference lookups) be written once and called from both the
/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed;
/// every method mirrors an inherent accessor on the implementing type.
pub trait GraphAccess {
    /// Returns the node arena (read-only).
    fn nodes(&self) -> &NodeArena;
    /// Returns the bidirectional edge store (read-only).
    fn edges(&self) -> &BidirectionalEdgeStore;
    /// Returns the string interner (read-only).
    fn strings(&self) -> &StringInterner;
    /// Returns the file registry (read-only).
    fn files(&self) -> &FileRegistry;
    /// Returns the auxiliary indices (read-only).
    fn indices(&self) -> &AuxiliaryIndices;
}
2263
impl GraphAccess for CodeGraph {
    // Each method delegates via the fully-qualified inherent accessor
    // (`CodeGraph::nodes(self)` etc.) so the call cannot resolve back into
    // this trait method of the same name.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        CodeGraph::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        CodeGraph::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        CodeGraph::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        CodeGraph::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        CodeGraph::indices(self)
    }
}
2286
impl GraphAccess for GraphSnapshot {
    // Each method delegates via the fully-qualified inherent accessor
    // (`GraphSnapshot::nodes(self)` etc.) so the call cannot resolve back
    // into this trait method of the same name.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        GraphSnapshot::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        GraphSnapshot::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        GraphSnapshot::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        GraphSnapshot::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        GraphSnapshot::indices(self)
    }
}
2309
2310#[cfg(test)]
2311mod tests {
2312 use super::*;
2313 use crate::graph::unified::{
2314 FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2315 SymbolResolutionOutcome,
2316 };
2317
2318 fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2319 match snapshot.resolve_symbol(&SymbolQuery {
2320 symbol,
2321 file_scope: FileScope::Any,
2322 mode: ResolutionMode::Strict,
2323 }) {
2324 SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2325 SymbolResolutionOutcome::NotFound
2326 | SymbolResolutionOutcome::FileNotIndexed
2327 | SymbolResolutionOutcome::Ambiguous(_) => None,
2328 }
2329 }
2330
2331 fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2332 match snapshot.find_symbol_candidates(&SymbolQuery {
2333 symbol,
2334 file_scope: FileScope::Any,
2335 mode: ResolutionMode::AllowSuffixCandidates,
2336 }) {
2337 SymbolCandidateOutcome::Candidates(candidates) => candidates,
2338 SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2339 }
2340 }
2341
2342 #[test]
2343 fn test_code_graph_new() {
2344 let graph = CodeGraph::new();
2345 assert_eq!(graph.epoch(), 0);
2346 assert_eq!(graph.nodes().len(), 0);
2347 }
2348
2349 #[test]
2350 fn test_code_graph_default() {
2351 let graph = CodeGraph::default();
2352 assert_eq!(graph.epoch(), 0);
2353 }
2354
2355 #[test]
2356 fn test_code_graph_snapshot() {
2357 let graph = CodeGraph::new();
2358 let snapshot = graph.snapshot();
2359 assert_eq!(snapshot.epoch(), 0);
2360 assert_eq!(snapshot.nodes().len(), 0);
2361 }
2362
2363 #[test]
2364 fn test_code_graph_bump_epoch() {
2365 let mut graph = CodeGraph::new();
2366 assert_eq!(graph.epoch(), 0);
2367 assert_eq!(graph.bump_epoch(), 1);
2368 assert_eq!(graph.epoch(), 1);
2369 assert_eq!(graph.bump_epoch(), 2);
2370 assert_eq!(graph.epoch(), 2);
2371 }
2372
2373 #[test]
2374 fn test_code_graph_set_epoch() {
2375 let mut graph = CodeGraph::new();
2376 graph.set_epoch(42);
2377 assert_eq!(graph.epoch(), 42);
2378 }
2379
2380 #[test]
2381 fn test_code_graph_from_components() {
2382 let nodes = NodeArena::new();
2383 let edges = BidirectionalEdgeStore::new();
2384 let strings = StringInterner::new();
2385 let files = FileRegistry::new();
2386 let indices = AuxiliaryIndices::new();
2387 let macro_metadata = NodeMetadataStore::new();
2388
2389 let graph =
2390 CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2391 assert_eq!(graph.epoch(), 0);
2392 }
2393
2394 #[test]
2395 fn test_code_graph_mut_accessors() {
2396 let mut graph = CodeGraph::new();
2397
2398 // Access mutable references - should not panic
2399 let _nodes = graph.nodes_mut();
2400 let _edges = graph.edges_mut();
2401 let _strings = graph.strings_mut();
2402 let _files = graph.files_mut();
2403 let _indices = graph.indices_mut();
2404 }
2405
2406 #[test]
2407 fn test_code_graph_snapshot_isolation() {
2408 let mut graph = CodeGraph::new();
2409 let snapshot1 = graph.snapshot();
2410
2411 // Mutate the graph
2412 graph.bump_epoch();
2413
2414 let snapshot2 = graph.snapshot();
2415
2416 // Snapshots should have different epochs
2417 assert_eq!(snapshot1.epoch(), 0);
2418 assert_eq!(snapshot2.epoch(), 1);
2419 }
2420
2421 #[test]
2422 fn test_code_graph_debug() {
2423 let graph = CodeGraph::new();
2424 let debug_str = format!("{graph:?}");
2425 assert!(debug_str.contains("CodeGraph"));
2426 assert!(debug_str.contains("epoch"));
2427 }
2428
    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
    /// accounting: adding a `ConfidenceMetadata` entry with known inner
    /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
    /// of those capacities plus Vec<String> slot overhead.
    #[test]
    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
        let mut graph = CodeGraph::new();
        // Seed one confidence entry so the HashMap has non-zero capacity;
        // reserve slack so the next insert cannot rehash.
        graph.set_confidence({
            let mut m = HashMap::with_capacity(8);
            m.insert("seed".to_string(), ConfidenceMetadata::default());
            m
        });
        let before = graph.heap_bytes();
        let before_cap = graph.confidence.capacity();

        // Capture each String's capacity *before* the values are moved into
        // the metadata struct — the expected delta is computed from these.
        let lim1 = String::from("no type inference");
        let lim2 = String::from("no generic specialization");
        let feat1 = String::from("rust-analyzer");
        let l1 = lim1.capacity();
        let l2 = lim2.capacity();
        let f1 = feat1.capacity();

        let limitations = vec![lim1, lim2];
        let lim_vec_cap = limitations.capacity();

        let unavailable_features = vec![feat1];
        let feat_vec_cap = unavailable_features.capacity();

        let key = String::from("rust");
        let key_cap = key.capacity();
        graph.confidence.insert(
            key,
            ConfidenceMetadata {
                limitations,
                unavailable_features,
                ..Default::default()
            },
        );
        // If the map rehashed, the table allocation changed and the
        // exact-byte delta below would be meaningless — fail loudly.
        assert_eq!(
            graph.confidence.capacity(),
            before_cap,
            "prerequisite: confidence HashMap must not rehash during the test insert",
        );

        let after = graph.heap_bytes();
        // key bytes + each Vec's slot array + each String's own buffer.
        let expected_inner = key_cap
            + lim_vec_cap * std::mem::size_of::<String>()
            + l1
            + l2
            + feat_vec_cap * std::mem::size_of::<String>()
            + f1;
        assert_eq!(
            after - before,
            expected_inner,
            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
        );
    }

    #[test]
    fn test_codegraph_heap_bytes_grows_with_content() {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // An empty graph reports some heap bytes (FileRegistry seeds index 0
        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
        // etc.) but the value must be finite and well under the 100 MB cap.
        let empty = CodeGraph::new();
        let empty_bytes = empty.heap_bytes();
        assert!(
            empty_bytes < 100 * 1024 * 1024,
            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
        );

        // Populate a small graph: 32 functions, each with its own name,
        // qualified name, and file registration.
        let mut graph = CodeGraph::new();
        for i in 0..32u32 {
            let name = format!("sym_{i}");
            let qual = format!("module::sym_{i}");
            let file = format!("file_{i}.rs");

            let name_id = graph.strings_mut().intern(&name).unwrap();
            let qual_id = graph.strings_mut().intern(&qual).unwrap();
            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();

            let entry =
                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
            let node_id = graph.nodes_mut().alloc(entry).unwrap();
            graph
                .indices_mut()
                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
        }

        // Monotonicity, not exact bytes: populated > empty, both bounded.
        let populated_bytes = graph.heap_bytes();
        assert!(
            populated_bytes > 0,
            "populated graph should report nonzero heap bytes"
        );
        assert!(
            populated_bytes > empty_bytes,
            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
        );
        assert!(
            populated_bytes < 100 * 1024 * 1024,
            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
        );
    }
2537
2538 #[test]
2539 fn test_concurrent_code_graph_new() {
2540 let graph = ConcurrentCodeGraph::new();
2541 assert_eq!(graph.epoch(), 0);
2542 }
2543
2544 #[test]
2545 fn test_concurrent_code_graph_default() {
2546 let graph = ConcurrentCodeGraph::default();
2547 assert_eq!(graph.epoch(), 0);
2548 }
2549
2550 #[test]
2551 fn test_concurrent_code_graph_from_graph() {
2552 let mut inner = CodeGraph::new();
2553 inner.set_epoch(10);
2554 let graph = ConcurrentCodeGraph::from_graph(inner);
2555 assert_eq!(graph.epoch(), 10);
2556 }
2557
2558 #[test]
2559 fn test_concurrent_code_graph_read() {
2560 let graph = ConcurrentCodeGraph::new();
2561 let guard = graph.read();
2562 assert_eq!(guard.epoch(), 0);
2563 assert_eq!(guard.nodes().len(), 0);
2564 }
2565
2566 #[test]
2567 fn test_concurrent_code_graph_write_increments_epoch() {
2568 let graph = ConcurrentCodeGraph::new();
2569 assert_eq!(graph.epoch(), 0);
2570
2571 {
2572 let guard = graph.write();
2573 assert_eq!(guard.epoch(), 1);
2574 }
2575
2576 assert_eq!(graph.epoch(), 1);
2577
2578 {
2579 let _guard = graph.write();
2580 }
2581
2582 assert_eq!(graph.epoch(), 2);
2583 }
2584
2585 #[test]
2586 fn test_concurrent_code_graph_snapshot() {
2587 let graph = ConcurrentCodeGraph::new();
2588
2589 {
2590 let _guard = graph.write();
2591 }
2592
2593 let snapshot = graph.snapshot();
2594 assert_eq!(snapshot.epoch(), 1);
2595 }
2596
2597 #[test]
2598 fn test_concurrent_code_graph_try_read() {
2599 let graph = ConcurrentCodeGraph::new();
2600 let guard = graph.try_read();
2601 assert!(guard.is_some());
2602 }
2603
2604 #[test]
2605 fn test_concurrent_code_graph_try_write() {
2606 let graph = ConcurrentCodeGraph::new();
2607 let guard = graph.try_write();
2608 assert!(guard.is_some());
2609 assert_eq!(graph.epoch(), 1);
2610 }
2611
2612 #[test]
2613 fn test_concurrent_code_graph_debug() {
2614 let graph = ConcurrentCodeGraph::new();
2615 let debug_str = format!("{graph:?}");
2616 assert!(debug_str.contains("ConcurrentCodeGraph"));
2617 assert!(debug_str.contains("epoch"));
2618 }
2619
2620 #[test]
2621 fn test_graph_snapshot_accessors() {
2622 let graph = CodeGraph::new();
2623 let snapshot = graph.snapshot();
2624
2625 // All accessors should work
2626 let _nodes = snapshot.nodes();
2627 let _edges = snapshot.edges();
2628 let _strings = snapshot.strings();
2629 let _files = snapshot.files();
2630 let _indices = snapshot.indices();
2631 let _epoch = snapshot.epoch();
2632 }
2633
2634 #[test]
2635 fn test_graph_snapshot_epoch_matches() {
2636 let graph = CodeGraph::new();
2637 let snapshot = graph.snapshot();
2638
2639 assert!(snapshot.epoch_matches(0));
2640 assert!(!snapshot.epoch_matches(1));
2641 }
2642
2643 #[test]
2644 fn test_graph_snapshot_clone() {
2645 let graph = CodeGraph::new();
2646 let snapshot1 = graph.snapshot();
2647 let snapshot2 = snapshot1.clone();
2648
2649 assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2650 }
2651
2652 #[test]
2653 fn test_graph_snapshot_debug() {
2654 let graph = CodeGraph::new();
2655 let snapshot = graph.snapshot();
2656 let debug_str = format!("{snapshot:?}");
2657 assert!(debug_str.contains("GraphSnapshot"));
2658 assert!(debug_str.contains("epoch"));
2659 }
2660
2661 #[test]
2662 fn test_multiple_readers() {
2663 let graph = ConcurrentCodeGraph::new();
2664
2665 // Multiple readers should be able to acquire locks simultaneously
2666 let guard1 = graph.read();
2667 let guard2 = graph.read();
2668 let guard3 = graph.read();
2669
2670 assert_eq!(guard1.epoch(), 0);
2671 assert_eq!(guard2.epoch(), 0);
2672 assert_eq!(guard3.epoch(), 0);
2673 }
2674
2675 #[test]
2676 fn test_code_graph_clone() {
2677 let mut graph = CodeGraph::new();
2678 graph.bump_epoch();
2679
2680 let cloned = graph.clone();
2681 assert_eq!(cloned.epoch(), 1);
2682 }
2683
2684 #[test]
2685 fn test_epoch_wrapping() {
2686 let mut graph = CodeGraph::new();
2687 graph.set_epoch(u64::MAX);
2688 let new_epoch = graph.bump_epoch();
2689 assert_eq!(new_epoch, 0); // Should wrap around
2690 }
2691
2692 // ============================================================================
2693 // Query method tests
2694 // ============================================================================
2695
2696 #[test]
2697 fn test_snapshot_resolve_symbol() {
2698 use crate::graph::unified::node::NodeKind;
2699 use crate::graph::unified::storage::arena::NodeEntry;
2700 use std::path::Path;
2701
2702 let mut graph = CodeGraph::new();
2703
2704 // Add some nodes with qualified names
2705 let name_id = graph.strings_mut().intern("test_func").unwrap();
2706 let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2707 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2708
2709 let entry =
2710 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2711
2712 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2713 graph.indices_mut().add(
2714 node_id,
2715 NodeKind::Function,
2716 name_id,
2717 Some(qual_name_id),
2718 file_id,
2719 );
2720
2721 let snapshot = graph.snapshot();
2722
2723 // Find by qualified name
2724 let found = resolve_symbol_strict(&snapshot, "module::test_func");
2725 assert_eq!(found, Some(node_id));
2726
2727 // Find by exact simple name
2728 let found2 = resolve_symbol_strict(&snapshot, "test_func");
2729 assert_eq!(found2, Some(node_id));
2730
2731 // Not found
2732 assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2733 }
2734
2735 #[test]
2736 fn test_snapshot_find_by_pattern() {
2737 use crate::graph::unified::node::NodeKind;
2738 use crate::graph::unified::storage::arena::NodeEntry;
2739 use std::path::Path;
2740
2741 let mut graph = CodeGraph::new();
2742
2743 // Add nodes with different names
2744 let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2745 let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2746 let name3 = graph.strings_mut().intern("qux_test").unwrap();
2747 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2748
2749 let node1 = graph
2750 .nodes_mut()
2751 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2752 .unwrap();
2753 let node2 = graph
2754 .nodes_mut()
2755 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2756 .unwrap();
2757 let node3 = graph
2758 .nodes_mut()
2759 .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2760 .unwrap();
2761
2762 graph
2763 .indices_mut()
2764 .add(node1, NodeKind::Function, name1, None, file_id);
2765 graph
2766 .indices_mut()
2767 .add(node2, NodeKind::Function, name2, None, file_id);
2768 graph
2769 .indices_mut()
2770 .add(node3, NodeKind::Function, name3, None, file_id);
2771
2772 let snapshot = graph.snapshot();
2773
2774 // Find by pattern
2775 let matches = snapshot.find_by_pattern("bar");
2776 assert_eq!(matches.len(), 2);
2777 assert!(matches.contains(&node1));
2778 assert!(matches.contains(&node2));
2779
2780 // Find single match
2781 let matches = snapshot.find_by_pattern("qux");
2782 assert_eq!(matches.len(), 1);
2783 assert_eq!(matches[0], node3);
2784
2785 // No matches
2786 let matches = snapshot.find_by_pattern("nonexistent");
2787 assert!(matches.is_empty());
2788 }
2789
    #[test]
    fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
        // C_SUPPRESS: synthetic placeholder nodes (Go plugin
        // `<field:operand.field>` shadows + `<ident>@<offset>`
        // per-binding-site Variables) must NOT surface from
        // find_by_pattern, but must still be reachable via
        // find_by_pattern_with_options(_, true).
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // Fixture: two real symbols (a Property and a local Variable) plus
        // three synthetic placeholders, all in one Go file and all
        // containing the substring "NeedTags".
        let mut graph = CodeGraph::new();
        let real_property = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
        let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        // Allocate all five fixture nodes in the arena.
        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
            .unwrap();
        let syn_a_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_a,
                file_id,
            ))
            .unwrap();
        let syn_b_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_b,
                file_id,
            ))
            .unwrap();

        // Register every node in the auxiliary indices so pattern lookups
        // can see them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, real_property, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            real_local_var,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_a_id,
            NodeKind::Variable,
            synthetic_offset_a,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_b_id,
            NodeKind::Variable,
            synthetic_offset_b,
            None,
            file_id,
        );

        // Flag two of them via the metadata-store bit (the canonical
        // Go-plugin emission path) and leave one (`<field:...>`) only
        // covered by the structural name-shape fallback to verify both
        // recognition channels suppress the leak.
        graph.macro_metadata_mut().mark_synthetic(syn_a_id);
        graph.macro_metadata_mut().mark_synthetic(syn_b_id);

        let snapshot = graph.snapshot();

        // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
        let matches = snapshot.find_by_pattern("NeedTags");
        assert!(matches.contains(&prop_id), "Property must be surfaced");
        assert!(
            matches.contains(&local_var_id),
            "real local var must be surfaced"
        );
        assert!(
            !matches.contains(&syn_field_id),
            "<field:...> synthetic must be suppressed (name-shape fallback)"
        );
        assert!(
            !matches.contains(&syn_a_id),
            "NeedTags@469 must be suppressed (metadata bit)"
        );
        assert!(
            !matches.contains(&syn_b_id),
            "NeedTags@508 must be suppressed (metadata bit)"
        );
        assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");

        // Internal include-synthetic surface (binding plane, scope analysis):
        // every node remains reachable.
        let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
        assert_eq!(
            all_matches.len(),
            5,
            "include_synthetic surfaces everything"
        );
        assert!(all_matches.contains(&prop_id));
        assert!(all_matches.contains(&local_var_id));
        assert!(all_matches.contains(&syn_field_id));
        assert!(all_matches.contains(&syn_a_id));
        assert!(all_matches.contains(&syn_b_id));

        // is_node_synthetic exposed for surface-level filters
        // (e.g., MCP semantic_search/relation_query post-filters).
        assert!(snapshot.is_node_synthetic(syn_field_id));
        assert!(snapshot.is_node_synthetic(syn_a_id));
        assert!(snapshot.is_node_synthetic(syn_b_id));
        assert!(!snapshot.is_node_synthetic(prop_id));
        assert!(!snapshot.is_node_synthetic(local_var_id));
    }
2928
    #[test]
    #[allow(clippy::too_many_lines)]
    fn find_by_exact_name_aligns_with_planner_name_predicate() {
        // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
        // surface for the CLI `--exact NeedTags` shorthand and the
        // planner's `name:NeedTags` predicate. Both paths must return
        // the same set against this fixture.
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        // Property nodes carry the package-qualified name as
        // `entry.name` (Go plugin convention; see `helper.rs`'s
        // `semantic_name_for_node_input`).
        let property_qname = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field_name = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
        // "NeedTagsHelper" exists purely to prove exact lookup is not a
        // substring match; "Other" exercises the `::`-canonical fallback.
        let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
        let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
        let display_fallback_qname = graph
            .strings_mut()
            .intern("main::SelectorSource::Other")
            .unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        // Allocate the six fixture nodes.
        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_field_name,
                file_id,
            ))
            .unwrap();
        let syn_offset_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_name,
                file_id,
            ))
            .unwrap();
        let unrelated_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
            .unwrap();
        let display_fallback_id = graph
            .nodes_mut()
            .alloc(
                NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
                    .with_qualified_name(display_fallback_qname),
            )
            .unwrap();

        // Register every node in the auxiliary name indices so exact-name
        // lookups can see them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, property_qname, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            local_var_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_offset_id,
            NodeKind::Variable,
            synthetic_offset_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            unrelated_id,
            NodeKind::Function,
            unrelated_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            display_fallback_id,
            NodeKind::Property,
            display_fallback_name,
            Some(display_fallback_qname),
            file_id,
        );

        // Mark the offset-suffixed synthetic via the metadata bit so we
        // exercise both recognition channels in this fixture.
        graph.macro_metadata_mut().mark_synthetic(syn_offset_id);

        let snapshot = graph.snapshot();

        // Exact-name lookup on "NeedTags" — should pick up only the
        // local variable (its `entry.name` is exactly "NeedTags"); it
        // must NOT pick up the Property (qualified name contains but
        // does not equal "NeedTags") and must NOT pick up either
        // synthetic placeholder.
        let exact = snapshot.find_by_exact_name("NeedTags");
        assert_eq!(
            exact,
            vec![local_var_id],
            "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
        );

        // The Property's full qualified name is exact-addressable.
        let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
        assert_eq!(qualified, vec![prop_id]);

        // Dot-qualified display form falls back to graph-canonical `::` only
        // when the exact dot string was absent.
        let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
        assert_eq!(display_fallback, vec![display_fallback_id]);

        // Substring-only matches must not surface from exact lookup.
        assert!(
            snapshot
                .find_by_exact_name("NeedTagsHelper")
                .contains(&unrelated_id)
        );
        assert!(
            !snapshot
                .find_by_exact_name("NeedTags")
                .contains(&unrelated_id),
            "exact 'NeedTags' must not match 'NeedTagsHelper'"
        );

        // Unknown name short-circuits to an empty vec without
        // scanning any nodes.
        assert!(
            snapshot
                .find_by_exact_name("ThisStringIsNotInterned")
                .is_empty()
        );
    }
3085
3086 #[test]
3087 fn test_snapshot_get_callees() {
3088 use crate::graph::unified::edge::EdgeKind;
3089 use crate::graph::unified::node::NodeKind;
3090 use crate::graph::unified::storage::arena::NodeEntry;
3091 use std::path::Path;
3092
3093 let mut graph = CodeGraph::new();
3094
3095 // Create caller and callee nodes
3096 let caller_name = graph.strings_mut().intern("caller").unwrap();
3097 let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3098 let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3099 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3100
3101 let caller_id = graph
3102 .nodes_mut()
3103 .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3104 .unwrap();
3105 let callee1_id = graph
3106 .nodes_mut()
3107 .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3108 .unwrap();
3109 let callee2_id = graph
3110 .nodes_mut()
3111 .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3112 .unwrap();
3113
3114 // Add call edges
3115 graph.edges_mut().add_edge(
3116 caller_id,
3117 callee1_id,
3118 EdgeKind::Calls {
3119 argument_count: 0,
3120 is_async: false,
3121 },
3122 file_id,
3123 );
3124 graph.edges_mut().add_edge(
3125 caller_id,
3126 callee2_id,
3127 EdgeKind::Calls {
3128 argument_count: 0,
3129 is_async: false,
3130 },
3131 file_id,
3132 );
3133
3134 let snapshot = graph.snapshot();
3135
3136 // Query callees
3137 let callees = snapshot.get_callees(caller_id);
3138 assert_eq!(callees.len(), 2);
3139 assert!(callees.contains(&callee1_id));
3140 assert!(callees.contains(&callee2_id));
3141
3142 // Node with no callees
3143 let callees = snapshot.get_callees(callee1_id);
3144 assert!(callees.is_empty());
3145 }
3146
3147 #[test]
3148 fn test_snapshot_get_callers() {
3149 use crate::graph::unified::edge::EdgeKind;
3150 use crate::graph::unified::node::NodeKind;
3151 use crate::graph::unified::storage::arena::NodeEntry;
3152 use std::path::Path;
3153
3154 let mut graph = CodeGraph::new();
3155
3156 // Create caller and callee nodes
3157 let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3158 let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3159 let callee_name = graph.strings_mut().intern("callee").unwrap();
3160 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3161
3162 let caller1_id = graph
3163 .nodes_mut()
3164 .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3165 .unwrap();
3166 let caller2_id = graph
3167 .nodes_mut()
3168 .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3169 .unwrap();
3170 let callee_id = graph
3171 .nodes_mut()
3172 .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3173 .unwrap();
3174
3175 // Add call edges
3176 graph.edges_mut().add_edge(
3177 caller1_id,
3178 callee_id,
3179 EdgeKind::Calls {
3180 argument_count: 0,
3181 is_async: false,
3182 },
3183 file_id,
3184 );
3185 graph.edges_mut().add_edge(
3186 caller2_id,
3187 callee_id,
3188 EdgeKind::Calls {
3189 argument_count: 0,
3190 is_async: false,
3191 },
3192 file_id,
3193 );
3194
3195 let snapshot = graph.snapshot();
3196
3197 // Query callers
3198 let callers = snapshot.get_callers(callee_id);
3199 assert_eq!(callers.len(), 2);
3200 assert!(callers.contains(&caller1_id));
3201 assert!(callers.contains(&caller2_id));
3202
3203 // Node with no callers
3204 let callers = snapshot.get_callers(caller1_id);
3205 assert!(callers.is_empty());
3206 }
3207
3208 #[test]
3209 fn test_snapshot_find_symbol_candidates() {
3210 use crate::graph::unified::node::NodeKind;
3211 use crate::graph::unified::storage::arena::NodeEntry;
3212 use std::path::Path;
3213
3214 let mut graph = CodeGraph::new();
3215
3216 // Add nodes with same symbol name but different qualified names
3217 let symbol_name = graph.strings_mut().intern("test").unwrap();
3218 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3219
3220 let node1 = graph
3221 .nodes_mut()
3222 .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3223 .unwrap();
3224 let node2 = graph
3225 .nodes_mut()
3226 .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3227 .unwrap();
3228
3229 // Add a different symbol
3230 let other_name = graph.strings_mut().intern("other").unwrap();
3231 let node3 = graph
3232 .nodes_mut()
3233 .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3234 .unwrap();
3235
3236 graph
3237 .indices_mut()
3238 .add(node1, NodeKind::Function, symbol_name, None, file_id);
3239 graph
3240 .indices_mut()
3241 .add(node2, NodeKind::Method, symbol_name, None, file_id);
3242 graph
3243 .indices_mut()
3244 .add(node3, NodeKind::Function, other_name, None, file_id);
3245
3246 let snapshot = graph.snapshot();
3247
3248 // Find by symbol
3249 let matches = candidate_nodes(&snapshot, "test");
3250 assert_eq!(matches.len(), 2);
3251 assert!(matches.contains(&node1));
3252 assert!(matches.contains(&node2));
3253
3254 // Find other symbol
3255 let matches = candidate_nodes(&snapshot, "other");
3256 assert_eq!(matches.len(), 1);
3257 assert_eq!(matches[0], node3);
3258
3259 // No matches
3260 let matches = candidate_nodes(&snapshot, "nonexistent");
3261 assert!(matches.is_empty());
3262 }
3263
3264 #[test]
3265 fn test_snapshot_iter_nodes() {
3266 use crate::graph::unified::node::NodeKind;
3267 use crate::graph::unified::storage::arena::NodeEntry;
3268 use std::path::Path;
3269
3270 let mut graph = CodeGraph::new();
3271
3272 // Add some nodes
3273 let name1 = graph.strings_mut().intern("func1").unwrap();
3274 let name2 = graph.strings_mut().intern("func2").unwrap();
3275 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3276
3277 let node1 = graph
3278 .nodes_mut()
3279 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3280 .unwrap();
3281 let node2 = graph
3282 .nodes_mut()
3283 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3284 .unwrap();
3285
3286 let snapshot = graph.snapshot();
3287
3288 // Iterate nodes
3289 let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3290 assert_eq!(snapshot_nodes.len(), 2);
3291
3292 let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3293 assert!(node_ids.contains(&node1));
3294 assert!(node_ids.contains(&node2));
3295 }
3296
3297 #[test]
3298 fn test_snapshot_iter_edges() {
3299 use crate::graph::unified::edge::EdgeKind;
3300 use crate::graph::unified::node::NodeKind;
3301 use crate::graph::unified::storage::arena::NodeEntry;
3302 use std::path::Path;
3303
3304 let mut graph = CodeGraph::new();
3305
3306 // Create nodes
3307 let name1 = graph.strings_mut().intern("func1").unwrap();
3308 let name2 = graph.strings_mut().intern("func2").unwrap();
3309 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3310
3311 let node1 = graph
3312 .nodes_mut()
3313 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3314 .unwrap();
3315 let node2 = graph
3316 .nodes_mut()
3317 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3318 .unwrap();
3319
3320 // Add edges
3321 graph.edges_mut().add_edge(
3322 node1,
3323 node2,
3324 EdgeKind::Calls {
3325 argument_count: 0,
3326 is_async: false,
3327 },
3328 file_id,
3329 );
3330
3331 let snapshot = graph.snapshot();
3332
3333 // Iterate edges
3334 let edges: Vec<_> = snapshot.iter_edges().collect();
3335 assert_eq!(edges.len(), 1);
3336
3337 let (src, tgt, kind) = &edges[0];
3338 assert_eq!(*src, node1);
3339 assert_eq!(*tgt, node2);
3340 assert!(matches!(
3341 kind,
3342 EdgeKind::Calls {
3343 argument_count: 0,
3344 is_async: false
3345 }
3346 ));
3347 }
3348
3349 #[test]
3350 fn test_snapshot_get_node() {
3351 use crate::graph::unified::node::NodeId;
3352 use crate::graph::unified::node::NodeKind;
3353 use crate::graph::unified::storage::arena::NodeEntry;
3354 use std::path::Path;
3355
3356 let mut graph = CodeGraph::new();
3357
3358 // Add a node
3359 let name = graph.strings_mut().intern("test_func").unwrap();
3360 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3361
3362 let node_id = graph
3363 .nodes_mut()
3364 .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3365 .unwrap();
3366
3367 let snapshot = graph.snapshot();
3368
3369 // Get node
3370 let entry = snapshot.get_node(node_id);
3371 assert!(entry.is_some());
3372 assert_eq!(entry.unwrap().kind, NodeKind::Function);
3373
3374 // Invalid node
3375 let invalid_id = NodeId::INVALID;
3376 assert!(snapshot.get_node(invalid_id).is_none());
3377 }
3378
3379 #[test]
3380 fn test_snapshot_query_empty_graph() {
3381 use crate::graph::unified::node::NodeId;
3382
3383 let graph = CodeGraph::new();
3384 let snapshot = graph.snapshot();
3385
3386 // All queries should return empty on empty graph
3387 assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3388 assert!(snapshot.find_by_pattern("test").is_empty());
3389 assert!(candidate_nodes(&snapshot, "test").is_empty());
3390
3391 let dummy_id = NodeId::new(0, 1);
3392 assert!(snapshot.get_callees(dummy_id).is_empty());
3393 assert!(snapshot.get_callers(dummy_id).is_empty());
3394
3395 assert_eq!(snapshot.iter_nodes().count(), 0);
3396 assert_eq!(snapshot.iter_edges().count(), 0);
3397 }
3398
3399 #[test]
3400 fn test_snapshot_edge_filtering_by_kind() {
3401 use crate::graph::unified::edge::EdgeKind;
3402 use crate::graph::unified::node::NodeKind;
3403 use crate::graph::unified::storage::arena::NodeEntry;
3404 use std::path::Path;
3405
3406 let mut graph = CodeGraph::new();
3407
3408 // Create nodes
3409 let name1 = graph.strings_mut().intern("func1").unwrap();
3410 let name2 = graph.strings_mut().intern("func2").unwrap();
3411 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3412
3413 let node1 = graph
3414 .nodes_mut()
3415 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3416 .unwrap();
3417 let node2 = graph
3418 .nodes_mut()
3419 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3420 .unwrap();
3421
3422 // Add different kinds of edges
3423 graph.edges_mut().add_edge(
3424 node1,
3425 node2,
3426 EdgeKind::Calls {
3427 argument_count: 0,
3428 is_async: false,
3429 },
3430 file_id,
3431 );
3432 graph
3433 .edges_mut()
3434 .add_edge(node1, node2, EdgeKind::References, file_id);
3435
3436 let snapshot = graph.snapshot();
3437
3438 // get_callees should only return Calls edges
3439 let callees = snapshot.get_callees(node1);
3440 assert_eq!(callees.len(), 1);
3441 assert_eq!(callees[0], node2);
3442
3443 // iter_edges returns all edges regardless of kind
3444 let edges: Vec<_> = snapshot.iter_edges().collect();
3445 assert_eq!(edges.len(), 2);
3446 }
3447
3448 // -------- reverse_import_index tests --------
3449
3450 /// Helper: build an empty graph with the given file paths registered, a
3451 /// single placeholder node allocated per file, and indices rebuilt so
3452 /// `by_file` returns the per-file node sets. Returns the file IDs in
3453 /// the order passed in and the per-file node ID.
3454 #[cfg(test)]
3455 fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3456 use crate::graph::unified::node::NodeKind;
3457 use crate::graph::unified::storage::arena::NodeEntry;
3458 use std::path::Path;
3459
3460 let mut graph = CodeGraph::new();
3461 let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3462 let mut file_ids = Vec::with_capacity(files.len());
3463 let mut node_ids = Vec::with_capacity(files.len());
3464 for path in files {
3465 let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3466 let node_id = graph
3467 .nodes_mut()
3468 .alloc(NodeEntry::new(
3469 NodeKind::Function,
3470 placeholder_name,
3471 file_id,
3472 ))
3473 .unwrap();
3474 file_ids.push(file_id);
3475 node_ids.push(node_id);
3476 }
3477 graph.rebuild_indices();
3478 (graph, file_ids, node_ids)
3479 }
3480
3481 /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3482 /// `target_node` (in exporter file). The edge is recorded against the
3483 /// importer file so `clear_file` cleanup would behave identically to
3484 /// production Pass 4 writes.
3485 #[cfg(test)]
3486 fn add_import_edge(
3487 graph: &mut CodeGraph,
3488 source_node: NodeId,
3489 target_node: NodeId,
3490 importer_file: FileId,
3491 ) {
3492 graph.edges_mut().add_edge(
3493 source_node,
3494 target_node,
3495 EdgeKind::Imports {
3496 alias: None,
3497 is_wildcard: false,
3498 },
3499 importer_file,
3500 );
3501 }
3502
3503 #[test]
3504 fn reverse_import_index_empty_graph_returns_empty() {
3505 let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3506 assert!(graph.reverse_import_index(files[0]).is_empty());
3507 }
3508
3509 #[test]
3510 fn reverse_import_index_single_importer() {
3511 // File A imports a symbol exported by file B. Reverse index of B
3512 // should return exactly [A]; reverse index of A should be empty.
3513 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3514 let (a, b) = (files[0], files[1]);
3515 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3516
3517 let importers_of_b = graph.reverse_import_index(b);
3518 assert_eq!(importers_of_b, vec![a]);
3519
3520 let importers_of_a = graph.reverse_import_index(a);
3521 assert!(
3522 importers_of_a.is_empty(),
3523 "A has no inbound Imports edges; reverse index must be empty"
3524 );
3525 }
3526
3527 #[test]
3528 fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3529 // A, B, C all import from D. Reverse index of D must contain each
3530 // importer exactly once, sorted ascending by FileId.
3531 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3532 let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3533 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3534 add_import_edge(&mut graph, nodes[1], nodes[3], b);
3535 add_import_edge(&mut graph, nodes[2], nodes[3], c);
3536 // Add a duplicate edge from A to D to confirm dedup behavior.
3537 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3538
3539 let importers_of_d = graph.reverse_import_index(d);
3540 assert_eq!(importers_of_d, vec![a, b, c]);
3541 // Sort invariant: Vec is ascending by raw index.
3542 let mut sorted = importers_of_d.clone();
3543 sorted.sort();
3544 assert_eq!(importers_of_d, sorted);
3545 }
3546
3547 #[test]
3548 fn reverse_import_index_filters_non_import_edges() {
3549 // A `Calls` edge from A into B must not contribute to B's reverse
3550 // import index. Only `EdgeKind::Imports` edges count.
3551 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3552 let (a, b) = (files[0], files[1]);
3553 graph.edges_mut().add_edge(
3554 nodes[0],
3555 nodes[1],
3556 EdgeKind::Calls {
3557 argument_count: 0,
3558 is_async: false,
3559 },
3560 a,
3561 );
3562 graph
3563 .edges_mut()
3564 .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3565
3566 assert!(
3567 graph.reverse_import_index(b).is_empty(),
3568 "non-Imports edges must not register as importers"
3569 );
3570 }
3571
3572 #[test]
3573 fn reverse_import_index_elides_self_imports() {
3574 // An Imports edge whose source and target are both in the same file
3575 // is a self-import; the caller's own file must not appear in its own
3576 // reverse index.
3577 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3578 let a = files[0];
3579 // Add a second node in the same file so we have a distinct source.
3580 let name2 = graph.strings_mut().intern("sym2").unwrap();
3581 let second_in_a = graph
3582 .nodes_mut()
3583 .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3584 crate::graph::unified::node::NodeKind::Function,
3585 name2,
3586 a,
3587 ))
3588 .unwrap();
3589 graph.rebuild_indices();
3590 add_import_edge(&mut graph, second_in_a, nodes[0], a);
3591
3592 assert!(
3593 graph.reverse_import_index(a).is_empty(),
3594 "self-imports must be elided from reverse index"
3595 );
3596 }
3597
3598 #[test]
3599 fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3600 // Two files: A has both Calls and Imports edges into B. Reverse
3601 // index must return exactly [A] — the Calls edge contributes
3602 // nothing.
3603 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3604 let (a, b) = (files[0], files[1]);
3605 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3606 graph.edges_mut().add_edge(
3607 nodes[0],
3608 nodes[1],
3609 EdgeKind::Calls {
3610 argument_count: 0,
3611 is_async: false,
3612 },
3613 a,
3614 );
3615
3616 assert_eq!(graph.reverse_import_index(b), vec![a]);
3617 }
3618
3619 #[test]
3620 fn reverse_import_index_uninitialized_file_returns_empty() {
3621 // Querying a FileId that is not registered in the graph must return
3622 // an empty Vec, not panic.
3623 let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3624 let bogus = FileId::new(9999);
3625 assert!(
3626 graph.reverse_import_index(bogus).is_empty(),
3627 "unknown FileId must return empty Vec without panicking"
3628 );
3629 }
3630
3631 #[test]
3632 fn reverse_import_index_skips_tombstoned_source_nodes() {
3633 // In the incremental rebuild path the prior graph retains tombstoned
3634 // arena slots for nodes belonging to closure files that have been
3635 // removed but not yet fully compacted. reverse_import_index is
3636 // called on that prior graph to widen the closure, so its
3637 // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3638 // is a semantically important branch, not a theoretical one.
3639 // This test exercises it directly by tombstoning the source node of
3640 // an Imports edge and asserting the edge silently disappears from
3641 // the reverse index.
3642 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3643 let (a, b) = (files[0], files[1]);
3644 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3645 // Sanity: before tombstoning, A shows up as the importer of B.
3646 assert_eq!(graph.reverse_import_index(b), vec![a]);
3647 // Tombstone the source node via the arena (generation bump). The
3648 // Imports edge still exists in the edge store but its source NodeId
3649 // is now stale; `nodes.get(edge_ref.source)` returns None and the
3650 // guard skips the edge.
3651 let removed = graph.nodes_mut().remove(nodes[0]);
3652 assert!(
3653 removed.is_some(),
3654 "arena.remove must succeed for a live node"
3655 );
3656 assert!(
3657 graph.nodes().get(nodes[0]).is_none(),
3658 "tombstoned lookup must return None"
3659 );
3660 assert!(
3661 graph.reverse_import_index(b).is_empty(),
3662 "Imports edges whose source is tombstoned must be silently skipped"
3663 );
3664 }
3665
3666 // ------------------------------------------------------------------
3667 // Task 4 Step 2 — CodeGraph::remove_file
3668 // ------------------------------------------------------------------
3669
3670 /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3671 /// of intra- and inter-file edges. Each call site produces a
3672 /// canonical topology so tests below can assert bit-level on edge
3673 /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3674 /// file_b_nodes)`.
3675 fn seed_two_file_graph(
3676 per_file: usize,
3677 ) -> (
3678 CodeGraph,
3679 crate::graph::unified::file::FileId,
3680 crate::graph::unified::file::FileId,
3681 Vec<NodeId>,
3682 Vec<NodeId>,
3683 ) {
3684 use crate::graph::unified::edge::EdgeKind;
3685 use crate::graph::unified::node::NodeKind;
3686 use crate::graph::unified::storage::arena::NodeEntry;
3687 use std::path::Path;
3688
3689 let mut graph = CodeGraph::new();
3690 let sym = graph.strings_mut().intern("sym").expect("intern");
3691 let file_a = graph
3692 .files_mut()
3693 .register(Path::new("/tmp/remove_file_test/a.rs"))
3694 .expect("register a");
3695 let file_b = graph
3696 .files_mut()
3697 .register(Path::new("/tmp/remove_file_test/b.rs"))
3698 .expect("register b");
3699
3700 let mut file_a_nodes = Vec::with_capacity(per_file);
3701 let mut file_b_nodes = Vec::with_capacity(per_file);
3702
3703 for _ in 0..per_file {
3704 let n = graph
3705 .nodes_mut()
3706 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3707 .expect("alloc a-node");
3708 file_a_nodes.push(n);
3709 graph.files_mut().record_node(file_a, n);
3710 graph
3711 .indices_mut()
3712 .add(n, NodeKind::Function, sym, None, file_a);
3713 }
3714 for _ in 0..per_file {
3715 let n = graph
3716 .nodes_mut()
3717 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3718 .expect("alloc b-node");
3719 file_b_nodes.push(n);
3720 graph.files_mut().record_node(file_b, n);
3721 graph
3722 .indices_mut()
3723 .add(n, NodeKind::Function, sym, None, file_b);
3724 }
3725
3726 // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3727 // b[i] -> b[i+1]. These are the ones that must die when the
3728 // corresponding file is removed.
3729 for i in 0..per_file.saturating_sub(1) {
3730 graph.edges_mut().add_edge(
3731 file_a_nodes[i],
3732 file_a_nodes[i + 1],
3733 EdgeKind::Calls {
3734 argument_count: 0,
3735 is_async: false,
3736 },
3737 file_a,
3738 );
3739 graph.edges_mut().add_edge(
3740 file_b_nodes[i],
3741 file_b_nodes[i + 1],
3742 EdgeKind::Calls {
3743 argument_count: 0,
3744 is_async: false,
3745 },
3746 file_b,
3747 );
3748 }
3749 // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3750 // when *either* endpoint's file is removed (plan §F.2).
3751 graph.edges_mut().add_edge(
3752 file_a_nodes[0],
3753 file_b_nodes[0],
3754 EdgeKind::Calls {
3755 argument_count: 0,
3756 is_async: false,
3757 },
3758 file_a,
3759 );
3760 graph.edges_mut().add_edge(
3761 file_b_nodes[0],
3762 file_a_nodes[0],
3763 EdgeKind::Calls {
3764 argument_count: 0,
3765 is_async: false,
3766 },
3767 file_b,
3768 );
3769
3770 (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3771 }
3772
3773 #[test]
3774 fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3775 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3776
3777 let returned = graph.remove_file(file_a);
3778
3779 // Returned list must equal the original per-file bucket
3780 // membership, deterministically.
3781 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3782 let expected_set: std::collections::HashSet<NodeId> =
3783 file_a_nodes.iter().copied().collect();
3784 assert_eq!(
3785 returned_set, expected_set,
3786 "remove_file must return exactly the file_a nodes drained from the bucket"
3787 );
3788
3789 // Every returned NodeId is gone from the arena.
3790 for nid in &file_a_nodes {
3791 assert!(
3792 graph.nodes().get(*nid).is_none(),
3793 "node {nid:?} from removed file must be tombstoned in arena"
3794 );
3795 }
3796 }
3797
3798 #[test]
3799 fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
3800 use crate::graph::unified::edge::EdgeKind;
3801
3802 let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
3803
3804 // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
3805 // 2 cross-file edges (a0↔b0).
3806 let before_delta = graph.edges().stats().forward.delta_edge_count;
3807 assert_eq!(
3808 before_delta, 6,
3809 "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
3810 );
3811
3812 let _ = graph.remove_file(file_a);
3813
3814 // Forward delta after removal: all 2 intra-A edges are gone,
3815 // both cross edges are gone, only the 2 intra-B edges remain.
3816 let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
3817 assert_eq!(
3818 after_delta_forward, 2,
3819 "only intra-B forward edges must remain after removing file_a"
3820 );
3821 let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
3822 assert_eq!(
3823 after_delta_reverse, 2,
3824 "only intra-B reverse edges must remain after removing file_a"
3825 );
3826
3827 // Cross-file edge from b[0] -> a[0] must no longer be visible
3828 // from either direction, because a[0] is tombstoned.
3829 let b0 = file_b_nodes[0];
3830 let a0 = file_a_nodes[0];
3831 let remaining_from_b0: Vec<_> = graph
3832 .edges()
3833 .edges_from(b0)
3834 .into_iter()
3835 .filter(|e| {
3836 matches!(
3837 e.kind,
3838 EdgeKind::Calls {
3839 argument_count: 0,
3840 is_async: false
3841 }
3842 )
3843 })
3844 .collect();
3845 assert!(
3846 !remaining_from_b0.iter().any(|e| e.target == a0),
3847 "edge b0 -> a0 must be gone after remove_file(file_a)"
3848 );
3849 let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
3850 assert!(
3851 remaining_to_a0.is_empty(),
3852 "every edge targeting the tombstoned a0 must be gone"
3853 );
3854 }
3855
3856 #[test]
3857 fn code_graph_remove_file_drops_file_registry_entry() {
3858 let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
3859
3860 assert!(
3861 graph.files().resolve(file_a).is_some(),
3862 "seed registered file_a"
3863 );
3864 assert!(
3865 !graph.files().nodes_for_file(file_a).is_empty(),
3866 "seed populated the file_a bucket"
3867 );
3868
3869 let _ = graph.remove_file(file_a);
3870
3871 assert!(
3872 graph.files().resolve(file_a).is_none(),
3873 "FileRegistry::resolve must return None after remove_file"
3874 );
3875 assert!(
3876 graph.files().nodes_for_file(file_a).is_empty(),
3877 "per-file bucket for file_a must be drained"
3878 );
3879 }
3880
3881 #[test]
3882 fn code_graph_remove_file_is_idempotent_on_unknown_file() {
3883 use crate::graph::unified::file::FileId;
3884 let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
3885
3886 // Snapshot state before idempotent no-op.
3887 let nodes_before = graph.nodes().len();
3888 let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
3889 let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
3890 let files_before = graph.files().len();
3891
3892 // A FileId that was never registered: the caller may legitimately
3893 // receive a bogus id from stale indexing state. `remove_file`
3894 // must be a silent no-op.
3895 let bogus = FileId::new(9999);
3896 let returned = graph.remove_file(bogus);
3897 assert!(
3898 returned.is_empty(),
3899 "remove_file on unknown FileId must return an empty Vec"
3900 );
3901
3902 assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
3903 assert_eq!(
3904 graph.edges().stats().forward.delta_edge_count,
3905 delta_fwd_before,
3906 "forward delta unchanged"
3907 );
3908 assert_eq!(
3909 graph.edges().stats().reverse.delta_edge_count,
3910 delta_rev_before,
3911 "reverse delta unchanged"
3912 );
3913 assert_eq!(graph.files().len(), files_before, "file count unchanged");
3914 }
3915
3916 #[test]
3917 fn code_graph_remove_file_clears_file_segments_entry() {
3918 // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
3919 // must be cleared on `remove_file`. Without this, a later
3920 // `FileId` recycle (via `FileRegistry::free_list`) would
3921 // inherit the previous file's stale node-range and
3922 // `reindex_files` (`build/reindex.rs`) would tombstone the
3923 // wrong slots. This unit test seeds a segment entry, removes
3924 // the file, and asserts the entry is gone.
3925 use crate::graph::unified::storage::segment::FileSegmentTable;
3926
3927 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3928
3929 // Seed a segment for file A. Production code sets this via
3930 // Phase 3 parallel commit; here we go through the crate-
3931 // internal accessor because we are a unit test inside the
3932 // crate. (The feature-gated `test_only_record_file_segment`
3933 // public helper exists for the integration tests in
3934 // `sqry-core/tests/incremental_remove_file_scale.rs`.)
3935 let first_index = file_a_nodes
3936 .iter()
3937 .map(|n| n.index())
3938 .min()
3939 .expect("per_file = 3");
3940 let last_index = file_a_nodes
3941 .iter()
3942 .map(|n| n.index())
3943 .max()
3944 .expect("per_file = 3");
3945 let slot_count = last_index - first_index + 1;
3946 let table: &mut FileSegmentTable = graph.file_segments_mut();
3947 table.record_range(file_a, first_index, slot_count);
3948 assert!(
3949 graph.file_segments().get(file_a).is_some(),
3950 "seed must install a segment for file_a before remove_file"
3951 );
3952
3953 // Remove the file.
3954 let _ = graph.remove_file(file_a);
3955
3956 // The segment entry must be gone. `FileSegmentTable::remove`
3957 // is idempotent (a no-op on unknown ids), so this assertion
3958 // holds whether or not the seeded range was contiguous.
3959 assert!(
3960 graph.file_segments().get(file_a).is_none(),
3961 "remove_file must clear the FileSegmentTable entry for file_a"
3962 );
3963 }
3964
3965 #[test]
3966 fn code_graph_remove_file_repeated_calls_are_idempotent() {
3967 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3968
3969 // First call does the work.
3970 let first = graph.remove_file(file_a);
3971 assert_eq!(first.len(), file_a_nodes.len());
3972
3973 // Snapshot post-first-call state.
3974 let nodes_after = graph.nodes().len();
3975 let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
3976 let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
3977 let files_after = graph.files().len();
3978
3979 // Second call must be a silent no-op — the bucket is empty,
3980 // the file is unregistered, and the arena slots are already
3981 // tombstoned (NodeArena::remove ignores stale generations).
3982 let second = graph.remove_file(file_a);
3983 assert!(
3984 second.is_empty(),
3985 "second remove_file on the same file must return an empty Vec"
3986 );
3987
3988 assert_eq!(graph.nodes().len(), nodes_after);
3989 assert_eq!(
3990 graph.edges().stats().forward.delta_edge_count,
3991 delta_fwd_after
3992 );
3993 assert_eq!(
3994 graph.edges().stats().reverse.delta_edge_count,
3995 delta_rev_after
3996 );
3997 assert_eq!(graph.files().len(), files_after);
3998 }
3999}