Skip to main content

sqry_core/graph/unified/concurrent/
graph.rs

1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19    ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
25use crate::graph::unified::file::FileId;
26use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
27use crate::graph::unified::storage::arena::NodeArena;
28use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
29use crate::graph::unified::storage::indices::AuxiliaryIndices;
30use crate::graph::unified::storage::interner::StringInterner;
31use crate::graph::unified::storage::metadata::NodeMetadataStore;
32use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
33use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
34use crate::graph::unified::storage::segment::FileSegmentTable;
35use crate::graph::unified::string::id::StringId;
36
/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
///
/// `CodeGraph` uses `Arc` for all internal components, enabling:
/// - O(1) snapshot creation via Arc cloning
/// - Copy-on-write semantics via `Arc::make_mut`
/// - Memory-efficient sharing between snapshots
///
/// # Design
///
/// The Arc wrapping enables the MVCC pattern:
/// - Readers see a consistent snapshot at the time they acquired access
/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
/// - Multiple snapshots can coexist without copying data
///
/// # Performance
///
/// - Snapshot creation: one `Arc` clone per component (13 in the current
///   layout) ≈ <1μs
/// - Read access: Direct Arc dereference, no locking
/// - Write access: `Arc::make_mut` clones only if refcount > 1
///
/// # Phase 2 binding-plane access
///
/// Use the two-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let snapshot = graph.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The two-line form is intentional: `BindingPlane<'g>` borrows from
/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
/// visible at the call site. The full Phase 2 scope/alias/shadow and
/// witness-bearing resolution API is exposed through `BindingPlane`.
// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
// External crates still go through the public accessor methods below.
#[derive(Clone)]
pub struct CodeGraph {
    /// Node storage with generational indices.
    pub(crate) nodes: Arc<NodeArena>,
    /// Bidirectional edge storage (forward + reverse).
    pub(crate) edges: Arc<BidirectionalEdgeStore>,
    /// String interner for symbol names.
    pub(crate) strings: Arc<StringInterner>,
    /// File registry for path deduplication.
    pub(crate) files: Arc<FileRegistry>,
    /// Auxiliary indices for fast lookup.
    pub(crate) indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata (keyed by full `NodeId`).
    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance (Phase 1 fact layer).
    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance (Phase 1 fact layer).
    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
    pub(crate) fact_epoch: u64,
    /// Epoch for version tracking.
    pub(crate) epoch: u64,
    /// Per-language confidence metadata collected during build.
    /// Maps language name (e.g., "rust") to aggregated confidence.
    /// Note: this map is not captured by `snapshot()` — `GraphSnapshot`
    /// carries only the epochs and the Arc-backed component stores.
    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
    pub(crate) scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
    pub(crate) alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
    pub(crate) shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
    pub(crate) file_segments: Arc<FileSegmentTable>,
}
111
112impl CodeGraph {
113    /// Creates a new empty `CodeGraph`.
114    ///
115    /// # Example
116    ///
117    /// ```rust
118    /// use sqry_core::graph::unified::concurrent::CodeGraph;
119    ///
120    /// let graph = CodeGraph::new();
121    /// assert_eq!(graph.epoch(), 0);
122    /// ```
123    #[must_use]
124    pub fn new() -> Self {
125        Self {
126            nodes: Arc::new(NodeArena::new()),
127            edges: Arc::new(BidirectionalEdgeStore::new()),
128            strings: Arc::new(StringInterner::new()),
129            files: Arc::new(FileRegistry::new()),
130            indices: Arc::new(AuxiliaryIndices::new()),
131            macro_metadata: Arc::new(NodeMetadataStore::new()),
132            node_provenance: Arc::new(NodeProvenanceStore::new()),
133            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
134            fact_epoch: 0,
135            epoch: 0,
136            confidence: HashMap::new(),
137            scope_arena: Arc::new(ScopeArena::new()),
138            alias_table: Arc::new(AliasTable::new()),
139            shadow_table: Arc::new(ShadowTable::new()),
140            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
141            file_segments: Arc::new(FileSegmentTable::new()),
142        }
143    }
144
145    /// Creates a `CodeGraph` from existing components.
146    ///
147    /// This is useful when building a graph from external data or
148    /// reconstructing from serialized state.
149    #[must_use]
150    pub fn from_components(
151        nodes: NodeArena,
152        edges: BidirectionalEdgeStore,
153        strings: StringInterner,
154        files: FileRegistry,
155        indices: AuxiliaryIndices,
156        macro_metadata: NodeMetadataStore,
157    ) -> Self {
158        Self {
159            nodes: Arc::new(nodes),
160            edges: Arc::new(edges),
161            strings: Arc::new(strings),
162            files: Arc::new(files),
163            indices: Arc::new(indices),
164            macro_metadata: Arc::new(macro_metadata),
165            node_provenance: Arc::new(NodeProvenanceStore::new()),
166            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
167            fact_epoch: 0,
168            epoch: 0,
169            confidence: HashMap::new(),
170            scope_arena: Arc::new(ScopeArena::new()),
171            alias_table: Arc::new(AliasTable::new()),
172            shadow_table: Arc::new(ShadowTable::new()),
173            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
174            file_segments: Arc::new(FileSegmentTable::new()),
175        }
176    }
177
    /// Creates a cheap snapshot of the graph.
    ///
    /// This operation is a fixed number of `Arc` clones (one per component,
    /// 13 in the current layout), which completes in <1μs.
    /// The snapshot is isolated from future mutations to the original graph.
    ///
    /// Note that the per-language `confidence` map is not carried into the
    /// snapshot — only the epochs and the Arc-backed component stores are
    /// captured.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// // snapshot is independent of future mutations to graph
    /// ```
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        GraphSnapshot {
            nodes: Arc::clone(&self.nodes),
            edges: Arc::clone(&self.edges),
            strings: Arc::clone(&self.strings),
            files: Arc::clone(&self.files),
            indices: Arc::clone(&self.indices),
            macro_metadata: Arc::clone(&self.macro_metadata),
            node_provenance: Arc::clone(&self.node_provenance),
            edge_provenance: Arc::clone(&self.edge_provenance),
            fact_epoch: self.fact_epoch,
            epoch: self.epoch,
            scope_arena: Arc::clone(&self.scope_arena),
            alias_table: Arc::clone(&self.alias_table),
            shadow_table: Arc::clone(&self.shadow_table),
            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
            file_segments: Arc::clone(&self.file_segments),
        }
    }
212
213    /// Returns a reference to the node arena.
214    #[inline]
215    #[must_use]
216    pub fn nodes(&self) -> &NodeArena {
217        &self.nodes
218    }
219
220    /// Returns a reference to the bidirectional edge store.
221    #[inline]
222    #[must_use]
223    pub fn edges(&self) -> &BidirectionalEdgeStore {
224        &self.edges
225    }
226
227    /// Returns a reference to the string interner.
228    #[inline]
229    #[must_use]
230    pub fn strings(&self) -> &StringInterner {
231        &self.strings
232    }
233
234    /// Returns a reference to the file registry.
235    #[inline]
236    #[must_use]
237    pub fn files(&self) -> &FileRegistry {
238        &self.files
239    }
240
241    /// Returns a reference to the auxiliary indices.
242    #[inline]
243    #[must_use]
244    pub fn indices(&self) -> &AuxiliaryIndices {
245        &self.indices
246    }
247
248    /// Returns a reference to the macro boundary metadata store.
249    #[inline]
250    #[must_use]
251    pub fn macro_metadata(&self) -> &NodeMetadataStore {
252        &self.macro_metadata
253    }
254
255    // ------------------------------------------------------------------
256    // Phase 1 fact-layer provenance accessors (P1U09).
257    // ------------------------------------------------------------------
258
259    /// Returns the monotonic fact-layer epoch stamped on the most recently
260    /// saved or loaded snapshot. Returns `0` for graphs that have not been
261    /// persisted yet or were loaded from V7 snapshots.
262    #[inline]
263    #[must_use]
264    pub fn fact_epoch(&self) -> u64 {
265        self.fact_epoch
266    }
267
268    /// Looks up node provenance by `NodeId`.
269    ///
270    /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
271    /// or the stored generation does not match (stale handle).
272    #[inline]
273    #[must_use]
274    pub fn node_provenance(
275        &self,
276        id: crate::graph::unified::node::id::NodeId,
277    ) -> Option<&NodeProvenance> {
278        self.node_provenance.lookup(id)
279    }
280
281    /// Looks up edge provenance by `EdgeId`.
282    ///
283    /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
284    /// or the edge is the invalid sentinel.
285    #[inline]
286    #[must_use]
287    pub fn edge_provenance(
288        &self,
289        id: crate::graph::unified::edge::id::EdgeId,
290    ) -> Option<&EdgeProvenance> {
291        self.edge_provenance.lookup(id)
292    }
293
294    /// Returns a borrowed provenance view for a file.
295    ///
296    /// Returns `None` for invalid/unregistered `FileId`s.
297    #[inline]
298    #[must_use]
299    pub fn file_provenance(
300        &self,
301        id: crate::graph::unified::file::id::FileId,
302    ) -> Option<FileProvenanceView<'_>> {
303        self.files.file_provenance(id)
304    }
305
306    // ------------------------------------------------------------------
307    // Phase 2 binding-plane accessors (P2U03).
308    // ------------------------------------------------------------------
309
310    /// Returns a reference to the scope arena derived during Phase 4e.
311    ///
312    /// The arena is empty on freshly-constructed `CodeGraph` instances and is
313    /// populated by calling `phase4e_binding::derive_binding_plane`.
314    #[inline]
315    #[must_use]
316    pub fn scope_arena(&self) -> &ScopeArena {
317        &self.scope_arena
318    }
319
320    /// Installs a freshly-derived scope arena.
321    ///
322    /// Called from `phase4e_binding::derive_binding_plane` during the build
323    /// pipeline. External callers that run Phase 4e manually (e.g., test
324    /// fixture builders) use this to store the result.
325    pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
326        self.scope_arena = Arc::new(arena);
327    }
328
329    /// Returns a reference to the alias table derived during Phase 4e.
330    ///
331    /// The table is empty on freshly-constructed `CodeGraph` instances and is
332    /// populated by calling `phase4e_binding::derive_binding_plane`.
333    #[inline]
334    #[must_use]
335    pub fn alias_table(&self) -> &AliasTable {
336        &self.alias_table
337    }
338
339    /// Installs a freshly-derived alias table.
340    ///
341    /// Called from `phase4e_binding::derive_binding_plane` during the build
342    /// pipeline. External callers that run Phase 4e manually (e.g., test
343    /// fixture builders) use this to store the result.
344    pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
345        self.alias_table = Arc::new(table);
346    }
347
348    /// Returns a reference to the shadow table derived during Phase 4e.
349    ///
350    /// The table is empty on freshly-constructed `CodeGraph` instances and is
351    /// populated by calling `phase4e_binding::derive_binding_plane`.
352    #[inline]
353    #[must_use]
354    pub fn shadow_table(&self) -> &ShadowTable {
355        &self.shadow_table
356    }
357
358    /// Installs a freshly-derived shadow table.
359    ///
360    /// Called from `phase4e_binding::derive_binding_plane` during the build
361    /// pipeline. External callers that run Phase 4e manually (e.g., test
362    /// fixture builders) use this to store the result.
363    pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
364        self.shadow_table = Arc::new(table);
365    }
366
    /// Returns a reference to the scope provenance store derived during Phase 4e.
    ///
    /// The store is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
        &self.scope_provenance_store
    }

    /// Looks up scope provenance by `ScopeId`.
    ///
    /// Returns `None` if the slot is out of range, vacant, or the stored
    /// generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
        self.scope_provenance_store.lookup(id)
    }

    /// Looks up the live `ScopeId` for a stable scope identity.
    ///
    /// Returns `None` if no provenance record is registered for that stable id.
    /// The reverse index is populated by `insert` during Phase 4e and must be
    /// rebuilt after V9 deserialization.
    #[inline]
    #[must_use]
    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
        self.scope_provenance_store.scope_by_stable_id(stable)
    }

    /// Installs a freshly-derived scope provenance store.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    ///
    /// Replaces the whole `Arc`, so existing snapshots keep the previous
    /// store untouched.
    pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
        self.scope_provenance_store = Arc::new(store);
    }
406
    /// Returns a reference to the file segment table.
    #[inline]
    #[must_use]
    pub fn file_segments(&self) -> &FileSegmentTable {
        &self.file_segments
    }

    /// Replaces the file segment table.
    ///
    /// Swaps in a fresh `Arc`; snapshots holding the old table are unaffected.
    pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
        self.file_segments = Arc::new(table);
    }

    /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
    ///
    /// Copy-on-write: if any snapshot still shares the table, it is deep-cloned
    /// before the mutable borrow is handed out.
    pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
        Arc::make_mut(&mut self.file_segments)
    }
423
424    /// Test-only helper that records a file segment directly on the
425    /// graph, bypassing the full Phase 3 commit pipeline. Only
426    /// available under `#[cfg(feature = "rebuild-internals")]` so the
427    /// surface is opt-in for rebuild-plane consumers (the feature is
428    /// whitelisted to sqry-daemon + sqry-core integration tests; see
429    /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
430    ///
431    /// Integration tests (notably
432    /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
433    /// to seed the synthetic workspaces they build before exercising
434    /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
435    /// code paths never touch this method — Phase 3 parallel commit
436    /// is the sole production writer, via the crate-internal
437    /// `file_segments_mut` accessor above.
438    ///
439    /// Renamed with `test_only_` prefix so the purpose is unambiguous
440    /// at every call site; `#[doc(hidden)]` hides it from rendered
441    /// rustdoc so downstream daemon integrations don't discover it by
442    /// accident.
443    #[cfg(feature = "rebuild-internals")]
444    #[doc(hidden)]
445    pub fn test_only_record_file_segment(
446        &mut self,
447        file_id: FileId,
448        start_slot: u32,
449        slot_count: u32,
450    ) {
451        Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
452    }
453
    /// Sets the provenance stores and fact epoch, typically called by the
    /// persistence loader after deserializing a V8 snapshot.
    ///
    /// Both stores are replaced wholesale (fresh `Arc`s), so snapshots taken
    /// before this call keep the previous provenance data.
    pub(crate) fn set_provenance(
        &mut self,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
    ) {
        self.node_provenance = Arc::new(node_provenance);
        self.edge_provenance = Arc::new(edge_provenance);
        self.fact_epoch = fact_epoch;
    }

    /// Returns the current epoch.
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch
    }
473
    /// Returns a mutable reference to the node arena.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics: if other
    /// references exist (e.g., snapshots), the data is cloned. The first
    /// mutation after taking a snapshot therefore pays a deep clone of
    /// this component.
    #[inline]
    pub fn nodes_mut(&mut self) -> &mut NodeArena {
        Arc::make_mut(&mut self.nodes)
    }

    /// Returns a mutable reference to the bidirectional edge store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
        Arc::make_mut(&mut self.edges)
    }

    /// Returns a mutable reference to the string interner.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn strings_mut(&mut self) -> &mut StringInterner {
        Arc::make_mut(&mut self.strings)
    }

    /// Returns a mutable reference to the file registry.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn files_mut(&mut self) -> &mut FileRegistry {
        Arc::make_mut(&mut self.files)
    }

    /// Returns a mutable reference to the auxiliary indices.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
        Arc::make_mut(&mut self.indices)
    }

    /// Returns a mutable reference to the macro boundary metadata store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
        Arc::make_mut(&mut self.macro_metadata)
    }
522
523    /// Returns mutable references to both the node arena and the string interner.
524    ///
525    /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
526    /// `strings_mut()` separately on `&mut self`.
527    #[inline]
528    pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
529        (
530            Arc::make_mut(&mut self.nodes),
531            Arc::make_mut(&mut self.strings),
532        )
533    }
534
    /// Rebuilds auxiliary indices from the current node arena.
    ///
    /// This avoids the borrow conflict that arises when calling `nodes()` and
    /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
    /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
    /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
    /// existing indices and rebuilds in a single pass without per-element
    /// duplicate checking.
    ///
    /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
    /// generic
    /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
    /// free function so the same implementation serves both the
    /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
    /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
    /// using `graph.rebuild_indices()`; incremental rebuild call sites
    /// should use the free function directly.
    pub fn rebuild_indices(&mut self) {
        // Single shared implementation for full-build and incremental rebuild.
        crate::graph::unified::build::parallel_commit::rebuild_indices(self);
    }
555
556    /// Increments the epoch counter and returns the new value.
557    ///
558    /// Called automatically by `ConcurrentCodeGraph::write()`.
559    #[inline]
560    pub fn bump_epoch(&mut self) -> u64 {
561        self.epoch = self.epoch.wrapping_add(1);
562        self.epoch
563    }
564
    /// Sets the epoch to a specific value.
    ///
    /// This is primarily for testing or reconstruction from serialized state.
    /// The value is assigned unconditionally; no monotonicity check is made.
    #[inline]
    pub fn set_epoch(&mut self, epoch: u64) {
        self.epoch = epoch;
    }
572
573    /// Returns the number of nodes in the graph.
574    ///
575    /// This is a convenience method that delegates to `nodes().len()`.
576    ///
577    /// # Example
578    ///
579    /// ```rust
580    /// use sqry_core::graph::unified::concurrent::CodeGraph;
581    ///
582    /// let graph = CodeGraph::new();
583    /// assert_eq!(graph.node_count(), 0);
584    /// ```
585    #[inline]
586    #[must_use]
587    pub fn node_count(&self) -> usize {
588        self.nodes.len()
589    }
590
591    /// Returns the number of edges in the graph (forward direction).
592    ///
593    /// This counts edges in the forward store, including both CSR and delta edges.
594    ///
595    /// # Example
596    ///
597    /// ```rust
598    /// use sqry_core::graph::unified::concurrent::CodeGraph;
599    ///
600    /// let graph = CodeGraph::new();
601    /// assert_eq!(graph.edge_count(), 0);
602    /// ```
603    #[inline]
604    #[must_use]
605    pub fn edge_count(&self) -> usize {
606        let stats = self.edges.stats();
607        stats.forward.csr_edge_count + stats.forward.delta_edge_count
608    }
609
610    /// Returns true if the graph contains no nodes.
611    ///
612    /// This is a convenience method that delegates to `nodes().is_empty()`.
613    ///
614    /// # Example
615    ///
616    /// ```rust
617    /// use sqry_core::graph::unified::concurrent::CodeGraph;
618    ///
619    /// let graph = CodeGraph::new();
620    /// assert!(graph.is_empty());
621    /// ```
622    #[inline]
623    #[must_use]
624    pub fn is_empty(&self) -> bool {
625        self.nodes.is_empty()
626    }
627
628    /// Returns an iterator over all indexed file paths.
629    ///
630    /// This is useful for enumerating all files that have been processed
631    /// and added to the graph.
632    ///
633    /// # Example
634    ///
635    /// ```rust
636    /// use sqry_core::graph::unified::concurrent::CodeGraph;
637    ///
638    /// let graph = CodeGraph::new();
639    /// for (file_id, path) in graph.indexed_files() {
640    ///     println!("File {}: {}", file_id.index(), path.display());
641    /// }
642    /// ```
643    #[inline]
644    pub fn indexed_files(
645        &self,
646    ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
647        self.files
648            .iter()
649            .map(|(id, arc_path)| (id, arc_path.as_ref()))
650    }
651
652    /// Returns the set of files that import one or more symbols exported by
653    /// `file_id`, deduplicated and sorted ascending.
654    ///
655    /// For every [`EdgeKind::Imports`] edge whose target node lives in
656    /// `file_id`, the source node's [`FileId`] is added to the result. Files
657    /// are returned sorted by raw index so the result is deterministic across
658    /// runs. The caller's own file is never included — an `Imports` edge
659    /// whose source and target both live in `file_id` is treated as a
660    /// self-import and elided. Edges whose source node is no longer resolvable
661    /// in the arena (tombstoned) are silently skipped.
662    ///
663    /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
664    /// by the incremental rebuild engine to compute reverse-dependency
665    /// closures: "if file X changes its exports, which files need to be
666    /// re-linked?"
667    ///
668    /// # Complexity
669    ///
670    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
671    /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
672    /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
673    /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
674    /// importer set (where `R` is the importer count) is negligible in
675    /// practice since `R ≤ file_count`.
676    ///
677    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
678    #[must_use]
679    pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
680        let mut importers: HashSet<FileId> = HashSet::new();
681        for &target_node in self.indices.by_file(file_id) {
682            for edge_ref in self.edges.edges_to(target_node) {
683                if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
684                    continue;
685                }
686                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
687                    continue;
688                };
689                let source_file = source_entry.file;
690                if source_file != file_id {
691                    importers.insert(source_file);
692                }
693            }
694        }
695        let mut result: Vec<FileId> = importers.into_iter().collect();
696        result.sort();
697        result
698    }
699
700    /// Returns the set of files that hold at least one live inter-file edge
701    /// targeting a node in `file_id`, deduplicated and sorted ascending.
702    ///
703    /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
704    /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
705    /// cross-file edge as a dependency signal: `Calls`, `References`,
706    /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
707    /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
708    /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
709    /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
710    /// cross-file-capable variant. This is the reverse-dependency surface the
711    /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
712    /// `file_id` changes, every file whose committed edges point into
713    /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
714    /// references survive the target-side tombstone-and-reparse cycle.
715    ///
716    /// The caller's own file is never included — an edge whose source and
717    /// target both live in `file_id` is a self-reference and is elided.
718    /// Edges whose source node is no longer resolvable in the arena
719    /// (tombstoned) are silently skipped.
720    ///
721    /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
722    ///
723    /// * [`reverse_import_index`](Self::reverse_import_index) remains the
724    ///   right surface for consumers that specifically need *import*
725    ///   relationships (export surface analysis, module-dependency graphs,
726    ///   etc.).
727    /// * `reverse_dependency_index` is the right surface for incremental
728    ///   rebuild closure computation. Widening past imports is necessary
729    ///   because call sites, type references, trait implementations, FFI
730    ///   declarations, HTTP clients, and every other cross-file edge kind
731    ///   hold a committed edge into the target file that becomes stale the
732    ///   moment `remove_file(target)` tombstones its arena nodes. Leaving
733    ///   those files out of the closure leaves the committed edges pointing
734    ///   at the stale (pre-tombstone) node IDs — Phase 4c-prime only
735    ///   rewrites edges on **re-parsed** files' `PendingEdge` sets, never
736    ///   committed edges owned by files outside the reparse scope.
737    ///
738    /// # Complexity
739    ///
740    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
741    /// same bound as [`reverse_import_index`](Self::reverse_import_index).
742    /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
743    /// lookup and the bidirectional edge store's reverse adjacency; no
744    /// full-graph scan. Final `O(R log R)` sort over the deduplicated
745    /// dependent set is negligible since `R ≤ file_count`.
746    ///
747    /// # Over-widening is expected and acceptable
748    ///
749    /// The closure will include every file that references anything in
750    /// `file_id`, not just files whose exports change. In common codebases
751    /// this widens the reparse set modestly (a 10-file change may expand
752    /// to 20–30 dependent files in a medium crate). Correctness requires
753    /// the widening; minimality is a follow-up optimisation if profiling
754    /// demands it.
755    ///
756    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
757    #[must_use]
758    pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
759        let mut dependents: HashSet<FileId> = HashSet::new();
760        for &target_node in self.indices.by_file(file_id) {
761            for edge_ref in self.edges.edges_to(target_node) {
762                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
763                    continue;
764                };
765                let source_file = source_entry.file;
766                if source_file != file_id {
767                    dependents.insert(source_file);
768                }
769            }
770        }
771        let mut result: Vec<FileId> = dependents.into_iter().collect();
772        result.sort();
773        result
774    }
775
    /// Returns the per-language confidence metadata.
    ///
    /// This contains analysis confidence information collected during graph build,
    /// primarily used by language plugins (e.g., Rust) to track analysis quality.
    /// Keys are language names as passed to
    /// [`merge_confidence`](Self::merge_confidence).
    #[inline]
    #[must_use]
    pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
        &self.confidence
    }
785
786    /// Merges confidence metadata for a language.
787    ///
788    /// If confidence already exists for the language, this merges the new
789    /// metadata (taking the lower confidence level and combining limitations).
790    /// Otherwise, it inserts the new confidence.
791    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
792        use crate::confidence::ConfidenceLevel;
793
794        self.confidence
795            .entry(language.to_string())
796            .and_modify(|existing| {
797                // Take the lower confidence level (more conservative)
798                let new_level = match (&existing.level, &metadata.level) {
799                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
800                        *other
801                    }
802                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
803                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
804                        ConfidenceLevel::AstOnly
805                    }
806                    (level, _) => *level,
807                };
808                existing.level = new_level;
809
810                // Merge limitations (deduplicated)
811                for limitation in &metadata.limitations {
812                    if !existing.limitations.contains(limitation) {
813                        existing.limitations.push(limitation.clone());
814                    }
815                }
816
817                // Merge unavailable features (deduplicated)
818                for feature in &metadata.unavailable_features {
819                    if !existing.unavailable_features.contains(feature) {
820                        existing.unavailable_features.push(feature.clone());
821                    }
822                }
823            })
824            .or_insert(metadata);
825    }
826
    /// Sets the confidence metadata map directly.
    ///
    /// This is primarily used when loading a graph from serialized state.
    /// Replaces the entire map; any previously merged confidence for any
    /// language is discarded.
    pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
        self.confidence = confidence;
    }
833
834    // ------------------------------------------------------------------
835    // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
836    //
837    // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
838    // path mutates a live `CodeGraph` in place and is the mechanism
839    // used by the full-rebuild flow when it needs to evict a file's
840    // nodes+edges between compactions. The daemon's incremental
841    // `WorkspaceManager` (Task 6) goes through the rebuild path, which
842    // is why this entry point is `pub(crate)`.
843    // ------------------------------------------------------------------
844
845    /// Tombstone every node that belongs to `file_id`, invalidate every
846    /// edge whose source or target is one of those nodes (across both
847    /// forward and reverse CSR + delta tiers), drop the file's entry
848    /// from the [`FileRegistry`], and return the set of [`NodeId`]s
849    /// that were tombstoned.
850    ///
851    /// This is the §F.2-aware file-removal primitive. Semantically, the
852    /// post-condition matches what a full rebuild of the workspace
853    /// without `file_id` would produce:
854    ///
855    /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
856    ///   been `NodeArena::remove`d, advancing its slot generation so
857    ///   stale [`NodeId`] handles do not alias a later re-allocation.
858    /// * Every CSR edge whose source or target slot matches one of the
859    ///   tombstoned slot indices has its `csr_tombstones` bit set; the
860    ///   read path's merge step already filters tombstoned CSR edges
861    ///   out of every query result.
862    /// * Every delta-buffer edge (Add or Remove, any file) whose source
863    ///   or target matches a tombstoned slot has been dropped from the
864    ///   delta in both directions.
865    /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
866    ///   no longer reference any of the tombstoned `NodeId`s.
867    /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
868    ///   [`AliasTable`], and [`ShadowTable`] have been compacted through
869    ///   the [`NodeIdBearing::retain_nodes`] predicate so no
870    ///   tombstoned NodeId survives in any publish-visible store.
871    /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
872    ///   `file_id`, the lookup slot is recycled, and
873    ///   [`FileRegistry::resolve(file_id)`] returns `None`.
874    ///
875    /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
876    /// is useful for downstream housekeeping (e.g., resetting per-file
877    /// caches keyed by NodeId) and for tests that need to assert on the
878    /// exact membership of the tombstone set.
879    ///
880    /// # Idempotency
881    ///
882    /// Calling `remove_file` twice with the same `file_id` — or calling
883    /// it for a `file_id` that was never registered — is a safe no-op.
884    /// The returned `Vec<NodeId>` is empty on the second call; no
885    /// arena / edge / index state is mutated (the predicate-based
886    /// compaction of `NodeIdBearing` surfaces short-circuits when the
887    /// dead set is empty).
888    ///
889    /// # Visibility
890    ///
891    /// `pub(crate)` because external callers (Task 6's
892    /// `WorkspaceManager` on the sqry-daemon side) route through
893    /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
894    /// instead. This `CodeGraph`-level variant is used by full-rebuild
895    /// housekeeping paths inside sqry-core and by the Task 4 Step 4
896    /// incremental fallback for cases where the caller already has a
897    /// `&mut CodeGraph` and does not need the clone-and-publish
898    /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
899    /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
900    ///
901    /// # Performance
902    ///
903    /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
904    ///   The CSR walk is linear in total edge count (not per-file),
905    ///   which is the dominant cost; each row check is O(1) via the
906    ///   precomputed dead-slot-index `HashSet` in
907    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
908    /// * Delta filtering is `O(|delta|)` per direction.
909    /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
910    ///   because each of the four indices keys its entries by the
911    ///   tombstoned NodeIds directly.
912    ///
913    /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
914    /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
915    /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
916    #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
917    // and the unit tests below; published in this commit so the §F.2
918    // invariant surface can be reviewed in isolation per the Gate 0c
919    // split contract.
920    pub(crate) fn remove_file(
921        &mut self,
922        file_id: FileId,
923    ) -> Vec<crate::graph::unified::node::NodeId> {
924        use crate::graph::unified::node::NodeId;
925        use crate::graph::unified::rebuild::coverage::NodeIdBearing;
926
927        // Drain the per-file bucket. For a file that was never
928        // registered, this returns an empty Vec — the rest of the
929        // method short-circuits on the `if tombstoned.is_empty()` test
930        // below so we still deregister the file on the off chance the
931        // bucket was empty but the file was registered (defensive; the
932        // common case is the bucket existed iff the file was
933        // registered).
934        let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
935        // Always drop the file's path entry + recycle its slot, even
936        // when the bucket was empty, so repeated registrations of the
937        // same path don't resurrect a zombie FileId. `unregister` is
938        // idempotent (returns None for unknown IDs) so the cost is a
939        // single HashMap probe when file_id is already gone.
940        self.files_mut().unregister(file_id);
941        // Clear the file's segment entry unconditionally (idempotent
942        // — `FileSegmentTable::remove` no-ops on unknown ids). This
943        // MUST run before `FileRegistry::unregister` recycles the
944        // FileId slot for reuse, otherwise a later registration of a
945        // different path under the reused FileId would inherit the
946        // previous file's stale node range (see
947        // `sqry-core/src/graph/unified/build/reindex.rs` — which
948        // trusts `file_segments().get(file_id)` to decide which slots
949        // to tombstone). Note: `unregister` above was called first
950        // only to keep the existing bucket-drain ordering; the
951        // segment-clear is order-independent with respect to
952        // `unregister` because neither touches the other's backing
953        // store, and the FileId slot cannot be recycled-and-reissued
954        // across a single `remove_file` call (the registry's slot
955        // recycler is driven by a later `register`, not by
956        // `unregister`).
957        self.file_segments_mut().remove(file_id);
958
959        if tombstoned.is_empty() {
960            return tombstoned;
961        }
962
963        // Dead set keyed on NodeId for NodeIdBearing predicates.
964        // `retain_nodes` uses `HashSet::contains` so membership is O(1).
965        let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
966
967        // 1. Tombstone the arena slots. `NodeArena::remove` is
968        //    idempotent — stale NodeIds that don't match a slot's
969        //    current generation are no-ops, which lets this method be
970        //    safely re-run on the same file.
971        {
972            let arena = self.nodes_mut();
973            for &nid in &tombstoned {
974                let _ = arena.remove(nid);
975            }
976        }
977
978        // 2. Invalidate edges across both CSR + delta in both
979        //    directions. This is the expensive step; the helper uses a
980        //    precomputed dead-slot-index set so the CSR walk is linear
981        //    in total edge count, not quadratic.
982        self.edges_mut().tombstone_edges_for_nodes(&dead);
983
984        // 3. Compact the auxiliary indices so name/kind/qname/file
985        //    lookups do not return tombstoned NodeIds. Using the
986        //    NodeIdBearing surface keeps this step in lockstep with the
987        //    rebuild pipeline's step 4 — any future publish-visible
988        //    NodeId-bearing container added to the K.A/K.B matrix is
989        //    automatically swept here too.
990        {
991            let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
992            self.indices_mut().retain_nodes(&*predicate);
993            self.macro_metadata_mut().retain_nodes(&*predicate);
994            // The remaining K.A rows (node_provenance, scope_arena,
995            // alias_table, shadow_table) need Arc::make_mut accessors —
996            // they are wrapped in Arc at rest so this is where the
997            // CoW clone happens on demand. Inline the Arc::make_mut
998            // calls here (no public mut accessor exists today because
999            // the sole writer has been the rebuild path; extend the
1000            // set by adding a similar inline call plus a K.A/K.B row
1001            // in `super::super::rebuild::coverage`).
1002            Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1003            Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1004            Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1005            Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1006        }
1007
1008        tombstoned
1009    }
1010
1011    // ------------------------------------------------------------------
1012    // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1013    // ------------------------------------------------------------------
1014
1015    /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1016    /// by `RebuildGraph::finalize()` (defined in
1017    /// `super::super::rebuild::rebuild_graph`).
1018    ///
1019    /// This constructor is deliberately `pub(crate)` and named with a
1020    /// leading `__` so it is inaccessible from downstream crates even
1021    /// when the `rebuild-internals` feature is enabled: only code in
1022    /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1023    /// permitted to call it. The trybuild fixture
1024    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1025    /// proves there is no other path from `RebuildGraph` to
1026    /// `Arc<CodeGraph>`.
1027    ///
1028    /// The argument order mirrors the `CodeGraph` struct declaration
1029    /// order exactly; the public `clone_for_rebuild` → `finalize`
1030    /// round-trip uses the same macro-driven field enumeration so any
1031    /// new `CodeGraph` field automatically threads through this
1032    /// constructor as well.
    #[doc(hidden)]
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub(crate) fn __assemble_from_rebuild_parts_internal(
        nodes: NodeArena,
        edges: BidirectionalEdgeStore,
        strings: StringInterner,
        files: FileRegistry,
        indices: AuxiliaryIndices,
        macro_metadata: NodeMetadataStore,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
        epoch: u64,
        confidence: HashMap<String, ConfidenceMetadata>,
        scope_arena: ScopeArena,
        alias_table: AliasTable,
        shadow_table: ShadowTable,
        scope_provenance_store: ScopeProvenanceStore,
        file_segments: FileSegmentTable,
    ) -> Self {
        // Every component store is wrapped in a fresh `Arc` so the
        // assembled graph supports O(1) CoW snapshots; the two epochs
        // and the confidence map are plain values and move through
        // unchanged.
        Self {
            nodes: Arc::new(nodes),
            edges: Arc::new(edges),
            strings: Arc::new(strings),
            files: Arc::new(files),
            indices: Arc::new(indices),
            macro_metadata: Arc::new(macro_metadata),
            node_provenance: Arc::new(node_provenance),
            edge_provenance: Arc::new(edge_provenance),
            fact_epoch,
            epoch,
            confidence,
            scope_arena: Arc::new(scope_arena),
            alias_table: Arc::new(alias_table),
            shadow_table: Arc::new(shadow_table),
            scope_provenance_store: Arc::new(scope_provenance_store),
            file_segments: Arc::new(file_segments),
        }
    }
1073
1074    // ------------------------------------------------------------------
1075    // Gate 0c (A2 §F) — publish-boundary debug invariants.
1076    //
1077    // These checks fire only in debug / test builds; release builds
1078    // compile them out. They are called from
1079    // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1080    // truth for the residue check, per §F and §H agreement). Gate 0d
1081    // will additionally wire the bijection check into
1082    // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1083    // every §E equivalence-harness run.
1084    // ------------------------------------------------------------------
1085
1086    /// Assert the bijective bucket-membership invariant (A2 §F.1).
1087    ///
1088    /// Four conditions must hold simultaneously:
1089    /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1090    ///     live arena slot;
1091    /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1092    ///     across buckets, no duplicates within a bucket);
1093    /// (c) the bucket's `FileId` matches the node's own `file` field on
1094    ///     `NodeEntry`;
1095    /// (d) when at least one bucket is populated, every live node in
1096    ///     the arena is accounted for by some bucket.
1097    ///
1098    /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1099    /// (no recorded buckets) publish boundary is vacuously consistent:
1100    /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1101    /// rebuilds on graphs that predate Gate 0c's parallel-commit
1102    /// bucketing must not panic. Once any bucket is populated, every
1103    /// live arena slot must appear in a bucket.
1104    ///
1105    /// Iter-2 B2 (verbatim): this check used to be documented as
1106    /// "vacuous until future `per_file_nodes` work lands". Pulling
1107    /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1108    /// is non-vacuous the moment parallel-parse commits nodes, which
1109    /// happens on every real build. The `!seen.is_empty()` guard now
1110    /// exists solely for the empty-graph corner case, not as a phased-
1111    /// delivery deferral.
1112    ///
1113    /// The check is a no-op in release builds. Panics with a
1114    /// descriptive message on violation. This is intentional: publish-
1115    /// boundary violations are programmer errors that must surface
1116    /// loudly during CI / test runs.
1117    #[cfg(any(debug_assertions, test))]
1118    pub fn assert_bucket_bijection(&self) {
1119        use std::collections::HashMap as StdHashMap;
1120        // (a) + (b) + (c): every bucketed node is live, unique across
1121        // buckets, and the bucket's FileId matches the node's own file.
1122        let mut seen: StdHashMap<
1123            crate::graph::unified::node::NodeId,
1124            crate::graph::unified::file::FileId,
1125        > = StdHashMap::new();
1126        let mut any_bucket_populated = false;
1127        for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1128            if !bucket.is_empty() {
1129                any_bucket_populated = true;
1130            }
1131            // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1132            // dedups during finalize step 6, but we re-check here so the
1133            // invariant covers non-finalize publish paths too (e.g. if a
1134            // future pipeline builds a graph without routing through
1135            // `RebuildGraph::finalize`).
1136            let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1137                std::collections::HashSet::new();
1138            for node_id in bucket {
1139                assert!(
1140                    within_bucket.insert(node_id),
1141                    "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1142                );
1143                assert!(
1144                    self.nodes.get(node_id).is_some(),
1145                    "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1146                );
1147                let prior = seen.insert(node_id, file_id);
1148                assert!(
1149                    prior.is_none(),
1150                    "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1151                     prior={prior:?}, current={file_id:?}"
1152                );
1153                if let Some(entry) = self.nodes.get(node_id) {
1154                    assert_eq!(
1155                        entry.file, file_id,
1156                        "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1157                         {file_id:?}, actual {:?}",
1158                        entry.file
1159                    );
1160                }
1161            }
1162        }
1163        // (d): every live node is accounted for by `seen` once buckets
1164        // are populated. The guard keeps legacy-empty-graph boundaries
1165        // vacuously consistent (see docs above).
1166        if any_bucket_populated {
1167            for (node_id, _entry) in self.nodes.iter() {
1168                assert!(
1169                    seen.contains_key(&node_id),
1170                    "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1171                );
1172            }
1173        }
1174    }
1175
1176    /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1177    ///
1178    /// Iterates every publish-visible NodeId-bearing structure on
1179    /// `self` and panics if any contains a node in `dead`. Called from
1180    /// `RebuildGraph::finalize()` step 14 against the set drained at
1181    /// step 8 — exactly one site per the plan's §F / §H agreement.
1182    ///
1183    /// No-op when `dead` is empty or in release builds.
1184    #[cfg(any(debug_assertions, test))]
1185    pub fn assert_no_tombstone_residue_for(
1186        &self,
1187        dead: &std::collections::HashSet<crate::graph::unified::node::NodeId>,
1188    ) {
1189        use super::super::rebuild::coverage::NodeIdBearing;
1190        if dead.is_empty() {
1191            return;
1192        }
1193        // Every K.A/K.B row must be inspected per §F.2.
1194        for nid in self.nodes.all_node_ids() {
1195            assert!(
1196                !dead.contains(&nid),
1197                "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1198            );
1199        }
1200        for nid in self.indices.all_node_ids() {
1201            assert!(
1202                !dead.contains(&nid),
1203                "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1204            );
1205        }
1206        for nid in self.edges.all_node_ids() {
1207            assert!(
1208                !dead.contains(&nid),
1209                "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1210            );
1211        }
1212        for nid in self.macro_metadata.all_node_ids() {
1213            assert!(
1214                !dead.contains(&nid),
1215                "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1216            );
1217        }
1218        for nid in self.node_provenance.all_node_ids() {
1219            assert!(
1220                !dead.contains(&nid),
1221                "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1222            );
1223        }
1224        for nid in self.scope_arena.all_node_ids() {
1225            assert!(
1226                !dead.contains(&nid),
1227                "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1228            );
1229        }
1230        for nid in self.alias_table.all_node_ids() {
1231            assert!(
1232                !dead.contains(&nid),
1233                "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1234            );
1235        }
1236        for nid in self.shadow_table.all_node_ids() {
1237            assert!(
1238                !dead.contains(&nid),
1239                "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1240            );
1241        }
1242        for nid in self.files.all_node_ids() {
1243            assert!(
1244                !dead.contains(&nid),
1245                "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1246            );
1247        }
1248    }
1249}
1250
1251impl Default for CodeGraph {
1252    fn default() -> Self {
1253        Self::new()
1254    }
1255}
1256
impl fmt::Debug for CodeGraph {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Deliberately terse: only the node count and epoch are shown,
        // with `finish_non_exhaustive` marking the elided fields —
        // dumping the arenas/edge stores would be unreadable for any
        // non-trivial graph.
        f.debug_struct("CodeGraph")
            .field("nodes", &self.nodes.len())
            .field("epoch", &self.epoch)
            .finish_non_exhaustive()
    }
}
1265
1266impl GraphMemorySize for CodeGraph {
1267    /// Estimates the total heap bytes owned by this `CodeGraph`.
1268    ///
1269    /// Sums heap usage across every component the graph owns: node arena,
1270    /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1271    /// buffer), string interner, file registry, auxiliary indices, sparse
1272    /// macro/classpath metadata, provenance stores, and the per-language
1273    /// confidence map. Used by the `sqryd` daemon's admission controller
1274    /// and workspace retention reaper to enforce memory budgets.
1275    fn heap_bytes(&self) -> usize {
1276        let mut confidence_bytes = self.confidence.capacity()
1277            * (std::mem::size_of::<String>()
1278                + std::mem::size_of::<ConfidenceMetadata>()
1279                + HASHMAP_ENTRY_OVERHEAD);
1280        for (key, meta) in &self.confidence {
1281            confidence_bytes += key.capacity();
1282            // `ConfidenceMetadata.limitations` / `unavailable_features` are
1283            // `Vec<String>`; charge their spill and inner String payloads.
1284            confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1285            for s in &meta.limitations {
1286                confidence_bytes += s.capacity();
1287            }
1288            confidence_bytes +=
1289                meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1290            for s in &meta.unavailable_features {
1291                confidence_bytes += s.capacity();
1292            }
1293        }
1294
1295        self.nodes.heap_bytes()
1296            + self.edges.heap_bytes()
1297            + self.strings.heap_bytes()
1298            + self.files.heap_bytes()
1299            + self.indices.heap_bytes()
1300            + self.macro_metadata.heap_bytes()
1301            + self.node_provenance.heap_bytes()
1302            + self.edge_provenance.heap_bytes()
1303            + confidence_bytes
1304            + self.file_segments.capacity()
1305                * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1306                )
1307    }
1308}
1309
1310/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1311///
1312/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1313/// - Multiple readers can access the graph simultaneously
1314/// - Only one writer can hold the lock at a time
1315/// - Each write operation increments the epoch for cursor invalidation
1316///
1317/// # Design
1318///
1319/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1320/// - Fair scheduling prevents writer starvation
1321/// - No poisoning (unlike `std::sync::RwLock`)
1322/// - Faster lock/unlock operations
1323///
1324/// # Phase 2 binding-plane access
1325///
1326/// Use the three-line snapshot pattern to access `BindingPlane`:
1327///
1328/// ```rust,ignore
1329/// let read_guard = concurrent.read();
1330/// let snapshot = read_guard.snapshot();
1331/// let plane = snapshot.binding_plane();
1332/// let resolution = plane.resolve(&query);
1333/// ```
1334///
1335/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1336/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1337/// API is exposed through `BindingPlane`.
1338///
1339/// # Usage
1340///
1341/// ```rust
1342/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1343///
1344/// let graph = ConcurrentCodeGraph::new();
1345///
1346/// // Read access (multiple readers allowed)
1347/// {
1348///     let guard = graph.read();
1349///     let _nodes = guard.nodes();
1350/// }
1351///
1352/// // Write access (exclusive)
1353/// {
1354///     let mut guard = graph.write();
1355///     let _nodes = guard.nodes_mut();
1356/// }
1357///
1358/// // Snapshot for long queries
1359/// let snapshot = graph.snapshot();
1360/// ```
pub struct ConcurrentCodeGraph {
    /// The underlying code graph protected by a read-write lock.
    inner: RwLock<CodeGraph>,
    /// Global epoch counter for cursor validation. Incremented by
    /// [`write`](Self::write); callers compare a remembered value
    /// against [`epoch`](Self::epoch) to detect intervening mutations.
    epoch: AtomicU64,
}
1367
1368impl ConcurrentCodeGraph {
    /// Creates a new empty `ConcurrentCodeGraph`.
    ///
    /// The global epoch counter starts at 0; the first call to
    /// [`write`](Self::write) bumps it.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: RwLock::new(CodeGraph::new()),
            epoch: AtomicU64::new(0),
        }
    }
1377
    /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
    ///
    /// The global epoch counter is seeded from the graph's own epoch so
    /// epoch-based cursor invalidation stays monotonic across the handoff.
    #[must_use]
    pub fn from_graph(graph: CodeGraph) -> Self {
        let epoch = graph.epoch();
        Self {
            inner: RwLock::new(graph),
            epoch: AtomicU64::new(epoch),
        }
    }
1387
    /// Acquires a read lock on the graph.
    ///
    /// Multiple readers can hold the lock simultaneously.
    /// This does not increment the epoch.
    /// Blocks while a writer holds the lock (`parking_lot::RwLock`,
    /// so there is no poisoning to handle).
    #[inline]
    pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
        self.inner.read()
    }
1396
1397    /// Acquires a write lock on the graph.
1398    ///
1399    /// Only one writer can hold the lock at a time.
1400    /// This increments the global epoch counter.
1401    #[inline]
1402    pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1403        // Increment the global epoch
1404        self.epoch.fetch_add(1, Ordering::SeqCst);
1405        let mut guard = self.inner.write();
1406        // Sync the inner graph's epoch with the global epoch
1407        guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1408        guard
1409    }
1410
    /// Returns the current global epoch.
    ///
    /// This can be used to detect if the graph has been modified
    /// since a previous operation (cursor invalidation).
    /// Loaded with `SeqCst`, so it is totally ordered with respect to
    /// the increments performed by [`write`](Self::write).
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch.load(Ordering::SeqCst)
    }
1420
    /// Creates a cheap snapshot of the graph.
    ///
    /// This acquires a brief read lock to clone the Arc references.
    /// The snapshot is isolated from future mutations. Creation is O(1):
    /// only refcounts are bumped; no graph data is copied.
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        self.inner.read().snapshot()
    }
1429
1430    // ------------------------------------------------------------------
1431    // Phase 1 fact-layer provenance convenience accessors.
1432    // These acquire a brief read lock, matching the snapshot() pattern.
1433    // ------------------------------------------------------------------
1434
    /// Returns the monotonic fact-layer epoch from the underlying graph.
    ///
    /// Acquires a brief read lock, matching the `snapshot()` pattern.
    #[must_use]
    pub fn fact_epoch(&self) -> u64 {
        self.inner.read().fact_epoch()
    }
1440
1441    /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1442    #[must_use]
1443    pub fn node_provenance(
1444        &self,
1445        id: crate::graph::unified::node::id::NodeId,
1446    ) -> Option<NodeProvenance> {
1447        self.inner.read().node_provenance(id).copied()
1448    }
1449
1450    /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1451    #[must_use]
1452    pub fn edge_provenance(
1453        &self,
1454        id: crate::graph::unified::edge::id::EdgeId,
1455    ) -> Option<EdgeProvenance> {
1456        self.inner.read().edge_provenance(id).copied()
1457    }
1458
1459    /// Returns a file provenance view (acquires a brief read lock).
1460    ///
1461    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1462    #[must_use]
1463    pub fn file_provenance(
1464        &self,
1465        id: crate::graph::unified::file::id::FileId,
1466    ) -> Option<OwnedFileProvenanceView> {
1467        let guard = self.inner.read();
1468        guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1469            content_hash: *v.content_hash,
1470            indexed_at: v.indexed_at,
1471            source_uri: v.source_uri,
1472            is_external: v.is_external,
1473        })
1474    }
1475
1476    // ------------------------------------------------------------------
1477    // Phase 2 binding-plane accessors (P2U03).
1478    // ------------------------------------------------------------------
1479
1480    /// Returns the scope arena from the underlying graph (acquires a brief
1481    /// read lock).
1482    ///
1483    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1484    /// this call site.
1485    #[must_use]
1486    pub fn scope_arena(&self) -> Arc<ScopeArena> {
1487        Arc::clone(&self.inner.read().scope_arena)
1488    }
1489
1490    /// Returns the alias table from the underlying graph (acquires a brief
1491    /// read lock).
1492    ///
1493    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1494    /// this call site.
1495    #[must_use]
1496    pub fn alias_table(&self) -> Arc<AliasTable> {
1497        Arc::clone(&self.inner.read().alias_table)
1498    }
1499
1500    /// Returns the shadow table from the underlying graph (acquires a brief
1501    /// read lock).
1502    ///
1503    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1504    /// this call site.
1505    #[must_use]
1506    pub fn shadow_table(&self) -> Arc<ShadowTable> {
1507        Arc::clone(&self.inner.read().shadow_table)
1508    }
1509
1510    /// Returns the scope provenance store from the underlying graph (acquires
1511    /// a brief read lock).
1512    ///
1513    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1514    /// this call site.
1515    #[must_use]
1516    pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1517        Arc::clone(&self.inner.read().scope_provenance_store)
1518    }
1519
1520    /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1521    ///
1522    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1523    #[must_use]
1524    pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1525        self.inner.read().scope_provenance(id).cloned()
1526    }
1527
1528    /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1529    /// brief read lock).
1530    ///
1531    /// Returns `None` if no provenance record is registered for that stable id.
1532    #[must_use]
1533    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1534        self.inner.read().scope_by_stable_id(stable)
1535    }
1536
1537    /// Returns the file segment table from the underlying graph (acquires a
1538    /// brief read lock).
1539    #[must_use]
1540    pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1541        Arc::clone(&self.inner.read().file_segments)
1542    }
1543
    /// Attempts to acquire a read lock without blocking.
    ///
    /// Returns `None` if the lock is currently held exclusively.
    /// Like [`Self::read`], this never changes the epoch.
    #[inline]
    #[must_use]
    pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
        self.inner.try_read()
    }
1552
1553    /// Attempts to acquire a write lock without blocking.
1554    ///
1555    /// Returns `None` if the lock is currently held by another thread.
1556    /// If successful, increments the epoch.
1557    #[inline]
1558    pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1559        self.inner.try_write().map(|mut guard| {
1560            self.epoch.fetch_add(1, Ordering::SeqCst);
1561            guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1562            guard
1563        })
1564    }
1565}
1566
impl Default for ConcurrentCodeGraph {
    /// Delegates to [`ConcurrentCodeGraph::new`].
    fn default() -> Self {
        Self::new()
    }
}
1572
1573impl fmt::Debug for ConcurrentCodeGraph {
1574    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1575        f.debug_struct("ConcurrentCodeGraph")
1576            .field("epoch", &self.epoch.load(Ordering::SeqCst))
1577            .finish_non_exhaustive()
1578    }
1579}
1580
/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
///
/// Field-for-field mirror of the borrowed view; all fields are `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OwnedFileProvenanceView {
    /// SHA-256 of the on-disk file bytes (owned copy).
    pub content_hash: [u8; 32],
    /// Unix-epoch seconds at which this file was registered.
    pub indexed_at: u64,
    /// Optional interned physical origin URI.
    pub source_uri: Option<StringId>,
    /// Whether this file originates from an external source.
    pub is_external: bool,
}
1594
/// Immutable snapshot of a `CodeGraph` for long-running queries.
///
/// `GraphSnapshot` holds Arc references to the graph components,
/// providing a consistent view that is isolated from concurrent mutations.
///
/// # Design
///
/// Snapshots are created via `CodeGraph::snapshot()` or
/// `ConcurrentCodeGraph::snapshot()`. They are:
///
/// - **Immutable**: No mutation methods available
/// - **Isolated**: Independent of future graph mutations
/// - **Cheap**: Only Arc clones, no data copying
/// - **Self-contained**: Can outlive the original graph/lock
///
/// Cloning a `GraphSnapshot` is itself cheap: every field is either an
/// `Arc` (refcount bump) or a `u64`.
///
/// # Usage
///
/// ```rust
/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
///
/// let graph = ConcurrentCodeGraph::new();
///
/// // Create snapshot for a long query
/// let snapshot: GraphSnapshot = graph.snapshot();
///
/// // Snapshot can be used independently
/// let _epoch = snapshot.epoch();
/// ```
#[derive(Clone)]
pub struct GraphSnapshot {
    /// Node storage snapshot.
    nodes: Arc<NodeArena>,
    /// Edge storage snapshot.
    edges: Arc<BidirectionalEdgeStore>,
    /// String interner snapshot.
    strings: Arc<StringInterner>,
    /// File registry snapshot.
    files: Arc<FileRegistry>,
    /// Auxiliary indices snapshot.
    indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata snapshot.
    macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance snapshot (Phase 1).
    node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance snapshot (Phase 1).
    edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch at snapshot time.
    fact_epoch: u64,
    /// Epoch at snapshot time (for cursor validation).
    epoch: u64,
    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
    scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
    alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
    shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
    scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
    file_segments: Arc<FileSegmentTable>,
}
1656
1657impl GraphSnapshot {
    /// Returns a reference to the node arena.
    ///
    /// The reference borrows from this snapshot's `Arc`-held component.
    #[inline]
    #[must_use]
    pub fn nodes(&self) -> &NodeArena {
        &self.nodes
    }

    /// Returns a reference to the bidirectional edge store.
    #[inline]
    #[must_use]
    pub fn edges(&self) -> &BidirectionalEdgeStore {
        &self.edges
    }

    /// Returns a reference to the string interner.
    #[inline]
    #[must_use]
    pub fn strings(&self) -> &StringInterner {
        &self.strings
    }

    /// Returns a reference to the file registry.
    #[inline]
    #[must_use]
    pub fn files(&self) -> &FileRegistry {
        &self.files
    }

    /// Returns a reference to the auxiliary indices.
    #[inline]
    #[must_use]
    pub fn indices(&self) -> &AuxiliaryIndices {
        &self.indices
    }

    /// Returns a reference to the macro boundary metadata store.
    #[inline]
    #[must_use]
    pub fn macro_metadata(&self) -> &NodeMetadataStore {
        &self.macro_metadata
    }
1699
    // ------------------------------------------------------------------
    // Phase 1 fact-layer provenance accessors (P1U09).
    // ------------------------------------------------------------------

    /// Returns the monotonic fact-layer epoch.
    ///
    /// Captured at snapshot time; never changes for a given snapshot.
    #[inline]
    #[must_use]
    pub fn fact_epoch(&self) -> u64 {
        self.fact_epoch
    }

    /// Looks up node provenance by `NodeId`.
    ///
    /// Unlike the `ConcurrentCodeGraph` counterpart, this can return a
    /// borrow because no lock guard is involved.
    #[inline]
    #[must_use]
    pub fn node_provenance(
        &self,
        id: crate::graph::unified::node::id::NodeId,
    ) -> Option<&NodeProvenance> {
        self.node_provenance.lookup(id)
    }

    /// Looks up edge provenance by `EdgeId`.
    #[inline]
    #[must_use]
    pub fn edge_provenance(
        &self,
        id: crate::graph::unified::edge::id::EdgeId,
    ) -> Option<&EdgeProvenance> {
        self.edge_provenance.lookup(id)
    }

    /// Returns a borrowed provenance view for a file.
    #[inline]
    #[must_use]
    pub fn file_provenance(
        &self,
        id: crate::graph::unified::file::id::FileId,
    ) -> Option<FileProvenanceView<'_>> {
        self.files.file_provenance(id)
    }
1740
    // ------------------------------------------------------------------
    // Phase 2 binding-plane accessors (P2U03).
    // ------------------------------------------------------------------

    /// Returns a reference to the scope arena at snapshot time.
    ///
    /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
    /// as it existed when `snapshot()` was called. Subsequent calls to
    /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
    #[inline]
    #[must_use]
    pub fn scope_arena(&self) -> &ScopeArena {
        &self.scope_arena
    }

    /// Returns a reference to the alias table at snapshot time.
    ///
    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
    /// as it existed when `snapshot()` was called. Subsequent calls to
    /// `set_alias_table` on the source `CodeGraph` do not affect this view.
    #[inline]
    #[must_use]
    pub fn alias_table(&self) -> &AliasTable {
        &self.alias_table
    }

    /// Returns a reference to the shadow table at snapshot time.
    ///
    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
    /// as it existed when `snapshot()` was called. Subsequent calls to
    /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
    #[inline]
    #[must_use]
    pub fn shadow_table(&self) -> &ShadowTable {
        &self.shadow_table
    }

    /// Returns a reference to the scope provenance store at snapshot time.
    ///
    /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
    /// as it existed when `snapshot()` was called. Subsequent calls to
    /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
    /// this view.
    #[inline]
    #[must_use]
    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
        &self.scope_provenance_store
    }

    /// Looks up scope provenance by `ScopeId` at snapshot time.
    ///
    /// Returns `None` if the slot is out of range, vacant, or the stored
    /// generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
        self.scope_provenance_store.lookup(id)
    }

    /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
    ///
    /// Returns `None` if no provenance record is registered for that stable id.
    #[inline]
    #[must_use]
    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
        self.scope_provenance_store.scope_by_stable_id(stable)
    }

    /// Returns a reference to the file segment table at snapshot time.
    ///
    /// Same MVCC semantics as the other Phase 2 accessors above.
    #[inline]
    #[must_use]
    pub fn file_segments(&self) -> &FileSegmentTable {
        &self.file_segments
    }
1815
    /// Returns the epoch at which this snapshot was taken.
    ///
    /// This can be compared against the current graph epoch to
    /// detect if the graph has changed since the snapshot.
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch
    }

    /// Returns `true` if this snapshot's epoch matches the given epoch.
    ///
    /// Use this to validate cursors before continuing pagination.
    /// Pure equality check; no locks or atomics involved.
    #[inline]
    #[must_use]
    pub fn epoch_matches(&self, other_epoch: u64) -> bool {
        self.epoch == other_epoch
    }
1834
    // ------------------------------------------------------------------
    // Phase 2 binding-plane facade accessor (P2U07).
    // ------------------------------------------------------------------

    /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
    ///
    /// The facade is the stable Phase 2 public API for scope/alias/shadow
    /// queries and witness-bearing resolution. It provides a single entry
    /// point (`resolve`) that returns both a `BindingResult` and an ordered
    /// step trace in a `BindingResolution`.
    ///
    /// Construction is trivial (stores `&self` only); no data is copied.
    ///
    /// # MVCC note
    ///
    /// `BindingPlane<'_>` borrows from this snapshot, which is already an
    /// MVCC-consistent view of the graph at snapshot time. Callers from
    /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
    /// pattern so the snapshot lifetime is explicit:
    ///
    /// ```rust,ignore
    /// // CodeGraph caller:
    /// let snapshot = graph.snapshot();
    /// let plane = snapshot.binding_plane();
    ///
    /// // ConcurrentCodeGraph caller:
    /// let read_guard = concurrent.read();
    /// let snapshot = read_guard.snapshot();
    /// let plane = snapshot.binding_plane();
    /// ```
    #[inline]
    #[must_use]
    pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
        crate::graph::unified::bind::plane::BindingPlane::new(self)
    }
1868
    // ============================================================================
    // Query Methods
    // ============================================================================

    /// Finds nodes matching a pattern.
    ///
    /// Performs a simple substring match on node names and qualified names.
    /// Returns all matching node IDs.
    ///
    /// **Synthetic suppression (C_SUPPRESS):** synthetic placeholder
    /// nodes — internal scaffolding the language plugins emit for
    /// binding-plane and scope analysis (e.g. the Go plugin's
    /// `<field:operand.field>` field-access shadows and the
    /// `<ident>@<offset>` per-binding-site Variable nodes from the
    /// local-scope resolver) — are filtered out by default. Internal
    /// callers that need to reach these nodes (binding plane, scope /
    /// alias / shadow analysis) use
    /// [`Self::find_by_pattern_with_options`] with
    /// `include_synthetic = true`.
    ///
    /// # Performance
    ///
    /// Optimized to iterate over unique strings in the interner (smaller set)
    /// rather than all nodes in the arena.
    ///
    /// # Arguments
    ///
    /// * `pattern` - The pattern to match (substring search)
    ///
    /// # Returns
    ///
    /// A vector of `NodeIds` for all matching nodes (synthetic
    /// placeholders excluded).
    #[must_use]
    pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
        // Thin wrapper: same search with synthetic suppression enabled.
        self.find_by_pattern_with_options(pattern, false)
    }
1906
1907    /// Finds nodes matching a pattern with explicit control over synthetic
1908    /// placeholder visibility.
1909    ///
1910    /// `include_synthetic = false` is the default surface used by every
1911    /// user-facing caller (CLI `search`, MCP `semantic_search` /
1912    /// `pattern_search` / `relation_query`, etc.). Synthetic
1913    /// placeholders are suppressed via two parallel checks that must
1914    /// agree:
1915    ///
1916    /// 1. The authoritative `NodeMetadata::Synthetic` bit on the
1917    ///    metadata store
1918    ///    ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
1919    /// 2. The structural name-shape fallback
1920    ///    ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
1921    ///    for V10 snapshots written before the synthetic bit existed
1922    ///    and for cross-file unification losers that retained their
1923    ///    name but lost their metadata entry.
1924    ///
1925    /// Either check matching is sufficient to suppress the node. The
1926    /// design lives in `docs/development/public-issue-triage/`
1927    /// under the C_SUPPRESS unit; see also the rationale in
1928    /// [`crate::graph::unified::storage::metadata::NodeMetadata::Synthetic`].
1929    ///
1930    /// `include_synthetic = true` is **internal-only**. The binding
1931    /// plane, scope resolver, and rebuild's coverage gate use this
1932    /// path to reach synthetic nodes for their structural integrity
1933    /// checks. **No CLI / MCP surface should ever pass `true`.**
1934    #[must_use]
1935    pub fn find_by_pattern_with_options(
1936        &self,
1937        pattern: &str,
1938        include_synthetic: bool,
1939    ) -> Vec<crate::graph::unified::node::NodeId> {
1940        let mut matches = Vec::new();
1941
1942        // 1. Scan unique strings in interner for matches
1943        for (str_id, s) in self.strings.iter() {
1944            if s.contains(pattern) {
1945                // 2. If string matches, look up all nodes with this name
1946                // Check qualified name index
1947                matches.extend_from_slice(self.indices.by_qualified_name(str_id));
1948                // Check simple name index
1949                matches.extend_from_slice(self.indices.by_name(str_id));
1950            }
1951        }
1952
1953        // Deduplicate matches (a node might match both qualified and simple name)
1954        matches.sort_unstable();
1955        matches.dedup();
1956
1957        if !include_synthetic {
1958            matches.retain(|&node_id| !self.is_node_synthetic(node_id));
1959        }
1960
1961        matches
1962    }
1963
1964    /// Finds nodes whose interned simple **or** qualified name equals
1965    /// `name` byte-for-byte (case-sensitive).
1966    ///
1967    /// This is the canonical surface for **exact-name** lookups —
1968    /// shared by the CLI `--exact <pattern>` shorthand
1969    /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
1970    /// structural query planner's `name:` predicate
1971    /// (`sqry-db/src/planner/parse.rs`,
1972    /// `sqry-db/src/planner/execute.rs`). Both surfaces are
1973    /// contract-bound (DAG `B1_ALIGN`) to return the same set against
1974    /// any fixture: the CLI calls this method directly, while the
1975    /// planner uses the same interner + by-name index pair internally
1976    /// when scanning, then applies the same synthetic filter.
1977    ///
1978    /// **Synthetic suppression.** Synthetic placeholder nodes
1979    /// (Go-plugin `<field:operand.field>` shadows and
1980    /// `<ident>@<offset>` per-binding-site Variables; see
1981    /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
1982    /// are excluded via [`Self::is_node_synthetic`]. There is **no**
1983    /// `include_synthetic = true` variant for the exact-match surface
1984    /// because the synthetic name shapes the structural fallback
1985    /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
1986    /// name byte-for-byte; the metadata-bit channel is the only
1987    /// realistic leak vector and it is suppressed unconditionally.
1988    ///
1989    /// # Performance
1990    ///
1991    /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
1992    /// interned the result is empty without scanning any nodes.
1993    ///
1994    /// # Arguments
1995    ///
1996    /// * `name` - The exact name to look up (no glob, no regex).
1997    ///
1998    /// # Returns
1999    ///
2000    /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2001    /// whose `entry.name` or `entry.qualified_name` equals `name`.
2002    /// Returns an empty vector when `name` is not interned (i.e. no
2003    /// node could possibly carry that name).
2004    #[must_use]
2005    pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2006        // Resolve the input to a StringId. If the interner doesn't know
2007        // this string, no node could possibly carry it as a name.
2008        let Some(str_id) = self.strings.get(name) else {
2009            return Vec::new();
2010        };
2011
2012        // Pull the by-name and by-qualified-name index slices for that
2013        // StringId. Both indices are pre-built and slice-shaped.
2014        let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2015        matches.extend_from_slice(self.indices.by_name(str_id));
2016        matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2017
2018        matches.sort_unstable();
2019        matches.dedup();
2020
2021        // Same default-off synthetic filter as `find_by_pattern`. There
2022        // is no `include_synthetic` toggle on the exact-match surface
2023        // — see the doc comment above for the rationale.
2024        matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2025
2026        matches
2027    }
2028
2029    /// Returns `true` if the node should be treated as a synthetic
2030    /// placeholder for user-facing surfaces.
2031    ///
2032    /// Combines the metadata-store flag and the structural name-shape
2033    /// fallback (see [`Self::find_by_pattern_with_options`] for the
2034    /// full rationale). Returns `false` for missing nodes (an unknown
2035    /// `NodeId` is not "synthetic" — it is "not present").
2036    #[must_use]
2037    pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2038        // Authoritative check: metadata-store bit (NodeMetadata::Synthetic).
2039        if self.macro_metadata.is_synthetic(node_id) {
2040            return true;
2041        }
2042        // Structural fallback: name shape recognised as synthetic.
2043        // Required for V10 snapshots written before the bit existed,
2044        // for unification losers that lost their metadata entry, and
2045        // as defence-in-depth against future plugins forgetting to
2046        // flip the bit.
2047        let Some(entry) = self.nodes.get(node_id) else {
2048            return false;
2049        };
2050        if entry.is_unified_loser() {
2051            // Already invisible for other reasons; do not also flag as synthetic.
2052            return false;
2053        }
2054        let Some(name) = self.strings.resolve(entry.name) else {
2055            return false;
2056        };
2057        crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2058            name.as_ref(),
2059        )
2060    }
2061
2062    /// Gets all callees of a node (functions called by this node).
2063    ///
2064    /// Queries the forward edge store for all Calls edges from this node.
2065    ///
2066    /// # Arguments
2067    ///
2068    /// * `node` - The node ID to query
2069    ///
2070    /// # Returns
2071    ///
2072    /// A vector of `NodeIds` representing functions called by this node.
2073    #[must_use]
2074    pub fn get_callees(
2075        &self,
2076        node: crate::graph::unified::node::NodeId,
2077    ) -> Vec<crate::graph::unified::node::NodeId> {
2078        use crate::graph::unified::edge::EdgeKind;
2079
2080        self.edges
2081            .edges_from(node)
2082            .into_iter()
2083            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2084            .map(|edge| edge.target)
2085            .collect()
2086    }
2087
2088    /// Gets all callers of a node (functions that call this node).
2089    ///
2090    /// Queries the reverse edge store for all Calls edges to this node.
2091    ///
2092    /// # Arguments
2093    ///
2094    /// * `node` - The node ID to query
2095    ///
2096    /// # Returns
2097    ///
2098    /// A vector of `NodeIds` representing functions that call this node.
2099    #[must_use]
2100    pub fn get_callers(
2101        &self,
2102        node: crate::graph::unified::node::NodeId,
2103    ) -> Vec<crate::graph::unified::node::NodeId> {
2104        use crate::graph::unified::edge::EdgeKind;
2105
2106        self.edges
2107            .edges_to(node)
2108            .into_iter()
2109            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2110            .map(|edge| edge.source)
2111            .collect()
2112    }
2113
    /// Iterates over all nodes in the graph.
    ///
    /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
    /// occupied slots in the arena.
    ///
    /// # Returns
    ///
    /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
    pub fn iter_nodes(
        &self,
    ) -> impl Iterator<
        Item = (
            crate::graph::unified::node::NodeId,
            &crate::graph::unified::storage::arena::NodeEntry,
        ),
    > {
        self.nodes.iter()
    }

    /// Iterates over all edges in the graph.
    ///
    /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
    /// all edges in the forward edge store.
    ///
    /// NOTE(review): `edges_from` appears to return an owned collection
    /// per node (it is consumed via `into_iter`), so each node's edge
    /// list is materialized as the iterator advances — confirm if this
    /// matters for very large graphs.
    ///
    /// # Returns
    ///
    /// An iterator over edge tuples.
    pub fn iter_edges(
        &self,
    ) -> impl Iterator<
        Item = (
            crate::graph::unified::node::NodeId,
            crate::graph::unified::node::NodeId,
            crate::graph::unified::edge::EdgeKind,
        ),
    > + '_ {
        // Iterate over all nodes in the arena and get their outgoing edges
        self.nodes.iter().flat_map(move |(node_id, _entry)| {
            // Get all edges from this node
            self.edges
                .edges_from(node_id)
                .into_iter()
                .map(move |edge| (node_id, edge.target, edge.kind))
        })
    }

    /// Gets a node entry by ID.
    ///
    /// Returns a reference to the `NodeEntry` if the ID is valid, or None
    /// if the ID is invalid or stale.
    ///
    /// # Arguments
    ///
    /// * `id` - The node ID to look up
    ///
    /// # Returns
    ///
    /// A reference to the `NodeEntry`, or None if not found.
    #[must_use]
    pub fn get_node(
        &self,
        id: crate::graph::unified::node::NodeId,
    ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
        self.nodes.get(id)
    }
2179}
2180
2181impl fmt::Debug for GraphSnapshot {
2182    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2183        f.debug_struct("GraphSnapshot")
2184            .field("nodes", &self.nodes.len())
2185            .field("epoch", &self.epoch)
2186            .finish_non_exhaustive()
2187    }
2188}
2189
/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
///
/// This lets helpers that only *read* graph state (name-matching, relation
/// traversal, reference lookups) be written once and called from both the
/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
///
/// Every method mirrors an inherent accessor of the same name on both
/// implementors.
pub trait GraphAccess {
    /// Returns the node arena (read-only).
    fn nodes(&self) -> &NodeArena;
    /// Returns the bidirectional edge store (read-only).
    fn edges(&self) -> &BidirectionalEdgeStore;
    /// Returns the string interner (read-only).
    fn strings(&self) -> &StringInterner;
    /// Returns the file registry (read-only).
    fn files(&self) -> &FileRegistry;
    /// Returns the auxiliary indices (read-only).
    fn indices(&self) -> &AuxiliaryIndices;
}
2208
2209impl GraphAccess for CodeGraph {
2210    #[inline]
2211    fn nodes(&self) -> &NodeArena {
2212        CodeGraph::nodes(self)
2213    }
2214    #[inline]
2215    fn edges(&self) -> &BidirectionalEdgeStore {
2216        CodeGraph::edges(self)
2217    }
2218    #[inline]
2219    fn strings(&self) -> &StringInterner {
2220        CodeGraph::strings(self)
2221    }
2222    #[inline]
2223    fn files(&self) -> &FileRegistry {
2224        CodeGraph::files(self)
2225    }
2226    #[inline]
2227    fn indices(&self) -> &AuxiliaryIndices {
2228        CodeGraph::indices(self)
2229    }
2230}
2231
2232impl GraphAccess for GraphSnapshot {
2233    #[inline]
2234    fn nodes(&self) -> &NodeArena {
2235        GraphSnapshot::nodes(self)
2236    }
2237    #[inline]
2238    fn edges(&self) -> &BidirectionalEdgeStore {
2239        GraphSnapshot::edges(self)
2240    }
2241    #[inline]
2242    fn strings(&self) -> &StringInterner {
2243        GraphSnapshot::strings(self)
2244    }
2245    #[inline]
2246    fn files(&self) -> &FileRegistry {
2247        GraphSnapshot::files(self)
2248    }
2249    #[inline]
2250    fn indices(&self) -> &AuxiliaryIndices {
2251        GraphSnapshot::indices(self)
2252    }
2253}
2254
2255#[cfg(test)]
2256mod tests {
2257    use super::*;
2258    use crate::graph::unified::{
2259        FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2260        SymbolResolutionOutcome,
2261    };
2262
2263    fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2264        match snapshot.resolve_symbol(&SymbolQuery {
2265            symbol,
2266            file_scope: FileScope::Any,
2267            mode: ResolutionMode::Strict,
2268        }) {
2269            SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2270            SymbolResolutionOutcome::NotFound
2271            | SymbolResolutionOutcome::FileNotIndexed
2272            | SymbolResolutionOutcome::Ambiguous(_) => None,
2273        }
2274    }
2275
2276    fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2277        match snapshot.find_symbol_candidates(&SymbolQuery {
2278            symbol,
2279            file_scope: FileScope::Any,
2280            mode: ResolutionMode::AllowSuffixCandidates,
2281        }) {
2282            SymbolCandidateOutcome::Candidates(candidates) => candidates,
2283            SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2284        }
2285    }
2286
2287    #[test]
2288    fn test_code_graph_new() {
2289        let graph = CodeGraph::new();
2290        assert_eq!(graph.epoch(), 0);
2291        assert_eq!(graph.nodes().len(), 0);
2292    }
2293
2294    #[test]
2295    fn test_code_graph_default() {
2296        let graph = CodeGraph::default();
2297        assert_eq!(graph.epoch(), 0);
2298    }
2299
2300    #[test]
2301    fn test_code_graph_snapshot() {
2302        let graph = CodeGraph::new();
2303        let snapshot = graph.snapshot();
2304        assert_eq!(snapshot.epoch(), 0);
2305        assert_eq!(snapshot.nodes().len(), 0);
2306    }
2307
2308    #[test]
2309    fn test_code_graph_bump_epoch() {
2310        let mut graph = CodeGraph::new();
2311        assert_eq!(graph.epoch(), 0);
2312        assert_eq!(graph.bump_epoch(), 1);
2313        assert_eq!(graph.epoch(), 1);
2314        assert_eq!(graph.bump_epoch(), 2);
2315        assert_eq!(graph.epoch(), 2);
2316    }
2317
2318    #[test]
2319    fn test_code_graph_set_epoch() {
2320        let mut graph = CodeGraph::new();
2321        graph.set_epoch(42);
2322        assert_eq!(graph.epoch(), 42);
2323    }
2324
2325    #[test]
2326    fn test_code_graph_from_components() {
2327        let nodes = NodeArena::new();
2328        let edges = BidirectionalEdgeStore::new();
2329        let strings = StringInterner::new();
2330        let files = FileRegistry::new();
2331        let indices = AuxiliaryIndices::new();
2332        let macro_metadata = NodeMetadataStore::new();
2333
2334        let graph =
2335            CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2336        assert_eq!(graph.epoch(), 0);
2337    }
2338
2339    #[test]
2340    fn test_code_graph_mut_accessors() {
2341        let mut graph = CodeGraph::new();
2342
2343        // Access mutable references - should not panic
2344        let _nodes = graph.nodes_mut();
2345        let _edges = graph.edges_mut();
2346        let _strings = graph.strings_mut();
2347        let _files = graph.files_mut();
2348        let _indices = graph.indices_mut();
2349    }
2350
2351    #[test]
2352    fn test_code_graph_snapshot_isolation() {
2353        let mut graph = CodeGraph::new();
2354        let snapshot1 = graph.snapshot();
2355
2356        // Mutate the graph
2357        graph.bump_epoch();
2358
2359        let snapshot2 = graph.snapshot();
2360
2361        // Snapshots should have different epochs
2362        assert_eq!(snapshot1.epoch(), 0);
2363        assert_eq!(snapshot2.epoch(), 1);
2364    }
2365
2366    #[test]
2367    fn test_code_graph_debug() {
2368        let graph = CodeGraph::new();
2369        let debug_str = format!("{graph:?}");
2370        assert!(debug_str.contains("CodeGraph"));
2371        assert!(debug_str.contains("epoch"));
2372    }
2373
    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
    /// accounting: adding a ConfidenceMetadata entry with known inner
    /// Vec<String> capacities must increase heap_bytes() by exactly the sum
    /// of those capacities plus Vec<String> slot overhead.
    #[test]
    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
        let mut graph = CodeGraph::new();
        // Seed one confidence entry so the HashMap has non-zero capacity;
        // reserve slack so the next insert cannot rehash.
        graph.set_confidence({
            let mut m = HashMap::with_capacity(8);
            m.insert("seed".to_string(), ConfidenceMetadata::default());
            m
        });
        let before = graph.heap_bytes();
        let before_cap = graph.confidence.capacity();

        // Capture each String's byte capacity BEFORE the values are moved
        // into the metadata struct; these feed the expected-delta sum.
        let lim1 = String::from("no type inference");
        let lim2 = String::from("no generic specialization");
        let feat1 = String::from("rust-analyzer");
        let l1 = lim1.capacity();
        let l2 = lim2.capacity();
        let f1 = feat1.capacity();

        let limitations = vec![lim1, lim2];
        let lim_vec_cap = limitations.capacity();

        let unavailable_features = vec![feat1];
        let feat_vec_cap = unavailable_features.capacity();

        let key = String::from("rust");
        let key_cap = key.capacity();
        graph.confidence.insert(
            key,
            ConfidenceMetadata {
                limitations,
                unavailable_features,
                ..Default::default()
            },
        );
        // If the map rehashed, the table-capacity delta would pollute the
        // exact-byte expectation below, so fail fast on that precondition.
        assert_eq!(
            graph.confidence.capacity(),
            before_cap,
            "prerequisite: confidence HashMap must not rehash during the test insert",
        );

        let after = graph.heap_bytes();
        // Expected growth: key bytes + each Vec's slot capacity (in
        // String-sized slots) + the byte capacity of every inner String.
        let expected_inner = key_cap
            + lim_vec_cap * std::mem::size_of::<String>()
            + l1
            + l2
            + feat_vec_cap * std::mem::size_of::<String>()
            + f1;
        assert_eq!(
            after - before,
            expected_inner,
            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
        );
    }
2433
2434    #[test]
2435    fn test_codegraph_heap_bytes_grows_with_content() {
2436        use crate::graph::unified::node::NodeKind;
2437        use crate::graph::unified::storage::arena::NodeEntry;
2438        use std::path::Path;
2439
2440        // An empty graph reports some heap bytes (FileRegistry seeds index 0
2441        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
2442        // etc.) but the value must be finite and well under the 100 MB cap.
2443        let empty = CodeGraph::new();
2444        let empty_bytes = empty.heap_bytes();
2445        assert!(
2446            empty_bytes < 100 * 1024 * 1024,
2447            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2448        );
2449
2450        let mut graph = CodeGraph::new();
2451        for i in 0..32u32 {
2452            let name = format!("sym_{i}");
2453            let qual = format!("module::sym_{i}");
2454            let file = format!("file_{i}.rs");
2455
2456            let name_id = graph.strings_mut().intern(&name).unwrap();
2457            let qual_id = graph.strings_mut().intern(&qual).unwrap();
2458            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2459
2460            let entry =
2461                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2462            let node_id = graph.nodes_mut().alloc(entry).unwrap();
2463            graph
2464                .indices_mut()
2465                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2466        }
2467
2468        let populated_bytes = graph.heap_bytes();
2469        assert!(
2470            populated_bytes > 0,
2471            "populated graph should report nonzero heap bytes"
2472        );
2473        assert!(
2474            populated_bytes > empty_bytes,
2475            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2476        );
2477        assert!(
2478            populated_bytes < 100 * 1024 * 1024,
2479            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2480        );
2481    }
2482
2483    #[test]
2484    fn test_concurrent_code_graph_new() {
2485        let graph = ConcurrentCodeGraph::new();
2486        assert_eq!(graph.epoch(), 0);
2487    }
2488
2489    #[test]
2490    fn test_concurrent_code_graph_default() {
2491        let graph = ConcurrentCodeGraph::default();
2492        assert_eq!(graph.epoch(), 0);
2493    }
2494
2495    #[test]
2496    fn test_concurrent_code_graph_from_graph() {
2497        let mut inner = CodeGraph::new();
2498        inner.set_epoch(10);
2499        let graph = ConcurrentCodeGraph::from_graph(inner);
2500        assert_eq!(graph.epoch(), 10);
2501    }
2502
2503    #[test]
2504    fn test_concurrent_code_graph_read() {
2505        let graph = ConcurrentCodeGraph::new();
2506        let guard = graph.read();
2507        assert_eq!(guard.epoch(), 0);
2508        assert_eq!(guard.nodes().len(), 0);
2509    }
2510
2511    #[test]
2512    fn test_concurrent_code_graph_write_increments_epoch() {
2513        let graph = ConcurrentCodeGraph::new();
2514        assert_eq!(graph.epoch(), 0);
2515
2516        {
2517            let guard = graph.write();
2518            assert_eq!(guard.epoch(), 1);
2519        }
2520
2521        assert_eq!(graph.epoch(), 1);
2522
2523        {
2524            let _guard = graph.write();
2525        }
2526
2527        assert_eq!(graph.epoch(), 2);
2528    }
2529
2530    #[test]
2531    fn test_concurrent_code_graph_snapshot() {
2532        let graph = ConcurrentCodeGraph::new();
2533
2534        {
2535            let _guard = graph.write();
2536        }
2537
2538        let snapshot = graph.snapshot();
2539        assert_eq!(snapshot.epoch(), 1);
2540    }
2541
2542    #[test]
2543    fn test_concurrent_code_graph_try_read() {
2544        let graph = ConcurrentCodeGraph::new();
2545        let guard = graph.try_read();
2546        assert!(guard.is_some());
2547    }
2548
2549    #[test]
2550    fn test_concurrent_code_graph_try_write() {
2551        let graph = ConcurrentCodeGraph::new();
2552        let guard = graph.try_write();
2553        assert!(guard.is_some());
2554        assert_eq!(graph.epoch(), 1);
2555    }
2556
2557    #[test]
2558    fn test_concurrent_code_graph_debug() {
2559        let graph = ConcurrentCodeGraph::new();
2560        let debug_str = format!("{graph:?}");
2561        assert!(debug_str.contains("ConcurrentCodeGraph"));
2562        assert!(debug_str.contains("epoch"));
2563    }
2564
2565    #[test]
2566    fn test_graph_snapshot_accessors() {
2567        let graph = CodeGraph::new();
2568        let snapshot = graph.snapshot();
2569
2570        // All accessors should work
2571        let _nodes = snapshot.nodes();
2572        let _edges = snapshot.edges();
2573        let _strings = snapshot.strings();
2574        let _files = snapshot.files();
2575        let _indices = snapshot.indices();
2576        let _epoch = snapshot.epoch();
2577    }
2578
2579    #[test]
2580    fn test_graph_snapshot_epoch_matches() {
2581        let graph = CodeGraph::new();
2582        let snapshot = graph.snapshot();
2583
2584        assert!(snapshot.epoch_matches(0));
2585        assert!(!snapshot.epoch_matches(1));
2586    }
2587
2588    #[test]
2589    fn test_graph_snapshot_clone() {
2590        let graph = CodeGraph::new();
2591        let snapshot1 = graph.snapshot();
2592        let snapshot2 = snapshot1.clone();
2593
2594        assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2595    }
2596
2597    #[test]
2598    fn test_graph_snapshot_debug() {
2599        let graph = CodeGraph::new();
2600        let snapshot = graph.snapshot();
2601        let debug_str = format!("{snapshot:?}");
2602        assert!(debug_str.contains("GraphSnapshot"));
2603        assert!(debug_str.contains("epoch"));
2604    }
2605
2606    #[test]
2607    fn test_multiple_readers() {
2608        let graph = ConcurrentCodeGraph::new();
2609
2610        // Multiple readers should be able to acquire locks simultaneously
2611        let guard1 = graph.read();
2612        let guard2 = graph.read();
2613        let guard3 = graph.read();
2614
2615        assert_eq!(guard1.epoch(), 0);
2616        assert_eq!(guard2.epoch(), 0);
2617        assert_eq!(guard3.epoch(), 0);
2618    }
2619
2620    #[test]
2621    fn test_code_graph_clone() {
2622        let mut graph = CodeGraph::new();
2623        graph.bump_epoch();
2624
2625        let cloned = graph.clone();
2626        assert_eq!(cloned.epoch(), 1);
2627    }
2628
2629    #[test]
2630    fn test_epoch_wrapping() {
2631        let mut graph = CodeGraph::new();
2632        graph.set_epoch(u64::MAX);
2633        let new_epoch = graph.bump_epoch();
2634        assert_eq!(new_epoch, 0); // Should wrap around
2635    }
2636
2637    // ============================================================================
2638    // Query method tests
2639    // ============================================================================
2640
2641    #[test]
2642    fn test_snapshot_resolve_symbol() {
2643        use crate::graph::unified::node::NodeKind;
2644        use crate::graph::unified::storage::arena::NodeEntry;
2645        use std::path::Path;
2646
2647        let mut graph = CodeGraph::new();
2648
2649        // Add some nodes with qualified names
2650        let name_id = graph.strings_mut().intern("test_func").unwrap();
2651        let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2652        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2653
2654        let entry =
2655            NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2656
2657        let node_id = graph.nodes_mut().alloc(entry).unwrap();
2658        graph.indices_mut().add(
2659            node_id,
2660            NodeKind::Function,
2661            name_id,
2662            Some(qual_name_id),
2663            file_id,
2664        );
2665
2666        let snapshot = graph.snapshot();
2667
2668        // Find by qualified name
2669        let found = resolve_symbol_strict(&snapshot, "module::test_func");
2670        assert_eq!(found, Some(node_id));
2671
2672        // Find by exact simple name
2673        let found2 = resolve_symbol_strict(&snapshot, "test_func");
2674        assert_eq!(found2, Some(node_id));
2675
2676        // Not found
2677        assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2678    }
2679
2680    #[test]
2681    fn test_snapshot_find_by_pattern() {
2682        use crate::graph::unified::node::NodeKind;
2683        use crate::graph::unified::storage::arena::NodeEntry;
2684        use std::path::Path;
2685
2686        let mut graph = CodeGraph::new();
2687
2688        // Add nodes with different names
2689        let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2690        let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2691        let name3 = graph.strings_mut().intern("qux_test").unwrap();
2692        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2693
2694        let node1 = graph
2695            .nodes_mut()
2696            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2697            .unwrap();
2698        let node2 = graph
2699            .nodes_mut()
2700            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2701            .unwrap();
2702        let node3 = graph
2703            .nodes_mut()
2704            .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2705            .unwrap();
2706
2707        graph
2708            .indices_mut()
2709            .add(node1, NodeKind::Function, name1, None, file_id);
2710        graph
2711            .indices_mut()
2712            .add(node2, NodeKind::Function, name2, None, file_id);
2713        graph
2714            .indices_mut()
2715            .add(node3, NodeKind::Function, name3, None, file_id);
2716
2717        let snapshot = graph.snapshot();
2718
2719        // Find by pattern
2720        let matches = snapshot.find_by_pattern("bar");
2721        assert_eq!(matches.len(), 2);
2722        assert!(matches.contains(&node1));
2723        assert!(matches.contains(&node2));
2724
2725        // Find single match
2726        let matches = snapshot.find_by_pattern("qux");
2727        assert_eq!(matches.len(), 1);
2728        assert_eq!(matches[0], node3);
2729
2730        // No matches
2731        let matches = snapshot.find_by_pattern("nonexistent");
2732        assert!(matches.is_empty());
2733    }
2734
    #[test]
    fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
        // C_SUPPRESS: synthetic placeholder nodes (Go plugin
        // `<field:operand.field>` shadows + `<ident>@<offset>`
        // per-binding-site Variables) must NOT surface from
        // find_by_pattern, but must still be reachable via
        // find_by_pattern_with_options(_, true).
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // Fixture: two "real" nodes (a Property and a local Variable) plus
        // three synthetic placeholders, all sharing the `NeedTags` stem.
        let mut graph = CodeGraph::new();
        let real_property = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
        let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
            .unwrap();
        let syn_a_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_a,
                file_id,
            ))
            .unwrap();
        let syn_b_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_b,
                file_id,
            ))
            .unwrap();

        // Register every node in the name indices so pattern search can
        // see all five of them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, real_property, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            real_local_var,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_a_id,
            NodeKind::Variable,
            synthetic_offset_a,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_b_id,
            NodeKind::Variable,
            synthetic_offset_b,
            None,
            file_id,
        );

        // Flag two of them via the metadata-store bit (the canonical
        // Go-plugin emission path) and leave one (`<field:...>`) only
        // covered by the structural name-shape fallback to verify both
        // recognition channels suppress the leak.
        graph.macro_metadata_mut().mark_synthetic(syn_a_id);
        graph.macro_metadata_mut().mark_synthetic(syn_b_id);

        let snapshot = graph.snapshot();

        // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
        let matches = snapshot.find_by_pattern("NeedTags");
        assert!(matches.contains(&prop_id), "Property must be surfaced");
        assert!(
            matches.contains(&local_var_id),
            "real local var must be surfaced"
        );
        assert!(
            !matches.contains(&syn_field_id),
            "<field:...> synthetic must be suppressed (name-shape fallback)"
        );
        assert!(
            !matches.contains(&syn_a_id),
            "NeedTags@469 must be suppressed (metadata bit)"
        );
        assert!(
            !matches.contains(&syn_b_id),
            "NeedTags@508 must be suppressed (metadata bit)"
        );
        assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");

        // Internal include-synthetic surface (binding plane, scope analysis):
        // every node remains reachable.
        let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
        assert_eq!(
            all_matches.len(),
            5,
            "include_synthetic surfaces everything"
        );
        assert!(all_matches.contains(&prop_id));
        assert!(all_matches.contains(&local_var_id));
        assert!(all_matches.contains(&syn_field_id));
        assert!(all_matches.contains(&syn_a_id));
        assert!(all_matches.contains(&syn_b_id));

        // is_node_synthetic exposed for surface-level filters
        // (e.g., MCP semantic_search/relation_query post-filters).
        assert!(snapshot.is_node_synthetic(syn_field_id));
        assert!(snapshot.is_node_synthetic(syn_a_id));
        assert!(snapshot.is_node_synthetic(syn_b_id));
        assert!(!snapshot.is_node_synthetic(prop_id));
        assert!(!snapshot.is_node_synthetic(local_var_id));
    }
2873
    #[test]
    fn find_by_exact_name_aligns_with_planner_name_predicate() {
        // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
        // surface for the CLI `--exact NeedTags` shorthand and the
        // planner's `name:NeedTags` predicate. Both paths must return
        // the same set against this fixture.
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        // Property nodes carry the package-qualified name as
        // `entry.name` (Go plugin convention; see `helper.rs`'s
        // `semantic_name_for_node_input`).
        let property_qname = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field_name = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
        let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_field_name,
                file_id,
            ))
            .unwrap();
        let syn_offset_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_name,
                file_id,
            ))
            .unwrap();
        let unrelated_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
            .unwrap();

        // Index all five nodes so both lookup paths can reach them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, property_qname, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            local_var_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_offset_id,
            NodeKind::Variable,
            synthetic_offset_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            unrelated_id,
            NodeKind::Function,
            unrelated_name,
            None,
            file_id,
        );

        // Mark the offset-suffixed synthetic via the metadata bit so we
        // exercise both recognition channels in this fixture.
        graph.macro_metadata_mut().mark_synthetic(syn_offset_id);

        let snapshot = graph.snapshot();

        // Exact-name lookup on "NeedTags" — should pick up only the
        // local variable (its `entry.name` is exactly "NeedTags"); it
        // must NOT pick up the Property (qualified name contains but
        // does not equal "NeedTags") and must NOT pick up either
        // synthetic placeholder.
        let exact = snapshot.find_by_exact_name("NeedTags");
        assert_eq!(
            exact,
            vec![local_var_id],
            "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
        );

        // The Property's full qualified name is exact-addressable.
        let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
        assert_eq!(qualified, vec![prop_id]);

        // Substring-only matches must not surface from exact lookup.
        assert!(
            snapshot
                .find_by_exact_name("NeedTagsHelper")
                .contains(&unrelated_id)
        );
        assert!(
            !snapshot
                .find_by_exact_name("NeedTags")
                .contains(&unrelated_id),
            "exact 'NeedTags' must not match 'NeedTagsHelper'"
        );

        // Unknown name short-circuits to an empty vec without
        // scanning any nodes.
        assert!(
            snapshot
                .find_by_exact_name("ThisStringIsNotInterned")
                .is_empty()
        );
    }
3005
3006    #[test]
3007    fn test_snapshot_get_callees() {
3008        use crate::graph::unified::edge::EdgeKind;
3009        use crate::graph::unified::node::NodeKind;
3010        use crate::graph::unified::storage::arena::NodeEntry;
3011        use std::path::Path;
3012
3013        let mut graph = CodeGraph::new();
3014
3015        // Create caller and callee nodes
3016        let caller_name = graph.strings_mut().intern("caller").unwrap();
3017        let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3018        let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3019        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3020
3021        let caller_id = graph
3022            .nodes_mut()
3023            .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3024            .unwrap();
3025        let callee1_id = graph
3026            .nodes_mut()
3027            .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3028            .unwrap();
3029        let callee2_id = graph
3030            .nodes_mut()
3031            .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3032            .unwrap();
3033
3034        // Add call edges
3035        graph.edges_mut().add_edge(
3036            caller_id,
3037            callee1_id,
3038            EdgeKind::Calls {
3039                argument_count: 0,
3040                is_async: false,
3041            },
3042            file_id,
3043        );
3044        graph.edges_mut().add_edge(
3045            caller_id,
3046            callee2_id,
3047            EdgeKind::Calls {
3048                argument_count: 0,
3049                is_async: false,
3050            },
3051            file_id,
3052        );
3053
3054        let snapshot = graph.snapshot();
3055
3056        // Query callees
3057        let callees = snapshot.get_callees(caller_id);
3058        assert_eq!(callees.len(), 2);
3059        assert!(callees.contains(&callee1_id));
3060        assert!(callees.contains(&callee2_id));
3061
3062        // Node with no callees
3063        let callees = snapshot.get_callees(callee1_id);
3064        assert!(callees.is_empty());
3065    }
3066
3067    #[test]
3068    fn test_snapshot_get_callers() {
3069        use crate::graph::unified::edge::EdgeKind;
3070        use crate::graph::unified::node::NodeKind;
3071        use crate::graph::unified::storage::arena::NodeEntry;
3072        use std::path::Path;
3073
3074        let mut graph = CodeGraph::new();
3075
3076        // Create caller and callee nodes
3077        let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3078        let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3079        let callee_name = graph.strings_mut().intern("callee").unwrap();
3080        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3081
3082        let caller1_id = graph
3083            .nodes_mut()
3084            .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3085            .unwrap();
3086        let caller2_id = graph
3087            .nodes_mut()
3088            .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3089            .unwrap();
3090        let callee_id = graph
3091            .nodes_mut()
3092            .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3093            .unwrap();
3094
3095        // Add call edges
3096        graph.edges_mut().add_edge(
3097            caller1_id,
3098            callee_id,
3099            EdgeKind::Calls {
3100                argument_count: 0,
3101                is_async: false,
3102            },
3103            file_id,
3104        );
3105        graph.edges_mut().add_edge(
3106            caller2_id,
3107            callee_id,
3108            EdgeKind::Calls {
3109                argument_count: 0,
3110                is_async: false,
3111            },
3112            file_id,
3113        );
3114
3115        let snapshot = graph.snapshot();
3116
3117        // Query callers
3118        let callers = snapshot.get_callers(callee_id);
3119        assert_eq!(callers.len(), 2);
3120        assert!(callers.contains(&caller1_id));
3121        assert!(callers.contains(&caller2_id));
3122
3123        // Node with no callers
3124        let callers = snapshot.get_callers(caller1_id);
3125        assert!(callers.is_empty());
3126    }
3127
3128    #[test]
3129    fn test_snapshot_find_symbol_candidates() {
3130        use crate::graph::unified::node::NodeKind;
3131        use crate::graph::unified::storage::arena::NodeEntry;
3132        use std::path::Path;
3133
3134        let mut graph = CodeGraph::new();
3135
3136        // Add nodes with same symbol name but different qualified names
3137        let symbol_name = graph.strings_mut().intern("test").unwrap();
3138        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3139
3140        let node1 = graph
3141            .nodes_mut()
3142            .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3143            .unwrap();
3144        let node2 = graph
3145            .nodes_mut()
3146            .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3147            .unwrap();
3148
3149        // Add a different symbol
3150        let other_name = graph.strings_mut().intern("other").unwrap();
3151        let node3 = graph
3152            .nodes_mut()
3153            .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3154            .unwrap();
3155
3156        graph
3157            .indices_mut()
3158            .add(node1, NodeKind::Function, symbol_name, None, file_id);
3159        graph
3160            .indices_mut()
3161            .add(node2, NodeKind::Method, symbol_name, None, file_id);
3162        graph
3163            .indices_mut()
3164            .add(node3, NodeKind::Function, other_name, None, file_id);
3165
3166        let snapshot = graph.snapshot();
3167
3168        // Find by symbol
3169        let matches = candidate_nodes(&snapshot, "test");
3170        assert_eq!(matches.len(), 2);
3171        assert!(matches.contains(&node1));
3172        assert!(matches.contains(&node2));
3173
3174        // Find other symbol
3175        let matches = candidate_nodes(&snapshot, "other");
3176        assert_eq!(matches.len(), 1);
3177        assert_eq!(matches[0], node3);
3178
3179        // No matches
3180        let matches = candidate_nodes(&snapshot, "nonexistent");
3181        assert!(matches.is_empty());
3182    }
3183
3184    #[test]
3185    fn test_snapshot_iter_nodes() {
3186        use crate::graph::unified::node::NodeKind;
3187        use crate::graph::unified::storage::arena::NodeEntry;
3188        use std::path::Path;
3189
3190        let mut graph = CodeGraph::new();
3191
3192        // Add some nodes
3193        let name1 = graph.strings_mut().intern("func1").unwrap();
3194        let name2 = graph.strings_mut().intern("func2").unwrap();
3195        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3196
3197        let node1 = graph
3198            .nodes_mut()
3199            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3200            .unwrap();
3201        let node2 = graph
3202            .nodes_mut()
3203            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3204            .unwrap();
3205
3206        let snapshot = graph.snapshot();
3207
3208        // Iterate nodes
3209        let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3210        assert_eq!(snapshot_nodes.len(), 2);
3211
3212        let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3213        assert!(node_ids.contains(&node1));
3214        assert!(node_ids.contains(&node2));
3215    }
3216
3217    #[test]
3218    fn test_snapshot_iter_edges() {
3219        use crate::graph::unified::edge::EdgeKind;
3220        use crate::graph::unified::node::NodeKind;
3221        use crate::graph::unified::storage::arena::NodeEntry;
3222        use std::path::Path;
3223
3224        let mut graph = CodeGraph::new();
3225
3226        // Create nodes
3227        let name1 = graph.strings_mut().intern("func1").unwrap();
3228        let name2 = graph.strings_mut().intern("func2").unwrap();
3229        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3230
3231        let node1 = graph
3232            .nodes_mut()
3233            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3234            .unwrap();
3235        let node2 = graph
3236            .nodes_mut()
3237            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3238            .unwrap();
3239
3240        // Add edges
3241        graph.edges_mut().add_edge(
3242            node1,
3243            node2,
3244            EdgeKind::Calls {
3245                argument_count: 0,
3246                is_async: false,
3247            },
3248            file_id,
3249        );
3250
3251        let snapshot = graph.snapshot();
3252
3253        // Iterate edges
3254        let edges: Vec<_> = snapshot.iter_edges().collect();
3255        assert_eq!(edges.len(), 1);
3256
3257        let (src, tgt, kind) = &edges[0];
3258        assert_eq!(*src, node1);
3259        assert_eq!(*tgt, node2);
3260        assert!(matches!(
3261            kind,
3262            EdgeKind::Calls {
3263                argument_count: 0,
3264                is_async: false
3265            }
3266        ));
3267    }
3268
3269    #[test]
3270    fn test_snapshot_get_node() {
3271        use crate::graph::unified::node::NodeId;
3272        use crate::graph::unified::node::NodeKind;
3273        use crate::graph::unified::storage::arena::NodeEntry;
3274        use std::path::Path;
3275
3276        let mut graph = CodeGraph::new();
3277
3278        // Add a node
3279        let name = graph.strings_mut().intern("test_func").unwrap();
3280        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3281
3282        let node_id = graph
3283            .nodes_mut()
3284            .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3285            .unwrap();
3286
3287        let snapshot = graph.snapshot();
3288
3289        // Get node
3290        let entry = snapshot.get_node(node_id);
3291        assert!(entry.is_some());
3292        assert_eq!(entry.unwrap().kind, NodeKind::Function);
3293
3294        // Invalid node
3295        let invalid_id = NodeId::INVALID;
3296        assert!(snapshot.get_node(invalid_id).is_none());
3297    }
3298
3299    #[test]
3300    fn test_snapshot_query_empty_graph() {
3301        use crate::graph::unified::node::NodeId;
3302
3303        let graph = CodeGraph::new();
3304        let snapshot = graph.snapshot();
3305
3306        // All queries should return empty on empty graph
3307        assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3308        assert!(snapshot.find_by_pattern("test").is_empty());
3309        assert!(candidate_nodes(&snapshot, "test").is_empty());
3310
3311        let dummy_id = NodeId::new(0, 1);
3312        assert!(snapshot.get_callees(dummy_id).is_empty());
3313        assert!(snapshot.get_callers(dummy_id).is_empty());
3314
3315        assert_eq!(snapshot.iter_nodes().count(), 0);
3316        assert_eq!(snapshot.iter_edges().count(), 0);
3317    }
3318
3319    #[test]
3320    fn test_snapshot_edge_filtering_by_kind() {
3321        use crate::graph::unified::edge::EdgeKind;
3322        use crate::graph::unified::node::NodeKind;
3323        use crate::graph::unified::storage::arena::NodeEntry;
3324        use std::path::Path;
3325
3326        let mut graph = CodeGraph::new();
3327
3328        // Create nodes
3329        let name1 = graph.strings_mut().intern("func1").unwrap();
3330        let name2 = graph.strings_mut().intern("func2").unwrap();
3331        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3332
3333        let node1 = graph
3334            .nodes_mut()
3335            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3336            .unwrap();
3337        let node2 = graph
3338            .nodes_mut()
3339            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3340            .unwrap();
3341
3342        // Add different kinds of edges
3343        graph.edges_mut().add_edge(
3344            node1,
3345            node2,
3346            EdgeKind::Calls {
3347                argument_count: 0,
3348                is_async: false,
3349            },
3350            file_id,
3351        );
3352        graph
3353            .edges_mut()
3354            .add_edge(node1, node2, EdgeKind::References, file_id);
3355
3356        let snapshot = graph.snapshot();
3357
3358        // get_callees should only return Calls edges
3359        let callees = snapshot.get_callees(node1);
3360        assert_eq!(callees.len(), 1);
3361        assert_eq!(callees[0], node2);
3362
3363        // iter_edges returns all edges regardless of kind
3364        let edges: Vec<_> = snapshot.iter_edges().collect();
3365        assert_eq!(edges.len(), 2);
3366    }
3367
3368    // -------- reverse_import_index tests --------
3369
3370    /// Helper: build an empty graph with the given file paths registered, a
3371    /// single placeholder node allocated per file, and indices rebuilt so
3372    /// `by_file` returns the per-file node sets. Returns the file IDs in
3373    /// the order passed in and the per-file node ID.
3374    #[cfg(test)]
3375    fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3376        use crate::graph::unified::node::NodeKind;
3377        use crate::graph::unified::storage::arena::NodeEntry;
3378        use std::path::Path;
3379
3380        let mut graph = CodeGraph::new();
3381        let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3382        let mut file_ids = Vec::with_capacity(files.len());
3383        let mut node_ids = Vec::with_capacity(files.len());
3384        for path in files {
3385            let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3386            let node_id = graph
3387                .nodes_mut()
3388                .alloc(NodeEntry::new(
3389                    NodeKind::Function,
3390                    placeholder_name,
3391                    file_id,
3392                ))
3393                .unwrap();
3394            file_ids.push(file_id);
3395            node_ids.push(node_id);
3396        }
3397        graph.rebuild_indices();
3398        (graph, file_ids, node_ids)
3399    }
3400
3401    /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3402    /// `target_node` (in exporter file). The edge is recorded against the
3403    /// importer file so `clear_file` cleanup would behave identically to
3404    /// production Pass 4 writes.
3405    #[cfg(test)]
3406    fn add_import_edge(
3407        graph: &mut CodeGraph,
3408        source_node: NodeId,
3409        target_node: NodeId,
3410        importer_file: FileId,
3411    ) {
3412        graph.edges_mut().add_edge(
3413            source_node,
3414            target_node,
3415            EdgeKind::Imports {
3416                alias: None,
3417                is_wildcard: false,
3418            },
3419            importer_file,
3420        );
3421    }
3422
3423    #[test]
3424    fn reverse_import_index_empty_graph_returns_empty() {
3425        let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3426        assert!(graph.reverse_import_index(files[0]).is_empty());
3427    }
3428
3429    #[test]
3430    fn reverse_import_index_single_importer() {
3431        // File A imports a symbol exported by file B. Reverse index of B
3432        // should return exactly [A]; reverse index of A should be empty.
3433        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3434        let (a, b) = (files[0], files[1]);
3435        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3436
3437        let importers_of_b = graph.reverse_import_index(b);
3438        assert_eq!(importers_of_b, vec![a]);
3439
3440        let importers_of_a = graph.reverse_import_index(a);
3441        assert!(
3442            importers_of_a.is_empty(),
3443            "A has no inbound Imports edges; reverse index must be empty"
3444        );
3445    }
3446
3447    #[test]
3448    fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3449        // A, B, C all import from D. Reverse index of D must contain each
3450        // importer exactly once, sorted ascending by FileId.
3451        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3452        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3453        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3454        add_import_edge(&mut graph, nodes[1], nodes[3], b);
3455        add_import_edge(&mut graph, nodes[2], nodes[3], c);
3456        // Add a duplicate edge from A to D to confirm dedup behavior.
3457        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3458
3459        let importers_of_d = graph.reverse_import_index(d);
3460        assert_eq!(importers_of_d, vec![a, b, c]);
3461        // Sort invariant: Vec is ascending by raw index.
3462        let mut sorted = importers_of_d.clone();
3463        sorted.sort();
3464        assert_eq!(importers_of_d, sorted);
3465    }
3466
3467    #[test]
3468    fn reverse_import_index_filters_non_import_edges() {
3469        // A `Calls` edge from A into B must not contribute to B's reverse
3470        // import index. Only `EdgeKind::Imports` edges count.
3471        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3472        let (a, b) = (files[0], files[1]);
3473        graph.edges_mut().add_edge(
3474            nodes[0],
3475            nodes[1],
3476            EdgeKind::Calls {
3477                argument_count: 0,
3478                is_async: false,
3479            },
3480            a,
3481        );
3482        graph
3483            .edges_mut()
3484            .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3485
3486        assert!(
3487            graph.reverse_import_index(b).is_empty(),
3488            "non-Imports edges must not register as importers"
3489        );
3490    }
3491
3492    #[test]
3493    fn reverse_import_index_elides_self_imports() {
3494        // An Imports edge whose source and target are both in the same file
3495        // is a self-import; the caller's own file must not appear in its own
3496        // reverse index.
3497        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3498        let a = files[0];
3499        // Add a second node in the same file so we have a distinct source.
3500        let name2 = graph.strings_mut().intern("sym2").unwrap();
3501        let second_in_a = graph
3502            .nodes_mut()
3503            .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3504                crate::graph::unified::node::NodeKind::Function,
3505                name2,
3506                a,
3507            ))
3508            .unwrap();
3509        graph.rebuild_indices();
3510        add_import_edge(&mut graph, second_in_a, nodes[0], a);
3511
3512        assert!(
3513            graph.reverse_import_index(a).is_empty(),
3514            "self-imports must be elided from reverse index"
3515        );
3516    }
3517
3518    #[test]
3519    fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3520        // Two files: A has both Calls and Imports edges into B. Reverse
3521        // index must return exactly [A] — the Calls edge contributes
3522        // nothing.
3523        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3524        let (a, b) = (files[0], files[1]);
3525        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3526        graph.edges_mut().add_edge(
3527            nodes[0],
3528            nodes[1],
3529            EdgeKind::Calls {
3530                argument_count: 0,
3531                is_async: false,
3532            },
3533            a,
3534        );
3535
3536        assert_eq!(graph.reverse_import_index(b), vec![a]);
3537    }
3538
3539    #[test]
3540    fn reverse_import_index_uninitialized_file_returns_empty() {
3541        // Querying a FileId that is not registered in the graph must return
3542        // an empty Vec, not panic.
3543        let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3544        let bogus = FileId::new(9999);
3545        assert!(
3546            graph.reverse_import_index(bogus).is_empty(),
3547            "unknown FileId must return empty Vec without panicking"
3548        );
3549    }
3550
3551    #[test]
3552    fn reverse_import_index_skips_tombstoned_source_nodes() {
3553        // In the incremental rebuild path the prior graph retains tombstoned
3554        // arena slots for nodes belonging to closure files that have been
3555        // removed but not yet fully compacted. reverse_import_index is
3556        // called on that prior graph to widen the closure, so its
3557        // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3558        // is a semantically important branch, not a theoretical one.
3559        // This test exercises it directly by tombstoning the source node of
3560        // an Imports edge and asserting the edge silently disappears from
3561        // the reverse index.
3562        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3563        let (a, b) = (files[0], files[1]);
3564        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3565        // Sanity: before tombstoning, A shows up as the importer of B.
3566        assert_eq!(graph.reverse_import_index(b), vec![a]);
3567        // Tombstone the source node via the arena (generation bump). The
3568        // Imports edge still exists in the edge store but its source NodeId
3569        // is now stale; `nodes.get(edge_ref.source)` returns None and the
3570        // guard skips the edge.
3571        let removed = graph.nodes_mut().remove(nodes[0]);
3572        assert!(
3573            removed.is_some(),
3574            "arena.remove must succeed for a live node"
3575        );
3576        assert!(
3577            graph.nodes().get(nodes[0]).is_none(),
3578            "tombstoned lookup must return None"
3579        );
3580        assert!(
3581            graph.reverse_import_index(b).is_empty(),
3582            "Imports edges whose source is tombstoned must be silently skipped"
3583        );
3584    }
3585
3586    // ------------------------------------------------------------------
3587    // Task 4 Step 2 — CodeGraph::remove_file
3588    // ------------------------------------------------------------------
3589
3590    /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3591    /// of intra- and inter-file edges. Each call site produces a
3592    /// canonical topology so tests below can assert bit-level on edge
3593    /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3594    /// file_b_nodes)`.
3595    fn seed_two_file_graph(
3596        per_file: usize,
3597    ) -> (
3598        CodeGraph,
3599        crate::graph::unified::file::FileId,
3600        crate::graph::unified::file::FileId,
3601        Vec<NodeId>,
3602        Vec<NodeId>,
3603    ) {
3604        use crate::graph::unified::edge::EdgeKind;
3605        use crate::graph::unified::node::NodeKind;
3606        use crate::graph::unified::storage::arena::NodeEntry;
3607        use std::path::Path;
3608
3609        let mut graph = CodeGraph::new();
3610        let sym = graph.strings_mut().intern("sym").expect("intern");
3611        let file_a = graph
3612            .files_mut()
3613            .register(Path::new("/tmp/remove_file_test/a.rs"))
3614            .expect("register a");
3615        let file_b = graph
3616            .files_mut()
3617            .register(Path::new("/tmp/remove_file_test/b.rs"))
3618            .expect("register b");
3619
3620        let mut file_a_nodes = Vec::with_capacity(per_file);
3621        let mut file_b_nodes = Vec::with_capacity(per_file);
3622
3623        for _ in 0..per_file {
3624            let n = graph
3625                .nodes_mut()
3626                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3627                .expect("alloc a-node");
3628            file_a_nodes.push(n);
3629            graph.files_mut().record_node(file_a, n);
3630            graph
3631                .indices_mut()
3632                .add(n, NodeKind::Function, sym, None, file_a);
3633        }
3634        for _ in 0..per_file {
3635            let n = graph
3636                .nodes_mut()
3637                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3638                .expect("alloc b-node");
3639            file_b_nodes.push(n);
3640            graph.files_mut().record_node(file_b, n);
3641            graph
3642                .indices_mut()
3643                .add(n, NodeKind::Function, sym, None, file_b);
3644        }
3645
3646        // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3647        // b[i] -> b[i+1]. These are the ones that must die when the
3648        // corresponding file is removed.
3649        for i in 0..per_file.saturating_sub(1) {
3650            graph.edges_mut().add_edge(
3651                file_a_nodes[i],
3652                file_a_nodes[i + 1],
3653                EdgeKind::Calls {
3654                    argument_count: 0,
3655                    is_async: false,
3656                },
3657                file_a,
3658            );
3659            graph.edges_mut().add_edge(
3660                file_b_nodes[i],
3661                file_b_nodes[i + 1],
3662                EdgeKind::Calls {
3663                    argument_count: 0,
3664                    is_async: false,
3665                },
3666                file_b,
3667            );
3668        }
3669        // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3670        // when *either* endpoint's file is removed (plan §F.2).
3671        graph.edges_mut().add_edge(
3672            file_a_nodes[0],
3673            file_b_nodes[0],
3674            EdgeKind::Calls {
3675                argument_count: 0,
3676                is_async: false,
3677            },
3678            file_a,
3679        );
3680        graph.edges_mut().add_edge(
3681            file_b_nodes[0],
3682            file_a_nodes[0],
3683            EdgeKind::Calls {
3684                argument_count: 0,
3685                is_async: false,
3686            },
3687            file_b,
3688        );
3689
3690        (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3691    }
3692
3693    #[test]
3694    fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3695        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3696
3697        let returned = graph.remove_file(file_a);
3698
3699        // Returned list must equal the original per-file bucket
3700        // membership, deterministically.
3701        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3702        let expected_set: std::collections::HashSet<NodeId> =
3703            file_a_nodes.iter().copied().collect();
3704        assert_eq!(
3705            returned_set, expected_set,
3706            "remove_file must return exactly the file_a nodes drained from the bucket"
3707        );
3708
3709        // Every returned NodeId is gone from the arena.
3710        for nid in &file_a_nodes {
3711            assert!(
3712                graph.nodes().get(*nid).is_none(),
3713                "node {nid:?} from removed file must be tombstoned in arena"
3714            );
3715        }
3716    }
3717
    #[test]
    fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
        use crate::graph::unified::edge::EdgeKind;

        // Seed: 3 nodes per file; see `seed_two_file_graph` for the exact
        // chain + cross-edge topology the deltas below depend on.
        let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);

        // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
        // 2 cross-file edges (a0↔b0).
        let before_delta = graph.edges().stats().forward.delta_edge_count;
        assert_eq!(
            before_delta, 6,
            "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
        );

        let _ = graph.remove_file(file_a);

        // Forward delta after removal: all 2 intra-A edges are gone,
        // both cross edges are gone, only the 2 intra-B edges remain.
        let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
        assert_eq!(
            after_delta_forward, 2,
            "only intra-B forward edges must remain after removing file_a"
        );
        // The reverse (edges_to) store must mirror the forward store.
        let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
        assert_eq!(
            after_delta_reverse, 2,
            "only intra-B reverse edges must remain after removing file_a"
        );

        // Cross-file edge from b[0] -> a[0] must no longer be visible
        // from either direction, because a[0] is tombstoned.
        let b0 = file_b_nodes[0];
        let a0 = file_a_nodes[0];
        // Restrict to the exact Calls shape the seed produced so other
        // edge kinds (if ever added to the seed) cannot mask the check.
        let remaining_from_b0: Vec<_> = graph
            .edges()
            .edges_from(b0)
            .into_iter()
            .filter(|e| {
                matches!(
                    e.kind,
                    EdgeKind::Calls {
                        argument_count: 0,
                        is_async: false
                    }
                )
            })
            .collect();
        assert!(
            !remaining_from_b0.iter().any(|e| e.target == a0),
            "edge b0 -> a0 must be gone after remove_file(file_a)"
        );
        // And the reverse direction: nothing may still target a0.
        let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
        assert!(
            remaining_to_a0.is_empty(),
            "every edge targeting the tombstoned a0 must be gone"
        );
    }
3775
3776    #[test]
3777    fn code_graph_remove_file_drops_file_registry_entry() {
3778        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
3779
3780        assert!(
3781            graph.files().resolve(file_a).is_some(),
3782            "seed registered file_a"
3783        );
3784        assert!(
3785            !graph.files().nodes_for_file(file_a).is_empty(),
3786            "seed populated the file_a bucket"
3787        );
3788
3789        let _ = graph.remove_file(file_a);
3790
3791        assert!(
3792            graph.files().resolve(file_a).is_none(),
3793            "FileRegistry::resolve must return None after remove_file"
3794        );
3795        assert!(
3796            graph.files().nodes_for_file(file_a).is_empty(),
3797            "per-file bucket for file_a must be drained"
3798        );
3799    }
3800
3801    #[test]
3802    fn code_graph_remove_file_is_idempotent_on_unknown_file() {
3803        use crate::graph::unified::file::FileId;
3804        let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
3805
3806        // Snapshot state before idempotent no-op.
3807        let nodes_before = graph.nodes().len();
3808        let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
3809        let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
3810        let files_before = graph.files().len();
3811
3812        // A FileId that was never registered: the caller may legitimately
3813        // receive a bogus id from stale indexing state. `remove_file`
3814        // must be a silent no-op.
3815        let bogus = FileId::new(9999);
3816        let returned = graph.remove_file(bogus);
3817        assert!(
3818            returned.is_empty(),
3819            "remove_file on unknown FileId must return an empty Vec"
3820        );
3821
3822        assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
3823        assert_eq!(
3824            graph.edges().stats().forward.delta_edge_count,
3825            delta_fwd_before,
3826            "forward delta unchanged"
3827        );
3828        assert_eq!(
3829            graph.edges().stats().reverse.delta_edge_count,
3830            delta_rev_before,
3831            "reverse delta unchanged"
3832        );
3833        assert_eq!(graph.files().len(), files_before, "file count unchanged");
3834    }
3835
3836    #[test]
3837    fn code_graph_remove_file_clears_file_segments_entry() {
3838        // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
3839        // must be cleared on `remove_file`. Without this, a later
3840        // `FileId` recycle (via `FileRegistry::free_list`) would
3841        // inherit the previous file's stale node-range and
3842        // `reindex_files` (`build/reindex.rs`) would tombstone the
3843        // wrong slots. This unit test seeds a segment entry, removes
3844        // the file, and asserts the entry is gone.
3845        use crate::graph::unified::storage::segment::FileSegmentTable;
3846
3847        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3848
3849        // Seed a segment for file A. Production code sets this via
3850        // Phase 3 parallel commit; here we go through the crate-
3851        // internal accessor because we are a unit test inside the
3852        // crate. (The feature-gated `test_only_record_file_segment`
3853        // public helper exists for the integration tests in
3854        // `sqry-core/tests/incremental_remove_file_scale.rs`.)
3855        let first_index = file_a_nodes
3856            .iter()
3857            .map(|n| n.index())
3858            .min()
3859            .expect("per_file = 3");
3860        let last_index = file_a_nodes
3861            .iter()
3862            .map(|n| n.index())
3863            .max()
3864            .expect("per_file = 3");
3865        let slot_count = last_index - first_index + 1;
3866        let table: &mut FileSegmentTable = graph.file_segments_mut();
3867        table.record_range(file_a, first_index, slot_count);
3868        assert!(
3869            graph.file_segments().get(file_a).is_some(),
3870            "seed must install a segment for file_a before remove_file"
3871        );
3872
3873        // Remove the file.
3874        let _ = graph.remove_file(file_a);
3875
3876        // The segment entry must be gone. `FileSegmentTable::remove`
3877        // is idempotent (a no-op on unknown ids), so this assertion
3878        // holds whether or not the seeded range was contiguous.
3879        assert!(
3880            graph.file_segments().get(file_a).is_none(),
3881            "remove_file must clear the FileSegmentTable entry for file_a"
3882        );
3883    }
3884
3885    #[test]
3886    fn code_graph_remove_file_repeated_calls_are_idempotent() {
3887        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3888
3889        // First call does the work.
3890        let first = graph.remove_file(file_a);
3891        assert_eq!(first.len(), file_a_nodes.len());
3892
3893        // Snapshot post-first-call state.
3894        let nodes_after = graph.nodes().len();
3895        let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
3896        let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
3897        let files_after = graph.files().len();
3898
3899        // Second call must be a silent no-op — the bucket is empty,
3900        // the file is unregistered, and the arena slots are already
3901        // tombstoned (NodeArena::remove ignores stale generations).
3902        let second = graph.remove_file(file_a);
3903        assert!(
3904            second.is_empty(),
3905            "second remove_file on the same file must return an empty Vec"
3906        );
3907
3908        assert_eq!(graph.nodes().len(), nodes_after);
3909        assert_eq!(
3910            graph.edges().stats().forward.delta_edge_count,
3911            delta_fwd_after
3912        );
3913        assert_eq!(
3914            graph.edges().stats().reverse.delta_edge_count,
3915            delta_rev_after
3916        );
3917        assert_eq!(graph.files().len(), files_after);
3918    }
3919}