Skip to main content

sqry_core/graph/unified/concurrent/
graph.rs

1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19    ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24#[cfg(test)]
25use crate::graph::unified::edge::ResolvedVia;
26use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
27use crate::graph::unified::file::FileId;
28use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
29use crate::graph::unified::resolution::display_graph_qualified_name;
30use crate::graph::unified::storage::arena::NodeArena;
31use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
32use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
33use crate::graph::unified::storage::indices::AuxiliaryIndices;
34use crate::graph::unified::storage::interner::StringInterner;
35use crate::graph::unified::storage::metadata::NodeMetadataStore;
36use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
37use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
38use crate::graph::unified::storage::segment::FileSegmentTable;
39use crate::graph::unified::string::id::StringId;
40
41/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
42///
43/// `CodeGraph` uses `Arc` for all internal components, enabling:
44/// - O(1) snapshot creation via Arc cloning
45/// - Copy-on-write semantics via `Arc::make_mut`
46/// - Memory-efficient sharing between snapshots
47///
48/// # Design
49///
50/// The Arc wrapping enables the MVCC pattern:
51/// - Readers see a consistent snapshot at the time they acquired access
52/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
53/// - Multiple snapshots can coexist without copying data
54///
55/// # Performance
56///
57/// - Snapshot creation: O(5) Arc clones ≈ <1μs
58/// - Read access: Direct Arc dereference, no locking
59/// - Write access: `Arc::make_mut` clones only if refcount > 1
60///
61/// # Phase 2 binding-plane access
62///
63/// Use the two-line snapshot pattern to access `BindingPlane`:
64///
65/// ```rust,ignore
66/// let snapshot = graph.snapshot();
67/// let plane = snapshot.binding_plane();
68/// let resolution = plane.resolve(&query);
69/// ```
70///
71/// The two-line form is intentional: `BindingPlane<'g>` borrows from
72/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
73/// visible at the call site. The full Phase 2 scope/alias/shadow and
74/// witness-bearing resolution API is exposed through `BindingPlane`.
75// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
76// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
77// External crates still go through the public accessor methods below.
78#[derive(Clone)]
79pub struct CodeGraph {
80    /// Node storage with generational indices.
81    pub(crate) nodes: Arc<NodeArena>,
82    /// Bidirectional edge storage (forward + reverse).
83    pub(crate) edges: Arc<BidirectionalEdgeStore>,
84    /// String interner for symbol names.
85    pub(crate) strings: Arc<StringInterner>,
86    /// File registry for path deduplication.
87    pub(crate) files: Arc<FileRegistry>,
88    /// Auxiliary indices for fast lookup.
89    pub(crate) indices: Arc<AuxiliaryIndices>,
90    /// Sparse macro boundary metadata (keyed by full `NodeId`).
91    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
92    /// Dense node provenance (Phase 1 fact layer).
93    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
94    /// Dense edge provenance (Phase 1 fact layer).
95    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
96    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
97    pub(crate) fact_epoch: u64,
98    /// Epoch for version tracking.
99    pub(crate) epoch: u64,
100    /// Per-language confidence metadata collected during build.
101    /// Maps language name (e.g., "rust") to aggregated confidence.
102    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
103    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
104    pub(crate) scope_arena: Arc<ScopeArena>,
105    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
106    pub(crate) alias_table: Arc<AliasTable>,
107    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
108    pub(crate) shadow_table: Arc<ShadowTable>,
109    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
110    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
111    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
112    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
113    pub(crate) file_segments: Arc<FileSegmentTable>,
114    /// Phase A (U09): C-only indirect-call resolver side tables.
115    ///
116    /// `None` on non-C workspaces — the parent slot stays unallocated so
117    /// non-C builds incur zero side-table overhead. Populated by Phase 3
118    /// commit (U11) and consumed by `pass5b_c_indirect_resolve` (U12).
119    /// Persisted as the V11 `Option<CIndirectSideTables>` envelope slot
120    /// (DESIGN §10.2); V10 → V11 upconvert sets this to `None`.
121    pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
122    /// Build-time scratch side-channel from the Go plugin (Cluster A
123    /// of the Go T1 implements-and-promotion design).
124    ///
125    /// Populated during Phase 1 plugin parse, merged into the live
126    /// target by Phase 3 commit after `NodeId` / `StringId` remap, drained
127    /// by `pass_go_method_set_satisfaction` between Phase 4e and Pass
128    /// 5. Not part of `GraphSnapshot`, not persisted in V10 — see
129    /// `02_DESIGN` §6. Held by-value (not `Arc<…>`) because no
130    /// copy-on-write or shared-reader access is required: only the
131    /// rebuild owner mutates, only the pass consumes, then the field
132    /// is reset.
133    pub(crate) go_hints: crate::graph::unified::build::staging::GoHints,
134    /// Whether this graph carries genuine `NodeEntry.is_definition` signal
135    /// (R3 marker for the definition-signal feature).
136    ///
137    /// `true` for graphs freshly built in-process (the `GraphBuilder` path stamps
138    /// `is_definition` at declaration sites) and for graphs loaded from a
139    /// V16-or-newer snapshot. `false` for graphs loaded from a pre-V16 snapshot,
140    /// whose `is_definition` bits are all defaulted `false` (absent on the wire)
141    /// and must NOT be interpreted as "no declarations exist." Consumers that
142    /// filter on `is_definition` (e.g. items-only listings) read this marker to
143    /// decide whether the signal is trustworthy or a reindex is required.
144    pub(crate) definition_signal_present: bool,
145}
146
147impl CodeGraph {
148    /// Creates a new empty `CodeGraph`.
149    ///
150    /// # Example
151    ///
152    /// ```rust
153    /// use sqry_core::graph::unified::concurrent::CodeGraph;
154    ///
155    /// let graph = CodeGraph::new();
156    /// assert_eq!(graph.epoch(), 0);
157    /// ```
158    #[must_use]
159    pub fn new() -> Self {
160        Self {
161            nodes: Arc::new(NodeArena::new()),
162            edges: Arc::new(BidirectionalEdgeStore::new()),
163            strings: Arc::new(StringInterner::new()),
164            files: Arc::new(FileRegistry::new()),
165            indices: Arc::new(AuxiliaryIndices::new()),
166            macro_metadata: Arc::new(NodeMetadataStore::new()),
167            node_provenance: Arc::new(NodeProvenanceStore::new()),
168            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
169            fact_epoch: 0,
170            epoch: 0,
171            confidence: HashMap::new(),
172            scope_arena: Arc::new(ScopeArena::new()),
173            alias_table: Arc::new(AliasTable::new()),
174            shadow_table: Arc::new(ShadowTable::new()),
175            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
176            file_segments: Arc::new(FileSegmentTable::new()),
177            c_indirect_tables: None,
178            go_hints: crate::graph::unified::build::staging::GoHints::default(),
179            // Fresh in-process graph: the GraphBuilder path stamps is_definition.
180            definition_signal_present: true,
181        }
182    }
183
184    /// Creates a `CodeGraph` from existing components.
185    ///
186    /// This is useful when building a graph from external data or
187    /// reconstructing from serialized state.
188    #[must_use]
189    pub fn from_components(
190        nodes: NodeArena,
191        edges: BidirectionalEdgeStore,
192        strings: StringInterner,
193        files: FileRegistry,
194        indices: AuxiliaryIndices,
195        macro_metadata: NodeMetadataStore,
196    ) -> Self {
197        Self {
198            nodes: Arc::new(nodes),
199            edges: Arc::new(edges),
200            strings: Arc::new(strings),
201            files: Arc::new(files),
202            indices: Arc::new(indices),
203            macro_metadata: Arc::new(macro_metadata),
204            node_provenance: Arc::new(NodeProvenanceStore::new()),
205            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
206            fact_epoch: 0,
207            epoch: 0,
208            confidence: HashMap::new(),
209            scope_arena: Arc::new(ScopeArena::new()),
210            alias_table: Arc::new(AliasTable::new()),
211            shadow_table: Arc::new(ShadowTable::new()),
212            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
213            file_segments: Arc::new(FileSegmentTable::new()),
214            c_indirect_tables: None,
215            go_hints: crate::graph::unified::build::staging::GoHints::default(),
216            // Default to present; the load path overrides via
217            // `set_definition_signal_present` based on the snapshot format
218            // version (pre-V16 loads stamp `false`).
219            definition_signal_present: true,
220        }
221    }
222
223    /// Creates a cheap snapshot of the graph.
224    ///
225    /// This operation is O(5) Arc clones, which completes in <1μs.
226    /// The snapshot is isolated from future mutations to the original graph.
227    ///
228    /// # Example
229    ///
230    /// ```rust
231    /// use sqry_core::graph::unified::concurrent::CodeGraph;
232    ///
233    /// let graph = CodeGraph::new();
234    /// let snapshot = graph.snapshot();
235    /// // snapshot is independent of future mutations to graph
236    /// ```
237    #[must_use]
238    pub fn snapshot(&self) -> GraphSnapshot {
239        GraphSnapshot {
240            nodes: Arc::clone(&self.nodes),
241            edges: Arc::clone(&self.edges),
242            strings: Arc::clone(&self.strings),
243            files: Arc::clone(&self.files),
244            indices: Arc::clone(&self.indices),
245            macro_metadata: Arc::clone(&self.macro_metadata),
246            node_provenance: Arc::clone(&self.node_provenance),
247            edge_provenance: Arc::clone(&self.edge_provenance),
248            fact_epoch: self.fact_epoch,
249            epoch: self.epoch,
250            scope_arena: Arc::clone(&self.scope_arena),
251            alias_table: Arc::clone(&self.alias_table),
252            shadow_table: Arc::clone(&self.shadow_table),
253            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
254            file_segments: Arc::clone(&self.file_segments),
255            c_indirect_tables: self.c_indirect_tables.clone(),
256            definition_signal_present: self.definition_signal_present,
257        }
258    }
259
260    /// Returns whether this graph carries genuine `is_definition` signal.
261    ///
262    /// See [`definition_signal_present`](Self::definition_signal_present) (the
263    /// field) for the full contract.
264    #[inline]
265    #[must_use]
266    pub fn definition_signal_present(&self) -> bool {
267        self.definition_signal_present
268    }
269
270    /// Sets the definition-signal marker.
271    ///
272    /// Used by the snapshot load path to stamp `false` for pre-V16 snapshots
273    /// (whose `is_definition` bits are all defaulted) and `true` for V16+.
274    #[inline]
275    pub fn set_definition_signal_present(&mut self, present: bool) {
276        self.definition_signal_present = present;
277    }
278
279    /// Returns a reference to the node arena.
280    #[inline]
281    #[must_use]
282    pub fn nodes(&self) -> &NodeArena {
283        &self.nodes
284    }
285
286    /// Returns a reference to the bidirectional edge store.
287    #[inline]
288    #[must_use]
289    pub fn edges(&self) -> &BidirectionalEdgeStore {
290        &self.edges
291    }
292
293    /// Returns a reference to the string interner.
294    #[inline]
295    #[must_use]
296    pub fn strings(&self) -> &StringInterner {
297        &self.strings
298    }
299
300    /// Returns a reference to the file registry.
301    #[inline]
302    #[must_use]
303    pub fn files(&self) -> &FileRegistry {
304        &self.files
305    }
306
307    /// Returns a reference to the auxiliary indices.
308    #[inline]
309    #[must_use]
310    pub fn indices(&self) -> &AuxiliaryIndices {
311        &self.indices
312    }
313
314    /// Returns a reference to the macro boundary metadata store.
315    #[inline]
316    #[must_use]
317    pub fn macro_metadata(&self) -> &NodeMetadataStore {
318        &self.macro_metadata
319    }
320
321    /// Returns a reference to the C indirect-call side tables, if any.
322    ///
323    /// `None` on non-C workspaces — the slot is allocated lazily and only
324    /// when the C plugin's Phase 1 instrumentation observes content worth
325    /// recording. On loaded snapshots, the V11 envelope's
326    /// `Option<CIndirectSideTables>` field round-trips into this slot
327    /// (DESIGN §10.2); V10 → V11 upconvert always sets the slot to `None`.
328    ///
329    /// `pass5b_c_indirect_resolve` (U12) consumes these tables to rewrite
330    /// synthetic indirect-call `Calls` edges into precise binding-plane /
331    /// type-match candidates.
332    #[inline]
333    #[must_use]
334    pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
335        self.c_indirect_tables.as_ref()
336    }
337
338    /// Returns a mutable reference to the `Option<CIndirectSideTables>`
339    /// slot, allowing callers to install / replace / clear the side
340    /// tables.
341    ///
342    /// Mirrors the accessor pattern used for [`Self::macro_metadata_mut`]
343    /// but exposes the `Option` directly — the side tables are owned in
344    /// place, not behind an `Arc`. Callers that need to merge incremental
345    /// state into the existing tables typically do:
346    ///
347    /// ```rust,ignore
348    /// let slot = graph.c_indirect_tables_mut();
349    /// let tables = slot.get_or_insert_with(CIndirectSideTables::new);
350    /// tables.bindings_by_field.entry(key).or_default().push(entry);
351    /// ```
352    ///
353    /// Populated by the build pipeline (U11). Read-only consumers should
354    /// use [`Self::c_indirect_tables`] instead.
355    #[inline]
356    pub fn c_indirect_tables_mut(&mut self) -> &mut Option<CIndirectSideTables> {
357        &mut self.c_indirect_tables
358    }
359
360    /// Test-only: strip every Phase-A-introduced piece of state from this
361    /// graph, leaving a graph that is byte-identical (modulo persistence
362    /// header timestamps) to what a pre-Phase-A build would have produced
363    /// on the same fixture.
364    ///
365    /// Specifically:
366    ///
367    /// * Clears the [`CIndirectSideTables`] slot (sets it to `None`).
368    /// * Removes [`NodeFlags::ADDRESS_TAKEN`] and
369    ///   [`NodeFlags::CALLSITE_PROMISCUOUS`] marker bits from every
370    ///   [`StoredEntry`] in the macro-metadata store.
371    /// * Rewrites the `resolved_via` field of every [`EdgeKind::Calls`]
372    ///   edge (across CSR and delta tiers, forward and reverse stores)
373    ///   to [`ResolvedVia::Direct`].
374    ///
375    /// Used exclusively by `sqry-core/tests/snapshot_size_phase_a.rs`
376    /// (U19) to materialize a "Phase-A-free" baseline snapshot for the
377    /// +10% snapshot-size gate. Gated behind `cfg(any(test, feature =
378    /// "test-support"))` so the helper is invisible to production
379    /// builds.
380    ///
381    /// [`NodeFlags::ADDRESS_TAKEN`]: crate::graph::unified::storage::metadata::NodeFlags::ADDRESS_TAKEN
382    /// [`NodeFlags::CALLSITE_PROMISCUOUS`]: crate::graph::unified::storage::metadata::NodeFlags::CALLSITE_PROMISCUOUS
383    /// [`StoredEntry`]: crate::graph::unified::storage::metadata::StoredEntry
384    /// [`EdgeKind::Calls`]: crate::graph::unified::edge::EdgeKind::Calls
385    /// [`ResolvedVia::Direct`]: crate::graph::unified::edge::ResolvedVia::Direct
386    #[cfg(any(test, feature = "test-support"))]
387    pub fn clear_phase_a_state_for_test(&mut self) {
388        *self.c_indirect_tables_mut() = None;
389        self.macro_metadata_mut().clear_phase_a_flags_for_test();
390        self.edges_mut().normalize_calls_resolved_via_for_test();
391    }
392
393    // ------------------------------------------------------------------
394    // Phase 1 fact-layer provenance accessors (P1U09).
395    // ------------------------------------------------------------------
396
397    /// Returns the monotonic fact-layer epoch stamped on the most recently
398    /// saved or loaded snapshot. Returns `0` for graphs that have not been
399    /// persisted yet or were loaded from V7 snapshots.
400    #[inline]
401    #[must_use]
402    pub fn fact_epoch(&self) -> u64 {
403        self.fact_epoch
404    }
405
406    /// Looks up node provenance by `NodeId`.
407    ///
408    /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
409    /// or the stored generation does not match (stale handle).
410    #[inline]
411    #[must_use]
412    pub fn node_provenance(
413        &self,
414        id: crate::graph::unified::node::id::NodeId,
415    ) -> Option<&NodeProvenance> {
416        self.node_provenance.lookup(id)
417    }
418
419    /// Looks up edge provenance by `EdgeId`.
420    ///
421    /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
422    /// or the edge is the invalid sentinel.
423    #[inline]
424    #[must_use]
425    pub fn edge_provenance(
426        &self,
427        id: crate::graph::unified::edge::id::EdgeId,
428    ) -> Option<&EdgeProvenance> {
429        self.edge_provenance.lookup(id)
430    }
431
432    /// Returns a borrowed provenance view for a file.
433    ///
434    /// Returns `None` for invalid/unregistered `FileId`s.
435    #[inline]
436    #[must_use]
437    pub fn file_provenance(
438        &self,
439        id: crate::graph::unified::file::id::FileId,
440    ) -> Option<FileProvenanceView<'_>> {
441        self.files.file_provenance(id)
442    }
443
444    // ------------------------------------------------------------------
445    // Phase 2 binding-plane accessors (P2U03).
446    // ------------------------------------------------------------------
447
448    /// Returns a reference to the scope arena derived during Phase 4e.
449    ///
450    /// The arena is empty on freshly-constructed `CodeGraph` instances and is
451    /// populated by calling `phase4e_binding::derive_binding_plane`.
452    #[inline]
453    #[must_use]
454    pub fn scope_arena(&self) -> &ScopeArena {
455        &self.scope_arena
456    }
457
458    /// Installs a freshly-derived scope arena.
459    ///
460    /// Called from `phase4e_binding::derive_binding_plane` during the build
461    /// pipeline. External callers that run Phase 4e manually (e.g., test
462    /// fixture builders) use this to store the result.
463    pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
464        self.scope_arena = Arc::new(arena);
465    }
466
467    /// Returns a reference to the alias table derived during Phase 4e.
468    ///
469    /// The table is empty on freshly-constructed `CodeGraph` instances and is
470    /// populated by calling `phase4e_binding::derive_binding_plane`.
471    #[inline]
472    #[must_use]
473    pub fn alias_table(&self) -> &AliasTable {
474        &self.alias_table
475    }
476
477    /// Installs a freshly-derived alias table.
478    ///
479    /// Called from `phase4e_binding::derive_binding_plane` during the build
480    /// pipeline. External callers that run Phase 4e manually (e.g., test
481    /// fixture builders) use this to store the result.
482    pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
483        self.alias_table = Arc::new(table);
484    }
485
486    /// Returns a reference to the shadow table derived during Phase 4e.
487    ///
488    /// The table is empty on freshly-constructed `CodeGraph` instances and is
489    /// populated by calling `phase4e_binding::derive_binding_plane`.
490    #[inline]
491    #[must_use]
492    pub fn shadow_table(&self) -> &ShadowTable {
493        &self.shadow_table
494    }
495
496    /// Installs a freshly-derived shadow table.
497    ///
498    /// Called from `phase4e_binding::derive_binding_plane` during the build
499    /// pipeline. External callers that run Phase 4e manually (e.g., test
500    /// fixture builders) use this to store the result.
501    pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
502        self.shadow_table = Arc::new(table);
503    }
504
505    /// Returns a reference to the scope provenance store derived during Phase 4e.
506    ///
507    /// The store is empty on freshly-constructed `CodeGraph` instances and is
508    /// populated by calling `phase4e_binding::derive_binding_plane`.
509    #[inline]
510    #[must_use]
511    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
512        &self.scope_provenance_store
513    }
514
515    /// Looks up scope provenance by `ScopeId`.
516    ///
517    /// Returns `None` if the slot is out of range, vacant, or the stored
518    /// generation does not match (stale handle).
519    #[inline]
520    #[must_use]
521    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
522        self.scope_provenance_store.lookup(id)
523    }
524
525    /// Looks up the live `ScopeId` for a stable scope identity.
526    ///
527    /// Returns `None` if no provenance record is registered for that stable id.
528    /// The reverse index is populated by `insert` during Phase 4e and must be
529    /// rebuilt after V9 deserialization.
530    #[inline]
531    #[must_use]
532    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
533        self.scope_provenance_store.scope_by_stable_id(stable)
534    }
535
536    /// Installs a freshly-derived scope provenance store.
537    ///
538    /// Called from `phase4e_binding::derive_binding_plane` during the build
539    /// pipeline. External callers that run Phase 4e manually (e.g., test
540    /// fixture builders) use this to store the result.
541    pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
542        self.scope_provenance_store = Arc::new(store);
543    }
544
545    /// Returns a reference to the file segment table.
546    #[inline]
547    #[must_use]
548    pub fn file_segments(&self) -> &FileSegmentTable {
549        &self.file_segments
550    }
551
552    /// Replaces the file segment table.
553    pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
554        self.file_segments = Arc::new(table);
555    }
556
557    /// Installs the C indirect-call side tables loaded from a V11
558    /// snapshot.
559    ///
560    /// `None` clears the slot (used on V10 → V11 upconvert and on non-C
561    /// workspaces). `Some(...)` carries the live tables onto the freshly
562    /// reconstructed `CodeGraph`. Build-pipeline callers that incrementally
563    /// populate the tables should prefer [`Self::c_indirect_tables_mut`].
564    pub(crate) fn set_c_indirect_tables(&mut self, tables: Option<CIndirectSideTables>) {
565        self.c_indirect_tables = tables;
566    }
567
568    /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
569    pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
570        Arc::make_mut(&mut self.file_segments)
571    }
572
573    /// Test-only helper that records a file segment directly on the
574    /// graph, bypassing the full Phase 3 commit pipeline. Only
575    /// available under `#[cfg(feature = "rebuild-internals")]` so the
576    /// surface is opt-in for rebuild-plane consumers (the feature is
577    /// whitelisted to sqry-daemon + sqry-core integration tests; see
578    /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
579    ///
580    /// Integration tests (notably
581    /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
582    /// to seed the synthetic workspaces they build before exercising
583    /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
584    /// code paths never touch this method — Phase 3 parallel commit
585    /// is the sole production writer, via the crate-internal
586    /// `file_segments_mut` accessor above.
587    ///
588    /// Renamed with `test_only_` prefix so the purpose is unambiguous
589    /// at every call site; `#[doc(hidden)]` hides it from rendered
590    /// rustdoc so downstream daemon integrations don't discover it by
591    /// accident.
592    #[cfg(feature = "rebuild-internals")]
593    #[doc(hidden)]
594    pub fn test_only_record_file_segment(
595        &mut self,
596        file_id: FileId,
597        start_slot: u32,
598        slot_count: u32,
599    ) {
600        Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
601    }
602
603    /// Sets the provenance stores and fact epoch, typically called by the
604    /// persistence loader after deserializing a V8 snapshot.
605    pub(crate) fn set_provenance(
606        &mut self,
607        node_provenance: NodeProvenanceStore,
608        edge_provenance: EdgeProvenanceStore,
609        fact_epoch: u64,
610    ) {
611        self.node_provenance = Arc::new(node_provenance);
612        self.edge_provenance = Arc::new(edge_provenance);
613        self.fact_epoch = fact_epoch;
614    }
615
616    /// Returns the current epoch.
617    #[inline]
618    #[must_use]
619    pub fn epoch(&self) -> u64 {
620        self.epoch
621    }
622
623    /// Returns a mutable reference to the node arena.
624    ///
625    /// Uses `Arc::make_mut` for copy-on-write semantics: if other
626    /// references exist (e.g., snapshots), the data is cloned.
627    #[inline]
628    pub fn nodes_mut(&mut self) -> &mut NodeArena {
629        Arc::make_mut(&mut self.nodes)
630    }
631
632    /// Returns a mutable reference to the bidirectional edge store.
633    ///
634    /// Uses `Arc::make_mut` for copy-on-write semantics.
635    #[inline]
636    pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
637        Arc::make_mut(&mut self.edges)
638    }
639
640    /// Returns a mutable reference to the string interner.
641    ///
642    /// Uses `Arc::make_mut` for copy-on-write semantics.
643    #[inline]
644    pub fn strings_mut(&mut self) -> &mut StringInterner {
645        Arc::make_mut(&mut self.strings)
646    }
647
648    /// Returns a mutable reference to the file registry.
649    ///
650    /// Uses `Arc::make_mut` for copy-on-write semantics.
651    #[inline]
652    pub fn files_mut(&mut self) -> &mut FileRegistry {
653        Arc::make_mut(&mut self.files)
654    }
655
656    /// Returns a mutable reference to the auxiliary indices.
657    ///
658    /// Uses `Arc::make_mut` for copy-on-write semantics.
659    #[inline]
660    pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
661        Arc::make_mut(&mut self.indices)
662    }
663
664    /// Returns a mutable reference to the macro boundary metadata store.
665    ///
666    /// Uses `Arc::make_mut` for copy-on-write semantics.
667    #[inline]
668    pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
669        Arc::make_mut(&mut self.macro_metadata)
670    }
671
672    /// Returns mutable references to both the node arena and the string interner.
673    ///
674    /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
675    /// `strings_mut()` separately on `&mut self`.
676    #[inline]
677    pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
678        (
679            Arc::make_mut(&mut self.nodes),
680            Arc::make_mut(&mut self.strings),
681        )
682    }
683
684    /// Rebuilds auxiliary indices from the current node arena.
685    ///
686    /// This avoids the borrow conflict that arises when calling `nodes()` and
687    /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
688    /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
689    /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
690    /// existing indices and rebuilds in a single pass without per-element
691    /// duplicate checking.
692    ///
693    /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
694    /// generic
695    /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
696    /// free function so the same implementation serves both the
697    /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
698    /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
699    /// using `graph.rebuild_indices()`; incremental rebuild call sites
700    /// should use the free function directly.
701    pub fn rebuild_indices(&mut self) {
702        crate::graph::unified::build::parallel_commit::rebuild_indices(self);
703    }
704
705    /// Increments the epoch counter and returns the new value.
706    ///
707    /// Called automatically by `ConcurrentCodeGraph::write()`.
708    #[inline]
709    pub fn bump_epoch(&mut self) -> u64 {
710        self.epoch = self.epoch.wrapping_add(1);
711        self.epoch
712    }
713
714    /// Sets the epoch to a specific value.
715    ///
716    /// This is primarily for testing or reconstruction from serialized state.
717    #[inline]
718    pub fn set_epoch(&mut self, epoch: u64) {
719        self.epoch = epoch;
720    }
721
722    /// Returns the number of nodes in the graph.
723    ///
724    /// This is a convenience method that delegates to `nodes().len()`.
725    ///
726    /// # Example
727    ///
728    /// ```rust
729    /// use sqry_core::graph::unified::concurrent::CodeGraph;
730    ///
731    /// let graph = CodeGraph::new();
732    /// assert_eq!(graph.node_count(), 0);
733    /// ```
734    #[inline]
735    #[must_use]
736    pub fn node_count(&self) -> usize {
737        self.nodes.len()
738    }
739
740    /// Returns the number of edges in the graph (forward direction).
741    ///
742    /// This counts edges in the forward store, including both CSR and delta edges.
743    ///
744    /// # Example
745    ///
746    /// ```rust
747    /// use sqry_core::graph::unified::concurrent::CodeGraph;
748    ///
749    /// let graph = CodeGraph::new();
750    /// assert_eq!(graph.edge_count(), 0);
751    /// ```
752    #[inline]
753    #[must_use]
754    pub fn edge_count(&self) -> usize {
755        let stats = self.edges.stats();
756        stats.forward.csr_edge_count + stats.forward.delta_edge_count
757    }
758
759    /// Returns true if the graph contains no nodes.
760    ///
761    /// This is a convenience method that delegates to `nodes().is_empty()`.
762    ///
763    /// # Example
764    ///
765    /// ```rust
766    /// use sqry_core::graph::unified::concurrent::CodeGraph;
767    ///
768    /// let graph = CodeGraph::new();
769    /// assert!(graph.is_empty());
770    /// ```
771    #[inline]
772    #[must_use]
773    pub fn is_empty(&self) -> bool {
774        self.nodes.is_empty()
775    }
776
777    /// Returns an iterator over all indexed file paths.
778    ///
779    /// This is useful for enumerating all files that have been processed
780    /// and added to the graph.
781    ///
782    /// # Example
783    ///
784    /// ```rust
785    /// use sqry_core::graph::unified::concurrent::CodeGraph;
786    ///
787    /// let graph = CodeGraph::new();
788    /// for (file_id, path) in graph.indexed_files() {
789    ///     println!("File {}: {}", file_id.index(), path.display());
790    /// }
791    /// ```
792    #[inline]
793    pub fn indexed_files(
794        &self,
795    ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
796        self.files
797            .iter()
798            .map(|(id, arc_path)| (id, arc_path.as_ref()))
799    }
800
801    /// Returns the set of files that import one or more symbols exported by
802    /// `file_id`, deduplicated and sorted ascending.
803    ///
804    /// For every [`EdgeKind::Imports`] edge whose target node lives in
805    /// `file_id`, the source node's [`FileId`] is added to the result. Files
806    /// are returned sorted by raw index so the result is deterministic across
807    /// runs. The caller's own file is never included — an `Imports` edge
808    /// whose source and target both live in `file_id` is treated as a
809    /// self-import and elided. Edges whose source node is no longer resolvable
810    /// in the arena (tombstoned) are silently skipped.
811    ///
812    /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
813    /// by the incremental rebuild engine to compute reverse-dependency
814    /// closures: "if file X changes its exports, which files need to be
815    /// re-linked?"
816    ///
817    /// # Complexity
818    ///
819    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
820    /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
821    /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
822    /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
823    /// importer set (where `R` is the importer count) is negligible in
824    /// practice since `R ≤ file_count`.
825    ///
826    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
827    #[must_use]
828    pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
829        let mut importers: HashSet<FileId> = HashSet::new();
830        for &target_node in self.indices.by_file(file_id) {
831            for edge_ref in self.edges.edges_to(target_node) {
832                if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
833                    continue;
834                }
835                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
836                    continue;
837                };
838                let source_file = source_entry.file;
839                if source_file != file_id {
840                    importers.insert(source_file);
841                }
842            }
843        }
844        let mut result: Vec<FileId> = importers.into_iter().collect();
845        result.sort();
846        result
847    }
848
849    /// Returns the set of files that hold at least one live inter-file edge
850    /// targeting a node in `file_id`, deduplicated and sorted ascending.
851    ///
852    /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
853    /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
854    /// cross-file edge as a dependency signal: `Calls`, `References`,
855    /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
856    /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
857    /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
858    /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
859    /// cross-file-capable variant. This is the reverse-dependency surface the
860    /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
861    /// `file_id` changes, every file whose committed edges point into
862    /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
863    /// references survive the target-side tombstone-and-reparse cycle.
864    ///
865    /// The caller's own file is never included — an edge whose source and
866    /// target both live in `file_id` is a self-reference and is elided.
867    /// Edges whose source node is no longer resolvable in the arena
868    /// (tombstoned) are silently skipped.
869    ///
870    /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
871    ///
872    /// * [`reverse_import_index`](Self::reverse_import_index) remains the
873    ///   right surface for consumers that specifically need *import*
874    ///   relationships (export surface analysis, module-dependency graphs,
875    ///   etc.).
876    /// * `reverse_dependency_index` is the right surface for incremental
877    ///   rebuild closure computation. Widening past imports is necessary
878    ///   because call sites, type references, trait implementations, FFI
879    ///   declarations, HTTP clients, and every other cross-file edge kind
880    ///   hold a committed edge into the target file that becomes stale the
881    ///   moment `remove_file(target)` tombstones its arena nodes. Leaving
882    ///   those files out of the closure leaves the committed edges pointing
883    ///   at the stale (pre-tombstone) node IDs — Phase 4c-prime only
884    ///   rewrites edges on **re-parsed** files' `PendingEdge` sets, never
885    ///   committed edges owned by files outside the reparse scope.
886    ///
887    /// # Complexity
888    ///
889    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
890    /// same bound as [`reverse_import_index`](Self::reverse_import_index).
891    /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
892    /// lookup and the bidirectional edge store's reverse adjacency; no
893    /// full-graph scan. Final `O(R log R)` sort over the deduplicated
894    /// dependent set is negligible since `R ≤ file_count`.
895    ///
896    /// # Over-widening is expected and acceptable
897    ///
898    /// The closure will include every file that references anything in
899    /// `file_id`, not just files whose exports change. In common codebases
900    /// this widens the reparse set modestly (a 10-file change may expand
901    /// to 20–30 dependent files in a medium crate). Correctness requires
902    /// the widening; minimality is a follow-up optimisation if profiling
903    /// demands it.
904    ///
905    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
906    #[must_use]
907    pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
908        let mut dependents: HashSet<FileId> = HashSet::new();
909        for &target_node in self.indices.by_file(file_id) {
910            for edge_ref in self.edges.edges_to(target_node) {
911                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
912                    continue;
913                };
914                let source_file = source_entry.file;
915                if source_file != file_id {
916                    dependents.insert(source_file);
917                }
918            }
919        }
920        let mut result: Vec<FileId> = dependents.into_iter().collect();
921        result.sort();
922        result
923    }
924
925    /// Returns the per-language confidence metadata.
926    ///
927    /// This contains analysis confidence information collected during graph build,
928    /// primarily used by language plugins (e.g., Rust) to track analysis quality.
929    #[inline]
930    #[must_use]
931    pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
932        &self.confidence
933    }
934
935    /// Merges confidence metadata for a language.
936    ///
937    /// If confidence already exists for the language, this merges the new
938    /// metadata (taking the lower confidence level and combining limitations).
939    /// Otherwise, it inserts the new confidence.
940    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
941        use crate::confidence::ConfidenceLevel;
942
943        self.confidence
944            .entry(language.to_string())
945            .and_modify(|existing| {
946                // Take the lower confidence level (more conservative)
947                let new_level = match (&existing.level, &metadata.level) {
948                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
949                        *other
950                    }
951                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
952                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
953                        ConfidenceLevel::AstOnly
954                    }
955                    (level, _) => *level,
956                };
957                existing.level = new_level;
958
959                // Merge limitations (deduplicated)
960                for limitation in &metadata.limitations {
961                    if !existing.limitations.contains(limitation) {
962                        existing.limitations.push(limitation.clone());
963                    }
964                }
965
966                // Merge unavailable features (deduplicated)
967                for feature in &metadata.unavailable_features {
968                    if !existing.unavailable_features.contains(feature) {
969                        existing.unavailable_features.push(feature.clone());
970                    }
971                }
972            })
973            .or_insert(metadata);
974    }
975
976    /// Sets the confidence metadata map directly.
977    ///
978    /// This is primarily used when loading a graph from serialized state.
979    pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
980        self.confidence = confidence;
981    }
982
983    // ------------------------------------------------------------------
984    // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
985    //
986    // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
987    // path mutates a live `CodeGraph` in place and is the mechanism
988    // used by the full-rebuild flow when it needs to evict a file's
989    // nodes+edges between compactions. The daemon's incremental
990    // `WorkspaceManager` (Task 6) goes through the rebuild path, which
991    // is why this entry point is `pub(crate)`.
992    // ------------------------------------------------------------------
993
994    /// Tombstone every node that belongs to `file_id`, invalidate every
995    /// edge whose source or target is one of those nodes (across both
996    /// forward and reverse CSR + delta tiers), drop the file's entry
997    /// from the [`FileRegistry`], and return the set of [`NodeId`]s
998    /// that were tombstoned.
999    ///
1000    /// This is the §F.2-aware file-removal primitive. Semantically, the
1001    /// post-condition matches what a full rebuild of the workspace
1002    /// without `file_id` would produce:
1003    ///
1004    /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
1005    ///   been `NodeArena::remove`d, advancing its slot generation so
1006    ///   stale [`NodeId`] handles do not alias a later re-allocation.
1007    /// * Every CSR edge whose source or target slot matches one of the
1008    ///   tombstoned slot indices has its `csr_tombstones` bit set; the
1009    ///   read path's merge step already filters tombstoned CSR edges
1010    ///   out of every query result.
1011    /// * Every delta-buffer edge (Add or Remove, any file) whose source
1012    ///   or target matches a tombstoned slot has been dropped from the
1013    ///   delta in both directions.
1014    /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
1015    ///   no longer reference any of the tombstoned `NodeId`s.
1016    /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
1017    ///   [`AliasTable`], and [`ShadowTable`] have been compacted through
1018    ///   the [`NodeIdBearing::retain_nodes`] predicate so no
1019    ///   tombstoned `NodeId` survives in any publish-visible store.
1020    /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
1021    ///   `file_id`, the lookup slot is recycled, and
1022    ///   [`FileRegistry::resolve(file_id)`] returns `None`.
1023    ///
1024    /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
1025    /// is useful for downstream housekeeping (e.g., resetting per-file
1026    /// caches keyed by `NodeId`) and for tests that need to assert on the
1027    /// exact membership of the tombstone set.
1028    ///
1029    /// # Idempotency
1030    ///
1031    /// Calling `remove_file` twice with the same `file_id` — or calling
1032    /// it for a `file_id` that was never registered — is a safe no-op.
1033    /// The returned `Vec<NodeId>` is empty on the second call; no
1034    /// arena / edge / index state is mutated (the predicate-based
1035    /// compaction of `NodeIdBearing` surfaces short-circuits when the
1036    /// dead set is empty).
1037    ///
1038    /// # Visibility
1039    ///
1040    /// `pub(crate)` because external callers (Task 6's
1041    /// `WorkspaceManager` on the sqry-daemon side) route through
1042    /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
1043    /// instead. This `CodeGraph`-level variant is used by full-rebuild
1044    /// housekeeping paths inside sqry-core and by the Task 4 Step 4
1045    /// incremental fallback for cases where the caller already has a
1046    /// `&mut CodeGraph` and does not need the clone-and-publish
1047    /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
1048    /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
1049    ///
1050    /// # Performance
1051    ///
1052    /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
1053    ///   The CSR walk is linear in total edge count (not per-file),
1054    ///   which is the dominant cost; each row check is O(1) via the
1055    ///   precomputed dead-slot-index `HashSet` in
1056    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
1057    /// * Delta filtering is `O(|delta|)` per direction.
1058    /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
1059    ///   because each of the four indices keys its entries by the
1060    ///   tombstoned `NodeIds` directly.
1061    ///
1062    /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
1063    /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
1064    /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
1065    #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
1066    // and the unit tests below; published in this commit so the §F.2
1067    // invariant surface can be reviewed in isolation per the Gate 0c
1068    // split contract.
1069    pub(crate) fn remove_file(
1070        &mut self,
1071        file_id: FileId,
1072    ) -> Vec<crate::graph::unified::node::NodeId> {
1073        use crate::graph::unified::node::NodeId;
1074        use crate::graph::unified::rebuild::coverage::NodeIdBearing;
1075
1076        // Drain the per-file bucket. For a file that was never
1077        // registered, this returns an empty Vec — the rest of the
1078        // method short-circuits on the `if tombstoned.is_empty()` test
1079        // below so we still deregister the file on the off chance the
1080        // bucket was empty but the file was registered (defensive; the
1081        // common case is the bucket existed iff the file was
1082        // registered).
1083        let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
1084        // Always drop the file's path entry + recycle its slot, even
1085        // when the bucket was empty, so repeated registrations of the
1086        // same path don't resurrect a zombie FileId. `unregister` is
1087        // idempotent (returns None for unknown IDs) so the cost is a
1088        // single HashMap probe when file_id is already gone.
1089        self.files_mut().unregister(file_id);
1090        // Clear the file's segment entry unconditionally (idempotent
1091        // — `FileSegmentTable::remove` no-ops on unknown ids). This
1092        // MUST run before `FileRegistry::unregister` recycles the
1093        // FileId slot for reuse, otherwise a later registration of a
1094        // different path under the reused FileId would inherit the
1095        // previous file's stale node range (see
1096        // `sqry-core/src/graph/unified/build/reindex.rs` — which
1097        // trusts `file_segments().get(file_id)` to decide which slots
1098        // to tombstone). Note: `unregister` above was called first
1099        // only to keep the existing bucket-drain ordering; the
1100        // segment-clear is order-independent with respect to
1101        // `unregister` because neither touches the other's backing
1102        // store, and the FileId slot cannot be recycled-and-reissued
1103        // across a single `remove_file` call (the registry's slot
1104        // recycler is driven by a later `register`, not by
1105        // `unregister`).
1106        self.file_segments_mut().remove(file_id);
1107
1108        if tombstoned.is_empty() {
1109            return tombstoned;
1110        }
1111
1112        // Dead set keyed on NodeId for NodeIdBearing predicates.
1113        // `retain_nodes` uses `HashSet::contains` so membership is O(1).
1114        let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
1115
1116        // 1. Tombstone the arena slots. `NodeArena::remove` is
1117        //    idempotent — stale NodeIds that don't match a slot's
1118        //    current generation are no-ops, which lets this method be
1119        //    safely re-run on the same file.
1120        {
1121            let arena = self.nodes_mut();
1122            for &nid in &tombstoned {
1123                let _ = arena.remove(nid);
1124            }
1125        }
1126
1127        // 2. Invalidate edges across both CSR + delta in both
1128        //    directions. This is the expensive step; the helper uses a
1129        //    precomputed dead-slot-index set so the CSR walk is linear
1130        //    in total edge count, not quadratic.
1131        self.edges_mut().tombstone_edges_for_nodes(&dead);
1132
1133        // 3. Compact the auxiliary indices so name/kind/qname/file
1134        //    lookups do not return tombstoned NodeIds. Using the
1135        //    NodeIdBearing surface keeps this step in lockstep with the
1136        //    rebuild pipeline's step 4 — any future publish-visible
1137        //    NodeId-bearing container added to the K.A/K.B matrix is
1138        //    automatically swept here too.
1139        {
1140            let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
1141            self.indices_mut().retain_nodes(&*predicate);
1142            self.macro_metadata_mut().retain_nodes(&*predicate);
1143            // The remaining K.A rows (node_provenance, scope_arena,
1144            // alias_table, shadow_table) need Arc::make_mut accessors —
1145            // they are wrapped in Arc at rest so this is where the
1146            // CoW clone happens on demand. Inline the Arc::make_mut
1147            // calls here (no public mut accessor exists today because
1148            // the sole writer has been the rebuild path; extend the
1149            // set by adding a similar inline call plus a K.A/K.B row
1150            // in `super::super::rebuild::coverage`).
1151            Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1152            Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1153            Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1154            Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1155        }
1156
1157        tombstoned
1158    }
1159
1160    // ------------------------------------------------------------------
1161    // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1162    // ------------------------------------------------------------------
1163
1164    /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1165    /// by `RebuildGraph::finalize()` (defined in
1166    /// `super::super::rebuild::rebuild_graph`).
1167    ///
1168    /// This constructor is deliberately `pub(crate)` and named with a
1169    /// leading `__` so it is inaccessible from downstream crates even
1170    /// when the `rebuild-internals` feature is enabled: only code in
1171    /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1172    /// permitted to call it. The trybuild fixture
1173    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1174    /// proves there is no other path from `RebuildGraph` to
1175    /// `Arc<CodeGraph>`.
1176    ///
1177    /// The argument order mirrors the `CodeGraph` struct declaration
1178    /// order exactly; the public `clone_for_rebuild` → `finalize`
1179    /// round-trip uses the same macro-driven field enumeration so any
1180    /// new `CodeGraph` field automatically threads through this
1181    /// constructor as well.
1182    #[doc(hidden)]
1183    #[allow(clippy::too_many_arguments)]
1184    #[must_use]
1185    pub(crate) fn __assemble_from_rebuild_parts_internal(
1186        nodes: NodeArena,
1187        edges: BidirectionalEdgeStore,
1188        strings: StringInterner,
1189        files: FileRegistry,
1190        indices: AuxiliaryIndices,
1191        macro_metadata: NodeMetadataStore,
1192        node_provenance: NodeProvenanceStore,
1193        edge_provenance: EdgeProvenanceStore,
1194        fact_epoch: u64,
1195        epoch: u64,
1196        confidence: HashMap<String, ConfidenceMetadata>,
1197        scope_arena: ScopeArena,
1198        alias_table: AliasTable,
1199        shadow_table: ShadowTable,
1200        scope_provenance_store: ScopeProvenanceStore,
1201        file_segments: FileSegmentTable,
1202        go_hints: crate::graph::unified::build::staging::GoHints,
1203    ) -> Self {
1204        Self {
1205            nodes: Arc::new(nodes),
1206            edges: Arc::new(edges),
1207            strings: Arc::new(strings),
1208            files: Arc::new(files),
1209            indices: Arc::new(indices),
1210            macro_metadata: Arc::new(macro_metadata),
1211            node_provenance: Arc::new(node_provenance),
1212            edge_provenance: Arc::new(edge_provenance),
1213            fact_epoch,
1214            epoch,
1215            confidence,
1216            scope_arena: Arc::new(scope_arena),
1217            alias_table: Arc::new(alias_table),
1218            shadow_table: Arc::new(shadow_table),
1219            scope_provenance_store: Arc::new(scope_provenance_store),
1220            file_segments: Arc::new(file_segments),
1221            c_indirect_tables: None,
1222            go_hints,
1223            // A rebuild reparses source files, regenerating is_definition fresh,
1224            // so the reassembled graph always carries genuine signal.
1225            definition_signal_present: true,
1226        }
1227    }
1228
1229    // ------------------------------------------------------------------
1230    // Gate 0c (A2 §F) — publish-boundary debug invariants.
1231    //
1232    // These checks fire only in debug / test builds; release builds
1233    // compile them out. They are called from
1234    // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1235    // truth for the residue check, per §F and §H agreement). Gate 0d
1236    // will additionally wire the bijection check into
1237    // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1238    // every §E equivalence-harness run.
1239    // ------------------------------------------------------------------
1240
1241    /// Assert the bijective bucket-membership invariant (A2 §F.1).
1242    ///
1243    /// Four conditions must hold simultaneously:
1244    /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1245    ///     live arena slot;
1246    /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1247    ///     across buckets, no duplicates within a bucket);
1248    /// (c) the bucket's `FileId` matches the node's own `file` field on
1249    ///     `NodeEntry`;
1250    /// (d) when at least one bucket is populated, every live node in
1251    ///     the arena is accounted for by some bucket.
1252    ///
1253    /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1254    /// (no recorded buckets) publish boundary is vacuously consistent:
1255    /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1256    /// rebuilds on graphs that predate Gate 0c's parallel-commit
1257    /// bucketing must not panic. Once any bucket is populated, every
1258    /// live arena slot must appear in a bucket.
1259    ///
1260    /// Iter-2 B2 (verbatim): this check used to be documented as
1261    /// "vacuous until future `per_file_nodes` work lands". Pulling
1262    /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1263    /// is non-vacuous the moment parallel-parse commits nodes, which
1264    /// happens on every real build. The `!seen.is_empty()` guard now
1265    /// exists solely for the empty-graph corner case, not as a phased-
1266    /// delivery deferral.
1267    ///
1268    /// The check is a no-op in release builds. Panics with a
1269    /// descriptive message on violation. This is intentional: publish-
1270    /// boundary violations are programmer errors that must surface
1271    /// loudly during CI / test runs.
1272    ///
1273    /// # Panics
1274    ///
1275    /// Panics in debug/test builds when bucket membership is duplicated, points
1276    /// at a dead node, assigns a node to the wrong file, or omits a live node
1277    /// after buckets have been populated.
1278    #[cfg(any(debug_assertions, test))]
1279    pub fn assert_bucket_bijection(&self) {
1280        use std::collections::HashMap as StdHashMap;
1281        // (a) + (b) + (c): every bucketed node is live, unique across
1282        // buckets, and the bucket's FileId matches the node's own file.
1283        let mut seen: StdHashMap<
1284            crate::graph::unified::node::NodeId,
1285            crate::graph::unified::file::FileId,
1286        > = StdHashMap::new();
1287        let mut any_bucket_populated = false;
1288        for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1289            if !bucket.is_empty() {
1290                any_bucket_populated = true;
1291            }
1292            // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1293            // dedups during finalize step 6, but we re-check here so the
1294            // invariant covers non-finalize publish paths too (e.g. if a
1295            // future pipeline builds a graph without routing through
1296            // `RebuildGraph::finalize`).
1297            let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1298                std::collections::HashSet::new();
1299            for node_id in bucket {
1300                assert!(
1301                    within_bucket.insert(node_id),
1302                    "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1303                );
1304                assert!(
1305                    self.nodes.get(node_id).is_some(),
1306                    "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1307                );
1308                let prior = seen.insert(node_id, file_id);
1309                assert!(
1310                    prior.is_none(),
1311                    "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1312                     prior={prior:?}, current={file_id:?}"
1313                );
1314                if let Some(entry) = self.nodes.get(node_id) {
1315                    assert_eq!(
1316                        entry.file, file_id,
1317                        "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1318                         {file_id:?}, actual {:?}",
1319                        entry.file
1320                    );
1321                }
1322            }
1323        }
1324        // (d): every live node is accounted for by `seen` once buckets
1325        // are populated. The guard keeps legacy-empty-graph boundaries
1326        // vacuously consistent (see docs above).
1327        if any_bucket_populated {
1328            for (node_id, _entry) in self.nodes.iter() {
1329                assert!(
1330                    seen.contains_key(&node_id),
1331                    "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1332                );
1333            }
1334        }
1335    }
1336
1337    /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1338    ///
1339    /// Iterates every publish-visible NodeId-bearing structure on
1340    /// `self` and panics if any contains a node in `dead`. Called from
1341    /// `RebuildGraph::finalize()` step 14 against the set drained at
1342    /// step 8 — exactly one site per the plan's §F / §H agreement.
1343    ///
1344    /// No-op when `dead` is empty or in release builds.
1345    ///
1346    /// # Panics
1347    ///
1348    /// Panics in debug/test builds if any publish-visible node-bearing
1349    /// structure still references a tombstoned node.
1350    #[cfg(any(debug_assertions, test))]
1351    pub fn assert_no_tombstone_residue_for<S: std::hash::BuildHasher>(
1352        &self,
1353        dead: &std::collections::HashSet<crate::graph::unified::node::NodeId, S>,
1354    ) {
1355        use super::super::rebuild::coverage::NodeIdBearing;
1356        if dead.is_empty() {
1357            return;
1358        }
1359        // Every K.A/K.B row must be inspected per §F.2.
1360        for nid in self.nodes.all_node_ids() {
1361            assert!(
1362                !dead.contains(&nid),
1363                "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1364            );
1365        }
1366        for nid in self.indices.all_node_ids() {
1367            assert!(
1368                !dead.contains(&nid),
1369                "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1370            );
1371        }
1372        for nid in self.edges.all_node_ids() {
1373            assert!(
1374                !dead.contains(&nid),
1375                "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1376            );
1377        }
1378        for nid in self.macro_metadata.all_node_ids() {
1379            assert!(
1380                !dead.contains(&nid),
1381                "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1382            );
1383        }
1384        for nid in self.node_provenance.all_node_ids() {
1385            assert!(
1386                !dead.contains(&nid),
1387                "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1388            );
1389        }
1390        for nid in self.scope_arena.all_node_ids() {
1391            assert!(
1392                !dead.contains(&nid),
1393                "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1394            );
1395        }
1396        for nid in self.alias_table.all_node_ids() {
1397            assert!(
1398                !dead.contains(&nid),
1399                "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1400            );
1401        }
1402        for nid in self.shadow_table.all_node_ids() {
1403            assert!(
1404                !dead.contains(&nid),
1405                "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1406            );
1407        }
1408        for nid in self.files.all_node_ids() {
1409            assert!(
1410                !dead.contains(&nid),
1411                "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1412            );
1413        }
1414    }
1415}
1416
1417impl Default for CodeGraph {
1418    fn default() -> Self {
1419        Self::new()
1420    }
1421}
1422
1423impl fmt::Debug for CodeGraph {
1424    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1425        f.debug_struct("CodeGraph")
1426            .field("nodes", &self.nodes.len())
1427            .field("epoch", &self.epoch)
1428            .finish_non_exhaustive()
1429    }
1430}
1431
1432impl GraphMemorySize for CodeGraph {
1433    /// Estimates the total heap bytes owned by this `CodeGraph`.
1434    ///
1435    /// Sums heap usage across every component the graph owns: node arena,
1436    /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1437    /// buffer), string interner, file registry, auxiliary indices, sparse
1438    /// macro/classpath metadata, provenance stores, and the per-language
1439    /// confidence map. Used by the `sqryd` daemon's admission controller
1440    /// and workspace retention reaper to enforce memory budgets.
1441    fn heap_bytes(&self) -> usize {
1442        let mut confidence_bytes = self.confidence.capacity()
1443            * (std::mem::size_of::<String>()
1444                + std::mem::size_of::<ConfidenceMetadata>()
1445                + HASHMAP_ENTRY_OVERHEAD);
1446        for (key, meta) in &self.confidence {
1447            confidence_bytes += key.capacity();
1448            // `ConfidenceMetadata.limitations` / `unavailable_features` are
1449            // `Vec<String>`; charge their spill and inner String payloads.
1450            confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1451            for s in &meta.limitations {
1452                confidence_bytes += s.capacity();
1453            }
1454            confidence_bytes +=
1455                meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1456            for s in &meta.unavailable_features {
1457                confidence_bytes += s.capacity();
1458            }
1459        }
1460
1461        self.nodes.heap_bytes()
1462            + self.edges.heap_bytes()
1463            + self.strings.heap_bytes()
1464            + self.files.heap_bytes()
1465            + self.indices.heap_bytes()
1466            + self.macro_metadata.heap_bytes()
1467            + self.node_provenance.heap_bytes()
1468            + self.edge_provenance.heap_bytes()
1469            + confidence_bytes
1470            + self.file_segments.capacity()
1471                * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1472                )
1473    }
1474}
1475
1476/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1477///
1478/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1479/// - Multiple readers can access the graph simultaneously
1480/// - Only one writer can hold the lock at a time
1481/// - Each write operation increments the epoch for cursor invalidation
1482///
1483/// # Design
1484///
1485/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1486/// - Fair scheduling prevents writer starvation
1487/// - No poisoning (unlike `std::sync::RwLock`)
1488/// - Faster lock/unlock operations
1489///
1490/// # Phase 2 binding-plane access
1491///
1492/// Use the three-line snapshot pattern to access `BindingPlane`:
1493///
1494/// ```rust,ignore
1495/// let read_guard = concurrent.read();
1496/// let snapshot = read_guard.snapshot();
1497/// let plane = snapshot.binding_plane();
1498/// let resolution = plane.resolve(&query);
1499/// ```
1500///
1501/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1502/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1503/// API is exposed through `BindingPlane`.
1504///
1505/// # Usage
1506///
1507/// ```rust
1508/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1509///
1510/// let graph = ConcurrentCodeGraph::new();
1511///
1512/// // Read access (multiple readers allowed)
1513/// {
1514///     let guard = graph.read();
1515///     let _nodes = guard.nodes();
1516/// }
1517///
1518/// // Write access (exclusive)
1519/// {
1520///     let mut guard = graph.write();
1521///     let _nodes = guard.nodes_mut();
1522/// }
1523///
1524/// // Snapshot for long queries
1525/// let snapshot = graph.snapshot();
1526/// ```
1527pub struct ConcurrentCodeGraph {
1528    /// The underlying code graph protected by a read-write lock.
1529    inner: RwLock<CodeGraph>,
1530    /// Global epoch counter for cursor validation.
1531    epoch: AtomicU64,
1532}
1533
1534impl ConcurrentCodeGraph {
1535    /// Creates a new empty `ConcurrentCodeGraph`.
1536    #[must_use]
1537    pub fn new() -> Self {
1538        Self {
1539            inner: RwLock::new(CodeGraph::new()),
1540            epoch: AtomicU64::new(0),
1541        }
1542    }
1543
1544    /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1545    #[must_use]
1546    pub fn from_graph(graph: CodeGraph) -> Self {
1547        let epoch = graph.epoch();
1548        Self {
1549            inner: RwLock::new(graph),
1550            epoch: AtomicU64::new(epoch),
1551        }
1552    }
1553
1554    /// Acquires a read lock on the graph.
1555    ///
1556    /// Multiple readers can hold the lock simultaneously.
1557    /// This does not increment the epoch.
1558    #[inline]
1559    pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1560        self.inner.read()
1561    }
1562
1563    /// Acquires a write lock on the graph.
1564    ///
1565    /// Only one writer can hold the lock at a time.
1566    /// This increments the global epoch counter.
1567    #[inline]
1568    pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1569        // Increment the global epoch
1570        self.epoch.fetch_add(1, Ordering::SeqCst);
1571        let mut guard = self.inner.write();
1572        // Sync the inner graph's epoch with the global epoch
1573        guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1574        guard
1575    }
1576
1577    /// Returns the current global epoch.
1578    ///
1579    /// This can be used to detect if the graph has been modified
1580    /// since a previous operation (cursor invalidation).
1581    #[inline]
1582    #[must_use]
1583    pub fn epoch(&self) -> u64 {
1584        self.epoch.load(Ordering::SeqCst)
1585    }
1586
1587    /// Creates a cheap snapshot of the graph.
1588    ///
1589    /// This acquires a brief read lock to clone the Arc references.
1590    /// The snapshot is isolated from future mutations.
1591    #[must_use]
1592    pub fn snapshot(&self) -> GraphSnapshot {
1593        self.inner.read().snapshot()
1594    }
1595
1596    // ------------------------------------------------------------------
1597    // Phase 1 fact-layer provenance convenience accessors.
1598    // These acquire a brief read lock, matching the snapshot() pattern.
1599    // ------------------------------------------------------------------
1600
1601    /// Returns the monotonic fact-layer epoch from the underlying graph.
1602    #[must_use]
1603    pub fn fact_epoch(&self) -> u64 {
1604        self.inner.read().fact_epoch()
1605    }
1606
1607    /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1608    #[must_use]
1609    pub fn node_provenance(
1610        &self,
1611        id: crate::graph::unified::node::id::NodeId,
1612    ) -> Option<NodeProvenance> {
1613        self.inner.read().node_provenance(id).copied()
1614    }
1615
1616    /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1617    #[must_use]
1618    pub fn edge_provenance(
1619        &self,
1620        id: crate::graph::unified::edge::id::EdgeId,
1621    ) -> Option<EdgeProvenance> {
1622        self.inner.read().edge_provenance(id).copied()
1623    }
1624
1625    /// Returns a file provenance view (acquires a brief read lock).
1626    ///
1627    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1628    #[must_use]
1629    pub fn file_provenance(
1630        &self,
1631        id: crate::graph::unified::file::id::FileId,
1632    ) -> Option<OwnedFileProvenanceView> {
1633        let guard = self.inner.read();
1634        guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1635            content_hash: *v.content_hash,
1636            indexed_at: v.indexed_at,
1637            source_uri: v.source_uri,
1638            is_external: v.is_external,
1639        })
1640    }
1641
1642    // ------------------------------------------------------------------
1643    // Phase 2 binding-plane accessors (P2U03).
1644    // ------------------------------------------------------------------
1645
1646    /// Returns the scope arena from the underlying graph (acquires a brief
1647    /// read lock).
1648    ///
1649    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1650    /// this call site.
1651    #[must_use]
1652    pub fn scope_arena(&self) -> Arc<ScopeArena> {
1653        Arc::clone(&self.inner.read().scope_arena)
1654    }
1655
1656    /// Returns the alias table from the underlying graph (acquires a brief
1657    /// read lock).
1658    ///
1659    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1660    /// this call site.
1661    #[must_use]
1662    pub fn alias_table(&self) -> Arc<AliasTable> {
1663        Arc::clone(&self.inner.read().alias_table)
1664    }
1665
1666    /// Returns the shadow table from the underlying graph (acquires a brief
1667    /// read lock).
1668    ///
1669    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1670    /// this call site.
1671    #[must_use]
1672    pub fn shadow_table(&self) -> Arc<ShadowTable> {
1673        Arc::clone(&self.inner.read().shadow_table)
1674    }
1675
1676    /// Returns the scope provenance store from the underlying graph (acquires
1677    /// a brief read lock).
1678    ///
1679    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1680    /// this call site.
1681    #[must_use]
1682    pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1683        Arc::clone(&self.inner.read().scope_provenance_store)
1684    }
1685
1686    /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1687    ///
1688    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1689    #[must_use]
1690    pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1691        self.inner.read().scope_provenance(id).cloned()
1692    }
1693
1694    /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1695    /// brief read lock).
1696    ///
1697    /// Returns `None` if no provenance record is registered for that stable id.
1698    #[must_use]
1699    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1700        self.inner.read().scope_by_stable_id(stable)
1701    }
1702
1703    /// Returns the file segment table from the underlying graph (acquires a
1704    /// brief read lock).
1705    #[must_use]
1706    pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1707        Arc::clone(&self.inner.read().file_segments)
1708    }
1709
1710    /// Attempts to acquire a read lock without blocking.
1711    ///
1712    /// Returns `None` if the lock is currently held exclusively.
1713    #[inline]
1714    #[must_use]
1715    pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1716        self.inner.try_read()
1717    }
1718
1719    /// Attempts to acquire a write lock without blocking.
1720    ///
1721    /// Returns `None` if the lock is currently held by another thread.
1722    /// If successful, increments the epoch.
1723    #[inline]
1724    pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1725        self.inner.try_write().map(|mut guard| {
1726            self.epoch.fetch_add(1, Ordering::SeqCst);
1727            guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1728            guard
1729        })
1730    }
1731}
1732
1733impl Default for ConcurrentCodeGraph {
1734    fn default() -> Self {
1735        Self::new()
1736    }
1737}
1738
1739impl fmt::Debug for ConcurrentCodeGraph {
1740    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1741        f.debug_struct("ConcurrentCodeGraph")
1742            .field("epoch", &self.epoch.load(Ordering::SeqCst))
1743            .finish_non_exhaustive()
1744    }
1745}
1746
1747/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1748/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1749#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1750pub struct OwnedFileProvenanceView {
1751    /// SHA-256 of the on-disk file bytes (owned copy).
1752    pub content_hash: [u8; 32],
1753    /// Unix-epoch seconds at which this file was registered.
1754    pub indexed_at: u64,
1755    /// Optional interned physical origin URI.
1756    pub source_uri: Option<StringId>,
1757    /// Whether this file originates from an external source.
1758    pub is_external: bool,
1759}
1760
1761/// Immutable snapshot of a `CodeGraph` for long-running queries.
1762///
1763/// `GraphSnapshot` holds Arc references to the graph components,
1764/// providing a consistent view that is isolated from concurrent mutations.
1765///
1766/// # Design
1767///
1768/// Snapshots are created via `CodeGraph::snapshot()` or
1769/// `ConcurrentCodeGraph::snapshot()`. They are:
1770///
1771/// - **Immutable**: No mutation methods available
1772/// - **Isolated**: Independent of future graph mutations
1773/// - **Cheap**: Only Arc clones, no data copying
1774/// - **Self-contained**: Can outlive the original graph/lock
1775///
1776/// # Usage
1777///
1778/// ```rust
1779/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1780///
1781/// let graph = ConcurrentCodeGraph::new();
1782///
1783/// // Create snapshot for a long query
1784/// let snapshot: GraphSnapshot = graph.snapshot();
1785///
1786/// // Snapshot can be used independently
1787/// let _epoch = snapshot.epoch();
1788/// ```
1789#[derive(Clone)]
1790pub struct GraphSnapshot {
1791    /// Node storage snapshot.
1792    nodes: Arc<NodeArena>,
1793    /// Edge storage snapshot.
1794    edges: Arc<BidirectionalEdgeStore>,
1795    /// String interner snapshot.
1796    strings: Arc<StringInterner>,
1797    /// File registry snapshot.
1798    files: Arc<FileRegistry>,
1799    /// Auxiliary indices snapshot.
1800    indices: Arc<AuxiliaryIndices>,
1801    /// Sparse macro boundary metadata snapshot.
1802    macro_metadata: Arc<NodeMetadataStore>,
1803    /// Dense node provenance snapshot (Phase 1).
1804    node_provenance: Arc<NodeProvenanceStore>,
1805    /// Dense edge provenance snapshot (Phase 1).
1806    edge_provenance: Arc<EdgeProvenanceStore>,
1807    /// Monotonic fact-layer epoch at snapshot time.
1808    fact_epoch: u64,
1809    /// Epoch at snapshot time (for cursor validation).
1810    epoch: u64,
1811    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1812    scope_arena: Arc<ScopeArena>,
1813    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1814    alias_table: Arc<AliasTable>,
1815    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1816    shadow_table: Arc<ShadowTable>,
1817    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1818    scope_provenance_store: Arc<ScopeProvenanceStore>,
1819    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1820    file_segments: Arc<FileSegmentTable>,
1821    /// Phase A (U09): C indirect-call resolver side tables snapshot.
1822    ///
1823    /// `None` on non-C workspaces. The snapshot owns its own clone of the
1824    /// `Option<CIndirectSideTables>` so concurrent readers see a stable
1825    /// view independent of subsequent mutations to the source `CodeGraph`.
1826    c_indirect_tables: Option<CIndirectSideTables>,
1827    /// Whether this snapshot carries genuine `NodeEntry.is_definition` signal
1828    /// (R3 marker). Copied from the source [`CodeGraph`] at snapshot time. See
1829    /// [`CodeGraph::definition_signal_present`] for the contract.
1830    definition_signal_present: bool,
1831}
1832
1833impl GraphSnapshot {
1834    /// Returns whether this snapshot carries genuine `is_definition` signal.
1835    ///
1836    /// See [`CodeGraph::definition_signal_present`] for the full contract.
1837    #[inline]
1838    #[must_use]
1839    pub fn definition_signal_present(&self) -> bool {
1840        self.definition_signal_present
1841    }
1842
1843    /// Returns a reference to the node arena.
1844    #[inline]
1845    #[must_use]
1846    pub fn nodes(&self) -> &NodeArena {
1847        &self.nodes
1848    }
1849
1850    /// Returns a reference to the bidirectional edge store.
1851    #[inline]
1852    #[must_use]
1853    pub fn edges(&self) -> &BidirectionalEdgeStore {
1854        &self.edges
1855    }
1856
1857    /// Returns a reference to the string interner.
1858    #[inline]
1859    #[must_use]
1860    pub fn strings(&self) -> &StringInterner {
1861        &self.strings
1862    }
1863
1864    /// Returns a reference to the file registry.
1865    #[inline]
1866    #[must_use]
1867    pub fn files(&self) -> &FileRegistry {
1868        &self.files
1869    }
1870
1871    /// Returns a reference to the auxiliary indices.
1872    #[inline]
1873    #[must_use]
1874    pub fn indices(&self) -> &AuxiliaryIndices {
1875        &self.indices
1876    }
1877
1878    /// Returns a reference to the macro boundary metadata store.
1879    #[inline]
1880    #[must_use]
1881    pub fn macro_metadata(&self) -> &NodeMetadataStore {
1882        &self.macro_metadata
1883    }
1884
1885    /// Returns a reference to the C indirect-call side tables, if any.
1886    ///
1887    /// Mirrors [`CodeGraph::c_indirect_tables`]; see that method for
1888    /// the contract. Snapshot-level access is read-only.
1889    #[inline]
1890    #[must_use]
1891    pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
1892        self.c_indirect_tables.as_ref()
1893    }
1894
1895    // ------------------------------------------------------------------
1896    // Phase 1 fact-layer provenance accessors (P1U09).
1897    // ------------------------------------------------------------------
1898
1899    /// Returns the monotonic fact-layer epoch.
1900    #[inline]
1901    #[must_use]
1902    pub fn fact_epoch(&self) -> u64 {
1903        self.fact_epoch
1904    }
1905
1906    /// Looks up node provenance by `NodeId`.
1907    #[inline]
1908    #[must_use]
1909    pub fn node_provenance(
1910        &self,
1911        id: crate::graph::unified::node::id::NodeId,
1912    ) -> Option<&NodeProvenance> {
1913        self.node_provenance.lookup(id)
1914    }
1915
1916    /// Looks up edge provenance by `EdgeId`.
1917    #[inline]
1918    #[must_use]
1919    pub fn edge_provenance(
1920        &self,
1921        id: crate::graph::unified::edge::id::EdgeId,
1922    ) -> Option<&EdgeProvenance> {
1923        self.edge_provenance.lookup(id)
1924    }
1925
1926    /// Returns a borrowed provenance view for a file.
1927    #[inline]
1928    #[must_use]
1929    pub fn file_provenance(
1930        &self,
1931        id: crate::graph::unified::file::id::FileId,
1932    ) -> Option<FileProvenanceView<'_>> {
1933        self.files.file_provenance(id)
1934    }
1935
1936    // ------------------------------------------------------------------
1937    // Phase 2 binding-plane accessors (P2U03).
1938    // ------------------------------------------------------------------
1939
1940    /// Returns a reference to the scope arena at snapshot time.
1941    ///
1942    /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1943    /// as it existed when `snapshot()` was called. Subsequent calls to
1944    /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1945    #[inline]
1946    #[must_use]
1947    pub fn scope_arena(&self) -> &ScopeArena {
1948        &self.scope_arena
1949    }
1950
1951    /// Returns a reference to the alias table at snapshot time.
1952    ///
1953    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1954    /// as it existed when `snapshot()` was called. Subsequent calls to
1955    /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1956    #[inline]
1957    #[must_use]
1958    pub fn alias_table(&self) -> &AliasTable {
1959        &self.alias_table
1960    }
1961
1962    /// Returns a reference to the shadow table at snapshot time.
1963    ///
1964    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1965    /// as it existed when `snapshot()` was called. Subsequent calls to
1966    /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1967    #[inline]
1968    #[must_use]
1969    pub fn shadow_table(&self) -> &ShadowTable {
1970        &self.shadow_table
1971    }
1972
1973    /// Returns a reference to the scope provenance store at snapshot time.
1974    ///
1975    /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1976    /// as it existed when `snapshot()` was called. Subsequent calls to
1977    /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1978    /// this view.
1979    #[inline]
1980    #[must_use]
1981    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1982        &self.scope_provenance_store
1983    }
1984
1985    /// Looks up scope provenance by `ScopeId` at snapshot time.
1986    ///
1987    /// Returns `None` if the slot is out of range, vacant, or the stored
1988    /// generation does not match (stale handle).
1989    #[inline]
1990    #[must_use]
1991    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1992        self.scope_provenance_store.lookup(id)
1993    }
1994
1995    /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1996    ///
1997    /// Returns `None` if no provenance record is registered for that stable id.
1998    #[inline]
1999    #[must_use]
2000    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
2001        self.scope_provenance_store.scope_by_stable_id(stable)
2002    }
2003
2004    /// Returns a reference to the file segment table at snapshot time.
2005    #[inline]
2006    #[must_use]
2007    pub fn file_segments(&self) -> &FileSegmentTable {
2008        &self.file_segments
2009    }
2010
2011    /// Returns the epoch at which this snapshot was taken.
2012    ///
2013    /// This can be compared against the current graph epoch to
2014    /// detect if the graph has changed since the snapshot.
2015    #[inline]
2016    #[must_use]
2017    pub fn epoch(&self) -> u64 {
2018        self.epoch
2019    }
2020
2021    /// Returns `true` if this snapshot's epoch matches the given epoch.
2022    ///
2023    /// Use this to validate cursors before continuing pagination.
2024    #[inline]
2025    #[must_use]
2026    pub fn epoch_matches(&self, other_epoch: u64) -> bool {
2027        self.epoch == other_epoch
2028    }
2029
2030    // ------------------------------------------------------------------
2031    // Phase 2 binding-plane facade accessor (P2U07).
2032    // ------------------------------------------------------------------
2033
2034    /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
2035    ///
2036    /// The facade is the stable Phase 2 public API for scope/alias/shadow
2037    /// queries and witness-bearing resolution. It provides a single entry
2038    /// point (`resolve`) that returns both a `BindingResult` and an ordered
2039    /// step trace in a `BindingResolution`.
2040    ///
2041    /// # MVCC note
2042    ///
2043    /// `BindingPlane<'_>` borrows from this snapshot, which is already an
2044    /// MVCC-consistent view of the graph at snapshot time. Callers from
2045    /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
2046    /// pattern so the snapshot lifetime is explicit:
2047    ///
2048    /// ```rust,ignore
2049    /// // CodeGraph caller:
2050    /// let snapshot = graph.snapshot();
2051    /// let plane = snapshot.binding_plane();
2052    ///
2053    /// // ConcurrentCodeGraph caller:
2054    /// let read_guard = concurrent.read();
2055    /// let snapshot = read_guard.snapshot();
2056    /// let plane = snapshot.binding_plane();
2057    /// ```
2058    #[inline]
2059    #[must_use]
2060    pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
2061        crate::graph::unified::bind::plane::BindingPlane::new(self)
2062    }
2063
2064    // ============================================================================
2065    // Query Methods
2066    // ============================================================================
2067
2068    /// Finds nodes matching a pattern.
2069    ///
2070    /// Performs a simple substring match on node names and qualified names.
2071    /// Returns all matching node IDs.
2072    ///
2073    /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
2074    /// nodes — internal scaffolding the language plugins emit for
2075    /// binding-plane and scope analysis (e.g. the Go plugin's
2076    /// `<field:operand.field>` field-access shadows and the
2077    /// `<ident>@<offset>` per-binding-site Variable nodes from the
2078    /// local-scope resolver) — are filtered out by default. Internal
2079    /// callers that need to reach these nodes (binding plane, scope /
2080    /// alias / shadow analysis) use
2081    /// [`Self::find_by_pattern_with_options`] with
2082    /// `include_synthetic = true`.
2083    ///
2084    /// # Performance
2085    ///
2086    /// Optimized to iterate over unique strings in the interner (smaller set)
2087    /// rather than all nodes in the arena.
2088    ///
2089    /// # Arguments
2090    ///
2091    /// * `pattern` - The pattern to match (substring search)
2092    ///
2093    /// # Returns
2094    ///
2095    /// A vector of `NodeIds` for all matching nodes (synthetic
2096    /// placeholders excluded).
2097    #[must_use]
2098    pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
2099        self.find_by_pattern_with_options(pattern, false)
2100    }
2101
2102    /// Finds nodes matching a pattern with explicit control over synthetic
2103    /// placeholder visibility.
2104    ///
2105    /// `include_synthetic = false` is the default surface used by every
2106    /// user-facing caller (CLI `search`, MCP `semantic_search` /
2107    /// `pattern_search` / `relation_query`, etc.). Synthetic
2108    /// placeholders are suppressed via two parallel checks that must
2109    /// agree:
2110    ///
2111    /// 1. The authoritative `NodeFlags::SYNTHETIC` bit on the
2112    ///    metadata store
2113    ///    ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
2114    /// 2. The structural name-shape fallback
2115    ///    ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
2116    ///    for V10 snapshots written before the synthetic bit existed
2117    ///    and for cross-file unification losers that retained their
2118    ///    name but lost their metadata entry.
2119    ///
2120    /// Either check matching is sufficient to suppress the node. The
2121    /// design lives in `docs/development/public-issue-triage/`
2122    /// under the `C_SUPPRESS` unit; see also the rationale in
2123    /// [`crate::graph::unified::storage::metadata::NodeFlags::SYNTHETIC`].
2124    ///
2125    /// `include_synthetic = true` is **internal-only**. The binding
2126    /// plane, scope resolver, and rebuild's coverage gate use this
2127    /// path to reach synthetic nodes for their structural integrity
2128    /// checks. **No CLI / MCP surface should ever pass `true`.**
2129    #[must_use]
2130    pub fn find_by_pattern_with_options(
2131        &self,
2132        pattern: &str,
2133        include_synthetic: bool,
2134    ) -> Vec<crate::graph::unified::node::NodeId> {
2135        let mut matches = Vec::new();
2136
2137        // 1. Scan unique strings in interner for matches
2138        for (str_id, s) in self.strings.iter() {
2139            if s.contains(pattern) {
2140                // 2. If string matches, look up all nodes with this name
2141                // Check qualified name index
2142                matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2143                // Check simple name index
2144                matches.extend_from_slice(self.indices.by_name(str_id));
2145            }
2146        }
2147
2148        // Deduplicate matches (a node might match both qualified and simple name)
2149        matches.sort_unstable();
2150        matches.dedup();
2151
2152        if !include_synthetic {
2153            matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2154        }
2155
2156        matches
2157    }
2158
2159    /// Finds nodes whose interned simple **or** qualified name equals `name`,
2160    /// accepting dot- and Ruby-`#` qualified display form as a fallback for
2161    /// graph-canonical `::` qualified names.
2162    ///
2163    /// This is the canonical surface for **exact-name** lookups —
2164    /// shared by the CLI `--exact <pattern>` shorthand
2165    /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
2166    /// structural query planner's `name:` predicate
2167    /// (`sqry-db/src/planner/parse.rs`,
2168    /// `sqry-db/src/planner/execute.rs`). Both surfaces are
2169    /// contract-bound (DAG `B1_ALIGN`) to return the same set against
2170    /// any fixture: the CLI calls this method directly, while the
2171    /// planner uses the same interner + by-name index pair internally
2172    /// when scanning, then applies the same synthetic filter.
2173    ///
2174    /// **Synthetic suppression.** Synthetic placeholder nodes
2175    /// (Go-plugin `<field:operand.field>` shadows and
2176    /// `<ident>@<offset>` per-binding-site Variables; see
2177    /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
2178    /// are excluded via [`Self::is_node_synthetic`]. There is **no**
2179    /// `include_synthetic = true` variant for the exact-match surface
2180    /// because the synthetic name shapes the structural fallback
2181    /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
2182    /// name byte-for-byte; the metadata-bit channel is the only
2183    /// realistic leak vector and it is suppressed unconditionally.
2184    ///
2185    /// **Display fallback.** Exact lookup checks the literal input. When the
2186    /// input is qualified, language-aware display candidates win over raw
2187    /// canonical matches. This keeps native Rust `identity::T` from also
2188    /// returning a TypeScript type parameter whose internal canonical form is
2189    /// `identity::T` but whose display form is `identity.T`. If neither
2190    /// display nor literal lookup finds candidates, dot- and Ruby-`#`
2191    /// qualified inputs also check the graph-canonical `::` rewrite.
2192    ///
2193    /// # Performance
2194    ///
2195    /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2196    /// interned the resolver still tries a native-display to graph-canonical
2197    /// `::` fallback for user-facing qualified names before returning empty.
2198    ///
2199    /// # Arguments
2200    ///
2201    /// * `name` - The exact name to look up (no glob, no regex).
2202    ///
2203    /// # Returns
2204    ///
2205    /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2206    /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2207    /// name equals `name`, or matches for the graph-canonical `::` rewrite
2208    /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2209    /// literal or display candidates.
2210    #[must_use]
2211    pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2212        let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2213        if !is_qualified {
2214            // Bare name lookup: the interned `by_name` index covers
2215            // every language and is O(1); no display scan is needed.
2216            return self.find_by_exact_interned_name(name);
2217        }
2218        // Qualified name: each plugin stores its native form
2219        // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2220        // Ruby `Ledger#promotedField`). Scan computed display names so
2221        // a single dotted user query resolves to all language matches,
2222        // then fall back to the interned form (and the graph-canonical
2223        // `::` rewrite) when display yields nothing.
2224        let mut exact_matches = self.find_by_exact_display_name(name);
2225        if exact_matches.is_empty() {
2226            exact_matches = self.find_by_exact_interned_name(name);
2227            if exact_matches.is_empty() && !name.contains("::") {
2228                let canonical = name.replace(['.', '#'], "::");
2229                exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2230                exact_matches.sort_unstable();
2231                exact_matches.dedup();
2232            }
2233        }
2234        exact_matches
2235    }
2236
2237    fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2238        let mut matches = self
2239            .iter_nodes()
2240            .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2241            .filter_map(|(node_id, entry)| {
2242                let qualified = entry
2243                    .qualified_name
2244                    .and_then(|sid| self.strings.resolve(sid))?;
2245                let display = self.files.language_for_file(entry.file).map_or_else(
2246                    || qualified.to_string(),
2247                    |language| {
2248                        display_graph_qualified_name(
2249                            language,
2250                            qualified.as_ref(),
2251                            entry.kind,
2252                            entry.is_static,
2253                        )
2254                    },
2255                );
2256                (display == name).then_some(node_id)
2257            })
2258            .collect::<Vec<_>>();
2259        matches.sort_unstable();
2260        matches.dedup();
2261        matches
2262    }
2263
2264    fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2265        let Some(str_id) = self.strings.get(name) else {
2266            return Vec::new();
2267        };
2268
2269        let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2270        matches.extend_from_slice(self.indices.by_name(str_id));
2271        matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2272        matches.sort_unstable();
2273        matches.dedup();
2274        matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2275        matches
2276    }
2277
2278    /// Returns `true` if the node should be treated as a synthetic
2279    /// placeholder for user-facing surfaces.
2280    ///
2281    /// Combines the metadata-store flag and the structural name-shape
2282    /// fallback (see [`Self::find_by_pattern_with_options`] for the
2283    /// full rationale). Returns `false` for missing nodes (an unknown
2284    /// `NodeId` is not "synthetic" — it is "not present").
2285    #[must_use]
2286    pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2287        // Authoritative check: metadata-store bit (NodeFlags::SYNTHETIC).
2288        if self.macro_metadata.is_synthetic(node_id) {
2289            return true;
2290        }
2291        // Structural fallback: name shape recognised as synthetic.
2292        // Required for V10 snapshots written before the bit existed,
2293        // for unification losers that lost their metadata entry, and
2294        // as defence-in-depth against future plugins forgetting to
2295        // flip the bit.
2296        let Some(entry) = self.nodes.get(node_id) else {
2297            return false;
2298        };
2299        if entry.is_unified_loser() {
2300            // Already invisible for other reasons; do not also flag as synthetic.
2301            return false;
2302        }
2303        let Some(name) = self.strings.resolve(entry.name) else {
2304            return false;
2305        };
2306        crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2307            name.as_ref(),
2308        )
2309    }
2310
2311    /// Gets all callees of a node (functions called by this node).
2312    ///
2313    /// Queries the forward edge store for all Calls edges from this node.
2314    ///
2315    /// # Arguments
2316    ///
2317    /// * `node` - The node ID to query
2318    ///
2319    /// # Returns
2320    ///
2321    /// A vector of `NodeIds` representing functions called by this node.
2322    #[must_use]
2323    pub fn get_callees(
2324        &self,
2325        node: crate::graph::unified::node::NodeId,
2326    ) -> Vec<crate::graph::unified::node::NodeId> {
2327        use crate::graph::unified::edge::EdgeKind;
2328
2329        self.edges
2330            .edges_from(node)
2331            .into_iter()
2332            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2333            .map(|edge| edge.target)
2334            .collect()
2335    }
2336
2337    /// Gets all callers of a node (functions that call this node).
2338    ///
2339    /// Queries the reverse edge store for all Calls edges to this node.
2340    ///
2341    /// # Arguments
2342    ///
2343    /// * `node` - The node ID to query
2344    ///
2345    /// # Returns
2346    ///
2347    /// A vector of `NodeIds` representing functions that call this node.
2348    #[must_use]
2349    pub fn get_callers(
2350        &self,
2351        node: crate::graph::unified::node::NodeId,
2352    ) -> Vec<crate::graph::unified::node::NodeId> {
2353        use crate::graph::unified::edge::EdgeKind;
2354
2355        self.edges
2356            .edges_to(node)
2357            .into_iter()
2358            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2359            .map(|edge| edge.source)
2360            .collect()
2361    }
2362
2363    /// Iterates over all nodes in the graph.
2364    ///
2365    /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2366    /// occupied slots in the arena.
2367    ///
2368    /// # Returns
2369    ///
2370    /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2371    pub fn iter_nodes(
2372        &self,
2373    ) -> impl Iterator<
2374        Item = (
2375            crate::graph::unified::node::NodeId,
2376            &crate::graph::unified::storage::arena::NodeEntry,
2377        ),
2378    > {
2379        self.nodes.iter()
2380    }
2381
2382    /// Iterates over all edges in the graph.
2383    ///
2384    /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2385    /// all edges in the forward edge store.
2386    ///
2387    /// # Returns
2388    ///
2389    /// An iterator over edge tuples.
2390    pub fn iter_edges(
2391        &self,
2392    ) -> impl Iterator<
2393        Item = (
2394            crate::graph::unified::node::NodeId,
2395            crate::graph::unified::node::NodeId,
2396            crate::graph::unified::edge::EdgeKind,
2397        ),
2398    > + '_ {
2399        // Iterate over all nodes in the arena and get their outgoing edges
2400        self.nodes.iter().flat_map(move |(node_id, _entry)| {
2401            // Get all edges from this node
2402            self.edges
2403                .edges_from(node_id)
2404                .into_iter()
2405                .map(move |edge| (node_id, edge.target, edge.kind))
2406        })
2407    }
2408
2409    /// Gets a node entry by ID.
2410    ///
2411    /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2412    /// if the ID is invalid or stale.
2413    ///
2414    /// # Arguments
2415    ///
2416    /// * `id` - The node ID to look up
2417    ///
2418    /// # Returns
2419    ///
2420    /// A reference to the `NodeEntry`, or None if not found.
2421    #[must_use]
2422    pub fn get_node(
2423        &self,
2424        id: crate::graph::unified::node::NodeId,
2425    ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2426        self.nodes.get(id)
2427    }
2428}
2429
2430impl fmt::Debug for GraphSnapshot {
2431    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2432        f.debug_struct("GraphSnapshot")
2433            .field("nodes", &self.nodes.len())
2434            .field("epoch", &self.epoch)
2435            .finish_non_exhaustive()
2436    }
2437}
2438
2439/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2440///
2441/// This lets helpers that only *read* graph state (name-matching, relation
2442/// traversal, reference lookups) be written once and called from both the
2443/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2444/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
2445pub trait GraphAccess {
2446    /// Returns the node arena (read-only).
2447    fn nodes(&self) -> &NodeArena;
2448    /// Returns the bidirectional edge store (read-only).
2449    fn edges(&self) -> &BidirectionalEdgeStore;
2450    /// Returns the string interner (read-only).
2451    fn strings(&self) -> &StringInterner;
2452    /// Returns the file registry (read-only).
2453    fn files(&self) -> &FileRegistry;
2454    /// Returns the auxiliary indices (read-only).
2455    fn indices(&self) -> &AuxiliaryIndices;
2456}
2457
2458impl GraphAccess for CodeGraph {
2459    #[inline]
2460    fn nodes(&self) -> &NodeArena {
2461        CodeGraph::nodes(self)
2462    }
2463    #[inline]
2464    fn edges(&self) -> &BidirectionalEdgeStore {
2465        CodeGraph::edges(self)
2466    }
2467    #[inline]
2468    fn strings(&self) -> &StringInterner {
2469        CodeGraph::strings(self)
2470    }
2471    #[inline]
2472    fn files(&self) -> &FileRegistry {
2473        CodeGraph::files(self)
2474    }
2475    #[inline]
2476    fn indices(&self) -> &AuxiliaryIndices {
2477        CodeGraph::indices(self)
2478    }
2479}
2480
2481impl GraphAccess for GraphSnapshot {
2482    #[inline]
2483    fn nodes(&self) -> &NodeArena {
2484        GraphSnapshot::nodes(self)
2485    }
2486    #[inline]
2487    fn edges(&self) -> &BidirectionalEdgeStore {
2488        GraphSnapshot::edges(self)
2489    }
2490    #[inline]
2491    fn strings(&self) -> &StringInterner {
2492        GraphSnapshot::strings(self)
2493    }
2494    #[inline]
2495    fn files(&self) -> &FileRegistry {
2496        GraphSnapshot::files(self)
2497    }
2498    #[inline]
2499    fn indices(&self) -> &AuxiliaryIndices {
2500        GraphSnapshot::indices(self)
2501    }
2502}
2503
2504#[cfg(test)]
2505mod tests {
2506    use super::*;
2507    use crate::graph::unified::{
2508        FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2509        SymbolResolutionOutcome,
2510    };
2511
2512    fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2513        match snapshot.resolve_symbol(&SymbolQuery {
2514            symbol,
2515            file_scope: FileScope::Any,
2516            mode: ResolutionMode::Strict,
2517        }) {
2518            SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2519            SymbolResolutionOutcome::NotFound
2520            | SymbolResolutionOutcome::FileNotIndexed
2521            | SymbolResolutionOutcome::Ambiguous(_) => None,
2522        }
2523    }
2524
2525    fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2526        match snapshot.find_symbol_candidates(&SymbolQuery {
2527            symbol,
2528            file_scope: FileScope::Any,
2529            mode: ResolutionMode::AllowSuffixCandidates,
2530        }) {
2531            SymbolCandidateOutcome::Candidates(candidates) => candidates,
2532            SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2533        }
2534    }
2535
2536    #[test]
2537    fn test_code_graph_new() {
2538        let graph = CodeGraph::new();
2539        assert_eq!(graph.epoch(), 0);
2540        assert_eq!(graph.nodes().len(), 0);
2541    }
2542
2543    #[test]
2544    fn test_code_graph_default() {
2545        let graph = CodeGraph::default();
2546        assert_eq!(graph.epoch(), 0);
2547    }
2548
2549    #[test]
2550    fn test_code_graph_snapshot() {
2551        let graph = CodeGraph::new();
2552        let snapshot = graph.snapshot();
2553        assert_eq!(snapshot.epoch(), 0);
2554        assert_eq!(snapshot.nodes().len(), 0);
2555    }
2556
2557    #[test]
2558    fn test_code_graph_bump_epoch() {
2559        let mut graph = CodeGraph::new();
2560        assert_eq!(graph.epoch(), 0);
2561        assert_eq!(graph.bump_epoch(), 1);
2562        assert_eq!(graph.epoch(), 1);
2563        assert_eq!(graph.bump_epoch(), 2);
2564        assert_eq!(graph.epoch(), 2);
2565    }
2566
2567    #[test]
2568    fn test_code_graph_set_epoch() {
2569        let mut graph = CodeGraph::new();
2570        graph.set_epoch(42);
2571        assert_eq!(graph.epoch(), 42);
2572    }
2573
2574    #[test]
2575    fn test_code_graph_from_components() {
2576        let nodes = NodeArena::new();
2577        let edges = BidirectionalEdgeStore::new();
2578        let strings = StringInterner::new();
2579        let files = FileRegistry::new();
2580        let indices = AuxiliaryIndices::new();
2581        let macro_metadata = NodeMetadataStore::new();
2582
2583        let graph =
2584            CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2585        assert_eq!(graph.epoch(), 0);
2586    }
2587
2588    #[test]
2589    fn test_code_graph_mut_accessors() {
2590        let mut graph = CodeGraph::new();
2591
2592        // Access mutable references - should not panic
2593        let _nodes = graph.nodes_mut();
2594        let _edges = graph.edges_mut();
2595        let _strings = graph.strings_mut();
2596        let _files = graph.files_mut();
2597        let _indices = graph.indices_mut();
2598    }
2599
2600    #[test]
2601    fn test_code_graph_snapshot_isolation() {
2602        let mut graph = CodeGraph::new();
2603        let snapshot1 = graph.snapshot();
2604
2605        // Mutate the graph
2606        graph.bump_epoch();
2607
2608        let snapshot2 = graph.snapshot();
2609
2610        // Snapshots should have different epochs
2611        assert_eq!(snapshot1.epoch(), 0);
2612        assert_eq!(snapshot2.epoch(), 1);
2613    }
2614
2615    #[test]
2616    fn test_code_graph_debug() {
2617        let graph = CodeGraph::new();
2618        let debug_str = format!("{graph:?}");
2619        assert!(debug_str.contains("CodeGraph"));
2620        assert!(debug_str.contains("epoch"));
2621    }
2622
2623    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
2624    /// accounting: adding a `ConfidenceMetadata` entry with known inner
2625    /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
2626    /// of those capacities plus Vec<String> slot overhead.
2627    #[test]
2628    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
2629        let mut graph = CodeGraph::new();
2630        // Seed one confidence entry so the HashMap has non-zero capacity;
2631        // reserve slack so the next insert cannot rehash.
2632        graph.set_confidence({
2633            let mut m = HashMap::with_capacity(8);
2634            m.insert("seed".to_string(), ConfidenceMetadata::default());
2635            m
2636        });
2637        let before = graph.heap_bytes();
2638        let before_cap = graph.confidence.capacity();
2639
2640        let lim1 = String::from("no type inference");
2641        let lim2 = String::from("no generic specialization");
2642        let feat1 = String::from("rust-analyzer");
2643        let l1 = lim1.capacity();
2644        let l2 = lim2.capacity();
2645        let f1 = feat1.capacity();
2646
2647        let limitations = vec![lim1, lim2];
2648        let lim_vec_cap = limitations.capacity();
2649
2650        let unavailable_features = vec![feat1];
2651        let feat_vec_cap = unavailable_features.capacity();
2652
2653        let key = String::from("rust");
2654        let key_cap = key.capacity();
2655        graph.confidence.insert(
2656            key,
2657            ConfidenceMetadata {
2658                limitations,
2659                unavailable_features,
2660                ..Default::default()
2661            },
2662        );
2663        assert_eq!(
2664            graph.confidence.capacity(),
2665            before_cap,
2666            "prerequisite: confidence HashMap must not rehash during the test insert",
2667        );
2668
2669        let after = graph.heap_bytes();
2670        let expected_inner = key_cap
2671            + lim_vec_cap * std::mem::size_of::<String>()
2672            + l1
2673            + l2
2674            + feat_vec_cap * std::mem::size_of::<String>()
2675            + f1;
2676        assert_eq!(
2677            after - before,
2678            expected_inner,
2679            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
2680        );
2681    }
2682
2683    #[test]
2684    fn test_codegraph_heap_bytes_grows_with_content() {
2685        use crate::graph::unified::node::NodeKind;
2686        use crate::graph::unified::storage::arena::NodeEntry;
2687        use std::path::Path;
2688
2689        // An empty graph reports some heap bytes (FileRegistry seeds index 0
2690        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
2691        // etc.) but the value must be finite and well under the 100 MB cap.
2692        let empty = CodeGraph::new();
2693        let empty_bytes = empty.heap_bytes();
2694        assert!(
2695            empty_bytes < 100 * 1024 * 1024,
2696            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2697        );
2698
2699        let mut graph = CodeGraph::new();
2700        for i in 0..32u32 {
2701            let name = format!("sym_{i}");
2702            let qual = format!("module::sym_{i}");
2703            let file = format!("file_{i}.rs");
2704
2705            let name_id = graph.strings_mut().intern(&name).unwrap();
2706            let qual_id = graph.strings_mut().intern(&qual).unwrap();
2707            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2708
2709            let entry =
2710                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2711            let node_id = graph.nodes_mut().alloc(entry).unwrap();
2712            graph
2713                .indices_mut()
2714                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2715        }
2716
2717        let populated_bytes = graph.heap_bytes();
2718        assert!(
2719            populated_bytes > 0,
2720            "populated graph should report nonzero heap bytes"
2721        );
2722        assert!(
2723            populated_bytes > empty_bytes,
2724            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2725        );
2726        assert!(
2727            populated_bytes < 100 * 1024 * 1024,
2728            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2729        );
2730    }
2731
2732    #[test]
2733    fn test_concurrent_code_graph_new() {
2734        let graph = ConcurrentCodeGraph::new();
2735        assert_eq!(graph.epoch(), 0);
2736    }
2737
2738    #[test]
2739    fn test_concurrent_code_graph_default() {
2740        let graph = ConcurrentCodeGraph::default();
2741        assert_eq!(graph.epoch(), 0);
2742    }
2743
2744    #[test]
2745    fn test_concurrent_code_graph_from_graph() {
2746        let mut inner = CodeGraph::new();
2747        inner.set_epoch(10);
2748        let graph = ConcurrentCodeGraph::from_graph(inner);
2749        assert_eq!(graph.epoch(), 10);
2750    }
2751
2752    #[test]
2753    fn test_concurrent_code_graph_read() {
2754        let graph = ConcurrentCodeGraph::new();
2755        let guard = graph.read();
2756        assert_eq!(guard.epoch(), 0);
2757        assert_eq!(guard.nodes().len(), 0);
2758    }
2759
2760    #[test]
2761    fn test_concurrent_code_graph_write_increments_epoch() {
2762        let graph = ConcurrentCodeGraph::new();
2763        assert_eq!(graph.epoch(), 0);
2764
2765        {
2766            let guard = graph.write();
2767            assert_eq!(guard.epoch(), 1);
2768        }
2769
2770        assert_eq!(graph.epoch(), 1);
2771
2772        {
2773            let _guard = graph.write();
2774        }
2775
2776        assert_eq!(graph.epoch(), 2);
2777    }
2778
2779    #[test]
2780    fn test_concurrent_code_graph_snapshot() {
2781        let graph = ConcurrentCodeGraph::new();
2782
2783        {
2784            let _guard = graph.write();
2785        }
2786
2787        let snapshot = graph.snapshot();
2788        assert_eq!(snapshot.epoch(), 1);
2789    }
2790
2791    #[test]
2792    fn test_concurrent_code_graph_try_read() {
2793        let graph = ConcurrentCodeGraph::new();
2794        let guard = graph.try_read();
2795        assert!(guard.is_some());
2796    }
2797
2798    #[test]
2799    fn test_concurrent_code_graph_try_write() {
2800        let graph = ConcurrentCodeGraph::new();
2801        let guard = graph.try_write();
2802        assert!(guard.is_some());
2803        assert_eq!(graph.epoch(), 1);
2804    }
2805
2806    #[test]
2807    fn test_concurrent_code_graph_debug() {
2808        let graph = ConcurrentCodeGraph::new();
2809        let debug_str = format!("{graph:?}");
2810        assert!(debug_str.contains("ConcurrentCodeGraph"));
2811        assert!(debug_str.contains("epoch"));
2812    }
2813
2814    #[test]
2815    fn test_graph_snapshot_accessors() {
2816        let graph = CodeGraph::new();
2817        let snapshot = graph.snapshot();
2818
2819        // All accessors should work
2820        let _nodes = snapshot.nodes();
2821        let _edges = snapshot.edges();
2822        let _strings = snapshot.strings();
2823        let _files = snapshot.files();
2824        let _indices = snapshot.indices();
2825        let _epoch = snapshot.epoch();
2826    }
2827
2828    #[test]
2829    fn test_graph_snapshot_epoch_matches() {
2830        let graph = CodeGraph::new();
2831        let snapshot = graph.snapshot();
2832
2833        assert!(snapshot.epoch_matches(0));
2834        assert!(!snapshot.epoch_matches(1));
2835    }
2836
2837    #[test]
2838    fn test_graph_snapshot_clone() {
2839        let graph = CodeGraph::new();
2840        let snapshot1 = graph.snapshot();
2841        let snapshot2 = snapshot1.clone();
2842
2843        assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2844    }
2845
2846    #[test]
2847    fn test_graph_snapshot_debug() {
2848        let graph = CodeGraph::new();
2849        let snapshot = graph.snapshot();
2850        let debug_str = format!("{snapshot:?}");
2851        assert!(debug_str.contains("GraphSnapshot"));
2852        assert!(debug_str.contains("epoch"));
2853    }
2854
2855    #[test]
2856    fn test_multiple_readers() {
2857        let graph = ConcurrentCodeGraph::new();
2858
2859        // Multiple readers should be able to acquire locks simultaneously
2860        let guard1 = graph.read();
2861        let guard2 = graph.read();
2862        let guard3 = graph.read();
2863
2864        assert_eq!(guard1.epoch(), 0);
2865        assert_eq!(guard2.epoch(), 0);
2866        assert_eq!(guard3.epoch(), 0);
2867    }
2868
2869    #[test]
2870    fn test_code_graph_clone() {
2871        let mut graph = CodeGraph::new();
2872        graph.bump_epoch();
2873
2874        let cloned = graph.clone();
2875        assert_eq!(cloned.epoch(), 1);
2876    }
2877
2878    #[test]
2879    fn test_epoch_wrapping() {
2880        let mut graph = CodeGraph::new();
2881        graph.set_epoch(u64::MAX);
2882        let new_epoch = graph.bump_epoch();
2883        assert_eq!(new_epoch, 0); // Should wrap around
2884    }
2885
2886    // ============================================================================
2887    // Query method tests
2888    // ============================================================================
2889
2890    #[test]
2891    fn test_snapshot_resolve_symbol() {
2892        use crate::graph::unified::node::NodeKind;
2893        use crate::graph::unified::storage::arena::NodeEntry;
2894        use std::path::Path;
2895
2896        let mut graph = CodeGraph::new();
2897
2898        // Add some nodes with qualified names
2899        let name_id = graph.strings_mut().intern("test_func").unwrap();
2900        let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2901        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2902
2903        let entry =
2904            NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2905
2906        let node_id = graph.nodes_mut().alloc(entry).unwrap();
2907        graph.indices_mut().add(
2908            node_id,
2909            NodeKind::Function,
2910            name_id,
2911            Some(qual_name_id),
2912            file_id,
2913        );
2914
2915        let snapshot = graph.snapshot();
2916
2917        // Find by qualified name
2918        let found = resolve_symbol_strict(&snapshot, "module::test_func");
2919        assert_eq!(found, Some(node_id));
2920
2921        // Find by exact simple name
2922        let found2 = resolve_symbol_strict(&snapshot, "test_func");
2923        assert_eq!(found2, Some(node_id));
2924
2925        // Not found
2926        assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2927    }
2928
2929    #[test]
2930    fn test_snapshot_find_by_pattern() {
2931        use crate::graph::unified::node::NodeKind;
2932        use crate::graph::unified::storage::arena::NodeEntry;
2933        use std::path::Path;
2934
2935        let mut graph = CodeGraph::new();
2936
2937        // Add nodes with different names
2938        let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2939        let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2940        let name3 = graph.strings_mut().intern("qux_test").unwrap();
2941        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2942
2943        let node1 = graph
2944            .nodes_mut()
2945            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2946            .unwrap();
2947        let node2 = graph
2948            .nodes_mut()
2949            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2950            .unwrap();
2951        let node3 = graph
2952            .nodes_mut()
2953            .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2954            .unwrap();
2955
2956        graph
2957            .indices_mut()
2958            .add(node1, NodeKind::Function, name1, None, file_id);
2959        graph
2960            .indices_mut()
2961            .add(node2, NodeKind::Function, name2, None, file_id);
2962        graph
2963            .indices_mut()
2964            .add(node3, NodeKind::Function, name3, None, file_id);
2965
2966        let snapshot = graph.snapshot();
2967
2968        // Find by pattern
2969        let matches = snapshot.find_by_pattern("bar");
2970        assert_eq!(matches.len(), 2);
2971        assert!(matches.contains(&node1));
2972        assert!(matches.contains(&node2));
2973
2974        // Find single match
2975        let matches = snapshot.find_by_pattern("qux");
2976        assert_eq!(matches.len(), 1);
2977        assert_eq!(matches[0], node3);
2978
2979        // No matches
2980        let matches = snapshot.find_by_pattern("nonexistent");
2981        assert!(matches.is_empty());
2982    }
2983
2984    #[test]
2985    fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
2986        // C_SUPPRESS: synthetic placeholder nodes (Go plugin
2987        // `<field:operand.field>` shadows + `<ident>@<offset>`
2988        // per-binding-site Variables) must NOT surface from
2989        // find_by_pattern, but must still be reachable via
2990        // find_by_pattern_with_options(_, true).
2991        use crate::graph::unified::node::NodeKind;
2992        use crate::graph::unified::storage::arena::NodeEntry;
2993        use std::path::Path;
2994
2995        let mut graph = CodeGraph::new();
2996        let real_property = graph
2997            .strings_mut()
2998            .intern("main.SelectorSource.NeedTags")
2999            .unwrap();
3000        let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
3001        let synthetic_field = graph
3002            .strings_mut()
3003            .intern("<field:selector.NeedTags>")
3004            .unwrap();
3005        let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
3006        let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
3007        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3008
3009        let prop_id = graph
3010            .nodes_mut()
3011            .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
3012            .unwrap();
3013        let local_var_id = graph
3014            .nodes_mut()
3015            .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
3016            .unwrap();
3017        let syn_field_id = graph
3018            .nodes_mut()
3019            .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
3020            .unwrap();
3021        let syn_a_id = graph
3022            .nodes_mut()
3023            .alloc(NodeEntry::new(
3024                NodeKind::Variable,
3025                synthetic_offset_a,
3026                file_id,
3027            ))
3028            .unwrap();
3029        let syn_b_id = graph
3030            .nodes_mut()
3031            .alloc(NodeEntry::new(
3032                NodeKind::Variable,
3033                synthetic_offset_b,
3034                file_id,
3035            ))
3036            .unwrap();
3037
3038        graph
3039            .indices_mut()
3040            .add(prop_id, NodeKind::Property, real_property, None, file_id);
3041        graph.indices_mut().add(
3042            local_var_id,
3043            NodeKind::Variable,
3044            real_local_var,
3045            None,
3046            file_id,
3047        );
3048        graph.indices_mut().add(
3049            syn_field_id,
3050            NodeKind::Variable,
3051            synthetic_field,
3052            None,
3053            file_id,
3054        );
3055        graph.indices_mut().add(
3056            syn_a_id,
3057            NodeKind::Variable,
3058            synthetic_offset_a,
3059            None,
3060            file_id,
3061        );
3062        graph.indices_mut().add(
3063            syn_b_id,
3064            NodeKind::Variable,
3065            synthetic_offset_b,
3066            None,
3067            file_id,
3068        );
3069
3070        // Flag two of them via the metadata-store bit (the canonical
3071        // Go-plugin emission path) and leave one (`<field:...>`) only
3072        // covered by the structural name-shape fallback to verify both
3073        // recognition channels suppress the leak.
3074        graph.macro_metadata_mut().mark_synthetic(syn_a_id);
3075        graph.macro_metadata_mut().mark_synthetic(syn_b_id);
3076
3077        let snapshot = graph.snapshot();
3078
3079        // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
3080        let matches = snapshot.find_by_pattern("NeedTags");
3081        assert!(matches.contains(&prop_id), "Property must be surfaced");
3082        assert!(
3083            matches.contains(&local_var_id),
3084            "real local var must be surfaced"
3085        );
3086        assert!(
3087            !matches.contains(&syn_field_id),
3088            "<field:...> synthetic must be suppressed (name-shape fallback)"
3089        );
3090        assert!(
3091            !matches.contains(&syn_a_id),
3092            "NeedTags@469 must be suppressed (metadata bit)"
3093        );
3094        assert!(
3095            !matches.contains(&syn_b_id),
3096            "NeedTags@508 must be suppressed (metadata bit)"
3097        );
3098        assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");
3099
3100        // Internal include-synthetic surface (binding plane, scope analysis):
3101        // every node remains reachable.
3102        let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
3103        assert_eq!(
3104            all_matches.len(),
3105            5,
3106            "include_synthetic surfaces everything"
3107        );
3108        assert!(all_matches.contains(&prop_id));
3109        assert!(all_matches.contains(&local_var_id));
3110        assert!(all_matches.contains(&syn_field_id));
3111        assert!(all_matches.contains(&syn_a_id));
3112        assert!(all_matches.contains(&syn_b_id));
3113
3114        // is_node_synthetic exposed for surface-level filters
3115        // (e.g., MCP semantic_search/relation_query post-filters).
3116        assert!(snapshot.is_node_synthetic(syn_field_id));
3117        assert!(snapshot.is_node_synthetic(syn_a_id));
3118        assert!(snapshot.is_node_synthetic(syn_b_id));
3119        assert!(!snapshot.is_node_synthetic(prop_id));
3120        assert!(!snapshot.is_node_synthetic(local_var_id));
3121    }
3122
3123    #[test]
3124    #[allow(clippy::too_many_lines)]
3125    fn find_by_exact_name_aligns_with_planner_name_predicate() {
3126        // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
3127        // surface for the CLI `--exact NeedTags` shorthand and the
3128        // planner's `name:NeedTags` predicate. Both paths must return
3129        // the same set against this fixture.
3130        use crate::graph::unified::node::NodeKind;
3131        use crate::graph::unified::storage::arena::NodeEntry;
3132        use std::path::Path;
3133
3134        let mut graph = CodeGraph::new();
3135        // Property nodes carry the package-qualified name as
3136        // `entry.name` (Go plugin convention; see `helper.rs`'s
3137        // `semantic_name_for_node_input`).
3138        let property_qname = graph
3139            .strings_mut()
3140            .intern("main.SelectorSource.NeedTags")
3141            .unwrap();
3142        let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
3143        let synthetic_field_name = graph
3144            .strings_mut()
3145            .intern("<field:selector.NeedTags>")
3146            .unwrap();
3147        let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
3148        let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
3149        let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
3150        let display_fallback_qname = graph
3151            .strings_mut()
3152            .intern("main::SelectorSource::Other")
3153            .unwrap();
3154        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3155
3156        let prop_id = graph
3157            .nodes_mut()
3158            .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
3159            .unwrap();
3160        let local_var_id = graph
3161            .nodes_mut()
3162            .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
3163            .unwrap();
3164        let syn_field_id = graph
3165            .nodes_mut()
3166            .alloc(NodeEntry::new(
3167                NodeKind::Variable,
3168                synthetic_field_name,
3169                file_id,
3170            ))
3171            .unwrap();
3172        let syn_offset_id = graph
3173            .nodes_mut()
3174            .alloc(NodeEntry::new(
3175                NodeKind::Variable,
3176                synthetic_offset_name,
3177                file_id,
3178            ))
3179            .unwrap();
3180        let unrelated_id = graph
3181            .nodes_mut()
3182            .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
3183            .unwrap();
3184        let display_fallback_id = graph
3185            .nodes_mut()
3186            .alloc(
3187                NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
3188                    .with_qualified_name(display_fallback_qname),
3189            )
3190            .unwrap();
3191
3192        graph
3193            .indices_mut()
3194            .add(prop_id, NodeKind::Property, property_qname, None, file_id);
3195        graph.indices_mut().add(
3196            local_var_id,
3197            NodeKind::Variable,
3198            local_var_name,
3199            None,
3200            file_id,
3201        );
3202        graph.indices_mut().add(
3203            syn_field_id,
3204            NodeKind::Variable,
3205            synthetic_field_name,
3206            None,
3207            file_id,
3208        );
3209        graph.indices_mut().add(
3210            syn_offset_id,
3211            NodeKind::Variable,
3212            synthetic_offset_name,
3213            None,
3214            file_id,
3215        );
3216        graph.indices_mut().add(
3217            unrelated_id,
3218            NodeKind::Function,
3219            unrelated_name,
3220            None,
3221            file_id,
3222        );
3223        graph.indices_mut().add(
3224            display_fallback_id,
3225            NodeKind::Property,
3226            display_fallback_name,
3227            Some(display_fallback_qname),
3228            file_id,
3229        );
3230
3231        // Mark the offset-suffixed synthetic via the metadata bit so we
3232        // exercise both recognition channels in this fixture.
3233        graph.macro_metadata_mut().mark_synthetic(syn_offset_id);
3234
3235        let snapshot = graph.snapshot();
3236
3237        // Exact-name lookup on "NeedTags" — should pick up only the
3238        // local variable (its `entry.name` is exactly "NeedTags"); it
3239        // must NOT pick up the Property (qualified name contains but
3240        // does not equal "NeedTags") and must NOT pick up either
3241        // synthetic placeholder.
3242        let exact = snapshot.find_by_exact_name("NeedTags");
3243        assert_eq!(
3244            exact,
3245            vec![local_var_id],
3246            "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
3247        );
3248
3249        // The Property's full qualified name is exact-addressable.
3250        let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
3251        assert_eq!(qualified, vec![prop_id]);
3252
3253        // Dot-qualified display form falls back to graph-canonical `::` only
3254        // when the exact dot string was absent.
3255        let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
3256        assert_eq!(display_fallback, vec![display_fallback_id]);
3257
3258        // Substring-only matches must not surface from exact lookup.
3259        assert!(
3260            snapshot
3261                .find_by_exact_name("NeedTagsHelper")
3262                .contains(&unrelated_id)
3263        );
3264        assert!(
3265            !snapshot
3266                .find_by_exact_name("NeedTags")
3267                .contains(&unrelated_id),
3268            "exact 'NeedTags' must not match 'NeedTagsHelper'"
3269        );
3270
3271        // Unknown name short-circuits to an empty vec without
3272        // scanning any nodes.
3273        assert!(
3274            snapshot
3275                .find_by_exact_name("ThisStringIsNotInterned")
3276                .is_empty()
3277        );
3278    }
3279
3280    #[test]
3281    fn test_snapshot_get_callees() {
3282        use crate::graph::unified::edge::EdgeKind;
3283        use crate::graph::unified::node::NodeKind;
3284        use crate::graph::unified::storage::arena::NodeEntry;
3285        use std::path::Path;
3286
3287        let mut graph = CodeGraph::new();
3288
3289        // Create caller and callee nodes
3290        let caller_name = graph.strings_mut().intern("caller").unwrap();
3291        let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3292        let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3293        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3294
3295        let caller_id = graph
3296            .nodes_mut()
3297            .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3298            .unwrap();
3299        let callee1_id = graph
3300            .nodes_mut()
3301            .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3302            .unwrap();
3303        let callee2_id = graph
3304            .nodes_mut()
3305            .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3306            .unwrap();
3307
3308        // Add call edges
3309        graph.edges_mut().add_edge(
3310            caller_id,
3311            callee1_id,
3312            EdgeKind::Calls {
3313                argument_count: 0,
3314                is_async: false,
3315                resolved_via: ResolvedVia::Direct,
3316            },
3317            file_id,
3318        );
3319        graph.edges_mut().add_edge(
3320            caller_id,
3321            callee2_id,
3322            EdgeKind::Calls {
3323                argument_count: 0,
3324                is_async: false,
3325                resolved_via: ResolvedVia::Direct,
3326            },
3327            file_id,
3328        );
3329
3330        let snapshot = graph.snapshot();
3331
3332        // Query callees
3333        let callees = snapshot.get_callees(caller_id);
3334        assert_eq!(callees.len(), 2);
3335        assert!(callees.contains(&callee1_id));
3336        assert!(callees.contains(&callee2_id));
3337
3338        // Node with no callees
3339        let callees = snapshot.get_callees(callee1_id);
3340        assert!(callees.is_empty());
3341    }
3342
3343    #[test]
3344    fn test_snapshot_get_callers() {
3345        use crate::graph::unified::edge::EdgeKind;
3346        use crate::graph::unified::node::NodeKind;
3347        use crate::graph::unified::storage::arena::NodeEntry;
3348        use std::path::Path;
3349
3350        let mut graph = CodeGraph::new();
3351
3352        // Create caller and callee nodes
3353        let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3354        let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3355        let callee_name = graph.strings_mut().intern("callee").unwrap();
3356        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3357
3358        let caller1_id = graph
3359            .nodes_mut()
3360            .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3361            .unwrap();
3362        let caller2_id = graph
3363            .nodes_mut()
3364            .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3365            .unwrap();
3366        let callee_id = graph
3367            .nodes_mut()
3368            .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3369            .unwrap();
3370
3371        // Add call edges
3372        graph.edges_mut().add_edge(
3373            caller1_id,
3374            callee_id,
3375            EdgeKind::Calls {
3376                argument_count: 0,
3377                is_async: false,
3378                resolved_via: ResolvedVia::Direct,
3379            },
3380            file_id,
3381        );
3382        graph.edges_mut().add_edge(
3383            caller2_id,
3384            callee_id,
3385            EdgeKind::Calls {
3386                argument_count: 0,
3387                is_async: false,
3388                resolved_via: ResolvedVia::Direct,
3389            },
3390            file_id,
3391        );
3392
3393        let snapshot = graph.snapshot();
3394
3395        // Query callers
3396        let callers = snapshot.get_callers(callee_id);
3397        assert_eq!(callers.len(), 2);
3398        assert!(callers.contains(&caller1_id));
3399        assert!(callers.contains(&caller2_id));
3400
3401        // Node with no callers
3402        let callers = snapshot.get_callers(caller1_id);
3403        assert!(callers.is_empty());
3404    }
3405
3406    #[test]
3407    fn test_snapshot_find_symbol_candidates() {
3408        use crate::graph::unified::node::NodeKind;
3409        use crate::graph::unified::storage::arena::NodeEntry;
3410        use std::path::Path;
3411
3412        let mut graph = CodeGraph::new();
3413
3414        // Add nodes with same symbol name but different qualified names
3415        let symbol_name = graph.strings_mut().intern("test").unwrap();
3416        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3417
3418        let node1 = graph
3419            .nodes_mut()
3420            .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3421            .unwrap();
3422        let node2 = graph
3423            .nodes_mut()
3424            .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3425            .unwrap();
3426
3427        // Add a different symbol
3428        let other_name = graph.strings_mut().intern("other").unwrap();
3429        let node3 = graph
3430            .nodes_mut()
3431            .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3432            .unwrap();
3433
3434        graph
3435            .indices_mut()
3436            .add(node1, NodeKind::Function, symbol_name, None, file_id);
3437        graph
3438            .indices_mut()
3439            .add(node2, NodeKind::Method, symbol_name, None, file_id);
3440        graph
3441            .indices_mut()
3442            .add(node3, NodeKind::Function, other_name, None, file_id);
3443
3444        let snapshot = graph.snapshot();
3445
3446        // Find by symbol
3447        let matches = candidate_nodes(&snapshot, "test");
3448        assert_eq!(matches.len(), 2);
3449        assert!(matches.contains(&node1));
3450        assert!(matches.contains(&node2));
3451
3452        // Find other symbol
3453        let matches = candidate_nodes(&snapshot, "other");
3454        assert_eq!(matches.len(), 1);
3455        assert_eq!(matches[0], node3);
3456
3457        // No matches
3458        let matches = candidate_nodes(&snapshot, "nonexistent");
3459        assert!(matches.is_empty());
3460    }
3461
3462    #[test]
3463    fn test_snapshot_iter_nodes() {
3464        use crate::graph::unified::node::NodeKind;
3465        use crate::graph::unified::storage::arena::NodeEntry;
3466        use std::path::Path;
3467
3468        let mut graph = CodeGraph::new();
3469
3470        // Add some nodes
3471        let name1 = graph.strings_mut().intern("func1").unwrap();
3472        let name2 = graph.strings_mut().intern("func2").unwrap();
3473        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3474
3475        let node1 = graph
3476            .nodes_mut()
3477            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3478            .unwrap();
3479        let node2 = graph
3480            .nodes_mut()
3481            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3482            .unwrap();
3483
3484        let snapshot = graph.snapshot();
3485
3486        // Iterate nodes
3487        let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3488        assert_eq!(snapshot_nodes.len(), 2);
3489
3490        let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3491        assert!(node_ids.contains(&node1));
3492        assert!(node_ids.contains(&node2));
3493    }
3494
3495    #[test]
3496    fn test_snapshot_iter_edges() {
3497        use crate::graph::unified::edge::EdgeKind;
3498        use crate::graph::unified::node::NodeKind;
3499        use crate::graph::unified::storage::arena::NodeEntry;
3500        use std::path::Path;
3501
3502        let mut graph = CodeGraph::new();
3503
3504        // Create nodes
3505        let name1 = graph.strings_mut().intern("func1").unwrap();
3506        let name2 = graph.strings_mut().intern("func2").unwrap();
3507        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3508
3509        let node1 = graph
3510            .nodes_mut()
3511            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3512            .unwrap();
3513        let node2 = graph
3514            .nodes_mut()
3515            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3516            .unwrap();
3517
3518        // Add edges
3519        graph.edges_mut().add_edge(
3520            node1,
3521            node2,
3522            EdgeKind::Calls {
3523                argument_count: 0,
3524                is_async: false,
3525                resolved_via: ResolvedVia::Direct,
3526            },
3527            file_id,
3528        );
3529
3530        let snapshot = graph.snapshot();
3531
3532        // Iterate edges
3533        let edges: Vec<_> = snapshot.iter_edges().collect();
3534        assert_eq!(edges.len(), 1);
3535
3536        let (src, tgt, kind) = &edges[0];
3537        assert_eq!(*src, node1);
3538        assert_eq!(*tgt, node2);
3539        assert!(matches!(
3540            kind,
3541            EdgeKind::Calls {
3542                argument_count: 0,
3543                is_async: false,
3544                resolved_via: ResolvedVia::Direct,
3545            }
3546        ));
3547    }
3548
3549    #[test]
3550    fn test_snapshot_get_node() {
3551        use crate::graph::unified::node::NodeId;
3552        use crate::graph::unified::node::NodeKind;
3553        use crate::graph::unified::storage::arena::NodeEntry;
3554        use std::path::Path;
3555
3556        let mut graph = CodeGraph::new();
3557
3558        // Add a node
3559        let name = graph.strings_mut().intern("test_func").unwrap();
3560        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3561
3562        let node_id = graph
3563            .nodes_mut()
3564            .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3565            .unwrap();
3566
3567        let snapshot = graph.snapshot();
3568
3569        // Get node
3570        let entry = snapshot.get_node(node_id);
3571        assert!(entry.is_some());
3572        assert_eq!(entry.unwrap().kind, NodeKind::Function);
3573
3574        // Invalid node
3575        let invalid_id = NodeId::INVALID;
3576        assert!(snapshot.get_node(invalid_id).is_none());
3577    }
3578
3579    #[test]
3580    fn test_snapshot_query_empty_graph() {
3581        use crate::graph::unified::node::NodeId;
3582
3583        let graph = CodeGraph::new();
3584        let snapshot = graph.snapshot();
3585
3586        // All queries should return empty on empty graph
3587        assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3588        assert!(snapshot.find_by_pattern("test").is_empty());
3589        assert!(candidate_nodes(&snapshot, "test").is_empty());
3590
3591        let dummy_id = NodeId::new(0, 1);
3592        assert!(snapshot.get_callees(dummy_id).is_empty());
3593        assert!(snapshot.get_callers(dummy_id).is_empty());
3594
3595        assert_eq!(snapshot.iter_nodes().count(), 0);
3596        assert_eq!(snapshot.iter_edges().count(), 0);
3597    }
3598
3599    #[test]
3600    fn test_snapshot_edge_filtering_by_kind() {
3601        use crate::graph::unified::edge::EdgeKind;
3602        use crate::graph::unified::node::NodeKind;
3603        use crate::graph::unified::storage::arena::NodeEntry;
3604        use std::path::Path;
3605
3606        let mut graph = CodeGraph::new();
3607
3608        // Create nodes
3609        let name1 = graph.strings_mut().intern("func1").unwrap();
3610        let name2 = graph.strings_mut().intern("func2").unwrap();
3611        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3612
3613        let node1 = graph
3614            .nodes_mut()
3615            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3616            .unwrap();
3617        let node2 = graph
3618            .nodes_mut()
3619            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3620            .unwrap();
3621
3622        // Add different kinds of edges
3623        graph.edges_mut().add_edge(
3624            node1,
3625            node2,
3626            EdgeKind::Calls {
3627                argument_count: 0,
3628                is_async: false,
3629                resolved_via: ResolvedVia::Direct,
3630            },
3631            file_id,
3632        );
3633        graph
3634            .edges_mut()
3635            .add_edge(node1, node2, EdgeKind::References, file_id);
3636
3637        let snapshot = graph.snapshot();
3638
3639        // get_callees should only return Calls edges
3640        let callees = snapshot.get_callees(node1);
3641        assert_eq!(callees.len(), 1);
3642        assert_eq!(callees[0], node2);
3643
3644        // iter_edges returns all edges regardless of kind
3645        let edges: Vec<_> = snapshot.iter_edges().collect();
3646        assert_eq!(edges.len(), 2);
3647    }
3648
3649    // -------- reverse_import_index tests --------
3650
3651    /// Helper: build an empty graph with the given file paths registered, a
3652    /// single placeholder node allocated per file, and indices rebuilt so
3653    /// `by_file` returns the per-file node sets. Returns the file IDs in
3654    /// the order passed in and the per-file node ID.
3655    #[cfg(test)]
3656    fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3657        use crate::graph::unified::node::NodeKind;
3658        use crate::graph::unified::storage::arena::NodeEntry;
3659        use std::path::Path;
3660
3661        let mut graph = CodeGraph::new();
3662        let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3663        let mut file_ids = Vec::with_capacity(files.len());
3664        let mut node_ids = Vec::with_capacity(files.len());
3665        for path in files {
3666            let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3667            let node_id = graph
3668                .nodes_mut()
3669                .alloc(NodeEntry::new(
3670                    NodeKind::Function,
3671                    placeholder_name,
3672                    file_id,
3673                ))
3674                .unwrap();
3675            file_ids.push(file_id);
3676            node_ids.push(node_id);
3677        }
3678        graph.rebuild_indices();
3679        (graph, file_ids, node_ids)
3680    }
3681
3682    /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3683    /// `target_node` (in exporter file). The edge is recorded against the
3684    /// importer file so `clear_file` cleanup would behave identically to
3685    /// production Pass 4 writes.
3686    #[cfg(test)]
3687    fn add_import_edge(
3688        graph: &mut CodeGraph,
3689        source_node: NodeId,
3690        target_node: NodeId,
3691        importer_file: FileId,
3692    ) {
3693        graph.edges_mut().add_edge(
3694            source_node,
3695            target_node,
3696            EdgeKind::Imports {
3697                alias: None,
3698                is_wildcard: false,
3699            },
3700            importer_file,
3701        );
3702    }
3703
3704    #[test]
3705    fn reverse_import_index_empty_graph_returns_empty() {
3706        let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3707        assert!(graph.reverse_import_index(files[0]).is_empty());
3708    }
3709
3710    #[test]
3711    fn reverse_import_index_single_importer() {
3712        // File A imports a symbol exported by file B. Reverse index of B
3713        // should return exactly [A]; reverse index of A should be empty.
3714        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3715        let (a, b) = (files[0], files[1]);
3716        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3717
3718        let importers_of_b = graph.reverse_import_index(b);
3719        assert_eq!(importers_of_b, vec![a]);
3720
3721        let importers_of_a = graph.reverse_import_index(a);
3722        assert!(
3723            importers_of_a.is_empty(),
3724            "A has no inbound Imports edges; reverse index must be empty"
3725        );
3726    }
3727
3728    #[test]
3729    fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3730        // A, B, C all import from D. Reverse index of D must contain each
3731        // importer exactly once, sorted ascending by FileId.
3732        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3733        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3734        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3735        add_import_edge(&mut graph, nodes[1], nodes[3], b);
3736        add_import_edge(&mut graph, nodes[2], nodes[3], c);
3737        // Add a duplicate edge from A to D to confirm dedup behavior.
3738        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3739
3740        let importers_of_d = graph.reverse_import_index(d);
3741        assert_eq!(importers_of_d, vec![a, b, c]);
3742        // Sort invariant: Vec is ascending by raw index.
3743        let mut sorted = importers_of_d.clone();
3744        sorted.sort();
3745        assert_eq!(importers_of_d, sorted);
3746    }
3747
3748    #[test]
3749    fn reverse_import_index_filters_non_import_edges() {
3750        // A `Calls` edge from A into B must not contribute to B's reverse
3751        // import index. Only `EdgeKind::Imports` edges count.
3752        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3753        let (a, b) = (files[0], files[1]);
3754        graph.edges_mut().add_edge(
3755            nodes[0],
3756            nodes[1],
3757            EdgeKind::Calls {
3758                argument_count: 0,
3759                is_async: false,
3760                resolved_via: ResolvedVia::Direct,
3761            },
3762            a,
3763        );
3764        graph
3765            .edges_mut()
3766            .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3767
3768        assert!(
3769            graph.reverse_import_index(b).is_empty(),
3770            "non-Imports edges must not register as importers"
3771        );
3772    }
3773
3774    #[test]
3775    fn reverse_import_index_elides_self_imports() {
3776        // An Imports edge whose source and target are both in the same file
3777        // is a self-import; the caller's own file must not appear in its own
3778        // reverse index.
3779        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3780        let a = files[0];
3781        // Add a second node in the same file so we have a distinct source.
3782        let name2 = graph.strings_mut().intern("sym2").unwrap();
3783        let second_in_a = graph
3784            .nodes_mut()
3785            .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3786                crate::graph::unified::node::NodeKind::Function,
3787                name2,
3788                a,
3789            ))
3790            .unwrap();
3791        graph.rebuild_indices();
3792        add_import_edge(&mut graph, second_in_a, nodes[0], a);
3793
3794        assert!(
3795            graph.reverse_import_index(a).is_empty(),
3796            "self-imports must be elided from reverse index"
3797        );
3798    }
3799
3800    #[test]
3801    fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3802        // Two files: A has both Calls and Imports edges into B. Reverse
3803        // index must return exactly [A] — the Calls edge contributes
3804        // nothing.
3805        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3806        let (a, b) = (files[0], files[1]);
3807        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3808        graph.edges_mut().add_edge(
3809            nodes[0],
3810            nodes[1],
3811            EdgeKind::Calls {
3812                argument_count: 0,
3813                is_async: false,
3814                resolved_via: ResolvedVia::Direct,
3815            },
3816            a,
3817        );
3818
3819        assert_eq!(graph.reverse_import_index(b), vec![a]);
3820    }
3821
3822    #[test]
3823    fn reverse_import_index_uninitialized_file_returns_empty() {
3824        // Querying a FileId that is not registered in the graph must return
3825        // an empty Vec, not panic.
3826        let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3827        let bogus = FileId::new(9999);
3828        assert!(
3829            graph.reverse_import_index(bogus).is_empty(),
3830            "unknown FileId must return empty Vec without panicking"
3831        );
3832    }
3833
3834    #[test]
3835    fn reverse_import_index_skips_tombstoned_source_nodes() {
3836        // In the incremental rebuild path the prior graph retains tombstoned
3837        // arena slots for nodes belonging to closure files that have been
3838        // removed but not yet fully compacted. reverse_import_index is
3839        // called on that prior graph to widen the closure, so its
3840        // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3841        // is a semantically important branch, not a theoretical one.
3842        // This test exercises it directly by tombstoning the source node of
3843        // an Imports edge and asserting the edge silently disappears from
3844        // the reverse index.
3845        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3846        let (a, b) = (files[0], files[1]);
3847        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3848        // Sanity: before tombstoning, A shows up as the importer of B.
3849        assert_eq!(graph.reverse_import_index(b), vec![a]);
3850        // Tombstone the source node via the arena (generation bump). The
3851        // Imports edge still exists in the edge store but its source NodeId
3852        // is now stale; `nodes.get(edge_ref.source)` returns None and the
3853        // guard skips the edge.
3854        let removed = graph.nodes_mut().remove(nodes[0]);
3855        assert!(
3856            removed.is_some(),
3857            "arena.remove must succeed for a live node"
3858        );
3859        assert!(
3860            graph.nodes().get(nodes[0]).is_none(),
3861            "tombstoned lookup must return None"
3862        );
3863        assert!(
3864            graph.reverse_import_index(b).is_empty(),
3865            "Imports edges whose source is tombstoned must be silently skipped"
3866        );
3867    }
3868
3869    // ------------------------------------------------------------------
3870    // Task 4 Step 2 — CodeGraph::remove_file
3871    // ------------------------------------------------------------------
3872
3873    /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3874    /// of intra- and inter-file edges. Each call site produces a
3875    /// canonical topology so tests below can assert bit-level on edge
3876    /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3877    /// file_b_nodes)`.
3878    fn seed_two_file_graph(
3879        per_file: usize,
3880    ) -> (
3881        CodeGraph,
3882        crate::graph::unified::file::FileId,
3883        crate::graph::unified::file::FileId,
3884        Vec<NodeId>,
3885        Vec<NodeId>,
3886    ) {
3887        use crate::graph::unified::edge::EdgeKind;
3888        use crate::graph::unified::node::NodeKind;
3889        use crate::graph::unified::storage::arena::NodeEntry;
3890        use std::path::Path;
3891
3892        let mut graph = CodeGraph::new();
3893        let sym = graph.strings_mut().intern("sym").expect("intern");
3894        let file_a = graph
3895            .files_mut()
3896            .register(Path::new("/tmp/remove_file_test/a.rs"))
3897            .expect("register a");
3898        let file_b = graph
3899            .files_mut()
3900            .register(Path::new("/tmp/remove_file_test/b.rs"))
3901            .expect("register b");
3902
3903        let mut file_a_nodes = Vec::with_capacity(per_file);
3904        let mut file_b_nodes = Vec::with_capacity(per_file);
3905
3906        for _ in 0..per_file {
3907            let n = graph
3908                .nodes_mut()
3909                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3910                .expect("alloc a-node");
3911            file_a_nodes.push(n);
3912            graph.files_mut().record_node(file_a, n);
3913            graph
3914                .indices_mut()
3915                .add(n, NodeKind::Function, sym, None, file_a);
3916        }
3917        for _ in 0..per_file {
3918            let n = graph
3919                .nodes_mut()
3920                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3921                .expect("alloc b-node");
3922            file_b_nodes.push(n);
3923            graph.files_mut().record_node(file_b, n);
3924            graph
3925                .indices_mut()
3926                .add(n, NodeKind::Function, sym, None, file_b);
3927        }
3928
3929        // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3930        // b[i] -> b[i+1]. These are the ones that must die when the
3931        // corresponding file is removed.
3932        for i in 0..per_file.saturating_sub(1) {
3933            graph.edges_mut().add_edge(
3934                file_a_nodes[i],
3935                file_a_nodes[i + 1],
3936                EdgeKind::Calls {
3937                    argument_count: 0,
3938                    is_async: false,
3939                    resolved_via: ResolvedVia::Direct,
3940                },
3941                file_a,
3942            );
3943            graph.edges_mut().add_edge(
3944                file_b_nodes[i],
3945                file_b_nodes[i + 1],
3946                EdgeKind::Calls {
3947                    argument_count: 0,
3948                    is_async: false,
3949                    resolved_via: ResolvedVia::Direct,
3950                },
3951                file_b,
3952            );
3953        }
3954        // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3955        // when *either* endpoint's file is removed (plan §F.2).
3956        graph.edges_mut().add_edge(
3957            file_a_nodes[0],
3958            file_b_nodes[0],
3959            EdgeKind::Calls {
3960                argument_count: 0,
3961                is_async: false,
3962                resolved_via: ResolvedVia::Direct,
3963            },
3964            file_a,
3965        );
3966        graph.edges_mut().add_edge(
3967            file_b_nodes[0],
3968            file_a_nodes[0],
3969            EdgeKind::Calls {
3970                argument_count: 0,
3971                is_async: false,
3972                resolved_via: ResolvedVia::Direct,
3973            },
3974            file_b,
3975        );
3976
3977        (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3978    }
3979
3980    #[test]
3981    fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3982        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3983
3984        let returned = graph.remove_file(file_a);
3985
3986        // Returned list must equal the original per-file bucket
3987        // membership, deterministically.
3988        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3989        let expected_set: std::collections::HashSet<NodeId> =
3990            file_a_nodes.iter().copied().collect();
3991        assert_eq!(
3992            returned_set, expected_set,
3993            "remove_file must return exactly the file_a nodes drained from the bucket"
3994        );
3995
3996        // Every returned NodeId is gone from the arena.
3997        for nid in &file_a_nodes {
3998            assert!(
3999                graph.nodes().get(*nid).is_none(),
4000                "node {nid:?} from removed file must be tombstoned in arena"
4001            );
4002        }
4003    }
4004
4005    #[test]
4006    fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
4007        use crate::graph::unified::edge::EdgeKind;
4008
4009        let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
4010
4011        // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
4012        // 2 cross-file edges (a0↔b0).
4013        let before_delta = graph.edges().stats().forward.delta_edge_count;
4014        assert_eq!(
4015            before_delta, 6,
4016            "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
4017        );
4018
4019        let _ = graph.remove_file(file_a);
4020
4021        // Forward delta after removal: all 2 intra-A edges are gone,
4022        // both cross edges are gone, only the 2 intra-B edges remain.
4023        let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
4024        assert_eq!(
4025            after_delta_forward, 2,
4026            "only intra-B forward edges must remain after removing file_a"
4027        );
4028        let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
4029        assert_eq!(
4030            after_delta_reverse, 2,
4031            "only intra-B reverse edges must remain after removing file_a"
4032        );
4033
4034        // Cross-file edge from b[0] -> a[0] must no longer be visible
4035        // from either direction, because a[0] is tombstoned.
4036        let b0 = file_b_nodes[0];
4037        let a0 = file_a_nodes[0];
4038        let remaining_from_b0: Vec<_> = graph
4039            .edges()
4040            .edges_from(b0)
4041            .into_iter()
4042            .filter(|e| {
4043                matches!(
4044                    e.kind,
4045                    EdgeKind::Calls {
4046                        argument_count: 0,
4047                        is_async: false,
4048                        resolved_via: ResolvedVia::Direct,
4049                    }
4050                )
4051            })
4052            .collect();
4053        assert!(
4054            !remaining_from_b0.iter().any(|e| e.target == a0),
4055            "edge b0 -> a0 must be gone after remove_file(file_a)"
4056        );
4057        let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
4058        assert!(
4059            remaining_to_a0.is_empty(),
4060            "every edge targeting the tombstoned a0 must be gone"
4061        );
4062    }
4063
4064    #[test]
4065    fn code_graph_remove_file_drops_file_registry_entry() {
4066        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
4067
4068        assert!(
4069            graph.files().resolve(file_a).is_some(),
4070            "seed registered file_a"
4071        );
4072        assert!(
4073            !graph.files().nodes_for_file(file_a).is_empty(),
4074            "seed populated the file_a bucket"
4075        );
4076
4077        let _ = graph.remove_file(file_a);
4078
4079        assert!(
4080            graph.files().resolve(file_a).is_none(),
4081            "FileRegistry::resolve must return None after remove_file"
4082        );
4083        assert!(
4084            graph.files().nodes_for_file(file_a).is_empty(),
4085            "per-file bucket for file_a must be drained"
4086        );
4087    }
4088
4089    #[test]
4090    fn code_graph_remove_file_is_idempotent_on_unknown_file() {
4091        use crate::graph::unified::file::FileId;
4092        let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
4093
4094        // Snapshot state before idempotent no-op.
4095        let nodes_before = graph.nodes().len();
4096        let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
4097        let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
4098        let files_before = graph.files().len();
4099
4100        // A FileId that was never registered: the caller may legitimately
4101        // receive a bogus id from stale indexing state. `remove_file`
4102        // must be a silent no-op.
4103        let bogus = FileId::new(9999);
4104        let returned = graph.remove_file(bogus);
4105        assert!(
4106            returned.is_empty(),
4107            "remove_file on unknown FileId must return an empty Vec"
4108        );
4109
4110        assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
4111        assert_eq!(
4112            graph.edges().stats().forward.delta_edge_count,
4113            delta_fwd_before,
4114            "forward delta unchanged"
4115        );
4116        assert_eq!(
4117            graph.edges().stats().reverse.delta_edge_count,
4118            delta_rev_before,
4119            "reverse delta unchanged"
4120        );
4121        assert_eq!(graph.files().len(), files_before, "file count unchanged");
4122    }
4123
4124    #[test]
4125    fn code_graph_remove_file_clears_file_segments_entry() {
4126        // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
4127        // must be cleared on `remove_file`. Without this, a later
4128        // `FileId` recycle (via `FileRegistry::free_list`) would
4129        // inherit the previous file's stale node-range and
4130        // `reindex_files` (`build/reindex.rs`) would tombstone the
4131        // wrong slots. This unit test seeds a segment entry, removes
4132        // the file, and asserts the entry is gone.
4133        use crate::graph::unified::storage::segment::FileSegmentTable;
4134
4135        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4136
4137        // Seed a segment for file A. Production code sets this via
4138        // Phase 3 parallel commit; here we go through the crate-
4139        // internal accessor because we are a unit test inside the
4140        // crate. (The feature-gated `test_only_record_file_segment`
4141        // public helper exists for the integration tests in
4142        // `sqry-core/tests/incremental_remove_file_scale.rs`.)
4143        let first_index = file_a_nodes
4144            .iter()
4145            .map(|n| n.index())
4146            .min()
4147            .expect("per_file = 3");
4148        let last_index = file_a_nodes
4149            .iter()
4150            .map(|n| n.index())
4151            .max()
4152            .expect("per_file = 3");
4153        let slot_count = last_index - first_index + 1;
4154        let table: &mut FileSegmentTable = graph.file_segments_mut();
4155        table.record_range(file_a, first_index, slot_count);
4156        assert!(
4157            graph.file_segments().get(file_a).is_some(),
4158            "seed must install a segment for file_a before remove_file"
4159        );
4160
4161        // Remove the file.
4162        let _ = graph.remove_file(file_a);
4163
4164        // The segment entry must be gone. `FileSegmentTable::remove`
4165        // is idempotent (a no-op on unknown ids), so this assertion
4166        // holds whether or not the seeded range was contiguous.
4167        assert!(
4168            graph.file_segments().get(file_a).is_none(),
4169            "remove_file must clear the FileSegmentTable entry for file_a"
4170        );
4171    }
4172
4173    #[test]
4174    fn code_graph_remove_file_repeated_calls_are_idempotent() {
4175        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4176
4177        // First call does the work.
4178        let first = graph.remove_file(file_a);
4179        assert_eq!(first.len(), file_a_nodes.len());
4180
4181        // Snapshot post-first-call state.
4182        let nodes_after = graph.nodes().len();
4183        let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
4184        let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
4185        let files_after = graph.files().len();
4186
4187        // Second call must be a silent no-op — the bucket is empty,
4188        // the file is unregistered, and the arena slots are already
4189        // tombstoned (NodeArena::remove ignores stale generations).
4190        let second = graph.remove_file(file_a);
4191        assert!(
4192            second.is_empty(),
4193            "second remove_file on the same file must return an empty Vec"
4194        );
4195
4196        assert_eq!(graph.nodes().len(), nodes_after);
4197        assert_eq!(
4198            graph.edges().stats().forward.delta_edge_count,
4199            delta_fwd_after
4200        );
4201        assert_eq!(
4202            graph.edges().stats().reverse.delta_edge_count,
4203            delta_rev_after
4204        );
4205        assert_eq!(graph.files().len(), files_after);
4206    }
4207}