Skip to main content

sqry_core/graph/unified/concurrent/
graph.rs

1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19    ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24#[cfg(test)]
25use crate::graph::unified::edge::ResolvedVia;
26use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
27use crate::graph::unified::file::FileId;
28use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
29use crate::graph::unified::resolution::display_graph_qualified_name;
30use crate::graph::unified::storage::arena::NodeArena;
31use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
32use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
33use crate::graph::unified::storage::indices::AuxiliaryIndices;
34use crate::graph::unified::storage::interner::StringInterner;
35use crate::graph::unified::storage::metadata::NodeMetadataStore;
36use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
37use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
38use crate::graph::unified::storage::segment::FileSegmentTable;
39use crate::graph::unified::string::id::StringId;
40
41/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
42///
43/// `CodeGraph` uses `Arc` for all internal components, enabling:
44/// - O(1) snapshot creation via Arc cloning
45/// - Copy-on-write semantics via `Arc::make_mut`
46/// - Memory-efficient sharing between snapshots
47///
48/// # Design
49///
50/// The Arc wrapping enables the MVCC pattern:
51/// - Readers see a consistent snapshot at the time they acquired access
52/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
53/// - Multiple snapshots can coexist without copying data
54///
55/// # Performance
56///
57/// - Snapshot creation: O(5) Arc clones ≈ <1μs
58/// - Read access: Direct Arc dereference, no locking
59/// - Write access: `Arc::make_mut` clones only if refcount > 1
60///
61/// # Phase 2 binding-plane access
62///
63/// Use the two-line snapshot pattern to access `BindingPlane`:
64///
65/// ```rust,ignore
66/// let snapshot = graph.snapshot();
67/// let plane = snapshot.binding_plane();
68/// let resolution = plane.resolve(&query);
69/// ```
70///
71/// The two-line form is intentional: `BindingPlane<'g>` borrows from
72/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
73/// visible at the call site. The full Phase 2 scope/alias/shadow and
74/// witness-bearing resolution API is exposed through `BindingPlane`.
75// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
76// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
77// External crates still go through the public accessor methods below.
78#[derive(Clone)]
79pub struct CodeGraph {
80    /// Node storage with generational indices.
81    pub(crate) nodes: Arc<NodeArena>,
82    /// Bidirectional edge storage (forward + reverse).
83    pub(crate) edges: Arc<BidirectionalEdgeStore>,
84    /// String interner for symbol names.
85    pub(crate) strings: Arc<StringInterner>,
86    /// File registry for path deduplication.
87    pub(crate) files: Arc<FileRegistry>,
88    /// Auxiliary indices for fast lookup.
89    pub(crate) indices: Arc<AuxiliaryIndices>,
90    /// Sparse macro boundary metadata (keyed by full `NodeId`).
91    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
92    /// Dense node provenance (Phase 1 fact layer).
93    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
94    /// Dense edge provenance (Phase 1 fact layer).
95    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
96    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
97    pub(crate) fact_epoch: u64,
98    /// Epoch for version tracking.
99    pub(crate) epoch: u64,
100    /// Per-language confidence metadata collected during build.
101    /// Maps language name (e.g., "rust") to aggregated confidence.
102    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
103    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
104    pub(crate) scope_arena: Arc<ScopeArena>,
105    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
106    pub(crate) alias_table: Arc<AliasTable>,
107    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
108    pub(crate) shadow_table: Arc<ShadowTable>,
109    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
110    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
111    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
112    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
113    pub(crate) file_segments: Arc<FileSegmentTable>,
114    /// Phase A (U09): C-only indirect-call resolver side tables.
115    ///
116    /// `None` on non-C workspaces — the parent slot stays unallocated so
117    /// non-C builds incur zero side-table overhead. Populated by Phase 3
118    /// commit (U11) and consumed by `pass5b_c_indirect_resolve` (U12).
119    /// Persisted as the V11 `Option<CIndirectSideTables>` envelope slot
120    /// (DESIGN §10.2); V10 → V11 upconvert sets this to `None`.
121    pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
122    /// Build-time scratch side-channel from the Go plugin (Cluster A
123    /// of the Go T1 implements-and-promotion design).
124    ///
125    /// Populated during Phase 1 plugin parse, merged into the live
126    /// target by Phase 3 commit after `NodeId` / `StringId` remap, drained
127    /// by `pass_go_method_set_satisfaction` between Phase 4e and Pass
128    /// 5. Not part of `GraphSnapshot`, not persisted in V10 — see
129    /// `02_DESIGN` §6. Held by-value (not `Arc<…>`) because no
130    /// copy-on-write or shared-reader access is required: only the
131    /// rebuild owner mutates, only the pass consumes, then the field
132    /// is reset.
133    pub(crate) go_hints: crate::graph::unified::build::staging::GoHints,
134}
135
136impl CodeGraph {
137    /// Creates a new empty `CodeGraph`.
138    ///
139    /// # Example
140    ///
141    /// ```rust
142    /// use sqry_core::graph::unified::concurrent::CodeGraph;
143    ///
144    /// let graph = CodeGraph::new();
145    /// assert_eq!(graph.epoch(), 0);
146    /// ```
147    #[must_use]
148    pub fn new() -> Self {
149        Self {
150            nodes: Arc::new(NodeArena::new()),
151            edges: Arc::new(BidirectionalEdgeStore::new()),
152            strings: Arc::new(StringInterner::new()),
153            files: Arc::new(FileRegistry::new()),
154            indices: Arc::new(AuxiliaryIndices::new()),
155            macro_metadata: Arc::new(NodeMetadataStore::new()),
156            node_provenance: Arc::new(NodeProvenanceStore::new()),
157            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
158            fact_epoch: 0,
159            epoch: 0,
160            confidence: HashMap::new(),
161            scope_arena: Arc::new(ScopeArena::new()),
162            alias_table: Arc::new(AliasTable::new()),
163            shadow_table: Arc::new(ShadowTable::new()),
164            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
165            file_segments: Arc::new(FileSegmentTable::new()),
166            c_indirect_tables: None,
167            go_hints: crate::graph::unified::build::staging::GoHints::default(),
168        }
169    }
170
171    /// Creates a `CodeGraph` from existing components.
172    ///
173    /// This is useful when building a graph from external data or
174    /// reconstructing from serialized state.
175    #[must_use]
176    pub fn from_components(
177        nodes: NodeArena,
178        edges: BidirectionalEdgeStore,
179        strings: StringInterner,
180        files: FileRegistry,
181        indices: AuxiliaryIndices,
182        macro_metadata: NodeMetadataStore,
183    ) -> Self {
184        Self {
185            nodes: Arc::new(nodes),
186            edges: Arc::new(edges),
187            strings: Arc::new(strings),
188            files: Arc::new(files),
189            indices: Arc::new(indices),
190            macro_metadata: Arc::new(macro_metadata),
191            node_provenance: Arc::new(NodeProvenanceStore::new()),
192            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
193            fact_epoch: 0,
194            epoch: 0,
195            confidence: HashMap::new(),
196            scope_arena: Arc::new(ScopeArena::new()),
197            alias_table: Arc::new(AliasTable::new()),
198            shadow_table: Arc::new(ShadowTable::new()),
199            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
200            file_segments: Arc::new(FileSegmentTable::new()),
201            c_indirect_tables: None,
202            go_hints: crate::graph::unified::build::staging::GoHints::default(),
203        }
204    }
205
206    /// Creates a cheap snapshot of the graph.
207    ///
208    /// This operation is O(5) Arc clones, which completes in <1μs.
209    /// The snapshot is isolated from future mutations to the original graph.
210    ///
211    /// # Example
212    ///
213    /// ```rust
214    /// use sqry_core::graph::unified::concurrent::CodeGraph;
215    ///
216    /// let graph = CodeGraph::new();
217    /// let snapshot = graph.snapshot();
218    /// // snapshot is independent of future mutations to graph
219    /// ```
220    #[must_use]
221    pub fn snapshot(&self) -> GraphSnapshot {
222        GraphSnapshot {
223            nodes: Arc::clone(&self.nodes),
224            edges: Arc::clone(&self.edges),
225            strings: Arc::clone(&self.strings),
226            files: Arc::clone(&self.files),
227            indices: Arc::clone(&self.indices),
228            macro_metadata: Arc::clone(&self.macro_metadata),
229            node_provenance: Arc::clone(&self.node_provenance),
230            edge_provenance: Arc::clone(&self.edge_provenance),
231            fact_epoch: self.fact_epoch,
232            epoch: self.epoch,
233            scope_arena: Arc::clone(&self.scope_arena),
234            alias_table: Arc::clone(&self.alias_table),
235            shadow_table: Arc::clone(&self.shadow_table),
236            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
237            file_segments: Arc::clone(&self.file_segments),
238            c_indirect_tables: self.c_indirect_tables.clone(),
239        }
240    }
241
242    /// Returns a reference to the node arena.
243    #[inline]
244    #[must_use]
245    pub fn nodes(&self) -> &NodeArena {
246        &self.nodes
247    }
248
249    /// Returns a reference to the bidirectional edge store.
250    #[inline]
251    #[must_use]
252    pub fn edges(&self) -> &BidirectionalEdgeStore {
253        &self.edges
254    }
255
256    /// Returns a reference to the string interner.
257    #[inline]
258    #[must_use]
259    pub fn strings(&self) -> &StringInterner {
260        &self.strings
261    }
262
263    /// Returns a reference to the file registry.
264    #[inline]
265    #[must_use]
266    pub fn files(&self) -> &FileRegistry {
267        &self.files
268    }
269
270    /// Returns a reference to the auxiliary indices.
271    #[inline]
272    #[must_use]
273    pub fn indices(&self) -> &AuxiliaryIndices {
274        &self.indices
275    }
276
277    /// Returns a reference to the macro boundary metadata store.
278    #[inline]
279    #[must_use]
280    pub fn macro_metadata(&self) -> &NodeMetadataStore {
281        &self.macro_metadata
282    }
283
284    /// Returns a reference to the C indirect-call side tables, if any.
285    ///
286    /// `None` on non-C workspaces — the slot is allocated lazily and only
287    /// when the C plugin's Phase 1 instrumentation observes content worth
288    /// recording. On loaded snapshots, the V11 envelope's
289    /// `Option<CIndirectSideTables>` field round-trips into this slot
290    /// (DESIGN §10.2); V10 → V11 upconvert always sets the slot to `None`.
291    ///
292    /// `pass5b_c_indirect_resolve` (U12) consumes these tables to rewrite
293    /// synthetic indirect-call `Calls` edges into precise binding-plane /
294    /// type-match candidates.
295    #[inline]
296    #[must_use]
297    pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
298        self.c_indirect_tables.as_ref()
299    }
300
301    /// Returns a mutable reference to the `Option<CIndirectSideTables>`
302    /// slot, allowing callers to install / replace / clear the side
303    /// tables.
304    ///
305    /// Mirrors the accessor pattern used for [`Self::macro_metadata_mut`]
306    /// but exposes the `Option` directly — the side tables are owned in
307    /// place, not behind an `Arc`. Callers that need to merge incremental
308    /// state into the existing tables typically do:
309    ///
310    /// ```rust,ignore
311    /// let slot = graph.c_indirect_tables_mut();
312    /// let tables = slot.get_or_insert_with(CIndirectSideTables::new);
313    /// tables.bindings_by_field.entry(key).or_default().push(entry);
314    /// ```
315    ///
316    /// Populated by the build pipeline (U11). Read-only consumers should
317    /// use [`Self::c_indirect_tables`] instead.
318    #[inline]
319    pub fn c_indirect_tables_mut(&mut self) -> &mut Option<CIndirectSideTables> {
320        &mut self.c_indirect_tables
321    }
322
323    /// Test-only: strip every Phase-A-introduced piece of state from this
324    /// graph, leaving a graph that is byte-identical (modulo persistence
325    /// header timestamps) to what a pre-Phase-A build would have produced
326    /// on the same fixture.
327    ///
328    /// Specifically:
329    ///
330    /// * Clears the [`CIndirectSideTables`] slot (sets it to `None`).
331    /// * Removes [`NodeFlags::ADDRESS_TAKEN`] and
332    ///   [`NodeFlags::CALLSITE_PROMISCUOUS`] marker bits from every
333    ///   [`StoredEntry`] in the macro-metadata store.
334    /// * Rewrites the `resolved_via` field of every [`EdgeKind::Calls`]
335    ///   edge (across CSR and delta tiers, forward and reverse stores)
336    ///   to [`ResolvedVia::Direct`].
337    ///
338    /// Used exclusively by `sqry-core/tests/snapshot_size_phase_a.rs`
339    /// (U19) to materialize a "Phase-A-free" baseline snapshot for the
340    /// +10% snapshot-size gate. Gated behind `cfg(any(test, feature =
341    /// "test-support"))` so the helper is invisible to production
342    /// builds.
343    ///
344    /// [`NodeFlags::ADDRESS_TAKEN`]: crate::graph::unified::storage::metadata::NodeFlags::ADDRESS_TAKEN
345    /// [`NodeFlags::CALLSITE_PROMISCUOUS`]: crate::graph::unified::storage::metadata::NodeFlags::CALLSITE_PROMISCUOUS
346    /// [`StoredEntry`]: crate::graph::unified::storage::metadata::StoredEntry
347    /// [`EdgeKind::Calls`]: crate::graph::unified::edge::EdgeKind::Calls
348    /// [`ResolvedVia::Direct`]: crate::graph::unified::edge::ResolvedVia::Direct
349    #[cfg(any(test, feature = "test-support"))]
350    pub fn clear_phase_a_state_for_test(&mut self) {
351        *self.c_indirect_tables_mut() = None;
352        self.macro_metadata_mut().clear_phase_a_flags_for_test();
353        self.edges_mut().normalize_calls_resolved_via_for_test();
354    }
355
356    // ------------------------------------------------------------------
357    // Phase 1 fact-layer provenance accessors (P1U09).
358    // ------------------------------------------------------------------
359
360    /// Returns the monotonic fact-layer epoch stamped on the most recently
361    /// saved or loaded snapshot. Returns `0` for graphs that have not been
362    /// persisted yet or were loaded from V7 snapshots.
363    #[inline]
364    #[must_use]
365    pub fn fact_epoch(&self) -> u64 {
366        self.fact_epoch
367    }
368
369    /// Looks up node provenance by `NodeId`.
370    ///
371    /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
372    /// or the stored generation does not match (stale handle).
373    #[inline]
374    #[must_use]
375    pub fn node_provenance(
376        &self,
377        id: crate::graph::unified::node::id::NodeId,
378    ) -> Option<&NodeProvenance> {
379        self.node_provenance.lookup(id)
380    }
381
382    /// Looks up edge provenance by `EdgeId`.
383    ///
384    /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
385    /// or the edge is the invalid sentinel.
386    #[inline]
387    #[must_use]
388    pub fn edge_provenance(
389        &self,
390        id: crate::graph::unified::edge::id::EdgeId,
391    ) -> Option<&EdgeProvenance> {
392        self.edge_provenance.lookup(id)
393    }
394
395    /// Returns a borrowed provenance view for a file.
396    ///
397    /// Returns `None` for invalid/unregistered `FileId`s.
398    #[inline]
399    #[must_use]
400    pub fn file_provenance(
401        &self,
402        id: crate::graph::unified::file::id::FileId,
403    ) -> Option<FileProvenanceView<'_>> {
404        self.files.file_provenance(id)
405    }
406
407    // ------------------------------------------------------------------
408    // Phase 2 binding-plane accessors (P2U03).
409    // ------------------------------------------------------------------
410
411    /// Returns a reference to the scope arena derived during Phase 4e.
412    ///
413    /// The arena is empty on freshly-constructed `CodeGraph` instances and is
414    /// populated by calling `phase4e_binding::derive_binding_plane`.
415    #[inline]
416    #[must_use]
417    pub fn scope_arena(&self) -> &ScopeArena {
418        &self.scope_arena
419    }
420
421    /// Installs a freshly-derived scope arena.
422    ///
423    /// Called from `phase4e_binding::derive_binding_plane` during the build
424    /// pipeline. External callers that run Phase 4e manually (e.g., test
425    /// fixture builders) use this to store the result.
426    pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
427        self.scope_arena = Arc::new(arena);
428    }
429
430    /// Returns a reference to the alias table derived during Phase 4e.
431    ///
432    /// The table is empty on freshly-constructed `CodeGraph` instances and is
433    /// populated by calling `phase4e_binding::derive_binding_plane`.
434    #[inline]
435    #[must_use]
436    pub fn alias_table(&self) -> &AliasTable {
437        &self.alias_table
438    }
439
440    /// Installs a freshly-derived alias table.
441    ///
442    /// Called from `phase4e_binding::derive_binding_plane` during the build
443    /// pipeline. External callers that run Phase 4e manually (e.g., test
444    /// fixture builders) use this to store the result.
445    pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
446        self.alias_table = Arc::new(table);
447    }
448
449    /// Returns a reference to the shadow table derived during Phase 4e.
450    ///
451    /// The table is empty on freshly-constructed `CodeGraph` instances and is
452    /// populated by calling `phase4e_binding::derive_binding_plane`.
453    #[inline]
454    #[must_use]
455    pub fn shadow_table(&self) -> &ShadowTable {
456        &self.shadow_table
457    }
458
459    /// Installs a freshly-derived shadow table.
460    ///
461    /// Called from `phase4e_binding::derive_binding_plane` during the build
462    /// pipeline. External callers that run Phase 4e manually (e.g., test
463    /// fixture builders) use this to store the result.
464    pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
465        self.shadow_table = Arc::new(table);
466    }
467
468    /// Returns a reference to the scope provenance store derived during Phase 4e.
469    ///
470    /// The store is empty on freshly-constructed `CodeGraph` instances and is
471    /// populated by calling `phase4e_binding::derive_binding_plane`.
472    #[inline]
473    #[must_use]
474    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
475        &self.scope_provenance_store
476    }
477
478    /// Looks up scope provenance by `ScopeId`.
479    ///
480    /// Returns `None` if the slot is out of range, vacant, or the stored
481    /// generation does not match (stale handle).
482    #[inline]
483    #[must_use]
484    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
485        self.scope_provenance_store.lookup(id)
486    }
487
488    /// Looks up the live `ScopeId` for a stable scope identity.
489    ///
490    /// Returns `None` if no provenance record is registered for that stable id.
491    /// The reverse index is populated by `insert` during Phase 4e and must be
492    /// rebuilt after V9 deserialization.
493    #[inline]
494    #[must_use]
495    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
496        self.scope_provenance_store.scope_by_stable_id(stable)
497    }
498
499    /// Installs a freshly-derived scope provenance store.
500    ///
501    /// Called from `phase4e_binding::derive_binding_plane` during the build
502    /// pipeline. External callers that run Phase 4e manually (e.g., test
503    /// fixture builders) use this to store the result.
504    pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
505        self.scope_provenance_store = Arc::new(store);
506    }
507
508    /// Returns a reference to the file segment table.
509    #[inline]
510    #[must_use]
511    pub fn file_segments(&self) -> &FileSegmentTable {
512        &self.file_segments
513    }
514
515    /// Replaces the file segment table.
516    pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
517        self.file_segments = Arc::new(table);
518    }
519
520    /// Installs the C indirect-call side tables loaded from a V11
521    /// snapshot.
522    ///
523    /// `None` clears the slot (used on V10 → V11 upconvert and on non-C
524    /// workspaces). `Some(...)` carries the live tables onto the freshly
525    /// reconstructed `CodeGraph`. Build-pipeline callers that incrementally
526    /// populate the tables should prefer [`Self::c_indirect_tables_mut`].
527    pub(crate) fn set_c_indirect_tables(&mut self, tables: Option<CIndirectSideTables>) {
528        self.c_indirect_tables = tables;
529    }
530
531    /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
532    pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
533        Arc::make_mut(&mut self.file_segments)
534    }
535
536    /// Test-only helper that records a file segment directly on the
537    /// graph, bypassing the full Phase 3 commit pipeline. Only
538    /// available under `#[cfg(feature = "rebuild-internals")]` so the
539    /// surface is opt-in for rebuild-plane consumers (the feature is
540    /// whitelisted to sqry-daemon + sqry-core integration tests; see
541    /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
542    ///
543    /// Integration tests (notably
544    /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
545    /// to seed the synthetic workspaces they build before exercising
546    /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
547    /// code paths never touch this method — Phase 3 parallel commit
548    /// is the sole production writer, via the crate-internal
549    /// `file_segments_mut` accessor above.
550    ///
551    /// Renamed with `test_only_` prefix so the purpose is unambiguous
552    /// at every call site; `#[doc(hidden)]` hides it from rendered
553    /// rustdoc so downstream daemon integrations don't discover it by
554    /// accident.
555    #[cfg(feature = "rebuild-internals")]
556    #[doc(hidden)]
557    pub fn test_only_record_file_segment(
558        &mut self,
559        file_id: FileId,
560        start_slot: u32,
561        slot_count: u32,
562    ) {
563        Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
564    }
565
566    /// Sets the provenance stores and fact epoch, typically called by the
567    /// persistence loader after deserializing a V8 snapshot.
568    pub(crate) fn set_provenance(
569        &mut self,
570        node_provenance: NodeProvenanceStore,
571        edge_provenance: EdgeProvenanceStore,
572        fact_epoch: u64,
573    ) {
574        self.node_provenance = Arc::new(node_provenance);
575        self.edge_provenance = Arc::new(edge_provenance);
576        self.fact_epoch = fact_epoch;
577    }
578
579    /// Returns the current epoch.
580    #[inline]
581    #[must_use]
582    pub fn epoch(&self) -> u64 {
583        self.epoch
584    }
585
586    /// Returns a mutable reference to the node arena.
587    ///
588    /// Uses `Arc::make_mut` for copy-on-write semantics: if other
589    /// references exist (e.g., snapshots), the data is cloned.
590    #[inline]
591    pub fn nodes_mut(&mut self) -> &mut NodeArena {
592        Arc::make_mut(&mut self.nodes)
593    }
594
595    /// Returns a mutable reference to the bidirectional edge store.
596    ///
597    /// Uses `Arc::make_mut` for copy-on-write semantics.
598    #[inline]
599    pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
600        Arc::make_mut(&mut self.edges)
601    }
602
603    /// Returns a mutable reference to the string interner.
604    ///
605    /// Uses `Arc::make_mut` for copy-on-write semantics.
606    #[inline]
607    pub fn strings_mut(&mut self) -> &mut StringInterner {
608        Arc::make_mut(&mut self.strings)
609    }
610
611    /// Returns a mutable reference to the file registry.
612    ///
613    /// Uses `Arc::make_mut` for copy-on-write semantics.
614    #[inline]
615    pub fn files_mut(&mut self) -> &mut FileRegistry {
616        Arc::make_mut(&mut self.files)
617    }
618
619    /// Returns a mutable reference to the auxiliary indices.
620    ///
621    /// Uses `Arc::make_mut` for copy-on-write semantics.
622    #[inline]
623    pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
624        Arc::make_mut(&mut self.indices)
625    }
626
627    /// Returns a mutable reference to the macro boundary metadata store.
628    ///
629    /// Uses `Arc::make_mut` for copy-on-write semantics.
630    #[inline]
631    pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
632        Arc::make_mut(&mut self.macro_metadata)
633    }
634
635    /// Returns mutable references to both the node arena and the string interner.
636    ///
637    /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
638    /// `strings_mut()` separately on `&mut self`.
639    #[inline]
640    pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
641        (
642            Arc::make_mut(&mut self.nodes),
643            Arc::make_mut(&mut self.strings),
644        )
645    }
646
647    /// Rebuilds auxiliary indices from the current node arena.
648    ///
649    /// This avoids the borrow conflict that arises when calling `nodes()` and
650    /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
651    /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
652    /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
653    /// existing indices and rebuilds in a single pass without per-element
654    /// duplicate checking.
655    ///
656    /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
657    /// generic
658    /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
659    /// free function so the same implementation serves both the
660    /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
661    /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
662    /// using `graph.rebuild_indices()`; incremental rebuild call sites
663    /// should use the free function directly.
664    pub fn rebuild_indices(&mut self) {
665        crate::graph::unified::build::parallel_commit::rebuild_indices(self);
666    }
667
668    /// Increments the epoch counter and returns the new value.
669    ///
670    /// Called automatically by `ConcurrentCodeGraph::write()`.
671    #[inline]
672    pub fn bump_epoch(&mut self) -> u64 {
673        self.epoch = self.epoch.wrapping_add(1);
674        self.epoch
675    }
676
677    /// Sets the epoch to a specific value.
678    ///
679    /// This is primarily for testing or reconstruction from serialized state.
680    #[inline]
681    pub fn set_epoch(&mut self, epoch: u64) {
682        self.epoch = epoch;
683    }
684
685    /// Returns the number of nodes in the graph.
686    ///
687    /// This is a convenience method that delegates to `nodes().len()`.
688    ///
689    /// # Example
690    ///
691    /// ```rust
692    /// use sqry_core::graph::unified::concurrent::CodeGraph;
693    ///
694    /// let graph = CodeGraph::new();
695    /// assert_eq!(graph.node_count(), 0);
696    /// ```
697    #[inline]
698    #[must_use]
699    pub fn node_count(&self) -> usize {
700        self.nodes.len()
701    }
702
703    /// Returns the number of edges in the graph (forward direction).
704    ///
705    /// This counts edges in the forward store, including both CSR and delta edges.
706    ///
707    /// # Example
708    ///
709    /// ```rust
710    /// use sqry_core::graph::unified::concurrent::CodeGraph;
711    ///
712    /// let graph = CodeGraph::new();
713    /// assert_eq!(graph.edge_count(), 0);
714    /// ```
715    #[inline]
716    #[must_use]
717    pub fn edge_count(&self) -> usize {
718        let stats = self.edges.stats();
719        stats.forward.csr_edge_count + stats.forward.delta_edge_count
720    }
721
722    /// Returns true if the graph contains no nodes.
723    ///
724    /// This is a convenience method that delegates to `nodes().is_empty()`.
725    ///
726    /// # Example
727    ///
728    /// ```rust
729    /// use sqry_core::graph::unified::concurrent::CodeGraph;
730    ///
731    /// let graph = CodeGraph::new();
732    /// assert!(graph.is_empty());
733    /// ```
734    #[inline]
735    #[must_use]
736    pub fn is_empty(&self) -> bool {
737        self.nodes.is_empty()
738    }
739
740    /// Returns an iterator over all indexed file paths.
741    ///
742    /// This is useful for enumerating all files that have been processed
743    /// and added to the graph.
744    ///
745    /// # Example
746    ///
747    /// ```rust
748    /// use sqry_core::graph::unified::concurrent::CodeGraph;
749    ///
750    /// let graph = CodeGraph::new();
751    /// for (file_id, path) in graph.indexed_files() {
752    ///     println!("File {}: {}", file_id.index(), path.display());
753    /// }
754    /// ```
755    #[inline]
756    pub fn indexed_files(
757        &self,
758    ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
759        self.files
760            .iter()
761            .map(|(id, arc_path)| (id, arc_path.as_ref()))
762    }
763
764    /// Returns the set of files that import one or more symbols exported by
765    /// `file_id`, deduplicated and sorted ascending.
766    ///
767    /// For every [`EdgeKind::Imports`] edge whose target node lives in
768    /// `file_id`, the source node's [`FileId`] is added to the result. Files
769    /// are returned sorted by raw index so the result is deterministic across
770    /// runs. The caller's own file is never included — an `Imports` edge
771    /// whose source and target both live in `file_id` is treated as a
772    /// self-import and elided. Edges whose source node is no longer resolvable
773    /// in the arena (tombstoned) are silently skipped.
774    ///
775    /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
776    /// by the incremental rebuild engine to compute reverse-dependency
777    /// closures: "if file X changes its exports, which files need to be
778    /// re-linked?"
779    ///
780    /// # Complexity
781    ///
782    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
783    /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
784    /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
785    /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
786    /// importer set (where `R` is the importer count) is negligible in
787    /// practice since `R ≤ file_count`.
788    ///
789    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
790    #[must_use]
791    pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
792        let mut importers: HashSet<FileId> = HashSet::new();
793        for &target_node in self.indices.by_file(file_id) {
794            for edge_ref in self.edges.edges_to(target_node) {
795                if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
796                    continue;
797                }
798                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
799                    continue;
800                };
801                let source_file = source_entry.file;
802                if source_file != file_id {
803                    importers.insert(source_file);
804                }
805            }
806        }
807        let mut result: Vec<FileId> = importers.into_iter().collect();
808        result.sort();
809        result
810    }
811
812    /// Returns the set of files that hold at least one live inter-file edge
813    /// targeting a node in `file_id`, deduplicated and sorted ascending.
814    ///
815    /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
816    /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
817    /// cross-file edge as a dependency signal: `Calls`, `References`,
818    /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
819    /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
820    /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
821    /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
822    /// cross-file-capable variant. This is the reverse-dependency surface the
823    /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
824    /// `file_id` changes, every file whose committed edges point into
825    /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
826    /// references survive the target-side tombstone-and-reparse cycle.
827    ///
828    /// The caller's own file is never included — an edge whose source and
829    /// target both live in `file_id` is a self-reference and is elided.
830    /// Edges whose source node is no longer resolvable in the arena
831    /// (tombstoned) are silently skipped.
832    ///
833    /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
834    ///
835    /// * [`reverse_import_index`](Self::reverse_import_index) remains the
836    ///   right surface for consumers that specifically need *import*
837    ///   relationships (export surface analysis, module-dependency graphs,
838    ///   etc.).
839    /// * `reverse_dependency_index` is the right surface for incremental
840    ///   rebuild closure computation. Widening past imports is necessary
841    ///   because call sites, type references, trait implementations, FFI
842    ///   declarations, HTTP clients, and every other cross-file edge kind
843    ///   hold a committed edge into the target file that becomes stale the
844    ///   moment `remove_file(target)` tombstones its arena nodes. Leaving
845    ///   those files out of the closure leaves the committed edges pointing
846    ///   at the stale (pre-tombstone) node IDs — Phase 4c-prime only
847    ///   rewrites edges on **re-parsed** files' `PendingEdge` sets, never
848    ///   committed edges owned by files outside the reparse scope.
849    ///
850    /// # Complexity
851    ///
852    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
853    /// same bound as [`reverse_import_index`](Self::reverse_import_index).
854    /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
855    /// lookup and the bidirectional edge store's reverse adjacency; no
856    /// full-graph scan. Final `O(R log R)` sort over the deduplicated
857    /// dependent set is negligible since `R ≤ file_count`.
858    ///
859    /// # Over-widening is expected and acceptable
860    ///
861    /// The closure will include every file that references anything in
862    /// `file_id`, not just files whose exports change. In common codebases
863    /// this widens the reparse set modestly (a 10-file change may expand
864    /// to 20–30 dependent files in a medium crate). Correctness requires
865    /// the widening; minimality is a follow-up optimisation if profiling
866    /// demands it.
867    ///
868    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
869    #[must_use]
870    pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
871        let mut dependents: HashSet<FileId> = HashSet::new();
872        for &target_node in self.indices.by_file(file_id) {
873            for edge_ref in self.edges.edges_to(target_node) {
874                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
875                    continue;
876                };
877                let source_file = source_entry.file;
878                if source_file != file_id {
879                    dependents.insert(source_file);
880                }
881            }
882        }
883        let mut result: Vec<FileId> = dependents.into_iter().collect();
884        result.sort();
885        result
886    }
887
888    /// Returns the per-language confidence metadata.
889    ///
890    /// This contains analysis confidence information collected during graph build,
891    /// primarily used by language plugins (e.g., Rust) to track analysis quality.
892    #[inline]
893    #[must_use]
894    pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
895        &self.confidence
896    }
897
898    /// Merges confidence metadata for a language.
899    ///
900    /// If confidence already exists for the language, this merges the new
901    /// metadata (taking the lower confidence level and combining limitations).
902    /// Otherwise, it inserts the new confidence.
903    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
904        use crate::confidence::ConfidenceLevel;
905
906        self.confidence
907            .entry(language.to_string())
908            .and_modify(|existing| {
909                // Take the lower confidence level (more conservative)
910                let new_level = match (&existing.level, &metadata.level) {
911                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
912                        *other
913                    }
914                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
915                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
916                        ConfidenceLevel::AstOnly
917                    }
918                    (level, _) => *level,
919                };
920                existing.level = new_level;
921
922                // Merge limitations (deduplicated)
923                for limitation in &metadata.limitations {
924                    if !existing.limitations.contains(limitation) {
925                        existing.limitations.push(limitation.clone());
926                    }
927                }
928
929                // Merge unavailable features (deduplicated)
930                for feature in &metadata.unavailable_features {
931                    if !existing.unavailable_features.contains(feature) {
932                        existing.unavailable_features.push(feature.clone());
933                    }
934                }
935            })
936            .or_insert(metadata);
937    }
938
939    /// Sets the confidence metadata map directly.
940    ///
941    /// This is primarily used when loading a graph from serialized state.
942    pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
943        self.confidence = confidence;
944    }
945
946    // ------------------------------------------------------------------
947    // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
948    //
949    // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
950    // path mutates a live `CodeGraph` in place and is the mechanism
951    // used by the full-rebuild flow when it needs to evict a file's
952    // nodes+edges between compactions. The daemon's incremental
953    // `WorkspaceManager` (Task 6) goes through the rebuild path, which
954    // is why this entry point is `pub(crate)`.
955    // ------------------------------------------------------------------
956
957    /// Tombstone every node that belongs to `file_id`, invalidate every
958    /// edge whose source or target is one of those nodes (across both
959    /// forward and reverse CSR + delta tiers), drop the file's entry
960    /// from the [`FileRegistry`], and return the set of [`NodeId`]s
961    /// that were tombstoned.
962    ///
963    /// This is the §F.2-aware file-removal primitive. Semantically, the
964    /// post-condition matches what a full rebuild of the workspace
965    /// without `file_id` would produce:
966    ///
967    /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
968    ///   been `NodeArena::remove`d, advancing its slot generation so
969    ///   stale [`NodeId`] handles do not alias a later re-allocation.
970    /// * Every CSR edge whose source or target slot matches one of the
971    ///   tombstoned slot indices has its `csr_tombstones` bit set; the
972    ///   read path's merge step already filters tombstoned CSR edges
973    ///   out of every query result.
974    /// * Every delta-buffer edge (Add or Remove, any file) whose source
975    ///   or target matches a tombstoned slot has been dropped from the
976    ///   delta in both directions.
977    /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
978    ///   no longer reference any of the tombstoned `NodeId`s.
979    /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
980    ///   [`AliasTable`], and [`ShadowTable`] have been compacted through
981    ///   the [`NodeIdBearing::retain_nodes`] predicate so no
982    ///   tombstoned `NodeId` survives in any publish-visible store.
983    /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
984    ///   `file_id`, the lookup slot is recycled, and
985    ///   [`FileRegistry::resolve(file_id)`] returns `None`.
986    ///
987    /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
988    /// is useful for downstream housekeeping (e.g., resetting per-file
989    /// caches keyed by `NodeId`) and for tests that need to assert on the
990    /// exact membership of the tombstone set.
991    ///
992    /// # Idempotency
993    ///
994    /// Calling `remove_file` twice with the same `file_id` — or calling
995    /// it for a `file_id` that was never registered — is a safe no-op.
996    /// The returned `Vec<NodeId>` is empty on the second call; no
997    /// arena / edge / index state is mutated (the predicate-based
998    /// compaction of `NodeIdBearing` surfaces short-circuits when the
999    /// dead set is empty).
1000    ///
1001    /// # Visibility
1002    ///
1003    /// `pub(crate)` because external callers (Task 6's
1004    /// `WorkspaceManager` on the sqry-daemon side) route through
1005    /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
1006    /// instead. This `CodeGraph`-level variant is used by full-rebuild
1007    /// housekeeping paths inside sqry-core and by the Task 4 Step 4
1008    /// incremental fallback for cases where the caller already has a
1009    /// `&mut CodeGraph` and does not need the clone-and-publish
1010    /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
1011    /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
1012    ///
1013    /// # Performance
1014    ///
1015    /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
1016    ///   The CSR walk is linear in total edge count (not per-file),
1017    ///   which is the dominant cost; each row check is O(1) via the
1018    ///   precomputed dead-slot-index `HashSet` in
1019    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
1020    /// * Delta filtering is `O(|delta|)` per direction.
1021    /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
1022    ///   because each of the four indices keys its entries by the
1023    ///   tombstoned `NodeIds` directly.
1024    ///
1025    /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
1026    /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
1027    /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
1028    #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
1029    // and the unit tests below; published in this commit so the §F.2
1030    // invariant surface can be reviewed in isolation per the Gate 0c
1031    // split contract.
1032    pub(crate) fn remove_file(
1033        &mut self,
1034        file_id: FileId,
1035    ) -> Vec<crate::graph::unified::node::NodeId> {
1036        use crate::graph::unified::node::NodeId;
1037        use crate::graph::unified::rebuild::coverage::NodeIdBearing;
1038
1039        // Drain the per-file bucket. For a file that was never
1040        // registered, this returns an empty Vec — the rest of the
1041        // method short-circuits on the `if tombstoned.is_empty()` test
1042        // below so we still deregister the file on the off chance the
1043        // bucket was empty but the file was registered (defensive; the
1044        // common case is the bucket existed iff the file was
1045        // registered).
1046        let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
1047        // Always drop the file's path entry + recycle its slot, even
1048        // when the bucket was empty, so repeated registrations of the
1049        // same path don't resurrect a zombie FileId. `unregister` is
1050        // idempotent (returns None for unknown IDs) so the cost is a
1051        // single HashMap probe when file_id is already gone.
1052        self.files_mut().unregister(file_id);
1053        // Clear the file's segment entry unconditionally (idempotent
1054        // — `FileSegmentTable::remove` no-ops on unknown ids). This
1055        // MUST run before `FileRegistry::unregister` recycles the
1056        // FileId slot for reuse, otherwise a later registration of a
1057        // different path under the reused FileId would inherit the
1058        // previous file's stale node range (see
1059        // `sqry-core/src/graph/unified/build/reindex.rs` — which
1060        // trusts `file_segments().get(file_id)` to decide which slots
1061        // to tombstone). Note: `unregister` above was called first
1062        // only to keep the existing bucket-drain ordering; the
1063        // segment-clear is order-independent with respect to
1064        // `unregister` because neither touches the other's backing
1065        // store, and the FileId slot cannot be recycled-and-reissued
1066        // across a single `remove_file` call (the registry's slot
1067        // recycler is driven by a later `register`, not by
1068        // `unregister`).
1069        self.file_segments_mut().remove(file_id);
1070
1071        if tombstoned.is_empty() {
1072            return tombstoned;
1073        }
1074
1075        // Dead set keyed on NodeId for NodeIdBearing predicates.
1076        // `retain_nodes` uses `HashSet::contains` so membership is O(1).
1077        let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
1078
1079        // 1. Tombstone the arena slots. `NodeArena::remove` is
1080        //    idempotent — stale NodeIds that don't match a slot's
1081        //    current generation are no-ops, which lets this method be
1082        //    safely re-run on the same file.
1083        {
1084            let arena = self.nodes_mut();
1085            for &nid in &tombstoned {
1086                let _ = arena.remove(nid);
1087            }
1088        }
1089
1090        // 2. Invalidate edges across both CSR + delta in both
1091        //    directions. This is the expensive step; the helper uses a
1092        //    precomputed dead-slot-index set so the CSR walk is linear
1093        //    in total edge count, not quadratic.
1094        self.edges_mut().tombstone_edges_for_nodes(&dead);
1095
1096        // 3. Compact the auxiliary indices so name/kind/qname/file
1097        //    lookups do not return tombstoned NodeIds. Using the
1098        //    NodeIdBearing surface keeps this step in lockstep with the
1099        //    rebuild pipeline's step 4 — any future publish-visible
1100        //    NodeId-bearing container added to the K.A/K.B matrix is
1101        //    automatically swept here too.
1102        {
1103            let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
1104            self.indices_mut().retain_nodes(&*predicate);
1105            self.macro_metadata_mut().retain_nodes(&*predicate);
1106            // The remaining K.A rows (node_provenance, scope_arena,
1107            // alias_table, shadow_table) need Arc::make_mut accessors —
1108            // they are wrapped in Arc at rest so this is where the
1109            // CoW clone happens on demand. Inline the Arc::make_mut
1110            // calls here (no public mut accessor exists today because
1111            // the sole writer has been the rebuild path; extend the
1112            // set by adding a similar inline call plus a K.A/K.B row
1113            // in `super::super::rebuild::coverage`).
1114            Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1115            Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1116            Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1117            Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1118        }
1119
1120        tombstoned
1121    }
1122
1123    // ------------------------------------------------------------------
1124    // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1125    // ------------------------------------------------------------------
1126
1127    /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1128    /// by `RebuildGraph::finalize()` (defined in
1129    /// `super::super::rebuild::rebuild_graph`).
1130    ///
1131    /// This constructor is deliberately `pub(crate)` and named with a
1132    /// leading `__` so it is inaccessible from downstream crates even
1133    /// when the `rebuild-internals` feature is enabled: only code in
1134    /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1135    /// permitted to call it. The trybuild fixture
1136    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1137    /// proves there is no other path from `RebuildGraph` to
1138    /// `Arc<CodeGraph>`.
1139    ///
1140    /// The argument order mirrors the `CodeGraph` struct declaration
1141    /// order exactly; the public `clone_for_rebuild` → `finalize`
1142    /// round-trip uses the same macro-driven field enumeration so any
1143    /// new `CodeGraph` field automatically threads through this
1144    /// constructor as well.
1145    #[doc(hidden)]
1146    #[allow(clippy::too_many_arguments)]
1147    #[must_use]
1148    pub(crate) fn __assemble_from_rebuild_parts_internal(
1149        nodes: NodeArena,
1150        edges: BidirectionalEdgeStore,
1151        strings: StringInterner,
1152        files: FileRegistry,
1153        indices: AuxiliaryIndices,
1154        macro_metadata: NodeMetadataStore,
1155        node_provenance: NodeProvenanceStore,
1156        edge_provenance: EdgeProvenanceStore,
1157        fact_epoch: u64,
1158        epoch: u64,
1159        confidence: HashMap<String, ConfidenceMetadata>,
1160        scope_arena: ScopeArena,
1161        alias_table: AliasTable,
1162        shadow_table: ShadowTable,
1163        scope_provenance_store: ScopeProvenanceStore,
1164        file_segments: FileSegmentTable,
1165        go_hints: crate::graph::unified::build::staging::GoHints,
1166    ) -> Self {
1167        Self {
1168            nodes: Arc::new(nodes),
1169            edges: Arc::new(edges),
1170            strings: Arc::new(strings),
1171            files: Arc::new(files),
1172            indices: Arc::new(indices),
1173            macro_metadata: Arc::new(macro_metadata),
1174            node_provenance: Arc::new(node_provenance),
1175            edge_provenance: Arc::new(edge_provenance),
1176            fact_epoch,
1177            epoch,
1178            confidence,
1179            scope_arena: Arc::new(scope_arena),
1180            alias_table: Arc::new(alias_table),
1181            shadow_table: Arc::new(shadow_table),
1182            scope_provenance_store: Arc::new(scope_provenance_store),
1183            file_segments: Arc::new(file_segments),
1184            c_indirect_tables: None,
1185            go_hints,
1186        }
1187    }
1188
1189    // ------------------------------------------------------------------
1190    // Gate 0c (A2 §F) — publish-boundary debug invariants.
1191    //
1192    // These checks fire only in debug / test builds; release builds
1193    // compile them out. They are called from
1194    // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1195    // truth for the residue check, per §F and §H agreement). Gate 0d
1196    // will additionally wire the bijection check into
1197    // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1198    // every §E equivalence-harness run.
1199    // ------------------------------------------------------------------
1200
1201    /// Assert the bijective bucket-membership invariant (A2 §F.1).
1202    ///
1203    /// Four conditions must hold simultaneously:
1204    /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1205    ///     live arena slot;
1206    /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1207    ///     across buckets, no duplicates within a bucket);
1208    /// (c) the bucket's `FileId` matches the node's own `file` field on
1209    ///     `NodeEntry`;
1210    /// (d) when at least one bucket is populated, every live node in
1211    ///     the arena is accounted for by some bucket.
1212    ///
1213    /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1214    /// (no recorded buckets) publish boundary is vacuously consistent:
1215    /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1216    /// rebuilds on graphs that predate Gate 0c's parallel-commit
1217    /// bucketing must not panic. Once any bucket is populated, every
1218    /// live arena slot must appear in a bucket.
1219    ///
1220    /// Iter-2 B2 (verbatim): this check used to be documented as
1221    /// "vacuous until future `per_file_nodes` work lands". Pulling
1222    /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1223    /// is non-vacuous the moment parallel-parse commits nodes, which
1224    /// happens on every real build. The `!seen.is_empty()` guard now
1225    /// exists solely for the empty-graph corner case, not as a phased-
1226    /// delivery deferral.
1227    ///
1228    /// The check is a no-op in release builds. Panics with a
1229    /// descriptive message on violation. This is intentional: publish-
1230    /// boundary violations are programmer errors that must surface
1231    /// loudly during CI / test runs.
1232    ///
1233    /// # Panics
1234    ///
1235    /// Panics in debug/test builds when bucket membership is duplicated, points
1236    /// at a dead node, assigns a node to the wrong file, or omits a live node
1237    /// after buckets have been populated.
1238    #[cfg(any(debug_assertions, test))]
1239    pub fn assert_bucket_bijection(&self) {
1240        use std::collections::HashMap as StdHashMap;
1241        // (a) + (b) + (c): every bucketed node is live, unique across
1242        // buckets, and the bucket's FileId matches the node's own file.
1243        let mut seen: StdHashMap<
1244            crate::graph::unified::node::NodeId,
1245            crate::graph::unified::file::FileId,
1246        > = StdHashMap::new();
1247        let mut any_bucket_populated = false;
1248        for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1249            if !bucket.is_empty() {
1250                any_bucket_populated = true;
1251            }
1252            // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1253            // dedups during finalize step 6, but we re-check here so the
1254            // invariant covers non-finalize publish paths too (e.g. if a
1255            // future pipeline builds a graph without routing through
1256            // `RebuildGraph::finalize`).
1257            let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1258                std::collections::HashSet::new();
1259            for node_id in bucket {
1260                assert!(
1261                    within_bucket.insert(node_id),
1262                    "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1263                );
1264                assert!(
1265                    self.nodes.get(node_id).is_some(),
1266                    "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1267                );
1268                let prior = seen.insert(node_id, file_id);
1269                assert!(
1270                    prior.is_none(),
1271                    "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1272                     prior={prior:?}, current={file_id:?}"
1273                );
1274                if let Some(entry) = self.nodes.get(node_id) {
1275                    assert_eq!(
1276                        entry.file, file_id,
1277                        "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1278                         {file_id:?}, actual {:?}",
1279                        entry.file
1280                    );
1281                }
1282            }
1283        }
1284        // (d): every live node is accounted for by `seen` once buckets
1285        // are populated. The guard keeps legacy-empty-graph boundaries
1286        // vacuously consistent (see docs above).
1287        if any_bucket_populated {
1288            for (node_id, _entry) in self.nodes.iter() {
1289                assert!(
1290                    seen.contains_key(&node_id),
1291                    "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1292                );
1293            }
1294        }
1295    }
1296
1297    /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1298    ///
1299    /// Iterates every publish-visible NodeId-bearing structure on
1300    /// `self` and panics if any contains a node in `dead`. Called from
1301    /// `RebuildGraph::finalize()` step 14 against the set drained at
1302    /// step 8 — exactly one site per the plan's §F / §H agreement.
1303    ///
1304    /// No-op when `dead` is empty or in release builds.
1305    ///
1306    /// # Panics
1307    ///
1308    /// Panics in debug/test builds if any publish-visible node-bearing
1309    /// structure still references a tombstoned node.
1310    #[cfg(any(debug_assertions, test))]
1311    pub fn assert_no_tombstone_residue_for<S: std::hash::BuildHasher>(
1312        &self,
1313        dead: &std::collections::HashSet<crate::graph::unified::node::NodeId, S>,
1314    ) {
1315        use super::super::rebuild::coverage::NodeIdBearing;
1316        if dead.is_empty() {
1317            return;
1318        }
1319        // Every K.A/K.B row must be inspected per §F.2.
1320        for nid in self.nodes.all_node_ids() {
1321            assert!(
1322                !dead.contains(&nid),
1323                "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1324            );
1325        }
1326        for nid in self.indices.all_node_ids() {
1327            assert!(
1328                !dead.contains(&nid),
1329                "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1330            );
1331        }
1332        for nid in self.edges.all_node_ids() {
1333            assert!(
1334                !dead.contains(&nid),
1335                "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1336            );
1337        }
1338        for nid in self.macro_metadata.all_node_ids() {
1339            assert!(
1340                !dead.contains(&nid),
1341                "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1342            );
1343        }
1344        for nid in self.node_provenance.all_node_ids() {
1345            assert!(
1346                !dead.contains(&nid),
1347                "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1348            );
1349        }
1350        for nid in self.scope_arena.all_node_ids() {
1351            assert!(
1352                !dead.contains(&nid),
1353                "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1354            );
1355        }
1356        for nid in self.alias_table.all_node_ids() {
1357            assert!(
1358                !dead.contains(&nid),
1359                "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1360            );
1361        }
1362        for nid in self.shadow_table.all_node_ids() {
1363            assert!(
1364                !dead.contains(&nid),
1365                "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1366            );
1367        }
1368        for nid in self.files.all_node_ids() {
1369            assert!(
1370                !dead.contains(&nid),
1371                "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1372            );
1373        }
1374    }
1375}
1376
1377impl Default for CodeGraph {
1378    fn default() -> Self {
1379        Self::new()
1380    }
1381}
1382
1383impl fmt::Debug for CodeGraph {
1384    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1385        f.debug_struct("CodeGraph")
1386            .field("nodes", &self.nodes.len())
1387            .field("epoch", &self.epoch)
1388            .finish_non_exhaustive()
1389    }
1390}
1391
1392impl GraphMemorySize for CodeGraph {
1393    /// Estimates the total heap bytes owned by this `CodeGraph`.
1394    ///
1395    /// Sums heap usage across every component the graph owns: node arena,
1396    /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1397    /// buffer), string interner, file registry, auxiliary indices, sparse
1398    /// macro/classpath metadata, provenance stores, and the per-language
1399    /// confidence map. Used by the `sqryd` daemon's admission controller
1400    /// and workspace retention reaper to enforce memory budgets.
1401    fn heap_bytes(&self) -> usize {
1402        let mut confidence_bytes = self.confidence.capacity()
1403            * (std::mem::size_of::<String>()
1404                + std::mem::size_of::<ConfidenceMetadata>()
1405                + HASHMAP_ENTRY_OVERHEAD);
1406        for (key, meta) in &self.confidence {
1407            confidence_bytes += key.capacity();
1408            // `ConfidenceMetadata.limitations` / `unavailable_features` are
1409            // `Vec<String>`; charge their spill and inner String payloads.
1410            confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1411            for s in &meta.limitations {
1412                confidence_bytes += s.capacity();
1413            }
1414            confidence_bytes +=
1415                meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1416            for s in &meta.unavailable_features {
1417                confidence_bytes += s.capacity();
1418            }
1419        }
1420
1421        self.nodes.heap_bytes()
1422            + self.edges.heap_bytes()
1423            + self.strings.heap_bytes()
1424            + self.files.heap_bytes()
1425            + self.indices.heap_bytes()
1426            + self.macro_metadata.heap_bytes()
1427            + self.node_provenance.heap_bytes()
1428            + self.edge_provenance.heap_bytes()
1429            + confidence_bytes
1430            + self.file_segments.capacity()
1431                * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1432                )
1433    }
1434}
1435
1436/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1437///
1438/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1439/// - Multiple readers can access the graph simultaneously
1440/// - Only one writer can hold the lock at a time
1441/// - Each write operation increments the epoch for cursor invalidation
1442///
1443/// # Design
1444///
1445/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1446/// - Fair scheduling prevents writer starvation
1447/// - No poisoning (unlike `std::sync::RwLock`)
1448/// - Faster lock/unlock operations
1449///
1450/// # Phase 2 binding-plane access
1451///
1452/// Use the three-line snapshot pattern to access `BindingPlane`:
1453///
1454/// ```rust,ignore
1455/// let read_guard = concurrent.read();
1456/// let snapshot = read_guard.snapshot();
1457/// let plane = snapshot.binding_plane();
1458/// let resolution = plane.resolve(&query);
1459/// ```
1460///
1461/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1462/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1463/// API is exposed through `BindingPlane`.
1464///
1465/// # Usage
1466///
1467/// ```rust
1468/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1469///
1470/// let graph = ConcurrentCodeGraph::new();
1471///
1472/// // Read access (multiple readers allowed)
1473/// {
1474///     let guard = graph.read();
1475///     let _nodes = guard.nodes();
1476/// }
1477///
1478/// // Write access (exclusive)
1479/// {
1480///     let mut guard = graph.write();
1481///     let _nodes = guard.nodes_mut();
1482/// }
1483///
1484/// // Snapshot for long queries
1485/// let snapshot = graph.snapshot();
1486/// ```
1487pub struct ConcurrentCodeGraph {
1488    /// The underlying code graph protected by a read-write lock.
1489    inner: RwLock<CodeGraph>,
1490    /// Global epoch counter for cursor validation.
1491    epoch: AtomicU64,
1492}
1493
1494impl ConcurrentCodeGraph {
1495    /// Creates a new empty `ConcurrentCodeGraph`.
1496    #[must_use]
1497    pub fn new() -> Self {
1498        Self {
1499            inner: RwLock::new(CodeGraph::new()),
1500            epoch: AtomicU64::new(0),
1501        }
1502    }
1503
1504    /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1505    #[must_use]
1506    pub fn from_graph(graph: CodeGraph) -> Self {
1507        let epoch = graph.epoch();
1508        Self {
1509            inner: RwLock::new(graph),
1510            epoch: AtomicU64::new(epoch),
1511        }
1512    }
1513
1514    /// Acquires a read lock on the graph.
1515    ///
1516    /// Multiple readers can hold the lock simultaneously.
1517    /// This does not increment the epoch.
1518    #[inline]
1519    pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1520        self.inner.read()
1521    }
1522
1523    /// Acquires a write lock on the graph.
1524    ///
1525    /// Only one writer can hold the lock at a time.
1526    /// This increments the global epoch counter.
1527    #[inline]
1528    pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1529        // Increment the global epoch
1530        self.epoch.fetch_add(1, Ordering::SeqCst);
1531        let mut guard = self.inner.write();
1532        // Sync the inner graph's epoch with the global epoch
1533        guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1534        guard
1535    }
1536
1537    /// Returns the current global epoch.
1538    ///
1539    /// This can be used to detect if the graph has been modified
1540    /// since a previous operation (cursor invalidation).
1541    #[inline]
1542    #[must_use]
1543    pub fn epoch(&self) -> u64 {
1544        self.epoch.load(Ordering::SeqCst)
1545    }
1546
1547    /// Creates a cheap snapshot of the graph.
1548    ///
1549    /// This acquires a brief read lock to clone the Arc references.
1550    /// The snapshot is isolated from future mutations.
1551    #[must_use]
1552    pub fn snapshot(&self) -> GraphSnapshot {
1553        self.inner.read().snapshot()
1554    }
1555
1556    // ------------------------------------------------------------------
1557    // Phase 1 fact-layer provenance convenience accessors.
1558    // These acquire a brief read lock, matching the snapshot() pattern.
1559    // ------------------------------------------------------------------
1560
1561    /// Returns the monotonic fact-layer epoch from the underlying graph.
1562    #[must_use]
1563    pub fn fact_epoch(&self) -> u64 {
1564        self.inner.read().fact_epoch()
1565    }
1566
1567    /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1568    #[must_use]
1569    pub fn node_provenance(
1570        &self,
1571        id: crate::graph::unified::node::id::NodeId,
1572    ) -> Option<NodeProvenance> {
1573        self.inner.read().node_provenance(id).copied()
1574    }
1575
1576    /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1577    #[must_use]
1578    pub fn edge_provenance(
1579        &self,
1580        id: crate::graph::unified::edge::id::EdgeId,
1581    ) -> Option<EdgeProvenance> {
1582        self.inner.read().edge_provenance(id).copied()
1583    }
1584
1585    /// Returns a file provenance view (acquires a brief read lock).
1586    ///
1587    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1588    #[must_use]
1589    pub fn file_provenance(
1590        &self,
1591        id: crate::graph::unified::file::id::FileId,
1592    ) -> Option<OwnedFileProvenanceView> {
1593        let guard = self.inner.read();
1594        guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1595            content_hash: *v.content_hash,
1596            indexed_at: v.indexed_at,
1597            source_uri: v.source_uri,
1598            is_external: v.is_external,
1599        })
1600    }
1601
1602    // ------------------------------------------------------------------
1603    // Phase 2 binding-plane accessors (P2U03).
1604    // ------------------------------------------------------------------
1605
1606    /// Returns the scope arena from the underlying graph (acquires a brief
1607    /// read lock).
1608    ///
1609    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1610    /// this call site.
1611    #[must_use]
1612    pub fn scope_arena(&self) -> Arc<ScopeArena> {
1613        Arc::clone(&self.inner.read().scope_arena)
1614    }
1615
1616    /// Returns the alias table from the underlying graph (acquires a brief
1617    /// read lock).
1618    ///
1619    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1620    /// this call site.
1621    #[must_use]
1622    pub fn alias_table(&self) -> Arc<AliasTable> {
1623        Arc::clone(&self.inner.read().alias_table)
1624    }
1625
1626    /// Returns the shadow table from the underlying graph (acquires a brief
1627    /// read lock).
1628    ///
1629    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1630    /// this call site.
1631    #[must_use]
1632    pub fn shadow_table(&self) -> Arc<ShadowTable> {
1633        Arc::clone(&self.inner.read().shadow_table)
1634    }
1635
1636    /// Returns the scope provenance store from the underlying graph (acquires
1637    /// a brief read lock).
1638    ///
1639    /// Returns an `Arc` clone so the caller does not hold the lock beyond
1640    /// this call site.
1641    #[must_use]
1642    pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1643        Arc::clone(&self.inner.read().scope_provenance_store)
1644    }
1645
1646    /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1647    ///
1648    /// Returns an owned copy since the borrow cannot outlive the lock guard.
1649    #[must_use]
1650    pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1651        self.inner.read().scope_provenance(id).cloned()
1652    }
1653
1654    /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1655    /// brief read lock).
1656    ///
1657    /// Returns `None` if no provenance record is registered for that stable id.
1658    #[must_use]
1659    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1660        self.inner.read().scope_by_stable_id(stable)
1661    }
1662
1663    /// Returns the file segment table from the underlying graph (acquires a
1664    /// brief read lock).
1665    #[must_use]
1666    pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1667        Arc::clone(&self.inner.read().file_segments)
1668    }
1669
1670    /// Attempts to acquire a read lock without blocking.
1671    ///
1672    /// Returns `None` if the lock is currently held exclusively.
1673    #[inline]
1674    #[must_use]
1675    pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1676        self.inner.try_read()
1677    }
1678
1679    /// Attempts to acquire a write lock without blocking.
1680    ///
1681    /// Returns `None` if the lock is currently held by another thread.
1682    /// If successful, increments the epoch.
1683    #[inline]
1684    pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1685        self.inner.try_write().map(|mut guard| {
1686            self.epoch.fetch_add(1, Ordering::SeqCst);
1687            guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1688            guard
1689        })
1690    }
1691}
1692
1693impl Default for ConcurrentCodeGraph {
1694    fn default() -> Self {
1695        Self::new()
1696    }
1697}
1698
1699impl fmt::Debug for ConcurrentCodeGraph {
1700    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1701        f.debug_struct("ConcurrentCodeGraph")
1702            .field("epoch", &self.epoch.load(Ordering::SeqCst))
1703            .finish_non_exhaustive()
1704    }
1705}
1706
1707/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1708/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1709#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1710pub struct OwnedFileProvenanceView {
1711    /// SHA-256 of the on-disk file bytes (owned copy).
1712    pub content_hash: [u8; 32],
1713    /// Unix-epoch seconds at which this file was registered.
1714    pub indexed_at: u64,
1715    /// Optional interned physical origin URI.
1716    pub source_uri: Option<StringId>,
1717    /// Whether this file originates from an external source.
1718    pub is_external: bool,
1719}
1720
1721/// Immutable snapshot of a `CodeGraph` for long-running queries.
1722///
1723/// `GraphSnapshot` holds Arc references to the graph components,
1724/// providing a consistent view that is isolated from concurrent mutations.
1725///
1726/// # Design
1727///
1728/// Snapshots are created via `CodeGraph::snapshot()` or
1729/// `ConcurrentCodeGraph::snapshot()`. They are:
1730///
1731/// - **Immutable**: No mutation methods available
1732/// - **Isolated**: Independent of future graph mutations
1733/// - **Cheap**: Only Arc clones, no data copying
1734/// - **Self-contained**: Can outlive the original graph/lock
1735///
1736/// # Usage
1737///
1738/// ```rust
1739/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1740///
1741/// let graph = ConcurrentCodeGraph::new();
1742///
1743/// // Create snapshot for a long query
1744/// let snapshot: GraphSnapshot = graph.snapshot();
1745///
1746/// // Snapshot can be used independently
1747/// let _epoch = snapshot.epoch();
1748/// ```
1749#[derive(Clone)]
1750pub struct GraphSnapshot {
1751    /// Node storage snapshot.
1752    nodes: Arc<NodeArena>,
1753    /// Edge storage snapshot.
1754    edges: Arc<BidirectionalEdgeStore>,
1755    /// String interner snapshot.
1756    strings: Arc<StringInterner>,
1757    /// File registry snapshot.
1758    files: Arc<FileRegistry>,
1759    /// Auxiliary indices snapshot.
1760    indices: Arc<AuxiliaryIndices>,
1761    /// Sparse macro boundary metadata snapshot.
1762    macro_metadata: Arc<NodeMetadataStore>,
1763    /// Dense node provenance snapshot (Phase 1).
1764    node_provenance: Arc<NodeProvenanceStore>,
1765    /// Dense edge provenance snapshot (Phase 1).
1766    edge_provenance: Arc<EdgeProvenanceStore>,
1767    /// Monotonic fact-layer epoch at snapshot time.
1768    fact_epoch: u64,
1769    /// Epoch at snapshot time (for cursor validation).
1770    epoch: u64,
1771    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1772    scope_arena: Arc<ScopeArena>,
1773    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1774    alias_table: Arc<AliasTable>,
1775    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1776    shadow_table: Arc<ShadowTable>,
1777    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1778    scope_provenance_store: Arc<ScopeProvenanceStore>,
1779    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1780    file_segments: Arc<FileSegmentTable>,
1781    /// Phase A (U09): C indirect-call resolver side tables snapshot.
1782    ///
1783    /// `None` on non-C workspaces. The snapshot owns its own clone of the
1784    /// `Option<CIndirectSideTables>` so concurrent readers see a stable
1785    /// view independent of subsequent mutations to the source `CodeGraph`.
1786    c_indirect_tables: Option<CIndirectSideTables>,
1787}
1788
1789impl GraphSnapshot {
1790    /// Returns a reference to the node arena.
1791    #[inline]
1792    #[must_use]
1793    pub fn nodes(&self) -> &NodeArena {
1794        &self.nodes
1795    }
1796
1797    /// Returns a reference to the bidirectional edge store.
1798    #[inline]
1799    #[must_use]
1800    pub fn edges(&self) -> &BidirectionalEdgeStore {
1801        &self.edges
1802    }
1803
1804    /// Returns a reference to the string interner.
1805    #[inline]
1806    #[must_use]
1807    pub fn strings(&self) -> &StringInterner {
1808        &self.strings
1809    }
1810
1811    /// Returns a reference to the file registry.
1812    #[inline]
1813    #[must_use]
1814    pub fn files(&self) -> &FileRegistry {
1815        &self.files
1816    }
1817
1818    /// Returns a reference to the auxiliary indices.
1819    #[inline]
1820    #[must_use]
1821    pub fn indices(&self) -> &AuxiliaryIndices {
1822        &self.indices
1823    }
1824
1825    /// Returns a reference to the macro boundary metadata store.
1826    #[inline]
1827    #[must_use]
1828    pub fn macro_metadata(&self) -> &NodeMetadataStore {
1829        &self.macro_metadata
1830    }
1831
1832    /// Returns a reference to the C indirect-call side tables, if any.
1833    ///
1834    /// Mirrors [`CodeGraph::c_indirect_tables`]; see that method for
1835    /// the contract. Snapshot-level access is read-only.
1836    #[inline]
1837    #[must_use]
1838    pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
1839        self.c_indirect_tables.as_ref()
1840    }
1841
1842    // ------------------------------------------------------------------
1843    // Phase 1 fact-layer provenance accessors (P1U09).
1844    // ------------------------------------------------------------------
1845
1846    /// Returns the monotonic fact-layer epoch.
1847    #[inline]
1848    #[must_use]
1849    pub fn fact_epoch(&self) -> u64 {
1850        self.fact_epoch
1851    }
1852
1853    /// Looks up node provenance by `NodeId`.
1854    #[inline]
1855    #[must_use]
1856    pub fn node_provenance(
1857        &self,
1858        id: crate::graph::unified::node::id::NodeId,
1859    ) -> Option<&NodeProvenance> {
1860        self.node_provenance.lookup(id)
1861    }
1862
1863    /// Looks up edge provenance by `EdgeId`.
1864    #[inline]
1865    #[must_use]
1866    pub fn edge_provenance(
1867        &self,
1868        id: crate::graph::unified::edge::id::EdgeId,
1869    ) -> Option<&EdgeProvenance> {
1870        self.edge_provenance.lookup(id)
1871    }
1872
1873    /// Returns a borrowed provenance view for a file.
1874    #[inline]
1875    #[must_use]
1876    pub fn file_provenance(
1877        &self,
1878        id: crate::graph::unified::file::id::FileId,
1879    ) -> Option<FileProvenanceView<'_>> {
1880        self.files.file_provenance(id)
1881    }
1882
1883    // ------------------------------------------------------------------
1884    // Phase 2 binding-plane accessors (P2U03).
1885    // ------------------------------------------------------------------
1886
1887    /// Returns a reference to the scope arena at snapshot time.
1888    ///
1889    /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1890    /// as it existed when `snapshot()` was called. Subsequent calls to
1891    /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1892    #[inline]
1893    #[must_use]
1894    pub fn scope_arena(&self) -> &ScopeArena {
1895        &self.scope_arena
1896    }
1897
1898    /// Returns a reference to the alias table at snapshot time.
1899    ///
1900    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1901    /// as it existed when `snapshot()` was called. Subsequent calls to
1902    /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1903    #[inline]
1904    #[must_use]
1905    pub fn alias_table(&self) -> &AliasTable {
1906        &self.alias_table
1907    }
1908
1909    /// Returns a reference to the shadow table at snapshot time.
1910    ///
1911    /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1912    /// as it existed when `snapshot()` was called. Subsequent calls to
1913    /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1914    #[inline]
1915    #[must_use]
1916    pub fn shadow_table(&self) -> &ShadowTable {
1917        &self.shadow_table
1918    }
1919
1920    /// Returns a reference to the scope provenance store at snapshot time.
1921    ///
1922    /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1923    /// as it existed when `snapshot()` was called. Subsequent calls to
1924    /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1925    /// this view.
1926    #[inline]
1927    #[must_use]
1928    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1929        &self.scope_provenance_store
1930    }
1931
1932    /// Looks up scope provenance by `ScopeId` at snapshot time.
1933    ///
1934    /// Returns `None` if the slot is out of range, vacant, or the stored
1935    /// generation does not match (stale handle).
1936    #[inline]
1937    #[must_use]
1938    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1939        self.scope_provenance_store.lookup(id)
1940    }
1941
1942    /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1943    ///
1944    /// Returns `None` if no provenance record is registered for that stable id.
1945    #[inline]
1946    #[must_use]
1947    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1948        self.scope_provenance_store.scope_by_stable_id(stable)
1949    }
1950
1951    /// Returns a reference to the file segment table at snapshot time.
1952    #[inline]
1953    #[must_use]
1954    pub fn file_segments(&self) -> &FileSegmentTable {
1955        &self.file_segments
1956    }
1957
1958    /// Returns the epoch at which this snapshot was taken.
1959    ///
1960    /// This can be compared against the current graph epoch to
1961    /// detect if the graph has changed since the snapshot.
1962    #[inline]
1963    #[must_use]
1964    pub fn epoch(&self) -> u64 {
1965        self.epoch
1966    }
1967
1968    /// Returns `true` if this snapshot's epoch matches the given epoch.
1969    ///
1970    /// Use this to validate cursors before continuing pagination.
1971    #[inline]
1972    #[must_use]
1973    pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1974        self.epoch == other_epoch
1975    }
1976
1977    // ------------------------------------------------------------------
1978    // Phase 2 binding-plane facade accessor (P2U07).
1979    // ------------------------------------------------------------------
1980
1981    /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1982    ///
1983    /// The facade is the stable Phase 2 public API for scope/alias/shadow
1984    /// queries and witness-bearing resolution. It provides a single entry
1985    /// point (`resolve`) that returns both a `BindingResult` and an ordered
1986    /// step trace in a `BindingResolution`.
1987    ///
1988    /// # MVCC note
1989    ///
1990    /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1991    /// MVCC-consistent view of the graph at snapshot time. Callers from
1992    /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1993    /// pattern so the snapshot lifetime is explicit:
1994    ///
1995    /// ```rust,ignore
1996    /// // CodeGraph caller:
1997    /// let snapshot = graph.snapshot();
1998    /// let plane = snapshot.binding_plane();
1999    ///
2000    /// // ConcurrentCodeGraph caller:
2001    /// let read_guard = concurrent.read();
2002    /// let snapshot = read_guard.snapshot();
2003    /// let plane = snapshot.binding_plane();
2004    /// ```
2005    #[inline]
2006    #[must_use]
2007    pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
2008        crate::graph::unified::bind::plane::BindingPlane::new(self)
2009    }
2010
2011    // ============================================================================
2012    // Query Methods
2013    // ============================================================================
2014
2015    /// Finds nodes matching a pattern.
2016    ///
2017    /// Performs a simple substring match on node names and qualified names.
2018    /// Returns all matching node IDs.
2019    ///
2020    /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
2021    /// nodes — internal scaffolding the language plugins emit for
2022    /// binding-plane and scope analysis (e.g. the Go plugin's
2023    /// `<field:operand.field>` field-access shadows and the
2024    /// `<ident>@<offset>` per-binding-site Variable nodes from the
2025    /// local-scope resolver) — are filtered out by default. Internal
2026    /// callers that need to reach these nodes (binding plane, scope /
2027    /// alias / shadow analysis) use
2028    /// [`Self::find_by_pattern_with_options`] with
2029    /// `include_synthetic = true`.
2030    ///
2031    /// # Performance
2032    ///
2033    /// Optimized to iterate over unique strings in the interner (smaller set)
2034    /// rather than all nodes in the arena.
2035    ///
2036    /// # Arguments
2037    ///
2038    /// * `pattern` - The pattern to match (substring search)
2039    ///
2040    /// # Returns
2041    ///
2042    /// A vector of `NodeIds` for all matching nodes (synthetic
2043    /// placeholders excluded).
2044    #[must_use]
2045    pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
2046        self.find_by_pattern_with_options(pattern, false)
2047    }
2048
2049    /// Finds nodes matching a pattern with explicit control over synthetic
2050    /// placeholder visibility.
2051    ///
2052    /// `include_synthetic = false` is the default surface used by every
2053    /// user-facing caller (CLI `search`, MCP `semantic_search` /
2054    /// `pattern_search` / `relation_query`, etc.). Synthetic
2055    /// placeholders are suppressed via two parallel checks that must
2056    /// agree:
2057    ///
2058    /// 1. The authoritative `NodeFlags::SYNTHETIC` bit on the
2059    ///    metadata store
2060    ///    ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
2061    /// 2. The structural name-shape fallback
2062    ///    ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
2063    ///    for V10 snapshots written before the synthetic bit existed
2064    ///    and for cross-file unification losers that retained their
2065    ///    name but lost their metadata entry.
2066    ///
2067    /// Either check matching is sufficient to suppress the node. The
2068    /// design lives in `docs/development/public-issue-triage/`
2069    /// under the `C_SUPPRESS` unit; see also the rationale in
2070    /// [`crate::graph::unified::storage::metadata::NodeFlags::SYNTHETIC`].
2071    ///
2072    /// `include_synthetic = true` is **internal-only**. The binding
2073    /// plane, scope resolver, and rebuild's coverage gate use this
2074    /// path to reach synthetic nodes for their structural integrity
2075    /// checks. **No CLI / MCP surface should ever pass `true`.**
2076    #[must_use]
2077    pub fn find_by_pattern_with_options(
2078        &self,
2079        pattern: &str,
2080        include_synthetic: bool,
2081    ) -> Vec<crate::graph::unified::node::NodeId> {
2082        let mut matches = Vec::new();
2083
2084        // 1. Scan unique strings in interner for matches
2085        for (str_id, s) in self.strings.iter() {
2086            if s.contains(pattern) {
2087                // 2. If string matches, look up all nodes with this name
2088                // Check qualified name index
2089                matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2090                // Check simple name index
2091                matches.extend_from_slice(self.indices.by_name(str_id));
2092            }
2093        }
2094
2095        // Deduplicate matches (a node might match both qualified and simple name)
2096        matches.sort_unstable();
2097        matches.dedup();
2098
2099        if !include_synthetic {
2100            matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2101        }
2102
2103        matches
2104    }
2105
2106    /// Finds nodes whose interned simple **or** qualified name equals `name`,
2107    /// accepting dot- and Ruby-`#` qualified display form as a fallback for
2108    /// graph-canonical `::` qualified names.
2109    ///
2110    /// This is the canonical surface for **exact-name** lookups —
2111    /// shared by the CLI `--exact <pattern>` shorthand
2112    /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
2113    /// structural query planner's `name:` predicate
2114    /// (`sqry-db/src/planner/parse.rs`,
2115    /// `sqry-db/src/planner/execute.rs`). Both surfaces are
2116    /// contract-bound (DAG `B1_ALIGN`) to return the same set against
2117    /// any fixture: the CLI calls this method directly, while the
2118    /// planner uses the same interner + by-name index pair internally
2119    /// when scanning, then applies the same synthetic filter.
2120    ///
2121    /// **Synthetic suppression.** Synthetic placeholder nodes
2122    /// (Go-plugin `<field:operand.field>` shadows and
2123    /// `<ident>@<offset>` per-binding-site Variables; see
2124    /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
2125    /// are excluded via [`Self::is_node_synthetic`]. There is **no**
2126    /// `include_synthetic = true` variant for the exact-match surface
2127    /// because the synthetic name shapes the structural fallback
2128    /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
2129    /// name byte-for-byte; the metadata-bit channel is the only
2130    /// realistic leak vector and it is suppressed unconditionally.
2131    ///
2132    /// **Display fallback.** Exact lookup checks the literal input. When the
2133    /// input is qualified, language-aware display candidates win over raw
2134    /// canonical matches. This keeps native Rust `identity::T` from also
2135    /// returning a TypeScript type parameter whose internal canonical form is
2136    /// `identity::T` but whose display form is `identity.T`. If neither
2137    /// display nor literal lookup finds candidates, dot- and Ruby-`#`
2138    /// qualified inputs also check the graph-canonical `::` rewrite.
2139    ///
2140    /// # Performance
2141    ///
2142    /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2143    /// interned the resolver still tries a native-display to graph-canonical
2144    /// `::` fallback for user-facing qualified names before returning empty.
2145    ///
2146    /// # Arguments
2147    ///
2148    /// * `name` - The exact name to look up (no glob, no regex).
2149    ///
2150    /// # Returns
2151    ///
2152    /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2153    /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2154    /// name equals `name`, or matches for the graph-canonical `::` rewrite
2155    /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2156    /// literal or display candidates.
2157    #[must_use]
2158    pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2159        let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2160        if !is_qualified {
2161            // Bare name lookup: the interned `by_name` index covers
2162            // every language and is O(1); no display scan is needed.
2163            return self.find_by_exact_interned_name(name);
2164        }
2165        // Qualified name: each plugin stores its native form
2166        // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2167        // Ruby `Ledger#promotedField`). Scan computed display names so
2168        // a single dotted user query resolves to all language matches,
2169        // then fall back to the interned form (and the graph-canonical
2170        // `::` rewrite) when display yields nothing.
2171        let mut exact_matches = self.find_by_exact_display_name(name);
2172        if exact_matches.is_empty() {
2173            exact_matches = self.find_by_exact_interned_name(name);
2174            if exact_matches.is_empty() && !name.contains("::") {
2175                let canonical = name.replace(['.', '#'], "::");
2176                exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2177                exact_matches.sort_unstable();
2178                exact_matches.dedup();
2179            }
2180        }
2181        exact_matches
2182    }
2183
2184    fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2185        let mut matches = self
2186            .iter_nodes()
2187            .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2188            .filter_map(|(node_id, entry)| {
2189                let qualified = entry
2190                    .qualified_name
2191                    .and_then(|sid| self.strings.resolve(sid))?;
2192                let display = self.files.language_for_file(entry.file).map_or_else(
2193                    || qualified.to_string(),
2194                    |language| {
2195                        display_graph_qualified_name(
2196                            language,
2197                            qualified.as_ref(),
2198                            entry.kind,
2199                            entry.is_static,
2200                        )
2201                    },
2202                );
2203                (display == name).then_some(node_id)
2204            })
2205            .collect::<Vec<_>>();
2206        matches.sort_unstable();
2207        matches.dedup();
2208        matches
2209    }
2210
2211    fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2212        let Some(str_id) = self.strings.get(name) else {
2213            return Vec::new();
2214        };
2215
2216        let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2217        matches.extend_from_slice(self.indices.by_name(str_id));
2218        matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2219        matches.sort_unstable();
2220        matches.dedup();
2221        matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2222        matches
2223    }
2224
2225    /// Returns `true` if the node should be treated as a synthetic
2226    /// placeholder for user-facing surfaces.
2227    ///
2228    /// Combines the metadata-store flag and the structural name-shape
2229    /// fallback (see [`Self::find_by_pattern_with_options`] for the
2230    /// full rationale). Returns `false` for missing nodes (an unknown
2231    /// `NodeId` is not "synthetic" — it is "not present").
2232    #[must_use]
2233    pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2234        // Authoritative check: metadata-store bit (NodeFlags::SYNTHETIC).
2235        if self.macro_metadata.is_synthetic(node_id) {
2236            return true;
2237        }
2238        // Structural fallback: name shape recognised as synthetic.
2239        // Required for V10 snapshots written before the bit existed,
2240        // for unification losers that lost their metadata entry, and
2241        // as defence-in-depth against future plugins forgetting to
2242        // flip the bit.
2243        let Some(entry) = self.nodes.get(node_id) else {
2244            return false;
2245        };
2246        if entry.is_unified_loser() {
2247            // Already invisible for other reasons; do not also flag as synthetic.
2248            return false;
2249        }
2250        let Some(name) = self.strings.resolve(entry.name) else {
2251            return false;
2252        };
2253        crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2254            name.as_ref(),
2255        )
2256    }
2257
2258    /// Gets all callees of a node (functions called by this node).
2259    ///
2260    /// Queries the forward edge store for all Calls edges from this node.
2261    ///
2262    /// # Arguments
2263    ///
2264    /// * `node` - The node ID to query
2265    ///
2266    /// # Returns
2267    ///
2268    /// A vector of `NodeIds` representing functions called by this node.
2269    #[must_use]
2270    pub fn get_callees(
2271        &self,
2272        node: crate::graph::unified::node::NodeId,
2273    ) -> Vec<crate::graph::unified::node::NodeId> {
2274        use crate::graph::unified::edge::EdgeKind;
2275
2276        self.edges
2277            .edges_from(node)
2278            .into_iter()
2279            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2280            .map(|edge| edge.target)
2281            .collect()
2282    }
2283
2284    /// Gets all callers of a node (functions that call this node).
2285    ///
2286    /// Queries the reverse edge store for all Calls edges to this node.
2287    ///
2288    /// # Arguments
2289    ///
2290    /// * `node` - The node ID to query
2291    ///
2292    /// # Returns
2293    ///
2294    /// A vector of `NodeIds` representing functions that call this node.
2295    #[must_use]
2296    pub fn get_callers(
2297        &self,
2298        node: crate::graph::unified::node::NodeId,
2299    ) -> Vec<crate::graph::unified::node::NodeId> {
2300        use crate::graph::unified::edge::EdgeKind;
2301
2302        self.edges
2303            .edges_to(node)
2304            .into_iter()
2305            .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2306            .map(|edge| edge.source)
2307            .collect()
2308    }
2309
2310    /// Iterates over all nodes in the graph.
2311    ///
2312    /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2313    /// occupied slots in the arena.
2314    ///
2315    /// # Returns
2316    ///
2317    /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2318    pub fn iter_nodes(
2319        &self,
2320    ) -> impl Iterator<
2321        Item = (
2322            crate::graph::unified::node::NodeId,
2323            &crate::graph::unified::storage::arena::NodeEntry,
2324        ),
2325    > {
2326        self.nodes.iter()
2327    }
2328
2329    /// Iterates over all edges in the graph.
2330    ///
2331    /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2332    /// all edges in the forward edge store.
2333    ///
2334    /// # Returns
2335    ///
2336    /// An iterator over edge tuples.
2337    pub fn iter_edges(
2338        &self,
2339    ) -> impl Iterator<
2340        Item = (
2341            crate::graph::unified::node::NodeId,
2342            crate::graph::unified::node::NodeId,
2343            crate::graph::unified::edge::EdgeKind,
2344        ),
2345    > + '_ {
2346        // Iterate over all nodes in the arena and get their outgoing edges
2347        self.nodes.iter().flat_map(move |(node_id, _entry)| {
2348            // Get all edges from this node
2349            self.edges
2350                .edges_from(node_id)
2351                .into_iter()
2352                .map(move |edge| (node_id, edge.target, edge.kind))
2353        })
2354    }
2355
2356    /// Gets a node entry by ID.
2357    ///
2358    /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2359    /// if the ID is invalid or stale.
2360    ///
2361    /// # Arguments
2362    ///
2363    /// * `id` - The node ID to look up
2364    ///
2365    /// # Returns
2366    ///
2367    /// A reference to the `NodeEntry`, or None if not found.
2368    #[must_use]
2369    pub fn get_node(
2370        &self,
2371        id: crate::graph::unified::node::NodeId,
2372    ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2373        self.nodes.get(id)
2374    }
2375}
2376
2377impl fmt::Debug for GraphSnapshot {
2378    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2379        f.debug_struct("GraphSnapshot")
2380            .field("nodes", &self.nodes.len())
2381            .field("epoch", &self.epoch)
2382            .finish_non_exhaustive()
2383    }
2384}
2385
2386/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2387///
2388/// This lets helpers that only *read* graph state (name-matching, relation
2389/// traversal, reference lookups) be written once and called from both the
2390/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2391/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
2392pub trait GraphAccess {
2393    /// Returns the node arena (read-only).
2394    fn nodes(&self) -> &NodeArena;
2395    /// Returns the bidirectional edge store (read-only).
2396    fn edges(&self) -> &BidirectionalEdgeStore;
2397    /// Returns the string interner (read-only).
2398    fn strings(&self) -> &StringInterner;
2399    /// Returns the file registry (read-only).
2400    fn files(&self) -> &FileRegistry;
2401    /// Returns the auxiliary indices (read-only).
2402    fn indices(&self) -> &AuxiliaryIndices;
2403}
2404
2405impl GraphAccess for CodeGraph {
2406    #[inline]
2407    fn nodes(&self) -> &NodeArena {
2408        CodeGraph::nodes(self)
2409    }
2410    #[inline]
2411    fn edges(&self) -> &BidirectionalEdgeStore {
2412        CodeGraph::edges(self)
2413    }
2414    #[inline]
2415    fn strings(&self) -> &StringInterner {
2416        CodeGraph::strings(self)
2417    }
2418    #[inline]
2419    fn files(&self) -> &FileRegistry {
2420        CodeGraph::files(self)
2421    }
2422    #[inline]
2423    fn indices(&self) -> &AuxiliaryIndices {
2424        CodeGraph::indices(self)
2425    }
2426}
2427
2428impl GraphAccess for GraphSnapshot {
2429    #[inline]
2430    fn nodes(&self) -> &NodeArena {
2431        GraphSnapshot::nodes(self)
2432    }
2433    #[inline]
2434    fn edges(&self) -> &BidirectionalEdgeStore {
2435        GraphSnapshot::edges(self)
2436    }
2437    #[inline]
2438    fn strings(&self) -> &StringInterner {
2439        GraphSnapshot::strings(self)
2440    }
2441    #[inline]
2442    fn files(&self) -> &FileRegistry {
2443        GraphSnapshot::files(self)
2444    }
2445    #[inline]
2446    fn indices(&self) -> &AuxiliaryIndices {
2447        GraphSnapshot::indices(self)
2448    }
2449}
2450
2451#[cfg(test)]
2452mod tests {
2453    use super::*;
2454    use crate::graph::unified::{
2455        FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2456        SymbolResolutionOutcome,
2457    };
2458
2459    fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2460        match snapshot.resolve_symbol(&SymbolQuery {
2461            symbol,
2462            file_scope: FileScope::Any,
2463            mode: ResolutionMode::Strict,
2464        }) {
2465            SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2466            SymbolResolutionOutcome::NotFound
2467            | SymbolResolutionOutcome::FileNotIndexed
2468            | SymbolResolutionOutcome::Ambiguous(_) => None,
2469        }
2470    }
2471
2472    fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2473        match snapshot.find_symbol_candidates(&SymbolQuery {
2474            symbol,
2475            file_scope: FileScope::Any,
2476            mode: ResolutionMode::AllowSuffixCandidates,
2477        }) {
2478            SymbolCandidateOutcome::Candidates(candidates) => candidates,
2479            SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2480        }
2481    }
2482
2483    #[test]
2484    fn test_code_graph_new() {
2485        let graph = CodeGraph::new();
2486        assert_eq!(graph.epoch(), 0);
2487        assert_eq!(graph.nodes().len(), 0);
2488    }
2489
2490    #[test]
2491    fn test_code_graph_default() {
2492        let graph = CodeGraph::default();
2493        assert_eq!(graph.epoch(), 0);
2494    }
2495
2496    #[test]
2497    fn test_code_graph_snapshot() {
2498        let graph = CodeGraph::new();
2499        let snapshot = graph.snapshot();
2500        assert_eq!(snapshot.epoch(), 0);
2501        assert_eq!(snapshot.nodes().len(), 0);
2502    }
2503
2504    #[test]
2505    fn test_code_graph_bump_epoch() {
2506        let mut graph = CodeGraph::new();
2507        assert_eq!(graph.epoch(), 0);
2508        assert_eq!(graph.bump_epoch(), 1);
2509        assert_eq!(graph.epoch(), 1);
2510        assert_eq!(graph.bump_epoch(), 2);
2511        assert_eq!(graph.epoch(), 2);
2512    }
2513
2514    #[test]
2515    fn test_code_graph_set_epoch() {
2516        let mut graph = CodeGraph::new();
2517        graph.set_epoch(42);
2518        assert_eq!(graph.epoch(), 42);
2519    }
2520
2521    #[test]
2522    fn test_code_graph_from_components() {
2523        let nodes = NodeArena::new();
2524        let edges = BidirectionalEdgeStore::new();
2525        let strings = StringInterner::new();
2526        let files = FileRegistry::new();
2527        let indices = AuxiliaryIndices::new();
2528        let macro_metadata = NodeMetadataStore::new();
2529
2530        let graph =
2531            CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2532        assert_eq!(graph.epoch(), 0);
2533    }
2534
2535    #[test]
2536    fn test_code_graph_mut_accessors() {
2537        let mut graph = CodeGraph::new();
2538
2539        // Access mutable references - should not panic
2540        let _nodes = graph.nodes_mut();
2541        let _edges = graph.edges_mut();
2542        let _strings = graph.strings_mut();
2543        let _files = graph.files_mut();
2544        let _indices = graph.indices_mut();
2545    }
2546
2547    #[test]
2548    fn test_code_graph_snapshot_isolation() {
2549        let mut graph = CodeGraph::new();
2550        let snapshot1 = graph.snapshot();
2551
2552        // Mutate the graph
2553        graph.bump_epoch();
2554
2555        let snapshot2 = graph.snapshot();
2556
2557        // Snapshots should have different epochs
2558        assert_eq!(snapshot1.epoch(), 0);
2559        assert_eq!(snapshot2.epoch(), 1);
2560    }
2561
2562    #[test]
2563    fn test_code_graph_debug() {
2564        let graph = CodeGraph::new();
2565        let debug_str = format!("{graph:?}");
2566        assert!(debug_str.contains("CodeGraph"));
2567        assert!(debug_str.contains("epoch"));
2568    }
2569
2570    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
2571    /// accounting: adding a `ConfidenceMetadata` entry with known inner
2572    /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
2573    /// of those capacities plus Vec<String> slot overhead.
2574    #[test]
2575    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
2576        let mut graph = CodeGraph::new();
2577        // Seed one confidence entry so the HashMap has non-zero capacity;
2578        // reserve slack so the next insert cannot rehash.
2579        graph.set_confidence({
2580            let mut m = HashMap::with_capacity(8);
2581            m.insert("seed".to_string(), ConfidenceMetadata::default());
2582            m
2583        });
2584        let before = graph.heap_bytes();
2585        let before_cap = graph.confidence.capacity();
2586
2587        let lim1 = String::from("no type inference");
2588        let lim2 = String::from("no generic specialization");
2589        let feat1 = String::from("rust-analyzer");
2590        let l1 = lim1.capacity();
2591        let l2 = lim2.capacity();
2592        let f1 = feat1.capacity();
2593
2594        let limitations = vec![lim1, lim2];
2595        let lim_vec_cap = limitations.capacity();
2596
2597        let unavailable_features = vec![feat1];
2598        let feat_vec_cap = unavailable_features.capacity();
2599
2600        let key = String::from("rust");
2601        let key_cap = key.capacity();
2602        graph.confidence.insert(
2603            key,
2604            ConfidenceMetadata {
2605                limitations,
2606                unavailable_features,
2607                ..Default::default()
2608            },
2609        );
2610        assert_eq!(
2611            graph.confidence.capacity(),
2612            before_cap,
2613            "prerequisite: confidence HashMap must not rehash during the test insert",
2614        );
2615
2616        let after = graph.heap_bytes();
2617        let expected_inner = key_cap
2618            + lim_vec_cap * std::mem::size_of::<String>()
2619            + l1
2620            + l2
2621            + feat_vec_cap * std::mem::size_of::<String>()
2622            + f1;
2623        assert_eq!(
2624            after - before,
2625            expected_inner,
2626            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
2627        );
2628    }
2629
2630    #[test]
2631    fn test_codegraph_heap_bytes_grows_with_content() {
2632        use crate::graph::unified::node::NodeKind;
2633        use crate::graph::unified::storage::arena::NodeEntry;
2634        use std::path::Path;
2635
2636        // An empty graph reports some heap bytes (FileRegistry seeds index 0
2637        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
2638        // etc.) but the value must be finite and well under the 100 MB cap.
2639        let empty = CodeGraph::new();
2640        let empty_bytes = empty.heap_bytes();
2641        assert!(
2642            empty_bytes < 100 * 1024 * 1024,
2643            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2644        );
2645
2646        let mut graph = CodeGraph::new();
2647        for i in 0..32u32 {
2648            let name = format!("sym_{i}");
2649            let qual = format!("module::sym_{i}");
2650            let file = format!("file_{i}.rs");
2651
2652            let name_id = graph.strings_mut().intern(&name).unwrap();
2653            let qual_id = graph.strings_mut().intern(&qual).unwrap();
2654            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2655
2656            let entry =
2657                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2658            let node_id = graph.nodes_mut().alloc(entry).unwrap();
2659            graph
2660                .indices_mut()
2661                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2662        }
2663
2664        let populated_bytes = graph.heap_bytes();
2665        assert!(
2666            populated_bytes > 0,
2667            "populated graph should report nonzero heap bytes"
2668        );
2669        assert!(
2670            populated_bytes > empty_bytes,
2671            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2672        );
2673        assert!(
2674            populated_bytes < 100 * 1024 * 1024,
2675            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2676        );
2677    }
2678
2679    #[test]
2680    fn test_concurrent_code_graph_new() {
2681        let graph = ConcurrentCodeGraph::new();
2682        assert_eq!(graph.epoch(), 0);
2683    }
2684
2685    #[test]
2686    fn test_concurrent_code_graph_default() {
2687        let graph = ConcurrentCodeGraph::default();
2688        assert_eq!(graph.epoch(), 0);
2689    }
2690
2691    #[test]
2692    fn test_concurrent_code_graph_from_graph() {
2693        let mut inner = CodeGraph::new();
2694        inner.set_epoch(10);
2695        let graph = ConcurrentCodeGraph::from_graph(inner);
2696        assert_eq!(graph.epoch(), 10);
2697    }
2698
2699    #[test]
2700    fn test_concurrent_code_graph_read() {
2701        let graph = ConcurrentCodeGraph::new();
2702        let guard = graph.read();
2703        assert_eq!(guard.epoch(), 0);
2704        assert_eq!(guard.nodes().len(), 0);
2705    }
2706
2707    #[test]
2708    fn test_concurrent_code_graph_write_increments_epoch() {
2709        let graph = ConcurrentCodeGraph::new();
2710        assert_eq!(graph.epoch(), 0);
2711
2712        {
2713            let guard = graph.write();
2714            assert_eq!(guard.epoch(), 1);
2715        }
2716
2717        assert_eq!(graph.epoch(), 1);
2718
2719        {
2720            let _guard = graph.write();
2721        }
2722
2723        assert_eq!(graph.epoch(), 2);
2724    }
2725
2726    #[test]
2727    fn test_concurrent_code_graph_snapshot() {
2728        let graph = ConcurrentCodeGraph::new();
2729
2730        {
2731            let _guard = graph.write();
2732        }
2733
2734        let snapshot = graph.snapshot();
2735        assert_eq!(snapshot.epoch(), 1);
2736    }
2737
2738    #[test]
2739    fn test_concurrent_code_graph_try_read() {
2740        let graph = ConcurrentCodeGraph::new();
2741        let guard = graph.try_read();
2742        assert!(guard.is_some());
2743    }
2744
2745    #[test]
2746    fn test_concurrent_code_graph_try_write() {
2747        let graph = ConcurrentCodeGraph::new();
2748        let guard = graph.try_write();
2749        assert!(guard.is_some());
2750        assert_eq!(graph.epoch(), 1);
2751    }
2752
2753    #[test]
2754    fn test_concurrent_code_graph_debug() {
2755        let graph = ConcurrentCodeGraph::new();
2756        let debug_str = format!("{graph:?}");
2757        assert!(debug_str.contains("ConcurrentCodeGraph"));
2758        assert!(debug_str.contains("epoch"));
2759    }
2760
2761    #[test]
2762    fn test_graph_snapshot_accessors() {
2763        let graph = CodeGraph::new();
2764        let snapshot = graph.snapshot();
2765
2766        // All accessors should work
2767        let _nodes = snapshot.nodes();
2768        let _edges = snapshot.edges();
2769        let _strings = snapshot.strings();
2770        let _files = snapshot.files();
2771        let _indices = snapshot.indices();
2772        let _epoch = snapshot.epoch();
2773    }
2774
2775    #[test]
2776    fn test_graph_snapshot_epoch_matches() {
2777        let graph = CodeGraph::new();
2778        let snapshot = graph.snapshot();
2779
2780        assert!(snapshot.epoch_matches(0));
2781        assert!(!snapshot.epoch_matches(1));
2782    }
2783
2784    #[test]
2785    fn test_graph_snapshot_clone() {
2786        let graph = CodeGraph::new();
2787        let snapshot1 = graph.snapshot();
2788        let snapshot2 = snapshot1.clone();
2789
2790        assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2791    }
2792
2793    #[test]
2794    fn test_graph_snapshot_debug() {
2795        let graph = CodeGraph::new();
2796        let snapshot = graph.snapshot();
2797        let debug_str = format!("{snapshot:?}");
2798        assert!(debug_str.contains("GraphSnapshot"));
2799        assert!(debug_str.contains("epoch"));
2800    }
2801
2802    #[test]
2803    fn test_multiple_readers() {
2804        let graph = ConcurrentCodeGraph::new();
2805
2806        // Multiple readers should be able to acquire locks simultaneously
2807        let guard1 = graph.read();
2808        let guard2 = graph.read();
2809        let guard3 = graph.read();
2810
2811        assert_eq!(guard1.epoch(), 0);
2812        assert_eq!(guard2.epoch(), 0);
2813        assert_eq!(guard3.epoch(), 0);
2814    }
2815
2816    #[test]
2817    fn test_code_graph_clone() {
2818        let mut graph = CodeGraph::new();
2819        graph.bump_epoch();
2820
2821        let cloned = graph.clone();
2822        assert_eq!(cloned.epoch(), 1);
2823    }
2824
2825    #[test]
2826    fn test_epoch_wrapping() {
2827        let mut graph = CodeGraph::new();
2828        graph.set_epoch(u64::MAX);
2829        let new_epoch = graph.bump_epoch();
2830        assert_eq!(new_epoch, 0); // Should wrap around
2831    }
2832
2833    // ============================================================================
2834    // Query method tests
2835    // ============================================================================
2836
2837    #[test]
2838    fn test_snapshot_resolve_symbol() {
2839        use crate::graph::unified::node::NodeKind;
2840        use crate::graph::unified::storage::arena::NodeEntry;
2841        use std::path::Path;
2842
2843        let mut graph = CodeGraph::new();
2844
2845        // Add some nodes with qualified names
2846        let name_id = graph.strings_mut().intern("test_func").unwrap();
2847        let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2848        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2849
2850        let entry =
2851            NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2852
2853        let node_id = graph.nodes_mut().alloc(entry).unwrap();
2854        graph.indices_mut().add(
2855            node_id,
2856            NodeKind::Function,
2857            name_id,
2858            Some(qual_name_id),
2859            file_id,
2860        );
2861
2862        let snapshot = graph.snapshot();
2863
2864        // Find by qualified name
2865        let found = resolve_symbol_strict(&snapshot, "module::test_func");
2866        assert_eq!(found, Some(node_id));
2867
2868        // Find by exact simple name
2869        let found2 = resolve_symbol_strict(&snapshot, "test_func");
2870        assert_eq!(found2, Some(node_id));
2871
2872        // Not found
2873        assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2874    }
2875
2876    #[test]
2877    fn test_snapshot_find_by_pattern() {
2878        use crate::graph::unified::node::NodeKind;
2879        use crate::graph::unified::storage::arena::NodeEntry;
2880        use std::path::Path;
2881
2882        let mut graph = CodeGraph::new();
2883
2884        // Add nodes with different names
2885        let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2886        let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2887        let name3 = graph.strings_mut().intern("qux_test").unwrap();
2888        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2889
2890        let node1 = graph
2891            .nodes_mut()
2892            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2893            .unwrap();
2894        let node2 = graph
2895            .nodes_mut()
2896            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2897            .unwrap();
2898        let node3 = graph
2899            .nodes_mut()
2900            .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2901            .unwrap();
2902
2903        graph
2904            .indices_mut()
2905            .add(node1, NodeKind::Function, name1, None, file_id);
2906        graph
2907            .indices_mut()
2908            .add(node2, NodeKind::Function, name2, None, file_id);
2909        graph
2910            .indices_mut()
2911            .add(node3, NodeKind::Function, name3, None, file_id);
2912
2913        let snapshot = graph.snapshot();
2914
2915        // Find by pattern
2916        let matches = snapshot.find_by_pattern("bar");
2917        assert_eq!(matches.len(), 2);
2918        assert!(matches.contains(&node1));
2919        assert!(matches.contains(&node2));
2920
2921        // Find single match
2922        let matches = snapshot.find_by_pattern("qux");
2923        assert_eq!(matches.len(), 1);
2924        assert_eq!(matches[0], node3);
2925
2926        // No matches
2927        let matches = snapshot.find_by_pattern("nonexistent");
2928        assert!(matches.is_empty());
2929    }
2930
2931    #[test]
2932    fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
2933        // C_SUPPRESS: synthetic placeholder nodes (Go plugin
2934        // `<field:operand.field>` shadows + `<ident>@<offset>`
2935        // per-binding-site Variables) must NOT surface from
2936        // find_by_pattern, but must still be reachable via
2937        // find_by_pattern_with_options(_, true).
2938        use crate::graph::unified::node::NodeKind;
2939        use crate::graph::unified::storage::arena::NodeEntry;
2940        use std::path::Path;
2941
2942        let mut graph = CodeGraph::new();
2943        let real_property = graph
2944            .strings_mut()
2945            .intern("main.SelectorSource.NeedTags")
2946            .unwrap();
2947        let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
2948        let synthetic_field = graph
2949            .strings_mut()
2950            .intern("<field:selector.NeedTags>")
2951            .unwrap();
2952        let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
2953        let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
2954        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
2955
2956        let prop_id = graph
2957            .nodes_mut()
2958            .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
2959            .unwrap();
2960        let local_var_id = graph
2961            .nodes_mut()
2962            .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
2963            .unwrap();
2964        let syn_field_id = graph
2965            .nodes_mut()
2966            .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
2967            .unwrap();
2968        let syn_a_id = graph
2969            .nodes_mut()
2970            .alloc(NodeEntry::new(
2971                NodeKind::Variable,
2972                synthetic_offset_a,
2973                file_id,
2974            ))
2975            .unwrap();
2976        let syn_b_id = graph
2977            .nodes_mut()
2978            .alloc(NodeEntry::new(
2979                NodeKind::Variable,
2980                synthetic_offset_b,
2981                file_id,
2982            ))
2983            .unwrap();
2984
2985        graph
2986            .indices_mut()
2987            .add(prop_id, NodeKind::Property, real_property, None, file_id);
2988        graph.indices_mut().add(
2989            local_var_id,
2990            NodeKind::Variable,
2991            real_local_var,
2992            None,
2993            file_id,
2994        );
2995        graph.indices_mut().add(
2996            syn_field_id,
2997            NodeKind::Variable,
2998            synthetic_field,
2999            None,
3000            file_id,
3001        );
3002        graph.indices_mut().add(
3003            syn_a_id,
3004            NodeKind::Variable,
3005            synthetic_offset_a,
3006            None,
3007            file_id,
3008        );
3009        graph.indices_mut().add(
3010            syn_b_id,
3011            NodeKind::Variable,
3012            synthetic_offset_b,
3013            None,
3014            file_id,
3015        );
3016
3017        // Flag two of them via the metadata-store bit (the canonical
3018        // Go-plugin emission path) and leave one (`<field:...>`) only
3019        // covered by the structural name-shape fallback to verify both
3020        // recognition channels suppress the leak.
3021        graph.macro_metadata_mut().mark_synthetic(syn_a_id);
3022        graph.macro_metadata_mut().mark_synthetic(syn_b_id);
3023
3024        let snapshot = graph.snapshot();
3025
3026        // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
3027        let matches = snapshot.find_by_pattern("NeedTags");
3028        assert!(matches.contains(&prop_id), "Property must be surfaced");
3029        assert!(
3030            matches.contains(&local_var_id),
3031            "real local var must be surfaced"
3032        );
3033        assert!(
3034            !matches.contains(&syn_field_id),
3035            "<field:...> synthetic must be suppressed (name-shape fallback)"
3036        );
3037        assert!(
3038            !matches.contains(&syn_a_id),
3039            "NeedTags@469 must be suppressed (metadata bit)"
3040        );
3041        assert!(
3042            !matches.contains(&syn_b_id),
3043            "NeedTags@508 must be suppressed (metadata bit)"
3044        );
3045        assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");
3046
3047        // Internal include-synthetic surface (binding plane, scope analysis):
3048        // every node remains reachable.
3049        let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
3050        assert_eq!(
3051            all_matches.len(),
3052            5,
3053            "include_synthetic surfaces everything"
3054        );
3055        assert!(all_matches.contains(&prop_id));
3056        assert!(all_matches.contains(&local_var_id));
3057        assert!(all_matches.contains(&syn_field_id));
3058        assert!(all_matches.contains(&syn_a_id));
3059        assert!(all_matches.contains(&syn_b_id));
3060
3061        // is_node_synthetic exposed for surface-level filters
3062        // (e.g., MCP semantic_search/relation_query post-filters).
3063        assert!(snapshot.is_node_synthetic(syn_field_id));
3064        assert!(snapshot.is_node_synthetic(syn_a_id));
3065        assert!(snapshot.is_node_synthetic(syn_b_id));
3066        assert!(!snapshot.is_node_synthetic(prop_id));
3067        assert!(!snapshot.is_node_synthetic(local_var_id));
3068    }
3069
3070    #[test]
3071    #[allow(clippy::too_many_lines)]
3072    fn find_by_exact_name_aligns_with_planner_name_predicate() {
3073        // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
3074        // surface for the CLI `--exact NeedTags` shorthand and the
3075        // planner's `name:NeedTags` predicate. Both paths must return
3076        // the same set against this fixture.
3077        use crate::graph::unified::node::NodeKind;
3078        use crate::graph::unified::storage::arena::NodeEntry;
3079        use std::path::Path;
3080
3081        let mut graph = CodeGraph::new();
3082        // Property nodes carry the package-qualified name as
3083        // `entry.name` (Go plugin convention; see `helper.rs`'s
3084        // `semantic_name_for_node_input`).
3085        let property_qname = graph
3086            .strings_mut()
3087            .intern("main.SelectorSource.NeedTags")
3088            .unwrap();
3089        let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
3090        let synthetic_field_name = graph
3091            .strings_mut()
3092            .intern("<field:selector.NeedTags>")
3093            .unwrap();
3094        let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
3095        let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
3096        let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
3097        let display_fallback_qname = graph
3098            .strings_mut()
3099            .intern("main::SelectorSource::Other")
3100            .unwrap();
3101        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3102
3103        let prop_id = graph
3104            .nodes_mut()
3105            .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
3106            .unwrap();
3107        let local_var_id = graph
3108            .nodes_mut()
3109            .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
3110            .unwrap();
3111        let syn_field_id = graph
3112            .nodes_mut()
3113            .alloc(NodeEntry::new(
3114                NodeKind::Variable,
3115                synthetic_field_name,
3116                file_id,
3117            ))
3118            .unwrap();
3119        let syn_offset_id = graph
3120            .nodes_mut()
3121            .alloc(NodeEntry::new(
3122                NodeKind::Variable,
3123                synthetic_offset_name,
3124                file_id,
3125            ))
3126            .unwrap();
3127        let unrelated_id = graph
3128            .nodes_mut()
3129            .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
3130            .unwrap();
3131        let display_fallback_id = graph
3132            .nodes_mut()
3133            .alloc(
3134                NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
3135                    .with_qualified_name(display_fallback_qname),
3136            )
3137            .unwrap();
3138
3139        graph
3140            .indices_mut()
3141            .add(prop_id, NodeKind::Property, property_qname, None, file_id);
3142        graph.indices_mut().add(
3143            local_var_id,
3144            NodeKind::Variable,
3145            local_var_name,
3146            None,
3147            file_id,
3148        );
3149        graph.indices_mut().add(
3150            syn_field_id,
3151            NodeKind::Variable,
3152            synthetic_field_name,
3153            None,
3154            file_id,
3155        );
3156        graph.indices_mut().add(
3157            syn_offset_id,
3158            NodeKind::Variable,
3159            synthetic_offset_name,
3160            None,
3161            file_id,
3162        );
3163        graph.indices_mut().add(
3164            unrelated_id,
3165            NodeKind::Function,
3166            unrelated_name,
3167            None,
3168            file_id,
3169        );
3170        graph.indices_mut().add(
3171            display_fallback_id,
3172            NodeKind::Property,
3173            display_fallback_name,
3174            Some(display_fallback_qname),
3175            file_id,
3176        );
3177
3178        // Mark the offset-suffixed synthetic via the metadata bit so we
3179        // exercise both recognition channels in this fixture.
3180        graph.macro_metadata_mut().mark_synthetic(syn_offset_id);
3181
3182        let snapshot = graph.snapshot();
3183
3184        // Exact-name lookup on "NeedTags" — should pick up only the
3185        // local variable (its `entry.name` is exactly "NeedTags"); it
3186        // must NOT pick up the Property (qualified name contains but
3187        // does not equal "NeedTags") and must NOT pick up either
3188        // synthetic placeholder.
3189        let exact = snapshot.find_by_exact_name("NeedTags");
3190        assert_eq!(
3191            exact,
3192            vec![local_var_id],
3193            "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
3194        );
3195
3196        // The Property's full qualified name is exact-addressable.
3197        let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
3198        assert_eq!(qualified, vec![prop_id]);
3199
3200        // Dot-qualified display form falls back to graph-canonical `::` only
3201        // when the exact dot string was absent.
3202        let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
3203        assert_eq!(display_fallback, vec![display_fallback_id]);
3204
3205        // Substring-only matches must not surface from exact lookup.
3206        assert!(
3207            snapshot
3208                .find_by_exact_name("NeedTagsHelper")
3209                .contains(&unrelated_id)
3210        );
3211        assert!(
3212            !snapshot
3213                .find_by_exact_name("NeedTags")
3214                .contains(&unrelated_id),
3215            "exact 'NeedTags' must not match 'NeedTagsHelper'"
3216        );
3217
3218        // Unknown name short-circuits to an empty vec without
3219        // scanning any nodes.
3220        assert!(
3221            snapshot
3222                .find_by_exact_name("ThisStringIsNotInterned")
3223                .is_empty()
3224        );
3225    }
3226
3227    #[test]
3228    fn test_snapshot_get_callees() {
3229        use crate::graph::unified::edge::EdgeKind;
3230        use crate::graph::unified::node::NodeKind;
3231        use crate::graph::unified::storage::arena::NodeEntry;
3232        use std::path::Path;
3233
3234        let mut graph = CodeGraph::new();
3235
3236        // Create caller and callee nodes
3237        let caller_name = graph.strings_mut().intern("caller").unwrap();
3238        let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3239        let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3240        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3241
3242        let caller_id = graph
3243            .nodes_mut()
3244            .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3245            .unwrap();
3246        let callee1_id = graph
3247            .nodes_mut()
3248            .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3249            .unwrap();
3250        let callee2_id = graph
3251            .nodes_mut()
3252            .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3253            .unwrap();
3254
3255        // Add call edges
3256        graph.edges_mut().add_edge(
3257            caller_id,
3258            callee1_id,
3259            EdgeKind::Calls {
3260                argument_count: 0,
3261                is_async: false,
3262                resolved_via: ResolvedVia::Direct,
3263            },
3264            file_id,
3265        );
3266        graph.edges_mut().add_edge(
3267            caller_id,
3268            callee2_id,
3269            EdgeKind::Calls {
3270                argument_count: 0,
3271                is_async: false,
3272                resolved_via: ResolvedVia::Direct,
3273            },
3274            file_id,
3275        );
3276
3277        let snapshot = graph.snapshot();
3278
3279        // Query callees
3280        let callees = snapshot.get_callees(caller_id);
3281        assert_eq!(callees.len(), 2);
3282        assert!(callees.contains(&callee1_id));
3283        assert!(callees.contains(&callee2_id));
3284
3285        // Node with no callees
3286        let callees = snapshot.get_callees(callee1_id);
3287        assert!(callees.is_empty());
3288    }
3289
3290    #[test]
3291    fn test_snapshot_get_callers() {
3292        use crate::graph::unified::edge::EdgeKind;
3293        use crate::graph::unified::node::NodeKind;
3294        use crate::graph::unified::storage::arena::NodeEntry;
3295        use std::path::Path;
3296
3297        let mut graph = CodeGraph::new();
3298
3299        // Create caller and callee nodes
3300        let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3301        let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3302        let callee_name = graph.strings_mut().intern("callee").unwrap();
3303        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3304
3305        let caller1_id = graph
3306            .nodes_mut()
3307            .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3308            .unwrap();
3309        let caller2_id = graph
3310            .nodes_mut()
3311            .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3312            .unwrap();
3313        let callee_id = graph
3314            .nodes_mut()
3315            .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3316            .unwrap();
3317
3318        // Add call edges
3319        graph.edges_mut().add_edge(
3320            caller1_id,
3321            callee_id,
3322            EdgeKind::Calls {
3323                argument_count: 0,
3324                is_async: false,
3325                resolved_via: ResolvedVia::Direct,
3326            },
3327            file_id,
3328        );
3329        graph.edges_mut().add_edge(
3330            caller2_id,
3331            callee_id,
3332            EdgeKind::Calls {
3333                argument_count: 0,
3334                is_async: false,
3335                resolved_via: ResolvedVia::Direct,
3336            },
3337            file_id,
3338        );
3339
3340        let snapshot = graph.snapshot();
3341
3342        // Query callers
3343        let callers = snapshot.get_callers(callee_id);
3344        assert_eq!(callers.len(), 2);
3345        assert!(callers.contains(&caller1_id));
3346        assert!(callers.contains(&caller2_id));
3347
3348        // Node with no callers
3349        let callers = snapshot.get_callers(caller1_id);
3350        assert!(callers.is_empty());
3351    }
3352
3353    #[test]
3354    fn test_snapshot_find_symbol_candidates() {
3355        use crate::graph::unified::node::NodeKind;
3356        use crate::graph::unified::storage::arena::NodeEntry;
3357        use std::path::Path;
3358
3359        let mut graph = CodeGraph::new();
3360
3361        // Add nodes with same symbol name but different qualified names
3362        let symbol_name = graph.strings_mut().intern("test").unwrap();
3363        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3364
3365        let node1 = graph
3366            .nodes_mut()
3367            .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3368            .unwrap();
3369        let node2 = graph
3370            .nodes_mut()
3371            .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3372            .unwrap();
3373
3374        // Add a different symbol
3375        let other_name = graph.strings_mut().intern("other").unwrap();
3376        let node3 = graph
3377            .nodes_mut()
3378            .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3379            .unwrap();
3380
3381        graph
3382            .indices_mut()
3383            .add(node1, NodeKind::Function, symbol_name, None, file_id);
3384        graph
3385            .indices_mut()
3386            .add(node2, NodeKind::Method, symbol_name, None, file_id);
3387        graph
3388            .indices_mut()
3389            .add(node3, NodeKind::Function, other_name, None, file_id);
3390
3391        let snapshot = graph.snapshot();
3392
3393        // Find by symbol
3394        let matches = candidate_nodes(&snapshot, "test");
3395        assert_eq!(matches.len(), 2);
3396        assert!(matches.contains(&node1));
3397        assert!(matches.contains(&node2));
3398
3399        // Find other symbol
3400        let matches = candidate_nodes(&snapshot, "other");
3401        assert_eq!(matches.len(), 1);
3402        assert_eq!(matches[0], node3);
3403
3404        // No matches
3405        let matches = candidate_nodes(&snapshot, "nonexistent");
3406        assert!(matches.is_empty());
3407    }
3408
3409    #[test]
3410    fn test_snapshot_iter_nodes() {
3411        use crate::graph::unified::node::NodeKind;
3412        use crate::graph::unified::storage::arena::NodeEntry;
3413        use std::path::Path;
3414
3415        let mut graph = CodeGraph::new();
3416
3417        // Add some nodes
3418        let name1 = graph.strings_mut().intern("func1").unwrap();
3419        let name2 = graph.strings_mut().intern("func2").unwrap();
3420        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3421
3422        let node1 = graph
3423            .nodes_mut()
3424            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3425            .unwrap();
3426        let node2 = graph
3427            .nodes_mut()
3428            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3429            .unwrap();
3430
3431        let snapshot = graph.snapshot();
3432
3433        // Iterate nodes
3434        let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3435        assert_eq!(snapshot_nodes.len(), 2);
3436
3437        let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3438        assert!(node_ids.contains(&node1));
3439        assert!(node_ids.contains(&node2));
3440    }
3441
3442    #[test]
3443    fn test_snapshot_iter_edges() {
3444        use crate::graph::unified::edge::EdgeKind;
3445        use crate::graph::unified::node::NodeKind;
3446        use crate::graph::unified::storage::arena::NodeEntry;
3447        use std::path::Path;
3448
3449        let mut graph = CodeGraph::new();
3450
3451        // Create nodes
3452        let name1 = graph.strings_mut().intern("func1").unwrap();
3453        let name2 = graph.strings_mut().intern("func2").unwrap();
3454        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3455
3456        let node1 = graph
3457            .nodes_mut()
3458            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3459            .unwrap();
3460        let node2 = graph
3461            .nodes_mut()
3462            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3463            .unwrap();
3464
3465        // Add edges
3466        graph.edges_mut().add_edge(
3467            node1,
3468            node2,
3469            EdgeKind::Calls {
3470                argument_count: 0,
3471                is_async: false,
3472                resolved_via: ResolvedVia::Direct,
3473            },
3474            file_id,
3475        );
3476
3477        let snapshot = graph.snapshot();
3478
3479        // Iterate edges
3480        let edges: Vec<_> = snapshot.iter_edges().collect();
3481        assert_eq!(edges.len(), 1);
3482
3483        let (src, tgt, kind) = &edges[0];
3484        assert_eq!(*src, node1);
3485        assert_eq!(*tgt, node2);
3486        assert!(matches!(
3487            kind,
3488            EdgeKind::Calls {
3489                argument_count: 0,
3490                is_async: false,
3491                resolved_via: ResolvedVia::Direct,
3492            }
3493        ));
3494    }
3495
3496    #[test]
3497    fn test_snapshot_get_node() {
3498        use crate::graph::unified::node::NodeId;
3499        use crate::graph::unified::node::NodeKind;
3500        use crate::graph::unified::storage::arena::NodeEntry;
3501        use std::path::Path;
3502
3503        let mut graph = CodeGraph::new();
3504
3505        // Add a node
3506        let name = graph.strings_mut().intern("test_func").unwrap();
3507        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3508
3509        let node_id = graph
3510            .nodes_mut()
3511            .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3512            .unwrap();
3513
3514        let snapshot = graph.snapshot();
3515
3516        // Get node
3517        let entry = snapshot.get_node(node_id);
3518        assert!(entry.is_some());
3519        assert_eq!(entry.unwrap().kind, NodeKind::Function);
3520
3521        // Invalid node
3522        let invalid_id = NodeId::INVALID;
3523        assert!(snapshot.get_node(invalid_id).is_none());
3524    }
3525
3526    #[test]
3527    fn test_snapshot_query_empty_graph() {
3528        use crate::graph::unified::node::NodeId;
3529
3530        let graph = CodeGraph::new();
3531        let snapshot = graph.snapshot();
3532
3533        // All queries should return empty on empty graph
3534        assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3535        assert!(snapshot.find_by_pattern("test").is_empty());
3536        assert!(candidate_nodes(&snapshot, "test").is_empty());
3537
3538        let dummy_id = NodeId::new(0, 1);
3539        assert!(snapshot.get_callees(dummy_id).is_empty());
3540        assert!(snapshot.get_callers(dummy_id).is_empty());
3541
3542        assert_eq!(snapshot.iter_nodes().count(), 0);
3543        assert_eq!(snapshot.iter_edges().count(), 0);
3544    }
3545
3546    #[test]
3547    fn test_snapshot_edge_filtering_by_kind() {
3548        use crate::graph::unified::edge::EdgeKind;
3549        use crate::graph::unified::node::NodeKind;
3550        use crate::graph::unified::storage::arena::NodeEntry;
3551        use std::path::Path;
3552
3553        let mut graph = CodeGraph::new();
3554
3555        // Create nodes
3556        let name1 = graph.strings_mut().intern("func1").unwrap();
3557        let name2 = graph.strings_mut().intern("func2").unwrap();
3558        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3559
3560        let node1 = graph
3561            .nodes_mut()
3562            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3563            .unwrap();
3564        let node2 = graph
3565            .nodes_mut()
3566            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3567            .unwrap();
3568
3569        // Add different kinds of edges
3570        graph.edges_mut().add_edge(
3571            node1,
3572            node2,
3573            EdgeKind::Calls {
3574                argument_count: 0,
3575                is_async: false,
3576                resolved_via: ResolvedVia::Direct,
3577            },
3578            file_id,
3579        );
3580        graph
3581            .edges_mut()
3582            .add_edge(node1, node2, EdgeKind::References, file_id);
3583
3584        let snapshot = graph.snapshot();
3585
3586        // get_callees should only return Calls edges
3587        let callees = snapshot.get_callees(node1);
3588        assert_eq!(callees.len(), 1);
3589        assert_eq!(callees[0], node2);
3590
3591        // iter_edges returns all edges regardless of kind
3592        let edges: Vec<_> = snapshot.iter_edges().collect();
3593        assert_eq!(edges.len(), 2);
3594    }
3595
3596    // -------- reverse_import_index tests --------
3597
3598    /// Helper: build an empty graph with the given file paths registered, a
3599    /// single placeholder node allocated per file, and indices rebuilt so
3600    /// `by_file` returns the per-file node sets. Returns the file IDs in
3601    /// the order passed in and the per-file node ID.
3602    #[cfg(test)]
3603    fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3604        use crate::graph::unified::node::NodeKind;
3605        use crate::graph::unified::storage::arena::NodeEntry;
3606        use std::path::Path;
3607
3608        let mut graph = CodeGraph::new();
3609        let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3610        let mut file_ids = Vec::with_capacity(files.len());
3611        let mut node_ids = Vec::with_capacity(files.len());
3612        for path in files {
3613            let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3614            let node_id = graph
3615                .nodes_mut()
3616                .alloc(NodeEntry::new(
3617                    NodeKind::Function,
3618                    placeholder_name,
3619                    file_id,
3620                ))
3621                .unwrap();
3622            file_ids.push(file_id);
3623            node_ids.push(node_id);
3624        }
3625        graph.rebuild_indices();
3626        (graph, file_ids, node_ids)
3627    }
3628
3629    /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3630    /// `target_node` (in exporter file). The edge is recorded against the
3631    /// importer file so `clear_file` cleanup would behave identically to
3632    /// production Pass 4 writes.
3633    #[cfg(test)]
3634    fn add_import_edge(
3635        graph: &mut CodeGraph,
3636        source_node: NodeId,
3637        target_node: NodeId,
3638        importer_file: FileId,
3639    ) {
3640        graph.edges_mut().add_edge(
3641            source_node,
3642            target_node,
3643            EdgeKind::Imports {
3644                alias: None,
3645                is_wildcard: false,
3646            },
3647            importer_file,
3648        );
3649    }
3650
3651    #[test]
3652    fn reverse_import_index_empty_graph_returns_empty() {
3653        let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3654        assert!(graph.reverse_import_index(files[0]).is_empty());
3655    }
3656
3657    #[test]
3658    fn reverse_import_index_single_importer() {
3659        // File A imports a symbol exported by file B. Reverse index of B
3660        // should return exactly [A]; reverse index of A should be empty.
3661        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3662        let (a, b) = (files[0], files[1]);
3663        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3664
3665        let importers_of_b = graph.reverse_import_index(b);
3666        assert_eq!(importers_of_b, vec![a]);
3667
3668        let importers_of_a = graph.reverse_import_index(a);
3669        assert!(
3670            importers_of_a.is_empty(),
3671            "A has no inbound Imports edges; reverse index must be empty"
3672        );
3673    }
3674
3675    #[test]
3676    fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3677        // A, B, C all import from D. Reverse index of D must contain each
3678        // importer exactly once, sorted ascending by FileId.
3679        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3680        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3681        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3682        add_import_edge(&mut graph, nodes[1], nodes[3], b);
3683        add_import_edge(&mut graph, nodes[2], nodes[3], c);
3684        // Add a duplicate edge from A to D to confirm dedup behavior.
3685        add_import_edge(&mut graph, nodes[0], nodes[3], a);
3686
3687        let importers_of_d = graph.reverse_import_index(d);
3688        assert_eq!(importers_of_d, vec![a, b, c]);
3689        // Sort invariant: Vec is ascending by raw index.
3690        let mut sorted = importers_of_d.clone();
3691        sorted.sort();
3692        assert_eq!(importers_of_d, sorted);
3693    }
3694
3695    #[test]
3696    fn reverse_import_index_filters_non_import_edges() {
3697        // A `Calls` edge from A into B must not contribute to B's reverse
3698        // import index. Only `EdgeKind::Imports` edges count.
3699        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3700        let (a, b) = (files[0], files[1]);
3701        graph.edges_mut().add_edge(
3702            nodes[0],
3703            nodes[1],
3704            EdgeKind::Calls {
3705                argument_count: 0,
3706                is_async: false,
3707                resolved_via: ResolvedVia::Direct,
3708            },
3709            a,
3710        );
3711        graph
3712            .edges_mut()
3713            .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3714
3715        assert!(
3716            graph.reverse_import_index(b).is_empty(),
3717            "non-Imports edges must not register as importers"
3718        );
3719    }
3720
3721    #[test]
3722    fn reverse_import_index_elides_self_imports() {
3723        // An Imports edge whose source and target are both in the same file
3724        // is a self-import; the caller's own file must not appear in its own
3725        // reverse index.
3726        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3727        let a = files[0];
3728        // Add a second node in the same file so we have a distinct source.
3729        let name2 = graph.strings_mut().intern("sym2").unwrap();
3730        let second_in_a = graph
3731            .nodes_mut()
3732            .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3733                crate::graph::unified::node::NodeKind::Function,
3734                name2,
3735                a,
3736            ))
3737            .unwrap();
3738        graph.rebuild_indices();
3739        add_import_edge(&mut graph, second_in_a, nodes[0], a);
3740
3741        assert!(
3742            graph.reverse_import_index(a).is_empty(),
3743            "self-imports must be elided from reverse index"
3744        );
3745    }
3746
3747    #[test]
3748    fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3749        // Two files: A has both Calls and Imports edges into B. Reverse
3750        // index must return exactly [A] — the Calls edge contributes
3751        // nothing.
3752        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3753        let (a, b) = (files[0], files[1]);
3754        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3755        graph.edges_mut().add_edge(
3756            nodes[0],
3757            nodes[1],
3758            EdgeKind::Calls {
3759                argument_count: 0,
3760                is_async: false,
3761                resolved_via: ResolvedVia::Direct,
3762            },
3763            a,
3764        );
3765
3766        assert_eq!(graph.reverse_import_index(b), vec![a]);
3767    }
3768
3769    #[test]
3770    fn reverse_import_index_uninitialized_file_returns_empty() {
3771        // Querying a FileId that is not registered in the graph must return
3772        // an empty Vec, not panic.
3773        let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3774        let bogus = FileId::new(9999);
3775        assert!(
3776            graph.reverse_import_index(bogus).is_empty(),
3777            "unknown FileId must return empty Vec without panicking"
3778        );
3779    }
3780
3781    #[test]
3782    fn reverse_import_index_skips_tombstoned_source_nodes() {
3783        // In the incremental rebuild path the prior graph retains tombstoned
3784        // arena slots for nodes belonging to closure files that have been
3785        // removed but not yet fully compacted. reverse_import_index is
3786        // called on that prior graph to widen the closure, so its
3787        // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3788        // is a semantically important branch, not a theoretical one.
3789        // This test exercises it directly by tombstoning the source node of
3790        // an Imports edge and asserting the edge silently disappears from
3791        // the reverse index.
3792        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3793        let (a, b) = (files[0], files[1]);
3794        add_import_edge(&mut graph, nodes[0], nodes[1], a);
3795        // Sanity: before tombstoning, A shows up as the importer of B.
3796        assert_eq!(graph.reverse_import_index(b), vec![a]);
3797        // Tombstone the source node via the arena (generation bump). The
3798        // Imports edge still exists in the edge store but its source NodeId
3799        // is now stale; `nodes.get(edge_ref.source)` returns None and the
3800        // guard skips the edge.
3801        let removed = graph.nodes_mut().remove(nodes[0]);
3802        assert!(
3803            removed.is_some(),
3804            "arena.remove must succeed for a live node"
3805        );
3806        assert!(
3807            graph.nodes().get(nodes[0]).is_none(),
3808            "tombstoned lookup must return None"
3809        );
3810        assert!(
3811            graph.reverse_import_index(b).is_empty(),
3812            "Imports edges whose source is tombstoned must be silently skipped"
3813        );
3814    }
3815
3816    // ------------------------------------------------------------------
3817    // Task 4 Step 2 — CodeGraph::remove_file
3818    // ------------------------------------------------------------------
3819
3820    /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3821    /// of intra- and inter-file edges. Each call site produces a
3822    /// canonical topology so tests below can assert bit-level on edge
3823    /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3824    /// file_b_nodes)`.
3825    fn seed_two_file_graph(
3826        per_file: usize,
3827    ) -> (
3828        CodeGraph,
3829        crate::graph::unified::file::FileId,
3830        crate::graph::unified::file::FileId,
3831        Vec<NodeId>,
3832        Vec<NodeId>,
3833    ) {
3834        use crate::graph::unified::edge::EdgeKind;
3835        use crate::graph::unified::node::NodeKind;
3836        use crate::graph::unified::storage::arena::NodeEntry;
3837        use std::path::Path;
3838
3839        let mut graph = CodeGraph::new();
3840        let sym = graph.strings_mut().intern("sym").expect("intern");
3841        let file_a = graph
3842            .files_mut()
3843            .register(Path::new("/tmp/remove_file_test/a.rs"))
3844            .expect("register a");
3845        let file_b = graph
3846            .files_mut()
3847            .register(Path::new("/tmp/remove_file_test/b.rs"))
3848            .expect("register b");
3849
3850        let mut file_a_nodes = Vec::with_capacity(per_file);
3851        let mut file_b_nodes = Vec::with_capacity(per_file);
3852
3853        for _ in 0..per_file {
3854            let n = graph
3855                .nodes_mut()
3856                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3857                .expect("alloc a-node");
3858            file_a_nodes.push(n);
3859            graph.files_mut().record_node(file_a, n);
3860            graph
3861                .indices_mut()
3862                .add(n, NodeKind::Function, sym, None, file_a);
3863        }
3864        for _ in 0..per_file {
3865            let n = graph
3866                .nodes_mut()
3867                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3868                .expect("alloc b-node");
3869            file_b_nodes.push(n);
3870            graph.files_mut().record_node(file_b, n);
3871            graph
3872                .indices_mut()
3873                .add(n, NodeKind::Function, sym, None, file_b);
3874        }
3875
3876        // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3877        // b[i] -> b[i+1]. These are the ones that must die when the
3878        // corresponding file is removed.
3879        for i in 0..per_file.saturating_sub(1) {
3880            graph.edges_mut().add_edge(
3881                file_a_nodes[i],
3882                file_a_nodes[i + 1],
3883                EdgeKind::Calls {
3884                    argument_count: 0,
3885                    is_async: false,
3886                    resolved_via: ResolvedVia::Direct,
3887                },
3888                file_a,
3889            );
3890            graph.edges_mut().add_edge(
3891                file_b_nodes[i],
3892                file_b_nodes[i + 1],
3893                EdgeKind::Calls {
3894                    argument_count: 0,
3895                    is_async: false,
3896                    resolved_via: ResolvedVia::Direct,
3897                },
3898                file_b,
3899            );
3900        }
3901        // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3902        // when *either* endpoint's file is removed (plan §F.2).
3903        graph.edges_mut().add_edge(
3904            file_a_nodes[0],
3905            file_b_nodes[0],
3906            EdgeKind::Calls {
3907                argument_count: 0,
3908                is_async: false,
3909                resolved_via: ResolvedVia::Direct,
3910            },
3911            file_a,
3912        );
3913        graph.edges_mut().add_edge(
3914            file_b_nodes[0],
3915            file_a_nodes[0],
3916            EdgeKind::Calls {
3917                argument_count: 0,
3918                is_async: false,
3919                resolved_via: ResolvedVia::Direct,
3920            },
3921            file_b,
3922        );
3923
3924        (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3925    }
3926
3927    #[test]
3928    fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3929        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3930
3931        let returned = graph.remove_file(file_a);
3932
3933        // Returned list must equal the original per-file bucket
3934        // membership, deterministically.
3935        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3936        let expected_set: std::collections::HashSet<NodeId> =
3937            file_a_nodes.iter().copied().collect();
3938        assert_eq!(
3939            returned_set, expected_set,
3940            "remove_file must return exactly the file_a nodes drained from the bucket"
3941        );
3942
3943        // Every returned NodeId is gone from the arena.
3944        for nid in &file_a_nodes {
3945            assert!(
3946                graph.nodes().get(*nid).is_none(),
3947                "node {nid:?} from removed file must be tombstoned in arena"
3948            );
3949        }
3950    }
3951
3952    #[test]
3953    fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
3954        use crate::graph::unified::edge::EdgeKind;
3955
3956        let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
3957
3958        // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
3959        // 2 cross-file edges (a0↔b0).
3960        let before_delta = graph.edges().stats().forward.delta_edge_count;
3961        assert_eq!(
3962            before_delta, 6,
3963            "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
3964        );
3965
3966        let _ = graph.remove_file(file_a);
3967
3968        // Forward delta after removal: all 2 intra-A edges are gone,
3969        // both cross edges are gone, only the 2 intra-B edges remain.
3970        let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
3971        assert_eq!(
3972            after_delta_forward, 2,
3973            "only intra-B forward edges must remain after removing file_a"
3974        );
3975        let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
3976        assert_eq!(
3977            after_delta_reverse, 2,
3978            "only intra-B reverse edges must remain after removing file_a"
3979        );
3980
3981        // Cross-file edge from b[0] -> a[0] must no longer be visible
3982        // from either direction, because a[0] is tombstoned.
3983        let b0 = file_b_nodes[0];
3984        let a0 = file_a_nodes[0];
3985        let remaining_from_b0: Vec<_> = graph
3986            .edges()
3987            .edges_from(b0)
3988            .into_iter()
3989            .filter(|e| {
3990                matches!(
3991                    e.kind,
3992                    EdgeKind::Calls {
3993                        argument_count: 0,
3994                        is_async: false,
3995                        resolved_via: ResolvedVia::Direct,
3996                    }
3997                )
3998            })
3999            .collect();
4000        assert!(
4001            !remaining_from_b0.iter().any(|e| e.target == a0),
4002            "edge b0 -> a0 must be gone after remove_file(file_a)"
4003        );
4004        let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
4005        assert!(
4006            remaining_to_a0.is_empty(),
4007            "every edge targeting the tombstoned a0 must be gone"
4008        );
4009    }
4010
4011    #[test]
4012    fn code_graph_remove_file_drops_file_registry_entry() {
4013        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
4014
4015        assert!(
4016            graph.files().resolve(file_a).is_some(),
4017            "seed registered file_a"
4018        );
4019        assert!(
4020            !graph.files().nodes_for_file(file_a).is_empty(),
4021            "seed populated the file_a bucket"
4022        );
4023
4024        let _ = graph.remove_file(file_a);
4025
4026        assert!(
4027            graph.files().resolve(file_a).is_none(),
4028            "FileRegistry::resolve must return None after remove_file"
4029        );
4030        assert!(
4031            graph.files().nodes_for_file(file_a).is_empty(),
4032            "per-file bucket for file_a must be drained"
4033        );
4034    }
4035
4036    #[test]
4037    fn code_graph_remove_file_is_idempotent_on_unknown_file() {
4038        use crate::graph::unified::file::FileId;
4039        let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
4040
4041        // Snapshot state before idempotent no-op.
4042        let nodes_before = graph.nodes().len();
4043        let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
4044        let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
4045        let files_before = graph.files().len();
4046
4047        // A FileId that was never registered: the caller may legitimately
4048        // receive a bogus id from stale indexing state. `remove_file`
4049        // must be a silent no-op.
4050        let bogus = FileId::new(9999);
4051        let returned = graph.remove_file(bogus);
4052        assert!(
4053            returned.is_empty(),
4054            "remove_file on unknown FileId must return an empty Vec"
4055        );
4056
4057        assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
4058        assert_eq!(
4059            graph.edges().stats().forward.delta_edge_count,
4060            delta_fwd_before,
4061            "forward delta unchanged"
4062        );
4063        assert_eq!(
4064            graph.edges().stats().reverse.delta_edge_count,
4065            delta_rev_before,
4066            "reverse delta unchanged"
4067        );
4068        assert_eq!(graph.files().len(), files_before, "file count unchanged");
4069    }
4070
4071    #[test]
4072    fn code_graph_remove_file_clears_file_segments_entry() {
4073        // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
4074        // must be cleared on `remove_file`. Without this, a later
4075        // `FileId` recycle (via `FileRegistry::free_list`) would
4076        // inherit the previous file's stale node-range and
4077        // `reindex_files` (`build/reindex.rs`) would tombstone the
4078        // wrong slots. This unit test seeds a segment entry, removes
4079        // the file, and asserts the entry is gone.
4080        use crate::graph::unified::storage::segment::FileSegmentTable;
4081
4082        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4083
4084        // Seed a segment for file A. Production code sets this via
4085        // Phase 3 parallel commit; here we go through the crate-
4086        // internal accessor because we are a unit test inside the
4087        // crate. (The feature-gated `test_only_record_file_segment`
4088        // public helper exists for the integration tests in
4089        // `sqry-core/tests/incremental_remove_file_scale.rs`.)
4090        let first_index = file_a_nodes
4091            .iter()
4092            .map(|n| n.index())
4093            .min()
4094            .expect("per_file = 3");
4095        let last_index = file_a_nodes
4096            .iter()
4097            .map(|n| n.index())
4098            .max()
4099            .expect("per_file = 3");
4100        let slot_count = last_index - first_index + 1;
4101        let table: &mut FileSegmentTable = graph.file_segments_mut();
4102        table.record_range(file_a, first_index, slot_count);
4103        assert!(
4104            graph.file_segments().get(file_a).is_some(),
4105            "seed must install a segment for file_a before remove_file"
4106        );
4107
4108        // Remove the file.
4109        let _ = graph.remove_file(file_a);
4110
4111        // The segment entry must be gone. `FileSegmentTable::remove`
4112        // is idempotent (a no-op on unknown ids), so this assertion
4113        // holds whether or not the seeded range was contiguous.
4114        assert!(
4115            graph.file_segments().get(file_a).is_none(),
4116            "remove_file must clear the FileSegmentTable entry for file_a"
4117        );
4118    }
4119
4120    #[test]
4121    fn code_graph_remove_file_repeated_calls_are_idempotent() {
4122        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4123
4124        // First call does the work.
4125        let first = graph.remove_file(file_a);
4126        assert_eq!(first.len(), file_a_nodes.len());
4127
4128        // Snapshot post-first-call state.
4129        let nodes_after = graph.nodes().len();
4130        let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
4131        let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
4132        let files_after = graph.files().len();
4133
4134        // Second call must be a silent no-op — the bucket is empty,
4135        // the file is unregistered, and the arena slots are already
4136        // tombstoned (NodeArena::remove ignores stale generations).
4137        let second = graph.remove_file(file_a);
4138        assert!(
4139            second.is_empty(),
4140            "second remove_file on the same file must return an empty Vec"
4141        );
4142
4143        assert_eq!(graph.nodes().len(), nodes_after);
4144        assert_eq!(
4145            graph.edges().stats().forward.delta_edge_count,
4146            delta_fwd_after
4147        );
4148        assert_eq!(
4149            graph.edges().stats().reverse.delta_edge_count,
4150            delta_rev_after
4151        );
4152        assert_eq!(graph.files().len(), files_after);
4153    }
4154}