sqry_core/graph/unified/concurrent/graph.rs
1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19 ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24#[cfg(test)]
25use crate::graph::unified::edge::ResolvedVia;
26use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
27use crate::graph::unified::file::FileId;
28use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
29use crate::graph::unified::resolution::display_graph_qualified_name;
30use crate::graph::unified::storage::arena::NodeArena;
31use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
32use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
33use crate::graph::unified::storage::indices::AuxiliaryIndices;
34use crate::graph::unified::storage::interner::StringInterner;
35use crate::graph::unified::storage::metadata::NodeMetadataStore;
36use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
37use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
38use crate::graph::unified::storage::segment::FileSegmentTable;
39use crate::graph::unified::string::id::StringId;
40
41/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
42///
43/// `CodeGraph` uses `Arc` for all internal components, enabling:
44/// - O(1) snapshot creation via Arc cloning
45/// - Copy-on-write semantics via `Arc::make_mut`
46/// - Memory-efficient sharing between snapshots
47///
48/// # Design
49///
50/// The Arc wrapping enables the MVCC pattern:
51/// - Readers see a consistent snapshot at the time they acquired access
52/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
53/// - Multiple snapshots can coexist without copying data
54///
55/// # Performance
56///
57/// - Snapshot creation: O(5) Arc clones ≈ <1μs
58/// - Read access: Direct Arc dereference, no locking
59/// - Write access: `Arc::make_mut` clones only if refcount > 1
60///
61/// # Phase 2 binding-plane access
62///
63/// Use the two-line snapshot pattern to access `BindingPlane`:
64///
65/// ```rust,ignore
66/// let snapshot = graph.snapshot();
67/// let plane = snapshot.binding_plane();
68/// let resolution = plane.resolve(&query);
69/// ```
70///
71/// The two-line form is intentional: `BindingPlane<'g>` borrows from
72/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
73/// visible at the call site. The full Phase 2 scope/alias/shadow and
74/// witness-bearing resolution API is exposed through `BindingPlane`.
75// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
76// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
77// External crates still go through the public accessor methods below.
78#[derive(Clone)]
79pub struct CodeGraph {
80 /// Node storage with generational indices.
81 pub(crate) nodes: Arc<NodeArena>,
82 /// Bidirectional edge storage (forward + reverse).
83 pub(crate) edges: Arc<BidirectionalEdgeStore>,
84 /// String interner for symbol names.
85 pub(crate) strings: Arc<StringInterner>,
86 /// File registry for path deduplication.
87 pub(crate) files: Arc<FileRegistry>,
88 /// Auxiliary indices for fast lookup.
89 pub(crate) indices: Arc<AuxiliaryIndices>,
90 /// Sparse macro boundary metadata (keyed by full `NodeId`).
91 pub(crate) macro_metadata: Arc<NodeMetadataStore>,
92 /// Dense node provenance (Phase 1 fact layer).
93 pub(crate) node_provenance: Arc<NodeProvenanceStore>,
94 /// Dense edge provenance (Phase 1 fact layer).
95 pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
96 /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
97 pub(crate) fact_epoch: u64,
98 /// Epoch for version tracking.
99 pub(crate) epoch: u64,
100 /// Per-language confidence metadata collected during build.
101 /// Maps language name (e.g., "rust") to aggregated confidence.
102 pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
103 /// Phase 2 binding-plane scope arena (populated by Phase 4e).
104 pub(crate) scope_arena: Arc<ScopeArena>,
105 /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
106 pub(crate) alias_table: Arc<AliasTable>,
107 /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
108 pub(crate) shadow_table: Arc<ShadowTable>,
109 /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
110 pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
111 /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
112 /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
113 pub(crate) file_segments: Arc<FileSegmentTable>,
114 /// Phase A (U09): C-only indirect-call resolver side tables.
115 ///
116 /// `None` on non-C workspaces — the parent slot stays unallocated so
117 /// non-C builds incur zero side-table overhead. Populated by Phase 3
118 /// commit (U11) and consumed by `pass5b_c_indirect_resolve` (U12).
119 /// Persisted as the V11 `Option<CIndirectSideTables>` envelope slot
120 /// (DESIGN §10.2); V10 → V11 upconvert sets this to `None`.
121 pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
122 /// Build-time scratch side-channel from the Go plugin (Cluster A
123 /// of the Go T1 implements-and-promotion design).
124 ///
125 /// Populated during Phase 1 plugin parse, merged into the live
126 /// target by Phase 3 commit after `NodeId` / `StringId` remap, drained
127 /// by `pass_go_method_set_satisfaction` between Phase 4e and Pass
128 /// 5. Not part of `GraphSnapshot`, not persisted in V10 — see
129 /// `02_DESIGN` §6. Held by-value (not `Arc<…>`) because no
130 /// copy-on-write or shared-reader access is required: only the
131 /// rebuild owner mutates, only the pass consumes, then the field
132 /// is reset.
133 pub(crate) go_hints: crate::graph::unified::build::staging::GoHints,
134 /// Whether this graph carries genuine `NodeEntry.is_definition` signal
135 /// (R3 marker for the definition-signal feature).
136 ///
137 /// `true` for graphs freshly built in-process (the `GraphBuilder` path stamps
138 /// `is_definition` at declaration sites) and for graphs loaded from a
139 /// V16-or-newer snapshot. `false` for graphs loaded from a pre-V16 snapshot,
140 /// whose `is_definition` bits are all defaulted `false` (absent on the wire)
141 /// and must NOT be interpreted as "no declarations exist." Consumers that
142 /// filter on `is_definition` (e.g. items-only listings) read this marker to
143 /// decide whether the signal is trustworthy or a reindex is required.
144 pub(crate) definition_signal_present: bool,
145}
146
147impl CodeGraph {
148 /// Creates a new empty `CodeGraph`.
149 ///
150 /// # Example
151 ///
152 /// ```rust
153 /// use sqry_core::graph::unified::concurrent::CodeGraph;
154 ///
155 /// let graph = CodeGraph::new();
156 /// assert_eq!(graph.epoch(), 0);
157 /// ```
158 #[must_use]
159 pub fn new() -> Self {
160 Self {
161 nodes: Arc::new(NodeArena::new()),
162 edges: Arc::new(BidirectionalEdgeStore::new()),
163 strings: Arc::new(StringInterner::new()),
164 files: Arc::new(FileRegistry::new()),
165 indices: Arc::new(AuxiliaryIndices::new()),
166 macro_metadata: Arc::new(NodeMetadataStore::new()),
167 node_provenance: Arc::new(NodeProvenanceStore::new()),
168 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
169 fact_epoch: 0,
170 epoch: 0,
171 confidence: HashMap::new(),
172 scope_arena: Arc::new(ScopeArena::new()),
173 alias_table: Arc::new(AliasTable::new()),
174 shadow_table: Arc::new(ShadowTable::new()),
175 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
176 file_segments: Arc::new(FileSegmentTable::new()),
177 c_indirect_tables: None,
178 go_hints: crate::graph::unified::build::staging::GoHints::default(),
179 // Fresh in-process graph: the GraphBuilder path stamps is_definition.
180 definition_signal_present: true,
181 }
182 }
183
184 /// Creates a `CodeGraph` from existing components.
185 ///
186 /// This is useful when building a graph from external data or
187 /// reconstructing from serialized state.
188 #[must_use]
189 pub fn from_components(
190 nodes: NodeArena,
191 edges: BidirectionalEdgeStore,
192 strings: StringInterner,
193 files: FileRegistry,
194 indices: AuxiliaryIndices,
195 macro_metadata: NodeMetadataStore,
196 ) -> Self {
197 Self {
198 nodes: Arc::new(nodes),
199 edges: Arc::new(edges),
200 strings: Arc::new(strings),
201 files: Arc::new(files),
202 indices: Arc::new(indices),
203 macro_metadata: Arc::new(macro_metadata),
204 node_provenance: Arc::new(NodeProvenanceStore::new()),
205 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
206 fact_epoch: 0,
207 epoch: 0,
208 confidence: HashMap::new(),
209 scope_arena: Arc::new(ScopeArena::new()),
210 alias_table: Arc::new(AliasTable::new()),
211 shadow_table: Arc::new(ShadowTable::new()),
212 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
213 file_segments: Arc::new(FileSegmentTable::new()),
214 c_indirect_tables: None,
215 go_hints: crate::graph::unified::build::staging::GoHints::default(),
216 // Default to present; the load path overrides via
217 // `set_definition_signal_present` based on the snapshot format
218 // version (pre-V16 loads stamp `false`).
219 definition_signal_present: true,
220 }
221 }
222
223 /// Creates a cheap snapshot of the graph.
224 ///
225 /// This operation is O(5) Arc clones, which completes in <1μs.
226 /// The snapshot is isolated from future mutations to the original graph.
227 ///
228 /// # Example
229 ///
230 /// ```rust
231 /// use sqry_core::graph::unified::concurrent::CodeGraph;
232 ///
233 /// let graph = CodeGraph::new();
234 /// let snapshot = graph.snapshot();
235 /// // snapshot is independent of future mutations to graph
236 /// ```
237 #[must_use]
238 pub fn snapshot(&self) -> GraphSnapshot {
239 GraphSnapshot {
240 nodes: Arc::clone(&self.nodes),
241 edges: Arc::clone(&self.edges),
242 strings: Arc::clone(&self.strings),
243 files: Arc::clone(&self.files),
244 indices: Arc::clone(&self.indices),
245 macro_metadata: Arc::clone(&self.macro_metadata),
246 node_provenance: Arc::clone(&self.node_provenance),
247 edge_provenance: Arc::clone(&self.edge_provenance),
248 fact_epoch: self.fact_epoch,
249 epoch: self.epoch,
250 scope_arena: Arc::clone(&self.scope_arena),
251 alias_table: Arc::clone(&self.alias_table),
252 shadow_table: Arc::clone(&self.shadow_table),
253 scope_provenance_store: Arc::clone(&self.scope_provenance_store),
254 file_segments: Arc::clone(&self.file_segments),
255 c_indirect_tables: self.c_indirect_tables.clone(),
256 definition_signal_present: self.definition_signal_present,
257 }
258 }
259
260 /// Returns whether this graph carries genuine `is_definition` signal.
261 ///
262 /// See [`definition_signal_present`](Self::definition_signal_present) (the
263 /// field) for the full contract.
264 #[inline]
265 #[must_use]
266 pub fn definition_signal_present(&self) -> bool {
267 self.definition_signal_present
268 }
269
270 /// Sets the definition-signal marker.
271 ///
272 /// Used by the snapshot load path to stamp `false` for pre-V16 snapshots
273 /// (whose `is_definition` bits are all defaulted) and `true` for V16+.
274 #[inline]
275 pub fn set_definition_signal_present(&mut self, present: bool) {
276 self.definition_signal_present = present;
277 }
278
279 /// Returns a reference to the node arena.
280 #[inline]
281 #[must_use]
282 pub fn nodes(&self) -> &NodeArena {
283 &self.nodes
284 }
285
286 /// Returns a reference to the bidirectional edge store.
287 #[inline]
288 #[must_use]
289 pub fn edges(&self) -> &BidirectionalEdgeStore {
290 &self.edges
291 }
292
293 /// Returns a reference to the string interner.
294 #[inline]
295 #[must_use]
296 pub fn strings(&self) -> &StringInterner {
297 &self.strings
298 }
299
300 /// Returns a reference to the file registry.
301 #[inline]
302 #[must_use]
303 pub fn files(&self) -> &FileRegistry {
304 &self.files
305 }
306
307 /// Returns a reference to the auxiliary indices.
308 #[inline]
309 #[must_use]
310 pub fn indices(&self) -> &AuxiliaryIndices {
311 &self.indices
312 }
313
314 /// Returns a reference to the macro boundary metadata store.
315 #[inline]
316 #[must_use]
317 pub fn macro_metadata(&self) -> &NodeMetadataStore {
318 &self.macro_metadata
319 }
320
321 /// Returns a reference to the C indirect-call side tables, if any.
322 ///
323 /// `None` on non-C workspaces — the slot is allocated lazily and only
324 /// when the C plugin's Phase 1 instrumentation observes content worth
325 /// recording. On loaded snapshots, the V11 envelope's
326 /// `Option<CIndirectSideTables>` field round-trips into this slot
327 /// (DESIGN §10.2); V10 → V11 upconvert always sets the slot to `None`.
328 ///
329 /// `pass5b_c_indirect_resolve` (U12) consumes these tables to rewrite
330 /// synthetic indirect-call `Calls` edges into precise binding-plane /
331 /// type-match candidates.
332 #[inline]
333 #[must_use]
334 pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
335 self.c_indirect_tables.as_ref()
336 }
337
338 /// Returns a mutable reference to the `Option<CIndirectSideTables>`
339 /// slot, allowing callers to install / replace / clear the side
340 /// tables.
341 ///
342 /// Mirrors the accessor pattern used for [`Self::macro_metadata_mut`]
343 /// but exposes the `Option` directly — the side tables are owned in
344 /// place, not behind an `Arc`. Callers that need to merge incremental
345 /// state into the existing tables typically do:
346 ///
347 /// ```rust,ignore
348 /// let slot = graph.c_indirect_tables_mut();
349 /// let tables = slot.get_or_insert_with(CIndirectSideTables::new);
350 /// tables.bindings_by_field.entry(key).or_default().push(entry);
351 /// ```
352 ///
353 /// Populated by the build pipeline (U11). Read-only consumers should
354 /// use [`Self::c_indirect_tables`] instead.
355 #[inline]
356 pub fn c_indirect_tables_mut(&mut self) -> &mut Option<CIndirectSideTables> {
357 &mut self.c_indirect_tables
358 }
359
360 /// Test-only: strip every Phase-A-introduced piece of state from this
361 /// graph, leaving a graph that is byte-identical (modulo persistence
362 /// header timestamps) to what a pre-Phase-A build would have produced
363 /// on the same fixture.
364 ///
365 /// Specifically:
366 ///
367 /// * Clears the [`CIndirectSideTables`] slot (sets it to `None`).
368 /// * Removes [`NodeFlags::ADDRESS_TAKEN`] and
369 /// [`NodeFlags::CALLSITE_PROMISCUOUS`] marker bits from every
370 /// [`StoredEntry`] in the macro-metadata store.
371 /// * Rewrites the `resolved_via` field of every [`EdgeKind::Calls`]
372 /// edge (across CSR and delta tiers, forward and reverse stores)
373 /// to [`ResolvedVia::Direct`].
374 ///
375 /// Used exclusively by `sqry-core/tests/snapshot_size_phase_a.rs`
376 /// (U19) to materialize a "Phase-A-free" baseline snapshot for the
377 /// +10% snapshot-size gate. Gated behind `cfg(any(test, feature =
378 /// "test-support"))` so the helper is invisible to production
379 /// builds.
380 ///
381 /// [`NodeFlags::ADDRESS_TAKEN`]: crate::graph::unified::storage::metadata::NodeFlags::ADDRESS_TAKEN
382 /// [`NodeFlags::CALLSITE_PROMISCUOUS`]: crate::graph::unified::storage::metadata::NodeFlags::CALLSITE_PROMISCUOUS
383 /// [`StoredEntry`]: crate::graph::unified::storage::metadata::StoredEntry
384 /// [`EdgeKind::Calls`]: crate::graph::unified::edge::EdgeKind::Calls
385 /// [`ResolvedVia::Direct`]: crate::graph::unified::edge::ResolvedVia::Direct
386 #[cfg(any(test, feature = "test-support"))]
387 pub fn clear_phase_a_state_for_test(&mut self) {
388 *self.c_indirect_tables_mut() = None;
389 self.macro_metadata_mut().clear_phase_a_flags_for_test();
390 self.edges_mut().normalize_calls_resolved_via_for_test();
391 }
392
393 // ------------------------------------------------------------------
394 // Phase 1 fact-layer provenance accessors (P1U09).
395 // ------------------------------------------------------------------
396
397 /// Returns the monotonic fact-layer epoch stamped on the most recently
398 /// saved or loaded snapshot. Returns `0` for graphs that have not been
399 /// persisted yet or were loaded from V7 snapshots.
400 #[inline]
401 #[must_use]
402 pub fn fact_epoch(&self) -> u64 {
403 self.fact_epoch
404 }
405
406 /// Looks up node provenance by `NodeId`.
407 ///
408 /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
409 /// or the stored generation does not match (stale handle).
410 #[inline]
411 #[must_use]
412 pub fn node_provenance(
413 &self,
414 id: crate::graph::unified::node::id::NodeId,
415 ) -> Option<&NodeProvenance> {
416 self.node_provenance.lookup(id)
417 }
418
419 /// Looks up edge provenance by `EdgeId`.
420 ///
421 /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
422 /// or the edge is the invalid sentinel.
423 #[inline]
424 #[must_use]
425 pub fn edge_provenance(
426 &self,
427 id: crate::graph::unified::edge::id::EdgeId,
428 ) -> Option<&EdgeProvenance> {
429 self.edge_provenance.lookup(id)
430 }
431
432 /// Returns a borrowed provenance view for a file.
433 ///
434 /// Returns `None` for invalid/unregistered `FileId`s.
435 #[inline]
436 #[must_use]
437 pub fn file_provenance(
438 &self,
439 id: crate::graph::unified::file::id::FileId,
440 ) -> Option<FileProvenanceView<'_>> {
441 self.files.file_provenance(id)
442 }
443
444 // ------------------------------------------------------------------
445 // Phase 2 binding-plane accessors (P2U03).
446 // ------------------------------------------------------------------
447
448 /// Returns a reference to the scope arena derived during Phase 4e.
449 ///
450 /// The arena is empty on freshly-constructed `CodeGraph` instances and is
451 /// populated by calling `phase4e_binding::derive_binding_plane`.
452 #[inline]
453 #[must_use]
454 pub fn scope_arena(&self) -> &ScopeArena {
455 &self.scope_arena
456 }
457
458 /// Installs a freshly-derived scope arena.
459 ///
460 /// Called from `phase4e_binding::derive_binding_plane` during the build
461 /// pipeline. External callers that run Phase 4e manually (e.g., test
462 /// fixture builders) use this to store the result.
463 pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
464 self.scope_arena = Arc::new(arena);
465 }
466
467 /// Returns a reference to the alias table derived during Phase 4e.
468 ///
469 /// The table is empty on freshly-constructed `CodeGraph` instances and is
470 /// populated by calling `phase4e_binding::derive_binding_plane`.
471 #[inline]
472 #[must_use]
473 pub fn alias_table(&self) -> &AliasTable {
474 &self.alias_table
475 }
476
477 /// Installs a freshly-derived alias table.
478 ///
479 /// Called from `phase4e_binding::derive_binding_plane` during the build
480 /// pipeline. External callers that run Phase 4e manually (e.g., test
481 /// fixture builders) use this to store the result.
482 pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
483 self.alias_table = Arc::new(table);
484 }
485
486 /// Returns a reference to the shadow table derived during Phase 4e.
487 ///
488 /// The table is empty on freshly-constructed `CodeGraph` instances and is
489 /// populated by calling `phase4e_binding::derive_binding_plane`.
490 #[inline]
491 #[must_use]
492 pub fn shadow_table(&self) -> &ShadowTable {
493 &self.shadow_table
494 }
495
496 /// Installs a freshly-derived shadow table.
497 ///
498 /// Called from `phase4e_binding::derive_binding_plane` during the build
499 /// pipeline. External callers that run Phase 4e manually (e.g., test
500 /// fixture builders) use this to store the result.
501 pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
502 self.shadow_table = Arc::new(table);
503 }
504
505 /// Returns a reference to the scope provenance store derived during Phase 4e.
506 ///
507 /// The store is empty on freshly-constructed `CodeGraph` instances and is
508 /// populated by calling `phase4e_binding::derive_binding_plane`.
509 #[inline]
510 #[must_use]
511 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
512 &self.scope_provenance_store
513 }
514
515 /// Looks up scope provenance by `ScopeId`.
516 ///
517 /// Returns `None` if the slot is out of range, vacant, or the stored
518 /// generation does not match (stale handle).
519 #[inline]
520 #[must_use]
521 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
522 self.scope_provenance_store.lookup(id)
523 }
524
525 /// Looks up the live `ScopeId` for a stable scope identity.
526 ///
527 /// Returns `None` if no provenance record is registered for that stable id.
528 /// The reverse index is populated by `insert` during Phase 4e and must be
529 /// rebuilt after V9 deserialization.
530 #[inline]
531 #[must_use]
532 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
533 self.scope_provenance_store.scope_by_stable_id(stable)
534 }
535
536 /// Installs a freshly-derived scope provenance store.
537 ///
538 /// Called from `phase4e_binding::derive_binding_plane` during the build
539 /// pipeline. External callers that run Phase 4e manually (e.g., test
540 /// fixture builders) use this to store the result.
541 pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
542 self.scope_provenance_store = Arc::new(store);
543 }
544
545 /// Returns a reference to the file segment table.
546 #[inline]
547 #[must_use]
548 pub fn file_segments(&self) -> &FileSegmentTable {
549 &self.file_segments
550 }
551
552 /// Replaces the file segment table.
553 pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
554 self.file_segments = Arc::new(table);
555 }
556
557 /// Installs the C indirect-call side tables loaded from a V11
558 /// snapshot.
559 ///
560 /// `None` clears the slot (used on V10 → V11 upconvert and on non-C
561 /// workspaces). `Some(...)` carries the live tables onto the freshly
562 /// reconstructed `CodeGraph`. Build-pipeline callers that incrementally
563 /// populate the tables should prefer [`Self::c_indirect_tables_mut`].
564 pub(crate) fn set_c_indirect_tables(&mut self, tables: Option<CIndirectSideTables>) {
565 self.c_indirect_tables = tables;
566 }
567
568 /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
569 pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
570 Arc::make_mut(&mut self.file_segments)
571 }
572
573 /// Test-only helper that records a file segment directly on the
574 /// graph, bypassing the full Phase 3 commit pipeline. Only
575 /// available under `#[cfg(feature = "rebuild-internals")]` so the
576 /// surface is opt-in for rebuild-plane consumers (the feature is
577 /// whitelisted to sqry-daemon + sqry-core integration tests; see
578 /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
579 ///
580 /// Integration tests (notably
581 /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
582 /// to seed the synthetic workspaces they build before exercising
583 /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
584 /// code paths never touch this method — Phase 3 parallel commit
585 /// is the sole production writer, via the crate-internal
586 /// `file_segments_mut` accessor above.
587 ///
588 /// Renamed with `test_only_` prefix so the purpose is unambiguous
589 /// at every call site; `#[doc(hidden)]` hides it from rendered
590 /// rustdoc so downstream daemon integrations don't discover it by
591 /// accident.
592 #[cfg(feature = "rebuild-internals")]
593 #[doc(hidden)]
594 pub fn test_only_record_file_segment(
595 &mut self,
596 file_id: FileId,
597 start_slot: u32,
598 slot_count: u32,
599 ) {
600 Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
601 }
602
603 /// Sets the provenance stores and fact epoch, typically called by the
604 /// persistence loader after deserializing a V8 snapshot.
605 pub(crate) fn set_provenance(
606 &mut self,
607 node_provenance: NodeProvenanceStore,
608 edge_provenance: EdgeProvenanceStore,
609 fact_epoch: u64,
610 ) {
611 self.node_provenance = Arc::new(node_provenance);
612 self.edge_provenance = Arc::new(edge_provenance);
613 self.fact_epoch = fact_epoch;
614 }
615
616 /// Returns the current epoch.
617 #[inline]
618 #[must_use]
619 pub fn epoch(&self) -> u64 {
620 self.epoch
621 }
622
623 /// Returns a mutable reference to the node arena.
624 ///
625 /// Uses `Arc::make_mut` for copy-on-write semantics: if other
626 /// references exist (e.g., snapshots), the data is cloned.
627 #[inline]
628 pub fn nodes_mut(&mut self) -> &mut NodeArena {
629 Arc::make_mut(&mut self.nodes)
630 }
631
632 /// Returns a mutable reference to the bidirectional edge store.
633 ///
634 /// Uses `Arc::make_mut` for copy-on-write semantics.
635 #[inline]
636 pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
637 Arc::make_mut(&mut self.edges)
638 }
639
640 /// Returns a mutable reference to the string interner.
641 ///
642 /// Uses `Arc::make_mut` for copy-on-write semantics.
643 #[inline]
644 pub fn strings_mut(&mut self) -> &mut StringInterner {
645 Arc::make_mut(&mut self.strings)
646 }
647
648 /// Returns a mutable reference to the file registry.
649 ///
650 /// Uses `Arc::make_mut` for copy-on-write semantics.
651 #[inline]
652 pub fn files_mut(&mut self) -> &mut FileRegistry {
653 Arc::make_mut(&mut self.files)
654 }
655
656 /// Returns a mutable reference to the auxiliary indices.
657 ///
658 /// Uses `Arc::make_mut` for copy-on-write semantics.
659 #[inline]
660 pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
661 Arc::make_mut(&mut self.indices)
662 }
663
664 /// Returns a mutable reference to the macro boundary metadata store.
665 ///
666 /// Uses `Arc::make_mut` for copy-on-write semantics.
667 #[inline]
668 pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
669 Arc::make_mut(&mut self.macro_metadata)
670 }
671
672 /// Returns mutable references to both the node arena and the string interner.
673 ///
674 /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
675 /// `strings_mut()` separately on `&mut self`.
676 #[inline]
677 pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
678 (
679 Arc::make_mut(&mut self.nodes),
680 Arc::make_mut(&mut self.strings),
681 )
682 }
683
684 /// Rebuilds auxiliary indices from the current node arena.
685 ///
686 /// This avoids the borrow conflict that arises when calling `nodes()` and
687 /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
688 /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
689 /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
690 /// existing indices and rebuilds in a single pass without per-element
691 /// duplicate checking.
692 ///
693 /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
694 /// generic
695 /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
696 /// free function so the same implementation serves both the
697 /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
698 /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
699 /// using `graph.rebuild_indices()`; incremental rebuild call sites
700 /// should use the free function directly.
701 pub fn rebuild_indices(&mut self) {
702 crate::graph::unified::build::parallel_commit::rebuild_indices(self);
703 }
704
705 /// Increments the epoch counter and returns the new value.
706 ///
707 /// Called automatically by `ConcurrentCodeGraph::write()`.
708 #[inline]
709 pub fn bump_epoch(&mut self) -> u64 {
710 self.epoch = self.epoch.wrapping_add(1);
711 self.epoch
712 }
713
714 /// Sets the epoch to a specific value.
715 ///
716 /// This is primarily for testing or reconstruction from serialized state.
717 #[inline]
718 pub fn set_epoch(&mut self, epoch: u64) {
719 self.epoch = epoch;
720 }
721
722 /// Returns the number of nodes in the graph.
723 ///
724 /// This is a convenience method that delegates to `nodes().len()`.
725 ///
726 /// # Example
727 ///
728 /// ```rust
729 /// use sqry_core::graph::unified::concurrent::CodeGraph;
730 ///
731 /// let graph = CodeGraph::new();
732 /// assert_eq!(graph.node_count(), 0);
733 /// ```
734 #[inline]
735 #[must_use]
736 pub fn node_count(&self) -> usize {
737 self.nodes.len()
738 }
739
740 /// Returns the number of edges in the graph (forward direction).
741 ///
742 /// This counts edges in the forward store, including both CSR and delta edges.
743 ///
744 /// # Example
745 ///
746 /// ```rust
747 /// use sqry_core::graph::unified::concurrent::CodeGraph;
748 ///
749 /// let graph = CodeGraph::new();
750 /// assert_eq!(graph.edge_count(), 0);
751 /// ```
752 #[inline]
753 #[must_use]
754 pub fn edge_count(&self) -> usize {
755 let stats = self.edges.stats();
756 stats.forward.csr_edge_count + stats.forward.delta_edge_count
757 }
758
759 /// Returns true if the graph contains no nodes.
760 ///
761 /// This is a convenience method that delegates to `nodes().is_empty()`.
762 ///
763 /// # Example
764 ///
765 /// ```rust
766 /// use sqry_core::graph::unified::concurrent::CodeGraph;
767 ///
768 /// let graph = CodeGraph::new();
769 /// assert!(graph.is_empty());
770 /// ```
771 #[inline]
772 #[must_use]
773 pub fn is_empty(&self) -> bool {
774 self.nodes.is_empty()
775 }
776
777 /// Returns an iterator over all indexed file paths.
778 ///
779 /// This is useful for enumerating all files that have been processed
780 /// and added to the graph.
781 ///
782 /// # Example
783 ///
784 /// ```rust
785 /// use sqry_core::graph::unified::concurrent::CodeGraph;
786 ///
787 /// let graph = CodeGraph::new();
788 /// for (file_id, path) in graph.indexed_files() {
789 /// println!("File {}: {}", file_id.index(), path.display());
790 /// }
791 /// ```
792 #[inline]
793 pub fn indexed_files(
794 &self,
795 ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
796 self.files
797 .iter()
798 .map(|(id, arc_path)| (id, arc_path.as_ref()))
799 }
800
801 /// Returns the set of files that import one or more symbols exported by
802 /// `file_id`, deduplicated and sorted ascending.
803 ///
804 /// For every [`EdgeKind::Imports`] edge whose target node lives in
805 /// `file_id`, the source node's [`FileId`] is added to the result. Files
806 /// are returned sorted by raw index so the result is deterministic across
807 /// runs. The caller's own file is never included — an `Imports` edge
808 /// whose source and target both live in `file_id` is treated as a
809 /// self-import and elided. Edges whose source node is no longer resolvable
810 /// in the arena (tombstoned) are silently skipped.
811 ///
812 /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
813 /// by the incremental rebuild engine to compute reverse-dependency
814 /// closures: "if file X changes its exports, which files need to be
815 /// re-linked?"
816 ///
817 /// # Complexity
818 ///
819 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
820 /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
821 /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
822 /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
823 /// importer set (where `R` is the importer count) is negligible in
824 /// practice since `R ≤ file_count`.
825 ///
826 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
827 #[must_use]
828 pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
829 let mut importers: HashSet<FileId> = HashSet::new();
830 for &target_node in self.indices.by_file(file_id) {
831 for edge_ref in self.edges.edges_to(target_node) {
832 if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
833 continue;
834 }
835 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
836 continue;
837 };
838 let source_file = source_entry.file;
839 if source_file != file_id {
840 importers.insert(source_file);
841 }
842 }
843 }
844 let mut result: Vec<FileId> = importers.into_iter().collect();
845 result.sort();
846 result
847 }
848
849 /// Returns the set of files that hold at least one live inter-file edge
850 /// targeting a node in `file_id`, deduplicated and sorted ascending.
851 ///
852 /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
853 /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
854 /// cross-file edge as a dependency signal: `Calls`, `References`,
855 /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
856 /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
857 /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
858 /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
859 /// cross-file-capable variant. This is the reverse-dependency surface the
860 /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
861 /// `file_id` changes, every file whose committed edges point into
862 /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
863 /// references survive the target-side tombstone-and-reparse cycle.
864 ///
865 /// The caller's own file is never included — an edge whose source and
866 /// target both live in `file_id` is a self-reference and is elided.
867 /// Edges whose source node is no longer resolvable in the arena
868 /// (tombstoned) are silently skipped.
869 ///
870 /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
871 ///
872 /// * [`reverse_import_index`](Self::reverse_import_index) remains the
873 /// right surface for consumers that specifically need *import*
874 /// relationships (export surface analysis, module-dependency graphs,
875 /// etc.).
876 /// * `reverse_dependency_index` is the right surface for incremental
877 /// rebuild closure computation. Widening past imports is necessary
878 /// because call sites, type references, trait implementations, FFI
879 /// declarations, HTTP clients, and every other cross-file edge kind
880 /// hold a committed edge into the target file that becomes stale the
881 /// moment `remove_file(target)` tombstones its arena nodes. Leaving
882 /// those files out of the closure leaves the committed edges pointing
883 /// at the stale (pre-tombstone) node IDs — Phase 4c-prime only
884 /// rewrites edges on **re-parsed** files' `PendingEdge` sets, never
885 /// committed edges owned by files outside the reparse scope.
886 ///
887 /// # Complexity
888 ///
889 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
890 /// same bound as [`reverse_import_index`](Self::reverse_import_index).
891 /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
892 /// lookup and the bidirectional edge store's reverse adjacency; no
893 /// full-graph scan. Final `O(R log R)` sort over the deduplicated
894 /// dependent set is negligible since `R ≤ file_count`.
895 ///
896 /// # Over-widening is expected and acceptable
897 ///
898 /// The closure will include every file that references anything in
899 /// `file_id`, not just files whose exports change. In common codebases
900 /// this widens the reparse set modestly (a 10-file change may expand
901 /// to 20–30 dependent files in a medium crate). Correctness requires
902 /// the widening; minimality is a follow-up optimisation if profiling
903 /// demands it.
904 ///
905 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
906 #[must_use]
907 pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
908 let mut dependents: HashSet<FileId> = HashSet::new();
909 for &target_node in self.indices.by_file(file_id) {
910 for edge_ref in self.edges.edges_to(target_node) {
911 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
912 continue;
913 };
914 let source_file = source_entry.file;
915 if source_file != file_id {
916 dependents.insert(source_file);
917 }
918 }
919 }
920 let mut result: Vec<FileId> = dependents.into_iter().collect();
921 result.sort();
922 result
923 }
924
925 /// Returns the per-language confidence metadata.
926 ///
927 /// This contains analysis confidence information collected during graph build,
928 /// primarily used by language plugins (e.g., Rust) to track analysis quality.
929 #[inline]
930 #[must_use]
931 pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
932 &self.confidence
933 }
934
935 /// Merges confidence metadata for a language.
936 ///
937 /// If confidence already exists for the language, this merges the new
938 /// metadata (taking the lower confidence level and combining limitations).
939 /// Otherwise, it inserts the new confidence.
940 pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
941 use crate::confidence::ConfidenceLevel;
942
943 self.confidence
944 .entry(language.to_string())
945 .and_modify(|existing| {
946 // Take the lower confidence level (more conservative)
947 let new_level = match (&existing.level, &metadata.level) {
948 (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
949 *other
950 }
951 (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
952 | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
953 ConfidenceLevel::AstOnly
954 }
955 (level, _) => *level,
956 };
957 existing.level = new_level;
958
959 // Merge limitations (deduplicated)
960 for limitation in &metadata.limitations {
961 if !existing.limitations.contains(limitation) {
962 existing.limitations.push(limitation.clone());
963 }
964 }
965
966 // Merge unavailable features (deduplicated)
967 for feature in &metadata.unavailable_features {
968 if !existing.unavailable_features.contains(feature) {
969 existing.unavailable_features.push(feature.clone());
970 }
971 }
972 })
973 .or_insert(metadata);
974 }
975
976 /// Sets the confidence metadata map directly.
977 ///
978 /// This is primarily used when loading a graph from serialized state.
979 pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
980 self.confidence = confidence;
981 }
982
983 // ------------------------------------------------------------------
984 // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
985 //
986 // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
987 // path mutates a live `CodeGraph` in place and is the mechanism
988 // used by the full-rebuild flow when it needs to evict a file's
989 // nodes+edges between compactions. The daemon's incremental
990 // `WorkspaceManager` (Task 6) goes through the rebuild path, which
991 // is why this entry point is `pub(crate)`.
992 // ------------------------------------------------------------------
993
994 /// Tombstone every node that belongs to `file_id`, invalidate every
995 /// edge whose source or target is one of those nodes (across both
996 /// forward and reverse CSR + delta tiers), drop the file's entry
997 /// from the [`FileRegistry`], and return the set of [`NodeId`]s
998 /// that were tombstoned.
999 ///
1000 /// This is the §F.2-aware file-removal primitive. Semantically, the
1001 /// post-condition matches what a full rebuild of the workspace
1002 /// without `file_id` would produce:
1003 ///
1004 /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
1005 /// been `NodeArena::remove`d, advancing its slot generation so
1006 /// stale [`NodeId`] handles do not alias a later re-allocation.
1007 /// * Every CSR edge whose source or target slot matches one of the
1008 /// tombstoned slot indices has its `csr_tombstones` bit set; the
1009 /// read path's merge step already filters tombstoned CSR edges
1010 /// out of every query result.
1011 /// * Every delta-buffer edge (Add or Remove, any file) whose source
1012 /// or target matches a tombstoned slot has been dropped from the
1013 /// delta in both directions.
1014 /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
1015 /// no longer reference any of the tombstoned `NodeId`s.
1016 /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
1017 /// [`AliasTable`], and [`ShadowTable`] have been compacted through
1018 /// the [`NodeIdBearing::retain_nodes`] predicate so no
1019 /// tombstoned `NodeId` survives in any publish-visible store.
1020 /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
1021 /// `file_id`, the lookup slot is recycled, and
1022 /// [`FileRegistry::resolve(file_id)`] returns `None`.
1023 ///
1024 /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
1025 /// is useful for downstream housekeeping (e.g., resetting per-file
1026 /// caches keyed by `NodeId`) and for tests that need to assert on the
1027 /// exact membership of the tombstone set.
1028 ///
1029 /// # Idempotency
1030 ///
1031 /// Calling `remove_file` twice with the same `file_id` — or calling
1032 /// it for a `file_id` that was never registered — is a safe no-op.
1033 /// The returned `Vec<NodeId>` is empty on the second call; no
1034 /// arena / edge / index state is mutated (the predicate-based
1035 /// compaction of `NodeIdBearing` surfaces short-circuits when the
1036 /// dead set is empty).
1037 ///
1038 /// # Visibility
1039 ///
1040 /// `pub(crate)` because external callers (Task 6's
1041 /// `WorkspaceManager` on the sqry-daemon side) route through
1042 /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
1043 /// instead. This `CodeGraph`-level variant is used by full-rebuild
1044 /// housekeeping paths inside sqry-core and by the Task 4 Step 4
1045 /// incremental fallback for cases where the caller already has a
1046 /// `&mut CodeGraph` and does not need the clone-and-publish
1047 /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
1048 /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
1049 ///
1050 /// # Performance
1051 ///
1052 /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
1053 /// The CSR walk is linear in total edge count (not per-file),
1054 /// which is the dominant cost; each row check is O(1) via the
1055 /// precomputed dead-slot-index `HashSet` in
1056 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
1057 /// * Delta filtering is `O(|delta|)` per direction.
1058 /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
1059 /// because each of the four indices keys its entries by the
1060 /// tombstoned `NodeIds` directly.
1061 ///
1062 /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
1063 /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
1064 /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
1065 #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
1066 // and the unit tests below; published in this commit so the §F.2
1067 // invariant surface can be reviewed in isolation per the Gate 0c
1068 // split contract.
1069 pub(crate) fn remove_file(
1070 &mut self,
1071 file_id: FileId,
1072 ) -> Vec<crate::graph::unified::node::NodeId> {
1073 use crate::graph::unified::node::NodeId;
1074 use crate::graph::unified::rebuild::coverage::NodeIdBearing;
1075
1076 // Drain the per-file bucket. For a file that was never
1077 // registered, this returns an empty Vec — the rest of the
1078 // method short-circuits on the `if tombstoned.is_empty()` test
1079 // below so we still deregister the file on the off chance the
1080 // bucket was empty but the file was registered (defensive; the
1081 // common case is the bucket existed iff the file was
1082 // registered).
1083 let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
1084 // Always drop the file's path entry + recycle its slot, even
1085 // when the bucket was empty, so repeated registrations of the
1086 // same path don't resurrect a zombie FileId. `unregister` is
1087 // idempotent (returns None for unknown IDs) so the cost is a
1088 // single HashMap probe when file_id is already gone.
1089 self.files_mut().unregister(file_id);
1090 // Clear the file's segment entry unconditionally (idempotent
1091 // — `FileSegmentTable::remove` no-ops on unknown ids). This
1092 // MUST run before `FileRegistry::unregister` recycles the
1093 // FileId slot for reuse, otherwise a later registration of a
1094 // different path under the reused FileId would inherit the
1095 // previous file's stale node range (see
1096 // `sqry-core/src/graph/unified/build/reindex.rs` — which
1097 // trusts `file_segments().get(file_id)` to decide which slots
1098 // to tombstone). Note: `unregister` above was called first
1099 // only to keep the existing bucket-drain ordering; the
1100 // segment-clear is order-independent with respect to
1101 // `unregister` because neither touches the other's backing
1102 // store, and the FileId slot cannot be recycled-and-reissued
1103 // across a single `remove_file` call (the registry's slot
1104 // recycler is driven by a later `register`, not by
1105 // `unregister`).
1106 self.file_segments_mut().remove(file_id);
1107
1108 if tombstoned.is_empty() {
1109 return tombstoned;
1110 }
1111
1112 // Dead set keyed on NodeId for NodeIdBearing predicates.
1113 // `retain_nodes` uses `HashSet::contains` so membership is O(1).
1114 let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
1115
1116 // 1. Tombstone the arena slots. `NodeArena::remove` is
1117 // idempotent — stale NodeIds that don't match a slot's
1118 // current generation are no-ops, which lets this method be
1119 // safely re-run on the same file.
1120 {
1121 let arena = self.nodes_mut();
1122 for &nid in &tombstoned {
1123 let _ = arena.remove(nid);
1124 }
1125 }
1126
1127 // 2. Invalidate edges across both CSR + delta in both
1128 // directions. This is the expensive step; the helper uses a
1129 // precomputed dead-slot-index set so the CSR walk is linear
1130 // in total edge count, not quadratic.
1131 self.edges_mut().tombstone_edges_for_nodes(&dead);
1132
1133 // 3. Compact the auxiliary indices so name/kind/qname/file
1134 // lookups do not return tombstoned NodeIds. Using the
1135 // NodeIdBearing surface keeps this step in lockstep with the
1136 // rebuild pipeline's step 4 — any future publish-visible
1137 // NodeId-bearing container added to the K.A/K.B matrix is
1138 // automatically swept here too.
1139 {
1140 let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
1141 self.indices_mut().retain_nodes(&*predicate);
1142 self.macro_metadata_mut().retain_nodes(&*predicate);
1143 // The remaining K.A rows (node_provenance, scope_arena,
1144 // alias_table, shadow_table) need Arc::make_mut accessors —
1145 // they are wrapped in Arc at rest so this is where the
1146 // CoW clone happens on demand. Inline the Arc::make_mut
1147 // calls here (no public mut accessor exists today because
1148 // the sole writer has been the rebuild path; extend the
1149 // set by adding a similar inline call plus a K.A/K.B row
1150 // in `super::super::rebuild::coverage`).
1151 Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1152 Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1153 Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1154 Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1155 }
1156
1157 tombstoned
1158 }
1159
1160 // ------------------------------------------------------------------
1161 // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1162 // ------------------------------------------------------------------
1163
1164 /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1165 /// by `RebuildGraph::finalize()` (defined in
1166 /// `super::super::rebuild::rebuild_graph`).
1167 ///
1168 /// This constructor is deliberately `pub(crate)` and named with a
1169 /// leading `__` so it is inaccessible from downstream crates even
1170 /// when the `rebuild-internals` feature is enabled: only code in
1171 /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1172 /// permitted to call it. The trybuild fixture
1173 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1174 /// proves there is no other path from `RebuildGraph` to
1175 /// `Arc<CodeGraph>`.
1176 ///
1177 /// The argument order mirrors the `CodeGraph` struct declaration
1178 /// order exactly; the public `clone_for_rebuild` → `finalize`
1179 /// round-trip uses the same macro-driven field enumeration so any
1180 /// new `CodeGraph` field automatically threads through this
1181 /// constructor as well.
1182 #[doc(hidden)]
1183 #[allow(clippy::too_many_arguments)]
1184 #[must_use]
1185 pub(crate) fn __assemble_from_rebuild_parts_internal(
1186 nodes: NodeArena,
1187 edges: BidirectionalEdgeStore,
1188 strings: StringInterner,
1189 files: FileRegistry,
1190 indices: AuxiliaryIndices,
1191 macro_metadata: NodeMetadataStore,
1192 node_provenance: NodeProvenanceStore,
1193 edge_provenance: EdgeProvenanceStore,
1194 fact_epoch: u64,
1195 epoch: u64,
1196 confidence: HashMap<String, ConfidenceMetadata>,
1197 scope_arena: ScopeArena,
1198 alias_table: AliasTable,
1199 shadow_table: ShadowTable,
1200 scope_provenance_store: ScopeProvenanceStore,
1201 file_segments: FileSegmentTable,
1202 go_hints: crate::graph::unified::build::staging::GoHints,
1203 ) -> Self {
1204 Self {
1205 nodes: Arc::new(nodes),
1206 edges: Arc::new(edges),
1207 strings: Arc::new(strings),
1208 files: Arc::new(files),
1209 indices: Arc::new(indices),
1210 macro_metadata: Arc::new(macro_metadata),
1211 node_provenance: Arc::new(node_provenance),
1212 edge_provenance: Arc::new(edge_provenance),
1213 fact_epoch,
1214 epoch,
1215 confidence,
1216 scope_arena: Arc::new(scope_arena),
1217 alias_table: Arc::new(alias_table),
1218 shadow_table: Arc::new(shadow_table),
1219 scope_provenance_store: Arc::new(scope_provenance_store),
1220 file_segments: Arc::new(file_segments),
1221 c_indirect_tables: None,
1222 go_hints,
1223 // A rebuild reparses source files, regenerating is_definition fresh,
1224 // so the reassembled graph always carries genuine signal.
1225 definition_signal_present: true,
1226 }
1227 }
1228
1229 // ------------------------------------------------------------------
1230 // Gate 0c (A2 §F) — publish-boundary debug invariants.
1231 //
1232 // These checks fire only in debug / test builds; release builds
1233 // compile them out. They are called from
1234 // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1235 // truth for the residue check, per §F and §H agreement). Gate 0d
1236 // will additionally wire the bijection check into
1237 // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1238 // every §E equivalence-harness run.
1239 // ------------------------------------------------------------------
1240
1241 /// Assert the bijective bucket-membership invariant (A2 §F.1).
1242 ///
1243 /// Four conditions must hold simultaneously:
1244 /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1245 /// live arena slot;
1246 /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1247 /// across buckets, no duplicates within a bucket);
1248 /// (c) the bucket's `FileId` matches the node's own `file` field on
1249 /// `NodeEntry`;
1250 /// (d) when at least one bucket is populated, every live node in
1251 /// the arena is accounted for by some bucket.
1252 ///
1253 /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1254 /// (no recorded buckets) publish boundary is vacuously consistent:
1255 /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1256 /// rebuilds on graphs that predate Gate 0c's parallel-commit
1257 /// bucketing must not panic. Once any bucket is populated, every
1258 /// live arena slot must appear in a bucket.
1259 ///
1260 /// Iter-2 B2 (verbatim): this check used to be documented as
1261 /// "vacuous until future `per_file_nodes` work lands". Pulling
1262 /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1263 /// is non-vacuous the moment parallel-parse commits nodes, which
1264 /// happens on every real build. The `!seen.is_empty()` guard now
1265 /// exists solely for the empty-graph corner case, not as a phased-
1266 /// delivery deferral.
1267 ///
1268 /// The check is a no-op in release builds. Panics with a
1269 /// descriptive message on violation. This is intentional: publish-
1270 /// boundary violations are programmer errors that must surface
1271 /// loudly during CI / test runs.
1272 ///
1273 /// # Panics
1274 ///
1275 /// Panics in debug/test builds when bucket membership is duplicated, points
1276 /// at a dead node, assigns a node to the wrong file, or omits a live node
1277 /// after buckets have been populated.
1278 #[cfg(any(debug_assertions, test))]
1279 pub fn assert_bucket_bijection(&self) {
1280 use std::collections::HashMap as StdHashMap;
1281 // (a) + (b) + (c): every bucketed node is live, unique across
1282 // buckets, and the bucket's FileId matches the node's own file.
1283 let mut seen: StdHashMap<
1284 crate::graph::unified::node::NodeId,
1285 crate::graph::unified::file::FileId,
1286 > = StdHashMap::new();
1287 let mut any_bucket_populated = false;
1288 for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1289 if !bucket.is_empty() {
1290 any_bucket_populated = true;
1291 }
1292 // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1293 // dedups during finalize step 6, but we re-check here so the
1294 // invariant covers non-finalize publish paths too (e.g. if a
1295 // future pipeline builds a graph without routing through
1296 // `RebuildGraph::finalize`).
1297 let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1298 std::collections::HashSet::new();
1299 for node_id in bucket {
1300 assert!(
1301 within_bucket.insert(node_id),
1302 "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1303 );
1304 assert!(
1305 self.nodes.get(node_id).is_some(),
1306 "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1307 );
1308 let prior = seen.insert(node_id, file_id);
1309 assert!(
1310 prior.is_none(),
1311 "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1312 prior={prior:?}, current={file_id:?}"
1313 );
1314 if let Some(entry) = self.nodes.get(node_id) {
1315 assert_eq!(
1316 entry.file, file_id,
1317 "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1318 {file_id:?}, actual {:?}",
1319 entry.file
1320 );
1321 }
1322 }
1323 }
1324 // (d): every live node is accounted for by `seen` once buckets
1325 // are populated. The guard keeps legacy-empty-graph boundaries
1326 // vacuously consistent (see docs above).
1327 if any_bucket_populated {
1328 for (node_id, _entry) in self.nodes.iter() {
1329 assert!(
1330 seen.contains_key(&node_id),
1331 "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1332 );
1333 }
1334 }
1335 }
1336
1337 /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1338 ///
1339 /// Iterates every publish-visible NodeId-bearing structure on
1340 /// `self` and panics if any contains a node in `dead`. Called from
1341 /// `RebuildGraph::finalize()` step 14 against the set drained at
1342 /// step 8 — exactly one site per the plan's §F / §H agreement.
1343 ///
1344 /// No-op when `dead` is empty or in release builds.
1345 ///
1346 /// # Panics
1347 ///
1348 /// Panics in debug/test builds if any publish-visible node-bearing
1349 /// structure still references a tombstoned node.
1350 #[cfg(any(debug_assertions, test))]
1351 pub fn assert_no_tombstone_residue_for<S: std::hash::BuildHasher>(
1352 &self,
1353 dead: &std::collections::HashSet<crate::graph::unified::node::NodeId, S>,
1354 ) {
1355 use super::super::rebuild::coverage::NodeIdBearing;
1356 if dead.is_empty() {
1357 return;
1358 }
1359 // Every K.A/K.B row must be inspected per §F.2.
1360 for nid in self.nodes.all_node_ids() {
1361 assert!(
1362 !dead.contains(&nid),
1363 "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1364 );
1365 }
1366 for nid in self.indices.all_node_ids() {
1367 assert!(
1368 !dead.contains(&nid),
1369 "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1370 );
1371 }
1372 for nid in self.edges.all_node_ids() {
1373 assert!(
1374 !dead.contains(&nid),
1375 "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1376 );
1377 }
1378 for nid in self.macro_metadata.all_node_ids() {
1379 assert!(
1380 !dead.contains(&nid),
1381 "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1382 );
1383 }
1384 for nid in self.node_provenance.all_node_ids() {
1385 assert!(
1386 !dead.contains(&nid),
1387 "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1388 );
1389 }
1390 for nid in self.scope_arena.all_node_ids() {
1391 assert!(
1392 !dead.contains(&nid),
1393 "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1394 );
1395 }
1396 for nid in self.alias_table.all_node_ids() {
1397 assert!(
1398 !dead.contains(&nid),
1399 "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1400 );
1401 }
1402 for nid in self.shadow_table.all_node_ids() {
1403 assert!(
1404 !dead.contains(&nid),
1405 "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1406 );
1407 }
1408 for nid in self.files.all_node_ids() {
1409 assert!(
1410 !dead.contains(&nid),
1411 "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1412 );
1413 }
1414 }
1415}
1416
1417impl Default for CodeGraph {
1418 fn default() -> Self {
1419 Self::new()
1420 }
1421}
1422
1423impl fmt::Debug for CodeGraph {
1424 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1425 f.debug_struct("CodeGraph")
1426 .field("nodes", &self.nodes.len())
1427 .field("epoch", &self.epoch)
1428 .finish_non_exhaustive()
1429 }
1430}
1431
1432impl GraphMemorySize for CodeGraph {
1433 /// Estimates the total heap bytes owned by this `CodeGraph`.
1434 ///
1435 /// Sums heap usage across every component the graph owns: node arena,
1436 /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1437 /// buffer), string interner, file registry, auxiliary indices, sparse
1438 /// macro/classpath metadata, provenance stores, and the per-language
1439 /// confidence map. Used by the `sqryd` daemon's admission controller
1440 /// and workspace retention reaper to enforce memory budgets.
1441 fn heap_bytes(&self) -> usize {
1442 let mut confidence_bytes = self.confidence.capacity()
1443 * (std::mem::size_of::<String>()
1444 + std::mem::size_of::<ConfidenceMetadata>()
1445 + HASHMAP_ENTRY_OVERHEAD);
1446 for (key, meta) in &self.confidence {
1447 confidence_bytes += key.capacity();
1448 // `ConfidenceMetadata.limitations` / `unavailable_features` are
1449 // `Vec<String>`; charge their spill and inner String payloads.
1450 confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1451 for s in &meta.limitations {
1452 confidence_bytes += s.capacity();
1453 }
1454 confidence_bytes +=
1455 meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1456 for s in &meta.unavailable_features {
1457 confidence_bytes += s.capacity();
1458 }
1459 }
1460
1461 self.nodes.heap_bytes()
1462 + self.edges.heap_bytes()
1463 + self.strings.heap_bytes()
1464 + self.files.heap_bytes()
1465 + self.indices.heap_bytes()
1466 + self.macro_metadata.heap_bytes()
1467 + self.node_provenance.heap_bytes()
1468 + self.edge_provenance.heap_bytes()
1469 + confidence_bytes
1470 + self.file_segments.capacity()
1471 * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1472 )
1473 }
1474}
1475
1476/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1477///
1478/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1479/// - Multiple readers can access the graph simultaneously
1480/// - Only one writer can hold the lock at a time
1481/// - Each write operation increments the epoch for cursor invalidation
1482///
1483/// # Design
1484///
1485/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1486/// - Fair scheduling prevents writer starvation
1487/// - No poisoning (unlike `std::sync::RwLock`)
1488/// - Faster lock/unlock operations
1489///
1490/// # Phase 2 binding-plane access
1491///
1492/// Use the three-line snapshot pattern to access `BindingPlane`:
1493///
1494/// ```rust,ignore
1495/// let read_guard = concurrent.read();
1496/// let snapshot = read_guard.snapshot();
1497/// let plane = snapshot.binding_plane();
1498/// let resolution = plane.resolve(&query);
1499/// ```
1500///
1501/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1502/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1503/// API is exposed through `BindingPlane`.
1504///
1505/// # Usage
1506///
1507/// ```rust
1508/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1509///
1510/// let graph = ConcurrentCodeGraph::new();
1511///
1512/// // Read access (multiple readers allowed)
1513/// {
1514/// let guard = graph.read();
1515/// let _nodes = guard.nodes();
1516/// }
1517///
1518/// // Write access (exclusive)
1519/// {
1520/// let mut guard = graph.write();
1521/// let _nodes = guard.nodes_mut();
1522/// }
1523///
1524/// // Snapshot for long queries
1525/// let snapshot = graph.snapshot();
1526/// ```
1527pub struct ConcurrentCodeGraph {
1528 /// The underlying code graph protected by a read-write lock.
1529 inner: RwLock<CodeGraph>,
1530 /// Global epoch counter for cursor validation.
1531 epoch: AtomicU64,
1532}
1533
1534impl ConcurrentCodeGraph {
1535 /// Creates a new empty `ConcurrentCodeGraph`.
1536 #[must_use]
1537 pub fn new() -> Self {
1538 Self {
1539 inner: RwLock::new(CodeGraph::new()),
1540 epoch: AtomicU64::new(0),
1541 }
1542 }
1543
1544 /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1545 #[must_use]
1546 pub fn from_graph(graph: CodeGraph) -> Self {
1547 let epoch = graph.epoch();
1548 Self {
1549 inner: RwLock::new(graph),
1550 epoch: AtomicU64::new(epoch),
1551 }
1552 }
1553
1554 /// Acquires a read lock on the graph.
1555 ///
1556 /// Multiple readers can hold the lock simultaneously.
1557 /// This does not increment the epoch.
1558 #[inline]
1559 pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1560 self.inner.read()
1561 }
1562
1563 /// Acquires a write lock on the graph.
1564 ///
1565 /// Only one writer can hold the lock at a time.
1566 /// This increments the global epoch counter.
1567 #[inline]
1568 pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1569 // Increment the global epoch
1570 self.epoch.fetch_add(1, Ordering::SeqCst);
1571 let mut guard = self.inner.write();
1572 // Sync the inner graph's epoch with the global epoch
1573 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1574 guard
1575 }
1576
1577 /// Returns the current global epoch.
1578 ///
1579 /// This can be used to detect if the graph has been modified
1580 /// since a previous operation (cursor invalidation).
1581 #[inline]
1582 #[must_use]
1583 pub fn epoch(&self) -> u64 {
1584 self.epoch.load(Ordering::SeqCst)
1585 }
1586
1587 /// Creates a cheap snapshot of the graph.
1588 ///
1589 /// This acquires a brief read lock to clone the Arc references.
1590 /// The snapshot is isolated from future mutations.
1591 #[must_use]
1592 pub fn snapshot(&self) -> GraphSnapshot {
1593 self.inner.read().snapshot()
1594 }
1595
1596 // ------------------------------------------------------------------
1597 // Phase 1 fact-layer provenance convenience accessors.
1598 // These acquire a brief read lock, matching the snapshot() pattern.
1599 // ------------------------------------------------------------------
1600
1601 /// Returns the monotonic fact-layer epoch from the underlying graph.
1602 #[must_use]
1603 pub fn fact_epoch(&self) -> u64 {
1604 self.inner.read().fact_epoch()
1605 }
1606
1607 /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1608 #[must_use]
1609 pub fn node_provenance(
1610 &self,
1611 id: crate::graph::unified::node::id::NodeId,
1612 ) -> Option<NodeProvenance> {
1613 self.inner.read().node_provenance(id).copied()
1614 }
1615
1616 /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1617 #[must_use]
1618 pub fn edge_provenance(
1619 &self,
1620 id: crate::graph::unified::edge::id::EdgeId,
1621 ) -> Option<EdgeProvenance> {
1622 self.inner.read().edge_provenance(id).copied()
1623 }
1624
1625 /// Returns a file provenance view (acquires a brief read lock).
1626 ///
1627 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1628 #[must_use]
1629 pub fn file_provenance(
1630 &self,
1631 id: crate::graph::unified::file::id::FileId,
1632 ) -> Option<OwnedFileProvenanceView> {
1633 let guard = self.inner.read();
1634 guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1635 content_hash: *v.content_hash,
1636 indexed_at: v.indexed_at,
1637 source_uri: v.source_uri,
1638 is_external: v.is_external,
1639 })
1640 }
1641
1642 // ------------------------------------------------------------------
1643 // Phase 2 binding-plane accessors (P2U03).
1644 // ------------------------------------------------------------------
1645
1646 /// Returns the scope arena from the underlying graph (acquires a brief
1647 /// read lock).
1648 ///
1649 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1650 /// this call site.
1651 #[must_use]
1652 pub fn scope_arena(&self) -> Arc<ScopeArena> {
1653 Arc::clone(&self.inner.read().scope_arena)
1654 }
1655
1656 /// Returns the alias table from the underlying graph (acquires a brief
1657 /// read lock).
1658 ///
1659 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1660 /// this call site.
1661 #[must_use]
1662 pub fn alias_table(&self) -> Arc<AliasTable> {
1663 Arc::clone(&self.inner.read().alias_table)
1664 }
1665
1666 /// Returns the shadow table from the underlying graph (acquires a brief
1667 /// read lock).
1668 ///
1669 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1670 /// this call site.
1671 #[must_use]
1672 pub fn shadow_table(&self) -> Arc<ShadowTable> {
1673 Arc::clone(&self.inner.read().shadow_table)
1674 }
1675
1676 /// Returns the scope provenance store from the underlying graph (acquires
1677 /// a brief read lock).
1678 ///
1679 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1680 /// this call site.
1681 #[must_use]
1682 pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1683 Arc::clone(&self.inner.read().scope_provenance_store)
1684 }
1685
1686 /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1687 ///
1688 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1689 #[must_use]
1690 pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1691 self.inner.read().scope_provenance(id).cloned()
1692 }
1693
1694 /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1695 /// brief read lock).
1696 ///
1697 /// Returns `None` if no provenance record is registered for that stable id.
1698 #[must_use]
1699 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1700 self.inner.read().scope_by_stable_id(stable)
1701 }
1702
1703 /// Returns the file segment table from the underlying graph (acquires a
1704 /// brief read lock).
1705 #[must_use]
1706 pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1707 Arc::clone(&self.inner.read().file_segments)
1708 }
1709
1710 /// Attempts to acquire a read lock without blocking.
1711 ///
1712 /// Returns `None` if the lock is currently held exclusively.
1713 #[inline]
1714 #[must_use]
1715 pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1716 self.inner.try_read()
1717 }
1718
1719 /// Attempts to acquire a write lock without blocking.
1720 ///
1721 /// Returns `None` if the lock is currently held by another thread.
1722 /// If successful, increments the epoch.
1723 #[inline]
1724 pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1725 self.inner.try_write().map(|mut guard| {
1726 self.epoch.fetch_add(1, Ordering::SeqCst);
1727 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1728 guard
1729 })
1730 }
1731}
1732
1733impl Default for ConcurrentCodeGraph {
1734 fn default() -> Self {
1735 Self::new()
1736 }
1737}
1738
1739impl fmt::Debug for ConcurrentCodeGraph {
1740 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1741 f.debug_struct("ConcurrentCodeGraph")
1742 .field("epoch", &self.epoch.load(Ordering::SeqCst))
1743 .finish_non_exhaustive()
1744 }
1745}
1746
1747/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1748/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1749#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1750pub struct OwnedFileProvenanceView {
1751 /// SHA-256 of the on-disk file bytes (owned copy).
1752 pub content_hash: [u8; 32],
1753 /// Unix-epoch seconds at which this file was registered.
1754 pub indexed_at: u64,
1755 /// Optional interned physical origin URI.
1756 pub source_uri: Option<StringId>,
1757 /// Whether this file originates from an external source.
1758 pub is_external: bool,
1759}
1760
1761/// Immutable snapshot of a `CodeGraph` for long-running queries.
1762///
1763/// `GraphSnapshot` holds Arc references to the graph components,
1764/// providing a consistent view that is isolated from concurrent mutations.
1765///
1766/// # Design
1767///
1768/// Snapshots are created via `CodeGraph::snapshot()` or
1769/// `ConcurrentCodeGraph::snapshot()`. They are:
1770///
1771/// - **Immutable**: No mutation methods available
1772/// - **Isolated**: Independent of future graph mutations
1773/// - **Cheap**: Only Arc clones, no data copying
1774/// - **Self-contained**: Can outlive the original graph/lock
1775///
1776/// # Usage
1777///
1778/// ```rust
1779/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1780///
1781/// let graph = ConcurrentCodeGraph::new();
1782///
1783/// // Create snapshot for a long query
1784/// let snapshot: GraphSnapshot = graph.snapshot();
1785///
1786/// // Snapshot can be used independently
1787/// let _epoch = snapshot.epoch();
1788/// ```
1789#[derive(Clone)]
1790pub struct GraphSnapshot {
1791 /// Node storage snapshot.
1792 nodes: Arc<NodeArena>,
1793 /// Edge storage snapshot.
1794 edges: Arc<BidirectionalEdgeStore>,
1795 /// String interner snapshot.
1796 strings: Arc<StringInterner>,
1797 /// File registry snapshot.
1798 files: Arc<FileRegistry>,
1799 /// Auxiliary indices snapshot.
1800 indices: Arc<AuxiliaryIndices>,
1801 /// Sparse macro boundary metadata snapshot.
1802 macro_metadata: Arc<NodeMetadataStore>,
1803 /// Dense node provenance snapshot (Phase 1).
1804 node_provenance: Arc<NodeProvenanceStore>,
1805 /// Dense edge provenance snapshot (Phase 1).
1806 edge_provenance: Arc<EdgeProvenanceStore>,
1807 /// Monotonic fact-layer epoch at snapshot time.
1808 fact_epoch: u64,
1809 /// Epoch at snapshot time (for cursor validation).
1810 epoch: u64,
1811 /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1812 scope_arena: Arc<ScopeArena>,
1813 /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1814 alias_table: Arc<AliasTable>,
1815 /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1816 shadow_table: Arc<ShadowTable>,
1817 /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1818 scope_provenance_store: Arc<ScopeProvenanceStore>,
1819 /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1820 file_segments: Arc<FileSegmentTable>,
1821 /// Phase A (U09): C indirect-call resolver side tables snapshot.
1822 ///
1823 /// `None` on non-C workspaces. The snapshot owns its own clone of the
1824 /// `Option<CIndirectSideTables>` so concurrent readers see a stable
1825 /// view independent of subsequent mutations to the source `CodeGraph`.
1826 c_indirect_tables: Option<CIndirectSideTables>,
1827 /// Whether this snapshot carries genuine `NodeEntry.is_definition` signal
1828 /// (R3 marker). Copied from the source [`CodeGraph`] at snapshot time. See
1829 /// [`CodeGraph::definition_signal_present`] for the contract.
1830 definition_signal_present: bool,
1831}
1832
1833impl GraphSnapshot {
1834 /// Returns whether this snapshot carries genuine `is_definition` signal.
1835 ///
1836 /// See [`CodeGraph::definition_signal_present`] for the full contract.
1837 #[inline]
1838 #[must_use]
1839 pub fn definition_signal_present(&self) -> bool {
1840 self.definition_signal_present
1841 }
1842
1843 /// Returns a reference to the node arena.
1844 #[inline]
1845 #[must_use]
1846 pub fn nodes(&self) -> &NodeArena {
1847 &self.nodes
1848 }
1849
1850 /// Returns a reference to the bidirectional edge store.
1851 #[inline]
1852 #[must_use]
1853 pub fn edges(&self) -> &BidirectionalEdgeStore {
1854 &self.edges
1855 }
1856
1857 /// Returns a reference to the string interner.
1858 #[inline]
1859 #[must_use]
1860 pub fn strings(&self) -> &StringInterner {
1861 &self.strings
1862 }
1863
1864 /// Returns a reference to the file registry.
1865 #[inline]
1866 #[must_use]
1867 pub fn files(&self) -> &FileRegistry {
1868 &self.files
1869 }
1870
1871 /// Returns a reference to the auxiliary indices.
1872 #[inline]
1873 #[must_use]
1874 pub fn indices(&self) -> &AuxiliaryIndices {
1875 &self.indices
1876 }
1877
1878 /// Returns a reference to the macro boundary metadata store.
1879 #[inline]
1880 #[must_use]
1881 pub fn macro_metadata(&self) -> &NodeMetadataStore {
1882 &self.macro_metadata
1883 }
1884
1885 /// Returns a reference to the C indirect-call side tables, if any.
1886 ///
1887 /// Mirrors [`CodeGraph::c_indirect_tables`]; see that method for
1888 /// the contract. Snapshot-level access is read-only.
1889 #[inline]
1890 #[must_use]
1891 pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
1892 self.c_indirect_tables.as_ref()
1893 }
1894
1895 // ------------------------------------------------------------------
1896 // Phase 1 fact-layer provenance accessors (P1U09).
1897 // ------------------------------------------------------------------
1898
1899 /// Returns the monotonic fact-layer epoch.
1900 #[inline]
1901 #[must_use]
1902 pub fn fact_epoch(&self) -> u64 {
1903 self.fact_epoch
1904 }
1905
1906 /// Looks up node provenance by `NodeId`.
1907 #[inline]
1908 #[must_use]
1909 pub fn node_provenance(
1910 &self,
1911 id: crate::graph::unified::node::id::NodeId,
1912 ) -> Option<&NodeProvenance> {
1913 self.node_provenance.lookup(id)
1914 }
1915
1916 /// Looks up edge provenance by `EdgeId`.
1917 #[inline]
1918 #[must_use]
1919 pub fn edge_provenance(
1920 &self,
1921 id: crate::graph::unified::edge::id::EdgeId,
1922 ) -> Option<&EdgeProvenance> {
1923 self.edge_provenance.lookup(id)
1924 }
1925
1926 /// Returns a borrowed provenance view for a file.
1927 #[inline]
1928 #[must_use]
1929 pub fn file_provenance(
1930 &self,
1931 id: crate::graph::unified::file::id::FileId,
1932 ) -> Option<FileProvenanceView<'_>> {
1933 self.files.file_provenance(id)
1934 }
1935
1936 // ------------------------------------------------------------------
1937 // Phase 2 binding-plane accessors (P2U03).
1938 // ------------------------------------------------------------------
1939
1940 /// Returns a reference to the scope arena at snapshot time.
1941 ///
1942 /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1943 /// as it existed when `snapshot()` was called. Subsequent calls to
1944 /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1945 #[inline]
1946 #[must_use]
1947 pub fn scope_arena(&self) -> &ScopeArena {
1948 &self.scope_arena
1949 }
1950
1951 /// Returns a reference to the alias table at snapshot time.
1952 ///
1953 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1954 /// as it existed when `snapshot()` was called. Subsequent calls to
1955 /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1956 #[inline]
1957 #[must_use]
1958 pub fn alias_table(&self) -> &AliasTable {
1959 &self.alias_table
1960 }
1961
1962 /// Returns a reference to the shadow table at snapshot time.
1963 ///
1964 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1965 /// as it existed when `snapshot()` was called. Subsequent calls to
1966 /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1967 #[inline]
1968 #[must_use]
1969 pub fn shadow_table(&self) -> &ShadowTable {
1970 &self.shadow_table
1971 }
1972
1973 /// Returns a reference to the scope provenance store at snapshot time.
1974 ///
1975 /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1976 /// as it existed when `snapshot()` was called. Subsequent calls to
1977 /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1978 /// this view.
1979 #[inline]
1980 #[must_use]
1981 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1982 &self.scope_provenance_store
1983 }
1984
1985 /// Looks up scope provenance by `ScopeId` at snapshot time.
1986 ///
1987 /// Returns `None` if the slot is out of range, vacant, or the stored
1988 /// generation does not match (stale handle).
1989 #[inline]
1990 #[must_use]
1991 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1992 self.scope_provenance_store.lookup(id)
1993 }
1994
1995 /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1996 ///
1997 /// Returns `None` if no provenance record is registered for that stable id.
1998 #[inline]
1999 #[must_use]
2000 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
2001 self.scope_provenance_store.scope_by_stable_id(stable)
2002 }
2003
2004 /// Returns a reference to the file segment table at snapshot time.
2005 #[inline]
2006 #[must_use]
2007 pub fn file_segments(&self) -> &FileSegmentTable {
2008 &self.file_segments
2009 }
2010
2011 /// Returns the epoch at which this snapshot was taken.
2012 ///
2013 /// This can be compared against the current graph epoch to
2014 /// detect if the graph has changed since the snapshot.
2015 #[inline]
2016 #[must_use]
2017 pub fn epoch(&self) -> u64 {
2018 self.epoch
2019 }
2020
2021 /// Returns `true` if this snapshot's epoch matches the given epoch.
2022 ///
2023 /// Use this to validate cursors before continuing pagination.
2024 #[inline]
2025 #[must_use]
2026 pub fn epoch_matches(&self, other_epoch: u64) -> bool {
2027 self.epoch == other_epoch
2028 }
2029
2030 // ------------------------------------------------------------------
2031 // Phase 2 binding-plane facade accessor (P2U07).
2032 // ------------------------------------------------------------------
2033
2034 /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
2035 ///
2036 /// The facade is the stable Phase 2 public API for scope/alias/shadow
2037 /// queries and witness-bearing resolution. It provides a single entry
2038 /// point (`resolve`) that returns both a `BindingResult` and an ordered
2039 /// step trace in a `BindingResolution`.
2040 ///
2041 /// # MVCC note
2042 ///
2043 /// `BindingPlane<'_>` borrows from this snapshot, which is already an
2044 /// MVCC-consistent view of the graph at snapshot time. Callers from
2045 /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
2046 /// pattern so the snapshot lifetime is explicit:
2047 ///
2048 /// ```rust,ignore
2049 /// // CodeGraph caller:
2050 /// let snapshot = graph.snapshot();
2051 /// let plane = snapshot.binding_plane();
2052 ///
2053 /// // ConcurrentCodeGraph caller:
2054 /// let read_guard = concurrent.read();
2055 /// let snapshot = read_guard.snapshot();
2056 /// let plane = snapshot.binding_plane();
2057 /// ```
2058 #[inline]
2059 #[must_use]
2060 pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
2061 crate::graph::unified::bind::plane::BindingPlane::new(self)
2062 }
2063
2064 // ============================================================================
2065 // Query Methods
2066 // ============================================================================
2067
2068 /// Finds nodes matching a pattern.
2069 ///
2070 /// Performs a simple substring match on node names and qualified names.
2071 /// Returns all matching node IDs.
2072 ///
2073 /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
2074 /// nodes — internal scaffolding the language plugins emit for
2075 /// binding-plane and scope analysis (e.g. the Go plugin's
2076 /// `<field:operand.field>` field-access shadows and the
2077 /// `<ident>@<offset>` per-binding-site Variable nodes from the
2078 /// local-scope resolver) — are filtered out by default. Internal
2079 /// callers that need to reach these nodes (binding plane, scope /
2080 /// alias / shadow analysis) use
2081 /// [`Self::find_by_pattern_with_options`] with
2082 /// `include_synthetic = true`.
2083 ///
2084 /// # Performance
2085 ///
2086 /// Optimized to iterate over unique strings in the interner (smaller set)
2087 /// rather than all nodes in the arena.
2088 ///
2089 /// # Arguments
2090 ///
2091 /// * `pattern` - The pattern to match (substring search)
2092 ///
2093 /// # Returns
2094 ///
2095 /// A vector of `NodeIds` for all matching nodes (synthetic
2096 /// placeholders excluded).
2097 #[must_use]
2098 pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
2099 self.find_by_pattern_with_options(pattern, false)
2100 }
2101
2102 /// Finds nodes matching a pattern with explicit control over synthetic
2103 /// placeholder visibility.
2104 ///
2105 /// `include_synthetic = false` is the default surface used by every
2106 /// user-facing caller (CLI `search`, MCP `semantic_search` /
2107 /// `pattern_search` / `relation_query`, etc.). Synthetic
2108 /// placeholders are suppressed via two parallel checks that must
2109 /// agree:
2110 ///
2111 /// 1. The authoritative `NodeFlags::SYNTHETIC` bit on the
2112 /// metadata store
2113 /// ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
2114 /// 2. The structural name-shape fallback
2115 /// ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
2116 /// for V10 snapshots written before the synthetic bit existed
2117 /// and for cross-file unification losers that retained their
2118 /// name but lost their metadata entry.
2119 ///
2120 /// Either check matching is sufficient to suppress the node. The
2121 /// design lives in `docs/development/public-issue-triage/`
2122 /// under the `C_SUPPRESS` unit; see also the rationale in
2123 /// [`crate::graph::unified::storage::metadata::NodeFlags::SYNTHETIC`].
2124 ///
2125 /// `include_synthetic = true` is **internal-only**. The binding
2126 /// plane, scope resolver, and rebuild's coverage gate use this
2127 /// path to reach synthetic nodes for their structural integrity
2128 /// checks. **No CLI / MCP surface should ever pass `true`.**
2129 #[must_use]
2130 pub fn find_by_pattern_with_options(
2131 &self,
2132 pattern: &str,
2133 include_synthetic: bool,
2134 ) -> Vec<crate::graph::unified::node::NodeId> {
2135 let mut matches = Vec::new();
2136
2137 // 1. Scan unique strings in interner for matches
2138 for (str_id, s) in self.strings.iter() {
2139 if s.contains(pattern) {
2140 // 2. If string matches, look up all nodes with this name
2141 // Check qualified name index
2142 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2143 // Check simple name index
2144 matches.extend_from_slice(self.indices.by_name(str_id));
2145 }
2146 }
2147
2148 // Deduplicate matches (a node might match both qualified and simple name)
2149 matches.sort_unstable();
2150 matches.dedup();
2151
2152 if !include_synthetic {
2153 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2154 }
2155
2156 matches
2157 }
2158
2159 /// Finds nodes whose interned simple **or** qualified name equals `name`,
2160 /// accepting dot- and Ruby-`#` qualified display form as a fallback for
2161 /// graph-canonical `::` qualified names.
2162 ///
2163 /// This is the canonical surface for **exact-name** lookups —
2164 /// shared by the CLI `--exact <pattern>` shorthand
2165 /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
2166 /// structural query planner's `name:` predicate
2167 /// (`sqry-db/src/planner/parse.rs`,
2168 /// `sqry-db/src/planner/execute.rs`). Both surfaces are
2169 /// contract-bound (DAG `B1_ALIGN`) to return the same set against
2170 /// any fixture: the CLI calls this method directly, while the
2171 /// planner uses the same interner + by-name index pair internally
2172 /// when scanning, then applies the same synthetic filter.
2173 ///
2174 /// **Synthetic suppression.** Synthetic placeholder nodes
2175 /// (Go-plugin `<field:operand.field>` shadows and
2176 /// `<ident>@<offset>` per-binding-site Variables; see
2177 /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
2178 /// are excluded via [`Self::is_node_synthetic`]. There is **no**
2179 /// `include_synthetic = true` variant for the exact-match surface
2180 /// because the synthetic name shapes the structural fallback
2181 /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
2182 /// name byte-for-byte; the metadata-bit channel is the only
2183 /// realistic leak vector and it is suppressed unconditionally.
2184 ///
2185 /// **Display fallback.** Exact lookup checks the literal input. When the
2186 /// input is qualified, language-aware display candidates win over raw
2187 /// canonical matches. This keeps native Rust `identity::T` from also
2188 /// returning a TypeScript type parameter whose internal canonical form is
2189 /// `identity::T` but whose display form is `identity.T`. If neither
2190 /// display nor literal lookup finds candidates, dot- and Ruby-`#`
2191 /// qualified inputs also check the graph-canonical `::` rewrite.
2192 ///
2193 /// # Performance
2194 ///
2195 /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2196 /// interned the resolver still tries a native-display to graph-canonical
2197 /// `::` fallback for user-facing qualified names before returning empty.
2198 ///
2199 /// # Arguments
2200 ///
2201 /// * `name` - The exact name to look up (no glob, no regex).
2202 ///
2203 /// # Returns
2204 ///
2205 /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2206 /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2207 /// name equals `name`, or matches for the graph-canonical `::` rewrite
2208 /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2209 /// literal or display candidates.
2210 #[must_use]
2211 pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2212 let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2213 if !is_qualified {
2214 // Bare name lookup: the interned `by_name` index covers
2215 // every language and is O(1); no display scan is needed.
2216 return self.find_by_exact_interned_name(name);
2217 }
2218 // Qualified name: each plugin stores its native form
2219 // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2220 // Ruby `Ledger#promotedField`). Scan computed display names so
2221 // a single dotted user query resolves to all language matches,
2222 // then fall back to the interned form (and the graph-canonical
2223 // `::` rewrite) when display yields nothing.
2224 let mut exact_matches = self.find_by_exact_display_name(name);
2225 if exact_matches.is_empty() {
2226 exact_matches = self.find_by_exact_interned_name(name);
2227 if exact_matches.is_empty() && !name.contains("::") {
2228 let canonical = name.replace(['.', '#'], "::");
2229 exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2230 exact_matches.sort_unstable();
2231 exact_matches.dedup();
2232 }
2233 }
2234 exact_matches
2235 }
2236
2237 fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2238 let mut matches = self
2239 .iter_nodes()
2240 .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2241 .filter_map(|(node_id, entry)| {
2242 let qualified = entry
2243 .qualified_name
2244 .and_then(|sid| self.strings.resolve(sid))?;
2245 let display = self.files.language_for_file(entry.file).map_or_else(
2246 || qualified.to_string(),
2247 |language| {
2248 display_graph_qualified_name(
2249 language,
2250 qualified.as_ref(),
2251 entry.kind,
2252 entry.is_static,
2253 )
2254 },
2255 );
2256 (display == name).then_some(node_id)
2257 })
2258 .collect::<Vec<_>>();
2259 matches.sort_unstable();
2260 matches.dedup();
2261 matches
2262 }
2263
2264 fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2265 let Some(str_id) = self.strings.get(name) else {
2266 return Vec::new();
2267 };
2268
2269 let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2270 matches.extend_from_slice(self.indices.by_name(str_id));
2271 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2272 matches.sort_unstable();
2273 matches.dedup();
2274 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2275 matches
2276 }
2277
2278 /// Returns `true` if the node should be treated as a synthetic
2279 /// placeholder for user-facing surfaces.
2280 ///
2281 /// Combines the metadata-store flag and the structural name-shape
2282 /// fallback (see [`Self::find_by_pattern_with_options`] for the
2283 /// full rationale). Returns `false` for missing nodes (an unknown
2284 /// `NodeId` is not "synthetic" — it is "not present").
2285 #[must_use]
2286 pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2287 // Authoritative check: metadata-store bit (NodeFlags::SYNTHETIC).
2288 if self.macro_metadata.is_synthetic(node_id) {
2289 return true;
2290 }
2291 // Structural fallback: name shape recognised as synthetic.
2292 // Required for V10 snapshots written before the bit existed,
2293 // for unification losers that lost their metadata entry, and
2294 // as defence-in-depth against future plugins forgetting to
2295 // flip the bit.
2296 let Some(entry) = self.nodes.get(node_id) else {
2297 return false;
2298 };
2299 if entry.is_unified_loser() {
2300 // Already invisible for other reasons; do not also flag as synthetic.
2301 return false;
2302 }
2303 let Some(name) = self.strings.resolve(entry.name) else {
2304 return false;
2305 };
2306 crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2307 name.as_ref(),
2308 )
2309 }
2310
2311 /// Gets all callees of a node (functions called by this node).
2312 ///
2313 /// Queries the forward edge store for all Calls edges from this node.
2314 ///
2315 /// # Arguments
2316 ///
2317 /// * `node` - The node ID to query
2318 ///
2319 /// # Returns
2320 ///
2321 /// A vector of `NodeIds` representing functions called by this node.
2322 #[must_use]
2323 pub fn get_callees(
2324 &self,
2325 node: crate::graph::unified::node::NodeId,
2326 ) -> Vec<crate::graph::unified::node::NodeId> {
2327 use crate::graph::unified::edge::EdgeKind;
2328
2329 self.edges
2330 .edges_from(node)
2331 .into_iter()
2332 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2333 .map(|edge| edge.target)
2334 .collect()
2335 }
2336
2337 /// Gets all callers of a node (functions that call this node).
2338 ///
2339 /// Queries the reverse edge store for all Calls edges to this node.
2340 ///
2341 /// # Arguments
2342 ///
2343 /// * `node` - The node ID to query
2344 ///
2345 /// # Returns
2346 ///
2347 /// A vector of `NodeIds` representing functions that call this node.
2348 #[must_use]
2349 pub fn get_callers(
2350 &self,
2351 node: crate::graph::unified::node::NodeId,
2352 ) -> Vec<crate::graph::unified::node::NodeId> {
2353 use crate::graph::unified::edge::EdgeKind;
2354
2355 self.edges
2356 .edges_to(node)
2357 .into_iter()
2358 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2359 .map(|edge| edge.source)
2360 .collect()
2361 }
2362
2363 /// Iterates over all nodes in the graph.
2364 ///
2365 /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2366 /// occupied slots in the arena.
2367 ///
2368 /// # Returns
2369 ///
2370 /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2371 pub fn iter_nodes(
2372 &self,
2373 ) -> impl Iterator<
2374 Item = (
2375 crate::graph::unified::node::NodeId,
2376 &crate::graph::unified::storage::arena::NodeEntry,
2377 ),
2378 > {
2379 self.nodes.iter()
2380 }
2381
2382 /// Iterates over all edges in the graph.
2383 ///
2384 /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2385 /// all edges in the forward edge store.
2386 ///
2387 /// # Returns
2388 ///
2389 /// An iterator over edge tuples.
2390 pub fn iter_edges(
2391 &self,
2392 ) -> impl Iterator<
2393 Item = (
2394 crate::graph::unified::node::NodeId,
2395 crate::graph::unified::node::NodeId,
2396 crate::graph::unified::edge::EdgeKind,
2397 ),
2398 > + '_ {
2399 // Iterate over all nodes in the arena and get their outgoing edges
2400 self.nodes.iter().flat_map(move |(node_id, _entry)| {
2401 // Get all edges from this node
2402 self.edges
2403 .edges_from(node_id)
2404 .into_iter()
2405 .map(move |edge| (node_id, edge.target, edge.kind))
2406 })
2407 }
2408
2409 /// Gets a node entry by ID.
2410 ///
2411 /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2412 /// if the ID is invalid or stale.
2413 ///
2414 /// # Arguments
2415 ///
2416 /// * `id` - The node ID to look up
2417 ///
2418 /// # Returns
2419 ///
2420 /// A reference to the `NodeEntry`, or None if not found.
2421 #[must_use]
2422 pub fn get_node(
2423 &self,
2424 id: crate::graph::unified::node::NodeId,
2425 ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2426 self.nodes.get(id)
2427 }
2428}
2429
2430impl fmt::Debug for GraphSnapshot {
2431 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2432 f.debug_struct("GraphSnapshot")
2433 .field("nodes", &self.nodes.len())
2434 .field("epoch", &self.epoch)
2435 .finish_non_exhaustive()
2436 }
2437}
2438
2439/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2440///
2441/// This lets helpers that only *read* graph state (name-matching, relation
2442/// traversal, reference lookups) be written once and called from both the
2443/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2444/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
2445pub trait GraphAccess {
2446 /// Returns the node arena (read-only).
2447 fn nodes(&self) -> &NodeArena;
2448 /// Returns the bidirectional edge store (read-only).
2449 fn edges(&self) -> &BidirectionalEdgeStore;
2450 /// Returns the string interner (read-only).
2451 fn strings(&self) -> &StringInterner;
2452 /// Returns the file registry (read-only).
2453 fn files(&self) -> &FileRegistry;
2454 /// Returns the auxiliary indices (read-only).
2455 fn indices(&self) -> &AuxiliaryIndices;
2456}
2457
2458impl GraphAccess for CodeGraph {
2459 #[inline]
2460 fn nodes(&self) -> &NodeArena {
2461 CodeGraph::nodes(self)
2462 }
2463 #[inline]
2464 fn edges(&self) -> &BidirectionalEdgeStore {
2465 CodeGraph::edges(self)
2466 }
2467 #[inline]
2468 fn strings(&self) -> &StringInterner {
2469 CodeGraph::strings(self)
2470 }
2471 #[inline]
2472 fn files(&self) -> &FileRegistry {
2473 CodeGraph::files(self)
2474 }
2475 #[inline]
2476 fn indices(&self) -> &AuxiliaryIndices {
2477 CodeGraph::indices(self)
2478 }
2479}
2480
2481impl GraphAccess for GraphSnapshot {
2482 #[inline]
2483 fn nodes(&self) -> &NodeArena {
2484 GraphSnapshot::nodes(self)
2485 }
2486 #[inline]
2487 fn edges(&self) -> &BidirectionalEdgeStore {
2488 GraphSnapshot::edges(self)
2489 }
2490 #[inline]
2491 fn strings(&self) -> &StringInterner {
2492 GraphSnapshot::strings(self)
2493 }
2494 #[inline]
2495 fn files(&self) -> &FileRegistry {
2496 GraphSnapshot::files(self)
2497 }
2498 #[inline]
2499 fn indices(&self) -> &AuxiliaryIndices {
2500 GraphSnapshot::indices(self)
2501 }
2502}
2503
2504#[cfg(test)]
2505mod tests {
2506 use super::*;
2507 use crate::graph::unified::{
2508 FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2509 SymbolResolutionOutcome,
2510 };
2511
2512 fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2513 match snapshot.resolve_symbol(&SymbolQuery {
2514 symbol,
2515 file_scope: FileScope::Any,
2516 mode: ResolutionMode::Strict,
2517 }) {
2518 SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2519 SymbolResolutionOutcome::NotFound
2520 | SymbolResolutionOutcome::FileNotIndexed
2521 | SymbolResolutionOutcome::Ambiguous(_) => None,
2522 }
2523 }
2524
2525 fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2526 match snapshot.find_symbol_candidates(&SymbolQuery {
2527 symbol,
2528 file_scope: FileScope::Any,
2529 mode: ResolutionMode::AllowSuffixCandidates,
2530 }) {
2531 SymbolCandidateOutcome::Candidates(candidates) => candidates,
2532 SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2533 }
2534 }
2535
2536 #[test]
2537 fn test_code_graph_new() {
2538 let graph = CodeGraph::new();
2539 assert_eq!(graph.epoch(), 0);
2540 assert_eq!(graph.nodes().len(), 0);
2541 }
2542
2543 #[test]
2544 fn test_code_graph_default() {
2545 let graph = CodeGraph::default();
2546 assert_eq!(graph.epoch(), 0);
2547 }
2548
2549 #[test]
2550 fn test_code_graph_snapshot() {
2551 let graph = CodeGraph::new();
2552 let snapshot = graph.snapshot();
2553 assert_eq!(snapshot.epoch(), 0);
2554 assert_eq!(snapshot.nodes().len(), 0);
2555 }
2556
2557 #[test]
2558 fn test_code_graph_bump_epoch() {
2559 let mut graph = CodeGraph::new();
2560 assert_eq!(graph.epoch(), 0);
2561 assert_eq!(graph.bump_epoch(), 1);
2562 assert_eq!(graph.epoch(), 1);
2563 assert_eq!(graph.bump_epoch(), 2);
2564 assert_eq!(graph.epoch(), 2);
2565 }
2566
2567 #[test]
2568 fn test_code_graph_set_epoch() {
2569 let mut graph = CodeGraph::new();
2570 graph.set_epoch(42);
2571 assert_eq!(graph.epoch(), 42);
2572 }
2573
2574 #[test]
2575 fn test_code_graph_from_components() {
2576 let nodes = NodeArena::new();
2577 let edges = BidirectionalEdgeStore::new();
2578 let strings = StringInterner::new();
2579 let files = FileRegistry::new();
2580 let indices = AuxiliaryIndices::new();
2581 let macro_metadata = NodeMetadataStore::new();
2582
2583 let graph =
2584 CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2585 assert_eq!(graph.epoch(), 0);
2586 }
2587
2588 #[test]
2589 fn test_code_graph_mut_accessors() {
2590 let mut graph = CodeGraph::new();
2591
2592 // Access mutable references - should not panic
2593 let _nodes = graph.nodes_mut();
2594 let _edges = graph.edges_mut();
2595 let _strings = graph.strings_mut();
2596 let _files = graph.files_mut();
2597 let _indices = graph.indices_mut();
2598 }
2599
2600 #[test]
2601 fn test_code_graph_snapshot_isolation() {
2602 let mut graph = CodeGraph::new();
2603 let snapshot1 = graph.snapshot();
2604
2605 // Mutate the graph
2606 graph.bump_epoch();
2607
2608 let snapshot2 = graph.snapshot();
2609
2610 // Snapshots should have different epochs
2611 assert_eq!(snapshot1.epoch(), 0);
2612 assert_eq!(snapshot2.epoch(), 1);
2613 }
2614
2615 #[test]
2616 fn test_code_graph_debug() {
2617 let graph = CodeGraph::new();
2618 let debug_str = format!("{graph:?}");
2619 assert!(debug_str.contains("CodeGraph"));
2620 assert!(debug_str.contains("epoch"));
2621 }
2622
2623 /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
2624 /// accounting: adding a `ConfidenceMetadata` entry with known inner
2625 /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
2626 /// of those capacities plus Vec<String> slot overhead.
2627 #[test]
2628 fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
2629 let mut graph = CodeGraph::new();
2630 // Seed one confidence entry so the HashMap has non-zero capacity;
2631 // reserve slack so the next insert cannot rehash.
2632 graph.set_confidence({
2633 let mut m = HashMap::with_capacity(8);
2634 m.insert("seed".to_string(), ConfidenceMetadata::default());
2635 m
2636 });
2637 let before = graph.heap_bytes();
2638 let before_cap = graph.confidence.capacity();
2639
2640 let lim1 = String::from("no type inference");
2641 let lim2 = String::from("no generic specialization");
2642 let feat1 = String::from("rust-analyzer");
2643 let l1 = lim1.capacity();
2644 let l2 = lim2.capacity();
2645 let f1 = feat1.capacity();
2646
2647 let limitations = vec![lim1, lim2];
2648 let lim_vec_cap = limitations.capacity();
2649
2650 let unavailable_features = vec![feat1];
2651 let feat_vec_cap = unavailable_features.capacity();
2652
2653 let key = String::from("rust");
2654 let key_cap = key.capacity();
2655 graph.confidence.insert(
2656 key,
2657 ConfidenceMetadata {
2658 limitations,
2659 unavailable_features,
2660 ..Default::default()
2661 },
2662 );
2663 assert_eq!(
2664 graph.confidence.capacity(),
2665 before_cap,
2666 "prerequisite: confidence HashMap must not rehash during the test insert",
2667 );
2668
2669 let after = graph.heap_bytes();
2670 let expected_inner = key_cap
2671 + lim_vec_cap * std::mem::size_of::<String>()
2672 + l1
2673 + l2
2674 + feat_vec_cap * std::mem::size_of::<String>()
2675 + f1;
2676 assert_eq!(
2677 after - before,
2678 expected_inner,
2679 "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
2680 );
2681 }
2682
2683 #[test]
2684 fn test_codegraph_heap_bytes_grows_with_content() {
2685 use crate::graph::unified::node::NodeKind;
2686 use crate::graph::unified::storage::arena::NodeEntry;
2687 use std::path::Path;
2688
2689 // An empty graph reports some heap bytes (FileRegistry seeds index 0
2690 // with `vec![None]`, HashMaps have non-zero base capacity once touched,
2691 // etc.) but the value must be finite and well under the 100 MB cap.
2692 let empty = CodeGraph::new();
2693 let empty_bytes = empty.heap_bytes();
2694 assert!(
2695 empty_bytes < 100 * 1024 * 1024,
2696 "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2697 );
2698
2699 let mut graph = CodeGraph::new();
2700 for i in 0..32u32 {
2701 let name = format!("sym_{i}");
2702 let qual = format!("module::sym_{i}");
2703 let file = format!("file_{i}.rs");
2704
2705 let name_id = graph.strings_mut().intern(&name).unwrap();
2706 let qual_id = graph.strings_mut().intern(&qual).unwrap();
2707 let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2708
2709 let entry =
2710 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2711 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2712 graph
2713 .indices_mut()
2714 .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2715 }
2716
2717 let populated_bytes = graph.heap_bytes();
2718 assert!(
2719 populated_bytes > 0,
2720 "populated graph should report nonzero heap bytes"
2721 );
2722 assert!(
2723 populated_bytes > empty_bytes,
2724 "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2725 );
2726 assert!(
2727 populated_bytes < 100 * 1024 * 1024,
2728 "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2729 );
2730 }
2731
2732 #[test]
2733 fn test_concurrent_code_graph_new() {
2734 let graph = ConcurrentCodeGraph::new();
2735 assert_eq!(graph.epoch(), 0);
2736 }
2737
2738 #[test]
2739 fn test_concurrent_code_graph_default() {
2740 let graph = ConcurrentCodeGraph::default();
2741 assert_eq!(graph.epoch(), 0);
2742 }
2743
2744 #[test]
2745 fn test_concurrent_code_graph_from_graph() {
2746 let mut inner = CodeGraph::new();
2747 inner.set_epoch(10);
2748 let graph = ConcurrentCodeGraph::from_graph(inner);
2749 assert_eq!(graph.epoch(), 10);
2750 }
2751
2752 #[test]
2753 fn test_concurrent_code_graph_read() {
2754 let graph = ConcurrentCodeGraph::new();
2755 let guard = graph.read();
2756 assert_eq!(guard.epoch(), 0);
2757 assert_eq!(guard.nodes().len(), 0);
2758 }
2759
2760 #[test]
2761 fn test_concurrent_code_graph_write_increments_epoch() {
2762 let graph = ConcurrentCodeGraph::new();
2763 assert_eq!(graph.epoch(), 0);
2764
2765 {
2766 let guard = graph.write();
2767 assert_eq!(guard.epoch(), 1);
2768 }
2769
2770 assert_eq!(graph.epoch(), 1);
2771
2772 {
2773 let _guard = graph.write();
2774 }
2775
2776 assert_eq!(graph.epoch(), 2);
2777 }
2778
2779 #[test]
2780 fn test_concurrent_code_graph_snapshot() {
2781 let graph = ConcurrentCodeGraph::new();
2782
2783 {
2784 let _guard = graph.write();
2785 }
2786
2787 let snapshot = graph.snapshot();
2788 assert_eq!(snapshot.epoch(), 1);
2789 }
2790
2791 #[test]
2792 fn test_concurrent_code_graph_try_read() {
2793 let graph = ConcurrentCodeGraph::new();
2794 let guard = graph.try_read();
2795 assert!(guard.is_some());
2796 }
2797
2798 #[test]
2799 fn test_concurrent_code_graph_try_write() {
2800 let graph = ConcurrentCodeGraph::new();
2801 let guard = graph.try_write();
2802 assert!(guard.is_some());
2803 assert_eq!(graph.epoch(), 1);
2804 }
2805
2806 #[test]
2807 fn test_concurrent_code_graph_debug() {
2808 let graph = ConcurrentCodeGraph::new();
2809 let debug_str = format!("{graph:?}");
2810 assert!(debug_str.contains("ConcurrentCodeGraph"));
2811 assert!(debug_str.contains("epoch"));
2812 }
2813
2814 #[test]
2815 fn test_graph_snapshot_accessors() {
2816 let graph = CodeGraph::new();
2817 let snapshot = graph.snapshot();
2818
2819 // All accessors should work
2820 let _nodes = snapshot.nodes();
2821 let _edges = snapshot.edges();
2822 let _strings = snapshot.strings();
2823 let _files = snapshot.files();
2824 let _indices = snapshot.indices();
2825 let _epoch = snapshot.epoch();
2826 }
2827
2828 #[test]
2829 fn test_graph_snapshot_epoch_matches() {
2830 let graph = CodeGraph::new();
2831 let snapshot = graph.snapshot();
2832
2833 assert!(snapshot.epoch_matches(0));
2834 assert!(!snapshot.epoch_matches(1));
2835 }
2836
2837 #[test]
2838 fn test_graph_snapshot_clone() {
2839 let graph = CodeGraph::new();
2840 let snapshot1 = graph.snapshot();
2841 let snapshot2 = snapshot1.clone();
2842
2843 assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2844 }
2845
2846 #[test]
2847 fn test_graph_snapshot_debug() {
2848 let graph = CodeGraph::new();
2849 let snapshot = graph.snapshot();
2850 let debug_str = format!("{snapshot:?}");
2851 assert!(debug_str.contains("GraphSnapshot"));
2852 assert!(debug_str.contains("epoch"));
2853 }
2854
2855 #[test]
2856 fn test_multiple_readers() {
2857 let graph = ConcurrentCodeGraph::new();
2858
2859 // Multiple readers should be able to acquire locks simultaneously
2860 let guard1 = graph.read();
2861 let guard2 = graph.read();
2862 let guard3 = graph.read();
2863
2864 assert_eq!(guard1.epoch(), 0);
2865 assert_eq!(guard2.epoch(), 0);
2866 assert_eq!(guard3.epoch(), 0);
2867 }
2868
2869 #[test]
2870 fn test_code_graph_clone() {
2871 let mut graph = CodeGraph::new();
2872 graph.bump_epoch();
2873
2874 let cloned = graph.clone();
2875 assert_eq!(cloned.epoch(), 1);
2876 }
2877
2878 #[test]
2879 fn test_epoch_wrapping() {
2880 let mut graph = CodeGraph::new();
2881 graph.set_epoch(u64::MAX);
2882 let new_epoch = graph.bump_epoch();
2883 assert_eq!(new_epoch, 0); // Should wrap around
2884 }
2885
2886 // ============================================================================
2887 // Query method tests
2888 // ============================================================================
2889
2890 #[test]
2891 fn test_snapshot_resolve_symbol() {
2892 use crate::graph::unified::node::NodeKind;
2893 use crate::graph::unified::storage::arena::NodeEntry;
2894 use std::path::Path;
2895
2896 let mut graph = CodeGraph::new();
2897
2898 // Add some nodes with qualified names
2899 let name_id = graph.strings_mut().intern("test_func").unwrap();
2900 let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2901 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2902
2903 let entry =
2904 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2905
2906 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2907 graph.indices_mut().add(
2908 node_id,
2909 NodeKind::Function,
2910 name_id,
2911 Some(qual_name_id),
2912 file_id,
2913 );
2914
2915 let snapshot = graph.snapshot();
2916
2917 // Find by qualified name
2918 let found = resolve_symbol_strict(&snapshot, "module::test_func");
2919 assert_eq!(found, Some(node_id));
2920
2921 // Find by exact simple name
2922 let found2 = resolve_symbol_strict(&snapshot, "test_func");
2923 assert_eq!(found2, Some(node_id));
2924
2925 // Not found
2926 assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2927 }
2928
2929 #[test]
2930 fn test_snapshot_find_by_pattern() {
2931 use crate::graph::unified::node::NodeKind;
2932 use crate::graph::unified::storage::arena::NodeEntry;
2933 use std::path::Path;
2934
2935 let mut graph = CodeGraph::new();
2936
2937 // Add nodes with different names
2938 let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2939 let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2940 let name3 = graph.strings_mut().intern("qux_test").unwrap();
2941 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2942
2943 let node1 = graph
2944 .nodes_mut()
2945 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2946 .unwrap();
2947 let node2 = graph
2948 .nodes_mut()
2949 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2950 .unwrap();
2951 let node3 = graph
2952 .nodes_mut()
2953 .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2954 .unwrap();
2955
2956 graph
2957 .indices_mut()
2958 .add(node1, NodeKind::Function, name1, None, file_id);
2959 graph
2960 .indices_mut()
2961 .add(node2, NodeKind::Function, name2, None, file_id);
2962 graph
2963 .indices_mut()
2964 .add(node3, NodeKind::Function, name3, None, file_id);
2965
2966 let snapshot = graph.snapshot();
2967
2968 // Find by pattern
2969 let matches = snapshot.find_by_pattern("bar");
2970 assert_eq!(matches.len(), 2);
2971 assert!(matches.contains(&node1));
2972 assert!(matches.contains(&node2));
2973
2974 // Find single match
2975 let matches = snapshot.find_by_pattern("qux");
2976 assert_eq!(matches.len(), 1);
2977 assert_eq!(matches[0], node3);
2978
2979 // No matches
2980 let matches = snapshot.find_by_pattern("nonexistent");
2981 assert!(matches.is_empty());
2982 }
2983
2984 #[test]
2985 fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
2986 // C_SUPPRESS: synthetic placeholder nodes (Go plugin
2987 // `<field:operand.field>` shadows + `<ident>@<offset>`
2988 // per-binding-site Variables) must NOT surface from
2989 // find_by_pattern, but must still be reachable via
2990 // find_by_pattern_with_options(_, true).
2991 use crate::graph::unified::node::NodeKind;
2992 use crate::graph::unified::storage::arena::NodeEntry;
2993 use std::path::Path;
2994
2995 let mut graph = CodeGraph::new();
2996 let real_property = graph
2997 .strings_mut()
2998 .intern("main.SelectorSource.NeedTags")
2999 .unwrap();
3000 let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
3001 let synthetic_field = graph
3002 .strings_mut()
3003 .intern("<field:selector.NeedTags>")
3004 .unwrap();
3005 let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
3006 let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
3007 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3008
3009 let prop_id = graph
3010 .nodes_mut()
3011 .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
3012 .unwrap();
3013 let local_var_id = graph
3014 .nodes_mut()
3015 .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
3016 .unwrap();
3017 let syn_field_id = graph
3018 .nodes_mut()
3019 .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
3020 .unwrap();
3021 let syn_a_id = graph
3022 .nodes_mut()
3023 .alloc(NodeEntry::new(
3024 NodeKind::Variable,
3025 synthetic_offset_a,
3026 file_id,
3027 ))
3028 .unwrap();
3029 let syn_b_id = graph
3030 .nodes_mut()
3031 .alloc(NodeEntry::new(
3032 NodeKind::Variable,
3033 synthetic_offset_b,
3034 file_id,
3035 ))
3036 .unwrap();
3037
3038 graph
3039 .indices_mut()
3040 .add(prop_id, NodeKind::Property, real_property, None, file_id);
3041 graph.indices_mut().add(
3042 local_var_id,
3043 NodeKind::Variable,
3044 real_local_var,
3045 None,
3046 file_id,
3047 );
3048 graph.indices_mut().add(
3049 syn_field_id,
3050 NodeKind::Variable,
3051 synthetic_field,
3052 None,
3053 file_id,
3054 );
3055 graph.indices_mut().add(
3056 syn_a_id,
3057 NodeKind::Variable,
3058 synthetic_offset_a,
3059 None,
3060 file_id,
3061 );
3062 graph.indices_mut().add(
3063 syn_b_id,
3064 NodeKind::Variable,
3065 synthetic_offset_b,
3066 None,
3067 file_id,
3068 );
3069
3070 // Flag two of them via the metadata-store bit (the canonical
3071 // Go-plugin emission path) and leave one (`<field:...>`) only
3072 // covered by the structural name-shape fallback to verify both
3073 // recognition channels suppress the leak.
3074 graph.macro_metadata_mut().mark_synthetic(syn_a_id);
3075 graph.macro_metadata_mut().mark_synthetic(syn_b_id);
3076
3077 let snapshot = graph.snapshot();
3078
3079 // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
3080 let matches = snapshot.find_by_pattern("NeedTags");
3081 assert!(matches.contains(&prop_id), "Property must be surfaced");
3082 assert!(
3083 matches.contains(&local_var_id),
3084 "real local var must be surfaced"
3085 );
3086 assert!(
3087 !matches.contains(&syn_field_id),
3088 "<field:...> synthetic must be suppressed (name-shape fallback)"
3089 );
3090 assert!(
3091 !matches.contains(&syn_a_id),
3092 "NeedTags@469 must be suppressed (metadata bit)"
3093 );
3094 assert!(
3095 !matches.contains(&syn_b_id),
3096 "NeedTags@508 must be suppressed (metadata bit)"
3097 );
3098 assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");
3099
3100 // Internal include-synthetic surface (binding plane, scope analysis):
3101 // every node remains reachable.
3102 let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
3103 assert_eq!(
3104 all_matches.len(),
3105 5,
3106 "include_synthetic surfaces everything"
3107 );
3108 assert!(all_matches.contains(&prop_id));
3109 assert!(all_matches.contains(&local_var_id));
3110 assert!(all_matches.contains(&syn_field_id));
3111 assert!(all_matches.contains(&syn_a_id));
3112 assert!(all_matches.contains(&syn_b_id));
3113
3114 // is_node_synthetic exposed for surface-level filters
3115 // (e.g., MCP semantic_search/relation_query post-filters).
3116 assert!(snapshot.is_node_synthetic(syn_field_id));
3117 assert!(snapshot.is_node_synthetic(syn_a_id));
3118 assert!(snapshot.is_node_synthetic(syn_b_id));
3119 assert!(!snapshot.is_node_synthetic(prop_id));
3120 assert!(!snapshot.is_node_synthetic(local_var_id));
3121 }
3122
3123 #[test]
3124 #[allow(clippy::too_many_lines)]
3125 fn find_by_exact_name_aligns_with_planner_name_predicate() {
3126 // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
3127 // surface for the CLI `--exact NeedTags` shorthand and the
3128 // planner's `name:NeedTags` predicate. Both paths must return
3129 // the same set against this fixture.
3130 use crate::graph::unified::node::NodeKind;
3131 use crate::graph::unified::storage::arena::NodeEntry;
3132 use std::path::Path;
3133
3134 let mut graph = CodeGraph::new();
3135 // Property nodes carry the package-qualified name as
3136 // `entry.name` (Go plugin convention; see `helper.rs`'s
3137 // `semantic_name_for_node_input`).
3138 let property_qname = graph
3139 .strings_mut()
3140 .intern("main.SelectorSource.NeedTags")
3141 .unwrap();
3142 let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
3143 let synthetic_field_name = graph
3144 .strings_mut()
3145 .intern("<field:selector.NeedTags>")
3146 .unwrap();
3147 let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
3148 let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
3149 let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
3150 let display_fallback_qname = graph
3151 .strings_mut()
3152 .intern("main::SelectorSource::Other")
3153 .unwrap();
3154 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3155
3156 let prop_id = graph
3157 .nodes_mut()
3158 .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
3159 .unwrap();
3160 let local_var_id = graph
3161 .nodes_mut()
3162 .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
3163 .unwrap();
3164 let syn_field_id = graph
3165 .nodes_mut()
3166 .alloc(NodeEntry::new(
3167 NodeKind::Variable,
3168 synthetic_field_name,
3169 file_id,
3170 ))
3171 .unwrap();
3172 let syn_offset_id = graph
3173 .nodes_mut()
3174 .alloc(NodeEntry::new(
3175 NodeKind::Variable,
3176 synthetic_offset_name,
3177 file_id,
3178 ))
3179 .unwrap();
3180 let unrelated_id = graph
3181 .nodes_mut()
3182 .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
3183 .unwrap();
3184 let display_fallback_id = graph
3185 .nodes_mut()
3186 .alloc(
3187 NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
3188 .with_qualified_name(display_fallback_qname),
3189 )
3190 .unwrap();
3191
3192 graph
3193 .indices_mut()
3194 .add(prop_id, NodeKind::Property, property_qname, None, file_id);
3195 graph.indices_mut().add(
3196 local_var_id,
3197 NodeKind::Variable,
3198 local_var_name,
3199 None,
3200 file_id,
3201 );
3202 graph.indices_mut().add(
3203 syn_field_id,
3204 NodeKind::Variable,
3205 synthetic_field_name,
3206 None,
3207 file_id,
3208 );
3209 graph.indices_mut().add(
3210 syn_offset_id,
3211 NodeKind::Variable,
3212 synthetic_offset_name,
3213 None,
3214 file_id,
3215 );
3216 graph.indices_mut().add(
3217 unrelated_id,
3218 NodeKind::Function,
3219 unrelated_name,
3220 None,
3221 file_id,
3222 );
3223 graph.indices_mut().add(
3224 display_fallback_id,
3225 NodeKind::Property,
3226 display_fallback_name,
3227 Some(display_fallback_qname),
3228 file_id,
3229 );
3230
3231 // Mark the offset-suffixed synthetic via the metadata bit so we
3232 // exercise both recognition channels in this fixture.
3233 graph.macro_metadata_mut().mark_synthetic(syn_offset_id);
3234
3235 let snapshot = graph.snapshot();
3236
3237 // Exact-name lookup on "NeedTags" — should pick up only the
3238 // local variable (its `entry.name` is exactly "NeedTags"); it
3239 // must NOT pick up the Property (qualified name contains but
3240 // does not equal "NeedTags") and must NOT pick up either
3241 // synthetic placeholder.
3242 let exact = snapshot.find_by_exact_name("NeedTags");
3243 assert_eq!(
3244 exact,
3245 vec![local_var_id],
3246 "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
3247 );
3248
3249 // The Property's full qualified name is exact-addressable.
3250 let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
3251 assert_eq!(qualified, vec![prop_id]);
3252
3253 // Dot-qualified display form falls back to graph-canonical `::` only
3254 // when the exact dot string was absent.
3255 let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
3256 assert_eq!(display_fallback, vec![display_fallback_id]);
3257
3258 // Substring-only matches must not surface from exact lookup.
3259 assert!(
3260 snapshot
3261 .find_by_exact_name("NeedTagsHelper")
3262 .contains(&unrelated_id)
3263 );
3264 assert!(
3265 !snapshot
3266 .find_by_exact_name("NeedTags")
3267 .contains(&unrelated_id),
3268 "exact 'NeedTags' must not match 'NeedTagsHelper'"
3269 );
3270
3271 // Unknown name short-circuits to an empty vec without
3272 // scanning any nodes.
3273 assert!(
3274 snapshot
3275 .find_by_exact_name("ThisStringIsNotInterned")
3276 .is_empty()
3277 );
3278 }
3279
3280 #[test]
3281 fn test_snapshot_get_callees() {
3282 use crate::graph::unified::edge::EdgeKind;
3283 use crate::graph::unified::node::NodeKind;
3284 use crate::graph::unified::storage::arena::NodeEntry;
3285 use std::path::Path;
3286
3287 let mut graph = CodeGraph::new();
3288
3289 // Create caller and callee nodes
3290 let caller_name = graph.strings_mut().intern("caller").unwrap();
3291 let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3292 let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3293 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3294
3295 let caller_id = graph
3296 .nodes_mut()
3297 .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3298 .unwrap();
3299 let callee1_id = graph
3300 .nodes_mut()
3301 .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3302 .unwrap();
3303 let callee2_id = graph
3304 .nodes_mut()
3305 .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3306 .unwrap();
3307
3308 // Add call edges
3309 graph.edges_mut().add_edge(
3310 caller_id,
3311 callee1_id,
3312 EdgeKind::Calls {
3313 argument_count: 0,
3314 is_async: false,
3315 resolved_via: ResolvedVia::Direct,
3316 },
3317 file_id,
3318 );
3319 graph.edges_mut().add_edge(
3320 caller_id,
3321 callee2_id,
3322 EdgeKind::Calls {
3323 argument_count: 0,
3324 is_async: false,
3325 resolved_via: ResolvedVia::Direct,
3326 },
3327 file_id,
3328 );
3329
3330 let snapshot = graph.snapshot();
3331
3332 // Query callees
3333 let callees = snapshot.get_callees(caller_id);
3334 assert_eq!(callees.len(), 2);
3335 assert!(callees.contains(&callee1_id));
3336 assert!(callees.contains(&callee2_id));
3337
3338 // Node with no callees
3339 let callees = snapshot.get_callees(callee1_id);
3340 assert!(callees.is_empty());
3341 }
3342
3343 #[test]
3344 fn test_snapshot_get_callers() {
3345 use crate::graph::unified::edge::EdgeKind;
3346 use crate::graph::unified::node::NodeKind;
3347 use crate::graph::unified::storage::arena::NodeEntry;
3348 use std::path::Path;
3349
3350 let mut graph = CodeGraph::new();
3351
3352 // Create caller and callee nodes
3353 let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3354 let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3355 let callee_name = graph.strings_mut().intern("callee").unwrap();
3356 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3357
3358 let caller1_id = graph
3359 .nodes_mut()
3360 .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3361 .unwrap();
3362 let caller2_id = graph
3363 .nodes_mut()
3364 .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3365 .unwrap();
3366 let callee_id = graph
3367 .nodes_mut()
3368 .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3369 .unwrap();
3370
3371 // Add call edges
3372 graph.edges_mut().add_edge(
3373 caller1_id,
3374 callee_id,
3375 EdgeKind::Calls {
3376 argument_count: 0,
3377 is_async: false,
3378 resolved_via: ResolvedVia::Direct,
3379 },
3380 file_id,
3381 );
3382 graph.edges_mut().add_edge(
3383 caller2_id,
3384 callee_id,
3385 EdgeKind::Calls {
3386 argument_count: 0,
3387 is_async: false,
3388 resolved_via: ResolvedVia::Direct,
3389 },
3390 file_id,
3391 );
3392
3393 let snapshot = graph.snapshot();
3394
3395 // Query callers
3396 let callers = snapshot.get_callers(callee_id);
3397 assert_eq!(callers.len(), 2);
3398 assert!(callers.contains(&caller1_id));
3399 assert!(callers.contains(&caller2_id));
3400
3401 // Node with no callers
3402 let callers = snapshot.get_callers(caller1_id);
3403 assert!(callers.is_empty());
3404 }
3405
3406 #[test]
3407 fn test_snapshot_find_symbol_candidates() {
3408 use crate::graph::unified::node::NodeKind;
3409 use crate::graph::unified::storage::arena::NodeEntry;
3410 use std::path::Path;
3411
3412 let mut graph = CodeGraph::new();
3413
3414 // Add nodes with same symbol name but different qualified names
3415 let symbol_name = graph.strings_mut().intern("test").unwrap();
3416 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3417
3418 let node1 = graph
3419 .nodes_mut()
3420 .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3421 .unwrap();
3422 let node2 = graph
3423 .nodes_mut()
3424 .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3425 .unwrap();
3426
3427 // Add a different symbol
3428 let other_name = graph.strings_mut().intern("other").unwrap();
3429 let node3 = graph
3430 .nodes_mut()
3431 .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3432 .unwrap();
3433
3434 graph
3435 .indices_mut()
3436 .add(node1, NodeKind::Function, symbol_name, None, file_id);
3437 graph
3438 .indices_mut()
3439 .add(node2, NodeKind::Method, symbol_name, None, file_id);
3440 graph
3441 .indices_mut()
3442 .add(node3, NodeKind::Function, other_name, None, file_id);
3443
3444 let snapshot = graph.snapshot();
3445
3446 // Find by symbol
3447 let matches = candidate_nodes(&snapshot, "test");
3448 assert_eq!(matches.len(), 2);
3449 assert!(matches.contains(&node1));
3450 assert!(matches.contains(&node2));
3451
3452 // Find other symbol
3453 let matches = candidate_nodes(&snapshot, "other");
3454 assert_eq!(matches.len(), 1);
3455 assert_eq!(matches[0], node3);
3456
3457 // No matches
3458 let matches = candidate_nodes(&snapshot, "nonexistent");
3459 assert!(matches.is_empty());
3460 }
3461
3462 #[test]
3463 fn test_snapshot_iter_nodes() {
3464 use crate::graph::unified::node::NodeKind;
3465 use crate::graph::unified::storage::arena::NodeEntry;
3466 use std::path::Path;
3467
3468 let mut graph = CodeGraph::new();
3469
3470 // Add some nodes
3471 let name1 = graph.strings_mut().intern("func1").unwrap();
3472 let name2 = graph.strings_mut().intern("func2").unwrap();
3473 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3474
3475 let node1 = graph
3476 .nodes_mut()
3477 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3478 .unwrap();
3479 let node2 = graph
3480 .nodes_mut()
3481 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3482 .unwrap();
3483
3484 let snapshot = graph.snapshot();
3485
3486 // Iterate nodes
3487 let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3488 assert_eq!(snapshot_nodes.len(), 2);
3489
3490 let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3491 assert!(node_ids.contains(&node1));
3492 assert!(node_ids.contains(&node2));
3493 }
3494
3495 #[test]
3496 fn test_snapshot_iter_edges() {
3497 use crate::graph::unified::edge::EdgeKind;
3498 use crate::graph::unified::node::NodeKind;
3499 use crate::graph::unified::storage::arena::NodeEntry;
3500 use std::path::Path;
3501
3502 let mut graph = CodeGraph::new();
3503
3504 // Create nodes
3505 let name1 = graph.strings_mut().intern("func1").unwrap();
3506 let name2 = graph.strings_mut().intern("func2").unwrap();
3507 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3508
3509 let node1 = graph
3510 .nodes_mut()
3511 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3512 .unwrap();
3513 let node2 = graph
3514 .nodes_mut()
3515 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3516 .unwrap();
3517
3518 // Add edges
3519 graph.edges_mut().add_edge(
3520 node1,
3521 node2,
3522 EdgeKind::Calls {
3523 argument_count: 0,
3524 is_async: false,
3525 resolved_via: ResolvedVia::Direct,
3526 },
3527 file_id,
3528 );
3529
3530 let snapshot = graph.snapshot();
3531
3532 // Iterate edges
3533 let edges: Vec<_> = snapshot.iter_edges().collect();
3534 assert_eq!(edges.len(), 1);
3535
3536 let (src, tgt, kind) = &edges[0];
3537 assert_eq!(*src, node1);
3538 assert_eq!(*tgt, node2);
3539 assert!(matches!(
3540 kind,
3541 EdgeKind::Calls {
3542 argument_count: 0,
3543 is_async: false,
3544 resolved_via: ResolvedVia::Direct,
3545 }
3546 ));
3547 }
3548
3549 #[test]
3550 fn test_snapshot_get_node() {
3551 use crate::graph::unified::node::NodeId;
3552 use crate::graph::unified::node::NodeKind;
3553 use crate::graph::unified::storage::arena::NodeEntry;
3554 use std::path::Path;
3555
3556 let mut graph = CodeGraph::new();
3557
3558 // Add a node
3559 let name = graph.strings_mut().intern("test_func").unwrap();
3560 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3561
3562 let node_id = graph
3563 .nodes_mut()
3564 .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3565 .unwrap();
3566
3567 let snapshot = graph.snapshot();
3568
3569 // Get node
3570 let entry = snapshot.get_node(node_id);
3571 assert!(entry.is_some());
3572 assert_eq!(entry.unwrap().kind, NodeKind::Function);
3573
3574 // Invalid node
3575 let invalid_id = NodeId::INVALID;
3576 assert!(snapshot.get_node(invalid_id).is_none());
3577 }
3578
3579 #[test]
3580 fn test_snapshot_query_empty_graph() {
3581 use crate::graph::unified::node::NodeId;
3582
3583 let graph = CodeGraph::new();
3584 let snapshot = graph.snapshot();
3585
3586 // All queries should return empty on empty graph
3587 assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3588 assert!(snapshot.find_by_pattern("test").is_empty());
3589 assert!(candidate_nodes(&snapshot, "test").is_empty());
3590
3591 let dummy_id = NodeId::new(0, 1);
3592 assert!(snapshot.get_callees(dummy_id).is_empty());
3593 assert!(snapshot.get_callers(dummy_id).is_empty());
3594
3595 assert_eq!(snapshot.iter_nodes().count(), 0);
3596 assert_eq!(snapshot.iter_edges().count(), 0);
3597 }
3598
3599 #[test]
3600 fn test_snapshot_edge_filtering_by_kind() {
3601 use crate::graph::unified::edge::EdgeKind;
3602 use crate::graph::unified::node::NodeKind;
3603 use crate::graph::unified::storage::arena::NodeEntry;
3604 use std::path::Path;
3605
3606 let mut graph = CodeGraph::new();
3607
3608 // Create nodes
3609 let name1 = graph.strings_mut().intern("func1").unwrap();
3610 let name2 = graph.strings_mut().intern("func2").unwrap();
3611 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3612
3613 let node1 = graph
3614 .nodes_mut()
3615 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3616 .unwrap();
3617 let node2 = graph
3618 .nodes_mut()
3619 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3620 .unwrap();
3621
3622 // Add different kinds of edges
3623 graph.edges_mut().add_edge(
3624 node1,
3625 node2,
3626 EdgeKind::Calls {
3627 argument_count: 0,
3628 is_async: false,
3629 resolved_via: ResolvedVia::Direct,
3630 },
3631 file_id,
3632 );
3633 graph
3634 .edges_mut()
3635 .add_edge(node1, node2, EdgeKind::References, file_id);
3636
3637 let snapshot = graph.snapshot();
3638
3639 // get_callees should only return Calls edges
3640 let callees = snapshot.get_callees(node1);
3641 assert_eq!(callees.len(), 1);
3642 assert_eq!(callees[0], node2);
3643
3644 // iter_edges returns all edges regardless of kind
3645 let edges: Vec<_> = snapshot.iter_edges().collect();
3646 assert_eq!(edges.len(), 2);
3647 }
3648
3649 // -------- reverse_import_index tests --------
3650
3651 /// Helper: build an empty graph with the given file paths registered, a
3652 /// single placeholder node allocated per file, and indices rebuilt so
3653 /// `by_file` returns the per-file node sets. Returns the file IDs in
3654 /// the order passed in and the per-file node ID.
3655 #[cfg(test)]
3656 fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3657 use crate::graph::unified::node::NodeKind;
3658 use crate::graph::unified::storage::arena::NodeEntry;
3659 use std::path::Path;
3660
3661 let mut graph = CodeGraph::new();
3662 let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3663 let mut file_ids = Vec::with_capacity(files.len());
3664 let mut node_ids = Vec::with_capacity(files.len());
3665 for path in files {
3666 let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3667 let node_id = graph
3668 .nodes_mut()
3669 .alloc(NodeEntry::new(
3670 NodeKind::Function,
3671 placeholder_name,
3672 file_id,
3673 ))
3674 .unwrap();
3675 file_ids.push(file_id);
3676 node_ids.push(node_id);
3677 }
3678 graph.rebuild_indices();
3679 (graph, file_ids, node_ids)
3680 }
3681
3682 /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3683 /// `target_node` (in exporter file). The edge is recorded against the
3684 /// importer file so `clear_file` cleanup would behave identically to
3685 /// production Pass 4 writes.
3686 #[cfg(test)]
3687 fn add_import_edge(
3688 graph: &mut CodeGraph,
3689 source_node: NodeId,
3690 target_node: NodeId,
3691 importer_file: FileId,
3692 ) {
3693 graph.edges_mut().add_edge(
3694 source_node,
3695 target_node,
3696 EdgeKind::Imports {
3697 alias: None,
3698 is_wildcard: false,
3699 },
3700 importer_file,
3701 );
3702 }
3703
3704 #[test]
3705 fn reverse_import_index_empty_graph_returns_empty() {
3706 let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3707 assert!(graph.reverse_import_index(files[0]).is_empty());
3708 }
3709
3710 #[test]
3711 fn reverse_import_index_single_importer() {
3712 // File A imports a symbol exported by file B. Reverse index of B
3713 // should return exactly [A]; reverse index of A should be empty.
3714 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3715 let (a, b) = (files[0], files[1]);
3716 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3717
3718 let importers_of_b = graph.reverse_import_index(b);
3719 assert_eq!(importers_of_b, vec![a]);
3720
3721 let importers_of_a = graph.reverse_import_index(a);
3722 assert!(
3723 importers_of_a.is_empty(),
3724 "A has no inbound Imports edges; reverse index must be empty"
3725 );
3726 }
3727
3728 #[test]
3729 fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3730 // A, B, C all import from D. Reverse index of D must contain each
3731 // importer exactly once, sorted ascending by FileId.
3732 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3733 let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3734 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3735 add_import_edge(&mut graph, nodes[1], nodes[3], b);
3736 add_import_edge(&mut graph, nodes[2], nodes[3], c);
3737 // Add a duplicate edge from A to D to confirm dedup behavior.
3738 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3739
3740 let importers_of_d = graph.reverse_import_index(d);
3741 assert_eq!(importers_of_d, vec![a, b, c]);
3742 // Sort invariant: Vec is ascending by raw index.
3743 let mut sorted = importers_of_d.clone();
3744 sorted.sort();
3745 assert_eq!(importers_of_d, sorted);
3746 }
3747
3748 #[test]
3749 fn reverse_import_index_filters_non_import_edges() {
3750 // A `Calls` edge from A into B must not contribute to B's reverse
3751 // import index. Only `EdgeKind::Imports` edges count.
3752 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3753 let (a, b) = (files[0], files[1]);
3754 graph.edges_mut().add_edge(
3755 nodes[0],
3756 nodes[1],
3757 EdgeKind::Calls {
3758 argument_count: 0,
3759 is_async: false,
3760 resolved_via: ResolvedVia::Direct,
3761 },
3762 a,
3763 );
3764 graph
3765 .edges_mut()
3766 .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3767
3768 assert!(
3769 graph.reverse_import_index(b).is_empty(),
3770 "non-Imports edges must not register as importers"
3771 );
3772 }
3773
3774 #[test]
3775 fn reverse_import_index_elides_self_imports() {
3776 // An Imports edge whose source and target are both in the same file
3777 // is a self-import; the caller's own file must not appear in its own
3778 // reverse index.
3779 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3780 let a = files[0];
3781 // Add a second node in the same file so we have a distinct source.
3782 let name2 = graph.strings_mut().intern("sym2").unwrap();
3783 let second_in_a = graph
3784 .nodes_mut()
3785 .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3786 crate::graph::unified::node::NodeKind::Function,
3787 name2,
3788 a,
3789 ))
3790 .unwrap();
3791 graph.rebuild_indices();
3792 add_import_edge(&mut graph, second_in_a, nodes[0], a);
3793
3794 assert!(
3795 graph.reverse_import_index(a).is_empty(),
3796 "self-imports must be elided from reverse index"
3797 );
3798 }
3799
3800 #[test]
3801 fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3802 // Two files: A has both Calls and Imports edges into B. Reverse
3803 // index must return exactly [A] — the Calls edge contributes
3804 // nothing.
3805 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3806 let (a, b) = (files[0], files[1]);
3807 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3808 graph.edges_mut().add_edge(
3809 nodes[0],
3810 nodes[1],
3811 EdgeKind::Calls {
3812 argument_count: 0,
3813 is_async: false,
3814 resolved_via: ResolvedVia::Direct,
3815 },
3816 a,
3817 );
3818
3819 assert_eq!(graph.reverse_import_index(b), vec![a]);
3820 }
3821
3822 #[test]
3823 fn reverse_import_index_uninitialized_file_returns_empty() {
3824 // Querying a FileId that is not registered in the graph must return
3825 // an empty Vec, not panic.
3826 let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3827 let bogus = FileId::new(9999);
3828 assert!(
3829 graph.reverse_import_index(bogus).is_empty(),
3830 "unknown FileId must return empty Vec without panicking"
3831 );
3832 }
3833
3834 #[test]
3835 fn reverse_import_index_skips_tombstoned_source_nodes() {
3836 // In the incremental rebuild path the prior graph retains tombstoned
3837 // arena slots for nodes belonging to closure files that have been
3838 // removed but not yet fully compacted. reverse_import_index is
3839 // called on that prior graph to widen the closure, so its
3840 // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3841 // is a semantically important branch, not a theoretical one.
3842 // This test exercises it directly by tombstoning the source node of
3843 // an Imports edge and asserting the edge silently disappears from
3844 // the reverse index.
3845 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3846 let (a, b) = (files[0], files[1]);
3847 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3848 // Sanity: before tombstoning, A shows up as the importer of B.
3849 assert_eq!(graph.reverse_import_index(b), vec![a]);
3850 // Tombstone the source node via the arena (generation bump). The
3851 // Imports edge still exists in the edge store but its source NodeId
3852 // is now stale; `nodes.get(edge_ref.source)` returns None and the
3853 // guard skips the edge.
3854 let removed = graph.nodes_mut().remove(nodes[0]);
3855 assert!(
3856 removed.is_some(),
3857 "arena.remove must succeed for a live node"
3858 );
3859 assert!(
3860 graph.nodes().get(nodes[0]).is_none(),
3861 "tombstoned lookup must return None"
3862 );
3863 assert!(
3864 graph.reverse_import_index(b).is_empty(),
3865 "Imports edges whose source is tombstoned must be silently skipped"
3866 );
3867 }
3868
3869 // ------------------------------------------------------------------
3870 // Task 4 Step 2 — CodeGraph::remove_file
3871 // ------------------------------------------------------------------
3872
3873 /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3874 /// of intra- and inter-file edges. Each call site produces a
3875 /// canonical topology so tests below can assert bit-level on edge
3876 /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3877 /// file_b_nodes)`.
3878 fn seed_two_file_graph(
3879 per_file: usize,
3880 ) -> (
3881 CodeGraph,
3882 crate::graph::unified::file::FileId,
3883 crate::graph::unified::file::FileId,
3884 Vec<NodeId>,
3885 Vec<NodeId>,
3886 ) {
3887 use crate::graph::unified::edge::EdgeKind;
3888 use crate::graph::unified::node::NodeKind;
3889 use crate::graph::unified::storage::arena::NodeEntry;
3890 use std::path::Path;
3891
3892 let mut graph = CodeGraph::new();
3893 let sym = graph.strings_mut().intern("sym").expect("intern");
3894 let file_a = graph
3895 .files_mut()
3896 .register(Path::new("/tmp/remove_file_test/a.rs"))
3897 .expect("register a");
3898 let file_b = graph
3899 .files_mut()
3900 .register(Path::new("/tmp/remove_file_test/b.rs"))
3901 .expect("register b");
3902
3903 let mut file_a_nodes = Vec::with_capacity(per_file);
3904 let mut file_b_nodes = Vec::with_capacity(per_file);
3905
3906 for _ in 0..per_file {
3907 let n = graph
3908 .nodes_mut()
3909 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3910 .expect("alloc a-node");
3911 file_a_nodes.push(n);
3912 graph.files_mut().record_node(file_a, n);
3913 graph
3914 .indices_mut()
3915 .add(n, NodeKind::Function, sym, None, file_a);
3916 }
3917 for _ in 0..per_file {
3918 let n = graph
3919 .nodes_mut()
3920 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3921 .expect("alloc b-node");
3922 file_b_nodes.push(n);
3923 graph.files_mut().record_node(file_b, n);
3924 graph
3925 .indices_mut()
3926 .add(n, NodeKind::Function, sym, None, file_b);
3927 }
3928
3929 // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3930 // b[i] -> b[i+1]. These are the ones that must die when the
3931 // corresponding file is removed.
3932 for i in 0..per_file.saturating_sub(1) {
3933 graph.edges_mut().add_edge(
3934 file_a_nodes[i],
3935 file_a_nodes[i + 1],
3936 EdgeKind::Calls {
3937 argument_count: 0,
3938 is_async: false,
3939 resolved_via: ResolvedVia::Direct,
3940 },
3941 file_a,
3942 );
3943 graph.edges_mut().add_edge(
3944 file_b_nodes[i],
3945 file_b_nodes[i + 1],
3946 EdgeKind::Calls {
3947 argument_count: 0,
3948 is_async: false,
3949 resolved_via: ResolvedVia::Direct,
3950 },
3951 file_b,
3952 );
3953 }
3954 // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3955 // when *either* endpoint's file is removed (plan §F.2).
3956 graph.edges_mut().add_edge(
3957 file_a_nodes[0],
3958 file_b_nodes[0],
3959 EdgeKind::Calls {
3960 argument_count: 0,
3961 is_async: false,
3962 resolved_via: ResolvedVia::Direct,
3963 },
3964 file_a,
3965 );
3966 graph.edges_mut().add_edge(
3967 file_b_nodes[0],
3968 file_a_nodes[0],
3969 EdgeKind::Calls {
3970 argument_count: 0,
3971 is_async: false,
3972 resolved_via: ResolvedVia::Direct,
3973 },
3974 file_b,
3975 );
3976
3977 (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3978 }
3979
3980 #[test]
3981 fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3982 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3983
3984 let returned = graph.remove_file(file_a);
3985
3986 // Returned list must equal the original per-file bucket
3987 // membership, deterministically.
3988 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3989 let expected_set: std::collections::HashSet<NodeId> =
3990 file_a_nodes.iter().copied().collect();
3991 assert_eq!(
3992 returned_set, expected_set,
3993 "remove_file must return exactly the file_a nodes drained from the bucket"
3994 );
3995
3996 // Every returned NodeId is gone from the arena.
3997 for nid in &file_a_nodes {
3998 assert!(
3999 graph.nodes().get(*nid).is_none(),
4000 "node {nid:?} from removed file must be tombstoned in arena"
4001 );
4002 }
4003 }
4004
4005 #[test]
4006 fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
4007 use crate::graph::unified::edge::EdgeKind;
4008
4009 let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
4010
4011 // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
4012 // 2 cross-file edges (a0↔b0).
4013 let before_delta = graph.edges().stats().forward.delta_edge_count;
4014 assert_eq!(
4015 before_delta, 6,
4016 "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
4017 );
4018
4019 let _ = graph.remove_file(file_a);
4020
4021 // Forward delta after removal: all 2 intra-A edges are gone,
4022 // both cross edges are gone, only the 2 intra-B edges remain.
4023 let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
4024 assert_eq!(
4025 after_delta_forward, 2,
4026 "only intra-B forward edges must remain after removing file_a"
4027 );
4028 let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
4029 assert_eq!(
4030 after_delta_reverse, 2,
4031 "only intra-B reverse edges must remain after removing file_a"
4032 );
4033
4034 // Cross-file edge from b[0] -> a[0] must no longer be visible
4035 // from either direction, because a[0] is tombstoned.
4036 let b0 = file_b_nodes[0];
4037 let a0 = file_a_nodes[0];
4038 let remaining_from_b0: Vec<_> = graph
4039 .edges()
4040 .edges_from(b0)
4041 .into_iter()
4042 .filter(|e| {
4043 matches!(
4044 e.kind,
4045 EdgeKind::Calls {
4046 argument_count: 0,
4047 is_async: false,
4048 resolved_via: ResolvedVia::Direct,
4049 }
4050 )
4051 })
4052 .collect();
4053 assert!(
4054 !remaining_from_b0.iter().any(|e| e.target == a0),
4055 "edge b0 -> a0 must be gone after remove_file(file_a)"
4056 );
4057 let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
4058 assert!(
4059 remaining_to_a0.is_empty(),
4060 "every edge targeting the tombstoned a0 must be gone"
4061 );
4062 }
4063
4064 #[test]
4065 fn code_graph_remove_file_drops_file_registry_entry() {
4066 let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
4067
4068 assert!(
4069 graph.files().resolve(file_a).is_some(),
4070 "seed registered file_a"
4071 );
4072 assert!(
4073 !graph.files().nodes_for_file(file_a).is_empty(),
4074 "seed populated the file_a bucket"
4075 );
4076
4077 let _ = graph.remove_file(file_a);
4078
4079 assert!(
4080 graph.files().resolve(file_a).is_none(),
4081 "FileRegistry::resolve must return None after remove_file"
4082 );
4083 assert!(
4084 graph.files().nodes_for_file(file_a).is_empty(),
4085 "per-file bucket for file_a must be drained"
4086 );
4087 }
4088
4089 #[test]
4090 fn code_graph_remove_file_is_idempotent_on_unknown_file() {
4091 use crate::graph::unified::file::FileId;
4092 let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
4093
4094 // Snapshot state before idempotent no-op.
4095 let nodes_before = graph.nodes().len();
4096 let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
4097 let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
4098 let files_before = graph.files().len();
4099
4100 // A FileId that was never registered: the caller may legitimately
4101 // receive a bogus id from stale indexing state. `remove_file`
4102 // must be a silent no-op.
4103 let bogus = FileId::new(9999);
4104 let returned = graph.remove_file(bogus);
4105 assert!(
4106 returned.is_empty(),
4107 "remove_file on unknown FileId must return an empty Vec"
4108 );
4109
4110 assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
4111 assert_eq!(
4112 graph.edges().stats().forward.delta_edge_count,
4113 delta_fwd_before,
4114 "forward delta unchanged"
4115 );
4116 assert_eq!(
4117 graph.edges().stats().reverse.delta_edge_count,
4118 delta_rev_before,
4119 "reverse delta unchanged"
4120 );
4121 assert_eq!(graph.files().len(), files_before, "file count unchanged");
4122 }
4123
4124 #[test]
4125 fn code_graph_remove_file_clears_file_segments_entry() {
4126 // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
4127 // must be cleared on `remove_file`. Without this, a later
4128 // `FileId` recycle (via `FileRegistry::free_list`) would
4129 // inherit the previous file's stale node-range and
4130 // `reindex_files` (`build/reindex.rs`) would tombstone the
4131 // wrong slots. This unit test seeds a segment entry, removes
4132 // the file, and asserts the entry is gone.
4133 use crate::graph::unified::storage::segment::FileSegmentTable;
4134
4135 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4136
4137 // Seed a segment for file A. Production code sets this via
4138 // Phase 3 parallel commit; here we go through the crate-
4139 // internal accessor because we are a unit test inside the
4140 // crate. (The feature-gated `test_only_record_file_segment`
4141 // public helper exists for the integration tests in
4142 // `sqry-core/tests/incremental_remove_file_scale.rs`.)
4143 let first_index = file_a_nodes
4144 .iter()
4145 .map(|n| n.index())
4146 .min()
4147 .expect("per_file = 3");
4148 let last_index = file_a_nodes
4149 .iter()
4150 .map(|n| n.index())
4151 .max()
4152 .expect("per_file = 3");
4153 let slot_count = last_index - first_index + 1;
4154 let table: &mut FileSegmentTable = graph.file_segments_mut();
4155 table.record_range(file_a, first_index, slot_count);
4156 assert!(
4157 graph.file_segments().get(file_a).is_some(),
4158 "seed must install a segment for file_a before remove_file"
4159 );
4160
4161 // Remove the file.
4162 let _ = graph.remove_file(file_a);
4163
4164 // The segment entry must be gone. `FileSegmentTable::remove`
4165 // is idempotent (a no-op on unknown ids), so this assertion
4166 // holds whether or not the seeded range was contiguous.
4167 assert!(
4168 graph.file_segments().get(file_a).is_none(),
4169 "remove_file must clear the FileSegmentTable entry for file_a"
4170 );
4171 }
4172
4173 #[test]
4174 fn code_graph_remove_file_repeated_calls_are_idempotent() {
4175 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4176
4177 // First call does the work.
4178 let first = graph.remove_file(file_a);
4179 assert_eq!(first.len(), file_a_nodes.len());
4180
4181 // Snapshot post-first-call state.
4182 let nodes_after = graph.nodes().len();
4183 let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
4184 let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
4185 let files_after = graph.files().len();
4186
4187 // Second call must be a silent no-op — the bucket is empty,
4188 // the file is unregistered, and the arena slots are already
4189 // tombstoned (NodeArena::remove ignores stale generations).
4190 let second = graph.remove_file(file_a);
4191 assert!(
4192 second.is_empty(),
4193 "second remove_file on the same file must return an empty Vec"
4194 );
4195
4196 assert_eq!(graph.nodes().len(), nodes_after);
4197 assert_eq!(
4198 graph.edges().stats().forward.delta_edge_count,
4199 delta_fwd_after
4200 );
4201 assert_eq!(
4202 graph.edges().stats().reverse.delta_edge_count,
4203 delta_rev_after
4204 );
4205 assert_eq!(graph.files().len(), files_after);
4206 }
4207}