sqry_core/graph/unified/concurrent/graph.rs
1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19 ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24#[cfg(test)]
25use crate::graph::unified::edge::ResolvedVia;
26use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
27use crate::graph::unified::file::FileId;
28use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
29use crate::graph::unified::resolution::display_graph_qualified_name;
30use crate::graph::unified::storage::arena::NodeArena;
31use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
32use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
33use crate::graph::unified::storage::indices::AuxiliaryIndices;
34use crate::graph::unified::storage::interner::StringInterner;
35use crate::graph::unified::storage::metadata::NodeMetadataStore;
36use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
37use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
38use crate::graph::unified::storage::segment::FileSegmentTable;
39use crate::graph::unified::string::id::StringId;
40
41/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
42///
43/// `CodeGraph` uses `Arc` for all internal components, enabling:
44/// - O(1) snapshot creation via Arc cloning
45/// - Copy-on-write semantics via `Arc::make_mut`
46/// - Memory-efficient sharing between snapshots
47///
48/// # Design
49///
50/// The Arc wrapping enables the MVCC pattern:
51/// - Readers see a consistent snapshot at the time they acquired access
52/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
53/// - Multiple snapshots can coexist without copying data
54///
55/// # Performance
56///
57/// - Snapshot creation: O(5) Arc clones ≈ <1μs
58/// - Read access: Direct Arc dereference, no locking
59/// - Write access: `Arc::make_mut` clones only if refcount > 1
60///
61/// # Phase 2 binding-plane access
62///
63/// Use the two-line snapshot pattern to access `BindingPlane`:
64///
65/// ```rust,ignore
66/// let snapshot = graph.snapshot();
67/// let plane = snapshot.binding_plane();
68/// let resolution = plane.resolve(&query);
69/// ```
70///
71/// The two-line form is intentional: `BindingPlane<'g>` borrows from
72/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
73/// visible at the call site. The full Phase 2 scope/alias/shadow and
74/// witness-bearing resolution API is exposed through `BindingPlane`.
75// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
76// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
77// External crates still go through the public accessor methods below.
78#[derive(Clone)]
79pub struct CodeGraph {
80 /// Node storage with generational indices.
81 pub(crate) nodes: Arc<NodeArena>,
82 /// Bidirectional edge storage (forward + reverse).
83 pub(crate) edges: Arc<BidirectionalEdgeStore>,
84 /// String interner for symbol names.
85 pub(crate) strings: Arc<StringInterner>,
86 /// File registry for path deduplication.
87 pub(crate) files: Arc<FileRegistry>,
88 /// Auxiliary indices for fast lookup.
89 pub(crate) indices: Arc<AuxiliaryIndices>,
90 /// Sparse macro boundary metadata (keyed by full `NodeId`).
91 pub(crate) macro_metadata: Arc<NodeMetadataStore>,
92 /// Dense node provenance (Phase 1 fact layer).
93 pub(crate) node_provenance: Arc<NodeProvenanceStore>,
94 /// Dense edge provenance (Phase 1 fact layer).
95 pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
96 /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
97 pub(crate) fact_epoch: u64,
98 /// Epoch for version tracking.
99 pub(crate) epoch: u64,
100 /// Per-language confidence metadata collected during build.
101 /// Maps language name (e.g., "rust") to aggregated confidence.
102 pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
103 /// Phase 2 binding-plane scope arena (populated by Phase 4e).
104 pub(crate) scope_arena: Arc<ScopeArena>,
105 /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
106 pub(crate) alias_table: Arc<AliasTable>,
107 /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
108 pub(crate) shadow_table: Arc<ShadowTable>,
109 /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
110 pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
111 /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
112 /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
113 pub(crate) file_segments: Arc<FileSegmentTable>,
114 /// Phase A (U09): C-only indirect-call resolver side tables.
115 ///
116 /// `None` on non-C workspaces — the parent slot stays unallocated so
117 /// non-C builds incur zero side-table overhead. Populated by Phase 3
118 /// commit (U11) and consumed by `pass5b_c_indirect_resolve` (U12).
119 /// Persisted as the V11 `Option<CIndirectSideTables>` envelope slot
120 /// (DESIGN §10.2); V10 → V11 upconvert sets this to `None`.
121 pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
122 /// Build-time scratch side-channel from the Go plugin (Cluster A
123 /// of the Go T1 implements-and-promotion design).
124 ///
125 /// Populated during Phase 1 plugin parse, merged into the live
126 /// target by Phase 3 commit after `NodeId` / `StringId` remap, drained
127 /// by `pass_go_method_set_satisfaction` between Phase 4e and Pass
128 /// 5. Not part of `GraphSnapshot`, not persisted in V10 — see
129 /// `02_DESIGN` §6. Held by-value (not `Arc<…>`) because no
130 /// copy-on-write or shared-reader access is required: only the
131 /// rebuild owner mutates, only the pass consumes, then the field
132 /// is reset.
133 pub(crate) go_hints: crate::graph::unified::build::staging::GoHints,
134}
135
136impl CodeGraph {
137 /// Creates a new empty `CodeGraph`.
138 ///
139 /// # Example
140 ///
141 /// ```rust
142 /// use sqry_core::graph::unified::concurrent::CodeGraph;
143 ///
144 /// let graph = CodeGraph::new();
145 /// assert_eq!(graph.epoch(), 0);
146 /// ```
147 #[must_use]
148 pub fn new() -> Self {
149 Self {
150 nodes: Arc::new(NodeArena::new()),
151 edges: Arc::new(BidirectionalEdgeStore::new()),
152 strings: Arc::new(StringInterner::new()),
153 files: Arc::new(FileRegistry::new()),
154 indices: Arc::new(AuxiliaryIndices::new()),
155 macro_metadata: Arc::new(NodeMetadataStore::new()),
156 node_provenance: Arc::new(NodeProvenanceStore::new()),
157 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
158 fact_epoch: 0,
159 epoch: 0,
160 confidence: HashMap::new(),
161 scope_arena: Arc::new(ScopeArena::new()),
162 alias_table: Arc::new(AliasTable::new()),
163 shadow_table: Arc::new(ShadowTable::new()),
164 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
165 file_segments: Arc::new(FileSegmentTable::new()),
166 c_indirect_tables: None,
167 go_hints: crate::graph::unified::build::staging::GoHints::default(),
168 }
169 }
170
171 /// Creates a `CodeGraph` from existing components.
172 ///
173 /// This is useful when building a graph from external data or
174 /// reconstructing from serialized state.
175 #[must_use]
176 pub fn from_components(
177 nodes: NodeArena,
178 edges: BidirectionalEdgeStore,
179 strings: StringInterner,
180 files: FileRegistry,
181 indices: AuxiliaryIndices,
182 macro_metadata: NodeMetadataStore,
183 ) -> Self {
184 Self {
185 nodes: Arc::new(nodes),
186 edges: Arc::new(edges),
187 strings: Arc::new(strings),
188 files: Arc::new(files),
189 indices: Arc::new(indices),
190 macro_metadata: Arc::new(macro_metadata),
191 node_provenance: Arc::new(NodeProvenanceStore::new()),
192 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
193 fact_epoch: 0,
194 epoch: 0,
195 confidence: HashMap::new(),
196 scope_arena: Arc::new(ScopeArena::new()),
197 alias_table: Arc::new(AliasTable::new()),
198 shadow_table: Arc::new(ShadowTable::new()),
199 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
200 file_segments: Arc::new(FileSegmentTable::new()),
201 c_indirect_tables: None,
202 go_hints: crate::graph::unified::build::staging::GoHints::default(),
203 }
204 }
205
206 /// Creates a cheap snapshot of the graph.
207 ///
208 /// This operation is O(5) Arc clones, which completes in <1μs.
209 /// The snapshot is isolated from future mutations to the original graph.
210 ///
211 /// # Example
212 ///
213 /// ```rust
214 /// use sqry_core::graph::unified::concurrent::CodeGraph;
215 ///
216 /// let graph = CodeGraph::new();
217 /// let snapshot = graph.snapshot();
218 /// // snapshot is independent of future mutations to graph
219 /// ```
220 #[must_use]
221 pub fn snapshot(&self) -> GraphSnapshot {
222 GraphSnapshot {
223 nodes: Arc::clone(&self.nodes),
224 edges: Arc::clone(&self.edges),
225 strings: Arc::clone(&self.strings),
226 files: Arc::clone(&self.files),
227 indices: Arc::clone(&self.indices),
228 macro_metadata: Arc::clone(&self.macro_metadata),
229 node_provenance: Arc::clone(&self.node_provenance),
230 edge_provenance: Arc::clone(&self.edge_provenance),
231 fact_epoch: self.fact_epoch,
232 epoch: self.epoch,
233 scope_arena: Arc::clone(&self.scope_arena),
234 alias_table: Arc::clone(&self.alias_table),
235 shadow_table: Arc::clone(&self.shadow_table),
236 scope_provenance_store: Arc::clone(&self.scope_provenance_store),
237 file_segments: Arc::clone(&self.file_segments),
238 c_indirect_tables: self.c_indirect_tables.clone(),
239 }
240 }
241
242 /// Returns a reference to the node arena.
243 #[inline]
244 #[must_use]
245 pub fn nodes(&self) -> &NodeArena {
246 &self.nodes
247 }
248
249 /// Returns a reference to the bidirectional edge store.
250 #[inline]
251 #[must_use]
252 pub fn edges(&self) -> &BidirectionalEdgeStore {
253 &self.edges
254 }
255
256 /// Returns a reference to the string interner.
257 #[inline]
258 #[must_use]
259 pub fn strings(&self) -> &StringInterner {
260 &self.strings
261 }
262
263 /// Returns a reference to the file registry.
264 #[inline]
265 #[must_use]
266 pub fn files(&self) -> &FileRegistry {
267 &self.files
268 }
269
270 /// Returns a reference to the auxiliary indices.
271 #[inline]
272 #[must_use]
273 pub fn indices(&self) -> &AuxiliaryIndices {
274 &self.indices
275 }
276
277 /// Returns a reference to the macro boundary metadata store.
278 #[inline]
279 #[must_use]
280 pub fn macro_metadata(&self) -> &NodeMetadataStore {
281 &self.macro_metadata
282 }
283
284 /// Returns a reference to the C indirect-call side tables, if any.
285 ///
286 /// `None` on non-C workspaces — the slot is allocated lazily and only
287 /// when the C plugin's Phase 1 instrumentation observes content worth
288 /// recording. On loaded snapshots, the V11 envelope's
289 /// `Option<CIndirectSideTables>` field round-trips into this slot
290 /// (DESIGN §10.2); V10 → V11 upconvert always sets the slot to `None`.
291 ///
292 /// `pass5b_c_indirect_resolve` (U12) consumes these tables to rewrite
293 /// synthetic indirect-call `Calls` edges into precise binding-plane /
294 /// type-match candidates.
295 #[inline]
296 #[must_use]
297 pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
298 self.c_indirect_tables.as_ref()
299 }
300
301 /// Returns a mutable reference to the `Option<CIndirectSideTables>`
302 /// slot, allowing callers to install / replace / clear the side
303 /// tables.
304 ///
305 /// Mirrors the accessor pattern used for [`Self::macro_metadata_mut`]
306 /// but exposes the `Option` directly — the side tables are owned in
307 /// place, not behind an `Arc`. Callers that need to merge incremental
308 /// state into the existing tables typically do:
309 ///
310 /// ```rust,ignore
311 /// let slot = graph.c_indirect_tables_mut();
312 /// let tables = slot.get_or_insert_with(CIndirectSideTables::new);
313 /// tables.bindings_by_field.entry(key).or_default().push(entry);
314 /// ```
315 ///
316 /// Populated by the build pipeline (U11). Read-only consumers should
317 /// use [`Self::c_indirect_tables`] instead.
318 #[inline]
319 pub fn c_indirect_tables_mut(&mut self) -> &mut Option<CIndirectSideTables> {
320 &mut self.c_indirect_tables
321 }
322
323 /// Test-only: strip every Phase-A-introduced piece of state from this
324 /// graph, leaving a graph that is byte-identical (modulo persistence
325 /// header timestamps) to what a pre-Phase-A build would have produced
326 /// on the same fixture.
327 ///
328 /// Specifically:
329 ///
330 /// * Clears the [`CIndirectSideTables`] slot (sets it to `None`).
331 /// * Removes [`NodeFlags::ADDRESS_TAKEN`] and
332 /// [`NodeFlags::CALLSITE_PROMISCUOUS`] marker bits from every
333 /// [`StoredEntry`] in the macro-metadata store.
334 /// * Rewrites the `resolved_via` field of every [`EdgeKind::Calls`]
335 /// edge (across CSR and delta tiers, forward and reverse stores)
336 /// to [`ResolvedVia::Direct`].
337 ///
338 /// Used exclusively by `sqry-core/tests/snapshot_size_phase_a.rs`
339 /// (U19) to materialize a "Phase-A-free" baseline snapshot for the
340 /// +10% snapshot-size gate. Gated behind `cfg(any(test, feature =
341 /// "test-support"))` so the helper is invisible to production
342 /// builds.
343 ///
344 /// [`NodeFlags::ADDRESS_TAKEN`]: crate::graph::unified::storage::metadata::NodeFlags::ADDRESS_TAKEN
345 /// [`NodeFlags::CALLSITE_PROMISCUOUS`]: crate::graph::unified::storage::metadata::NodeFlags::CALLSITE_PROMISCUOUS
346 /// [`StoredEntry`]: crate::graph::unified::storage::metadata::StoredEntry
347 /// [`EdgeKind::Calls`]: crate::graph::unified::edge::EdgeKind::Calls
348 /// [`ResolvedVia::Direct`]: crate::graph::unified::edge::ResolvedVia::Direct
349 #[cfg(any(test, feature = "test-support"))]
350 pub fn clear_phase_a_state_for_test(&mut self) {
351 *self.c_indirect_tables_mut() = None;
352 self.macro_metadata_mut().clear_phase_a_flags_for_test();
353 self.edges_mut().normalize_calls_resolved_via_for_test();
354 }
355
356 // ------------------------------------------------------------------
357 // Phase 1 fact-layer provenance accessors (P1U09).
358 // ------------------------------------------------------------------
359
360 /// Returns the monotonic fact-layer epoch stamped on the most recently
361 /// saved or loaded snapshot. Returns `0` for graphs that have not been
362 /// persisted yet or were loaded from V7 snapshots.
363 #[inline]
364 #[must_use]
365 pub fn fact_epoch(&self) -> u64 {
366 self.fact_epoch
367 }
368
369 /// Looks up node provenance by `NodeId`.
370 ///
371 /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
372 /// or the stored generation does not match (stale handle).
373 #[inline]
374 #[must_use]
375 pub fn node_provenance(
376 &self,
377 id: crate::graph::unified::node::id::NodeId,
378 ) -> Option<&NodeProvenance> {
379 self.node_provenance.lookup(id)
380 }
381
382 /// Looks up edge provenance by `EdgeId`.
383 ///
384 /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
385 /// or the edge is the invalid sentinel.
386 #[inline]
387 #[must_use]
388 pub fn edge_provenance(
389 &self,
390 id: crate::graph::unified::edge::id::EdgeId,
391 ) -> Option<&EdgeProvenance> {
392 self.edge_provenance.lookup(id)
393 }
394
395 /// Returns a borrowed provenance view for a file.
396 ///
397 /// Returns `None` for invalid/unregistered `FileId`s.
398 #[inline]
399 #[must_use]
400 pub fn file_provenance(
401 &self,
402 id: crate::graph::unified::file::id::FileId,
403 ) -> Option<FileProvenanceView<'_>> {
404 self.files.file_provenance(id)
405 }
406
407 // ------------------------------------------------------------------
408 // Phase 2 binding-plane accessors (P2U03).
409 // ------------------------------------------------------------------
410
411 /// Returns a reference to the scope arena derived during Phase 4e.
412 ///
413 /// The arena is empty on freshly-constructed `CodeGraph` instances and is
414 /// populated by calling `phase4e_binding::derive_binding_plane`.
415 #[inline]
416 #[must_use]
417 pub fn scope_arena(&self) -> &ScopeArena {
418 &self.scope_arena
419 }
420
421 /// Installs a freshly-derived scope arena.
422 ///
423 /// Called from `phase4e_binding::derive_binding_plane` during the build
424 /// pipeline. External callers that run Phase 4e manually (e.g., test
425 /// fixture builders) use this to store the result.
426 pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
427 self.scope_arena = Arc::new(arena);
428 }
429
430 /// Returns a reference to the alias table derived during Phase 4e.
431 ///
432 /// The table is empty on freshly-constructed `CodeGraph` instances and is
433 /// populated by calling `phase4e_binding::derive_binding_plane`.
434 #[inline]
435 #[must_use]
436 pub fn alias_table(&self) -> &AliasTable {
437 &self.alias_table
438 }
439
440 /// Installs a freshly-derived alias table.
441 ///
442 /// Called from `phase4e_binding::derive_binding_plane` during the build
443 /// pipeline. External callers that run Phase 4e manually (e.g., test
444 /// fixture builders) use this to store the result.
445 pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
446 self.alias_table = Arc::new(table);
447 }
448
449 /// Returns a reference to the shadow table derived during Phase 4e.
450 ///
451 /// The table is empty on freshly-constructed `CodeGraph` instances and is
452 /// populated by calling `phase4e_binding::derive_binding_plane`.
453 #[inline]
454 #[must_use]
455 pub fn shadow_table(&self) -> &ShadowTable {
456 &self.shadow_table
457 }
458
459 /// Installs a freshly-derived shadow table.
460 ///
461 /// Called from `phase4e_binding::derive_binding_plane` during the build
462 /// pipeline. External callers that run Phase 4e manually (e.g., test
463 /// fixture builders) use this to store the result.
464 pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
465 self.shadow_table = Arc::new(table);
466 }
467
468 /// Returns a reference to the scope provenance store derived during Phase 4e.
469 ///
470 /// The store is empty on freshly-constructed `CodeGraph` instances and is
471 /// populated by calling `phase4e_binding::derive_binding_plane`.
472 #[inline]
473 #[must_use]
474 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
475 &self.scope_provenance_store
476 }
477
478 /// Looks up scope provenance by `ScopeId`.
479 ///
480 /// Returns `None` if the slot is out of range, vacant, or the stored
481 /// generation does not match (stale handle).
482 #[inline]
483 #[must_use]
484 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
485 self.scope_provenance_store.lookup(id)
486 }
487
488 /// Looks up the live `ScopeId` for a stable scope identity.
489 ///
490 /// Returns `None` if no provenance record is registered for that stable id.
491 /// The reverse index is populated by `insert` during Phase 4e and must be
492 /// rebuilt after V9 deserialization.
493 #[inline]
494 #[must_use]
495 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
496 self.scope_provenance_store.scope_by_stable_id(stable)
497 }
498
499 /// Installs a freshly-derived scope provenance store.
500 ///
501 /// Called from `phase4e_binding::derive_binding_plane` during the build
502 /// pipeline. External callers that run Phase 4e manually (e.g., test
503 /// fixture builders) use this to store the result.
504 pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
505 self.scope_provenance_store = Arc::new(store);
506 }
507
508 /// Returns a reference to the file segment table.
509 #[inline]
510 #[must_use]
511 pub fn file_segments(&self) -> &FileSegmentTable {
512 &self.file_segments
513 }
514
515 /// Replaces the file segment table.
516 pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
517 self.file_segments = Arc::new(table);
518 }
519
520 /// Installs the C indirect-call side tables loaded from a V11
521 /// snapshot.
522 ///
523 /// `None` clears the slot (used on V10 → V11 upconvert and on non-C
524 /// workspaces). `Some(...)` carries the live tables onto the freshly
525 /// reconstructed `CodeGraph`. Build-pipeline callers that incrementally
526 /// populate the tables should prefer [`Self::c_indirect_tables_mut`].
527 pub(crate) fn set_c_indirect_tables(&mut self, tables: Option<CIndirectSideTables>) {
528 self.c_indirect_tables = tables;
529 }
530
531 /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
532 pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
533 Arc::make_mut(&mut self.file_segments)
534 }
535
536 /// Test-only helper that records a file segment directly on the
537 /// graph, bypassing the full Phase 3 commit pipeline. Only
538 /// available under `#[cfg(feature = "rebuild-internals")]` so the
539 /// surface is opt-in for rebuild-plane consumers (the feature is
540 /// whitelisted to sqry-daemon + sqry-core integration tests; see
541 /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
542 ///
543 /// Integration tests (notably
544 /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
545 /// to seed the synthetic workspaces they build before exercising
546 /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
547 /// code paths never touch this method — Phase 3 parallel commit
548 /// is the sole production writer, via the crate-internal
549 /// `file_segments_mut` accessor above.
550 ///
551 /// Renamed with `test_only_` prefix so the purpose is unambiguous
552 /// at every call site; `#[doc(hidden)]` hides it from rendered
553 /// rustdoc so downstream daemon integrations don't discover it by
554 /// accident.
555 #[cfg(feature = "rebuild-internals")]
556 #[doc(hidden)]
557 pub fn test_only_record_file_segment(
558 &mut self,
559 file_id: FileId,
560 start_slot: u32,
561 slot_count: u32,
562 ) {
563 Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
564 }
565
566 /// Sets the provenance stores and fact epoch, typically called by the
567 /// persistence loader after deserializing a V8 snapshot.
568 pub(crate) fn set_provenance(
569 &mut self,
570 node_provenance: NodeProvenanceStore,
571 edge_provenance: EdgeProvenanceStore,
572 fact_epoch: u64,
573 ) {
574 self.node_provenance = Arc::new(node_provenance);
575 self.edge_provenance = Arc::new(edge_provenance);
576 self.fact_epoch = fact_epoch;
577 }
578
579 /// Returns the current epoch.
580 #[inline]
581 #[must_use]
582 pub fn epoch(&self) -> u64 {
583 self.epoch
584 }
585
586 /// Returns a mutable reference to the node arena.
587 ///
588 /// Uses `Arc::make_mut` for copy-on-write semantics: if other
589 /// references exist (e.g., snapshots), the data is cloned.
590 #[inline]
591 pub fn nodes_mut(&mut self) -> &mut NodeArena {
592 Arc::make_mut(&mut self.nodes)
593 }
594
595 /// Returns a mutable reference to the bidirectional edge store.
596 ///
597 /// Uses `Arc::make_mut` for copy-on-write semantics.
598 #[inline]
599 pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
600 Arc::make_mut(&mut self.edges)
601 }
602
603 /// Returns a mutable reference to the string interner.
604 ///
605 /// Uses `Arc::make_mut` for copy-on-write semantics.
606 #[inline]
607 pub fn strings_mut(&mut self) -> &mut StringInterner {
608 Arc::make_mut(&mut self.strings)
609 }
610
611 /// Returns a mutable reference to the file registry.
612 ///
613 /// Uses `Arc::make_mut` for copy-on-write semantics.
614 #[inline]
615 pub fn files_mut(&mut self) -> &mut FileRegistry {
616 Arc::make_mut(&mut self.files)
617 }
618
619 /// Returns a mutable reference to the auxiliary indices.
620 ///
621 /// Uses `Arc::make_mut` for copy-on-write semantics.
622 #[inline]
623 pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
624 Arc::make_mut(&mut self.indices)
625 }
626
627 /// Returns a mutable reference to the macro boundary metadata store.
628 ///
629 /// Uses `Arc::make_mut` for copy-on-write semantics.
630 #[inline]
631 pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
632 Arc::make_mut(&mut self.macro_metadata)
633 }
634
635 /// Returns mutable references to both the node arena and the string interner.
636 ///
637 /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
638 /// `strings_mut()` separately on `&mut self`.
639 #[inline]
640 pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
641 (
642 Arc::make_mut(&mut self.nodes),
643 Arc::make_mut(&mut self.strings),
644 )
645 }
646
647 /// Rebuilds auxiliary indices from the current node arena.
648 ///
649 /// This avoids the borrow conflict that arises when calling `nodes()` and
650 /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
651 /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
652 /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
653 /// existing indices and rebuilds in a single pass without per-element
654 /// duplicate checking.
655 ///
656 /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
657 /// generic
658 /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
659 /// free function so the same implementation serves both the
660 /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
661 /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
662 /// using `graph.rebuild_indices()`; incremental rebuild call sites
663 /// should use the free function directly.
664 pub fn rebuild_indices(&mut self) {
665 crate::graph::unified::build::parallel_commit::rebuild_indices(self);
666 }
667
668 /// Increments the epoch counter and returns the new value.
669 ///
670 /// Called automatically by `ConcurrentCodeGraph::write()`.
671 #[inline]
672 pub fn bump_epoch(&mut self) -> u64 {
673 self.epoch = self.epoch.wrapping_add(1);
674 self.epoch
675 }
676
677 /// Sets the epoch to a specific value.
678 ///
679 /// This is primarily for testing or reconstruction from serialized state.
680 #[inline]
681 pub fn set_epoch(&mut self, epoch: u64) {
682 self.epoch = epoch;
683 }
684
685 /// Returns the number of nodes in the graph.
686 ///
687 /// This is a convenience method that delegates to `nodes().len()`.
688 ///
689 /// # Example
690 ///
691 /// ```rust
692 /// use sqry_core::graph::unified::concurrent::CodeGraph;
693 ///
694 /// let graph = CodeGraph::new();
695 /// assert_eq!(graph.node_count(), 0);
696 /// ```
697 #[inline]
698 #[must_use]
699 pub fn node_count(&self) -> usize {
700 self.nodes.len()
701 }
702
703 /// Returns the number of edges in the graph (forward direction).
704 ///
705 /// This counts edges in the forward store, including both CSR and delta edges.
706 ///
707 /// # Example
708 ///
709 /// ```rust
710 /// use sqry_core::graph::unified::concurrent::CodeGraph;
711 ///
712 /// let graph = CodeGraph::new();
713 /// assert_eq!(graph.edge_count(), 0);
714 /// ```
715 #[inline]
716 #[must_use]
717 pub fn edge_count(&self) -> usize {
718 let stats = self.edges.stats();
719 stats.forward.csr_edge_count + stats.forward.delta_edge_count
720 }
721
722 /// Returns true if the graph contains no nodes.
723 ///
724 /// This is a convenience method that delegates to `nodes().is_empty()`.
725 ///
726 /// # Example
727 ///
728 /// ```rust
729 /// use sqry_core::graph::unified::concurrent::CodeGraph;
730 ///
731 /// let graph = CodeGraph::new();
732 /// assert!(graph.is_empty());
733 /// ```
734 #[inline]
735 #[must_use]
736 pub fn is_empty(&self) -> bool {
737 self.nodes.is_empty()
738 }
739
740 /// Returns an iterator over all indexed file paths.
741 ///
742 /// This is useful for enumerating all files that have been processed
743 /// and added to the graph.
744 ///
745 /// # Example
746 ///
747 /// ```rust
748 /// use sqry_core::graph::unified::concurrent::CodeGraph;
749 ///
750 /// let graph = CodeGraph::new();
751 /// for (file_id, path) in graph.indexed_files() {
752 /// println!("File {}: {}", file_id.index(), path.display());
753 /// }
754 /// ```
755 #[inline]
756 pub fn indexed_files(
757 &self,
758 ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
759 self.files
760 .iter()
761 .map(|(id, arc_path)| (id, arc_path.as_ref()))
762 }
763
764 /// Returns the set of files that import one or more symbols exported by
765 /// `file_id`, deduplicated and sorted ascending.
766 ///
767 /// For every [`EdgeKind::Imports`] edge whose target node lives in
768 /// `file_id`, the source node's [`FileId`] is added to the result. Files
769 /// are returned sorted by raw index so the result is deterministic across
770 /// runs. The caller's own file is never included — an `Imports` edge
771 /// whose source and target both live in `file_id` is treated as a
772 /// self-import and elided. Edges whose source node is no longer resolvable
773 /// in the arena (tombstoned) are silently skipped.
774 ///
775 /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
776 /// by the incremental rebuild engine to compute reverse-dependency
777 /// closures: "if file X changes its exports, which files need to be
778 /// re-linked?"
779 ///
780 /// # Complexity
781 ///
782 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
783 /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
784 /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
785 /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
786 /// importer set (where `R` is the importer count) is negligible in
787 /// practice since `R ≤ file_count`.
788 ///
789 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
790 #[must_use]
791 pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
792 let mut importers: HashSet<FileId> = HashSet::new();
793 for &target_node in self.indices.by_file(file_id) {
794 for edge_ref in self.edges.edges_to(target_node) {
795 if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
796 continue;
797 }
798 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
799 continue;
800 };
801 let source_file = source_entry.file;
802 if source_file != file_id {
803 importers.insert(source_file);
804 }
805 }
806 }
807 let mut result: Vec<FileId> = importers.into_iter().collect();
808 result.sort();
809 result
810 }
811
812 /// Returns the set of files that hold at least one live inter-file edge
813 /// targeting a node in `file_id`, deduplicated and sorted ascending.
814 ///
815 /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
816 /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
817 /// cross-file edge as a dependency signal: `Calls`, `References`,
818 /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
819 /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
820 /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
821 /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
822 /// cross-file-capable variant. This is the reverse-dependency surface the
823 /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
824 /// `file_id` changes, every file whose committed edges point into
825 /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
826 /// references survive the target-side tombstone-and-reparse cycle.
827 ///
828 /// The caller's own file is never included — an edge whose source and
829 /// target both live in `file_id` is a self-reference and is elided.
830 /// Edges whose source node is no longer resolvable in the arena
831 /// (tombstoned) are silently skipped.
832 ///
833 /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
834 ///
835 /// * [`reverse_import_index`](Self::reverse_import_index) remains the
836 /// right surface for consumers that specifically need *import*
837 /// relationships (export surface analysis, module-dependency graphs,
838 /// etc.).
839 /// * `reverse_dependency_index` is the right surface for incremental
840 /// rebuild closure computation. Widening past imports is necessary
841 /// because call sites, type references, trait implementations, FFI
842 /// declarations, HTTP clients, and every other cross-file edge kind
843 /// hold a committed edge into the target file that becomes stale the
844 /// moment `remove_file(target)` tombstones its arena nodes. Leaving
845 /// those files out of the closure leaves the committed edges pointing
846 /// at the stale (pre-tombstone) node IDs — Phase 4c-prime only
847 /// rewrites edges on **re-parsed** files' `PendingEdge` sets, never
848 /// committed edges owned by files outside the reparse scope.
849 ///
850 /// # Complexity
851 ///
852 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
853 /// same bound as [`reverse_import_index`](Self::reverse_import_index).
854 /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
855 /// lookup and the bidirectional edge store's reverse adjacency; no
856 /// full-graph scan. Final `O(R log R)` sort over the deduplicated
857 /// dependent set is negligible since `R ≤ file_count`.
858 ///
859 /// # Over-widening is expected and acceptable
860 ///
861 /// The closure will include every file that references anything in
862 /// `file_id`, not just files whose exports change. In common codebases
863 /// this widens the reparse set modestly (a 10-file change may expand
864 /// to 20–30 dependent files in a medium crate). Correctness requires
865 /// the widening; minimality is a follow-up optimisation if profiling
866 /// demands it.
867 ///
868 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
869 #[must_use]
870 pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
871 let mut dependents: HashSet<FileId> = HashSet::new();
872 for &target_node in self.indices.by_file(file_id) {
873 for edge_ref in self.edges.edges_to(target_node) {
874 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
875 continue;
876 };
877 let source_file = source_entry.file;
878 if source_file != file_id {
879 dependents.insert(source_file);
880 }
881 }
882 }
883 let mut result: Vec<FileId> = dependents.into_iter().collect();
884 result.sort();
885 result
886 }
887
888 /// Returns the per-language confidence metadata.
889 ///
890 /// This contains analysis confidence information collected during graph build,
891 /// primarily used by language plugins (e.g., Rust) to track analysis quality.
892 #[inline]
893 #[must_use]
894 pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
895 &self.confidence
896 }
897
898 /// Merges confidence metadata for a language.
899 ///
900 /// If confidence already exists for the language, this merges the new
901 /// metadata (taking the lower confidence level and combining limitations).
902 /// Otherwise, it inserts the new confidence.
903 pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
904 use crate::confidence::ConfidenceLevel;
905
906 self.confidence
907 .entry(language.to_string())
908 .and_modify(|existing| {
909 // Take the lower confidence level (more conservative)
910 let new_level = match (&existing.level, &metadata.level) {
911 (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
912 *other
913 }
914 (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
915 | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
916 ConfidenceLevel::AstOnly
917 }
918 (level, _) => *level,
919 };
920 existing.level = new_level;
921
922 // Merge limitations (deduplicated)
923 for limitation in &metadata.limitations {
924 if !existing.limitations.contains(limitation) {
925 existing.limitations.push(limitation.clone());
926 }
927 }
928
929 // Merge unavailable features (deduplicated)
930 for feature in &metadata.unavailable_features {
931 if !existing.unavailable_features.contains(feature) {
932 existing.unavailable_features.push(feature.clone());
933 }
934 }
935 })
936 .or_insert(metadata);
937 }
938
939 /// Sets the confidence metadata map directly.
940 ///
941 /// This is primarily used when loading a graph from serialized state.
942 pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
943 self.confidence = confidence;
944 }
945
946 // ------------------------------------------------------------------
947 // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
948 //
949 // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
950 // path mutates a live `CodeGraph` in place and is the mechanism
951 // used by the full-rebuild flow when it needs to evict a file's
952 // nodes+edges between compactions. The daemon's incremental
953 // `WorkspaceManager` (Task 6) goes through the rebuild path, which
954 // is why this entry point is `pub(crate)`.
955 // ------------------------------------------------------------------
956
957 /// Tombstone every node that belongs to `file_id`, invalidate every
958 /// edge whose source or target is one of those nodes (across both
959 /// forward and reverse CSR + delta tiers), drop the file's entry
960 /// from the [`FileRegistry`], and return the set of [`NodeId`]s
961 /// that were tombstoned.
962 ///
963 /// This is the §F.2-aware file-removal primitive. Semantically, the
964 /// post-condition matches what a full rebuild of the workspace
965 /// without `file_id` would produce:
966 ///
967 /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
968 /// been `NodeArena::remove`d, advancing its slot generation so
969 /// stale [`NodeId`] handles do not alias a later re-allocation.
970 /// * Every CSR edge whose source or target slot matches one of the
971 /// tombstoned slot indices has its `csr_tombstones` bit set; the
972 /// read path's merge step already filters tombstoned CSR edges
973 /// out of every query result.
974 /// * Every delta-buffer edge (Add or Remove, any file) whose source
975 /// or target matches a tombstoned slot has been dropped from the
976 /// delta in both directions.
977 /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
978 /// no longer reference any of the tombstoned `NodeId`s.
979 /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
980 /// [`AliasTable`], and [`ShadowTable`] have been compacted through
981 /// the [`NodeIdBearing::retain_nodes`] predicate so no
982 /// tombstoned `NodeId` survives in any publish-visible store.
983 /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
984 /// `file_id`, the lookup slot is recycled, and
985 /// [`FileRegistry::resolve(file_id)`] returns `None`.
986 ///
987 /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
988 /// is useful for downstream housekeeping (e.g., resetting per-file
989 /// caches keyed by `NodeId`) and for tests that need to assert on the
990 /// exact membership of the tombstone set.
991 ///
992 /// # Idempotency
993 ///
994 /// Calling `remove_file` twice with the same `file_id` — or calling
995 /// it for a `file_id` that was never registered — is a safe no-op.
996 /// The returned `Vec<NodeId>` is empty on the second call; no
997 /// arena / edge / index state is mutated (the predicate-based
998 /// compaction of `NodeIdBearing` surfaces short-circuits when the
999 /// dead set is empty).
1000 ///
1001 /// # Visibility
1002 ///
1003 /// `pub(crate)` because external callers (Task 6's
1004 /// `WorkspaceManager` on the sqry-daemon side) route through
1005 /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
1006 /// instead. This `CodeGraph`-level variant is used by full-rebuild
1007 /// housekeeping paths inside sqry-core and by the Task 4 Step 4
1008 /// incremental fallback for cases where the caller already has a
1009 /// `&mut CodeGraph` and does not need the clone-and-publish
1010 /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
1011 /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
1012 ///
1013 /// # Performance
1014 ///
1015 /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
1016 /// The CSR walk is linear in total edge count (not per-file),
1017 /// which is the dominant cost; each row check is O(1) via the
1018 /// precomputed dead-slot-index `HashSet` in
1019 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
1020 /// * Delta filtering is `O(|delta|)` per direction.
1021 /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
1022 /// because each of the four indices keys its entries by the
1023 /// tombstoned `NodeIds` directly.
1024 ///
1025 /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
1026 /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
1027 /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
1028 #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
1029 // and the unit tests below; published in this commit so the §F.2
1030 // invariant surface can be reviewed in isolation per the Gate 0c
1031 // split contract.
1032 pub(crate) fn remove_file(
1033 &mut self,
1034 file_id: FileId,
1035 ) -> Vec<crate::graph::unified::node::NodeId> {
1036 use crate::graph::unified::node::NodeId;
1037 use crate::graph::unified::rebuild::coverage::NodeIdBearing;
1038
1039 // Drain the per-file bucket. For a file that was never
1040 // registered, this returns an empty Vec — the rest of the
1041 // method short-circuits on the `if tombstoned.is_empty()` test
1042 // below so we still deregister the file on the off chance the
1043 // bucket was empty but the file was registered (defensive; the
1044 // common case is the bucket existed iff the file was
1045 // registered).
1046 let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
1047 // Always drop the file's path entry + recycle its slot, even
1048 // when the bucket was empty, so repeated registrations of the
1049 // same path don't resurrect a zombie FileId. `unregister` is
1050 // idempotent (returns None for unknown IDs) so the cost is a
1051 // single HashMap probe when file_id is already gone.
1052 self.files_mut().unregister(file_id);
1053 // Clear the file's segment entry unconditionally (idempotent
1054 // — `FileSegmentTable::remove` no-ops on unknown ids). This
1055 // MUST run before `FileRegistry::unregister` recycles the
1056 // FileId slot for reuse, otherwise a later registration of a
1057 // different path under the reused FileId would inherit the
1058 // previous file's stale node range (see
1059 // `sqry-core/src/graph/unified/build/reindex.rs` — which
1060 // trusts `file_segments().get(file_id)` to decide which slots
1061 // to tombstone). Note: `unregister` above was called first
1062 // only to keep the existing bucket-drain ordering; the
1063 // segment-clear is order-independent with respect to
1064 // `unregister` because neither touches the other's backing
1065 // store, and the FileId slot cannot be recycled-and-reissued
1066 // across a single `remove_file` call (the registry's slot
1067 // recycler is driven by a later `register`, not by
1068 // `unregister`).
1069 self.file_segments_mut().remove(file_id);
1070
1071 if tombstoned.is_empty() {
1072 return tombstoned;
1073 }
1074
1075 // Dead set keyed on NodeId for NodeIdBearing predicates.
1076 // `retain_nodes` uses `HashSet::contains` so membership is O(1).
1077 let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
1078
1079 // 1. Tombstone the arena slots. `NodeArena::remove` is
1080 // idempotent — stale NodeIds that don't match a slot's
1081 // current generation are no-ops, which lets this method be
1082 // safely re-run on the same file.
1083 {
1084 let arena = self.nodes_mut();
1085 for &nid in &tombstoned {
1086 let _ = arena.remove(nid);
1087 }
1088 }
1089
1090 // 2. Invalidate edges across both CSR + delta in both
1091 // directions. This is the expensive step; the helper uses a
1092 // precomputed dead-slot-index set so the CSR walk is linear
1093 // in total edge count, not quadratic.
1094 self.edges_mut().tombstone_edges_for_nodes(&dead);
1095
1096 // 3. Compact the auxiliary indices so name/kind/qname/file
1097 // lookups do not return tombstoned NodeIds. Using the
1098 // NodeIdBearing surface keeps this step in lockstep with the
1099 // rebuild pipeline's step 4 — any future publish-visible
1100 // NodeId-bearing container added to the K.A/K.B matrix is
1101 // automatically swept here too.
1102 {
1103 let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
1104 self.indices_mut().retain_nodes(&*predicate);
1105 self.macro_metadata_mut().retain_nodes(&*predicate);
1106 // The remaining K.A rows (node_provenance, scope_arena,
1107 // alias_table, shadow_table) need Arc::make_mut accessors —
1108 // they are wrapped in Arc at rest so this is where the
1109 // CoW clone happens on demand. Inline the Arc::make_mut
1110 // calls here (no public mut accessor exists today because
1111 // the sole writer has been the rebuild path; extend the
1112 // set by adding a similar inline call plus a K.A/K.B row
1113 // in `super::super::rebuild::coverage`).
1114 Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1115 Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1116 Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1117 Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1118 }
1119
1120 tombstoned
1121 }
1122
1123 // ------------------------------------------------------------------
1124 // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1125 // ------------------------------------------------------------------
1126
1127 /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1128 /// by `RebuildGraph::finalize()` (defined in
1129 /// `super::super::rebuild::rebuild_graph`).
1130 ///
1131 /// This constructor is deliberately `pub(crate)` and named with a
1132 /// leading `__` so it is inaccessible from downstream crates even
1133 /// when the `rebuild-internals` feature is enabled: only code in
1134 /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1135 /// permitted to call it. The trybuild fixture
1136 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1137 /// proves there is no other path from `RebuildGraph` to
1138 /// `Arc<CodeGraph>`.
1139 ///
1140 /// The argument order mirrors the `CodeGraph` struct declaration
1141 /// order exactly; the public `clone_for_rebuild` → `finalize`
1142 /// round-trip uses the same macro-driven field enumeration so any
1143 /// new `CodeGraph` field automatically threads through this
1144 /// constructor as well.
1145 #[doc(hidden)]
1146 #[allow(clippy::too_many_arguments)]
1147 #[must_use]
1148 pub(crate) fn __assemble_from_rebuild_parts_internal(
1149 nodes: NodeArena,
1150 edges: BidirectionalEdgeStore,
1151 strings: StringInterner,
1152 files: FileRegistry,
1153 indices: AuxiliaryIndices,
1154 macro_metadata: NodeMetadataStore,
1155 node_provenance: NodeProvenanceStore,
1156 edge_provenance: EdgeProvenanceStore,
1157 fact_epoch: u64,
1158 epoch: u64,
1159 confidence: HashMap<String, ConfidenceMetadata>,
1160 scope_arena: ScopeArena,
1161 alias_table: AliasTable,
1162 shadow_table: ShadowTable,
1163 scope_provenance_store: ScopeProvenanceStore,
1164 file_segments: FileSegmentTable,
1165 go_hints: crate::graph::unified::build::staging::GoHints,
1166 ) -> Self {
1167 Self {
1168 nodes: Arc::new(nodes),
1169 edges: Arc::new(edges),
1170 strings: Arc::new(strings),
1171 files: Arc::new(files),
1172 indices: Arc::new(indices),
1173 macro_metadata: Arc::new(macro_metadata),
1174 node_provenance: Arc::new(node_provenance),
1175 edge_provenance: Arc::new(edge_provenance),
1176 fact_epoch,
1177 epoch,
1178 confidence,
1179 scope_arena: Arc::new(scope_arena),
1180 alias_table: Arc::new(alias_table),
1181 shadow_table: Arc::new(shadow_table),
1182 scope_provenance_store: Arc::new(scope_provenance_store),
1183 file_segments: Arc::new(file_segments),
1184 c_indirect_tables: None,
1185 go_hints,
1186 }
1187 }
1188
1189 // ------------------------------------------------------------------
1190 // Gate 0c (A2 §F) — publish-boundary debug invariants.
1191 //
1192 // These checks fire only in debug / test builds; release builds
1193 // compile them out. They are called from
1194 // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1195 // truth for the residue check, per §F and §H agreement). Gate 0d
1196 // will additionally wire the bijection check into
1197 // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1198 // every §E equivalence-harness run.
1199 // ------------------------------------------------------------------
1200
1201 /// Assert the bijective bucket-membership invariant (A2 §F.1).
1202 ///
1203 /// Four conditions must hold simultaneously:
1204 /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1205 /// live arena slot;
1206 /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1207 /// across buckets, no duplicates within a bucket);
1208 /// (c) the bucket's `FileId` matches the node's own `file` field on
1209 /// `NodeEntry`;
1210 /// (d) when at least one bucket is populated, every live node in
1211 /// the arena is accounted for by some bucket.
1212 ///
1213 /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1214 /// (no recorded buckets) publish boundary is vacuously consistent:
1215 /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1216 /// rebuilds on graphs that predate Gate 0c's parallel-commit
1217 /// bucketing must not panic. Once any bucket is populated, every
1218 /// live arena slot must appear in a bucket.
1219 ///
1220 /// Iter-2 B2 (verbatim): this check used to be documented as
1221 /// "vacuous until future `per_file_nodes` work lands". Pulling
1222 /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1223 /// is non-vacuous the moment parallel-parse commits nodes, which
1224 /// happens on every real build. The `!seen.is_empty()` guard now
1225 /// exists solely for the empty-graph corner case, not as a phased-
1226 /// delivery deferral.
1227 ///
1228 /// The check is a no-op in release builds. Panics with a
1229 /// descriptive message on violation. This is intentional: publish-
1230 /// boundary violations are programmer errors that must surface
1231 /// loudly during CI / test runs.
1232 ///
1233 /// # Panics
1234 ///
1235 /// Panics in debug/test builds when bucket membership is duplicated, points
1236 /// at a dead node, assigns a node to the wrong file, or omits a live node
1237 /// after buckets have been populated.
1238 #[cfg(any(debug_assertions, test))]
1239 pub fn assert_bucket_bijection(&self) {
1240 use std::collections::HashMap as StdHashMap;
1241 // (a) + (b) + (c): every bucketed node is live, unique across
1242 // buckets, and the bucket's FileId matches the node's own file.
1243 let mut seen: StdHashMap<
1244 crate::graph::unified::node::NodeId,
1245 crate::graph::unified::file::FileId,
1246 > = StdHashMap::new();
1247 let mut any_bucket_populated = false;
1248 for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1249 if !bucket.is_empty() {
1250 any_bucket_populated = true;
1251 }
1252 // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1253 // dedups during finalize step 6, but we re-check here so the
1254 // invariant covers non-finalize publish paths too (e.g. if a
1255 // future pipeline builds a graph without routing through
1256 // `RebuildGraph::finalize`).
1257 let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1258 std::collections::HashSet::new();
1259 for node_id in bucket {
1260 assert!(
1261 within_bucket.insert(node_id),
1262 "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1263 );
1264 assert!(
1265 self.nodes.get(node_id).is_some(),
1266 "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1267 );
1268 let prior = seen.insert(node_id, file_id);
1269 assert!(
1270 prior.is_none(),
1271 "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1272 prior={prior:?}, current={file_id:?}"
1273 );
1274 if let Some(entry) = self.nodes.get(node_id) {
1275 assert_eq!(
1276 entry.file, file_id,
1277 "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1278 {file_id:?}, actual {:?}",
1279 entry.file
1280 );
1281 }
1282 }
1283 }
1284 // (d): every live node is accounted for by `seen` once buckets
1285 // are populated. The guard keeps legacy-empty-graph boundaries
1286 // vacuously consistent (see docs above).
1287 if any_bucket_populated {
1288 for (node_id, _entry) in self.nodes.iter() {
1289 assert!(
1290 seen.contains_key(&node_id),
1291 "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1292 );
1293 }
1294 }
1295 }
1296
1297 /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1298 ///
1299 /// Iterates every publish-visible NodeId-bearing structure on
1300 /// `self` and panics if any contains a node in `dead`. Called from
1301 /// `RebuildGraph::finalize()` step 14 against the set drained at
1302 /// step 8 — exactly one site per the plan's §F / §H agreement.
1303 ///
1304 /// No-op when `dead` is empty or in release builds.
1305 ///
1306 /// # Panics
1307 ///
1308 /// Panics in debug/test builds if any publish-visible node-bearing
1309 /// structure still references a tombstoned node.
1310 #[cfg(any(debug_assertions, test))]
1311 pub fn assert_no_tombstone_residue_for<S: std::hash::BuildHasher>(
1312 &self,
1313 dead: &std::collections::HashSet<crate::graph::unified::node::NodeId, S>,
1314 ) {
1315 use super::super::rebuild::coverage::NodeIdBearing;
1316 if dead.is_empty() {
1317 return;
1318 }
1319 // Every K.A/K.B row must be inspected per §F.2.
1320 for nid in self.nodes.all_node_ids() {
1321 assert!(
1322 !dead.contains(&nid),
1323 "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1324 );
1325 }
1326 for nid in self.indices.all_node_ids() {
1327 assert!(
1328 !dead.contains(&nid),
1329 "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1330 );
1331 }
1332 for nid in self.edges.all_node_ids() {
1333 assert!(
1334 !dead.contains(&nid),
1335 "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1336 );
1337 }
1338 for nid in self.macro_metadata.all_node_ids() {
1339 assert!(
1340 !dead.contains(&nid),
1341 "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1342 );
1343 }
1344 for nid in self.node_provenance.all_node_ids() {
1345 assert!(
1346 !dead.contains(&nid),
1347 "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1348 );
1349 }
1350 for nid in self.scope_arena.all_node_ids() {
1351 assert!(
1352 !dead.contains(&nid),
1353 "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1354 );
1355 }
1356 for nid in self.alias_table.all_node_ids() {
1357 assert!(
1358 !dead.contains(&nid),
1359 "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1360 );
1361 }
1362 for nid in self.shadow_table.all_node_ids() {
1363 assert!(
1364 !dead.contains(&nid),
1365 "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1366 );
1367 }
1368 for nid in self.files.all_node_ids() {
1369 assert!(
1370 !dead.contains(&nid),
1371 "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1372 );
1373 }
1374 }
1375}
1376
1377impl Default for CodeGraph {
1378 fn default() -> Self {
1379 Self::new()
1380 }
1381}
1382
1383impl fmt::Debug for CodeGraph {
1384 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1385 f.debug_struct("CodeGraph")
1386 .field("nodes", &self.nodes.len())
1387 .field("epoch", &self.epoch)
1388 .finish_non_exhaustive()
1389 }
1390}
1391
1392impl GraphMemorySize for CodeGraph {
1393 /// Estimates the total heap bytes owned by this `CodeGraph`.
1394 ///
1395 /// Sums heap usage across every component the graph owns: node arena,
1396 /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1397 /// buffer), string interner, file registry, auxiliary indices, sparse
1398 /// macro/classpath metadata, provenance stores, and the per-language
1399 /// confidence map. Used by the `sqryd` daemon's admission controller
1400 /// and workspace retention reaper to enforce memory budgets.
1401 fn heap_bytes(&self) -> usize {
1402 let mut confidence_bytes = self.confidence.capacity()
1403 * (std::mem::size_of::<String>()
1404 + std::mem::size_of::<ConfidenceMetadata>()
1405 + HASHMAP_ENTRY_OVERHEAD);
1406 for (key, meta) in &self.confidence {
1407 confidence_bytes += key.capacity();
1408 // `ConfidenceMetadata.limitations` / `unavailable_features` are
1409 // `Vec<String>`; charge their spill and inner String payloads.
1410 confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1411 for s in &meta.limitations {
1412 confidence_bytes += s.capacity();
1413 }
1414 confidence_bytes +=
1415 meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1416 for s in &meta.unavailable_features {
1417 confidence_bytes += s.capacity();
1418 }
1419 }
1420
1421 self.nodes.heap_bytes()
1422 + self.edges.heap_bytes()
1423 + self.strings.heap_bytes()
1424 + self.files.heap_bytes()
1425 + self.indices.heap_bytes()
1426 + self.macro_metadata.heap_bytes()
1427 + self.node_provenance.heap_bytes()
1428 + self.edge_provenance.heap_bytes()
1429 + confidence_bytes
1430 + self.file_segments.capacity()
1431 * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1432 )
1433 }
1434}
1435
1436/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1437///
1438/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1439/// - Multiple readers can access the graph simultaneously
1440/// - Only one writer can hold the lock at a time
1441/// - Each write operation increments the epoch for cursor invalidation
1442///
1443/// # Design
1444///
1445/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1446/// - Fair scheduling prevents writer starvation
1447/// - No poisoning (unlike `std::sync::RwLock`)
1448/// - Faster lock/unlock operations
1449///
1450/// # Phase 2 binding-plane access
1451///
1452/// Use the three-line snapshot pattern to access `BindingPlane`:
1453///
1454/// ```rust,ignore
1455/// let read_guard = concurrent.read();
1456/// let snapshot = read_guard.snapshot();
1457/// let plane = snapshot.binding_plane();
1458/// let resolution = plane.resolve(&query);
1459/// ```
1460///
1461/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1462/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1463/// API is exposed through `BindingPlane`.
1464///
1465/// # Usage
1466///
1467/// ```rust
1468/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1469///
1470/// let graph = ConcurrentCodeGraph::new();
1471///
1472/// // Read access (multiple readers allowed)
1473/// {
1474/// let guard = graph.read();
1475/// let _nodes = guard.nodes();
1476/// }
1477///
1478/// // Write access (exclusive)
1479/// {
1480/// let mut guard = graph.write();
1481/// let _nodes = guard.nodes_mut();
1482/// }
1483///
1484/// // Snapshot for long queries
1485/// let snapshot = graph.snapshot();
1486/// ```
1487pub struct ConcurrentCodeGraph {
1488 /// The underlying code graph protected by a read-write lock.
1489 inner: RwLock<CodeGraph>,
1490 /// Global epoch counter for cursor validation.
1491 epoch: AtomicU64,
1492}
1493
1494impl ConcurrentCodeGraph {
1495 /// Creates a new empty `ConcurrentCodeGraph`.
1496 #[must_use]
1497 pub fn new() -> Self {
1498 Self {
1499 inner: RwLock::new(CodeGraph::new()),
1500 epoch: AtomicU64::new(0),
1501 }
1502 }
1503
1504 /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1505 #[must_use]
1506 pub fn from_graph(graph: CodeGraph) -> Self {
1507 let epoch = graph.epoch();
1508 Self {
1509 inner: RwLock::new(graph),
1510 epoch: AtomicU64::new(epoch),
1511 }
1512 }
1513
1514 /// Acquires a read lock on the graph.
1515 ///
1516 /// Multiple readers can hold the lock simultaneously.
1517 /// This does not increment the epoch.
1518 #[inline]
1519 pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1520 self.inner.read()
1521 }
1522
1523 /// Acquires a write lock on the graph.
1524 ///
1525 /// Only one writer can hold the lock at a time.
1526 /// This increments the global epoch counter.
1527 #[inline]
1528 pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1529 // Increment the global epoch
1530 self.epoch.fetch_add(1, Ordering::SeqCst);
1531 let mut guard = self.inner.write();
1532 // Sync the inner graph's epoch with the global epoch
1533 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1534 guard
1535 }
1536
1537 /// Returns the current global epoch.
1538 ///
1539 /// This can be used to detect if the graph has been modified
1540 /// since a previous operation (cursor invalidation).
1541 #[inline]
1542 #[must_use]
1543 pub fn epoch(&self) -> u64 {
1544 self.epoch.load(Ordering::SeqCst)
1545 }
1546
1547 /// Creates a cheap snapshot of the graph.
1548 ///
1549 /// This acquires a brief read lock to clone the Arc references.
1550 /// The snapshot is isolated from future mutations.
1551 #[must_use]
1552 pub fn snapshot(&self) -> GraphSnapshot {
1553 self.inner.read().snapshot()
1554 }
1555
1556 // ------------------------------------------------------------------
1557 // Phase 1 fact-layer provenance convenience accessors.
1558 // These acquire a brief read lock, matching the snapshot() pattern.
1559 // ------------------------------------------------------------------
1560
1561 /// Returns the monotonic fact-layer epoch from the underlying graph.
1562 #[must_use]
1563 pub fn fact_epoch(&self) -> u64 {
1564 self.inner.read().fact_epoch()
1565 }
1566
1567 /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1568 #[must_use]
1569 pub fn node_provenance(
1570 &self,
1571 id: crate::graph::unified::node::id::NodeId,
1572 ) -> Option<NodeProvenance> {
1573 self.inner.read().node_provenance(id).copied()
1574 }
1575
1576 /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1577 #[must_use]
1578 pub fn edge_provenance(
1579 &self,
1580 id: crate::graph::unified::edge::id::EdgeId,
1581 ) -> Option<EdgeProvenance> {
1582 self.inner.read().edge_provenance(id).copied()
1583 }
1584
1585 /// Returns a file provenance view (acquires a brief read lock).
1586 ///
1587 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1588 #[must_use]
1589 pub fn file_provenance(
1590 &self,
1591 id: crate::graph::unified::file::id::FileId,
1592 ) -> Option<OwnedFileProvenanceView> {
1593 let guard = self.inner.read();
1594 guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1595 content_hash: *v.content_hash,
1596 indexed_at: v.indexed_at,
1597 source_uri: v.source_uri,
1598 is_external: v.is_external,
1599 })
1600 }
1601
1602 // ------------------------------------------------------------------
1603 // Phase 2 binding-plane accessors (P2U03).
1604 // ------------------------------------------------------------------
1605
1606 /// Returns the scope arena from the underlying graph (acquires a brief
1607 /// read lock).
1608 ///
1609 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1610 /// this call site.
1611 #[must_use]
1612 pub fn scope_arena(&self) -> Arc<ScopeArena> {
1613 Arc::clone(&self.inner.read().scope_arena)
1614 }
1615
1616 /// Returns the alias table from the underlying graph (acquires a brief
1617 /// read lock).
1618 ///
1619 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1620 /// this call site.
1621 #[must_use]
1622 pub fn alias_table(&self) -> Arc<AliasTable> {
1623 Arc::clone(&self.inner.read().alias_table)
1624 }
1625
1626 /// Returns the shadow table from the underlying graph (acquires a brief
1627 /// read lock).
1628 ///
1629 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1630 /// this call site.
1631 #[must_use]
1632 pub fn shadow_table(&self) -> Arc<ShadowTable> {
1633 Arc::clone(&self.inner.read().shadow_table)
1634 }
1635
1636 /// Returns the scope provenance store from the underlying graph (acquires
1637 /// a brief read lock).
1638 ///
1639 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1640 /// this call site.
1641 #[must_use]
1642 pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1643 Arc::clone(&self.inner.read().scope_provenance_store)
1644 }
1645
1646 /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1647 ///
1648 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1649 #[must_use]
1650 pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1651 self.inner.read().scope_provenance(id).cloned()
1652 }
1653
1654 /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1655 /// brief read lock).
1656 ///
1657 /// Returns `None` if no provenance record is registered for that stable id.
1658 #[must_use]
1659 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1660 self.inner.read().scope_by_stable_id(stable)
1661 }
1662
1663 /// Returns the file segment table from the underlying graph (acquires a
1664 /// brief read lock).
1665 #[must_use]
1666 pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1667 Arc::clone(&self.inner.read().file_segments)
1668 }
1669
1670 /// Attempts to acquire a read lock without blocking.
1671 ///
1672 /// Returns `None` if the lock is currently held exclusively.
1673 #[inline]
1674 #[must_use]
1675 pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1676 self.inner.try_read()
1677 }
1678
1679 /// Attempts to acquire a write lock without blocking.
1680 ///
1681 /// Returns `None` if the lock is currently held by another thread.
1682 /// If successful, increments the epoch.
1683 #[inline]
1684 pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1685 self.inner.try_write().map(|mut guard| {
1686 self.epoch.fetch_add(1, Ordering::SeqCst);
1687 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1688 guard
1689 })
1690 }
1691}
1692
1693impl Default for ConcurrentCodeGraph {
1694 fn default() -> Self {
1695 Self::new()
1696 }
1697}
1698
1699impl fmt::Debug for ConcurrentCodeGraph {
1700 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1701 f.debug_struct("ConcurrentCodeGraph")
1702 .field("epoch", &self.epoch.load(Ordering::SeqCst))
1703 .finish_non_exhaustive()
1704 }
1705}
1706
1707/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1708/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1709#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1710pub struct OwnedFileProvenanceView {
1711 /// SHA-256 of the on-disk file bytes (owned copy).
1712 pub content_hash: [u8; 32],
1713 /// Unix-epoch seconds at which this file was registered.
1714 pub indexed_at: u64,
1715 /// Optional interned physical origin URI.
1716 pub source_uri: Option<StringId>,
1717 /// Whether this file originates from an external source.
1718 pub is_external: bool,
1719}
1720
1721/// Immutable snapshot of a `CodeGraph` for long-running queries.
1722///
1723/// `GraphSnapshot` holds Arc references to the graph components,
1724/// providing a consistent view that is isolated from concurrent mutations.
1725///
1726/// # Design
1727///
1728/// Snapshots are created via `CodeGraph::snapshot()` or
1729/// `ConcurrentCodeGraph::snapshot()`. They are:
1730///
1731/// - **Immutable**: No mutation methods available
1732/// - **Isolated**: Independent of future graph mutations
1733/// - **Cheap**: Only Arc clones, no data copying
1734/// - **Self-contained**: Can outlive the original graph/lock
1735///
1736/// # Usage
1737///
1738/// ```rust
1739/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1740///
1741/// let graph = ConcurrentCodeGraph::new();
1742///
1743/// // Create snapshot for a long query
1744/// let snapshot: GraphSnapshot = graph.snapshot();
1745///
1746/// // Snapshot can be used independently
1747/// let _epoch = snapshot.epoch();
1748/// ```
1749#[derive(Clone)]
1750pub struct GraphSnapshot {
1751 /// Node storage snapshot.
1752 nodes: Arc<NodeArena>,
1753 /// Edge storage snapshot.
1754 edges: Arc<BidirectionalEdgeStore>,
1755 /// String interner snapshot.
1756 strings: Arc<StringInterner>,
1757 /// File registry snapshot.
1758 files: Arc<FileRegistry>,
1759 /// Auxiliary indices snapshot.
1760 indices: Arc<AuxiliaryIndices>,
1761 /// Sparse macro boundary metadata snapshot.
1762 macro_metadata: Arc<NodeMetadataStore>,
1763 /// Dense node provenance snapshot (Phase 1).
1764 node_provenance: Arc<NodeProvenanceStore>,
1765 /// Dense edge provenance snapshot (Phase 1).
1766 edge_provenance: Arc<EdgeProvenanceStore>,
1767 /// Monotonic fact-layer epoch at snapshot time.
1768 fact_epoch: u64,
1769 /// Epoch at snapshot time (for cursor validation).
1770 epoch: u64,
1771 /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1772 scope_arena: Arc<ScopeArena>,
1773 /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1774 alias_table: Arc<AliasTable>,
1775 /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1776 shadow_table: Arc<ShadowTable>,
1777 /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1778 scope_provenance_store: Arc<ScopeProvenanceStore>,
1779 /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1780 file_segments: Arc<FileSegmentTable>,
1781 /// Phase A (U09): C indirect-call resolver side tables snapshot.
1782 ///
1783 /// `None` on non-C workspaces. The snapshot owns its own clone of the
1784 /// `Option<CIndirectSideTables>` so concurrent readers see a stable
1785 /// view independent of subsequent mutations to the source `CodeGraph`.
1786 c_indirect_tables: Option<CIndirectSideTables>,
1787}
1788
1789impl GraphSnapshot {
1790 /// Returns a reference to the node arena.
1791 #[inline]
1792 #[must_use]
1793 pub fn nodes(&self) -> &NodeArena {
1794 &self.nodes
1795 }
1796
1797 /// Returns a reference to the bidirectional edge store.
1798 #[inline]
1799 #[must_use]
1800 pub fn edges(&self) -> &BidirectionalEdgeStore {
1801 &self.edges
1802 }
1803
1804 /// Returns a reference to the string interner.
1805 #[inline]
1806 #[must_use]
1807 pub fn strings(&self) -> &StringInterner {
1808 &self.strings
1809 }
1810
1811 /// Returns a reference to the file registry.
1812 #[inline]
1813 #[must_use]
1814 pub fn files(&self) -> &FileRegistry {
1815 &self.files
1816 }
1817
1818 /// Returns a reference to the auxiliary indices.
1819 #[inline]
1820 #[must_use]
1821 pub fn indices(&self) -> &AuxiliaryIndices {
1822 &self.indices
1823 }
1824
1825 /// Returns a reference to the macro boundary metadata store.
1826 #[inline]
1827 #[must_use]
1828 pub fn macro_metadata(&self) -> &NodeMetadataStore {
1829 &self.macro_metadata
1830 }
1831
1832 /// Returns a reference to the C indirect-call side tables, if any.
1833 ///
1834 /// Mirrors [`CodeGraph::c_indirect_tables`]; see that method for
1835 /// the contract. Snapshot-level access is read-only.
1836 #[inline]
1837 #[must_use]
1838 pub fn c_indirect_tables(&self) -> Option<&CIndirectSideTables> {
1839 self.c_indirect_tables.as_ref()
1840 }
1841
1842 // ------------------------------------------------------------------
1843 // Phase 1 fact-layer provenance accessors (P1U09).
1844 // ------------------------------------------------------------------
1845
1846 /// Returns the monotonic fact-layer epoch.
1847 #[inline]
1848 #[must_use]
1849 pub fn fact_epoch(&self) -> u64 {
1850 self.fact_epoch
1851 }
1852
1853 /// Looks up node provenance by `NodeId`.
1854 #[inline]
1855 #[must_use]
1856 pub fn node_provenance(
1857 &self,
1858 id: crate::graph::unified::node::id::NodeId,
1859 ) -> Option<&NodeProvenance> {
1860 self.node_provenance.lookup(id)
1861 }
1862
1863 /// Looks up edge provenance by `EdgeId`.
1864 #[inline]
1865 #[must_use]
1866 pub fn edge_provenance(
1867 &self,
1868 id: crate::graph::unified::edge::id::EdgeId,
1869 ) -> Option<&EdgeProvenance> {
1870 self.edge_provenance.lookup(id)
1871 }
1872
1873 /// Returns a borrowed provenance view for a file.
1874 #[inline]
1875 #[must_use]
1876 pub fn file_provenance(
1877 &self,
1878 id: crate::graph::unified::file::id::FileId,
1879 ) -> Option<FileProvenanceView<'_>> {
1880 self.files.file_provenance(id)
1881 }
1882
1883 // ------------------------------------------------------------------
1884 // Phase 2 binding-plane accessors (P2U03).
1885 // ------------------------------------------------------------------
1886
1887 /// Returns a reference to the scope arena at snapshot time.
1888 ///
1889 /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1890 /// as it existed when `snapshot()` was called. Subsequent calls to
1891 /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1892 #[inline]
1893 #[must_use]
1894 pub fn scope_arena(&self) -> &ScopeArena {
1895 &self.scope_arena
1896 }
1897
1898 /// Returns a reference to the alias table at snapshot time.
1899 ///
1900 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1901 /// as it existed when `snapshot()` was called. Subsequent calls to
1902 /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1903 #[inline]
1904 #[must_use]
1905 pub fn alias_table(&self) -> &AliasTable {
1906 &self.alias_table
1907 }
1908
1909 /// Returns a reference to the shadow table at snapshot time.
1910 ///
1911 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1912 /// as it existed when `snapshot()` was called. Subsequent calls to
1913 /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1914 #[inline]
1915 #[must_use]
1916 pub fn shadow_table(&self) -> &ShadowTable {
1917 &self.shadow_table
1918 }
1919
1920 /// Returns a reference to the scope provenance store at snapshot time.
1921 ///
1922 /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1923 /// as it existed when `snapshot()` was called. Subsequent calls to
1924 /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1925 /// this view.
1926 #[inline]
1927 #[must_use]
1928 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1929 &self.scope_provenance_store
1930 }
1931
1932 /// Looks up scope provenance by `ScopeId` at snapshot time.
1933 ///
1934 /// Returns `None` if the slot is out of range, vacant, or the stored
1935 /// generation does not match (stale handle).
1936 #[inline]
1937 #[must_use]
1938 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1939 self.scope_provenance_store.lookup(id)
1940 }
1941
1942 /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1943 ///
1944 /// Returns `None` if no provenance record is registered for that stable id.
1945 #[inline]
1946 #[must_use]
1947 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1948 self.scope_provenance_store.scope_by_stable_id(stable)
1949 }
1950
1951 /// Returns a reference to the file segment table at snapshot time.
1952 #[inline]
1953 #[must_use]
1954 pub fn file_segments(&self) -> &FileSegmentTable {
1955 &self.file_segments
1956 }
1957
1958 /// Returns the epoch at which this snapshot was taken.
1959 ///
1960 /// This can be compared against the current graph epoch to
1961 /// detect if the graph has changed since the snapshot.
1962 #[inline]
1963 #[must_use]
1964 pub fn epoch(&self) -> u64 {
1965 self.epoch
1966 }
1967
1968 /// Returns `true` if this snapshot's epoch matches the given epoch.
1969 ///
1970 /// Use this to validate cursors before continuing pagination.
1971 #[inline]
1972 #[must_use]
1973 pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1974 self.epoch == other_epoch
1975 }
1976
1977 // ------------------------------------------------------------------
1978 // Phase 2 binding-plane facade accessor (P2U07).
1979 // ------------------------------------------------------------------
1980
1981 /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1982 ///
1983 /// The facade is the stable Phase 2 public API for scope/alias/shadow
1984 /// queries and witness-bearing resolution. It provides a single entry
1985 /// point (`resolve`) that returns both a `BindingResult` and an ordered
1986 /// step trace in a `BindingResolution`.
1987 ///
1988 /// # MVCC note
1989 ///
1990 /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1991 /// MVCC-consistent view of the graph at snapshot time. Callers from
1992 /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1993 /// pattern so the snapshot lifetime is explicit:
1994 ///
1995 /// ```rust,ignore
1996 /// // CodeGraph caller:
1997 /// let snapshot = graph.snapshot();
1998 /// let plane = snapshot.binding_plane();
1999 ///
2000 /// // ConcurrentCodeGraph caller:
2001 /// let read_guard = concurrent.read();
2002 /// let snapshot = read_guard.snapshot();
2003 /// let plane = snapshot.binding_plane();
2004 /// ```
2005 #[inline]
2006 #[must_use]
2007 pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
2008 crate::graph::unified::bind::plane::BindingPlane::new(self)
2009 }
2010
2011 // ============================================================================
2012 // Query Methods
2013 // ============================================================================
2014
2015 /// Finds nodes matching a pattern.
2016 ///
2017 /// Performs a simple substring match on node names and qualified names.
2018 /// Returns all matching node IDs.
2019 ///
2020 /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
2021 /// nodes — internal scaffolding the language plugins emit for
2022 /// binding-plane and scope analysis (e.g. the Go plugin's
2023 /// `<field:operand.field>` field-access shadows and the
2024 /// `<ident>@<offset>` per-binding-site Variable nodes from the
2025 /// local-scope resolver) — are filtered out by default. Internal
2026 /// callers that need to reach these nodes (binding plane, scope /
2027 /// alias / shadow analysis) use
2028 /// [`Self::find_by_pattern_with_options`] with
2029 /// `include_synthetic = true`.
2030 ///
2031 /// # Performance
2032 ///
2033 /// Optimized to iterate over unique strings in the interner (smaller set)
2034 /// rather than all nodes in the arena.
2035 ///
2036 /// # Arguments
2037 ///
2038 /// * `pattern` - The pattern to match (substring search)
2039 ///
2040 /// # Returns
2041 ///
2042 /// A vector of `NodeIds` for all matching nodes (synthetic
2043 /// placeholders excluded).
2044 #[must_use]
2045 pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
2046 self.find_by_pattern_with_options(pattern, false)
2047 }
2048
2049 /// Finds nodes matching a pattern with explicit control over synthetic
2050 /// placeholder visibility.
2051 ///
2052 /// `include_synthetic = false` is the default surface used by every
2053 /// user-facing caller (CLI `search`, MCP `semantic_search` /
2054 /// `pattern_search` / `relation_query`, etc.). Synthetic
2055 /// placeholders are suppressed via two parallel checks that must
2056 /// agree:
2057 ///
2058 /// 1. The authoritative `NodeFlags::SYNTHETIC` bit on the
2059 /// metadata store
2060 /// ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
2061 /// 2. The structural name-shape fallback
2062 /// ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
2063 /// for V10 snapshots written before the synthetic bit existed
2064 /// and for cross-file unification losers that retained their
2065 /// name but lost their metadata entry.
2066 ///
2067 /// Either check matching is sufficient to suppress the node. The
2068 /// design lives in `docs/development/public-issue-triage/`
2069 /// under the `C_SUPPRESS` unit; see also the rationale in
2070 /// [`crate::graph::unified::storage::metadata::NodeFlags::SYNTHETIC`].
2071 ///
2072 /// `include_synthetic = true` is **internal-only**. The binding
2073 /// plane, scope resolver, and rebuild's coverage gate use this
2074 /// path to reach synthetic nodes for their structural integrity
2075 /// checks. **No CLI / MCP surface should ever pass `true`.**
2076 #[must_use]
2077 pub fn find_by_pattern_with_options(
2078 &self,
2079 pattern: &str,
2080 include_synthetic: bool,
2081 ) -> Vec<crate::graph::unified::node::NodeId> {
2082 let mut matches = Vec::new();
2083
2084 // 1. Scan unique strings in interner for matches
2085 for (str_id, s) in self.strings.iter() {
2086 if s.contains(pattern) {
2087 // 2. If string matches, look up all nodes with this name
2088 // Check qualified name index
2089 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2090 // Check simple name index
2091 matches.extend_from_slice(self.indices.by_name(str_id));
2092 }
2093 }
2094
2095 // Deduplicate matches (a node might match both qualified and simple name)
2096 matches.sort_unstable();
2097 matches.dedup();
2098
2099 if !include_synthetic {
2100 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2101 }
2102
2103 matches
2104 }
2105
2106 /// Finds nodes whose interned simple **or** qualified name equals `name`,
2107 /// accepting dot- and Ruby-`#` qualified display form as a fallback for
2108 /// graph-canonical `::` qualified names.
2109 ///
2110 /// This is the canonical surface for **exact-name** lookups —
2111 /// shared by the CLI `--exact <pattern>` shorthand
2112 /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
2113 /// structural query planner's `name:` predicate
2114 /// (`sqry-db/src/planner/parse.rs`,
2115 /// `sqry-db/src/planner/execute.rs`). Both surfaces are
2116 /// contract-bound (DAG `B1_ALIGN`) to return the same set against
2117 /// any fixture: the CLI calls this method directly, while the
2118 /// planner uses the same interner + by-name index pair internally
2119 /// when scanning, then applies the same synthetic filter.
2120 ///
2121 /// **Synthetic suppression.** Synthetic placeholder nodes
2122 /// (Go-plugin `<field:operand.field>` shadows and
2123 /// `<ident>@<offset>` per-binding-site Variables; see
2124 /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
2125 /// are excluded via [`Self::is_node_synthetic`]. There is **no**
2126 /// `include_synthetic = true` variant for the exact-match surface
2127 /// because the synthetic name shapes the structural fallback
2128 /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
2129 /// name byte-for-byte; the metadata-bit channel is the only
2130 /// realistic leak vector and it is suppressed unconditionally.
2131 ///
2132 /// **Display fallback.** Exact lookup checks the literal input. When the
2133 /// input is qualified, language-aware display candidates win over raw
2134 /// canonical matches. This keeps native Rust `identity::T` from also
2135 /// returning a TypeScript type parameter whose internal canonical form is
2136 /// `identity::T` but whose display form is `identity.T`. If neither
2137 /// display nor literal lookup finds candidates, dot- and Ruby-`#`
2138 /// qualified inputs also check the graph-canonical `::` rewrite.
2139 ///
2140 /// # Performance
2141 ///
2142 /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2143 /// interned the resolver still tries a native-display to graph-canonical
2144 /// `::` fallback for user-facing qualified names before returning empty.
2145 ///
2146 /// # Arguments
2147 ///
2148 /// * `name` - The exact name to look up (no glob, no regex).
2149 ///
2150 /// # Returns
2151 ///
2152 /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2153 /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2154 /// name equals `name`, or matches for the graph-canonical `::` rewrite
2155 /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2156 /// literal or display candidates.
2157 #[must_use]
2158 pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2159 let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2160 if !is_qualified {
2161 // Bare name lookup: the interned `by_name` index covers
2162 // every language and is O(1); no display scan is needed.
2163 return self.find_by_exact_interned_name(name);
2164 }
2165 // Qualified name: each plugin stores its native form
2166 // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2167 // Ruby `Ledger#promotedField`). Scan computed display names so
2168 // a single dotted user query resolves to all language matches,
2169 // then fall back to the interned form (and the graph-canonical
2170 // `::` rewrite) when display yields nothing.
2171 let mut exact_matches = self.find_by_exact_display_name(name);
2172 if exact_matches.is_empty() {
2173 exact_matches = self.find_by_exact_interned_name(name);
2174 if exact_matches.is_empty() && !name.contains("::") {
2175 let canonical = name.replace(['.', '#'], "::");
2176 exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2177 exact_matches.sort_unstable();
2178 exact_matches.dedup();
2179 }
2180 }
2181 exact_matches
2182 }
2183
2184 fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2185 let mut matches = self
2186 .iter_nodes()
2187 .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2188 .filter_map(|(node_id, entry)| {
2189 let qualified = entry
2190 .qualified_name
2191 .and_then(|sid| self.strings.resolve(sid))?;
2192 let display = self.files.language_for_file(entry.file).map_or_else(
2193 || qualified.to_string(),
2194 |language| {
2195 display_graph_qualified_name(
2196 language,
2197 qualified.as_ref(),
2198 entry.kind,
2199 entry.is_static,
2200 )
2201 },
2202 );
2203 (display == name).then_some(node_id)
2204 })
2205 .collect::<Vec<_>>();
2206 matches.sort_unstable();
2207 matches.dedup();
2208 matches
2209 }
2210
2211 fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2212 let Some(str_id) = self.strings.get(name) else {
2213 return Vec::new();
2214 };
2215
2216 let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2217 matches.extend_from_slice(self.indices.by_name(str_id));
2218 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2219 matches.sort_unstable();
2220 matches.dedup();
2221 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2222 matches
2223 }
2224
2225 /// Returns `true` if the node should be treated as a synthetic
2226 /// placeholder for user-facing surfaces.
2227 ///
2228 /// Combines the metadata-store flag and the structural name-shape
2229 /// fallback (see [`Self::find_by_pattern_with_options`] for the
2230 /// full rationale). Returns `false` for missing nodes (an unknown
2231 /// `NodeId` is not "synthetic" — it is "not present").
2232 #[must_use]
2233 pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2234 // Authoritative check: metadata-store bit (NodeFlags::SYNTHETIC).
2235 if self.macro_metadata.is_synthetic(node_id) {
2236 return true;
2237 }
2238 // Structural fallback: name shape recognised as synthetic.
2239 // Required for V10 snapshots written before the bit existed,
2240 // for unification losers that lost their metadata entry, and
2241 // as defence-in-depth against future plugins forgetting to
2242 // flip the bit.
2243 let Some(entry) = self.nodes.get(node_id) else {
2244 return false;
2245 };
2246 if entry.is_unified_loser() {
2247 // Already invisible for other reasons; do not also flag as synthetic.
2248 return false;
2249 }
2250 let Some(name) = self.strings.resolve(entry.name) else {
2251 return false;
2252 };
2253 crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2254 name.as_ref(),
2255 )
2256 }
2257
2258 /// Gets all callees of a node (functions called by this node).
2259 ///
2260 /// Queries the forward edge store for all Calls edges from this node.
2261 ///
2262 /// # Arguments
2263 ///
2264 /// * `node` - The node ID to query
2265 ///
2266 /// # Returns
2267 ///
2268 /// A vector of `NodeIds` representing functions called by this node.
2269 #[must_use]
2270 pub fn get_callees(
2271 &self,
2272 node: crate::graph::unified::node::NodeId,
2273 ) -> Vec<crate::graph::unified::node::NodeId> {
2274 use crate::graph::unified::edge::EdgeKind;
2275
2276 self.edges
2277 .edges_from(node)
2278 .into_iter()
2279 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2280 .map(|edge| edge.target)
2281 .collect()
2282 }
2283
2284 /// Gets all callers of a node (functions that call this node).
2285 ///
2286 /// Queries the reverse edge store for all Calls edges to this node.
2287 ///
2288 /// # Arguments
2289 ///
2290 /// * `node` - The node ID to query
2291 ///
2292 /// # Returns
2293 ///
2294 /// A vector of `NodeIds` representing functions that call this node.
2295 #[must_use]
2296 pub fn get_callers(
2297 &self,
2298 node: crate::graph::unified::node::NodeId,
2299 ) -> Vec<crate::graph::unified::node::NodeId> {
2300 use crate::graph::unified::edge::EdgeKind;
2301
2302 self.edges
2303 .edges_to(node)
2304 .into_iter()
2305 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2306 .map(|edge| edge.source)
2307 .collect()
2308 }
2309
2310 /// Iterates over all nodes in the graph.
2311 ///
2312 /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2313 /// occupied slots in the arena.
2314 ///
2315 /// # Returns
2316 ///
2317 /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2318 pub fn iter_nodes(
2319 &self,
2320 ) -> impl Iterator<
2321 Item = (
2322 crate::graph::unified::node::NodeId,
2323 &crate::graph::unified::storage::arena::NodeEntry,
2324 ),
2325 > {
2326 self.nodes.iter()
2327 }
2328
2329 /// Iterates over all edges in the graph.
2330 ///
2331 /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2332 /// all edges in the forward edge store.
2333 ///
2334 /// # Returns
2335 ///
2336 /// An iterator over edge tuples.
2337 pub fn iter_edges(
2338 &self,
2339 ) -> impl Iterator<
2340 Item = (
2341 crate::graph::unified::node::NodeId,
2342 crate::graph::unified::node::NodeId,
2343 crate::graph::unified::edge::EdgeKind,
2344 ),
2345 > + '_ {
2346 // Iterate over all nodes in the arena and get their outgoing edges
2347 self.nodes.iter().flat_map(move |(node_id, _entry)| {
2348 // Get all edges from this node
2349 self.edges
2350 .edges_from(node_id)
2351 .into_iter()
2352 .map(move |edge| (node_id, edge.target, edge.kind))
2353 })
2354 }
2355
2356 /// Gets a node entry by ID.
2357 ///
2358 /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2359 /// if the ID is invalid or stale.
2360 ///
2361 /// # Arguments
2362 ///
2363 /// * `id` - The node ID to look up
2364 ///
2365 /// # Returns
2366 ///
2367 /// A reference to the `NodeEntry`, or None if not found.
2368 #[must_use]
2369 pub fn get_node(
2370 &self,
2371 id: crate::graph::unified::node::NodeId,
2372 ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2373 self.nodes.get(id)
2374 }
2375}
2376
2377impl fmt::Debug for GraphSnapshot {
2378 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2379 f.debug_struct("GraphSnapshot")
2380 .field("nodes", &self.nodes.len())
2381 .field("epoch", &self.epoch)
2382 .finish_non_exhaustive()
2383 }
2384}
2385
2386/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2387///
2388/// This lets helpers that only *read* graph state (name-matching, relation
2389/// traversal, reference lookups) be written once and called from both the
2390/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2391/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
2392pub trait GraphAccess {
2393 /// Returns the node arena (read-only).
2394 fn nodes(&self) -> &NodeArena;
2395 /// Returns the bidirectional edge store (read-only).
2396 fn edges(&self) -> &BidirectionalEdgeStore;
2397 /// Returns the string interner (read-only).
2398 fn strings(&self) -> &StringInterner;
2399 /// Returns the file registry (read-only).
2400 fn files(&self) -> &FileRegistry;
2401 /// Returns the auxiliary indices (read-only).
2402 fn indices(&self) -> &AuxiliaryIndices;
2403}
2404
2405impl GraphAccess for CodeGraph {
2406 #[inline]
2407 fn nodes(&self) -> &NodeArena {
2408 CodeGraph::nodes(self)
2409 }
2410 #[inline]
2411 fn edges(&self) -> &BidirectionalEdgeStore {
2412 CodeGraph::edges(self)
2413 }
2414 #[inline]
2415 fn strings(&self) -> &StringInterner {
2416 CodeGraph::strings(self)
2417 }
2418 #[inline]
2419 fn files(&self) -> &FileRegistry {
2420 CodeGraph::files(self)
2421 }
2422 #[inline]
2423 fn indices(&self) -> &AuxiliaryIndices {
2424 CodeGraph::indices(self)
2425 }
2426}
2427
2428impl GraphAccess for GraphSnapshot {
2429 #[inline]
2430 fn nodes(&self) -> &NodeArena {
2431 GraphSnapshot::nodes(self)
2432 }
2433 #[inline]
2434 fn edges(&self) -> &BidirectionalEdgeStore {
2435 GraphSnapshot::edges(self)
2436 }
2437 #[inline]
2438 fn strings(&self) -> &StringInterner {
2439 GraphSnapshot::strings(self)
2440 }
2441 #[inline]
2442 fn files(&self) -> &FileRegistry {
2443 GraphSnapshot::files(self)
2444 }
2445 #[inline]
2446 fn indices(&self) -> &AuxiliaryIndices {
2447 GraphSnapshot::indices(self)
2448 }
2449}
2450
2451#[cfg(test)]
2452mod tests {
2453 use super::*;
2454 use crate::graph::unified::{
2455 FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2456 SymbolResolutionOutcome,
2457 };
2458
2459 fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2460 match snapshot.resolve_symbol(&SymbolQuery {
2461 symbol,
2462 file_scope: FileScope::Any,
2463 mode: ResolutionMode::Strict,
2464 }) {
2465 SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2466 SymbolResolutionOutcome::NotFound
2467 | SymbolResolutionOutcome::FileNotIndexed
2468 | SymbolResolutionOutcome::Ambiguous(_) => None,
2469 }
2470 }
2471
2472 fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2473 match snapshot.find_symbol_candidates(&SymbolQuery {
2474 symbol,
2475 file_scope: FileScope::Any,
2476 mode: ResolutionMode::AllowSuffixCandidates,
2477 }) {
2478 SymbolCandidateOutcome::Candidates(candidates) => candidates,
2479 SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2480 }
2481 }
2482
2483 #[test]
2484 fn test_code_graph_new() {
2485 let graph = CodeGraph::new();
2486 assert_eq!(graph.epoch(), 0);
2487 assert_eq!(graph.nodes().len(), 0);
2488 }
2489
2490 #[test]
2491 fn test_code_graph_default() {
2492 let graph = CodeGraph::default();
2493 assert_eq!(graph.epoch(), 0);
2494 }
2495
2496 #[test]
2497 fn test_code_graph_snapshot() {
2498 let graph = CodeGraph::new();
2499 let snapshot = graph.snapshot();
2500 assert_eq!(snapshot.epoch(), 0);
2501 assert_eq!(snapshot.nodes().len(), 0);
2502 }
2503
2504 #[test]
2505 fn test_code_graph_bump_epoch() {
2506 let mut graph = CodeGraph::new();
2507 assert_eq!(graph.epoch(), 0);
2508 assert_eq!(graph.bump_epoch(), 1);
2509 assert_eq!(graph.epoch(), 1);
2510 assert_eq!(graph.bump_epoch(), 2);
2511 assert_eq!(graph.epoch(), 2);
2512 }
2513
2514 #[test]
2515 fn test_code_graph_set_epoch() {
2516 let mut graph = CodeGraph::new();
2517 graph.set_epoch(42);
2518 assert_eq!(graph.epoch(), 42);
2519 }
2520
2521 #[test]
2522 fn test_code_graph_from_components() {
2523 let nodes = NodeArena::new();
2524 let edges = BidirectionalEdgeStore::new();
2525 let strings = StringInterner::new();
2526 let files = FileRegistry::new();
2527 let indices = AuxiliaryIndices::new();
2528 let macro_metadata = NodeMetadataStore::new();
2529
2530 let graph =
2531 CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2532 assert_eq!(graph.epoch(), 0);
2533 }
2534
2535 #[test]
2536 fn test_code_graph_mut_accessors() {
2537 let mut graph = CodeGraph::new();
2538
2539 // Access mutable references - should not panic
2540 let _nodes = graph.nodes_mut();
2541 let _edges = graph.edges_mut();
2542 let _strings = graph.strings_mut();
2543 let _files = graph.files_mut();
2544 let _indices = graph.indices_mut();
2545 }
2546
2547 #[test]
2548 fn test_code_graph_snapshot_isolation() {
2549 let mut graph = CodeGraph::new();
2550 let snapshot1 = graph.snapshot();
2551
2552 // Mutate the graph
2553 graph.bump_epoch();
2554
2555 let snapshot2 = graph.snapshot();
2556
2557 // Snapshots should have different epochs
2558 assert_eq!(snapshot1.epoch(), 0);
2559 assert_eq!(snapshot2.epoch(), 1);
2560 }
2561
2562 #[test]
2563 fn test_code_graph_debug() {
2564 let graph = CodeGraph::new();
2565 let debug_str = format!("{graph:?}");
2566 assert!(debug_str.contains("CodeGraph"));
2567 assert!(debug_str.contains("epoch"));
2568 }
2569
2570 /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
2571 /// accounting: adding a `ConfidenceMetadata` entry with known inner
2572 /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
2573 /// of those capacities plus Vec<String> slot overhead.
2574 #[test]
2575 fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
2576 let mut graph = CodeGraph::new();
2577 // Seed one confidence entry so the HashMap has non-zero capacity;
2578 // reserve slack so the next insert cannot rehash.
2579 graph.set_confidence({
2580 let mut m = HashMap::with_capacity(8);
2581 m.insert("seed".to_string(), ConfidenceMetadata::default());
2582 m
2583 });
2584 let before = graph.heap_bytes();
2585 let before_cap = graph.confidence.capacity();
2586
2587 let lim1 = String::from("no type inference");
2588 let lim2 = String::from("no generic specialization");
2589 let feat1 = String::from("rust-analyzer");
2590 let l1 = lim1.capacity();
2591 let l2 = lim2.capacity();
2592 let f1 = feat1.capacity();
2593
2594 let limitations = vec![lim1, lim2];
2595 let lim_vec_cap = limitations.capacity();
2596
2597 let unavailable_features = vec![feat1];
2598 let feat_vec_cap = unavailable_features.capacity();
2599
2600 let key = String::from("rust");
2601 let key_cap = key.capacity();
2602 graph.confidence.insert(
2603 key,
2604 ConfidenceMetadata {
2605 limitations,
2606 unavailable_features,
2607 ..Default::default()
2608 },
2609 );
2610 assert_eq!(
2611 graph.confidence.capacity(),
2612 before_cap,
2613 "prerequisite: confidence HashMap must not rehash during the test insert",
2614 );
2615
2616 let after = graph.heap_bytes();
2617 let expected_inner = key_cap
2618 + lim_vec_cap * std::mem::size_of::<String>()
2619 + l1
2620 + l2
2621 + feat_vec_cap * std::mem::size_of::<String>()
2622 + f1;
2623 assert_eq!(
2624 after - before,
2625 expected_inner,
2626 "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
2627 );
2628 }
2629
2630 #[test]
2631 fn test_codegraph_heap_bytes_grows_with_content() {
2632 use crate::graph::unified::node::NodeKind;
2633 use crate::graph::unified::storage::arena::NodeEntry;
2634 use std::path::Path;
2635
2636 // An empty graph reports some heap bytes (FileRegistry seeds index 0
2637 // with `vec![None]`, HashMaps have non-zero base capacity once touched,
2638 // etc.) but the value must be finite and well under the 100 MB cap.
2639 let empty = CodeGraph::new();
2640 let empty_bytes = empty.heap_bytes();
2641 assert!(
2642 empty_bytes < 100 * 1024 * 1024,
2643 "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2644 );
2645
2646 let mut graph = CodeGraph::new();
2647 for i in 0..32u32 {
2648 let name = format!("sym_{i}");
2649 let qual = format!("module::sym_{i}");
2650 let file = format!("file_{i}.rs");
2651
2652 let name_id = graph.strings_mut().intern(&name).unwrap();
2653 let qual_id = graph.strings_mut().intern(&qual).unwrap();
2654 let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2655
2656 let entry =
2657 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2658 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2659 graph
2660 .indices_mut()
2661 .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2662 }
2663
2664 let populated_bytes = graph.heap_bytes();
2665 assert!(
2666 populated_bytes > 0,
2667 "populated graph should report nonzero heap bytes"
2668 );
2669 assert!(
2670 populated_bytes > empty_bytes,
2671 "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2672 );
2673 assert!(
2674 populated_bytes < 100 * 1024 * 1024,
2675 "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2676 );
2677 }
2678
2679 #[test]
2680 fn test_concurrent_code_graph_new() {
2681 let graph = ConcurrentCodeGraph::new();
2682 assert_eq!(graph.epoch(), 0);
2683 }
2684
2685 #[test]
2686 fn test_concurrent_code_graph_default() {
2687 let graph = ConcurrentCodeGraph::default();
2688 assert_eq!(graph.epoch(), 0);
2689 }
2690
2691 #[test]
2692 fn test_concurrent_code_graph_from_graph() {
2693 let mut inner = CodeGraph::new();
2694 inner.set_epoch(10);
2695 let graph = ConcurrentCodeGraph::from_graph(inner);
2696 assert_eq!(graph.epoch(), 10);
2697 }
2698
2699 #[test]
2700 fn test_concurrent_code_graph_read() {
2701 let graph = ConcurrentCodeGraph::new();
2702 let guard = graph.read();
2703 assert_eq!(guard.epoch(), 0);
2704 assert_eq!(guard.nodes().len(), 0);
2705 }
2706
2707 #[test]
2708 fn test_concurrent_code_graph_write_increments_epoch() {
2709 let graph = ConcurrentCodeGraph::new();
2710 assert_eq!(graph.epoch(), 0);
2711
2712 {
2713 let guard = graph.write();
2714 assert_eq!(guard.epoch(), 1);
2715 }
2716
2717 assert_eq!(graph.epoch(), 1);
2718
2719 {
2720 let _guard = graph.write();
2721 }
2722
2723 assert_eq!(graph.epoch(), 2);
2724 }
2725
2726 #[test]
2727 fn test_concurrent_code_graph_snapshot() {
2728 let graph = ConcurrentCodeGraph::new();
2729
2730 {
2731 let _guard = graph.write();
2732 }
2733
2734 let snapshot = graph.snapshot();
2735 assert_eq!(snapshot.epoch(), 1);
2736 }
2737
2738 #[test]
2739 fn test_concurrent_code_graph_try_read() {
2740 let graph = ConcurrentCodeGraph::new();
2741 let guard = graph.try_read();
2742 assert!(guard.is_some());
2743 }
2744
2745 #[test]
2746 fn test_concurrent_code_graph_try_write() {
2747 let graph = ConcurrentCodeGraph::new();
2748 let guard = graph.try_write();
2749 assert!(guard.is_some());
2750 assert_eq!(graph.epoch(), 1);
2751 }
2752
2753 #[test]
2754 fn test_concurrent_code_graph_debug() {
2755 let graph = ConcurrentCodeGraph::new();
2756 let debug_str = format!("{graph:?}");
2757 assert!(debug_str.contains("ConcurrentCodeGraph"));
2758 assert!(debug_str.contains("epoch"));
2759 }
2760
2761 #[test]
2762 fn test_graph_snapshot_accessors() {
2763 let graph = CodeGraph::new();
2764 let snapshot = graph.snapshot();
2765
2766 // All accessors should work
2767 let _nodes = snapshot.nodes();
2768 let _edges = snapshot.edges();
2769 let _strings = snapshot.strings();
2770 let _files = snapshot.files();
2771 let _indices = snapshot.indices();
2772 let _epoch = snapshot.epoch();
2773 }
2774
2775 #[test]
2776 fn test_graph_snapshot_epoch_matches() {
2777 let graph = CodeGraph::new();
2778 let snapshot = graph.snapshot();
2779
2780 assert!(snapshot.epoch_matches(0));
2781 assert!(!snapshot.epoch_matches(1));
2782 }
2783
2784 #[test]
2785 fn test_graph_snapshot_clone() {
2786 let graph = CodeGraph::new();
2787 let snapshot1 = graph.snapshot();
2788 let snapshot2 = snapshot1.clone();
2789
2790 assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2791 }
2792
2793 #[test]
2794 fn test_graph_snapshot_debug() {
2795 let graph = CodeGraph::new();
2796 let snapshot = graph.snapshot();
2797 let debug_str = format!("{snapshot:?}");
2798 assert!(debug_str.contains("GraphSnapshot"));
2799 assert!(debug_str.contains("epoch"));
2800 }
2801
2802 #[test]
2803 fn test_multiple_readers() {
2804 let graph = ConcurrentCodeGraph::new();
2805
2806 // Multiple readers should be able to acquire locks simultaneously
2807 let guard1 = graph.read();
2808 let guard2 = graph.read();
2809 let guard3 = graph.read();
2810
2811 assert_eq!(guard1.epoch(), 0);
2812 assert_eq!(guard2.epoch(), 0);
2813 assert_eq!(guard3.epoch(), 0);
2814 }
2815
2816 #[test]
2817 fn test_code_graph_clone() {
2818 let mut graph = CodeGraph::new();
2819 graph.bump_epoch();
2820
2821 let cloned = graph.clone();
2822 assert_eq!(cloned.epoch(), 1);
2823 }
2824
2825 #[test]
2826 fn test_epoch_wrapping() {
2827 let mut graph = CodeGraph::new();
2828 graph.set_epoch(u64::MAX);
2829 let new_epoch = graph.bump_epoch();
2830 assert_eq!(new_epoch, 0); // Should wrap around
2831 }
2832
2833 // ============================================================================
2834 // Query method tests
2835 // ============================================================================
2836
2837 #[test]
2838 fn test_snapshot_resolve_symbol() {
2839 use crate::graph::unified::node::NodeKind;
2840 use crate::graph::unified::storage::arena::NodeEntry;
2841 use std::path::Path;
2842
2843 let mut graph = CodeGraph::new();
2844
2845 // Add some nodes with qualified names
2846 let name_id = graph.strings_mut().intern("test_func").unwrap();
2847 let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2848 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2849
2850 let entry =
2851 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2852
2853 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2854 graph.indices_mut().add(
2855 node_id,
2856 NodeKind::Function,
2857 name_id,
2858 Some(qual_name_id),
2859 file_id,
2860 );
2861
2862 let snapshot = graph.snapshot();
2863
2864 // Find by qualified name
2865 let found = resolve_symbol_strict(&snapshot, "module::test_func");
2866 assert_eq!(found, Some(node_id));
2867
2868 // Find by exact simple name
2869 let found2 = resolve_symbol_strict(&snapshot, "test_func");
2870 assert_eq!(found2, Some(node_id));
2871
2872 // Not found
2873 assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2874 }
2875
2876 #[test]
2877 fn test_snapshot_find_by_pattern() {
2878 use crate::graph::unified::node::NodeKind;
2879 use crate::graph::unified::storage::arena::NodeEntry;
2880 use std::path::Path;
2881
2882 let mut graph = CodeGraph::new();
2883
2884 // Add nodes with different names
2885 let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2886 let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2887 let name3 = graph.strings_mut().intern("qux_test").unwrap();
2888 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2889
2890 let node1 = graph
2891 .nodes_mut()
2892 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2893 .unwrap();
2894 let node2 = graph
2895 .nodes_mut()
2896 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2897 .unwrap();
2898 let node3 = graph
2899 .nodes_mut()
2900 .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2901 .unwrap();
2902
2903 graph
2904 .indices_mut()
2905 .add(node1, NodeKind::Function, name1, None, file_id);
2906 graph
2907 .indices_mut()
2908 .add(node2, NodeKind::Function, name2, None, file_id);
2909 graph
2910 .indices_mut()
2911 .add(node3, NodeKind::Function, name3, None, file_id);
2912
2913 let snapshot = graph.snapshot();
2914
2915 // Find by pattern
2916 let matches = snapshot.find_by_pattern("bar");
2917 assert_eq!(matches.len(), 2);
2918 assert!(matches.contains(&node1));
2919 assert!(matches.contains(&node2));
2920
2921 // Find single match
2922 let matches = snapshot.find_by_pattern("qux");
2923 assert_eq!(matches.len(), 1);
2924 assert_eq!(matches[0], node3);
2925
2926 // No matches
2927 let matches = snapshot.find_by_pattern("nonexistent");
2928 assert!(matches.is_empty());
2929 }
2930
2931 #[test]
2932 fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
2933 // C_SUPPRESS: synthetic placeholder nodes (Go plugin
2934 // `<field:operand.field>` shadows + `<ident>@<offset>`
2935 // per-binding-site Variables) must NOT surface from
2936 // find_by_pattern, but must still be reachable via
2937 // find_by_pattern_with_options(_, true).
2938 use crate::graph::unified::node::NodeKind;
2939 use crate::graph::unified::storage::arena::NodeEntry;
2940 use std::path::Path;
2941
2942 let mut graph = CodeGraph::new();
2943 let real_property = graph
2944 .strings_mut()
2945 .intern("main.SelectorSource.NeedTags")
2946 .unwrap();
2947 let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
2948 let synthetic_field = graph
2949 .strings_mut()
2950 .intern("<field:selector.NeedTags>")
2951 .unwrap();
2952 let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
2953 let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
2954 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
2955
2956 let prop_id = graph
2957 .nodes_mut()
2958 .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
2959 .unwrap();
2960 let local_var_id = graph
2961 .nodes_mut()
2962 .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
2963 .unwrap();
2964 let syn_field_id = graph
2965 .nodes_mut()
2966 .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
2967 .unwrap();
2968 let syn_a_id = graph
2969 .nodes_mut()
2970 .alloc(NodeEntry::new(
2971 NodeKind::Variable,
2972 synthetic_offset_a,
2973 file_id,
2974 ))
2975 .unwrap();
2976 let syn_b_id = graph
2977 .nodes_mut()
2978 .alloc(NodeEntry::new(
2979 NodeKind::Variable,
2980 synthetic_offset_b,
2981 file_id,
2982 ))
2983 .unwrap();
2984
2985 graph
2986 .indices_mut()
2987 .add(prop_id, NodeKind::Property, real_property, None, file_id);
2988 graph.indices_mut().add(
2989 local_var_id,
2990 NodeKind::Variable,
2991 real_local_var,
2992 None,
2993 file_id,
2994 );
2995 graph.indices_mut().add(
2996 syn_field_id,
2997 NodeKind::Variable,
2998 synthetic_field,
2999 None,
3000 file_id,
3001 );
3002 graph.indices_mut().add(
3003 syn_a_id,
3004 NodeKind::Variable,
3005 synthetic_offset_a,
3006 None,
3007 file_id,
3008 );
3009 graph.indices_mut().add(
3010 syn_b_id,
3011 NodeKind::Variable,
3012 synthetic_offset_b,
3013 None,
3014 file_id,
3015 );
3016
3017 // Flag two of them via the metadata-store bit (the canonical
3018 // Go-plugin emission path) and leave one (`<field:...>`) only
3019 // covered by the structural name-shape fallback to verify both
3020 // recognition channels suppress the leak.
3021 graph.macro_metadata_mut().mark_synthetic(syn_a_id);
3022 graph.macro_metadata_mut().mark_synthetic(syn_b_id);
3023
3024 let snapshot = graph.snapshot();
3025
3026 // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
3027 let matches = snapshot.find_by_pattern("NeedTags");
3028 assert!(matches.contains(&prop_id), "Property must be surfaced");
3029 assert!(
3030 matches.contains(&local_var_id),
3031 "real local var must be surfaced"
3032 );
3033 assert!(
3034 !matches.contains(&syn_field_id),
3035 "<field:...> synthetic must be suppressed (name-shape fallback)"
3036 );
3037 assert!(
3038 !matches.contains(&syn_a_id),
3039 "NeedTags@469 must be suppressed (metadata bit)"
3040 );
3041 assert!(
3042 !matches.contains(&syn_b_id),
3043 "NeedTags@508 must be suppressed (metadata bit)"
3044 );
3045 assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");
3046
3047 // Internal include-synthetic surface (binding plane, scope analysis):
3048 // every node remains reachable.
3049 let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
3050 assert_eq!(
3051 all_matches.len(),
3052 5,
3053 "include_synthetic surfaces everything"
3054 );
3055 assert!(all_matches.contains(&prop_id));
3056 assert!(all_matches.contains(&local_var_id));
3057 assert!(all_matches.contains(&syn_field_id));
3058 assert!(all_matches.contains(&syn_a_id));
3059 assert!(all_matches.contains(&syn_b_id));
3060
3061 // is_node_synthetic exposed for surface-level filters
3062 // (e.g., MCP semantic_search/relation_query post-filters).
3063 assert!(snapshot.is_node_synthetic(syn_field_id));
3064 assert!(snapshot.is_node_synthetic(syn_a_id));
3065 assert!(snapshot.is_node_synthetic(syn_b_id));
3066 assert!(!snapshot.is_node_synthetic(prop_id));
3067 assert!(!snapshot.is_node_synthetic(local_var_id));
3068 }
3069
3070 #[test]
3071 #[allow(clippy::too_many_lines)]
3072 fn find_by_exact_name_aligns_with_planner_name_predicate() {
3073 // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
3074 // surface for the CLI `--exact NeedTags` shorthand and the
3075 // planner's `name:NeedTags` predicate. Both paths must return
3076 // the same set against this fixture.
3077 use crate::graph::unified::node::NodeKind;
3078 use crate::graph::unified::storage::arena::NodeEntry;
3079 use std::path::Path;
3080
3081 let mut graph = CodeGraph::new();
3082 // Property nodes carry the package-qualified name as
3083 // `entry.name` (Go plugin convention; see `helper.rs`'s
3084 // `semantic_name_for_node_input`).
3085 let property_qname = graph
3086 .strings_mut()
3087 .intern("main.SelectorSource.NeedTags")
3088 .unwrap();
3089 let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
3090 let synthetic_field_name = graph
3091 .strings_mut()
3092 .intern("<field:selector.NeedTags>")
3093 .unwrap();
3094 let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
3095 let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
3096 let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
3097 let display_fallback_qname = graph
3098 .strings_mut()
3099 .intern("main::SelectorSource::Other")
3100 .unwrap();
3101 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
3102
3103 let prop_id = graph
3104 .nodes_mut()
3105 .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
3106 .unwrap();
3107 let local_var_id = graph
3108 .nodes_mut()
3109 .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
3110 .unwrap();
3111 let syn_field_id = graph
3112 .nodes_mut()
3113 .alloc(NodeEntry::new(
3114 NodeKind::Variable,
3115 synthetic_field_name,
3116 file_id,
3117 ))
3118 .unwrap();
3119 let syn_offset_id = graph
3120 .nodes_mut()
3121 .alloc(NodeEntry::new(
3122 NodeKind::Variable,
3123 synthetic_offset_name,
3124 file_id,
3125 ))
3126 .unwrap();
3127 let unrelated_id = graph
3128 .nodes_mut()
3129 .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
3130 .unwrap();
3131 let display_fallback_id = graph
3132 .nodes_mut()
3133 .alloc(
3134 NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
3135 .with_qualified_name(display_fallback_qname),
3136 )
3137 .unwrap();
3138
3139 graph
3140 .indices_mut()
3141 .add(prop_id, NodeKind::Property, property_qname, None, file_id);
3142 graph.indices_mut().add(
3143 local_var_id,
3144 NodeKind::Variable,
3145 local_var_name,
3146 None,
3147 file_id,
3148 );
3149 graph.indices_mut().add(
3150 syn_field_id,
3151 NodeKind::Variable,
3152 synthetic_field_name,
3153 None,
3154 file_id,
3155 );
3156 graph.indices_mut().add(
3157 syn_offset_id,
3158 NodeKind::Variable,
3159 synthetic_offset_name,
3160 None,
3161 file_id,
3162 );
3163 graph.indices_mut().add(
3164 unrelated_id,
3165 NodeKind::Function,
3166 unrelated_name,
3167 None,
3168 file_id,
3169 );
3170 graph.indices_mut().add(
3171 display_fallback_id,
3172 NodeKind::Property,
3173 display_fallback_name,
3174 Some(display_fallback_qname),
3175 file_id,
3176 );
3177
3178 // Mark the offset-suffixed synthetic via the metadata bit so we
3179 // exercise both recognition channels in this fixture.
3180 graph.macro_metadata_mut().mark_synthetic(syn_offset_id);
3181
3182 let snapshot = graph.snapshot();
3183
3184 // Exact-name lookup on "NeedTags" — should pick up only the
3185 // local variable (its `entry.name` is exactly "NeedTags"); it
3186 // must NOT pick up the Property (qualified name contains but
3187 // does not equal "NeedTags") and must NOT pick up either
3188 // synthetic placeholder.
3189 let exact = snapshot.find_by_exact_name("NeedTags");
3190 assert_eq!(
3191 exact,
3192 vec![local_var_id],
3193 "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
3194 );
3195
3196 // The Property's full qualified name is exact-addressable.
3197 let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
3198 assert_eq!(qualified, vec![prop_id]);
3199
3200 // Dot-qualified display form falls back to graph-canonical `::` only
3201 // when the exact dot string was absent.
3202 let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
3203 assert_eq!(display_fallback, vec![display_fallback_id]);
3204
3205 // Substring-only matches must not surface from exact lookup.
3206 assert!(
3207 snapshot
3208 .find_by_exact_name("NeedTagsHelper")
3209 .contains(&unrelated_id)
3210 );
3211 assert!(
3212 !snapshot
3213 .find_by_exact_name("NeedTags")
3214 .contains(&unrelated_id),
3215 "exact 'NeedTags' must not match 'NeedTagsHelper'"
3216 );
3217
3218 // Unknown name short-circuits to an empty vec without
3219 // scanning any nodes.
3220 assert!(
3221 snapshot
3222 .find_by_exact_name("ThisStringIsNotInterned")
3223 .is_empty()
3224 );
3225 }
3226
3227 #[test]
3228 fn test_snapshot_get_callees() {
3229 use crate::graph::unified::edge::EdgeKind;
3230 use crate::graph::unified::node::NodeKind;
3231 use crate::graph::unified::storage::arena::NodeEntry;
3232 use std::path::Path;
3233
3234 let mut graph = CodeGraph::new();
3235
3236 // Create caller and callee nodes
3237 let caller_name = graph.strings_mut().intern("caller").unwrap();
3238 let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3239 let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3240 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3241
3242 let caller_id = graph
3243 .nodes_mut()
3244 .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3245 .unwrap();
3246 let callee1_id = graph
3247 .nodes_mut()
3248 .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3249 .unwrap();
3250 let callee2_id = graph
3251 .nodes_mut()
3252 .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3253 .unwrap();
3254
3255 // Add call edges
3256 graph.edges_mut().add_edge(
3257 caller_id,
3258 callee1_id,
3259 EdgeKind::Calls {
3260 argument_count: 0,
3261 is_async: false,
3262 resolved_via: ResolvedVia::Direct,
3263 },
3264 file_id,
3265 );
3266 graph.edges_mut().add_edge(
3267 caller_id,
3268 callee2_id,
3269 EdgeKind::Calls {
3270 argument_count: 0,
3271 is_async: false,
3272 resolved_via: ResolvedVia::Direct,
3273 },
3274 file_id,
3275 );
3276
3277 let snapshot = graph.snapshot();
3278
3279 // Query callees
3280 let callees = snapshot.get_callees(caller_id);
3281 assert_eq!(callees.len(), 2);
3282 assert!(callees.contains(&callee1_id));
3283 assert!(callees.contains(&callee2_id));
3284
3285 // Node with no callees
3286 let callees = snapshot.get_callees(callee1_id);
3287 assert!(callees.is_empty());
3288 }
3289
3290 #[test]
3291 fn test_snapshot_get_callers() {
3292 use crate::graph::unified::edge::EdgeKind;
3293 use crate::graph::unified::node::NodeKind;
3294 use crate::graph::unified::storage::arena::NodeEntry;
3295 use std::path::Path;
3296
3297 let mut graph = CodeGraph::new();
3298
3299 // Create caller and callee nodes
3300 let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3301 let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3302 let callee_name = graph.strings_mut().intern("callee").unwrap();
3303 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3304
3305 let caller1_id = graph
3306 .nodes_mut()
3307 .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3308 .unwrap();
3309 let caller2_id = graph
3310 .nodes_mut()
3311 .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3312 .unwrap();
3313 let callee_id = graph
3314 .nodes_mut()
3315 .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3316 .unwrap();
3317
3318 // Add call edges
3319 graph.edges_mut().add_edge(
3320 caller1_id,
3321 callee_id,
3322 EdgeKind::Calls {
3323 argument_count: 0,
3324 is_async: false,
3325 resolved_via: ResolvedVia::Direct,
3326 },
3327 file_id,
3328 );
3329 graph.edges_mut().add_edge(
3330 caller2_id,
3331 callee_id,
3332 EdgeKind::Calls {
3333 argument_count: 0,
3334 is_async: false,
3335 resolved_via: ResolvedVia::Direct,
3336 },
3337 file_id,
3338 );
3339
3340 let snapshot = graph.snapshot();
3341
3342 // Query callers
3343 let callers = snapshot.get_callers(callee_id);
3344 assert_eq!(callers.len(), 2);
3345 assert!(callers.contains(&caller1_id));
3346 assert!(callers.contains(&caller2_id));
3347
3348 // Node with no callers
3349 let callers = snapshot.get_callers(caller1_id);
3350 assert!(callers.is_empty());
3351 }
3352
3353 #[test]
3354 fn test_snapshot_find_symbol_candidates() {
3355 use crate::graph::unified::node::NodeKind;
3356 use crate::graph::unified::storage::arena::NodeEntry;
3357 use std::path::Path;
3358
3359 let mut graph = CodeGraph::new();
3360
3361 // Add nodes with same symbol name but different qualified names
3362 let symbol_name = graph.strings_mut().intern("test").unwrap();
3363 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3364
3365 let node1 = graph
3366 .nodes_mut()
3367 .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3368 .unwrap();
3369 let node2 = graph
3370 .nodes_mut()
3371 .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3372 .unwrap();
3373
3374 // Add a different symbol
3375 let other_name = graph.strings_mut().intern("other").unwrap();
3376 let node3 = graph
3377 .nodes_mut()
3378 .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3379 .unwrap();
3380
3381 graph
3382 .indices_mut()
3383 .add(node1, NodeKind::Function, symbol_name, None, file_id);
3384 graph
3385 .indices_mut()
3386 .add(node2, NodeKind::Method, symbol_name, None, file_id);
3387 graph
3388 .indices_mut()
3389 .add(node3, NodeKind::Function, other_name, None, file_id);
3390
3391 let snapshot = graph.snapshot();
3392
3393 // Find by symbol
3394 let matches = candidate_nodes(&snapshot, "test");
3395 assert_eq!(matches.len(), 2);
3396 assert!(matches.contains(&node1));
3397 assert!(matches.contains(&node2));
3398
3399 // Find other symbol
3400 let matches = candidate_nodes(&snapshot, "other");
3401 assert_eq!(matches.len(), 1);
3402 assert_eq!(matches[0], node3);
3403
3404 // No matches
3405 let matches = candidate_nodes(&snapshot, "nonexistent");
3406 assert!(matches.is_empty());
3407 }
3408
3409 #[test]
3410 fn test_snapshot_iter_nodes() {
3411 use crate::graph::unified::node::NodeKind;
3412 use crate::graph::unified::storage::arena::NodeEntry;
3413 use std::path::Path;
3414
3415 let mut graph = CodeGraph::new();
3416
3417 // Add some nodes
3418 let name1 = graph.strings_mut().intern("func1").unwrap();
3419 let name2 = graph.strings_mut().intern("func2").unwrap();
3420 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3421
3422 let node1 = graph
3423 .nodes_mut()
3424 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3425 .unwrap();
3426 let node2 = graph
3427 .nodes_mut()
3428 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3429 .unwrap();
3430
3431 let snapshot = graph.snapshot();
3432
3433 // Iterate nodes
3434 let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3435 assert_eq!(snapshot_nodes.len(), 2);
3436
3437 let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3438 assert!(node_ids.contains(&node1));
3439 assert!(node_ids.contains(&node2));
3440 }
3441
3442 #[test]
3443 fn test_snapshot_iter_edges() {
3444 use crate::graph::unified::edge::EdgeKind;
3445 use crate::graph::unified::node::NodeKind;
3446 use crate::graph::unified::storage::arena::NodeEntry;
3447 use std::path::Path;
3448
3449 let mut graph = CodeGraph::new();
3450
3451 // Create nodes
3452 let name1 = graph.strings_mut().intern("func1").unwrap();
3453 let name2 = graph.strings_mut().intern("func2").unwrap();
3454 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3455
3456 let node1 = graph
3457 .nodes_mut()
3458 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3459 .unwrap();
3460 let node2 = graph
3461 .nodes_mut()
3462 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3463 .unwrap();
3464
3465 // Add edges
3466 graph.edges_mut().add_edge(
3467 node1,
3468 node2,
3469 EdgeKind::Calls {
3470 argument_count: 0,
3471 is_async: false,
3472 resolved_via: ResolvedVia::Direct,
3473 },
3474 file_id,
3475 );
3476
3477 let snapshot = graph.snapshot();
3478
3479 // Iterate edges
3480 let edges: Vec<_> = snapshot.iter_edges().collect();
3481 assert_eq!(edges.len(), 1);
3482
3483 let (src, tgt, kind) = &edges[0];
3484 assert_eq!(*src, node1);
3485 assert_eq!(*tgt, node2);
3486 assert!(matches!(
3487 kind,
3488 EdgeKind::Calls {
3489 argument_count: 0,
3490 is_async: false,
3491 resolved_via: ResolvedVia::Direct,
3492 }
3493 ));
3494 }
3495
3496 #[test]
3497 fn test_snapshot_get_node() {
3498 use crate::graph::unified::node::NodeId;
3499 use crate::graph::unified::node::NodeKind;
3500 use crate::graph::unified::storage::arena::NodeEntry;
3501 use std::path::Path;
3502
3503 let mut graph = CodeGraph::new();
3504
3505 // Add a node
3506 let name = graph.strings_mut().intern("test_func").unwrap();
3507 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3508
3509 let node_id = graph
3510 .nodes_mut()
3511 .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3512 .unwrap();
3513
3514 let snapshot = graph.snapshot();
3515
3516 // Get node
3517 let entry = snapshot.get_node(node_id);
3518 assert!(entry.is_some());
3519 assert_eq!(entry.unwrap().kind, NodeKind::Function);
3520
3521 // Invalid node
3522 let invalid_id = NodeId::INVALID;
3523 assert!(snapshot.get_node(invalid_id).is_none());
3524 }
3525
3526 #[test]
3527 fn test_snapshot_query_empty_graph() {
3528 use crate::graph::unified::node::NodeId;
3529
3530 let graph = CodeGraph::new();
3531 let snapshot = graph.snapshot();
3532
3533 // All queries should return empty on empty graph
3534 assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3535 assert!(snapshot.find_by_pattern("test").is_empty());
3536 assert!(candidate_nodes(&snapshot, "test").is_empty());
3537
3538 let dummy_id = NodeId::new(0, 1);
3539 assert!(snapshot.get_callees(dummy_id).is_empty());
3540 assert!(snapshot.get_callers(dummy_id).is_empty());
3541
3542 assert_eq!(snapshot.iter_nodes().count(), 0);
3543 assert_eq!(snapshot.iter_edges().count(), 0);
3544 }
3545
3546 #[test]
3547 fn test_snapshot_edge_filtering_by_kind() {
3548 use crate::graph::unified::edge::EdgeKind;
3549 use crate::graph::unified::node::NodeKind;
3550 use crate::graph::unified::storage::arena::NodeEntry;
3551 use std::path::Path;
3552
3553 let mut graph = CodeGraph::new();
3554
3555 // Create nodes
3556 let name1 = graph.strings_mut().intern("func1").unwrap();
3557 let name2 = graph.strings_mut().intern("func2").unwrap();
3558 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3559
3560 let node1 = graph
3561 .nodes_mut()
3562 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3563 .unwrap();
3564 let node2 = graph
3565 .nodes_mut()
3566 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3567 .unwrap();
3568
3569 // Add different kinds of edges
3570 graph.edges_mut().add_edge(
3571 node1,
3572 node2,
3573 EdgeKind::Calls {
3574 argument_count: 0,
3575 is_async: false,
3576 resolved_via: ResolvedVia::Direct,
3577 },
3578 file_id,
3579 );
3580 graph
3581 .edges_mut()
3582 .add_edge(node1, node2, EdgeKind::References, file_id);
3583
3584 let snapshot = graph.snapshot();
3585
3586 // get_callees should only return Calls edges
3587 let callees = snapshot.get_callees(node1);
3588 assert_eq!(callees.len(), 1);
3589 assert_eq!(callees[0], node2);
3590
3591 // iter_edges returns all edges regardless of kind
3592 let edges: Vec<_> = snapshot.iter_edges().collect();
3593 assert_eq!(edges.len(), 2);
3594 }
3595
3596 // -------- reverse_import_index tests --------
3597
3598 /// Helper: build an empty graph with the given file paths registered, a
3599 /// single placeholder node allocated per file, and indices rebuilt so
3600 /// `by_file` returns the per-file node sets. Returns the file IDs in
3601 /// the order passed in and the per-file node ID.
3602 #[cfg(test)]
3603 fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3604 use crate::graph::unified::node::NodeKind;
3605 use crate::graph::unified::storage::arena::NodeEntry;
3606 use std::path::Path;
3607
3608 let mut graph = CodeGraph::new();
3609 let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3610 let mut file_ids = Vec::with_capacity(files.len());
3611 let mut node_ids = Vec::with_capacity(files.len());
3612 for path in files {
3613 let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3614 let node_id = graph
3615 .nodes_mut()
3616 .alloc(NodeEntry::new(
3617 NodeKind::Function,
3618 placeholder_name,
3619 file_id,
3620 ))
3621 .unwrap();
3622 file_ids.push(file_id);
3623 node_ids.push(node_id);
3624 }
3625 graph.rebuild_indices();
3626 (graph, file_ids, node_ids)
3627 }
3628
3629 /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3630 /// `target_node` (in exporter file). The edge is recorded against the
3631 /// importer file so `clear_file` cleanup would behave identically to
3632 /// production Pass 4 writes.
3633 #[cfg(test)]
3634 fn add_import_edge(
3635 graph: &mut CodeGraph,
3636 source_node: NodeId,
3637 target_node: NodeId,
3638 importer_file: FileId,
3639 ) {
3640 graph.edges_mut().add_edge(
3641 source_node,
3642 target_node,
3643 EdgeKind::Imports {
3644 alias: None,
3645 is_wildcard: false,
3646 },
3647 importer_file,
3648 );
3649 }
3650
3651 #[test]
3652 fn reverse_import_index_empty_graph_returns_empty() {
3653 let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3654 assert!(graph.reverse_import_index(files[0]).is_empty());
3655 }
3656
3657 #[test]
3658 fn reverse_import_index_single_importer() {
3659 // File A imports a symbol exported by file B. Reverse index of B
3660 // should return exactly [A]; reverse index of A should be empty.
3661 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3662 let (a, b) = (files[0], files[1]);
3663 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3664
3665 let importers_of_b = graph.reverse_import_index(b);
3666 assert_eq!(importers_of_b, vec![a]);
3667
3668 let importers_of_a = graph.reverse_import_index(a);
3669 assert!(
3670 importers_of_a.is_empty(),
3671 "A has no inbound Imports edges; reverse index must be empty"
3672 );
3673 }
3674
3675 #[test]
3676 fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3677 // A, B, C all import from D. Reverse index of D must contain each
3678 // importer exactly once, sorted ascending by FileId.
3679 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3680 let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3681 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3682 add_import_edge(&mut graph, nodes[1], nodes[3], b);
3683 add_import_edge(&mut graph, nodes[2], nodes[3], c);
3684 // Add a duplicate edge from A to D to confirm dedup behavior.
3685 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3686
3687 let importers_of_d = graph.reverse_import_index(d);
3688 assert_eq!(importers_of_d, vec![a, b, c]);
3689 // Sort invariant: Vec is ascending by raw index.
3690 let mut sorted = importers_of_d.clone();
3691 sorted.sort();
3692 assert_eq!(importers_of_d, sorted);
3693 }
3694
3695 #[test]
3696 fn reverse_import_index_filters_non_import_edges() {
3697 // A `Calls` edge from A into B must not contribute to B's reverse
3698 // import index. Only `EdgeKind::Imports` edges count.
3699 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3700 let (a, b) = (files[0], files[1]);
3701 graph.edges_mut().add_edge(
3702 nodes[0],
3703 nodes[1],
3704 EdgeKind::Calls {
3705 argument_count: 0,
3706 is_async: false,
3707 resolved_via: ResolvedVia::Direct,
3708 },
3709 a,
3710 );
3711 graph
3712 .edges_mut()
3713 .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3714
3715 assert!(
3716 graph.reverse_import_index(b).is_empty(),
3717 "non-Imports edges must not register as importers"
3718 );
3719 }
3720
3721 #[test]
3722 fn reverse_import_index_elides_self_imports() {
3723 // An Imports edge whose source and target are both in the same file
3724 // is a self-import; the caller's own file must not appear in its own
3725 // reverse index.
3726 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3727 let a = files[0];
3728 // Add a second node in the same file so we have a distinct source.
3729 let name2 = graph.strings_mut().intern("sym2").unwrap();
3730 let second_in_a = graph
3731 .nodes_mut()
3732 .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3733 crate::graph::unified::node::NodeKind::Function,
3734 name2,
3735 a,
3736 ))
3737 .unwrap();
3738 graph.rebuild_indices();
3739 add_import_edge(&mut graph, second_in_a, nodes[0], a);
3740
3741 assert!(
3742 graph.reverse_import_index(a).is_empty(),
3743 "self-imports must be elided from reverse index"
3744 );
3745 }
3746
3747 #[test]
3748 fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3749 // Two files: A has both Calls and Imports edges into B. Reverse
3750 // index must return exactly [A] — the Calls edge contributes
3751 // nothing.
3752 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3753 let (a, b) = (files[0], files[1]);
3754 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3755 graph.edges_mut().add_edge(
3756 nodes[0],
3757 nodes[1],
3758 EdgeKind::Calls {
3759 argument_count: 0,
3760 is_async: false,
3761 resolved_via: ResolvedVia::Direct,
3762 },
3763 a,
3764 );
3765
3766 assert_eq!(graph.reverse_import_index(b), vec![a]);
3767 }
3768
3769 #[test]
3770 fn reverse_import_index_uninitialized_file_returns_empty() {
3771 // Querying a FileId that is not registered in the graph must return
3772 // an empty Vec, not panic.
3773 let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3774 let bogus = FileId::new(9999);
3775 assert!(
3776 graph.reverse_import_index(bogus).is_empty(),
3777 "unknown FileId must return empty Vec without panicking"
3778 );
3779 }
3780
3781 #[test]
3782 fn reverse_import_index_skips_tombstoned_source_nodes() {
3783 // In the incremental rebuild path the prior graph retains tombstoned
3784 // arena slots for nodes belonging to closure files that have been
3785 // removed but not yet fully compacted. reverse_import_index is
3786 // called on that prior graph to widen the closure, so its
3787 // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3788 // is a semantically important branch, not a theoretical one.
3789 // This test exercises it directly by tombstoning the source node of
3790 // an Imports edge and asserting the edge silently disappears from
3791 // the reverse index.
3792 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3793 let (a, b) = (files[0], files[1]);
3794 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3795 // Sanity: before tombstoning, A shows up as the importer of B.
3796 assert_eq!(graph.reverse_import_index(b), vec![a]);
3797 // Tombstone the source node via the arena (generation bump). The
3798 // Imports edge still exists in the edge store but its source NodeId
3799 // is now stale; `nodes.get(edge_ref.source)` returns None and the
3800 // guard skips the edge.
3801 let removed = graph.nodes_mut().remove(nodes[0]);
3802 assert!(
3803 removed.is_some(),
3804 "arena.remove must succeed for a live node"
3805 );
3806 assert!(
3807 graph.nodes().get(nodes[0]).is_none(),
3808 "tombstoned lookup must return None"
3809 );
3810 assert!(
3811 graph.reverse_import_index(b).is_empty(),
3812 "Imports edges whose source is tombstoned must be silently skipped"
3813 );
3814 }
3815
3816 // ------------------------------------------------------------------
3817 // Task 4 Step 2 — CodeGraph::remove_file
3818 // ------------------------------------------------------------------
3819
3820 /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3821 /// of intra- and inter-file edges. Each call site produces a
3822 /// canonical topology so tests below can assert bit-level on edge
3823 /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3824 /// file_b_nodes)`.
3825 fn seed_two_file_graph(
3826 per_file: usize,
3827 ) -> (
3828 CodeGraph,
3829 crate::graph::unified::file::FileId,
3830 crate::graph::unified::file::FileId,
3831 Vec<NodeId>,
3832 Vec<NodeId>,
3833 ) {
3834 use crate::graph::unified::edge::EdgeKind;
3835 use crate::graph::unified::node::NodeKind;
3836 use crate::graph::unified::storage::arena::NodeEntry;
3837 use std::path::Path;
3838
3839 let mut graph = CodeGraph::new();
3840 let sym = graph.strings_mut().intern("sym").expect("intern");
3841 let file_a = graph
3842 .files_mut()
3843 .register(Path::new("/tmp/remove_file_test/a.rs"))
3844 .expect("register a");
3845 let file_b = graph
3846 .files_mut()
3847 .register(Path::new("/tmp/remove_file_test/b.rs"))
3848 .expect("register b");
3849
3850 let mut file_a_nodes = Vec::with_capacity(per_file);
3851 let mut file_b_nodes = Vec::with_capacity(per_file);
3852
3853 for _ in 0..per_file {
3854 let n = graph
3855 .nodes_mut()
3856 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3857 .expect("alloc a-node");
3858 file_a_nodes.push(n);
3859 graph.files_mut().record_node(file_a, n);
3860 graph
3861 .indices_mut()
3862 .add(n, NodeKind::Function, sym, None, file_a);
3863 }
3864 for _ in 0..per_file {
3865 let n = graph
3866 .nodes_mut()
3867 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3868 .expect("alloc b-node");
3869 file_b_nodes.push(n);
3870 graph.files_mut().record_node(file_b, n);
3871 graph
3872 .indices_mut()
3873 .add(n, NodeKind::Function, sym, None, file_b);
3874 }
3875
3876 // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3877 // b[i] -> b[i+1]. These are the ones that must die when the
3878 // corresponding file is removed.
3879 for i in 0..per_file.saturating_sub(1) {
3880 graph.edges_mut().add_edge(
3881 file_a_nodes[i],
3882 file_a_nodes[i + 1],
3883 EdgeKind::Calls {
3884 argument_count: 0,
3885 is_async: false,
3886 resolved_via: ResolvedVia::Direct,
3887 },
3888 file_a,
3889 );
3890 graph.edges_mut().add_edge(
3891 file_b_nodes[i],
3892 file_b_nodes[i + 1],
3893 EdgeKind::Calls {
3894 argument_count: 0,
3895 is_async: false,
3896 resolved_via: ResolvedVia::Direct,
3897 },
3898 file_b,
3899 );
3900 }
3901 // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3902 // when *either* endpoint's file is removed (plan §F.2).
3903 graph.edges_mut().add_edge(
3904 file_a_nodes[0],
3905 file_b_nodes[0],
3906 EdgeKind::Calls {
3907 argument_count: 0,
3908 is_async: false,
3909 resolved_via: ResolvedVia::Direct,
3910 },
3911 file_a,
3912 );
3913 graph.edges_mut().add_edge(
3914 file_b_nodes[0],
3915 file_a_nodes[0],
3916 EdgeKind::Calls {
3917 argument_count: 0,
3918 is_async: false,
3919 resolved_via: ResolvedVia::Direct,
3920 },
3921 file_b,
3922 );
3923
3924 (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3925 }
3926
3927 #[test]
3928 fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3929 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3930
3931 let returned = graph.remove_file(file_a);
3932
3933 // Returned list must equal the original per-file bucket
3934 // membership, deterministically.
3935 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3936 let expected_set: std::collections::HashSet<NodeId> =
3937 file_a_nodes.iter().copied().collect();
3938 assert_eq!(
3939 returned_set, expected_set,
3940 "remove_file must return exactly the file_a nodes drained from the bucket"
3941 );
3942
3943 // Every returned NodeId is gone from the arena.
3944 for nid in &file_a_nodes {
3945 assert!(
3946 graph.nodes().get(*nid).is_none(),
3947 "node {nid:?} from removed file must be tombstoned in arena"
3948 );
3949 }
3950 }
3951
3952 #[test]
3953 fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
3954 use crate::graph::unified::edge::EdgeKind;
3955
3956 let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
3957
3958 // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
3959 // 2 cross-file edges (a0↔b0).
3960 let before_delta = graph.edges().stats().forward.delta_edge_count;
3961 assert_eq!(
3962 before_delta, 6,
3963 "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
3964 );
3965
3966 let _ = graph.remove_file(file_a);
3967
3968 // Forward delta after removal: all 2 intra-A edges are gone,
3969 // both cross edges are gone, only the 2 intra-B edges remain.
3970 let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
3971 assert_eq!(
3972 after_delta_forward, 2,
3973 "only intra-B forward edges must remain after removing file_a"
3974 );
3975 let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
3976 assert_eq!(
3977 after_delta_reverse, 2,
3978 "only intra-B reverse edges must remain after removing file_a"
3979 );
3980
3981 // Cross-file edge from b[0] -> a[0] must no longer be visible
3982 // from either direction, because a[0] is tombstoned.
3983 let b0 = file_b_nodes[0];
3984 let a0 = file_a_nodes[0];
3985 let remaining_from_b0: Vec<_> = graph
3986 .edges()
3987 .edges_from(b0)
3988 .into_iter()
3989 .filter(|e| {
3990 matches!(
3991 e.kind,
3992 EdgeKind::Calls {
3993 argument_count: 0,
3994 is_async: false,
3995 resolved_via: ResolvedVia::Direct,
3996 }
3997 )
3998 })
3999 .collect();
4000 assert!(
4001 !remaining_from_b0.iter().any(|e| e.target == a0),
4002 "edge b0 -> a0 must be gone after remove_file(file_a)"
4003 );
4004 let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
4005 assert!(
4006 remaining_to_a0.is_empty(),
4007 "every edge targeting the tombstoned a0 must be gone"
4008 );
4009 }
4010
4011 #[test]
4012 fn code_graph_remove_file_drops_file_registry_entry() {
4013 let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
4014
4015 assert!(
4016 graph.files().resolve(file_a).is_some(),
4017 "seed registered file_a"
4018 );
4019 assert!(
4020 !graph.files().nodes_for_file(file_a).is_empty(),
4021 "seed populated the file_a bucket"
4022 );
4023
4024 let _ = graph.remove_file(file_a);
4025
4026 assert!(
4027 graph.files().resolve(file_a).is_none(),
4028 "FileRegistry::resolve must return None after remove_file"
4029 );
4030 assert!(
4031 graph.files().nodes_for_file(file_a).is_empty(),
4032 "per-file bucket for file_a must be drained"
4033 );
4034 }
4035
4036 #[test]
4037 fn code_graph_remove_file_is_idempotent_on_unknown_file() {
4038 use crate::graph::unified::file::FileId;
4039 let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
4040
4041 // Snapshot state before idempotent no-op.
4042 let nodes_before = graph.nodes().len();
4043 let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
4044 let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
4045 let files_before = graph.files().len();
4046
4047 // A FileId that was never registered: the caller may legitimately
4048 // receive a bogus id from stale indexing state. `remove_file`
4049 // must be a silent no-op.
4050 let bogus = FileId::new(9999);
4051 let returned = graph.remove_file(bogus);
4052 assert!(
4053 returned.is_empty(),
4054 "remove_file on unknown FileId must return an empty Vec"
4055 );
4056
4057 assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
4058 assert_eq!(
4059 graph.edges().stats().forward.delta_edge_count,
4060 delta_fwd_before,
4061 "forward delta unchanged"
4062 );
4063 assert_eq!(
4064 graph.edges().stats().reverse.delta_edge_count,
4065 delta_rev_before,
4066 "reverse delta unchanged"
4067 );
4068 assert_eq!(graph.files().len(), files_before, "file count unchanged");
4069 }
4070
4071 #[test]
4072 fn code_graph_remove_file_clears_file_segments_entry() {
4073 // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
4074 // must be cleared on `remove_file`. Without this, a later
4075 // `FileId` recycle (via `FileRegistry::free_list`) would
4076 // inherit the previous file's stale node-range and
4077 // `reindex_files` (`build/reindex.rs`) would tombstone the
4078 // wrong slots. This unit test seeds a segment entry, removes
4079 // the file, and asserts the entry is gone.
4080 use crate::graph::unified::storage::segment::FileSegmentTable;
4081
4082 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4083
4084 // Seed a segment for file A. Production code sets this via
4085 // Phase 3 parallel commit; here we go through the crate-
4086 // internal accessor because we are a unit test inside the
4087 // crate. (The feature-gated `test_only_record_file_segment`
4088 // public helper exists for the integration tests in
4089 // `sqry-core/tests/incremental_remove_file_scale.rs`.)
4090 let first_index = file_a_nodes
4091 .iter()
4092 .map(|n| n.index())
4093 .min()
4094 .expect("per_file = 3");
4095 let last_index = file_a_nodes
4096 .iter()
4097 .map(|n| n.index())
4098 .max()
4099 .expect("per_file = 3");
4100 let slot_count = last_index - first_index + 1;
4101 let table: &mut FileSegmentTable = graph.file_segments_mut();
4102 table.record_range(file_a, first_index, slot_count);
4103 assert!(
4104 graph.file_segments().get(file_a).is_some(),
4105 "seed must install a segment for file_a before remove_file"
4106 );
4107
4108 // Remove the file.
4109 let _ = graph.remove_file(file_a);
4110
4111 // The segment entry must be gone. `FileSegmentTable::remove`
4112 // is idempotent (a no-op on unknown ids), so this assertion
4113 // holds whether or not the seeded range was contiguous.
4114 assert!(
4115 graph.file_segments().get(file_a).is_none(),
4116 "remove_file must clear the FileSegmentTable entry for file_a"
4117 );
4118 }
4119
4120 #[test]
4121 fn code_graph_remove_file_repeated_calls_are_idempotent() {
4122 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
4123
4124 // First call does the work.
4125 let first = graph.remove_file(file_a);
4126 assert_eq!(first.len(), file_a_nodes.len());
4127
4128 // Snapshot post-first-call state.
4129 let nodes_after = graph.nodes().len();
4130 let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
4131 let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
4132 let files_after = graph.files().len();
4133
4134 // Second call must be a silent no-op — the bucket is empty,
4135 // the file is unregistered, and the arena slots are already
4136 // tombstoned (NodeArena::remove ignores stale generations).
4137 let second = graph.remove_file(file_a);
4138 assert!(
4139 second.is_empty(),
4140 "second remove_file on the same file must return an empty Vec"
4141 );
4142
4143 assert_eq!(graph.nodes().len(), nodes_after);
4144 assert_eq!(
4145 graph.edges().stats().forward.delta_edge_count,
4146 delta_fwd_after
4147 );
4148 assert_eq!(
4149 graph.edges().stats().reverse.delta_edge_count,
4150 delta_rev_after
4151 );
4152 assert_eq!(graph.files().len(), files_after);
4153 }
4154}