// sqry_core/graph/unified/concurrent/graph.rs
1//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
2//!
3//! This module provides the core graph types with thread-safe access:
4//!
5//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
6//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
7//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries
8
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11use std::sync::Arc;
12use std::sync::atomic::{AtomicU64, Ordering};
13
14use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
15
16use crate::confidence::ConfidenceMetadata;
17use crate::graph::unified::bind::alias::AliasTable;
18use crate::graph::unified::bind::scope::provenance::{
19 ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
20};
21use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
22use crate::graph::unified::bind::shadow::ShadowTable;
23use crate::graph::unified::edge::EdgeKind;
24use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
25use crate::graph::unified::file::FileId;
26use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
27use crate::graph::unified::resolution::display_graph_qualified_name;
28use crate::graph::unified::storage::arena::NodeArena;
29use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
30use crate::graph::unified::storage::indices::AuxiliaryIndices;
31use crate::graph::unified::storage::interner::StringInterner;
32use crate::graph::unified::storage::metadata::NodeMetadataStore;
33use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
34use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
35use crate::graph::unified::storage::segment::FileSegmentTable;
36use crate::graph::unified::string::id::StringId;
37
/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
///
/// `CodeGraph` uses `Arc` for all internal components, enabling:
/// - O(1) snapshot creation via Arc cloning
/// - Copy-on-write semantics via `Arc::make_mut`
/// - Memory-efficient sharing between snapshots
///
/// # Design
///
/// The Arc wrapping enables the MVCC pattern:
/// - Readers see a consistent snapshot at the time they acquired access
/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
/// - Multiple snapshots can coexist without copying data
///
/// # Performance
///
/// - Snapshot creation: a fixed number of Arc clones (one per Arc-wrapped
///   field, currently 13) ≈ <1μs
/// - Read access: Direct Arc dereference, no locking
/// - Write access: `Arc::make_mut` clones only if refcount > 1
///
/// # Phase 2 binding-plane access
///
/// Use the two-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let snapshot = graph.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The two-line form is intentional: `BindingPlane<'g>` borrows from
/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
/// visible at the call site. The full Phase 2 scope/alias/shadow and
/// witness-bearing resolution API is exposed through `BindingPlane`.
// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
// External crates still go through the public accessor methods below.
#[derive(Clone)]
pub struct CodeGraph {
    /// Node storage with generational indices.
    pub(crate) nodes: Arc<NodeArena>,
    /// Bidirectional edge storage (forward + reverse).
    pub(crate) edges: Arc<BidirectionalEdgeStore>,
    /// String interner for symbol names.
    pub(crate) strings: Arc<StringInterner>,
    /// File registry for path deduplication.
    pub(crate) files: Arc<FileRegistry>,
    /// Auxiliary indices for fast lookup.
    pub(crate) indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata (keyed by full `NodeId`).
    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance (Phase 1 fact layer).
    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance (Phase 1 fact layer).
    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
    pub(crate) fact_epoch: u64,
    /// Epoch for version tracking.
    pub(crate) epoch: u64,
    /// Per-language confidence metadata collected during build.
    /// Maps language name (e.g., "rust") to aggregated confidence.
    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
    pub(crate) scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
    pub(crate) alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
    pub(crate) shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
    pub(crate) file_segments: Arc<FileSegmentTable>,
}
112
113impl CodeGraph {
114 /// Creates a new empty `CodeGraph`.
115 ///
116 /// # Example
117 ///
118 /// ```rust
119 /// use sqry_core::graph::unified::concurrent::CodeGraph;
120 ///
121 /// let graph = CodeGraph::new();
122 /// assert_eq!(graph.epoch(), 0);
123 /// ```
124 #[must_use]
125 pub fn new() -> Self {
126 Self {
127 nodes: Arc::new(NodeArena::new()),
128 edges: Arc::new(BidirectionalEdgeStore::new()),
129 strings: Arc::new(StringInterner::new()),
130 files: Arc::new(FileRegistry::new()),
131 indices: Arc::new(AuxiliaryIndices::new()),
132 macro_metadata: Arc::new(NodeMetadataStore::new()),
133 node_provenance: Arc::new(NodeProvenanceStore::new()),
134 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
135 fact_epoch: 0,
136 epoch: 0,
137 confidence: HashMap::new(),
138 scope_arena: Arc::new(ScopeArena::new()),
139 alias_table: Arc::new(AliasTable::new()),
140 shadow_table: Arc::new(ShadowTable::new()),
141 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
142 file_segments: Arc::new(FileSegmentTable::new()),
143 }
144 }
145
146 /// Creates a `CodeGraph` from existing components.
147 ///
148 /// This is useful when building a graph from external data or
149 /// reconstructing from serialized state.
150 #[must_use]
151 pub fn from_components(
152 nodes: NodeArena,
153 edges: BidirectionalEdgeStore,
154 strings: StringInterner,
155 files: FileRegistry,
156 indices: AuxiliaryIndices,
157 macro_metadata: NodeMetadataStore,
158 ) -> Self {
159 Self {
160 nodes: Arc::new(nodes),
161 edges: Arc::new(edges),
162 strings: Arc::new(strings),
163 files: Arc::new(files),
164 indices: Arc::new(indices),
165 macro_metadata: Arc::new(macro_metadata),
166 node_provenance: Arc::new(NodeProvenanceStore::new()),
167 edge_provenance: Arc::new(EdgeProvenanceStore::new()),
168 fact_epoch: 0,
169 epoch: 0,
170 confidence: HashMap::new(),
171 scope_arena: Arc::new(ScopeArena::new()),
172 alias_table: Arc::new(AliasTable::new()),
173 shadow_table: Arc::new(ShadowTable::new()),
174 scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
175 file_segments: Arc::new(FileSegmentTable::new()),
176 }
177 }
178
    /// Creates a cheap snapshot of the graph.
    ///
    /// This operation is a fixed number of Arc clones (one per Arc-wrapped
    /// field, currently 13) plus two `u64` copies, which completes in <1μs.
    /// The snapshot is isolated from future mutations to the original graph:
    /// writers go through `Arc::make_mut`, which clones shared data rather
    /// than mutating it in place.
    ///
    /// Note: the `confidence` map is intentionally not carried into the
    /// snapshot (`GraphSnapshot` has no such field).
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// // snapshot is independent of future mutations to graph
    /// ```
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        GraphSnapshot {
            nodes: Arc::clone(&self.nodes),
            edges: Arc::clone(&self.edges),
            strings: Arc::clone(&self.strings),
            files: Arc::clone(&self.files),
            indices: Arc::clone(&self.indices),
            macro_metadata: Arc::clone(&self.macro_metadata),
            node_provenance: Arc::clone(&self.node_provenance),
            edge_provenance: Arc::clone(&self.edge_provenance),
            fact_epoch: self.fact_epoch,
            epoch: self.epoch,
            scope_arena: Arc::clone(&self.scope_arena),
            alias_table: Arc::clone(&self.alias_table),
            shadow_table: Arc::clone(&self.shadow_table),
            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
            file_segments: Arc::clone(&self.file_segments),
        }
    }
213
214 /// Returns a reference to the node arena.
215 #[inline]
216 #[must_use]
217 pub fn nodes(&self) -> &NodeArena {
218 &self.nodes
219 }
220
221 /// Returns a reference to the bidirectional edge store.
222 #[inline]
223 #[must_use]
224 pub fn edges(&self) -> &BidirectionalEdgeStore {
225 &self.edges
226 }
227
228 /// Returns a reference to the string interner.
229 #[inline]
230 #[must_use]
231 pub fn strings(&self) -> &StringInterner {
232 &self.strings
233 }
234
235 /// Returns a reference to the file registry.
236 #[inline]
237 #[must_use]
238 pub fn files(&self) -> &FileRegistry {
239 &self.files
240 }
241
242 /// Returns a reference to the auxiliary indices.
243 #[inline]
244 #[must_use]
245 pub fn indices(&self) -> &AuxiliaryIndices {
246 &self.indices
247 }
248
249 /// Returns a reference to the macro boundary metadata store.
250 #[inline]
251 #[must_use]
252 pub fn macro_metadata(&self) -> &NodeMetadataStore {
253 &self.macro_metadata
254 }
255
256 // ------------------------------------------------------------------
257 // Phase 1 fact-layer provenance accessors (P1U09).
258 // ------------------------------------------------------------------
259
260 /// Returns the monotonic fact-layer epoch stamped on the most recently
261 /// saved or loaded snapshot. Returns `0` for graphs that have not been
262 /// persisted yet or were loaded from V7 snapshots.
263 #[inline]
264 #[must_use]
265 pub fn fact_epoch(&self) -> u64 {
266 self.fact_epoch
267 }
268
269 /// Looks up node provenance by `NodeId`.
270 ///
271 /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
272 /// or the stored generation does not match (stale handle).
273 #[inline]
274 #[must_use]
275 pub fn node_provenance(
276 &self,
277 id: crate::graph::unified::node::id::NodeId,
278 ) -> Option<&NodeProvenance> {
279 self.node_provenance.lookup(id)
280 }
281
282 /// Looks up edge provenance by `EdgeId`.
283 ///
284 /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
285 /// or the edge is the invalid sentinel.
286 #[inline]
287 #[must_use]
288 pub fn edge_provenance(
289 &self,
290 id: crate::graph::unified::edge::id::EdgeId,
291 ) -> Option<&EdgeProvenance> {
292 self.edge_provenance.lookup(id)
293 }
294
295 /// Returns a borrowed provenance view for a file.
296 ///
297 /// Returns `None` for invalid/unregistered `FileId`s.
298 #[inline]
299 #[must_use]
300 pub fn file_provenance(
301 &self,
302 id: crate::graph::unified::file::id::FileId,
303 ) -> Option<FileProvenanceView<'_>> {
304 self.files.file_provenance(id)
305 }
306
307 // ------------------------------------------------------------------
308 // Phase 2 binding-plane accessors (P2U03).
309 // ------------------------------------------------------------------
310
311 /// Returns a reference to the scope arena derived during Phase 4e.
312 ///
313 /// The arena is empty on freshly-constructed `CodeGraph` instances and is
314 /// populated by calling `phase4e_binding::derive_binding_plane`.
315 #[inline]
316 #[must_use]
317 pub fn scope_arena(&self) -> &ScopeArena {
318 &self.scope_arena
319 }
320
321 /// Installs a freshly-derived scope arena.
322 ///
323 /// Called from `phase4e_binding::derive_binding_plane` during the build
324 /// pipeline. External callers that run Phase 4e manually (e.g., test
325 /// fixture builders) use this to store the result.
326 pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
327 self.scope_arena = Arc::new(arena);
328 }
329
330 /// Returns a reference to the alias table derived during Phase 4e.
331 ///
332 /// The table is empty on freshly-constructed `CodeGraph` instances and is
333 /// populated by calling `phase4e_binding::derive_binding_plane`.
334 #[inline]
335 #[must_use]
336 pub fn alias_table(&self) -> &AliasTable {
337 &self.alias_table
338 }
339
340 /// Installs a freshly-derived alias table.
341 ///
342 /// Called from `phase4e_binding::derive_binding_plane` during the build
343 /// pipeline. External callers that run Phase 4e manually (e.g., test
344 /// fixture builders) use this to store the result.
345 pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
346 self.alias_table = Arc::new(table);
347 }
348
349 /// Returns a reference to the shadow table derived during Phase 4e.
350 ///
351 /// The table is empty on freshly-constructed `CodeGraph` instances and is
352 /// populated by calling `phase4e_binding::derive_binding_plane`.
353 #[inline]
354 #[must_use]
355 pub fn shadow_table(&self) -> &ShadowTable {
356 &self.shadow_table
357 }
358
359 /// Installs a freshly-derived shadow table.
360 ///
361 /// Called from `phase4e_binding::derive_binding_plane` during the build
362 /// pipeline. External callers that run Phase 4e manually (e.g., test
363 /// fixture builders) use this to store the result.
364 pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
365 self.shadow_table = Arc::new(table);
366 }
367
368 /// Returns a reference to the scope provenance store derived during Phase 4e.
369 ///
370 /// The store is empty on freshly-constructed `CodeGraph` instances and is
371 /// populated by calling `phase4e_binding::derive_binding_plane`.
372 #[inline]
373 #[must_use]
374 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
375 &self.scope_provenance_store
376 }
377
378 /// Looks up scope provenance by `ScopeId`.
379 ///
380 /// Returns `None` if the slot is out of range, vacant, or the stored
381 /// generation does not match (stale handle).
382 #[inline]
383 #[must_use]
384 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
385 self.scope_provenance_store.lookup(id)
386 }
387
388 /// Looks up the live `ScopeId` for a stable scope identity.
389 ///
390 /// Returns `None` if no provenance record is registered for that stable id.
391 /// The reverse index is populated by `insert` during Phase 4e and must be
392 /// rebuilt after V9 deserialization.
393 #[inline]
394 #[must_use]
395 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
396 self.scope_provenance_store.scope_by_stable_id(stable)
397 }
398
399 /// Installs a freshly-derived scope provenance store.
400 ///
401 /// Called from `phase4e_binding::derive_binding_plane` during the build
402 /// pipeline. External callers that run Phase 4e manually (e.g., test
403 /// fixture builders) use this to store the result.
404 pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
405 self.scope_provenance_store = Arc::new(store);
406 }
407
408 /// Returns a reference to the file segment table.
409 #[inline]
410 #[must_use]
411 pub fn file_segments(&self) -> &FileSegmentTable {
412 &self.file_segments
413 }
414
415 /// Replaces the file segment table.
416 pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
417 self.file_segments = Arc::new(table);
418 }
419
420 /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
421 pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
422 Arc::make_mut(&mut self.file_segments)
423 }
424
425 /// Test-only helper that records a file segment directly on the
426 /// graph, bypassing the full Phase 3 commit pipeline. Only
427 /// available under `#[cfg(feature = "rebuild-internals")]` so the
428 /// surface is opt-in for rebuild-plane consumers (the feature is
429 /// whitelisted to sqry-daemon + sqry-core integration tests; see
430 /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
431 ///
432 /// Integration tests (notably
433 /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
434 /// to seed the synthetic workspaces they build before exercising
435 /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
436 /// code paths never touch this method — Phase 3 parallel commit
437 /// is the sole production writer, via the crate-internal
438 /// `file_segments_mut` accessor above.
439 ///
440 /// Renamed with `test_only_` prefix so the purpose is unambiguous
441 /// at every call site; `#[doc(hidden)]` hides it from rendered
442 /// rustdoc so downstream daemon integrations don't discover it by
443 /// accident.
444 #[cfg(feature = "rebuild-internals")]
445 #[doc(hidden)]
446 pub fn test_only_record_file_segment(
447 &mut self,
448 file_id: FileId,
449 start_slot: u32,
450 slot_count: u32,
451 ) {
452 Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
453 }
454
455 /// Sets the provenance stores and fact epoch, typically called by the
456 /// persistence loader after deserializing a V8 snapshot.
457 pub(crate) fn set_provenance(
458 &mut self,
459 node_provenance: NodeProvenanceStore,
460 edge_provenance: EdgeProvenanceStore,
461 fact_epoch: u64,
462 ) {
463 self.node_provenance = Arc::new(node_provenance);
464 self.edge_provenance = Arc::new(edge_provenance);
465 self.fact_epoch = fact_epoch;
466 }
467
468 /// Returns the current epoch.
469 #[inline]
470 #[must_use]
471 pub fn epoch(&self) -> u64 {
472 self.epoch
473 }
474
475 /// Returns a mutable reference to the node arena.
476 ///
477 /// Uses `Arc::make_mut` for copy-on-write semantics: if other
478 /// references exist (e.g., snapshots), the data is cloned.
479 #[inline]
480 pub fn nodes_mut(&mut self) -> &mut NodeArena {
481 Arc::make_mut(&mut self.nodes)
482 }
483
484 /// Returns a mutable reference to the bidirectional edge store.
485 ///
486 /// Uses `Arc::make_mut` for copy-on-write semantics.
487 #[inline]
488 pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
489 Arc::make_mut(&mut self.edges)
490 }
491
492 /// Returns a mutable reference to the string interner.
493 ///
494 /// Uses `Arc::make_mut` for copy-on-write semantics.
495 #[inline]
496 pub fn strings_mut(&mut self) -> &mut StringInterner {
497 Arc::make_mut(&mut self.strings)
498 }
499
500 /// Returns a mutable reference to the file registry.
501 ///
502 /// Uses `Arc::make_mut` for copy-on-write semantics.
503 #[inline]
504 pub fn files_mut(&mut self) -> &mut FileRegistry {
505 Arc::make_mut(&mut self.files)
506 }
507
508 /// Returns a mutable reference to the auxiliary indices.
509 ///
510 /// Uses `Arc::make_mut` for copy-on-write semantics.
511 #[inline]
512 pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
513 Arc::make_mut(&mut self.indices)
514 }
515
516 /// Returns a mutable reference to the macro boundary metadata store.
517 ///
518 /// Uses `Arc::make_mut` for copy-on-write semantics.
519 #[inline]
520 pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
521 Arc::make_mut(&mut self.macro_metadata)
522 }
523
524 /// Returns mutable references to both the node arena and the string interner.
525 ///
526 /// This avoids the borrow-conflict that arises when calling `nodes_mut()` and
527 /// `strings_mut()` separately on `&mut self`.
528 #[inline]
529 pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
530 (
531 Arc::make_mut(&mut self.nodes),
532 Arc::make_mut(&mut self.strings),
533 )
534 }
535
536 /// Rebuilds auxiliary indices from the current node arena.
537 ///
538 /// This avoids the borrow conflict that arises when calling `nodes()` and
539 /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
540 /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
541 /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
542 /// existing indices and rebuilds in a single pass without per-element
543 /// duplicate checking.
544 ///
545 /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
546 /// generic
547 /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
548 /// free function so the same implementation serves both the
549 /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
550 /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
551 /// using `graph.rebuild_indices()`; incremental rebuild call sites
552 /// should use the free function directly.
553 pub fn rebuild_indices(&mut self) {
554 crate::graph::unified::build::parallel_commit::rebuild_indices(self);
555 }
556
557 /// Increments the epoch counter and returns the new value.
558 ///
559 /// Called automatically by `ConcurrentCodeGraph::write()`.
560 #[inline]
561 pub fn bump_epoch(&mut self) -> u64 {
562 self.epoch = self.epoch.wrapping_add(1);
563 self.epoch
564 }
565
566 /// Sets the epoch to a specific value.
567 ///
568 /// This is primarily for testing or reconstruction from serialized state.
569 #[inline]
570 pub fn set_epoch(&mut self, epoch: u64) {
571 self.epoch = epoch;
572 }
573
574 /// Returns the number of nodes in the graph.
575 ///
576 /// This is a convenience method that delegates to `nodes().len()`.
577 ///
578 /// # Example
579 ///
580 /// ```rust
581 /// use sqry_core::graph::unified::concurrent::CodeGraph;
582 ///
583 /// let graph = CodeGraph::new();
584 /// assert_eq!(graph.node_count(), 0);
585 /// ```
586 #[inline]
587 #[must_use]
588 pub fn node_count(&self) -> usize {
589 self.nodes.len()
590 }
591
592 /// Returns the number of edges in the graph (forward direction).
593 ///
594 /// This counts edges in the forward store, including both CSR and delta edges.
595 ///
596 /// # Example
597 ///
598 /// ```rust
599 /// use sqry_core::graph::unified::concurrent::CodeGraph;
600 ///
601 /// let graph = CodeGraph::new();
602 /// assert_eq!(graph.edge_count(), 0);
603 /// ```
604 #[inline]
605 #[must_use]
606 pub fn edge_count(&self) -> usize {
607 let stats = self.edges.stats();
608 stats.forward.csr_edge_count + stats.forward.delta_edge_count
609 }
610
611 /// Returns true if the graph contains no nodes.
612 ///
613 /// This is a convenience method that delegates to `nodes().is_empty()`.
614 ///
615 /// # Example
616 ///
617 /// ```rust
618 /// use sqry_core::graph::unified::concurrent::CodeGraph;
619 ///
620 /// let graph = CodeGraph::new();
621 /// assert!(graph.is_empty());
622 /// ```
623 #[inline]
624 #[must_use]
625 pub fn is_empty(&self) -> bool {
626 self.nodes.is_empty()
627 }
628
629 /// Returns an iterator over all indexed file paths.
630 ///
631 /// This is useful for enumerating all files that have been processed
632 /// and added to the graph.
633 ///
634 /// # Example
635 ///
636 /// ```rust
637 /// use sqry_core::graph::unified::concurrent::CodeGraph;
638 ///
639 /// let graph = CodeGraph::new();
640 /// for (file_id, path) in graph.indexed_files() {
641 /// println!("File {}: {}", file_id.index(), path.display());
642 /// }
643 /// ```
644 #[inline]
645 pub fn indexed_files(
646 &self,
647 ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
648 self.files
649 .iter()
650 .map(|(id, arc_path)| (id, arc_path.as_ref()))
651 }
652
653 /// Returns the set of files that import one or more symbols exported by
654 /// `file_id`, deduplicated and sorted ascending.
655 ///
656 /// For every [`EdgeKind::Imports`] edge whose target node lives in
657 /// `file_id`, the source node's [`FileId`] is added to the result. Files
658 /// are returned sorted by raw index so the result is deterministic across
659 /// runs. The caller's own file is never included — an `Imports` edge
660 /// whose source and target both live in `file_id` is treated as a
661 /// self-import and elided. Edges whose source node is no longer resolvable
662 /// in the arena (tombstoned) are silently skipped.
663 ///
664 /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
665 /// by the incremental rebuild engine to compute reverse-dependency
666 /// closures: "if file X changes its exports, which files need to be
667 /// re-linked?"
668 ///
669 /// # Complexity
670 ///
671 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
672 /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
673 /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
674 /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
675 /// importer set (where `R` is the importer count) is negligible in
676 /// practice since `R ≤ file_count`.
677 ///
678 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
679 #[must_use]
680 pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
681 let mut importers: HashSet<FileId> = HashSet::new();
682 for &target_node in self.indices.by_file(file_id) {
683 for edge_ref in self.edges.edges_to(target_node) {
684 if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
685 continue;
686 }
687 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
688 continue;
689 };
690 let source_file = source_entry.file;
691 if source_file != file_id {
692 importers.insert(source_file);
693 }
694 }
695 }
696 let mut result: Vec<FileId> = importers.into_iter().collect();
697 result.sort();
698 result
699 }
700
701 /// Returns the set of files that hold at least one live inter-file edge
702 /// targeting a node in `file_id`, deduplicated and sorted ascending.
703 ///
704 /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
705 /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
706 /// cross-file edge as a dependency signal: `Calls`, `References`,
707 /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
708 /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
709 /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
710 /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
711 /// cross-file-capable variant. This is the reverse-dependency surface the
712 /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
713 /// `file_id` changes, every file whose committed edges point into
714 /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
715 /// references survive the target-side tombstone-and-reparse cycle.
716 ///
717 /// The caller's own file is never included — an edge whose source and
718 /// target both live in `file_id` is a self-reference and is elided.
719 /// Edges whose source node is no longer resolvable in the arena
720 /// (tombstoned) are silently skipped.
721 ///
722 /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
723 ///
724 /// * [`reverse_import_index`](Self::reverse_import_index) remains the
725 /// right surface for consumers that specifically need *import*
726 /// relationships (export surface analysis, module-dependency graphs,
727 /// etc.).
728 /// * `reverse_dependency_index` is the right surface for incremental
729 /// rebuild closure computation. Widening past imports is necessary
730 /// because call sites, type references, trait implementations, FFI
731 /// declarations, HTTP clients, and every other cross-file edge kind
732 /// hold a committed edge into the target file that becomes stale the
733 /// moment `remove_file(target)` tombstones its arena nodes. Leaving
734 /// those files out of the closure leaves the committed edges pointing
735 /// at the stale (pre-tombstone) node IDs — Phase 4c-prime only
736 /// rewrites edges on **re-parsed** files' `PendingEdge` sets, never
737 /// committed edges owned by files outside the reparse scope.
738 ///
739 /// # Complexity
740 ///
741 /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
742 /// same bound as [`reverse_import_index`](Self::reverse_import_index).
743 /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
744 /// lookup and the bidirectional edge store's reverse adjacency; no
745 /// full-graph scan. Final `O(R log R)` sort over the deduplicated
746 /// dependent set is negligible since `R ≤ file_count`.
747 ///
748 /// # Over-widening is expected and acceptable
749 ///
750 /// The closure will include every file that references anything in
751 /// `file_id`, not just files whose exports change. In common codebases
752 /// this widens the reparse set modestly (a 10-file change may expand
753 /// to 20–30 dependent files in a medium crate). Correctness requires
754 /// the widening; minimality is a follow-up optimisation if profiling
755 /// demands it.
756 ///
757 /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
758 #[must_use]
759 pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
760 let mut dependents: HashSet<FileId> = HashSet::new();
761 for &target_node in self.indices.by_file(file_id) {
762 for edge_ref in self.edges.edges_to(target_node) {
763 let Some(source_entry) = self.nodes.get(edge_ref.source) else {
764 continue;
765 };
766 let source_file = source_entry.file;
767 if source_file != file_id {
768 dependents.insert(source_file);
769 }
770 }
771 }
772 let mut result: Vec<FileId> = dependents.into_iter().collect();
773 result.sort();
774 result
775 }
776
777 /// Returns the per-language confidence metadata.
778 ///
779 /// This contains analysis confidence information collected during graph build,
780 /// primarily used by language plugins (e.g., Rust) to track analysis quality.
781 #[inline]
782 #[must_use]
783 pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
784 &self.confidence
785 }
786
    /// Merges confidence metadata for a language.
    ///
    /// If confidence already exists for the language, this merges the new
    /// metadata (taking the lower confidence level and combining limitations).
    /// Otherwise, it inserts the new confidence.
    ///
    /// The level merge treats `Verified` as the most confident level and
    /// `AstOnly` as the least, and always keeps the *less* confident side.
    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
        use crate::confidence::ConfidenceLevel;

        self.confidence
            .entry(language.to_string())
            .and_modify(|existing| {
                // Take the lower confidence level (more conservative)
                let new_level = match (&existing.level, &metadata.level) {
                    // `Verified` is the top of the lattice: whichever side is
                    // not `Verified` wins (if both are, `other` is `Verified`).
                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
                        *other
                    }
                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
                        ConfidenceLevel::AstOnly
                    }
                    // Remaining pairs are equal, so either side is the minimum.
                    // NOTE(review): this catch-all silently keeps the existing
                    // level; if a new `ConfidenceLevel` variant is ever added,
                    // revisit this match so mixed pairs still take the minimum.
                    (level, _) => *level,
                };
                existing.level = new_level;

                // Merge limitations (deduplicated, preserving existing order;
                // linear `contains` scan is fine for these small lists)
                for limitation in &metadata.limitations {
                    if !existing.limitations.contains(limitation) {
                        existing.limitations.push(limitation.clone());
                    }
                }

                // Merge unavailable features (deduplicated, same approach)
                for feature in &metadata.unavailable_features {
                    if !existing.unavailable_features.contains(feature) {
                        existing.unavailable_features.push(feature.clone());
                    }
                }
            })
            .or_insert(metadata);
    }
827
828 /// Sets the confidence metadata map directly.
829 ///
830 /// This is primarily used when loading a graph from serialized state.
831 pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
832 self.confidence = confidence;
833 }
834
835 // ------------------------------------------------------------------
836 // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
837 //
838 // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
839 // path mutates a live `CodeGraph` in place and is the mechanism
840 // used by the full-rebuild flow when it needs to evict a file's
841 // nodes+edges between compactions. The daemon's incremental
842 // `WorkspaceManager` (Task 6) goes through the rebuild path, which
843 // is why this entry point is `pub(crate)`.
844 // ------------------------------------------------------------------
845
846 /// Tombstone every node that belongs to `file_id`, invalidate every
847 /// edge whose source or target is one of those nodes (across both
848 /// forward and reverse CSR + delta tiers), drop the file's entry
849 /// from the [`FileRegistry`], and return the set of [`NodeId`]s
850 /// that were tombstoned.
851 ///
852 /// This is the §F.2-aware file-removal primitive. Semantically, the
853 /// post-condition matches what a full rebuild of the workspace
854 /// without `file_id` would produce:
855 ///
856 /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
857 /// been `NodeArena::remove`d, advancing its slot generation so
858 /// stale [`NodeId`] handles do not alias a later re-allocation.
859 /// * Every CSR edge whose source or target slot matches one of the
860 /// tombstoned slot indices has its `csr_tombstones` bit set; the
861 /// read path's merge step already filters tombstoned CSR edges
862 /// out of every query result.
863 /// * Every delta-buffer edge (Add or Remove, any file) whose source
864 /// or target matches a tombstoned slot has been dropped from the
865 /// delta in both directions.
866 /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
867 /// no longer reference any of the tombstoned `NodeId`s.
868 /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
869 /// [`AliasTable`], and [`ShadowTable`] have been compacted through
870 /// the [`NodeIdBearing::retain_nodes`] predicate so no
871 /// tombstoned `NodeId` survives in any publish-visible store.
872 /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
873 /// `file_id`, the lookup slot is recycled, and
874 /// [`FileRegistry::resolve(file_id)`] returns `None`.
875 ///
876 /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
877 /// is useful for downstream housekeeping (e.g., resetting per-file
878 /// caches keyed by `NodeId`) and for tests that need to assert on the
879 /// exact membership of the tombstone set.
880 ///
881 /// # Idempotency
882 ///
883 /// Calling `remove_file` twice with the same `file_id` — or calling
884 /// it for a `file_id` that was never registered — is a safe no-op.
885 /// The returned `Vec<NodeId>` is empty on the second call; no
886 /// arena / edge / index state is mutated (the predicate-based
887 /// compaction of `NodeIdBearing` surfaces short-circuits when the
888 /// dead set is empty).
889 ///
890 /// # Visibility
891 ///
892 /// `pub(crate)` because external callers (Task 6's
893 /// `WorkspaceManager` on the sqry-daemon side) route through
894 /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
895 /// instead. This `CodeGraph`-level variant is used by full-rebuild
896 /// housekeeping paths inside sqry-core and by the Task 4 Step 4
897 /// incremental fallback for cases where the caller already has a
898 /// `&mut CodeGraph` and does not need the clone-and-publish
899 /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
900 /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
901 ///
902 /// # Performance
903 ///
904 /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
905 /// The CSR walk is linear in total edge count (not per-file),
906 /// which is the dominant cost; each row check is O(1) via the
907 /// precomputed dead-slot-index `HashSet` in
908 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
909 /// * Delta filtering is `O(|delta|)` per direction.
910 /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
911 /// because each of the four indices keys its entries by the
912 /// tombstoned `NodeIds` directly.
913 ///
914 /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
915 /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
916 /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
917 #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
918 // and the unit tests below; published in this commit so the §F.2
919 // invariant surface can be reviewed in isolation per the Gate 0c
920 // split contract.
921 pub(crate) fn remove_file(
922 &mut self,
923 file_id: FileId,
924 ) -> Vec<crate::graph::unified::node::NodeId> {
925 use crate::graph::unified::node::NodeId;
926 use crate::graph::unified::rebuild::coverage::NodeIdBearing;
927
928 // Drain the per-file bucket. For a file that was never
929 // registered, this returns an empty Vec — the rest of the
930 // method short-circuits on the `if tombstoned.is_empty()` test
931 // below so we still deregister the file on the off chance the
932 // bucket was empty but the file was registered (defensive; the
933 // common case is the bucket existed iff the file was
934 // registered).
935 let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
936 // Always drop the file's path entry + recycle its slot, even
937 // when the bucket was empty, so repeated registrations of the
938 // same path don't resurrect a zombie FileId. `unregister` is
939 // idempotent (returns None for unknown IDs) so the cost is a
940 // single HashMap probe when file_id is already gone.
941 self.files_mut().unregister(file_id);
942 // Clear the file's segment entry unconditionally (idempotent
943 // — `FileSegmentTable::remove` no-ops on unknown ids). This
944 // MUST run before `FileRegistry::unregister` recycles the
945 // FileId slot for reuse, otherwise a later registration of a
946 // different path under the reused FileId would inherit the
947 // previous file's stale node range (see
948 // `sqry-core/src/graph/unified/build/reindex.rs` — which
949 // trusts `file_segments().get(file_id)` to decide which slots
950 // to tombstone). Note: `unregister` above was called first
951 // only to keep the existing bucket-drain ordering; the
952 // segment-clear is order-independent with respect to
953 // `unregister` because neither touches the other's backing
954 // store, and the FileId slot cannot be recycled-and-reissued
955 // across a single `remove_file` call (the registry's slot
956 // recycler is driven by a later `register`, not by
957 // `unregister`).
958 self.file_segments_mut().remove(file_id);
959
960 if tombstoned.is_empty() {
961 return tombstoned;
962 }
963
964 // Dead set keyed on NodeId for NodeIdBearing predicates.
965 // `retain_nodes` uses `HashSet::contains` so membership is O(1).
966 let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();
967
968 // 1. Tombstone the arena slots. `NodeArena::remove` is
969 // idempotent — stale NodeIds that don't match a slot's
970 // current generation are no-ops, which lets this method be
971 // safely re-run on the same file.
972 {
973 let arena = self.nodes_mut();
974 for &nid in &tombstoned {
975 let _ = arena.remove(nid);
976 }
977 }
978
979 // 2. Invalidate edges across both CSR + delta in both
980 // directions. This is the expensive step; the helper uses a
981 // precomputed dead-slot-index set so the CSR walk is linear
982 // in total edge count, not quadratic.
983 self.edges_mut().tombstone_edges_for_nodes(&dead);
984
985 // 3. Compact the auxiliary indices so name/kind/qname/file
986 // lookups do not return tombstoned NodeIds. Using the
987 // NodeIdBearing surface keeps this step in lockstep with the
988 // rebuild pipeline's step 4 — any future publish-visible
989 // NodeId-bearing container added to the K.A/K.B matrix is
990 // automatically swept here too.
991 {
992 let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
993 self.indices_mut().retain_nodes(&*predicate);
994 self.macro_metadata_mut().retain_nodes(&*predicate);
995 // The remaining K.A rows (node_provenance, scope_arena,
996 // alias_table, shadow_table) need Arc::make_mut accessors —
997 // they are wrapped in Arc at rest so this is where the
998 // CoW clone happens on demand. Inline the Arc::make_mut
999 // calls here (no public mut accessor exists today because
1000 // the sole writer has been the rebuild path; extend the
1001 // set by adding a similar inline call plus a K.A/K.B row
1002 // in `super::super::rebuild::coverage`).
1003 Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
1004 Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
1005 Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
1006 Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
1007 }
1008
1009 tombstoned
1010 }
1011
1012 // ------------------------------------------------------------------
1013 // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
1014 // ------------------------------------------------------------------
1015
1016 /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
1017 /// by `RebuildGraph::finalize()` (defined in
1018 /// `super::super::rebuild::rebuild_graph`).
1019 ///
1020 /// This constructor is deliberately `pub(crate)` and named with a
1021 /// leading `__` so it is inaccessible from downstream crates even
1022 /// when the `rebuild-internals` feature is enabled: only code in
1023 /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
1024 /// permitted to call it. The trybuild fixture
1025 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1026 /// proves there is no other path from `RebuildGraph` to
1027 /// `Arc<CodeGraph>`.
1028 ///
1029 /// The argument order mirrors the `CodeGraph` struct declaration
1030 /// order exactly; the public `clone_for_rebuild` → `finalize`
1031 /// round-trip uses the same macro-driven field enumeration so any
1032 /// new `CodeGraph` field automatically threads through this
1033 /// constructor as well.
1034 #[doc(hidden)]
1035 #[allow(clippy::too_many_arguments)]
1036 #[must_use]
1037 pub(crate) fn __assemble_from_rebuild_parts_internal(
1038 nodes: NodeArena,
1039 edges: BidirectionalEdgeStore,
1040 strings: StringInterner,
1041 files: FileRegistry,
1042 indices: AuxiliaryIndices,
1043 macro_metadata: NodeMetadataStore,
1044 node_provenance: NodeProvenanceStore,
1045 edge_provenance: EdgeProvenanceStore,
1046 fact_epoch: u64,
1047 epoch: u64,
1048 confidence: HashMap<String, ConfidenceMetadata>,
1049 scope_arena: ScopeArena,
1050 alias_table: AliasTable,
1051 shadow_table: ShadowTable,
1052 scope_provenance_store: ScopeProvenanceStore,
1053 file_segments: FileSegmentTable,
1054 ) -> Self {
1055 Self {
1056 nodes: Arc::new(nodes),
1057 edges: Arc::new(edges),
1058 strings: Arc::new(strings),
1059 files: Arc::new(files),
1060 indices: Arc::new(indices),
1061 macro_metadata: Arc::new(macro_metadata),
1062 node_provenance: Arc::new(node_provenance),
1063 edge_provenance: Arc::new(edge_provenance),
1064 fact_epoch,
1065 epoch,
1066 confidence,
1067 scope_arena: Arc::new(scope_arena),
1068 alias_table: Arc::new(alias_table),
1069 shadow_table: Arc::new(shadow_table),
1070 scope_provenance_store: Arc::new(scope_provenance_store),
1071 file_segments: Arc::new(file_segments),
1072 }
1073 }
1074
1075 // ------------------------------------------------------------------
1076 // Gate 0c (A2 §F) — publish-boundary debug invariants.
1077 //
1078 // These checks fire only in debug / test builds; release builds
1079 // compile them out. They are called from
1080 // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
1081 // truth for the residue check, per §F and §H agreement). Gate 0d
1082 // will additionally wire the bijection check into
1083 // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
1084 // every §E equivalence-harness run.
1085 // ------------------------------------------------------------------
1086
1087 /// Assert the bijective bucket-membership invariant (A2 §F.1).
1088 ///
1089 /// Four conditions must hold simultaneously:
1090 /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
1091 /// live arena slot;
1092 /// (b) every `NodeId` appears in exactly one bucket (no duplicates
1093 /// across buckets, no duplicates within a bucket);
1094 /// (c) the bucket's `FileId` matches the node's own `file` field on
1095 /// `NodeEntry`;
1096 /// (d) when at least one bucket is populated, every live node in
1097 /// the arena is accounted for by some bucket.
1098 ///
1099 /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
1100 /// (no recorded buckets) publish boundary is vacuously consistent:
1101 /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
1102 /// rebuilds on graphs that predate Gate 0c's parallel-commit
1103 /// bucketing must not panic. Once any bucket is populated, every
1104 /// live arena slot must appear in a bucket.
1105 ///
1106 /// Iter-2 B2 (verbatim): this check used to be documented as
1107 /// "vacuous until future `per_file_nodes` work lands". Pulling
1108 /// base-plan Step 1 into Gate 0c retires that phrasing — the check
1109 /// is non-vacuous the moment parallel-parse commits nodes, which
1110 /// happens on every real build. The `!seen.is_empty()` guard now
1111 /// exists solely for the empty-graph corner case, not as a phased-
1112 /// delivery deferral.
1113 ///
1114 /// The check is a no-op in release builds. Panics with a
1115 /// descriptive message on violation. This is intentional: publish-
1116 /// boundary violations are programmer errors that must surface
1117 /// loudly during CI / test runs.
1118 #[cfg(any(debug_assertions, test))]
1119 pub fn assert_bucket_bijection(&self) {
1120 use std::collections::HashMap as StdHashMap;
1121 // (a) + (b) + (c): every bucketed node is live, unique across
1122 // buckets, and the bucket's FileId matches the node's own file.
1123 let mut seen: StdHashMap<
1124 crate::graph::unified::node::NodeId,
1125 crate::graph::unified::file::FileId,
1126 > = StdHashMap::new();
1127 let mut any_bucket_populated = false;
1128 for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
1129 if !bucket.is_empty() {
1130 any_bucket_populated = true;
1131 }
1132 // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
1133 // dedups during finalize step 6, but we re-check here so the
1134 // invariant covers non-finalize publish paths too (e.g. if a
1135 // future pipeline builds a graph without routing through
1136 // `RebuildGraph::finalize`).
1137 let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
1138 std::collections::HashSet::new();
1139 for node_id in bucket {
1140 assert!(
1141 within_bucket.insert(node_id),
1142 "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
1143 );
1144 assert!(
1145 self.nodes.get(node_id).is_some(),
1146 "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
1147 );
1148 let prior = seen.insert(node_id, file_id);
1149 assert!(
1150 prior.is_none(),
1151 "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
1152 prior={prior:?}, current={file_id:?}"
1153 );
1154 if let Some(entry) = self.nodes.get(node_id) {
1155 assert_eq!(
1156 entry.file, file_id,
1157 "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
1158 {file_id:?}, actual {:?}",
1159 entry.file
1160 );
1161 }
1162 }
1163 }
1164 // (d): every live node is accounted for by `seen` once buckets
1165 // are populated. The guard keeps legacy-empty-graph boundaries
1166 // vacuously consistent (see docs above).
1167 if any_bucket_populated {
1168 for (node_id, _entry) in self.nodes.iter() {
1169 assert!(
1170 seen.contains_key(&node_id),
1171 "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
1172 );
1173 }
1174 }
1175 }
1176
1177 /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
1178 ///
1179 /// Iterates every publish-visible NodeId-bearing structure on
1180 /// `self` and panics if any contains a node in `dead`. Called from
1181 /// `RebuildGraph::finalize()` step 14 against the set drained at
1182 /// step 8 — exactly one site per the plan's §F / §H agreement.
1183 ///
1184 /// No-op when `dead` is empty or in release builds.
1185 #[cfg(any(debug_assertions, test))]
1186 pub fn assert_no_tombstone_residue_for(
1187 &self,
1188 dead: &std::collections::HashSet<crate::graph::unified::node::NodeId>,
1189 ) {
1190 use super::super::rebuild::coverage::NodeIdBearing;
1191 if dead.is_empty() {
1192 return;
1193 }
1194 // Every K.A/K.B row must be inspected per §F.2.
1195 for nid in self.nodes.all_node_ids() {
1196 assert!(
1197 !dead.contains(&nid),
1198 "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1199 );
1200 }
1201 for nid in self.indices.all_node_ids() {
1202 assert!(
1203 !dead.contains(&nid),
1204 "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1205 );
1206 }
1207 for nid in self.edges.all_node_ids() {
1208 assert!(
1209 !dead.contains(&nid),
1210 "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1211 );
1212 }
1213 for nid in self.macro_metadata.all_node_ids() {
1214 assert!(
1215 !dead.contains(&nid),
1216 "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1217 );
1218 }
1219 for nid in self.node_provenance.all_node_ids() {
1220 assert!(
1221 !dead.contains(&nid),
1222 "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1223 );
1224 }
1225 for nid in self.scope_arena.all_node_ids() {
1226 assert!(
1227 !dead.contains(&nid),
1228 "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1229 );
1230 }
1231 for nid in self.alias_table.all_node_ids() {
1232 assert!(
1233 !dead.contains(&nid),
1234 "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1235 );
1236 }
1237 for nid in self.shadow_table.all_node_ids() {
1238 assert!(
1239 !dead.contains(&nid),
1240 "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1241 );
1242 }
1243 for nid in self.files.all_node_ids() {
1244 assert!(
1245 !dead.contains(&nid),
1246 "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1247 );
1248 }
1249 }
1250}
1251
1252impl Default for CodeGraph {
1253 fn default() -> Self {
1254 Self::new()
1255 }
1256}
1257
1258impl fmt::Debug for CodeGraph {
1259 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1260 f.debug_struct("CodeGraph")
1261 .field("nodes", &self.nodes.len())
1262 .field("epoch", &self.epoch)
1263 .finish_non_exhaustive()
1264 }
1265}
1266
1267impl GraphMemorySize for CodeGraph {
1268 /// Estimates the total heap bytes owned by this `CodeGraph`.
1269 ///
1270 /// Sums heap usage across every component the graph owns: node arena,
1271 /// bidirectional edge store (forward + reverse CSR + tombstones + delta
1272 /// buffer), string interner, file registry, auxiliary indices, sparse
1273 /// macro/classpath metadata, provenance stores, and the per-language
1274 /// confidence map. Used by the `sqryd` daemon's admission controller
1275 /// and workspace retention reaper to enforce memory budgets.
1276 fn heap_bytes(&self) -> usize {
1277 let mut confidence_bytes = self.confidence.capacity()
1278 * (std::mem::size_of::<String>()
1279 + std::mem::size_of::<ConfidenceMetadata>()
1280 + HASHMAP_ENTRY_OVERHEAD);
1281 for (key, meta) in &self.confidence {
1282 confidence_bytes += key.capacity();
1283 // `ConfidenceMetadata.limitations` / `unavailable_features` are
1284 // `Vec<String>`; charge their spill and inner String payloads.
1285 confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
1286 for s in &meta.limitations {
1287 confidence_bytes += s.capacity();
1288 }
1289 confidence_bytes +=
1290 meta.unavailable_features.capacity() * std::mem::size_of::<String>();
1291 for s in &meta.unavailable_features {
1292 confidence_bytes += s.capacity();
1293 }
1294 }
1295
1296 self.nodes.heap_bytes()
1297 + self.edges.heap_bytes()
1298 + self.strings.heap_bytes()
1299 + self.files.heap_bytes()
1300 + self.indices.heap_bytes()
1301 + self.macro_metadata.heap_bytes()
1302 + self.node_provenance.heap_bytes()
1303 + self.edge_provenance.heap_bytes()
1304 + confidence_bytes
1305 + self.file_segments.capacity()
1306 * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
1307 )
1308 }
1309}
1310
1311/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
1312///
1313/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
1314/// - Multiple readers can access the graph simultaneously
1315/// - Only one writer can hold the lock at a time
1316/// - Each write operation increments the epoch for cursor invalidation
1317///
1318/// # Design
1319///
1320/// The wrapper uses `parking_lot::RwLock` for efficient locking:
1321/// - Fair scheduling prevents writer starvation
1322/// - No poisoning (unlike `std::sync::RwLock`)
1323/// - Faster lock/unlock operations
1324///
1325/// # Phase 2 binding-plane access
1326///
1327/// Use the three-line snapshot pattern to access `BindingPlane`:
1328///
1329/// ```rust,ignore
1330/// let read_guard = concurrent.read();
1331/// let snapshot = read_guard.snapshot();
1332/// let plane = snapshot.binding_plane();
1333/// let resolution = plane.resolve(&query);
1334/// ```
1335///
1336/// The explicit snapshot handle makes the MVCC lifetime visible at the call
1337/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
1338/// API is exposed through `BindingPlane`.
1339///
1340/// # Usage
1341///
1342/// ```rust
1343/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
1344///
1345/// let graph = ConcurrentCodeGraph::new();
1346///
1347/// // Read access (multiple readers allowed)
1348/// {
1349/// let guard = graph.read();
1350/// let _nodes = guard.nodes();
1351/// }
1352///
1353/// // Write access (exclusive)
1354/// {
1355/// let mut guard = graph.write();
1356/// let _nodes = guard.nodes_mut();
1357/// }
1358///
1359/// // Snapshot for long queries
1360/// let snapshot = graph.snapshot();
1361/// ```
pub struct ConcurrentCodeGraph {
    /// The underlying code graph protected by a read-write lock.
    inner: RwLock<CodeGraph>,
    /// Global epoch counter for cursor validation; bumped by `write`
    /// and by every successful `try_write`.
    epoch: AtomicU64,
}
1368
1369impl ConcurrentCodeGraph {
1370 /// Creates a new empty `ConcurrentCodeGraph`.
1371 #[must_use]
1372 pub fn new() -> Self {
1373 Self {
1374 inner: RwLock::new(CodeGraph::new()),
1375 epoch: AtomicU64::new(0),
1376 }
1377 }
1378
1379 /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
1380 #[must_use]
1381 pub fn from_graph(graph: CodeGraph) -> Self {
1382 let epoch = graph.epoch();
1383 Self {
1384 inner: RwLock::new(graph),
1385 epoch: AtomicU64::new(epoch),
1386 }
1387 }
1388
1389 /// Acquires a read lock on the graph.
1390 ///
1391 /// Multiple readers can hold the lock simultaneously.
1392 /// This does not increment the epoch.
1393 #[inline]
1394 pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
1395 self.inner.read()
1396 }
1397
1398 /// Acquires a write lock on the graph.
1399 ///
1400 /// Only one writer can hold the lock at a time.
1401 /// This increments the global epoch counter.
1402 #[inline]
1403 pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1404 // Increment the global epoch
1405 self.epoch.fetch_add(1, Ordering::SeqCst);
1406 let mut guard = self.inner.write();
1407 // Sync the inner graph's epoch with the global epoch
1408 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1409 guard
1410 }
1411
1412 /// Returns the current global epoch.
1413 ///
1414 /// This can be used to detect if the graph has been modified
1415 /// since a previous operation (cursor invalidation).
1416 #[inline]
1417 #[must_use]
1418 pub fn epoch(&self) -> u64 {
1419 self.epoch.load(Ordering::SeqCst)
1420 }
1421
1422 /// Creates a cheap snapshot of the graph.
1423 ///
1424 /// This acquires a brief read lock to clone the Arc references.
1425 /// The snapshot is isolated from future mutations.
1426 #[must_use]
1427 pub fn snapshot(&self) -> GraphSnapshot {
1428 self.inner.read().snapshot()
1429 }
1430
1431 // ------------------------------------------------------------------
1432 // Phase 1 fact-layer provenance convenience accessors.
1433 // These acquire a brief read lock, matching the snapshot() pattern.
1434 // ------------------------------------------------------------------
1435
1436 /// Returns the monotonic fact-layer epoch from the underlying graph.
1437 #[must_use]
1438 pub fn fact_epoch(&self) -> u64 {
1439 self.inner.read().fact_epoch()
1440 }
1441
1442 /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1443 #[must_use]
1444 pub fn node_provenance(
1445 &self,
1446 id: crate::graph::unified::node::id::NodeId,
1447 ) -> Option<NodeProvenance> {
1448 self.inner.read().node_provenance(id).copied()
1449 }
1450
1451 /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1452 #[must_use]
1453 pub fn edge_provenance(
1454 &self,
1455 id: crate::graph::unified::edge::id::EdgeId,
1456 ) -> Option<EdgeProvenance> {
1457 self.inner.read().edge_provenance(id).copied()
1458 }
1459
1460 /// Returns a file provenance view (acquires a brief read lock).
1461 ///
1462 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1463 #[must_use]
1464 pub fn file_provenance(
1465 &self,
1466 id: crate::graph::unified::file::id::FileId,
1467 ) -> Option<OwnedFileProvenanceView> {
1468 let guard = self.inner.read();
1469 guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1470 content_hash: *v.content_hash,
1471 indexed_at: v.indexed_at,
1472 source_uri: v.source_uri,
1473 is_external: v.is_external,
1474 })
1475 }
1476
1477 // ------------------------------------------------------------------
1478 // Phase 2 binding-plane accessors (P2U03).
1479 // ------------------------------------------------------------------
1480
1481 /// Returns the scope arena from the underlying graph (acquires a brief
1482 /// read lock).
1483 ///
1484 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1485 /// this call site.
1486 #[must_use]
1487 pub fn scope_arena(&self) -> Arc<ScopeArena> {
1488 Arc::clone(&self.inner.read().scope_arena)
1489 }
1490
1491 /// Returns the alias table from the underlying graph (acquires a brief
1492 /// read lock).
1493 ///
1494 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1495 /// this call site.
1496 #[must_use]
1497 pub fn alias_table(&self) -> Arc<AliasTable> {
1498 Arc::clone(&self.inner.read().alias_table)
1499 }
1500
1501 /// Returns the shadow table from the underlying graph (acquires a brief
1502 /// read lock).
1503 ///
1504 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1505 /// this call site.
1506 #[must_use]
1507 pub fn shadow_table(&self) -> Arc<ShadowTable> {
1508 Arc::clone(&self.inner.read().shadow_table)
1509 }
1510
1511 /// Returns the scope provenance store from the underlying graph (acquires
1512 /// a brief read lock).
1513 ///
1514 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1515 /// this call site.
1516 #[must_use]
1517 pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1518 Arc::clone(&self.inner.read().scope_provenance_store)
1519 }
1520
1521 /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1522 ///
1523 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1524 #[must_use]
1525 pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1526 self.inner.read().scope_provenance(id).cloned()
1527 }
1528
1529 /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1530 /// brief read lock).
1531 ///
1532 /// Returns `None` if no provenance record is registered for that stable id.
1533 #[must_use]
1534 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1535 self.inner.read().scope_by_stable_id(stable)
1536 }
1537
1538 /// Returns the file segment table from the underlying graph (acquires a
1539 /// brief read lock).
1540 #[must_use]
1541 pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1542 Arc::clone(&self.inner.read().file_segments)
1543 }
1544
1545 /// Attempts to acquire a read lock without blocking.
1546 ///
1547 /// Returns `None` if the lock is currently held exclusively.
1548 #[inline]
1549 #[must_use]
1550 pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1551 self.inner.try_read()
1552 }
1553
1554 /// Attempts to acquire a write lock without blocking.
1555 ///
1556 /// Returns `None` if the lock is currently held by another thread.
1557 /// If successful, increments the epoch.
1558 #[inline]
1559 pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1560 self.inner.try_write().map(|mut guard| {
1561 self.epoch.fetch_add(1, Ordering::SeqCst);
1562 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1563 guard
1564 })
1565 }
1566}
1567
1568impl Default for ConcurrentCodeGraph {
1569 fn default() -> Self {
1570 Self::new()
1571 }
1572}
1573
1574impl fmt::Debug for ConcurrentCodeGraph {
1575 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1576 f.debug_struct("ConcurrentCodeGraph")
1577 .field("epoch", &self.epoch.load(Ordering::SeqCst))
1578 .finish_non_exhaustive()
1579 }
1580}
1581
/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OwnedFileProvenanceView {
    /// SHA-256 of the on-disk file bytes (owned copy, dereferenced from
    /// the borrowed view's `content_hash`).
    pub content_hash: [u8; 32],
    /// Unix-epoch seconds at which this file was registered.
    pub indexed_at: u64,
    /// Optional interned physical origin URI.
    pub source_uri: Option<StringId>,
    /// Whether this file originates from an external source.
    pub is_external: bool,
}
1595
1596/// Immutable snapshot of a `CodeGraph` for long-running queries.
1597///
1598/// `GraphSnapshot` holds Arc references to the graph components,
1599/// providing a consistent view that is isolated from concurrent mutations.
1600///
1601/// # Design
1602///
1603/// Snapshots are created via `CodeGraph::snapshot()` or
1604/// `ConcurrentCodeGraph::snapshot()`. They are:
1605///
1606/// - **Immutable**: No mutation methods available
1607/// - **Isolated**: Independent of future graph mutations
1608/// - **Cheap**: Only Arc clones, no data copying
1609/// - **Self-contained**: Can outlive the original graph/lock
1610///
1611/// # Usage
1612///
1613/// ```rust
1614/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1615///
1616/// let graph = ConcurrentCodeGraph::new();
1617///
1618/// // Create snapshot for a long query
1619/// let snapshot: GraphSnapshot = graph.snapshot();
1620///
1621/// // Snapshot can be used independently
1622/// let _epoch = snapshot.epoch();
1623/// ```
#[derive(Clone)]
pub struct GraphSnapshot {
    /// Node storage snapshot.
    nodes: Arc<NodeArena>,
    /// Edge storage snapshot.
    edges: Arc<BidirectionalEdgeStore>,
    /// String interner snapshot.
    strings: Arc<StringInterner>,
    /// File registry snapshot.
    files: Arc<FileRegistry>,
    /// Auxiliary indices snapshot.
    indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata snapshot.
    macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance snapshot (Phase 1).
    node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance snapshot (Phase 1).
    edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch at snapshot time.
    fact_epoch: u64,
    /// Epoch at snapshot time (for cursor validation).
    epoch: u64,
    /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
    scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
    alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
    shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
    scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
    file_segments: Arc<FileSegmentTable>,
}
1657
1658impl GraphSnapshot {
1659 /// Returns a reference to the node arena.
1660 #[inline]
1661 #[must_use]
1662 pub fn nodes(&self) -> &NodeArena {
1663 &self.nodes
1664 }
1665
1666 /// Returns a reference to the bidirectional edge store.
1667 #[inline]
1668 #[must_use]
1669 pub fn edges(&self) -> &BidirectionalEdgeStore {
1670 &self.edges
1671 }
1672
1673 /// Returns a reference to the string interner.
1674 #[inline]
1675 #[must_use]
1676 pub fn strings(&self) -> &StringInterner {
1677 &self.strings
1678 }
1679
1680 /// Returns a reference to the file registry.
1681 #[inline]
1682 #[must_use]
1683 pub fn files(&self) -> &FileRegistry {
1684 &self.files
1685 }
1686
1687 /// Returns a reference to the auxiliary indices.
1688 #[inline]
1689 #[must_use]
1690 pub fn indices(&self) -> &AuxiliaryIndices {
1691 &self.indices
1692 }
1693
1694 /// Returns a reference to the macro boundary metadata store.
1695 #[inline]
1696 #[must_use]
1697 pub fn macro_metadata(&self) -> &NodeMetadataStore {
1698 &self.macro_metadata
1699 }
1700
1701 // ------------------------------------------------------------------
1702 // Phase 1 fact-layer provenance accessors (P1U09).
1703 // ------------------------------------------------------------------
1704
1705 /// Returns the monotonic fact-layer epoch.
1706 #[inline]
1707 #[must_use]
1708 pub fn fact_epoch(&self) -> u64 {
1709 self.fact_epoch
1710 }
1711
1712 /// Looks up node provenance by `NodeId`.
1713 #[inline]
1714 #[must_use]
1715 pub fn node_provenance(
1716 &self,
1717 id: crate::graph::unified::node::id::NodeId,
1718 ) -> Option<&NodeProvenance> {
1719 self.node_provenance.lookup(id)
1720 }
1721
1722 /// Looks up edge provenance by `EdgeId`.
1723 #[inline]
1724 #[must_use]
1725 pub fn edge_provenance(
1726 &self,
1727 id: crate::graph::unified::edge::id::EdgeId,
1728 ) -> Option<&EdgeProvenance> {
1729 self.edge_provenance.lookup(id)
1730 }
1731
1732 /// Returns a borrowed provenance view for a file.
1733 #[inline]
1734 #[must_use]
1735 pub fn file_provenance(
1736 &self,
1737 id: crate::graph::unified::file::id::FileId,
1738 ) -> Option<FileProvenanceView<'_>> {
1739 self.files.file_provenance(id)
1740 }
1741
1742 // ------------------------------------------------------------------
1743 // Phase 2 binding-plane accessors (P2U03).
1744 // ------------------------------------------------------------------
1745
1746 /// Returns a reference to the scope arena at snapshot time.
1747 ///
1748 /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1749 /// as it existed when `snapshot()` was called. Subsequent calls to
1750 /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1751 #[inline]
1752 #[must_use]
1753 pub fn scope_arena(&self) -> &ScopeArena {
1754 &self.scope_arena
1755 }
1756
1757 /// Returns a reference to the alias table at snapshot time.
1758 ///
1759 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1760 /// as it existed when `snapshot()` was called. Subsequent calls to
1761 /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1762 #[inline]
1763 #[must_use]
1764 pub fn alias_table(&self) -> &AliasTable {
1765 &self.alias_table
1766 }
1767
1768 /// Returns a reference to the shadow table at snapshot time.
1769 ///
1770 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1771 /// as it existed when `snapshot()` was called. Subsequent calls to
1772 /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1773 #[inline]
1774 #[must_use]
1775 pub fn shadow_table(&self) -> &ShadowTable {
1776 &self.shadow_table
1777 }
1778
1779 /// Returns a reference to the scope provenance store at snapshot time.
1780 ///
1781 /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1782 /// as it existed when `snapshot()` was called. Subsequent calls to
1783 /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1784 /// this view.
1785 #[inline]
1786 #[must_use]
1787 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1788 &self.scope_provenance_store
1789 }
1790
1791 /// Looks up scope provenance by `ScopeId` at snapshot time.
1792 ///
1793 /// Returns `None` if the slot is out of range, vacant, or the stored
1794 /// generation does not match (stale handle).
1795 #[inline]
1796 #[must_use]
1797 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1798 self.scope_provenance_store.lookup(id)
1799 }
1800
1801 /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1802 ///
1803 /// Returns `None` if no provenance record is registered for that stable id.
1804 #[inline]
1805 #[must_use]
1806 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1807 self.scope_provenance_store.scope_by_stable_id(stable)
1808 }
1809
1810 /// Returns a reference to the file segment table at snapshot time.
1811 #[inline]
1812 #[must_use]
1813 pub fn file_segments(&self) -> &FileSegmentTable {
1814 &self.file_segments
1815 }
1816
1817 /// Returns the epoch at which this snapshot was taken.
1818 ///
1819 /// This can be compared against the current graph epoch to
1820 /// detect if the graph has changed since the snapshot.
1821 #[inline]
1822 #[must_use]
1823 pub fn epoch(&self) -> u64 {
1824 self.epoch
1825 }
1826
1827 /// Returns `true` if this snapshot's epoch matches the given epoch.
1828 ///
1829 /// Use this to validate cursors before continuing pagination.
1830 #[inline]
1831 #[must_use]
1832 pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1833 self.epoch == other_epoch
1834 }
1835
1836 // ------------------------------------------------------------------
1837 // Phase 2 binding-plane facade accessor (P2U07).
1838 // ------------------------------------------------------------------
1839
1840 /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1841 ///
1842 /// The facade is the stable Phase 2 public API for scope/alias/shadow
1843 /// queries and witness-bearing resolution. It provides a single entry
1844 /// point (`resolve`) that returns both a `BindingResult` and an ordered
1845 /// step trace in a `BindingResolution`.
1846 ///
1847 /// # MVCC note
1848 ///
1849 /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1850 /// MVCC-consistent view of the graph at snapshot time. Callers from
1851 /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1852 /// pattern so the snapshot lifetime is explicit:
1853 ///
1854 /// ```rust,ignore
1855 /// // CodeGraph caller:
1856 /// let snapshot = graph.snapshot();
1857 /// let plane = snapshot.binding_plane();
1858 ///
1859 /// // ConcurrentCodeGraph caller:
1860 /// let read_guard = concurrent.read();
1861 /// let snapshot = read_guard.snapshot();
1862 /// let plane = snapshot.binding_plane();
1863 /// ```
1864 #[inline]
1865 #[must_use]
1866 pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
1867 crate::graph::unified::bind::plane::BindingPlane::new(self)
1868 }
1869
1870 // ============================================================================
1871 // Query Methods
1872 // ============================================================================
1873
1874 /// Finds nodes matching a pattern.
1875 ///
1876 /// Performs a simple substring match on node names and qualified names.
1877 /// Returns all matching node IDs.
1878 ///
1879 /// **Synthetic suppression (`C_SUPPRESS`):** synthetic placeholder
1880 /// nodes — internal scaffolding the language plugins emit for
1881 /// binding-plane and scope analysis (e.g. the Go plugin's
1882 /// `<field:operand.field>` field-access shadows and the
1883 /// `<ident>@<offset>` per-binding-site Variable nodes from the
1884 /// local-scope resolver) — are filtered out by default. Internal
1885 /// callers that need to reach these nodes (binding plane, scope /
1886 /// alias / shadow analysis) use
1887 /// [`Self::find_by_pattern_with_options`] with
1888 /// `include_synthetic = true`.
1889 ///
1890 /// # Performance
1891 ///
1892 /// Optimized to iterate over unique strings in the interner (smaller set)
1893 /// rather than all nodes in the arena.
1894 ///
1895 /// # Arguments
1896 ///
1897 /// * `pattern` - The pattern to match (substring search)
1898 ///
1899 /// # Returns
1900 ///
1901 /// A vector of `NodeIds` for all matching nodes (synthetic
1902 /// placeholders excluded).
1903 #[must_use]
1904 pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
1905 self.find_by_pattern_with_options(pattern, false)
1906 }
1907
1908 /// Finds nodes matching a pattern with explicit control over synthetic
1909 /// placeholder visibility.
1910 ///
1911 /// `include_synthetic = false` is the default surface used by every
1912 /// user-facing caller (CLI `search`, MCP `semantic_search` /
1913 /// `pattern_search` / `relation_query`, etc.). Synthetic
1914 /// placeholders are suppressed via two parallel checks that must
1915 /// agree:
1916 ///
1917 /// 1. The authoritative `NodeMetadata::Synthetic` bit on the
1918 /// metadata store
1919 /// ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
1920 /// 2. The structural name-shape fallback
1921 /// ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
1922 /// for V10 snapshots written before the synthetic bit existed
1923 /// and for cross-file unification losers that retained their
1924 /// name but lost their metadata entry.
1925 ///
1926 /// Either check matching is sufficient to suppress the node. The
1927 /// design lives in `docs/development/public-issue-triage/`
1928 /// under the `C_SUPPRESS` unit; see also the rationale in
1929 /// [`crate::graph::unified::storage::metadata::NodeMetadata::Synthetic`].
1930 ///
1931 /// `include_synthetic = true` is **internal-only**. The binding
1932 /// plane, scope resolver, and rebuild's coverage gate use this
1933 /// path to reach synthetic nodes for their structural integrity
1934 /// checks. **No CLI / MCP surface should ever pass `true`.**
1935 #[must_use]
1936 pub fn find_by_pattern_with_options(
1937 &self,
1938 pattern: &str,
1939 include_synthetic: bool,
1940 ) -> Vec<crate::graph::unified::node::NodeId> {
1941 let mut matches = Vec::new();
1942
1943 // 1. Scan unique strings in interner for matches
1944 for (str_id, s) in self.strings.iter() {
1945 if s.contains(pattern) {
1946 // 2. If string matches, look up all nodes with this name
1947 // Check qualified name index
1948 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
1949 // Check simple name index
1950 matches.extend_from_slice(self.indices.by_name(str_id));
1951 }
1952 }
1953
1954 // Deduplicate matches (a node might match both qualified and simple name)
1955 matches.sort_unstable();
1956 matches.dedup();
1957
1958 if !include_synthetic {
1959 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
1960 }
1961
1962 matches
1963 }
1964
1965 /// Finds nodes whose interned simple **or** qualified name equals `name`,
1966 /// accepting dot- and Ruby-`#` qualified display form as a fallback for
1967 /// graph-canonical `::` qualified names.
1968 ///
1969 /// This is the canonical surface for **exact-name** lookups —
1970 /// shared by the CLI `--exact <pattern>` shorthand
1971 /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
1972 /// structural query planner's `name:` predicate
1973 /// (`sqry-db/src/planner/parse.rs`,
1974 /// `sqry-db/src/planner/execute.rs`). Both surfaces are
1975 /// contract-bound (DAG `B1_ALIGN`) to return the same set against
1976 /// any fixture: the CLI calls this method directly, while the
1977 /// planner uses the same interner + by-name index pair internally
1978 /// when scanning, then applies the same synthetic filter.
1979 ///
1980 /// **Synthetic suppression.** Synthetic placeholder nodes
1981 /// (Go-plugin `<field:operand.field>` shadows and
1982 /// `<ident>@<offset>` per-binding-site Variables; see
1983 /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
1984 /// are excluded via [`Self::is_node_synthetic`]. There is **no**
1985 /// `include_synthetic = true` variant for the exact-match surface
1986 /// because the synthetic name shapes the structural fallback
1987 /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
1988 /// name byte-for-byte; the metadata-bit channel is the only
1989 /// realistic leak vector and it is suppressed unconditionally.
1990 ///
1991 /// **Display fallback.** Exact lookup checks the literal input. When the
1992 /// input is qualified, language-aware display candidates win over raw
1993 /// canonical matches. This keeps native Rust `identity::T` from also
1994 /// returning a TypeScript type parameter whose internal canonical form is
1995 /// `identity::T` but whose display form is `identity.T`. If neither
1996 /// display nor literal lookup finds candidates, dot- and Ruby-`#`
1997 /// qualified inputs also check the graph-canonical `::` rewrite.
1998 ///
1999 /// # Performance
2000 ///
2001 /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
2002 /// interned the resolver still tries a native-display to graph-canonical
2003 /// `::` fallback for user-facing qualified names before returning empty.
2004 ///
2005 /// # Arguments
2006 ///
2007 /// * `name` - The exact name to look up (no glob, no regex).
2008 ///
2009 /// # Returns
2010 ///
2011 /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2012 /// whose `entry.name`, `entry.qualified_name`, or language-aware display
2013 /// name equals `name`, or matches for the graph-canonical `::` rewrite
2014 /// when the user supplied a dot- or Ruby-`#` qualified name that had no
2015 /// literal or display candidates.
2016 #[must_use]
2017 pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2018 let is_qualified = name.contains('.') || name.contains('#') || name.contains("::");
2019 if !is_qualified {
2020 // Bare name lookup: the interned `by_name` index covers
2021 // every language and is O(1); no display scan is needed.
2022 return self.find_by_exact_interned_name(name);
2023 }
2024 // Qualified name: each plugin stores its native form
2025 // (PHP `Ledger.promotedField`, Rust `Ledger::promotedField`,
2026 // Ruby `Ledger#promotedField`). Scan computed display names so
2027 // a single dotted user query resolves to all language matches,
2028 // then fall back to the interned form (and the graph-canonical
2029 // `::` rewrite) when display yields nothing.
2030 let mut exact_matches = self.find_by_exact_display_name(name);
2031 if exact_matches.is_empty() {
2032 exact_matches = self.find_by_exact_interned_name(name);
2033 if exact_matches.is_empty() && !name.contains("::") {
2034 let canonical = name.replace(['.', '#'], "::");
2035 exact_matches.extend(self.find_by_exact_interned_name(&canonical));
2036 exact_matches.sort_unstable();
2037 exact_matches.dedup();
2038 }
2039 }
2040 exact_matches
2041 }
2042
2043 fn find_by_exact_display_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2044 let mut matches = self
2045 .iter_nodes()
2046 .filter(|(node_id, _)| !self.is_node_synthetic(*node_id))
2047 .filter_map(|(node_id, entry)| {
2048 let qualified = entry
2049 .qualified_name
2050 .and_then(|sid| self.strings.resolve(sid))?;
2051 let display = self.files.language_for_file(entry.file).map_or_else(
2052 || qualified.to_string(),
2053 |language| {
2054 display_graph_qualified_name(
2055 language,
2056 qualified.as_ref(),
2057 entry.kind,
2058 entry.is_static,
2059 )
2060 },
2061 );
2062 (display == name).then_some(node_id)
2063 })
2064 .collect::<Vec<_>>();
2065 matches.sort_unstable();
2066 matches.dedup();
2067 matches
2068 }
2069
2070 fn find_by_exact_interned_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2071 let Some(str_id) = self.strings.get(name) else {
2072 return Vec::new();
2073 };
2074
2075 let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2076 matches.extend_from_slice(self.indices.by_name(str_id));
2077 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2078 matches.sort_unstable();
2079 matches.dedup();
2080 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2081 matches
2082 }
2083
2084 /// Returns `true` if the node should be treated as a synthetic
2085 /// placeholder for user-facing surfaces.
2086 ///
2087 /// Combines the metadata-store flag and the structural name-shape
2088 /// fallback (see [`Self::find_by_pattern_with_options`] for the
2089 /// full rationale). Returns `false` for missing nodes (an unknown
2090 /// `NodeId` is not "synthetic" — it is "not present").
2091 #[must_use]
2092 pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2093 // Authoritative check: metadata-store bit (NodeMetadata::Synthetic).
2094 if self.macro_metadata.is_synthetic(node_id) {
2095 return true;
2096 }
2097 // Structural fallback: name shape recognised as synthetic.
2098 // Required for V10 snapshots written before the bit existed,
2099 // for unification losers that lost their metadata entry, and
2100 // as defence-in-depth against future plugins forgetting to
2101 // flip the bit.
2102 let Some(entry) = self.nodes.get(node_id) else {
2103 return false;
2104 };
2105 if entry.is_unified_loser() {
2106 // Already invisible for other reasons; do not also flag as synthetic.
2107 return false;
2108 }
2109 let Some(name) = self.strings.resolve(entry.name) else {
2110 return false;
2111 };
2112 crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2113 name.as_ref(),
2114 )
2115 }
2116
2117 /// Gets all callees of a node (functions called by this node).
2118 ///
2119 /// Queries the forward edge store for all Calls edges from this node.
2120 ///
2121 /// # Arguments
2122 ///
2123 /// * `node` - The node ID to query
2124 ///
2125 /// # Returns
2126 ///
2127 /// A vector of `NodeIds` representing functions called by this node.
2128 #[must_use]
2129 pub fn get_callees(
2130 &self,
2131 node: crate::graph::unified::node::NodeId,
2132 ) -> Vec<crate::graph::unified::node::NodeId> {
2133 use crate::graph::unified::edge::EdgeKind;
2134
2135 self.edges
2136 .edges_from(node)
2137 .into_iter()
2138 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2139 .map(|edge| edge.target)
2140 .collect()
2141 }
2142
2143 /// Gets all callers of a node (functions that call this node).
2144 ///
2145 /// Queries the reverse edge store for all Calls edges to this node.
2146 ///
2147 /// # Arguments
2148 ///
2149 /// * `node` - The node ID to query
2150 ///
2151 /// # Returns
2152 ///
2153 /// A vector of `NodeIds` representing functions that call this node.
2154 #[must_use]
2155 pub fn get_callers(
2156 &self,
2157 node: crate::graph::unified::node::NodeId,
2158 ) -> Vec<crate::graph::unified::node::NodeId> {
2159 use crate::graph::unified::edge::EdgeKind;
2160
2161 self.edges
2162 .edges_to(node)
2163 .into_iter()
2164 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2165 .map(|edge| edge.source)
2166 .collect()
2167 }
2168
2169 /// Iterates over all nodes in the graph.
2170 ///
2171 /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2172 /// occupied slots in the arena.
2173 ///
2174 /// # Returns
2175 ///
2176 /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2177 pub fn iter_nodes(
2178 &self,
2179 ) -> impl Iterator<
2180 Item = (
2181 crate::graph::unified::node::NodeId,
2182 &crate::graph::unified::storage::arena::NodeEntry,
2183 ),
2184 > {
2185 self.nodes.iter()
2186 }
2187
2188 /// Iterates over all edges in the graph.
2189 ///
2190 /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2191 /// all edges in the forward edge store.
2192 ///
2193 /// # Returns
2194 ///
2195 /// An iterator over edge tuples.
2196 pub fn iter_edges(
2197 &self,
2198 ) -> impl Iterator<
2199 Item = (
2200 crate::graph::unified::node::NodeId,
2201 crate::graph::unified::node::NodeId,
2202 crate::graph::unified::edge::EdgeKind,
2203 ),
2204 > + '_ {
2205 // Iterate over all nodes in the arena and get their outgoing edges
2206 self.nodes.iter().flat_map(move |(node_id, _entry)| {
2207 // Get all edges from this node
2208 self.edges
2209 .edges_from(node_id)
2210 .into_iter()
2211 .map(move |edge| (node_id, edge.target, edge.kind))
2212 })
2213 }
2214
2215 /// Gets a node entry by ID.
2216 ///
2217 /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2218 /// if the ID is invalid or stale.
2219 ///
2220 /// # Arguments
2221 ///
2222 /// * `id` - The node ID to look up
2223 ///
2224 /// # Returns
2225 ///
2226 /// A reference to the `NodeEntry`, or None if not found.
2227 #[must_use]
2228 pub fn get_node(
2229 &self,
2230 id: crate::graph::unified::node::NodeId,
2231 ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2232 self.nodes.get(id)
2233 }
2234}
2235
impl fmt::Debug for GraphSnapshot {
    // Intentionally terse: a snapshot holds large Arc-backed stores, so only
    // the node count and epoch are printed; `finish_non_exhaustive` renders
    // the omitted fields as `..`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("GraphSnapshot")
            .field("nodes", &self.nodes.len())
            .field("epoch", &self.epoch)
            .finish_non_exhaustive()
    }
}
2244
/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
///
/// This lets helpers that only *read* graph state (name-matching, relation
/// traversal, reference lookups) be written once and called from both the
/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed;
/// every method mirrors an inherent accessor on the implementing type.
pub trait GraphAccess {
    /// Returns the node arena (read-only).
    fn nodes(&self) -> &NodeArena;
    /// Returns the bidirectional edge store (read-only).
    fn edges(&self) -> &BidirectionalEdgeStore;
    /// Returns the string interner (read-only).
    fn strings(&self) -> &StringInterner;
    /// Returns the file registry (read-only).
    fn files(&self) -> &FileRegistry;
    /// Returns the auxiliary indices (read-only).
    fn indices(&self) -> &AuxiliaryIndices;
}
2263
impl GraphAccess for CodeGraph {
    // Each method delegates via the fully-qualified inherent accessor
    // (`CodeGraph::nodes(self)` etc.) so the call cannot resolve back into
    // this trait method of the same name.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        CodeGraph::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        CodeGraph::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        CodeGraph::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        CodeGraph::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        CodeGraph::indices(self)
    }
}
2286
impl GraphAccess for GraphSnapshot {
    // Each method delegates via the fully-qualified inherent accessor
    // (`GraphSnapshot::nodes(self)` etc.) so the call cannot resolve back
    // into this trait method of the same name.
    #[inline]
    fn nodes(&self) -> &NodeArena {
        GraphSnapshot::nodes(self)
    }
    #[inline]
    fn edges(&self) -> &BidirectionalEdgeStore {
        GraphSnapshot::edges(self)
    }
    #[inline]
    fn strings(&self) -> &StringInterner {
        GraphSnapshot::strings(self)
    }
    #[inline]
    fn files(&self) -> &FileRegistry {
        GraphSnapshot::files(self)
    }
    #[inline]
    fn indices(&self) -> &AuxiliaryIndices {
        GraphSnapshot::indices(self)
    }
}
2309
2310#[cfg(test)]
2311mod tests {
2312 use super::*;
2313 use crate::graph::unified::{
2314 FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2315 SymbolResolutionOutcome,
2316 };
2317
2318 fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2319 match snapshot.resolve_symbol(&SymbolQuery {
2320 symbol,
2321 file_scope: FileScope::Any,
2322 mode: ResolutionMode::Strict,
2323 }) {
2324 SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2325 SymbolResolutionOutcome::NotFound
2326 | SymbolResolutionOutcome::FileNotIndexed
2327 | SymbolResolutionOutcome::Ambiguous(_) => None,
2328 }
2329 }
2330
2331 fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2332 match snapshot.find_symbol_candidates(&SymbolQuery {
2333 symbol,
2334 file_scope: FileScope::Any,
2335 mode: ResolutionMode::AllowSuffixCandidates,
2336 }) {
2337 SymbolCandidateOutcome::Candidates(candidates) => candidates,
2338 SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2339 }
2340 }
2341
2342 #[test]
2343 fn test_code_graph_new() {
2344 let graph = CodeGraph::new();
2345 assert_eq!(graph.epoch(), 0);
2346 assert_eq!(graph.nodes().len(), 0);
2347 }
2348
2349 #[test]
2350 fn test_code_graph_default() {
2351 let graph = CodeGraph::default();
2352 assert_eq!(graph.epoch(), 0);
2353 }
2354
2355 #[test]
2356 fn test_code_graph_snapshot() {
2357 let graph = CodeGraph::new();
2358 let snapshot = graph.snapshot();
2359 assert_eq!(snapshot.epoch(), 0);
2360 assert_eq!(snapshot.nodes().len(), 0);
2361 }
2362
2363 #[test]
2364 fn test_code_graph_bump_epoch() {
2365 let mut graph = CodeGraph::new();
2366 assert_eq!(graph.epoch(), 0);
2367 assert_eq!(graph.bump_epoch(), 1);
2368 assert_eq!(graph.epoch(), 1);
2369 assert_eq!(graph.bump_epoch(), 2);
2370 assert_eq!(graph.epoch(), 2);
2371 }
2372
2373 #[test]
2374 fn test_code_graph_set_epoch() {
2375 let mut graph = CodeGraph::new();
2376 graph.set_epoch(42);
2377 assert_eq!(graph.epoch(), 42);
2378 }
2379
2380 #[test]
2381 fn test_code_graph_from_components() {
2382 let nodes = NodeArena::new();
2383 let edges = BidirectionalEdgeStore::new();
2384 let strings = StringInterner::new();
2385 let files = FileRegistry::new();
2386 let indices = AuxiliaryIndices::new();
2387 let macro_metadata = NodeMetadataStore::new();
2388
2389 let graph =
2390 CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2391 assert_eq!(graph.epoch(), 0);
2392 }
2393
2394 #[test]
2395 fn test_code_graph_mut_accessors() {
2396 let mut graph = CodeGraph::new();
2397
2398 // Access mutable references - should not panic
2399 let _nodes = graph.nodes_mut();
2400 let _edges = graph.edges_mut();
2401 let _strings = graph.strings_mut();
2402 let _files = graph.files_mut();
2403 let _indices = graph.indices_mut();
2404 }
2405
2406 #[test]
2407 fn test_code_graph_snapshot_isolation() {
2408 let mut graph = CodeGraph::new();
2409 let snapshot1 = graph.snapshot();
2410
2411 // Mutate the graph
2412 graph.bump_epoch();
2413
2414 let snapshot2 = graph.snapshot();
2415
2416 // Snapshots should have different epochs
2417 assert_eq!(snapshot1.epoch(), 0);
2418 assert_eq!(snapshot2.epoch(), 1);
2419 }
2420
2421 #[test]
2422 fn test_code_graph_debug() {
2423 let graph = CodeGraph::new();
2424 let debug_str = format!("{graph:?}");
2425 assert!(debug_str.contains("CodeGraph"));
2426 assert!(debug_str.contains("epoch"));
2427 }
2428
    /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
    /// accounting: adding a `ConfidenceMetadata` entry with known inner
    /// Vec<String> capacities must increase `heap_bytes()` by exactly the sum
    /// of those capacities plus Vec<String> slot overhead.
    #[test]
    fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
        let mut graph = CodeGraph::new();
        // Seed one confidence entry so the HashMap has non-zero capacity;
        // reserve slack so the next insert cannot rehash.
        graph.set_confidence({
            let mut m = HashMap::with_capacity(8);
            m.insert("seed".to_string(), ConfidenceMetadata::default());
            m
        });
        let before = graph.heap_bytes();
        let before_cap = graph.confidence.capacity();

        // Capture each String's capacity *before* the values are moved into
        // the metadata struct — the expected delta is computed from these.
        let lim1 = String::from("no type inference");
        let lim2 = String::from("no generic specialization");
        let feat1 = String::from("rust-analyzer");
        let l1 = lim1.capacity();
        let l2 = lim2.capacity();
        let f1 = feat1.capacity();

        let limitations = vec![lim1, lim2];
        let lim_vec_cap = limitations.capacity();

        let unavailable_features = vec![feat1];
        let feat_vec_cap = unavailable_features.capacity();

        let key = String::from("rust");
        let key_cap = key.capacity();
        graph.confidence.insert(
            key,
            ConfidenceMetadata {
                limitations,
                unavailable_features,
                ..Default::default()
            },
        );
        // If the map rehashed, the table allocation changed and the
        // exact-byte delta below would be meaningless — fail loudly.
        assert_eq!(
            graph.confidence.capacity(),
            before_cap,
            "prerequisite: confidence HashMap must not rehash during the test insert",
        );

        let after = graph.heap_bytes();
        // key bytes + each Vec's slot array + each String's own buffer.
        let expected_inner = key_cap
            + lim_vec_cap * std::mem::size_of::<String>()
            + l1
            + l2
            + feat_vec_cap * std::mem::size_of::<String>()
            + f1;
        assert_eq!(
            after - before,
            expected_inner,
            "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
        );
    }

    #[test]
    fn test_codegraph_heap_bytes_grows_with_content() {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // An empty graph reports some heap bytes (FileRegistry seeds index 0
        // with `vec![None]`, HashMaps have non-zero base capacity once touched,
        // etc.) but the value must be finite and well under the 100 MB cap.
        let empty = CodeGraph::new();
        let empty_bytes = empty.heap_bytes();
        assert!(
            empty_bytes < 100 * 1024 * 1024,
            "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
        );

        // Populate a small graph: 32 functions, each with its own name,
        // qualified name, and file registration.
        let mut graph = CodeGraph::new();
        for i in 0..32u32 {
            let name = format!("sym_{i}");
            let qual = format!("module::sym_{i}");
            let file = format!("file_{i}.rs");

            let name_id = graph.strings_mut().intern(&name).unwrap();
            let qual_id = graph.strings_mut().intern(&qual).unwrap();
            let file_id = graph.files_mut().register(Path::new(&file)).unwrap();

            let entry =
                NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
            let node_id = graph.nodes_mut().alloc(entry).unwrap();
            graph
                .indices_mut()
                .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
        }

        // Monotonicity, not exact bytes: populated > empty, both bounded.
        let populated_bytes = graph.heap_bytes();
        assert!(
            populated_bytes > 0,
            "populated graph should report nonzero heap bytes"
        );
        assert!(
            populated_bytes > empty_bytes,
            "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
        );
        assert!(
            populated_bytes < 100 * 1024 * 1024,
            "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
        );
    }
2537
2538 #[test]
2539 fn test_concurrent_code_graph_new() {
2540 let graph = ConcurrentCodeGraph::new();
2541 assert_eq!(graph.epoch(), 0);
2542 }
2543
2544 #[test]
2545 fn test_concurrent_code_graph_default() {
2546 let graph = ConcurrentCodeGraph::default();
2547 assert_eq!(graph.epoch(), 0);
2548 }
2549
2550 #[test]
2551 fn test_concurrent_code_graph_from_graph() {
2552 let mut inner = CodeGraph::new();
2553 inner.set_epoch(10);
2554 let graph = ConcurrentCodeGraph::from_graph(inner);
2555 assert_eq!(graph.epoch(), 10);
2556 }
2557
2558 #[test]
2559 fn test_concurrent_code_graph_read() {
2560 let graph = ConcurrentCodeGraph::new();
2561 let guard = graph.read();
2562 assert_eq!(guard.epoch(), 0);
2563 assert_eq!(guard.nodes().len(), 0);
2564 }
2565
2566 #[test]
2567 fn test_concurrent_code_graph_write_increments_epoch() {
2568 let graph = ConcurrentCodeGraph::new();
2569 assert_eq!(graph.epoch(), 0);
2570
2571 {
2572 let guard = graph.write();
2573 assert_eq!(guard.epoch(), 1);
2574 }
2575
2576 assert_eq!(graph.epoch(), 1);
2577
2578 {
2579 let _guard = graph.write();
2580 }
2581
2582 assert_eq!(graph.epoch(), 2);
2583 }
2584
2585 #[test]
2586 fn test_concurrent_code_graph_snapshot() {
2587 let graph = ConcurrentCodeGraph::new();
2588
2589 {
2590 let _guard = graph.write();
2591 }
2592
2593 let snapshot = graph.snapshot();
2594 assert_eq!(snapshot.epoch(), 1);
2595 }
2596
2597 #[test]
2598 fn test_concurrent_code_graph_try_read() {
2599 let graph = ConcurrentCodeGraph::new();
2600 let guard = graph.try_read();
2601 assert!(guard.is_some());
2602 }
2603
2604 #[test]
2605 fn test_concurrent_code_graph_try_write() {
2606 let graph = ConcurrentCodeGraph::new();
2607 let guard = graph.try_write();
2608 assert!(guard.is_some());
2609 assert_eq!(graph.epoch(), 1);
2610 }
2611
2612 #[test]
2613 fn test_concurrent_code_graph_debug() {
2614 let graph = ConcurrentCodeGraph::new();
2615 let debug_str = format!("{graph:?}");
2616 assert!(debug_str.contains("ConcurrentCodeGraph"));
2617 assert!(debug_str.contains("epoch"));
2618 }
2619
2620 #[test]
2621 fn test_graph_snapshot_accessors() {
2622 let graph = CodeGraph::new();
2623 let snapshot = graph.snapshot();
2624
2625 // All accessors should work
2626 let _nodes = snapshot.nodes();
2627 let _edges = snapshot.edges();
2628 let _strings = snapshot.strings();
2629 let _files = snapshot.files();
2630 let _indices = snapshot.indices();
2631 let _epoch = snapshot.epoch();
2632 }
2633
2634 #[test]
2635 fn test_graph_snapshot_epoch_matches() {
2636 let graph = CodeGraph::new();
2637 let snapshot = graph.snapshot();
2638
2639 assert!(snapshot.epoch_matches(0));
2640 assert!(!snapshot.epoch_matches(1));
2641 }
2642
2643 #[test]
2644 fn test_graph_snapshot_clone() {
2645 let graph = CodeGraph::new();
2646 let snapshot1 = graph.snapshot();
2647 let snapshot2 = snapshot1.clone();
2648
2649 assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2650 }
2651
2652 #[test]
2653 fn test_graph_snapshot_debug() {
2654 let graph = CodeGraph::new();
2655 let snapshot = graph.snapshot();
2656 let debug_str = format!("{snapshot:?}");
2657 assert!(debug_str.contains("GraphSnapshot"));
2658 assert!(debug_str.contains("epoch"));
2659 }
2660
2661 #[test]
2662 fn test_multiple_readers() {
2663 let graph = ConcurrentCodeGraph::new();
2664
2665 // Multiple readers should be able to acquire locks simultaneously
2666 let guard1 = graph.read();
2667 let guard2 = graph.read();
2668 let guard3 = graph.read();
2669
2670 assert_eq!(guard1.epoch(), 0);
2671 assert_eq!(guard2.epoch(), 0);
2672 assert_eq!(guard3.epoch(), 0);
2673 }
2674
2675 #[test]
2676 fn test_code_graph_clone() {
2677 let mut graph = CodeGraph::new();
2678 graph.bump_epoch();
2679
2680 let cloned = graph.clone();
2681 assert_eq!(cloned.epoch(), 1);
2682 }
2683
2684 #[test]
2685 fn test_epoch_wrapping() {
2686 let mut graph = CodeGraph::new();
2687 graph.set_epoch(u64::MAX);
2688 let new_epoch = graph.bump_epoch();
2689 assert_eq!(new_epoch, 0); // Should wrap around
2690 }
2691
2692 // ============================================================================
2693 // Query method tests
2694 // ============================================================================
2695
2696 #[test]
2697 fn test_snapshot_resolve_symbol() {
2698 use crate::graph::unified::node::NodeKind;
2699 use crate::graph::unified::storage::arena::NodeEntry;
2700 use std::path::Path;
2701
2702 let mut graph = CodeGraph::new();
2703
2704 // Add some nodes with qualified names
2705 let name_id = graph.strings_mut().intern("test_func").unwrap();
2706 let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2707 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2708
2709 let entry =
2710 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2711
2712 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2713 graph.indices_mut().add(
2714 node_id,
2715 NodeKind::Function,
2716 name_id,
2717 Some(qual_name_id),
2718 file_id,
2719 );
2720
2721 let snapshot = graph.snapshot();
2722
2723 // Find by qualified name
2724 let found = resolve_symbol_strict(&snapshot, "module::test_func");
2725 assert_eq!(found, Some(node_id));
2726
2727 // Find by exact simple name
2728 let found2 = resolve_symbol_strict(&snapshot, "test_func");
2729 assert_eq!(found2, Some(node_id));
2730
2731 // Not found
2732 assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2733 }
2734
2735 #[test]
2736 fn test_snapshot_find_by_pattern() {
2737 use crate::graph::unified::node::NodeKind;
2738 use crate::graph::unified::storage::arena::NodeEntry;
2739 use std::path::Path;
2740
2741 let mut graph = CodeGraph::new();
2742
2743 // Add nodes with different names
2744 let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2745 let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2746 let name3 = graph.strings_mut().intern("qux_test").unwrap();
2747 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2748
2749 let node1 = graph
2750 .nodes_mut()
2751 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2752 .unwrap();
2753 let node2 = graph
2754 .nodes_mut()
2755 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2756 .unwrap();
2757 let node3 = graph
2758 .nodes_mut()
2759 .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2760 .unwrap();
2761
2762 graph
2763 .indices_mut()
2764 .add(node1, NodeKind::Function, name1, None, file_id);
2765 graph
2766 .indices_mut()
2767 .add(node2, NodeKind::Function, name2, None, file_id);
2768 graph
2769 .indices_mut()
2770 .add(node3, NodeKind::Function, name3, None, file_id);
2771
2772 let snapshot = graph.snapshot();
2773
2774 // Find by pattern
2775 let matches = snapshot.find_by_pattern("bar");
2776 assert_eq!(matches.len(), 2);
2777 assert!(matches.contains(&node1));
2778 assert!(matches.contains(&node2));
2779
2780 // Find single match
2781 let matches = snapshot.find_by_pattern("qux");
2782 assert_eq!(matches.len(), 1);
2783 assert_eq!(matches[0], node3);
2784
2785 // No matches
2786 let matches = snapshot.find_by_pattern("nonexistent");
2787 assert!(matches.is_empty());
2788 }
2789
    #[test]
    fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
        // C_SUPPRESS: synthetic placeholder nodes (Go plugin
        // `<field:operand.field>` shadows + `<ident>@<offset>`
        // per-binding-site Variables) must NOT surface from
        // find_by_pattern, but must still be reachable via
        // find_by_pattern_with_options(_, true).
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        // Fixture: two real symbols (a Property and a local Variable) plus
        // three synthetic placeholders, all in one Go file and all
        // containing the substring "NeedTags".
        let mut graph = CodeGraph::new();
        let real_property = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
        let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        // Allocate all five fixture nodes in the arena.
        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
            .unwrap();
        let syn_a_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_a,
                file_id,
            ))
            .unwrap();
        let syn_b_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_b,
                file_id,
            ))
            .unwrap();

        // Register every node in the auxiliary indices so pattern lookups
        // can see them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, real_property, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            real_local_var,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_a_id,
            NodeKind::Variable,
            synthetic_offset_a,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_b_id,
            NodeKind::Variable,
            synthetic_offset_b,
            None,
            file_id,
        );

        // Flag two of them via the metadata-store bit (the canonical
        // Go-plugin emission path) and leave one (`<field:...>`) only
        // covered by the structural name-shape fallback to verify both
        // recognition channels suppress the leak.
        graph.macro_metadata_mut().mark_synthetic(syn_a_id);
        graph.macro_metadata_mut().mark_synthetic(syn_b_id);

        let snapshot = graph.snapshot();

        // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
        let matches = snapshot.find_by_pattern("NeedTags");
        assert!(matches.contains(&prop_id), "Property must be surfaced");
        assert!(
            matches.contains(&local_var_id),
            "real local var must be surfaced"
        );
        assert!(
            !matches.contains(&syn_field_id),
            "<field:...> synthetic must be suppressed (name-shape fallback)"
        );
        assert!(
            !matches.contains(&syn_a_id),
            "NeedTags@469 must be suppressed (metadata bit)"
        );
        assert!(
            !matches.contains(&syn_b_id),
            "NeedTags@508 must be suppressed (metadata bit)"
        );
        assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");

        // Internal include-synthetic surface (binding plane, scope analysis):
        // every node remains reachable.
        let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
        assert_eq!(
            all_matches.len(),
            5,
            "include_synthetic surfaces everything"
        );
        assert!(all_matches.contains(&prop_id));
        assert!(all_matches.contains(&local_var_id));
        assert!(all_matches.contains(&syn_field_id));
        assert!(all_matches.contains(&syn_a_id));
        assert!(all_matches.contains(&syn_b_id));

        // is_node_synthetic exposed for surface-level filters
        // (e.g., MCP semantic_search/relation_query post-filters).
        assert!(snapshot.is_node_synthetic(syn_field_id));
        assert!(snapshot.is_node_synthetic(syn_a_id));
        assert!(snapshot.is_node_synthetic(syn_b_id));
        assert!(!snapshot.is_node_synthetic(prop_id));
        assert!(!snapshot.is_node_synthetic(local_var_id));
    }
2928
    #[test]
    #[allow(clippy::too_many_lines)]
    fn find_by_exact_name_aligns_with_planner_name_predicate() {
        // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
        // surface for the CLI `--exact NeedTags` shorthand and the
        // planner's `name:NeedTags` predicate. Both paths must return
        // the same set against this fixture.
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        // Property nodes carry the package-qualified name as
        // `entry.name` (Go plugin convention; see `helper.rs`'s
        // `semantic_name_for_node_input`).
        let property_qname = graph
            .strings_mut()
            .intern("main.SelectorSource.NeedTags")
            .unwrap();
        let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
        let synthetic_field_name = graph
            .strings_mut()
            .intern("<field:selector.NeedTags>")
            .unwrap();
        let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
        // "NeedTagsHelper" exists purely to prove exact lookup is not a
        // substring match; "Other" exercises the `::`-canonical fallback.
        let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
        let display_fallback_name = graph.strings_mut().intern("Other").unwrap();
        let display_fallback_qname = graph
            .strings_mut()
            .intern("main::SelectorSource::Other")
            .unwrap();
        let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();

        // Allocate the six fixture nodes.
        let prop_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
            .unwrap();
        let local_var_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
            .unwrap();
        let syn_field_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_field_name,
                file_id,
            ))
            .unwrap();
        let syn_offset_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(
                NodeKind::Variable,
                synthetic_offset_name,
                file_id,
            ))
            .unwrap();
        let unrelated_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
            .unwrap();
        let display_fallback_id = graph
            .nodes_mut()
            .alloc(
                NodeEntry::new(NodeKind::Property, display_fallback_name, file_id)
                    .with_qualified_name(display_fallback_qname),
            )
            .unwrap();

        // Register every node in the auxiliary name indices so exact-name
        // lookups can see them.
        graph
            .indices_mut()
            .add(prop_id, NodeKind::Property, property_qname, None, file_id);
        graph.indices_mut().add(
            local_var_id,
            NodeKind::Variable,
            local_var_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_field_id,
            NodeKind::Variable,
            synthetic_field_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            syn_offset_id,
            NodeKind::Variable,
            synthetic_offset_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            unrelated_id,
            NodeKind::Function,
            unrelated_name,
            None,
            file_id,
        );
        graph.indices_mut().add(
            display_fallback_id,
            NodeKind::Property,
            display_fallback_name,
            Some(display_fallback_qname),
            file_id,
        );

        // Mark the offset-suffixed synthetic via the metadata bit so we
        // exercise both recognition channels in this fixture.
        graph.macro_metadata_mut().mark_synthetic(syn_offset_id);

        let snapshot = graph.snapshot();

        // Exact-name lookup on "NeedTags" — should pick up only the
        // local variable (its `entry.name` is exactly "NeedTags"); it
        // must NOT pick up the Property (qualified name contains but
        // does not equal "NeedTags") and must NOT pick up either
        // synthetic placeholder.
        let exact = snapshot.find_by_exact_name("NeedTags");
        assert_eq!(
            exact,
            vec![local_var_id],
            "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
        );

        // The Property's full qualified name is exact-addressable.
        let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
        assert_eq!(qualified, vec![prop_id]);

        // Dot-qualified display form falls back to graph-canonical `::` only
        // when the exact dot string was absent.
        let display_fallback = snapshot.find_by_exact_name("main.SelectorSource.Other");
        assert_eq!(display_fallback, vec![display_fallback_id]);

        // Substring-only matches must not surface from exact lookup.
        assert!(
            snapshot
                .find_by_exact_name("NeedTagsHelper")
                .contains(&unrelated_id)
        );
        assert!(
            !snapshot
                .find_by_exact_name("NeedTags")
                .contains(&unrelated_id),
            "exact 'NeedTags' must not match 'NeedTagsHelper'"
        );

        // Unknown name short-circuits to an empty vec without
        // scanning any nodes.
        assert!(
            snapshot
                .find_by_exact_name("ThisStringIsNotInterned")
                .is_empty()
        );
    }
3085
3086 #[test]
3087 fn test_snapshot_get_callees() {
3088 use crate::graph::unified::edge::EdgeKind;
3089 use crate::graph::unified::node::NodeKind;
3090 use crate::graph::unified::storage::arena::NodeEntry;
3091 use std::path::Path;
3092
3093 let mut graph = CodeGraph::new();
3094
3095 // Create caller and callee nodes
3096 let caller_name = graph.strings_mut().intern("caller").unwrap();
3097 let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3098 let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3099 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3100
3101 let caller_id = graph
3102 .nodes_mut()
3103 .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3104 .unwrap();
3105 let callee1_id = graph
3106 .nodes_mut()
3107 .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3108 .unwrap();
3109 let callee2_id = graph
3110 .nodes_mut()
3111 .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3112 .unwrap();
3113
3114 // Add call edges
3115 graph.edges_mut().add_edge(
3116 caller_id,
3117 callee1_id,
3118 EdgeKind::Calls {
3119 argument_count: 0,
3120 is_async: false,
3121 },
3122 file_id,
3123 );
3124 graph.edges_mut().add_edge(
3125 caller_id,
3126 callee2_id,
3127 EdgeKind::Calls {
3128 argument_count: 0,
3129 is_async: false,
3130 },
3131 file_id,
3132 );
3133
3134 let snapshot = graph.snapshot();
3135
3136 // Query callees
3137 let callees = snapshot.get_callees(caller_id);
3138 assert_eq!(callees.len(), 2);
3139 assert!(callees.contains(&callee1_id));
3140 assert!(callees.contains(&callee2_id));
3141
3142 // Node with no callees
3143 let callees = snapshot.get_callees(callee1_id);
3144 assert!(callees.is_empty());
3145 }
3146
3147 #[test]
3148 fn test_snapshot_get_callers() {
3149 use crate::graph::unified::edge::EdgeKind;
3150 use crate::graph::unified::node::NodeKind;
3151 use crate::graph::unified::storage::arena::NodeEntry;
3152 use std::path::Path;
3153
3154 let mut graph = CodeGraph::new();
3155
3156 // Create caller and callee nodes
3157 let caller1_name = graph.strings_mut().intern("caller1").unwrap();
3158 let caller2_name = graph.strings_mut().intern("caller2").unwrap();
3159 let callee_name = graph.strings_mut().intern("callee").unwrap();
3160 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3161
3162 let caller1_id = graph
3163 .nodes_mut()
3164 .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
3165 .unwrap();
3166 let caller2_id = graph
3167 .nodes_mut()
3168 .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
3169 .unwrap();
3170 let callee_id = graph
3171 .nodes_mut()
3172 .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
3173 .unwrap();
3174
3175 // Add call edges
3176 graph.edges_mut().add_edge(
3177 caller1_id,
3178 callee_id,
3179 EdgeKind::Calls {
3180 argument_count: 0,
3181 is_async: false,
3182 },
3183 file_id,
3184 );
3185 graph.edges_mut().add_edge(
3186 caller2_id,
3187 callee_id,
3188 EdgeKind::Calls {
3189 argument_count: 0,
3190 is_async: false,
3191 },
3192 file_id,
3193 );
3194
3195 let snapshot = graph.snapshot();
3196
3197 // Query callers
3198 let callers = snapshot.get_callers(callee_id);
3199 assert_eq!(callers.len(), 2);
3200 assert!(callers.contains(&caller1_id));
3201 assert!(callers.contains(&caller2_id));
3202
3203 // Node with no callers
3204 let callers = snapshot.get_callers(caller1_id);
3205 assert!(callers.is_empty());
3206 }
3207
3208 #[test]
3209 fn test_snapshot_find_symbol_candidates() {
3210 use crate::graph::unified::node::NodeKind;
3211 use crate::graph::unified::storage::arena::NodeEntry;
3212 use std::path::Path;
3213
3214 let mut graph = CodeGraph::new();
3215
3216 // Add nodes with same symbol name but different qualified names
3217 let symbol_name = graph.strings_mut().intern("test").unwrap();
3218 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3219
3220 let node1 = graph
3221 .nodes_mut()
3222 .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
3223 .unwrap();
3224 let node2 = graph
3225 .nodes_mut()
3226 .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
3227 .unwrap();
3228
3229 // Add a different symbol
3230 let other_name = graph.strings_mut().intern("other").unwrap();
3231 let node3 = graph
3232 .nodes_mut()
3233 .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
3234 .unwrap();
3235
3236 graph
3237 .indices_mut()
3238 .add(node1, NodeKind::Function, symbol_name, None, file_id);
3239 graph
3240 .indices_mut()
3241 .add(node2, NodeKind::Method, symbol_name, None, file_id);
3242 graph
3243 .indices_mut()
3244 .add(node3, NodeKind::Function, other_name, None, file_id);
3245
3246 let snapshot = graph.snapshot();
3247
3248 // Find by symbol
3249 let matches = candidate_nodes(&snapshot, "test");
3250 assert_eq!(matches.len(), 2);
3251 assert!(matches.contains(&node1));
3252 assert!(matches.contains(&node2));
3253
3254 // Find other symbol
3255 let matches = candidate_nodes(&snapshot, "other");
3256 assert_eq!(matches.len(), 1);
3257 assert_eq!(matches[0], node3);
3258
3259 // No matches
3260 let matches = candidate_nodes(&snapshot, "nonexistent");
3261 assert!(matches.is_empty());
3262 }
3263
3264 #[test]
3265 fn test_snapshot_iter_nodes() {
3266 use crate::graph::unified::node::NodeKind;
3267 use crate::graph::unified::storage::arena::NodeEntry;
3268 use std::path::Path;
3269
3270 let mut graph = CodeGraph::new();
3271
3272 // Add some nodes
3273 let name1 = graph.strings_mut().intern("func1").unwrap();
3274 let name2 = graph.strings_mut().intern("func2").unwrap();
3275 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3276
3277 let node1 = graph
3278 .nodes_mut()
3279 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3280 .unwrap();
3281 let node2 = graph
3282 .nodes_mut()
3283 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3284 .unwrap();
3285
3286 let snapshot = graph.snapshot();
3287
3288 // Iterate nodes
3289 let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
3290 assert_eq!(snapshot_nodes.len(), 2);
3291
3292 let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
3293 assert!(node_ids.contains(&node1));
3294 assert!(node_ids.contains(&node2));
3295 }
3296
3297 #[test]
3298 fn test_snapshot_iter_edges() {
3299 use crate::graph::unified::edge::EdgeKind;
3300 use crate::graph::unified::node::NodeKind;
3301 use crate::graph::unified::storage::arena::NodeEntry;
3302 use std::path::Path;
3303
3304 let mut graph = CodeGraph::new();
3305
3306 // Create nodes
3307 let name1 = graph.strings_mut().intern("func1").unwrap();
3308 let name2 = graph.strings_mut().intern("func2").unwrap();
3309 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3310
3311 let node1 = graph
3312 .nodes_mut()
3313 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3314 .unwrap();
3315 let node2 = graph
3316 .nodes_mut()
3317 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3318 .unwrap();
3319
3320 // Add edges
3321 graph.edges_mut().add_edge(
3322 node1,
3323 node2,
3324 EdgeKind::Calls {
3325 argument_count: 0,
3326 is_async: false,
3327 },
3328 file_id,
3329 );
3330
3331 let snapshot = graph.snapshot();
3332
3333 // Iterate edges
3334 let edges: Vec<_> = snapshot.iter_edges().collect();
3335 assert_eq!(edges.len(), 1);
3336
3337 let (src, tgt, kind) = &edges[0];
3338 assert_eq!(*src, node1);
3339 assert_eq!(*tgt, node2);
3340 assert!(matches!(
3341 kind,
3342 EdgeKind::Calls {
3343 argument_count: 0,
3344 is_async: false
3345 }
3346 ));
3347 }
3348
3349 #[test]
3350 fn test_snapshot_get_node() {
3351 use crate::graph::unified::node::NodeId;
3352 use crate::graph::unified::node::NodeKind;
3353 use crate::graph::unified::storage::arena::NodeEntry;
3354 use std::path::Path;
3355
3356 let mut graph = CodeGraph::new();
3357
3358 // Add a node
3359 let name = graph.strings_mut().intern("test_func").unwrap();
3360 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3361
3362 let node_id = graph
3363 .nodes_mut()
3364 .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
3365 .unwrap();
3366
3367 let snapshot = graph.snapshot();
3368
3369 // Get node
3370 let entry = snapshot.get_node(node_id);
3371 assert!(entry.is_some());
3372 assert_eq!(entry.unwrap().kind, NodeKind::Function);
3373
3374 // Invalid node
3375 let invalid_id = NodeId::INVALID;
3376 assert!(snapshot.get_node(invalid_id).is_none());
3377 }
3378
3379 #[test]
3380 fn test_snapshot_query_empty_graph() {
3381 use crate::graph::unified::node::NodeId;
3382
3383 let graph = CodeGraph::new();
3384 let snapshot = graph.snapshot();
3385
3386 // All queries should return empty on empty graph
3387 assert!(resolve_symbol_strict(&snapshot, "test").is_none());
3388 assert!(snapshot.find_by_pattern("test").is_empty());
3389 assert!(candidate_nodes(&snapshot, "test").is_empty());
3390
3391 let dummy_id = NodeId::new(0, 1);
3392 assert!(snapshot.get_callees(dummy_id).is_empty());
3393 assert!(snapshot.get_callers(dummy_id).is_empty());
3394
3395 assert_eq!(snapshot.iter_nodes().count(), 0);
3396 assert_eq!(snapshot.iter_edges().count(), 0);
3397 }
3398
3399 #[test]
3400 fn test_snapshot_edge_filtering_by_kind() {
3401 use crate::graph::unified::edge::EdgeKind;
3402 use crate::graph::unified::node::NodeKind;
3403 use crate::graph::unified::storage::arena::NodeEntry;
3404 use std::path::Path;
3405
3406 let mut graph = CodeGraph::new();
3407
3408 // Create nodes
3409 let name1 = graph.strings_mut().intern("func1").unwrap();
3410 let name2 = graph.strings_mut().intern("func2").unwrap();
3411 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3412
3413 let node1 = graph
3414 .nodes_mut()
3415 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
3416 .unwrap();
3417 let node2 = graph
3418 .nodes_mut()
3419 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
3420 .unwrap();
3421
3422 // Add different kinds of edges
3423 graph.edges_mut().add_edge(
3424 node1,
3425 node2,
3426 EdgeKind::Calls {
3427 argument_count: 0,
3428 is_async: false,
3429 },
3430 file_id,
3431 );
3432 graph
3433 .edges_mut()
3434 .add_edge(node1, node2, EdgeKind::References, file_id);
3435
3436 let snapshot = graph.snapshot();
3437
3438 // get_callees should only return Calls edges
3439 let callees = snapshot.get_callees(node1);
3440 assert_eq!(callees.len(), 1);
3441 assert_eq!(callees[0], node2);
3442
3443 // iter_edges returns all edges regardless of kind
3444 let edges: Vec<_> = snapshot.iter_edges().collect();
3445 assert_eq!(edges.len(), 2);
3446 }
3447
3448 // -------- reverse_import_index tests --------
3449
3450 /// Helper: build an empty graph with the given file paths registered, a
3451 /// single placeholder node allocated per file, and indices rebuilt so
3452 /// `by_file` returns the per-file node sets. Returns the file IDs in
3453 /// the order passed in and the per-file node ID.
3454 #[cfg(test)]
3455 fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
3456 use crate::graph::unified::node::NodeKind;
3457 use crate::graph::unified::storage::arena::NodeEntry;
3458 use std::path::Path;
3459
3460 let mut graph = CodeGraph::new();
3461 let placeholder_name = graph.strings_mut().intern("sym").unwrap();
3462 let mut file_ids = Vec::with_capacity(files.len());
3463 let mut node_ids = Vec::with_capacity(files.len());
3464 for path in files {
3465 let file_id = graph.files_mut().register(Path::new(path)).unwrap();
3466 let node_id = graph
3467 .nodes_mut()
3468 .alloc(NodeEntry::new(
3469 NodeKind::Function,
3470 placeholder_name,
3471 file_id,
3472 ))
3473 .unwrap();
3474 file_ids.push(file_id);
3475 node_ids.push(node_id);
3476 }
3477 graph.rebuild_indices();
3478 (graph, file_ids, node_ids)
3479 }
3480
3481 /// Helper: add an `Imports` edge from `source_node` (in importer file) to
3482 /// `target_node` (in exporter file). The edge is recorded against the
3483 /// importer file so `clear_file` cleanup would behave identically to
3484 /// production Pass 4 writes.
3485 #[cfg(test)]
3486 fn add_import_edge(
3487 graph: &mut CodeGraph,
3488 source_node: NodeId,
3489 target_node: NodeId,
3490 importer_file: FileId,
3491 ) {
3492 graph.edges_mut().add_edge(
3493 source_node,
3494 target_node,
3495 EdgeKind::Imports {
3496 alias: None,
3497 is_wildcard: false,
3498 },
3499 importer_file,
3500 );
3501 }
3502
3503 #[test]
3504 fn reverse_import_index_empty_graph_returns_empty() {
3505 let (graph, files, _) = build_import_test_graph(&["only.rs"]);
3506 assert!(graph.reverse_import_index(files[0]).is_empty());
3507 }
3508
3509 #[test]
3510 fn reverse_import_index_single_importer() {
3511 // File A imports a symbol exported by file B. Reverse index of B
3512 // should return exactly [A]; reverse index of A should be empty.
3513 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3514 let (a, b) = (files[0], files[1]);
3515 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3516
3517 let importers_of_b = graph.reverse_import_index(b);
3518 assert_eq!(importers_of_b, vec![a]);
3519
3520 let importers_of_a = graph.reverse_import_index(a);
3521 assert!(
3522 importers_of_a.is_empty(),
3523 "A has no inbound Imports edges; reverse index must be empty"
3524 );
3525 }
3526
3527 #[test]
3528 fn reverse_import_index_multiple_importers_deduped_and_sorted() {
3529 // A, B, C all import from D. Reverse index of D must contain each
3530 // importer exactly once, sorted ascending by FileId.
3531 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
3532 let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
3533 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3534 add_import_edge(&mut graph, nodes[1], nodes[3], b);
3535 add_import_edge(&mut graph, nodes[2], nodes[3], c);
3536 // Add a duplicate edge from A to D to confirm dedup behavior.
3537 add_import_edge(&mut graph, nodes[0], nodes[3], a);
3538
3539 let importers_of_d = graph.reverse_import_index(d);
3540 assert_eq!(importers_of_d, vec![a, b, c]);
3541 // Sort invariant: Vec is ascending by raw index.
3542 let mut sorted = importers_of_d.clone();
3543 sorted.sort();
3544 assert_eq!(importers_of_d, sorted);
3545 }
3546
3547 #[test]
3548 fn reverse_import_index_filters_non_import_edges() {
3549 // A `Calls` edge from A into B must not contribute to B's reverse
3550 // import index. Only `EdgeKind::Imports` edges count.
3551 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3552 let (a, b) = (files[0], files[1]);
3553 graph.edges_mut().add_edge(
3554 nodes[0],
3555 nodes[1],
3556 EdgeKind::Calls {
3557 argument_count: 0,
3558 is_async: false,
3559 },
3560 a,
3561 );
3562 graph
3563 .edges_mut()
3564 .add_edge(nodes[0], nodes[1], EdgeKind::References, a);
3565
3566 assert!(
3567 graph.reverse_import_index(b).is_empty(),
3568 "non-Imports edges must not register as importers"
3569 );
3570 }
3571
3572 #[test]
3573 fn reverse_import_index_elides_self_imports() {
3574 // An Imports edge whose source and target are both in the same file
3575 // is a self-import; the caller's own file must not appear in its own
3576 // reverse index.
3577 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
3578 let a = files[0];
3579 // Add a second node in the same file so we have a distinct source.
3580 let name2 = graph.strings_mut().intern("sym2").unwrap();
3581 let second_in_a = graph
3582 .nodes_mut()
3583 .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
3584 crate::graph::unified::node::NodeKind::Function,
3585 name2,
3586 a,
3587 ))
3588 .unwrap();
3589 graph.rebuild_indices();
3590 add_import_edge(&mut graph, second_in_a, nodes[0], a);
3591
3592 assert!(
3593 graph.reverse_import_index(a).is_empty(),
3594 "self-imports must be elided from reverse index"
3595 );
3596 }
3597
3598 #[test]
3599 fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
3600 // Two files: A has both Calls and Imports edges into B. Reverse
3601 // index must return exactly [A] — the Calls edge contributes
3602 // nothing.
3603 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3604 let (a, b) = (files[0], files[1]);
3605 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3606 graph.edges_mut().add_edge(
3607 nodes[0],
3608 nodes[1],
3609 EdgeKind::Calls {
3610 argument_count: 0,
3611 is_async: false,
3612 },
3613 a,
3614 );
3615
3616 assert_eq!(graph.reverse_import_index(b), vec![a]);
3617 }
3618
3619 #[test]
3620 fn reverse_import_index_uninitialized_file_returns_empty() {
3621 // Querying a FileId that is not registered in the graph must return
3622 // an empty Vec, not panic.
3623 let (graph, _, _) = build_import_test_graph(&["a.rs"]);
3624 let bogus = FileId::new(9999);
3625 assert!(
3626 graph.reverse_import_index(bogus).is_empty(),
3627 "unknown FileId must return empty Vec without panicking"
3628 );
3629 }
3630
3631 #[test]
3632 fn reverse_import_index_skips_tombstoned_source_nodes() {
3633 // In the incremental rebuild path the prior graph retains tombstoned
3634 // arena slots for nodes belonging to closure files that have been
3635 // removed but not yet fully compacted. reverse_import_index is
3636 // called on that prior graph to widen the closure, so its
3637 // tombstone guard (`let Some(source_entry) = self.nodes.get(...)`)
3638 // is a semantically important branch, not a theoretical one.
3639 // This test exercises it directly by tombstoning the source node of
3640 // an Imports edge and asserting the edge silently disappears from
3641 // the reverse index.
3642 let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
3643 let (a, b) = (files[0], files[1]);
3644 add_import_edge(&mut graph, nodes[0], nodes[1], a);
3645 // Sanity: before tombstoning, A shows up as the importer of B.
3646 assert_eq!(graph.reverse_import_index(b), vec![a]);
3647 // Tombstone the source node via the arena (generation bump). The
3648 // Imports edge still exists in the edge store but its source NodeId
3649 // is now stale; `nodes.get(edge_ref.source)` returns None and the
3650 // guard skips the edge.
3651 let removed = graph.nodes_mut().remove(nodes[0]);
3652 assert!(
3653 removed.is_some(),
3654 "arena.remove must succeed for a live node"
3655 );
3656 assert!(
3657 graph.nodes().get(nodes[0]).is_none(),
3658 "tombstoned lookup must return None"
3659 );
3660 assert!(
3661 graph.reverse_import_index(b).is_empty(),
3662 "Imports edges whose source is tombstoned must be silently skipped"
3663 );
3664 }
3665
3666 // ------------------------------------------------------------------
3667 // Task 4 Step 2 — CodeGraph::remove_file
3668 // ------------------------------------------------------------------
3669
3670 /// Seed a graph with 2 files × `per_file` nodes per file, plus a set
3671 /// of intra- and inter-file edges. Each call site produces a
3672 /// canonical topology so tests below can assert bit-level on edge
3673 /// survival. Returns `(graph, file_a, file_b, file_a_nodes,
3674 /// file_b_nodes)`.
3675 fn seed_two_file_graph(
3676 per_file: usize,
3677 ) -> (
3678 CodeGraph,
3679 crate::graph::unified::file::FileId,
3680 crate::graph::unified::file::FileId,
3681 Vec<NodeId>,
3682 Vec<NodeId>,
3683 ) {
3684 use crate::graph::unified::edge::EdgeKind;
3685 use crate::graph::unified::node::NodeKind;
3686 use crate::graph::unified::storage::arena::NodeEntry;
3687 use std::path::Path;
3688
3689 let mut graph = CodeGraph::new();
3690 let sym = graph.strings_mut().intern("sym").expect("intern");
3691 let file_a = graph
3692 .files_mut()
3693 .register(Path::new("/tmp/remove_file_test/a.rs"))
3694 .expect("register a");
3695 let file_b = graph
3696 .files_mut()
3697 .register(Path::new("/tmp/remove_file_test/b.rs"))
3698 .expect("register b");
3699
3700 let mut file_a_nodes = Vec::with_capacity(per_file);
3701 let mut file_b_nodes = Vec::with_capacity(per_file);
3702
3703 for _ in 0..per_file {
3704 let n = graph
3705 .nodes_mut()
3706 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
3707 .expect("alloc a-node");
3708 file_a_nodes.push(n);
3709 graph.files_mut().record_node(file_a, n);
3710 graph
3711 .indices_mut()
3712 .add(n, NodeKind::Function, sym, None, file_a);
3713 }
3714 for _ in 0..per_file {
3715 let n = graph
3716 .nodes_mut()
3717 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
3718 .expect("alloc b-node");
3719 file_b_nodes.push(n);
3720 graph.files_mut().record_node(file_b, n);
3721 graph
3722 .indices_mut()
3723 .add(n, NodeKind::Function, sym, None, file_b);
3724 }
3725
3726 // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
3727 // b[i] -> b[i+1]. These are the ones that must die when the
3728 // corresponding file is removed.
3729 for i in 0..per_file.saturating_sub(1) {
3730 graph.edges_mut().add_edge(
3731 file_a_nodes[i],
3732 file_a_nodes[i + 1],
3733 EdgeKind::Calls {
3734 argument_count: 0,
3735 is_async: false,
3736 },
3737 file_a,
3738 );
3739 graph.edges_mut().add_edge(
3740 file_b_nodes[i],
3741 file_b_nodes[i + 1],
3742 EdgeKind::Calls {
3743 argument_count: 0,
3744 is_async: false,
3745 },
3746 file_b,
3747 );
3748 }
3749 // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
3750 // when *either* endpoint's file is removed (plan §F.2).
3751 graph.edges_mut().add_edge(
3752 file_a_nodes[0],
3753 file_b_nodes[0],
3754 EdgeKind::Calls {
3755 argument_count: 0,
3756 is_async: false,
3757 },
3758 file_a,
3759 );
3760 graph.edges_mut().add_edge(
3761 file_b_nodes[0],
3762 file_a_nodes[0],
3763 EdgeKind::Calls {
3764 argument_count: 0,
3765 is_async: false,
3766 },
3767 file_b,
3768 );
3769
3770 (graph, file_a, file_b, file_a_nodes, file_b_nodes)
3771 }
3772
3773 #[test]
3774 fn code_graph_remove_file_tombstones_all_per_file_nodes() {
3775 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3776
3777 let returned = graph.remove_file(file_a);
3778
3779 // Returned list must equal the original per-file bucket
3780 // membership, deterministically.
3781 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
3782 let expected_set: std::collections::HashSet<NodeId> =
3783 file_a_nodes.iter().copied().collect();
3784 assert_eq!(
3785 returned_set, expected_set,
3786 "remove_file must return exactly the file_a nodes drained from the bucket"
3787 );
3788
3789 // Every returned NodeId is gone from the arena.
3790 for nid in &file_a_nodes {
3791 assert!(
3792 graph.nodes().get(*nid).is_none(),
3793 "node {nid:?} from removed file must be tombstoned in arena"
3794 );
3795 }
3796 }
3797
3798 #[test]
3799 fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
3800 use crate::graph::unified::edge::EdgeKind;
3801
3802 let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);
3803
3804 // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
3805 // 2 cross-file edges (a0↔b0).
3806 let before_delta = graph.edges().stats().forward.delta_edge_count;
3807 assert_eq!(
3808 before_delta, 6,
3809 "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
3810 );
3811
3812 let _ = graph.remove_file(file_a);
3813
3814 // Forward delta after removal: all 2 intra-A edges are gone,
3815 // both cross edges are gone, only the 2 intra-B edges remain.
3816 let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
3817 assert_eq!(
3818 after_delta_forward, 2,
3819 "only intra-B forward edges must remain after removing file_a"
3820 );
3821 let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
3822 assert_eq!(
3823 after_delta_reverse, 2,
3824 "only intra-B reverse edges must remain after removing file_a"
3825 );
3826
3827 // Cross-file edge from b[0] -> a[0] must no longer be visible
3828 // from either direction, because a[0] is tombstoned.
3829 let b0 = file_b_nodes[0];
3830 let a0 = file_a_nodes[0];
3831 let remaining_from_b0: Vec<_> = graph
3832 .edges()
3833 .edges_from(b0)
3834 .into_iter()
3835 .filter(|e| {
3836 matches!(
3837 e.kind,
3838 EdgeKind::Calls {
3839 argument_count: 0,
3840 is_async: false
3841 }
3842 )
3843 })
3844 .collect();
3845 assert!(
3846 !remaining_from_b0.iter().any(|e| e.target == a0),
3847 "edge b0 -> a0 must be gone after remove_file(file_a)"
3848 );
3849 let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
3850 assert!(
3851 remaining_to_a0.is_empty(),
3852 "every edge targeting the tombstoned a0 must be gone"
3853 );
3854 }
3855
3856 #[test]
3857 fn code_graph_remove_file_drops_file_registry_entry() {
3858 let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
3859
3860 assert!(
3861 graph.files().resolve(file_a).is_some(),
3862 "seed registered file_a"
3863 );
3864 assert!(
3865 !graph.files().nodes_for_file(file_a).is_empty(),
3866 "seed populated the file_a bucket"
3867 );
3868
3869 let _ = graph.remove_file(file_a);
3870
3871 assert!(
3872 graph.files().resolve(file_a).is_none(),
3873 "FileRegistry::resolve must return None after remove_file"
3874 );
3875 assert!(
3876 graph.files().nodes_for_file(file_a).is_empty(),
3877 "per-file bucket for file_a must be drained"
3878 );
3879 }
3880
3881 #[test]
3882 fn code_graph_remove_file_is_idempotent_on_unknown_file() {
3883 use crate::graph::unified::file::FileId;
3884 let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);
3885
3886 // Snapshot state before idempotent no-op.
3887 let nodes_before = graph.nodes().len();
3888 let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
3889 let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
3890 let files_before = graph.files().len();
3891
3892 // A FileId that was never registered: the caller may legitimately
3893 // receive a bogus id from stale indexing state. `remove_file`
3894 // must be a silent no-op.
3895 let bogus = FileId::new(9999);
3896 let returned = graph.remove_file(bogus);
3897 assert!(
3898 returned.is_empty(),
3899 "remove_file on unknown FileId must return an empty Vec"
3900 );
3901
3902 assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
3903 assert_eq!(
3904 graph.edges().stats().forward.delta_edge_count,
3905 delta_fwd_before,
3906 "forward delta unchanged"
3907 );
3908 assert_eq!(
3909 graph.edges().stats().reverse.delta_edge_count,
3910 delta_rev_before,
3911 "reverse delta unchanged"
3912 );
3913 assert_eq!(graph.files().len(), files_before, "file count unchanged");
3914 }
3915
3916 #[test]
3917 fn code_graph_remove_file_clears_file_segments_entry() {
3918 // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
3919 // must be cleared on `remove_file`. Without this, a later
3920 // `FileId` recycle (via `FileRegistry::free_list`) would
3921 // inherit the previous file's stale node-range and
3922 // `reindex_files` (`build/reindex.rs`) would tombstone the
3923 // wrong slots. This unit test seeds a segment entry, removes
3924 // the file, and asserts the entry is gone.
3925 use crate::graph::unified::storage::segment::FileSegmentTable;
3926
3927 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3928
3929 // Seed a segment for file A. Production code sets this via
3930 // Phase 3 parallel commit; here we go through the crate-
3931 // internal accessor because we are a unit test inside the
3932 // crate. (The feature-gated `test_only_record_file_segment`
3933 // public helper exists for the integration tests in
3934 // `sqry-core/tests/incremental_remove_file_scale.rs`.)
3935 let first_index = file_a_nodes
3936 .iter()
3937 .map(|n| n.index())
3938 .min()
3939 .expect("per_file = 3");
3940 let last_index = file_a_nodes
3941 .iter()
3942 .map(|n| n.index())
3943 .max()
3944 .expect("per_file = 3");
3945 let slot_count = last_index - first_index + 1;
3946 let table: &mut FileSegmentTable = graph.file_segments_mut();
3947 table.record_range(file_a, first_index, slot_count);
3948 assert!(
3949 graph.file_segments().get(file_a).is_some(),
3950 "seed must install a segment for file_a before remove_file"
3951 );
3952
3953 // Remove the file.
3954 let _ = graph.remove_file(file_a);
3955
3956 // The segment entry must be gone. `FileSegmentTable::remove`
3957 // is idempotent (a no-op on unknown ids), so this assertion
3958 // holds whether or not the seeded range was contiguous.
3959 assert!(
3960 graph.file_segments().get(file_a).is_none(),
3961 "remove_file must clear the FileSegmentTable entry for file_a"
3962 );
3963 }
3964
3965 #[test]
3966 fn code_graph_remove_file_repeated_calls_are_idempotent() {
3967 let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);
3968
3969 // First call does the work.
3970 let first = graph.remove_file(file_a);
3971 assert_eq!(first.len(), file_a_nodes.len());
3972
3973 // Snapshot post-first-call state.
3974 let nodes_after = graph.nodes().len();
3975 let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
3976 let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
3977 let files_after = graph.files().len();
3978
3979 // Second call must be a silent no-op — the bucket is empty,
3980 // the file is unregistered, and the arena slots are already
3981 // tombstoned (NodeArena::remove ignores stale generations).
3982 let second = graph.remove_file(file_a);
3983 assert!(
3984 second.is_empty(),
3985 "second remove_file on the same file must return an empty Vec"
3986 );
3987
3988 assert_eq!(graph.nodes().len(), nodes_after);
3989 assert_eq!(
3990 graph.edges().stats().forward.delta_edge_count,
3991 delta_fwd_after
3992 );
3993 assert_eq!(
3994 graph.edges().stats().reverse.delta_edge_count,
3995 delta_rev_after
3996 );
3997 assert_eq!(graph.files().len(), files_after);
3998 }
3999}