sqry_core/graph/unified/concurrent/graph.rs
//! `CodeGraph` and `ConcurrentCodeGraph` implementations.
//!
//! This module provides the core graph types with thread-safe access:
//!
//! - [`CodeGraph`]: Arc-wrapped internals for O(1) `CoW` snapshots
//! - [`ConcurrentCodeGraph`]: `RwLock` wrapper with epoch versioning
//! - [`GraphSnapshot`]: Immutable snapshot for long-running queries

use std::collections::{HashMap, HashSet};
use std::fmt;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};

use crate::confidence::ConfidenceMetadata;
use crate::graph::unified::bind::alias::AliasTable;
use crate::graph::unified::bind::scope::provenance::{
    ScopeProvenance, ScopeProvenanceStore, ScopeStableId,
};
use crate::graph::unified::bind::scope::{ScopeArena, ScopeId};
use crate::graph::unified::bind::shadow::ShadowTable;
use crate::graph::unified::edge::EdgeKind;
use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
use crate::graph::unified::file::FileId;
use crate::graph::unified::memory::{GraphMemorySize, HASHMAP_ENTRY_OVERHEAD};
use crate::graph::unified::storage::arena::NodeArena;
use crate::graph::unified::storage::edge_provenance::{EdgeProvenance, EdgeProvenanceStore};
use crate::graph::unified::storage::indices::AuxiliaryIndices;
use crate::graph::unified::storage::interner::StringInterner;
use crate::graph::unified::storage::metadata::NodeMetadataStore;
use crate::graph::unified::storage::node_provenance::{NodeProvenance, NodeProvenanceStore};
use crate::graph::unified::storage::registry::{FileProvenanceView, FileRegistry};
use crate::graph::unified::storage::segment::FileSegmentTable;
use crate::graph::unified::string::id::StringId;

/// Core graph with Arc-wrapped internals for O(1) `CoW` snapshots.
///
/// `CodeGraph` uses `Arc` for all internal components, enabling:
/// - O(1) snapshot creation via Arc cloning
/// - Copy-on-write semantics via `Arc::make_mut`
/// - Memory-efficient sharing between snapshots
///
/// # Design
///
/// The Arc wrapping enables the MVCC pattern:
/// - Readers see a consistent snapshot at the time they acquired access
/// - Writers use `Arc::make_mut` to get exclusive copies only when mutating
/// - Multiple snapshots can coexist without copying data
///
/// # Performance
///
/// - Snapshot creation: one `Arc` clone per component, typically <1μs
/// - Read access: Direct Arc dereference, no locking
/// - Write access: `Arc::make_mut` clones only if refcount > 1
///
/// # Phase 2 binding-plane access
///
/// Use the two-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let snapshot = graph.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The two-line form is intentional: `BindingPlane<'g>` borrows from
/// `GraphSnapshot` and the explicit snapshot handle makes the MVCC lifetime
/// visible at the call site. The full Phase 2 scope/alias/shadow and
/// witness-bearing resolution API is exposed through `BindingPlane`.
// Field visibility is `pub(crate)` so the Gate 0c `rebuild_graph` module
// (A2 §H) can destructure `CodeGraph` exhaustively in `clone_for_rebuild`.
// External crates still go through the public accessor methods below.
#[derive(Clone)]
pub struct CodeGraph {
    /// Node storage with generational indices.
    pub(crate) nodes: Arc<NodeArena>,
    /// Bidirectional edge storage (forward + reverse).
    pub(crate) edges: Arc<BidirectionalEdgeStore>,
    /// String interner for symbol names.
    pub(crate) strings: Arc<StringInterner>,
    /// File registry for path deduplication.
    pub(crate) files: Arc<FileRegistry>,
    /// Auxiliary indices for fast lookup.
    pub(crate) indices: Arc<AuxiliaryIndices>,
    /// Sparse macro boundary metadata (keyed by full `NodeId`).
    pub(crate) macro_metadata: Arc<NodeMetadataStore>,
    /// Dense node provenance (Phase 1 fact layer).
    pub(crate) node_provenance: Arc<NodeProvenanceStore>,
    /// Dense edge provenance (Phase 1 fact layer).
    pub(crate) edge_provenance: Arc<EdgeProvenanceStore>,
    /// Monotonic fact-layer epoch (0 until set by the V8 persistence path).
    pub(crate) fact_epoch: u64,
    /// Epoch for version tracking.
    pub(crate) epoch: u64,
    /// Per-language confidence metadata collected during build.
    /// Maps language name (e.g., "rust") to aggregated confidence.
    pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
    /// Phase 2 binding-plane scope arena (populated by Phase 4e).
    pub(crate) scope_arena: Arc<ScopeArena>,
    /// Phase 2 binding-plane alias table (populated by Phase 4e / P2U04).
    pub(crate) alias_table: Arc<AliasTable>,
    /// Phase 2 binding-plane shadow table (populated by Phase 4e / P2U05).
    pub(crate) shadow_table: Arc<ShadowTable>,
    /// Phase 2 binding-plane scope provenance store (populated by Phase 4e / P2U11).
    pub(crate) scope_provenance_store: Arc<ScopeProvenanceStore>,
    /// Phase 3 file segment table mapping `FileId` to contiguous node ranges.
    /// Populated during build Phase 3 parallel commit, persisted in V10+ snapshots.
    pub(crate) file_segments: Arc<FileSegmentTable>,
}

impl CodeGraph {
    /// Creates a new empty `CodeGraph`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert_eq!(graph.epoch(), 0);
    /// ```
    #[must_use]
    pub fn new() -> Self {
        Self {
            nodes: Arc::new(NodeArena::new()),
            edges: Arc::new(BidirectionalEdgeStore::new()),
            strings: Arc::new(StringInterner::new()),
            files: Arc::new(FileRegistry::new()),
            indices: Arc::new(AuxiliaryIndices::new()),
            macro_metadata: Arc::new(NodeMetadataStore::new()),
            node_provenance: Arc::new(NodeProvenanceStore::new()),
            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
            fact_epoch: 0,
            epoch: 0,
            confidence: HashMap::new(),
            scope_arena: Arc::new(ScopeArena::new()),
            alias_table: Arc::new(AliasTable::new()),
            shadow_table: Arc::new(ShadowTable::new()),
            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
            file_segments: Arc::new(FileSegmentTable::new()),
        }
    }

    /// Creates a `CodeGraph` from existing components.
    ///
    /// This is useful when building a graph from external data or
    /// reconstructing from serialized state.
    #[must_use]
    pub fn from_components(
        nodes: NodeArena,
        edges: BidirectionalEdgeStore,
        strings: StringInterner,
        files: FileRegistry,
        indices: AuxiliaryIndices,
        macro_metadata: NodeMetadataStore,
    ) -> Self {
        Self {
            nodes: Arc::new(nodes),
            edges: Arc::new(edges),
            strings: Arc::new(strings),
            files: Arc::new(files),
            indices: Arc::new(indices),
            macro_metadata: Arc::new(macro_metadata),
            node_provenance: Arc::new(NodeProvenanceStore::new()),
            edge_provenance: Arc::new(EdgeProvenanceStore::new()),
            fact_epoch: 0,
            epoch: 0,
            confidence: HashMap::new(),
            scope_arena: Arc::new(ScopeArena::new()),
            alias_table: Arc::new(AliasTable::new()),
            shadow_table: Arc::new(ShadowTable::new()),
            scope_provenance_store: Arc::new(ScopeProvenanceStore::new()),
            file_segments: Arc::new(FileSegmentTable::new()),
        }
    }

    /// Creates a cheap snapshot of the graph.
    ///
    /// This operation clones one `Arc` per component and typically completes
    /// in under 1μs. The snapshot is isolated from future mutations to the
    /// original graph.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// // snapshot is independent of future mutations to graph
    /// ```
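    ///
    /// Snapshot isolation in action (a minimal sketch; `bump_epoch` stands in
    /// for a real mutation):
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let mut graph = CodeGraph::new();
    /// let snapshot = graph.snapshot();
    /// graph.bump_epoch();
    /// assert_eq!(graph.epoch(), 1);
    /// // `snapshot` still reflects the state captured at creation time.
    /// ```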
    #[must_use]
    pub fn snapshot(&self) -> GraphSnapshot {
        GraphSnapshot {
            nodes: Arc::clone(&self.nodes),
            edges: Arc::clone(&self.edges),
            strings: Arc::clone(&self.strings),
            files: Arc::clone(&self.files),
            indices: Arc::clone(&self.indices),
            macro_metadata: Arc::clone(&self.macro_metadata),
            node_provenance: Arc::clone(&self.node_provenance),
            edge_provenance: Arc::clone(&self.edge_provenance),
            fact_epoch: self.fact_epoch,
            epoch: self.epoch,
            scope_arena: Arc::clone(&self.scope_arena),
            alias_table: Arc::clone(&self.alias_table),
            shadow_table: Arc::clone(&self.shadow_table),
            scope_provenance_store: Arc::clone(&self.scope_provenance_store),
            file_segments: Arc::clone(&self.file_segments),
        }
    }

    /// Returns a reference to the node arena.
    #[inline]
    #[must_use]
    pub fn nodes(&self) -> &NodeArena {
        &self.nodes
    }

    /// Returns a reference to the bidirectional edge store.
    #[inline]
    #[must_use]
    pub fn edges(&self) -> &BidirectionalEdgeStore {
        &self.edges
    }

    /// Returns a reference to the string interner.
    #[inline]
    #[must_use]
    pub fn strings(&self) -> &StringInterner {
        &self.strings
    }

    /// Returns a reference to the file registry.
    #[inline]
    #[must_use]
    pub fn files(&self) -> &FileRegistry {
        &self.files
    }

    /// Returns a reference to the auxiliary indices.
    #[inline]
    #[must_use]
    pub fn indices(&self) -> &AuxiliaryIndices {
        &self.indices
    }

    /// Returns a reference to the macro boundary metadata store.
    #[inline]
    #[must_use]
    pub fn macro_metadata(&self) -> &NodeMetadataStore {
        &self.macro_metadata
    }

    // ------------------------------------------------------------------
    // Phase 1 fact-layer provenance accessors (P1U09).
    // ------------------------------------------------------------------

    /// Returns the monotonic fact-layer epoch stamped on the most recently
    /// saved or loaded snapshot. Returns `0` for graphs that have not been
    /// persisted yet or were loaded from V7 snapshots.
    #[inline]
    #[must_use]
    pub fn fact_epoch(&self) -> u64 {
        self.fact_epoch
    }

    /// Looks up node provenance by `NodeId`.
    ///
    /// Returns `None` if the `NodeId` is out of range, the slot is vacant,
    /// or the stored generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn node_provenance(
        &self,
        id: crate::graph::unified::node::id::NodeId,
    ) -> Option<&NodeProvenance> {
        self.node_provenance.lookup(id)
    }

    /// Looks up edge provenance by `EdgeId`.
    ///
    /// Returns `None` if the `EdgeId` is out of range, the slot is vacant,
    /// or the edge is the invalid sentinel.
    #[inline]
    #[must_use]
    pub fn edge_provenance(
        &self,
        id: crate::graph::unified::edge::id::EdgeId,
    ) -> Option<&EdgeProvenance> {
        self.edge_provenance.lookup(id)
    }

    /// Returns a borrowed provenance view for a file.
    ///
    /// Returns `None` for invalid/unregistered `FileId`s.
    #[inline]
    #[must_use]
    pub fn file_provenance(
        &self,
        id: crate::graph::unified::file::id::FileId,
    ) -> Option<FileProvenanceView<'_>> {
        self.files.file_provenance(id)
    }

    // ------------------------------------------------------------------
    // Phase 2 binding-plane accessors (P2U03).
    // ------------------------------------------------------------------

    /// Returns a reference to the scope arena derived during Phase 4e.
    ///
    /// The arena is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn scope_arena(&self) -> &ScopeArena {
        &self.scope_arena
    }

    /// Installs a freshly-derived scope arena.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_scope_arena(&mut self, arena: ScopeArena) {
        self.scope_arena = Arc::new(arena);
    }

    /// Returns a reference to the alias table derived during Phase 4e.
    ///
    /// The table is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn alias_table(&self) -> &AliasTable {
        &self.alias_table
    }

    /// Installs a freshly-derived alias table.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_alias_table(&mut self, table: AliasTable) {
        self.alias_table = Arc::new(table);
    }

    /// Returns a reference to the shadow table derived during Phase 4e.
    ///
    /// The table is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn shadow_table(&self) -> &ShadowTable {
        &self.shadow_table
    }

    /// Installs a freshly-derived shadow table.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_shadow_table(&mut self, table: ShadowTable) {
        self.shadow_table = Arc::new(table);
    }

    /// Returns a reference to the scope provenance store derived during Phase 4e.
    ///
    /// The store is empty on freshly-constructed `CodeGraph` instances and is
    /// populated by calling `phase4e_binding::derive_binding_plane`.
    #[inline]
    #[must_use]
    pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
        &self.scope_provenance_store
    }

    /// Looks up scope provenance by `ScopeId`.
    ///
    /// Returns `None` if the slot is out of range, vacant, or the stored
    /// generation does not match (stale handle).
    #[inline]
    #[must_use]
    pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
        self.scope_provenance_store.lookup(id)
    }

    /// Looks up the live `ScopeId` for a stable scope identity.
    ///
    /// Returns `None` if no provenance record is registered for that stable id.
    /// The reverse index is populated by `insert` during Phase 4e and must be
    /// rebuilt after V9 deserialization.
    #[inline]
    #[must_use]
    pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
        self.scope_provenance_store.scope_by_stable_id(stable)
    }

    /// Installs a freshly-derived scope provenance store.
    ///
    /// Called from `phase4e_binding::derive_binding_plane` during the build
    /// pipeline. External callers that run Phase 4e manually (e.g., test
    /// fixture builders) use this to store the result.
    pub(crate) fn set_scope_provenance_store(&mut self, store: ScopeProvenanceStore) {
        self.scope_provenance_store = Arc::new(store);
    }

    /// Returns a reference to the file segment table.
    #[inline]
    #[must_use]
    pub fn file_segments(&self) -> &FileSegmentTable {
        &self.file_segments
    }

    /// Replaces the file segment table.
    pub(crate) fn set_file_segments(&mut self, table: FileSegmentTable) {
        self.file_segments = Arc::new(table);
    }

    /// Returns a mutable reference to the file segment table (via `Arc::make_mut`).
    pub(crate) fn file_segments_mut(&mut self) -> &mut FileSegmentTable {
        Arc::make_mut(&mut self.file_segments)
    }

    /// Test-only helper that records a file segment directly on the
    /// graph, bypassing the full Phase 3 commit pipeline. Only
    /// available under `#[cfg(feature = "rebuild-internals")]` so the
    /// surface is opt-in for rebuild-plane consumers (the feature is
    /// whitelisted to sqry-daemon + sqry-core integration tests; see
    /// `sqry-core/tests/rebuild_internals_whitelist.rs`).
    ///
    /// Integration tests (notably
    /// `sqry-core/tests/incremental_remove_file_scale.rs`) call this
    /// to seed the synthetic workspaces they build before exercising
    /// `RebuildGraph::remove_file` / `CodeGraph::remove_file`. Production
    /// code paths never touch this method — Phase 3 parallel commit
    /// is the sole production writer, via the crate-internal
    /// `file_segments_mut` accessor above.
    ///
    /// Renamed with `test_only_` prefix so the purpose is unambiguous
    /// at every call site; `#[doc(hidden)]` hides it from rendered
    /// rustdoc so downstream daemon integrations don't discover it by
    /// accident.
    #[cfg(feature = "rebuild-internals")]
    #[doc(hidden)]
    pub fn test_only_record_file_segment(
        &mut self,
        file_id: FileId,
        start_slot: u32,
        slot_count: u32,
    ) {
        Arc::make_mut(&mut self.file_segments).record_range(file_id, start_slot, slot_count);
    }

    /// Sets the provenance stores and fact epoch, typically called by the
    /// persistence loader after deserializing a V8 snapshot.
    pub(crate) fn set_provenance(
        &mut self,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
    ) {
        self.node_provenance = Arc::new(node_provenance);
        self.edge_provenance = Arc::new(edge_provenance);
        self.fact_epoch = fact_epoch;
    }

    /// Returns the current epoch.
    #[inline]
    #[must_use]
    pub fn epoch(&self) -> u64 {
        self.epoch
    }

    /// Returns a mutable reference to the node arena.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics: if other
    /// references exist (e.g., snapshots), the data is cloned.
    #[inline]
    pub fn nodes_mut(&mut self) -> &mut NodeArena {
        Arc::make_mut(&mut self.nodes)
    }

    /// Returns a mutable reference to the bidirectional edge store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn edges_mut(&mut self) -> &mut BidirectionalEdgeStore {
        Arc::make_mut(&mut self.edges)
    }

    /// Returns a mutable reference to the string interner.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn strings_mut(&mut self) -> &mut StringInterner {
        Arc::make_mut(&mut self.strings)
    }

    /// Returns a mutable reference to the file registry.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn files_mut(&mut self) -> &mut FileRegistry {
        Arc::make_mut(&mut self.files)
    }

    /// Returns a mutable reference to the auxiliary indices.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn indices_mut(&mut self) -> &mut AuxiliaryIndices {
        Arc::make_mut(&mut self.indices)
    }

    /// Returns a mutable reference to the macro boundary metadata store.
    ///
    /// Uses `Arc::make_mut` for copy-on-write semantics.
    #[inline]
    pub fn macro_metadata_mut(&mut self) -> &mut NodeMetadataStore {
        Arc::make_mut(&mut self.macro_metadata)
    }

    /// Returns mutable references to both the node arena and the string interner.
    ///
    /// This avoids the borrow conflict that arises when calling `nodes_mut()` and
    /// `strings_mut()` separately on `&mut self`.
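    ///
    /// A minimal usage sketch (the arena and interner mutation APIs live in
    /// their own modules; the calls below are illustrative, not exact
    /// signatures):
    ///
    /// ```rust,ignore
    /// let (nodes, strings) = graph.nodes_and_strings_mut();
    /// let name = strings.intern("foo"); // hypothetical interner call
    /// // ...use `name` while still holding `nodes` mutably...
    /// ```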
    #[inline]
    pub fn nodes_and_strings_mut(&mut self) -> (&mut NodeArena, &mut StringInterner) {
        (
            Arc::make_mut(&mut self.nodes),
            Arc::make_mut(&mut self.strings),
        )
    }

    /// Rebuilds auxiliary indices from the current node arena.
    ///
    /// This avoids the borrow conflict that arises when calling `nodes()` and
    /// `indices_mut()` separately on `&mut self`. Uses disjoint field borrowing
    /// to access `nodes` (shared) and `indices` (mutable) simultaneously.
    /// Internally calls `AuxiliaryIndices::build_from_arena` which clears
    /// existing indices and rebuilds in a single pass without per-element
    /// duplicate checking.
    ///
    /// As of Task 4 Step 4 Phase 2 this inherent method delegates to the
    /// generic
    /// [`crate::graph::unified::build::parallel_commit::rebuild_indices`]
    /// free function so the same implementation serves both the
    /// full-build (`CodeGraph`) and incremental-rebuild (`RebuildGraph`)
    /// pipelines. Call sites that hold a concrete `CodeGraph` can keep
    /// using `graph.rebuild_indices()`; incremental rebuild call sites
    /// should use the free function directly.
    pub fn rebuild_indices(&mut self) {
        crate::graph::unified::build::parallel_commit::rebuild_indices(self);
    }

    /// Increments the epoch counter and returns the new value.
    ///
    /// Called automatically by `ConcurrentCodeGraph::write()`.
    #[inline]
    pub fn bump_epoch(&mut self) -> u64 {
        self.epoch = self.epoch.wrapping_add(1);
        self.epoch
    }

    /// Sets the epoch to a specific value.
    ///
    /// This is primarily for testing or reconstruction from serialized state.
    #[inline]
    pub fn set_epoch(&mut self, epoch: u64) {
        self.epoch = epoch;
    }

    /// Returns the number of nodes in the graph.
    ///
    /// This is a convenience method that delegates to `nodes().len()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert_eq!(graph.node_count(), 0);
    /// ```
    #[inline]
    #[must_use]
    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }

    /// Returns the number of edges in the graph (forward direction).
    ///
    /// This counts edges in the forward store, including both CSR and delta edges.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert_eq!(graph.edge_count(), 0);
    /// ```
    #[inline]
    #[must_use]
    pub fn edge_count(&self) -> usize {
        let stats = self.edges.stats();
        stats.forward.csr_edge_count + stats.forward.delta_edge_count
    }

    /// Returns true if the graph contains no nodes.
    ///
    /// This is a convenience method that delegates to `nodes().is_empty()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// assert!(graph.is_empty());
    /// ```
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.nodes.is_empty()
    }

    /// Returns an iterator over all indexed file paths.
    ///
    /// This is useful for enumerating all files that have been processed
    /// and added to the graph.
    ///
    /// # Example
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::CodeGraph;
    ///
    /// let graph = CodeGraph::new();
    /// for (file_id, path) in graph.indexed_files() {
    ///     println!("File {}: {}", file_id.index(), path.display());
    /// }
    /// ```
    #[inline]
    pub fn indexed_files(
        &self,
    ) -> impl Iterator<Item = (crate::graph::unified::file::FileId, &std::path::Path)> {
        self.files
            .iter()
            .map(|(id, arc_path)| (id, arc_path.as_ref()))
    }

    /// Returns the set of files that import one or more symbols exported by
    /// `file_id`, deduplicated and sorted ascending.
    ///
    /// For every [`EdgeKind::Imports`] edge whose target node lives in
    /// `file_id`, the source node's [`FileId`] is added to the result. Files
    /// are returned sorted by raw index so the result is deterministic across
    /// runs. The caller's own file is never included — an `Imports` edge
    /// whose source and target both live in `file_id` is treated as a
    /// self-import and elided. Edges whose source node is no longer resolvable
    /// in the arena (tombstoned) are silently skipped.
    ///
    /// This is the file-level view of Pass 4 cross-file `Imports` edges, used
    /// by the incremental rebuild engine to compute reverse-dependency
    /// closures: "if file X changes its exports, which files need to be
    /// re-linked?"
    ///
    /// # Complexity
    ///
    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized. Uses
    /// [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node lookup
    /// (HashMap-backed) and the bidirectional edge store's reverse adjacency;
    /// no full-graph scan. A final `O(R log R)` sort over the deduplicated
    /// importer set (where `R` is the importer count) is negligible in
    /// practice since `R ≤ file_count`.
    ///
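    /// # Example
    ///
    /// A hedged sketch; `file_id` is assumed to have been obtained from the
    /// [`FileRegistry`] when the file was indexed:
    ///
    /// ```rust,ignore
    /// for importer in graph.reverse_import_index(file_id) {
    ///     println!("re-link candidate: {importer:?}");
    /// }
    /// ```
    ///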
    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
    #[must_use]
    pub fn reverse_import_index(&self, file_id: FileId) -> Vec<FileId> {
        let mut importers: HashSet<FileId> = HashSet::new();
        for &target_node in self.indices.by_file(file_id) {
            for edge_ref in self.edges.edges_to(target_node) {
                if !matches!(edge_ref.kind, EdgeKind::Imports { .. }) {
                    continue;
                }
                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
                    continue;
                };
                let source_file = source_entry.file;
                if source_file != file_id {
                    importers.insert(source_file);
                }
            }
        }
        let mut result: Vec<FileId> = importers.into_iter().collect();
        result.sort();
        result
    }

    /// Returns the set of files that hold at least one live inter-file edge
    /// targeting a node in `file_id`, deduplicated and sorted ascending.
    ///
    /// Unlike [`reverse_import_index`](Self::reverse_import_index) — which
    /// filters to [`EdgeKind::Imports`] only — this helper treats **every**
    /// cross-file edge as a dependency signal: `Calls`, `References`,
    /// `TypeOf`, `Inherits`, `Implements`, `FfiCall`, `HttpRequest`,
    /// `GrpcCall`, `WebAssemblyCall`, `DbQuery`, `TableRead`, `TableWrite`,
    /// `TriggeredBy`, `MessageQueue`, `WebSocket`, `GraphQLOperation`,
    /// `ProcessExec`, `FileIpc`, `ProtocolCall`, and any future
    /// cross-file-capable variant. This is the reverse-dependency surface the
    /// incremental rebuild engine (Task 4 Step 4 Phase 3e) needs: when
    /// `file_id` changes, every file whose committed edges point into
    /// `file_id`'s nodes must re-enter the rebuild closure so its cross-file
    /// references survive the target-side tombstone-and-reparse cycle.
    ///
    /// The caller's own file is never included — an edge whose source and
    /// target both live in `file_id` is a self-reference and is elided.
    /// Edges whose source node is no longer resolvable in the arena
    /// (tombstoned) are silently skipped.
    ///
    /// # When to use this vs [`reverse_import_index`](Self::reverse_import_index)
    ///
    /// * [`reverse_import_index`](Self::reverse_import_index) remains the
    ///   right surface for consumers that specifically need *import*
    ///   relationships (export surface analysis, module-dependency graphs,
    ///   etc.).
    /// * `reverse_dependency_index` is the right surface for incremental
    ///   rebuild closure computation. Widening past imports is necessary
    ///   because call sites, type references, trait implementations, FFI
    ///   declarations, HTTP clients, and every other cross-file edge kind
    ///   hold a committed edge into the target file that becomes stale the
    ///   moment `remove_file(target)` tombstones its arena nodes. Leaving
    ///   those files out of the closure leaves the committed edges pointing
    ///   at the stale (pre-tombstone) node IDs — Phase 4c-prime only
    ///   rewrites edges on **re-parsed** files' `PendingEdge` sets, never
    ///   committed edges owned by files outside the reparse scope.
    ///
    /// # Complexity
    ///
    /// `O(|nodes_in_file_id| × avg_incoming_edges_per_node)` amortized —
    /// same bound as [`reverse_import_index`](Self::reverse_import_index).
    /// Uses [`AuxiliaryIndices::by_file`] for O(1)-amortized per-file node
    /// lookup and the bidirectional edge store's reverse adjacency; no
    /// full-graph scan. Final `O(R log R)` sort over the deduplicated
    /// dependent set is negligible since `R ≤ file_count`.
    ///
    /// # Over-widening is expected and acceptable
    ///
    /// The closure will include every file that references anything in
    /// `file_id`, not just files whose exports change. In common codebases
    /// this widens the reparse set modestly (a 10-file change may expand
    /// to 20–30 dependent files in a medium crate). Correctness requires
    /// the widening; minimality is a follow-up optimisation if profiling
    /// demands it.
    ///
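    /// # Example
    ///
    /// A hedged closure-seeding sketch; `changed` is assumed to be the set of
    /// edited [`FileId`]s reported by a file watcher:
    ///
    /// ```rust,ignore
    /// let mut closure: HashSet<FileId> = changed.iter().copied().collect();
    /// for &f in &changed {
    ///     closure.extend(graph.reverse_dependency_index(f));
    /// }
    /// // `closure` is the reparse set handed to the incremental rebuild engine.
    /// ```
    ///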
    /// [`AuxiliaryIndices::by_file`]: crate::graph::unified::storage::indices::AuxiliaryIndices::by_file
    #[must_use]
    pub fn reverse_dependency_index(&self, file_id: FileId) -> Vec<FileId> {
        let mut dependents: HashSet<FileId> = HashSet::new();
        for &target_node in self.indices.by_file(file_id) {
            for edge_ref in self.edges.edges_to(target_node) {
                let Some(source_entry) = self.nodes.get(edge_ref.source) else {
                    continue;
                };
                let source_file = source_entry.file;
                if source_file != file_id {
                    dependents.insert(source_file);
                }
            }
        }
        let mut result: Vec<FileId> = dependents.into_iter().collect();
        result.sort();
        result
    }

    /// Returns the per-language confidence metadata.
    ///
    /// This contains analysis confidence information collected during graph build,
    /// primarily used by language plugins (e.g., Rust) to track analysis quality.
    #[inline]
    #[must_use]
    pub fn confidence(&self) -> &HashMap<String, ConfidenceMetadata> {
        &self.confidence
    }

    /// Merges confidence metadata for a language.
    ///
    /// If confidence already exists for the language, this merges the new
    /// metadata (taking the lower confidence level and combining limitations).
    /// Otherwise, it inserts the new confidence.
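    ///
    /// ```rust,ignore
    /// // Hedged sketch: `partial` and `ast_only` are ConfidenceMetadata
    /// // values built elsewhere (constructors live in `crate::confidence`).
    /// graph.merge_confidence("rust", partial);
    /// graph.merge_confidence("rust", ast_only);
    /// // The stored level is now the more conservative of the two inputs.
    /// ```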
    pub fn merge_confidence(&mut self, language: &str, metadata: ConfidenceMetadata) {
        use crate::confidence::ConfidenceLevel;

        self.confidence
            .entry(language.to_string())
            .and_modify(|existing| {
                // Take the lower confidence level (more conservative)
                let new_level = match (&existing.level, &metadata.level) {
                    (ConfidenceLevel::Verified, other) | (other, ConfidenceLevel::Verified) => {
                        *other
                    }
                    (ConfidenceLevel::Partial, ConfidenceLevel::AstOnly)
                    | (ConfidenceLevel::AstOnly, ConfidenceLevel::Partial) => {
                        ConfidenceLevel::AstOnly
                    }
                    (level, _) => *level,
                };
                existing.level = new_level;

                // Merge limitations (deduplicated)
                for limitation in &metadata.limitations {
                    if !existing.limitations.contains(limitation) {
                        existing.limitations.push(limitation.clone());
                    }
                }

                // Merge unavailable features (deduplicated)
                for feature in &metadata.unavailable_features {
                    if !existing.unavailable_features.contains(feature) {
                        existing.unavailable_features.push(feature.clone());
                    }
                }
            })
            .or_insert(metadata);
    }

    /// Sets the confidence metadata map directly.
    ///
    /// This is primarily used when loading a graph from serialized state.
    pub fn set_confidence(&mut self, confidence: HashMap<String, ConfidenceMetadata>) {
        self.confidence = confidence;
    }

    // ------------------------------------------------------------------
    // Task 4 Step 2 (A2 §F.2) — File-level tombstoning on a CodeGraph.
    //
    // Unlike the rebuild pipeline's `RebuildGraph::remove_file`, this
    // path mutates a live `CodeGraph` in place and is the mechanism
    // used by the full-rebuild flow when it needs to evict a file's
    // nodes+edges between compactions. The daemon's incremental
    // `WorkspaceManager` (Task 6) goes through the rebuild path, which
    // is why this entry point is `pub(crate)`.
    // ------------------------------------------------------------------

    /// Tombstone every node that belongs to `file_id`, invalidate every
    /// edge whose source or target is one of those nodes (across both
    /// forward and reverse CSR + delta tiers), drop the file's entry
    /// from the [`FileRegistry`], and return the set of [`NodeId`]s
    /// that were tombstoned.
    ///
    /// This is the §F.2-aware file-removal primitive. Semantically, the
    /// post-condition matches what a full rebuild of the workspace
    /// without `file_id` would produce:
    ///
    /// * Every [`NodeEntry`] whose [`NodeEntry.file`] was `file_id` has
    ///   been `NodeArena::remove`d, advancing its slot generation so
    ///   stale [`NodeId`] handles do not alias a later re-allocation.
    /// * Every CSR edge whose source or target slot matches one of the
    ///   tombstoned slot indices has its `csr_tombstones` bit set; the
    ///   read path's merge step already filters tombstoned CSR edges
    ///   out of every query result.
    /// * Every delta-buffer edge (Add or Remove, any file) whose source
    ///   or target matches a tombstoned slot has been dropped from the
    ///   delta in both directions.
    /// * The [`AuxiliaryIndices`] (kind / name / qualified-name / file)
    ///   no longer reference any of the tombstoned `NodeId`s.
    /// * [`NodeMetadataStore`], [`NodeProvenanceStore`], [`ScopeArena`],
    ///   [`AliasTable`], and [`ShadowTable`] have been compacted through
    ///   the [`NodeIdBearing::retain_nodes`] predicate so no
    ///   tombstoned NodeId survives in any publish-visible store.
    /// * [`FileRegistry::per_file_nodes`] no longer holds a bucket for
    ///   `file_id`, the lookup slot is recycled, and
    ///   [`FileRegistry::resolve(file_id)`] returns `None`.
    ///
    /// Returns the `Vec<NodeId>` of tombstoned nodes. The returned list
    /// is useful for downstream housekeeping (e.g., resetting per-file
    /// caches keyed by NodeId) and for tests that need to assert on the
    /// exact membership of the tombstone set.
    ///
    /// # Idempotency
    ///
    /// Calling `remove_file` twice with the same `file_id` — or calling
    /// it for a `file_id` that was never registered — is a safe no-op.
    /// The returned `Vec<NodeId>` is empty on the second call; no
    /// arena / edge / index state is mutated (the predicate-based
    /// compaction of `NodeIdBearing` surfaces short-circuits when the
    /// dead set is empty).
    ///
    /// # Visibility
    ///
    /// `pub(crate)` because external callers (Task 6's
    /// `WorkspaceManager` on the sqry-daemon side) route through
    /// [`super::super::rebuild::rebuild_graph::RebuildGraph::remove_file`]
    /// instead. This `CodeGraph`-level variant is used by full-rebuild
    /// housekeeping paths inside sqry-core and by the Task 4 Step 4
    /// incremental fallback for cases where the caller already has a
    /// `&mut CodeGraph` and does not need the clone-and-publish
    /// round-trip of [`clone_for_rebuild`](Self::clone_for_rebuild) →
    /// [`finalize`](super::super::rebuild::rebuild_graph::RebuildGraph::finalize).
    ///
    /// # Performance
    ///
    /// * `O(|tombstoned| + |csr_edges| + |delta_edges|)` amortised.
    ///   The CSR walk is linear in total edge count (not per-file),
    ///   which is the dominant cost; each row check is O(1) via the
    ///   precomputed dead-slot-index `HashSet` in
    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`].
    /// * Delta filtering is `O(|delta|)` per direction.
    /// * Auxiliary-index compaction is `O(|tombstoned|)` amortised
    ///   because each of the four indices keys its entries by the
    ///   tombstoned NodeIds directly.
    ///
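    /// # Example
    ///
    /// Idempotency sketch (crate-internal caller; `file_id` assumed to have
    /// been registered earlier):
    ///
    /// ```rust,ignore
    /// let first = graph.remove_file(file_id);
    /// let second = graph.remove_file(file_id); // safe no-op
    /// assert!(second.is_empty());
    /// ```
    ///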
    /// [`NodeEntry`]: crate::graph::unified::storage::arena::NodeEntry
    /// [`NodeEntry.file`]: crate::graph::unified::storage::arena::NodeEntry::file
    /// [`NodeIdBearing::retain_nodes`]: crate::graph::unified::rebuild::coverage::NodeIdBearing::retain_nodes
    #[allow(dead_code)] // Consumer is Task 4 Step 4 (`incremental_rebuild`)
    // and the unit tests below; published in this commit so the §F.2
    // invariant surface can be reviewed in isolation per the Gate 0c
    // split contract.
    pub(crate) fn remove_file(
        &mut self,
        file_id: FileId,
    ) -> Vec<crate::graph::unified::node::NodeId> {
        use crate::graph::unified::node::NodeId;
        use crate::graph::unified::rebuild::coverage::NodeIdBearing;

        // Drain the per-file bucket. For a file that was never
        // registered, this returns an empty Vec; the rest of the
        // method then short-circuits on the `tombstoned.is_empty()`
        // test below. We still deregister the file unconditionally,
        // on the off chance the bucket was empty even though the file
        // was registered (defensive; in the common case the bucket
        // exists iff the file is registered).
        let tombstoned: Vec<NodeId> = self.files_mut().take_nodes(file_id);
        // Always drop the file's path entry + recycle its slot, even
        // when the bucket was empty, so repeated registrations of the
        // same path don't resurrect a zombie FileId. `unregister` is
        // idempotent (returns None for unknown IDs) so the cost is a
        // single HashMap probe when file_id is already gone.
        self.files_mut().unregister(file_id);
        // Clear the file's segment entry unconditionally (idempotent —
        // `FileSegmentTable::remove` no-ops on unknown ids). This MUST
        // happen before the FileId slot can be recycled for reuse,
        // otherwise a later registration of a different path under the
        // reused FileId would inherit the previous file's stale node
        // range (see `sqry-core/src/graph/unified/build/reindex.rs`,
        // which trusts `file_segments().get(file_id)` to decide which
        // slots to tombstone). Calling `unregister` first (above) is
        // safe: slot recycling is driven by a later `register`, not by
        // `unregister` itself, and neither call touches the other's
        // backing store, so the FileId cannot be recycled and reissued
        // within a single `remove_file` call.
        self.file_segments_mut().remove(file_id);

        if tombstoned.is_empty() {
            return tombstoned;
        }

        // Dead set keyed on NodeId for NodeIdBearing predicates.
        // `retain_nodes` uses `HashSet::contains` so membership is O(1).
        let dead: HashSet<NodeId> = tombstoned.iter().copied().collect();

        // 1. Tombstone the arena slots. `NodeArena::remove` is
        //    idempotent — stale NodeIds that don't match a slot's
        //    current generation are no-ops, which lets this method be
        //    safely re-run on the same file.
        {
            let arena = self.nodes_mut();
            for &nid in &tombstoned {
                let _ = arena.remove(nid);
            }
        }

        // 2. Invalidate edges across both CSR + delta in both
        //    directions. This is the expensive step; the helper uses a
        //    precomputed dead-slot-index set so the CSR walk is linear
        //    in total edge count, not quadratic.
        self.edges_mut().tombstone_edges_for_nodes(&dead);

        // 3. Compact the auxiliary indices so name/kind/qname/file
        //    lookups do not return tombstoned NodeIds. Using the
        //    NodeIdBearing surface keeps this step in lockstep with the
        //    rebuild pipeline's step 4 — any future publish-visible
        //    NodeId-bearing container added to the K.A/K.B matrix is
        //    automatically swept here too.
        {
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> = Box::new(|nid| !dead.contains(&nid));
            self.indices_mut().retain_nodes(&*predicate);
            self.macro_metadata_mut().retain_nodes(&*predicate);
            // The remaining K.A rows (node_provenance, scope_arena,
            // alias_table, shadow_table) need Arc::make_mut accessors —
            // they are wrapped in Arc at rest so this is where the
            // CoW clone happens on demand. Inline the Arc::make_mut
            // calls here (no public mut accessor exists today because
            // the sole writer has been the rebuild path; extend the
            // set by adding a similar inline call plus a K.A/K.B row
            // in `super::super::rebuild::coverage`).
            Arc::make_mut(&mut self.node_provenance).retain_nodes(&*predicate);
            Arc::make_mut(&mut self.scope_arena).retain_nodes(&*predicate);
            Arc::make_mut(&mut self.alias_table).retain_nodes(&*predicate);
            Arc::make_mut(&mut self.shadow_table).retain_nodes(&*predicate);
        }

        tombstoned
    }

    // ------------------------------------------------------------------
    // Gate 0c (A2 §H, Task 4) — RebuildGraph assembly path.
    // ------------------------------------------------------------------

    /// Assemble a [`CodeGraph`] from owned rebuild-local parts produced
    /// by `RebuildGraph::finalize()` (defined in
    /// `super::super::rebuild::rebuild_graph`).
    ///
    /// This constructor is deliberately `pub(crate)` and named with a
    /// leading `__` so it is inaccessible from downstream crates even
    /// when the `rebuild-internals` feature is enabled: only code in
    /// `sqry-core` itself (specifically `RebuildGraph::finalize`) is
    /// permitted to call it. The trybuild fixture
    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
    /// proves there is no other path from `RebuildGraph` to
    /// `Arc<CodeGraph>`.
    ///
    /// The argument order mirrors the `CodeGraph` struct declaration
    /// order exactly; the public `clone_for_rebuild` → `finalize`
    /// round-trip uses the same macro-driven field enumeration so any
    /// new `CodeGraph` field automatically threads through this
    /// constructor as well.
    #[doc(hidden)]
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub(crate) fn __assemble_from_rebuild_parts_internal(
        nodes: NodeArena,
        edges: BidirectionalEdgeStore,
        strings: StringInterner,
        files: FileRegistry,
        indices: AuxiliaryIndices,
        macro_metadata: NodeMetadataStore,
        node_provenance: NodeProvenanceStore,
        edge_provenance: EdgeProvenanceStore,
        fact_epoch: u64,
        epoch: u64,
        confidence: HashMap<String, ConfidenceMetadata>,
        scope_arena: ScopeArena,
        alias_table: AliasTable,
        shadow_table: ShadowTable,
        scope_provenance_store: ScopeProvenanceStore,
        file_segments: FileSegmentTable,
    ) -> Self {
        Self {
            nodes: Arc::new(nodes),
            edges: Arc::new(edges),
            strings: Arc::new(strings),
            files: Arc::new(files),
            indices: Arc::new(indices),
            macro_metadata: Arc::new(macro_metadata),
            node_provenance: Arc::new(node_provenance),
            edge_provenance: Arc::new(edge_provenance),
            fact_epoch,
            epoch,
            confidence,
            scope_arena: Arc::new(scope_arena),
            alias_table: Arc::new(alias_table),
            shadow_table: Arc::new(shadow_table),
            scope_provenance_store: Arc::new(scope_provenance_store),
            file_segments: Arc::new(file_segments),
        }
    }

    // ------------------------------------------------------------------
    // Gate 0c (A2 §F) — publish-boundary debug invariants.
    //
    // These checks fire only in debug / test builds; release builds
    // compile them out. They are called from
    // `RebuildGraph::finalize()` steps 13 and 14 (the single source of
    // truth for the residue check, per §F and §H agreement). Gate 0d
    // will additionally wire the bijection check into
    // `build_and_persist_graph`, `WorkspaceManager::publish_graph`, and
    // every §E equivalence-harness run.
    // ------------------------------------------------------------------

    /// Assert the bijective bucket-membership invariant (A2 §F.1).
    ///
    /// Four conditions must hold simultaneously:
    /// (a) every `NodeId` inside any `per_file_nodes` bucket maps to a
    ///     live arena slot;
    /// (b) every `NodeId` appears in exactly one bucket (no duplicates
    ///     across buckets, no duplicates within a bucket);
    /// (c) the bucket's `FileId` matches the node's own `file` field on
    ///     `NodeEntry`;
    /// (d) when at least one bucket is populated, every live node in
    ///     the arena is accounted for by some bucket.
    ///
    /// Condition (d) is guarded on `!seen.is_empty()` so an empty-graph
    /// (no recorded buckets) publish boundary is vacuously consistent:
    /// legacy V7 snapshots, fresh `CodeGraph::new()` instances, and
    /// rebuilds on graphs that predate Gate 0c's parallel-commit
    /// bucketing must not panic. Once any bucket is populated, every
    /// live arena slot must appear in a bucket.
    ///
    /// Iter-2 B2 (verbatim): this check used to be documented as
    /// "vacuous until future `per_file_nodes` work lands". Pulling
    /// base-plan Step 1 into Gate 0c retires that phrasing — the check
    /// is non-vacuous the moment parallel-parse commits nodes, which
    /// happens on every real build. The `!seen.is_empty()` guard now
    /// exists solely for the empty-graph corner case, not as a phased-
    /// delivery deferral.
    ///
    /// The check is a no-op in release builds. Panics with a
    /// descriptive message on violation. This is intentional: publish-
    /// boundary violations are programmer errors that must surface
    /// loudly during CI / test runs.
    #[cfg(any(debug_assertions, test))]
    pub fn assert_bucket_bijection(&self) {
        use std::collections::HashMap as StdHashMap;
        // (a) + (b) + (c): every bucketed node is live, unique across
        // buckets, and the bucket's FileId matches the node's own file.
        let mut seen: StdHashMap<
            crate::graph::unified::node::NodeId,
            crate::graph::unified::file::FileId,
        > = StdHashMap::new();
        let mut any_bucket_populated = false;
        for (file_id, bucket) in self.files.per_file_nodes_for_gate0d() {
            if !bucket.is_empty() {
                any_bucket_populated = true;
            }
            // Local dedup guard within the bucket itself. `retain_nodes_in_buckets`
            // dedups during finalize step 6, but we re-check here so the
            // invariant covers non-finalize publish paths too (e.g. if a
            // future pipeline builds a graph without routing through
            // `RebuildGraph::finalize`).
            let mut within_bucket: std::collections::HashSet<crate::graph::unified::node::NodeId> =
                std::collections::HashSet::new();
            for node_id in bucket {
                assert!(
                    within_bucket.insert(node_id),
                    "assert_bucket_bijection: duplicate node {node_id:?} inside bucket {file_id:?}"
                );
                assert!(
                    self.nodes.get(node_id).is_some(),
                    "assert_bucket_bijection: dead node {node_id:?} in bucket {file_id:?}"
                );
                let prior = seen.insert(node_id, file_id);
                assert!(
                    prior.is_none(),
                    "assert_bucket_bijection: node {node_id:?} in multiple buckets: \
                     prior={prior:?}, current={file_id:?}"
                );
                if let Some(entry) = self.nodes.get(node_id) {
                    assert_eq!(
                        entry.file, file_id,
                        "assert_bucket_bijection: node {node_id:?} misfiled: in bucket \
                         {file_id:?}, actual {:?}",
                        entry.file
                    );
                }
            }
        }
        // (d): every live node is accounted for by `seen` once buckets
        // are populated. The guard keeps legacy-empty-graph boundaries
        // vacuously consistent (see docs above).
        if any_bucket_populated {
            for (node_id, _entry) in self.nodes.iter() {
                assert!(
                    seen.contains_key(&node_id),
                    "assert_bucket_bijection: live node {node_id:?} absent from all buckets"
                );
            }
        }
    }

    /// Assert the pre-reuse tombstone-residue invariant (A2 §F.2).
    ///
    /// Iterates every publish-visible NodeId-bearing structure on
    /// `self` and panics if any contains a node in `dead`. Called from
    /// `RebuildGraph::finalize()` step 14 against the set drained at
    /// step 8 — exactly one site per the plan's §F / §H agreement.
    ///
    /// No-op when `dead` is empty or in release builds.
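    ///
    /// ```rust,ignore
    /// // Sketch of the finalize-step-13/14 call sites (names are
    /// // illustrative; the `dead` set is the one drained at step 8):
    /// graph.assert_bucket_bijection();
    /// graph.assert_no_tombstone_residue_for(&dead);
    /// ```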
    #[cfg(any(debug_assertions, test))]
    pub fn assert_no_tombstone_residue_for(
        &self,
        dead: &std::collections::HashSet<crate::graph::unified::node::NodeId>,
    ) {
        use super::super::rebuild::coverage::NodeIdBearing;
        if dead.is_empty() {
            return;
        }
        // Every K.A/K.B row must be inspected per §F.2.
        for nid in self.nodes.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
            );
        }
        for nid in self.indices.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
            );
        }
        for nid in self.edges.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
            );
        }
        for nid in self.macro_metadata.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
            );
        }
        for nid in self.node_provenance.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
            );
        }
        for nid in self.scope_arena.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
            );
        }
        for nid in self.alias_table.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
            );
        }
        for nid in self.shadow_table.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
            );
        }
        for nid in self.files.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
            );
        }
    }
}

impl Default for CodeGraph {
    fn default() -> Self {
        Self::new()
    }
}

impl fmt::Debug for CodeGraph {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CodeGraph")
            .field("nodes", &self.nodes.len())
            .field("epoch", &self.epoch)
            .finish_non_exhaustive()
    }
}

impl GraphMemorySize for CodeGraph {
    /// Estimates the total heap bytes owned by this `CodeGraph`.
    ///
    /// Sums heap usage across every component the graph owns: node arena,
    /// bidirectional edge store (forward + reverse CSR + tombstones + delta
    /// buffer), string interner, file registry, auxiliary indices, sparse
    /// macro/classpath metadata, provenance stores, and the per-language
    /// confidence map. Used by the `sqryd` daemon's admission controller
    /// and workspace retention reaper to enforce memory budgets.
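    ///
    /// ```rust,ignore
    /// // Hedged budget-check sketch (the 512 MiB figure is illustrative,
    /// // not the daemon's actual budget):
    /// use sqry_core::graph::unified::memory::GraphMemorySize;
    ///
    /// const BUDGET_BYTES: usize = 512 * 1024 * 1024;
    /// if graph.heap_bytes() > BUDGET_BYTES {
    ///     // refuse admission or trigger the retention reaper
    /// }
    /// ```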
    fn heap_bytes(&self) -> usize {
        let mut confidence_bytes = self.confidence.capacity()
            * (std::mem::size_of::<String>()
                + std::mem::size_of::<ConfidenceMetadata>()
                + HASHMAP_ENTRY_OVERHEAD);
        for (key, meta) in &self.confidence {
            confidence_bytes += key.capacity();
            // `ConfidenceMetadata.limitations` / `unavailable_features` are
            // `Vec<String>`; charge their spill and inner String payloads.
            confidence_bytes += meta.limitations.capacity() * std::mem::size_of::<String>();
            for s in &meta.limitations {
                confidence_bytes += s.capacity();
            }
            confidence_bytes +=
                meta.unavailable_features.capacity() * std::mem::size_of::<String>();
            for s in &meta.unavailable_features {
                confidence_bytes += s.capacity();
            }
        }

        self.nodes.heap_bytes()
            + self.edges.heap_bytes()
            + self.strings.heap_bytes()
            + self.files.heap_bytes()
            + self.indices.heap_bytes()
            + self.macro_metadata.heap_bytes()
            + self.node_provenance.heap_bytes()
            + self.edge_provenance.heap_bytes()
            + confidence_bytes
            + self.file_segments.capacity()
                * std::mem::size_of::<Option<crate::graph::unified::storage::segment::FileSegment>>(
                )
    }
}

/// Thread-safe wrapper for `CodeGraph` with epoch versioning.
///
/// `ConcurrentCodeGraph` provides MVCC-style concurrency:
/// - Multiple readers can access the graph simultaneously
/// - Only one writer can hold the lock at a time
/// - Each write operation increments the epoch for cursor invalidation
///
/// # Design
///
/// The wrapper uses `parking_lot::RwLock` for efficient locking:
/// - Fair scheduling prevents writer starvation
/// - No poisoning (unlike `std::sync::RwLock`)
/// - Faster lock/unlock operations
///
/// # Phase 2 binding-plane access
///
/// Use the three-line snapshot pattern to access `BindingPlane`:
///
/// ```rust,ignore
/// let read_guard = concurrent.read();
/// let snapshot = read_guard.snapshot();
/// let plane = snapshot.binding_plane();
/// let resolution = plane.resolve(&query);
/// ```
///
/// The explicit snapshot handle makes the MVCC lifetime visible at the call
/// site. The full Phase 2 scope/alias/shadow and witness-bearing resolution
/// API is exposed through `BindingPlane`.
///
/// # Usage
///
/// ```rust
/// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
///
/// let graph = ConcurrentCodeGraph::new();
///
/// // Read access (multiple readers allowed)
/// {
///     let guard = graph.read();
///     let _nodes = guard.nodes();
/// }
///
/// // Write access (exclusive)
/// {
///     let mut guard = graph.write();
///     let _nodes = guard.nodes_mut();
/// }
///
/// // Snapshot for long queries
/// let snapshot = graph.snapshot();
/// ```
pub struct ConcurrentCodeGraph {
    /// The underlying code graph protected by a read-write lock.
    inner: RwLock<CodeGraph>,
    /// Global epoch counter for cursor validation.
    epoch: AtomicU64,
}

impl ConcurrentCodeGraph {
    /// Creates a new empty `ConcurrentCodeGraph`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: RwLock::new(CodeGraph::new()),
            epoch: AtomicU64::new(0),
        }
    }

    /// Creates a `ConcurrentCodeGraph` from an existing `CodeGraph`.
    #[must_use]
    pub fn from_graph(graph: CodeGraph) -> Self {
        let epoch = graph.epoch();
        Self {
            inner: RwLock::new(graph),
            epoch: AtomicU64::new(epoch),
        }
    }

    /// Acquires a read lock on the graph.
    ///
    /// Multiple readers can hold the lock simultaneously.
    /// This does not increment the epoch.
    #[inline]
    pub fn read(&self) -> RwLockReadGuard<'_, CodeGraph> {
        self.inner.read()
    }

    /// Acquires a write lock on the graph.
    ///
    /// Only one writer can hold the lock at a time.
    /// This increments the global epoch counter.
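    ///
    /// Cursor-invalidation sketch:
    ///
    /// ```rust
    /// use sqry_core::graph::unified::concurrent::ConcurrentCodeGraph;
    ///
    /// let graph = ConcurrentCodeGraph::new();
    /// let before = graph.epoch();
    /// drop(graph.write()); // each write acquisition bumps the epoch
    /// assert_eq!(graph.epoch(), before + 1);
    /// ```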
1401 #[inline]
1402 pub fn write(&self) -> RwLockWriteGuard<'_, CodeGraph> {
1403 // Increment the global epoch
1404 self.epoch.fetch_add(1, Ordering::SeqCst);
1405 let mut guard = self.inner.write();
1406 // Sync the inner graph's epoch with the global epoch
1407 guard.set_epoch(self.epoch.load(Ordering::SeqCst));
1408 guard
1409 }
1410
1411 /// Returns the current global epoch.
1412 ///
1413 /// This can be used to detect if the graph has been modified
1414 /// since a previous operation (cursor invalidation).
1415 #[inline]
1416 #[must_use]
1417 pub fn epoch(&self) -> u64 {
1418 self.epoch.load(Ordering::SeqCst)
1419 }
1420
1421 /// Creates a cheap snapshot of the graph.
1422 ///
1423 /// This acquires a brief read lock to clone the Arc references.
1424 /// The snapshot is isolated from future mutations.
1425 #[must_use]
1426 pub fn snapshot(&self) -> GraphSnapshot {
1427 self.inner.read().snapshot()
1428 }
1429
1430 // ------------------------------------------------------------------
1431 // Phase 1 fact-layer provenance convenience accessors.
1432 // These acquire a brief read lock, matching the snapshot() pattern.
1433 // ------------------------------------------------------------------
1434
1435 /// Returns the monotonic fact-layer epoch from the underlying graph.
1436 #[must_use]
1437 pub fn fact_epoch(&self) -> u64 {
1438 self.inner.read().fact_epoch()
1439 }
1440
1441 /// Looks up node provenance by `NodeId` (acquires a brief read lock).
1442 #[must_use]
1443 pub fn node_provenance(
1444 &self,
1445 id: crate::graph::unified::node::id::NodeId,
1446 ) -> Option<NodeProvenance> {
1447 self.inner.read().node_provenance(id).copied()
1448 }
1449
1450 /// Looks up edge provenance by `EdgeId` (acquires a brief read lock).
1451 #[must_use]
1452 pub fn edge_provenance(
1453 &self,
1454 id: crate::graph::unified::edge::id::EdgeId,
1455 ) -> Option<EdgeProvenance> {
1456 self.inner.read().edge_provenance(id).copied()
1457 }
1458
1459 /// Returns a file provenance view (acquires a brief read lock).
1460 ///
1461 /// Returns an owned copy since the borrow cannot outlive the lock guard.
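    ///
    /// A minimal usage sketch:
    ///
    /// ```rust,ignore
    /// if let Some(view) = concurrent.file_provenance(file_id) {
    ///     // `view` is owned; the read lock is already released here.
    ///     let _ = (view.indexed_at, view.is_external);
    /// }
    /// ```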
1462 #[must_use]
1463 pub fn file_provenance(
1464 &self,
1465 id: crate::graph::unified::file::id::FileId,
1466 ) -> Option<OwnedFileProvenanceView> {
1467 let guard = self.inner.read();
1468 guard.file_provenance(id).map(|v| OwnedFileProvenanceView {
1469 content_hash: *v.content_hash,
1470 indexed_at: v.indexed_at,
1471 source_uri: v.source_uri,
1472 is_external: v.is_external,
1473 })
1474 }
1475
1476 // ------------------------------------------------------------------
1477 // Phase 2 binding-plane accessors (P2U03).
1478 // ------------------------------------------------------------------
1479
1480 /// Returns the scope arena from the underlying graph (acquires a brief
1481 /// read lock).
1482 ///
1483 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1484 /// this call site.
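    ///
    /// Sketch of the intended pattern (`walk_scopes` is a hypothetical
    /// caller-side helper):
    ///
    /// ```rust,ignore
    /// let arena = concurrent.scope_arena(); // read lock released here
    /// walk_scopes(&arena);                  // long work, no lock held
    /// ```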
1485 #[must_use]
1486 pub fn scope_arena(&self) -> Arc<ScopeArena> {
1487 Arc::clone(&self.inner.read().scope_arena)
1488 }
1489
1490 /// Returns the alias table from the underlying graph (acquires a brief
1491 /// read lock).
1492 ///
1493 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1494 /// this call site.
1495 #[must_use]
1496 pub fn alias_table(&self) -> Arc<AliasTable> {
1497 Arc::clone(&self.inner.read().alias_table)
1498 }
1499
1500 /// Returns the shadow table from the underlying graph (acquires a brief
1501 /// read lock).
1502 ///
1503 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1504 /// this call site.
1505 #[must_use]
1506 pub fn shadow_table(&self) -> Arc<ShadowTable> {
1507 Arc::clone(&self.inner.read().shadow_table)
1508 }
1509
1510 /// Returns the scope provenance store from the underlying graph (acquires
1511 /// a brief read lock).
1512 ///
1513 /// Returns an `Arc` clone so the caller does not hold the lock beyond
1514 /// this call site.
1515 #[must_use]
1516 pub fn scope_provenance_store(&self) -> Arc<ScopeProvenanceStore> {
1517 Arc::clone(&self.inner.read().scope_provenance_store)
1518 }
1519
1520 /// Looks up scope provenance by `ScopeId` (acquires a brief read lock).
1521 ///
1522 /// Returns an owned copy since the borrow cannot outlive the lock guard.
1523 #[must_use]
1524 pub fn scope_provenance(&self, id: ScopeId) -> Option<ScopeProvenance> {
1525 self.inner.read().scope_provenance(id).cloned()
1526 }
1527
1528 /// Looks up the live `ScopeId` for a stable scope identity (acquires a
1529 /// brief read lock).
1530 ///
1531 /// Returns `None` if no provenance record is registered for that stable id.
1532 #[must_use]
1533 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1534 self.inner.read().scope_by_stable_id(stable)
1535 }
1536
1537 /// Returns the file segment table from the underlying graph (acquires a
1538 /// brief read lock).
1539 #[must_use]
1540 pub fn file_segments(&self) -> Arc<FileSegmentTable> {
1541 Arc::clone(&self.inner.read().file_segments)
1542 }
1543
1544 /// Attempts to acquire a read lock without blocking.
1545 ///
1546 /// Returns `None` if the lock is currently held exclusively.
1547 #[inline]
1548 #[must_use]
1549 pub fn try_read(&self) -> Option<RwLockReadGuard<'_, CodeGraph>> {
1550 self.inner.try_read()
1551 }
1552
1553 /// Attempts to acquire a write lock without blocking.
1554 ///
    /// Returns `None` if the lock is already held by another thread
    /// (shared or exclusive). On success, increments the global epoch.
1557 #[inline]
1558 pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, CodeGraph>> {
1559 self.inner.try_write().map(|mut guard| {
            // Claim the epoch under the lock; see `write()` for why the
            // `fetch_add` result is used instead of a second load.
            let new_epoch = self.epoch.fetch_add(1, Ordering::SeqCst) + 1;
            guard.set_epoch(new_epoch);
1562 guard
1563 })
1564 }
1565}
1566
1567impl Default for ConcurrentCodeGraph {
1568 fn default() -> Self {
1569 Self::new()
1570 }
1571}
1572
1573impl fmt::Debug for ConcurrentCodeGraph {
1574 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1575 f.debug_struct("ConcurrentCodeGraph")
1576 .field("epoch", &self.epoch.load(Ordering::SeqCst))
1577 .finish_non_exhaustive()
1578 }
1579}
1580
1581/// Owned copy of file provenance, returned by [`ConcurrentCodeGraph::file_provenance`]
1582/// because the borrowed [`FileProvenanceView`] cannot outlive the read-lock guard.
1583#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1584pub struct OwnedFileProvenanceView {
1585 /// SHA-256 of the on-disk file bytes (owned copy).
1586 pub content_hash: [u8; 32],
1587 /// Unix-epoch seconds at which this file was registered.
1588 pub indexed_at: u64,
1589 /// Optional interned physical origin URI.
1590 pub source_uri: Option<StringId>,
1591 /// Whether this file originates from an external source.
1592 pub is_external: bool,
1593}
1594
1595/// Immutable snapshot of a `CodeGraph` for long-running queries.
1596///
1597/// `GraphSnapshot` holds Arc references to the graph components,
1598/// providing a consistent view that is isolated from concurrent mutations.
1599///
1600/// # Design
1601///
1602/// Snapshots are created via `CodeGraph::snapshot()` or
1603/// `ConcurrentCodeGraph::snapshot()`. They are:
1604///
1605/// - **Immutable**: No mutation methods available
1606/// - **Isolated**: Independent of future graph mutations
1607/// - **Cheap**: Only Arc clones, no data copying
1608/// - **Self-contained**: Can outlive the original graph/lock
1609///
1610/// # Usage
1611///
1612/// ```rust
1613/// use sqry_core::graph::unified::concurrent::{ConcurrentCodeGraph, GraphSnapshot};
1614///
1615/// let graph = ConcurrentCodeGraph::new();
1616///
1617/// // Create snapshot for a long query
1618/// let snapshot: GraphSnapshot = graph.snapshot();
1619///
1620/// // Snapshot can be used independently
1621/// let _epoch = snapshot.epoch();
1622/// ```
1623#[derive(Clone)]
1624pub struct GraphSnapshot {
1625 /// Node storage snapshot.
1626 nodes: Arc<NodeArena>,
1627 /// Edge storage snapshot.
1628 edges: Arc<BidirectionalEdgeStore>,
1629 /// String interner snapshot.
1630 strings: Arc<StringInterner>,
1631 /// File registry snapshot.
1632 files: Arc<FileRegistry>,
1633 /// Auxiliary indices snapshot.
1634 indices: Arc<AuxiliaryIndices>,
1635 /// Sparse macro boundary metadata snapshot.
1636 macro_metadata: Arc<NodeMetadataStore>,
1637 /// Dense node provenance snapshot (Phase 1).
1638 node_provenance: Arc<NodeProvenanceStore>,
1639 /// Dense edge provenance snapshot (Phase 1).
1640 edge_provenance: Arc<EdgeProvenanceStore>,
1641 /// Monotonic fact-layer epoch at snapshot time.
1642 fact_epoch: u64,
1643 /// Epoch at snapshot time (for cursor validation).
1644 epoch: u64,
1645 /// Phase 2 binding-plane scope arena snapshot (populated by Phase 4e).
1646 scope_arena: Arc<ScopeArena>,
1647 /// Phase 2 binding-plane alias table snapshot (populated by Phase 4e / P2U04).
1648 alias_table: Arc<AliasTable>,
1649 /// Phase 2 binding-plane shadow table snapshot (populated by Phase 4e / P2U05).
1650 shadow_table: Arc<ShadowTable>,
1651 /// Phase 2 binding-plane scope provenance store snapshot (populated by Phase 4e / P2U11).
1652 scope_provenance_store: Arc<ScopeProvenanceStore>,
1653 /// Phase 3 file segment table snapshot mapping `FileId` to node ranges.
1654 file_segments: Arc<FileSegmentTable>,
1655}
1656
1657impl GraphSnapshot {
1658 /// Returns a reference to the node arena.
1659 #[inline]
1660 #[must_use]
1661 pub fn nodes(&self) -> &NodeArena {
1662 &self.nodes
1663 }
1664
1665 /// Returns a reference to the bidirectional edge store.
1666 #[inline]
1667 #[must_use]
1668 pub fn edges(&self) -> &BidirectionalEdgeStore {
1669 &self.edges
1670 }
1671
1672 /// Returns a reference to the string interner.
1673 #[inline]
1674 #[must_use]
1675 pub fn strings(&self) -> &StringInterner {
1676 &self.strings
1677 }
1678
1679 /// Returns a reference to the file registry.
1680 #[inline]
1681 #[must_use]
1682 pub fn files(&self) -> &FileRegistry {
1683 &self.files
1684 }
1685
1686 /// Returns a reference to the auxiliary indices.
1687 #[inline]
1688 #[must_use]
1689 pub fn indices(&self) -> &AuxiliaryIndices {
1690 &self.indices
1691 }
1692
1693 /// Returns a reference to the macro boundary metadata store.
1694 #[inline]
1695 #[must_use]
1696 pub fn macro_metadata(&self) -> &NodeMetadataStore {
1697 &self.macro_metadata
1698 }
1699
1700 // ------------------------------------------------------------------
1701 // Phase 1 fact-layer provenance accessors (P1U09).
1702 // ------------------------------------------------------------------
1703
1704 /// Returns the monotonic fact-layer epoch.
1705 #[inline]
1706 #[must_use]
1707 pub fn fact_epoch(&self) -> u64 {
1708 self.fact_epoch
1709 }
1710
1711 /// Looks up node provenance by `NodeId`.
1712 #[inline]
1713 #[must_use]
1714 pub fn node_provenance(
1715 &self,
1716 id: crate::graph::unified::node::id::NodeId,
1717 ) -> Option<&NodeProvenance> {
1718 self.node_provenance.lookup(id)
1719 }
1720
1721 /// Looks up edge provenance by `EdgeId`.
1722 #[inline]
1723 #[must_use]
1724 pub fn edge_provenance(
1725 &self,
1726 id: crate::graph::unified::edge::id::EdgeId,
1727 ) -> Option<&EdgeProvenance> {
1728 self.edge_provenance.lookup(id)
1729 }
1730
1731 /// Returns a borrowed provenance view for a file.
1732 #[inline]
1733 #[must_use]
1734 pub fn file_provenance(
1735 &self,
1736 id: crate::graph::unified::file::id::FileId,
1737 ) -> Option<FileProvenanceView<'_>> {
1738 self.files.file_provenance(id)
1739 }
1740
1741 // ------------------------------------------------------------------
1742 // Phase 2 binding-plane accessors (P2U03).
1743 // ------------------------------------------------------------------
1744
1745 /// Returns a reference to the scope arena at snapshot time.
1746 ///
1747 /// Participates in MVCC: the snapshot holds an `Arc` clone of the arena
1748 /// as it existed when `snapshot()` was called. Subsequent calls to
1749 /// `set_scope_arena` on the source `CodeGraph` do not affect this view.
1750 #[inline]
1751 #[must_use]
1752 pub fn scope_arena(&self) -> &ScopeArena {
1753 &self.scope_arena
1754 }
1755
1756 /// Returns a reference to the alias table at snapshot time.
1757 ///
1758 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1759 /// as it existed when `snapshot()` was called. Subsequent calls to
1760 /// `set_alias_table` on the source `CodeGraph` do not affect this view.
1761 #[inline]
1762 #[must_use]
1763 pub fn alias_table(&self) -> &AliasTable {
1764 &self.alias_table
1765 }
1766
1767 /// Returns a reference to the shadow table at snapshot time.
1768 ///
1769 /// Participates in MVCC: the snapshot holds an `Arc` clone of the table
1770 /// as it existed when `snapshot()` was called. Subsequent calls to
1771 /// `set_shadow_table` on the source `CodeGraph` do not affect this view.
1772 #[inline]
1773 #[must_use]
1774 pub fn shadow_table(&self) -> &ShadowTable {
1775 &self.shadow_table
1776 }
1777
1778 /// Returns a reference to the scope provenance store at snapshot time.
1779 ///
1780 /// Participates in MVCC: the snapshot holds an `Arc` clone of the store
1781 /// as it existed when `snapshot()` was called. Subsequent calls to
1782 /// `set_scope_provenance_store` on the source `CodeGraph` do not affect
1783 /// this view.
1784 #[inline]
1785 #[must_use]
1786 pub fn scope_provenance_store(&self) -> &ScopeProvenanceStore {
1787 &self.scope_provenance_store
1788 }
1789
1790 /// Looks up scope provenance by `ScopeId` at snapshot time.
1791 ///
1792 /// Returns `None` if the slot is out of range, vacant, or the stored
1793 /// generation does not match (stale handle).
1794 #[inline]
1795 #[must_use]
1796 pub fn scope_provenance(&self, id: ScopeId) -> Option<&ScopeProvenance> {
1797 self.scope_provenance_store.lookup(id)
1798 }
1799
1800 /// Looks up the live `ScopeId` for a stable scope identity at snapshot time.
1801 ///
1802 /// Returns `None` if no provenance record is registered for that stable id.
1803 #[inline]
1804 #[must_use]
1805 pub fn scope_by_stable_id(&self, stable: ScopeStableId) -> Option<ScopeId> {
1806 self.scope_provenance_store.scope_by_stable_id(stable)
1807 }
1808
1809 /// Returns a reference to the file segment table at snapshot time.
1810 #[inline]
1811 #[must_use]
1812 pub fn file_segments(&self) -> &FileSegmentTable {
1813 &self.file_segments
1814 }
1815
1816 /// Returns the epoch at which this snapshot was taken.
1817 ///
1818 /// This can be compared against the current graph epoch to
1819 /// detect if the graph has changed since the snapshot.
1820 #[inline]
1821 #[must_use]
1822 pub fn epoch(&self) -> u64 {
1823 self.epoch
1824 }
1825
1826 /// Returns `true` if this snapshot's epoch matches the given epoch.
1827 ///
1828 /// Use this to validate cursors before continuing pagination.
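    ///
    /// Sketch (the `cursor` struct and error type are illustrative):
    ///
    /// ```rust,ignore
    /// if !snapshot.epoch_matches(cursor.epoch) {
    ///     return Err(CursorInvalidated); // hypothetical error type
    /// }
    /// ```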
1829 #[inline]
1830 #[must_use]
1831 pub fn epoch_matches(&self, other_epoch: u64) -> bool {
1832 self.epoch == other_epoch
1833 }
1834
1835 // ------------------------------------------------------------------
1836 // Phase 2 binding-plane facade accessor (P2U07).
1837 // ------------------------------------------------------------------
1838
1839 /// Returns a [`BindingPlane`] facade borrowing this snapshot's lifetime.
1840 ///
1841 /// The facade is the stable Phase 2 public API for scope/alias/shadow
1842 /// queries and witness-bearing resolution. It provides a single entry
1843 /// point (`resolve`) that returns both a `BindingResult` and an ordered
1844 /// step trace in a `BindingResolution`.
1845 ///
1846 /// # MVCC note
1847 ///
1848 /// `BindingPlane<'_>` borrows from this snapshot, which is already an
1849 /// MVCC-consistent view of the graph at snapshot time. Callers from
1850 /// `CodeGraph` or `ConcurrentCodeGraph` should follow the two-line
1851 /// pattern so the snapshot lifetime is explicit:
1852 ///
1853 /// ```rust,ignore
1854 /// // CodeGraph caller:
1855 /// let snapshot = graph.snapshot();
1856 /// let plane = snapshot.binding_plane();
1857 ///
1858 /// // ConcurrentCodeGraph caller:
1859 /// let read_guard = concurrent.read();
1860 /// let snapshot = read_guard.snapshot();
1861 /// let plane = snapshot.binding_plane();
1862 /// ```
1863 #[inline]
1864 #[must_use]
1865 pub fn binding_plane(&self) -> crate::graph::unified::bind::plane::BindingPlane<'_> {
1866 crate::graph::unified::bind::plane::BindingPlane::new(self)
1867 }
1868
1869 // ============================================================================
1870 // Query Methods
1871 // ============================================================================
1872
1873 /// Finds nodes matching a pattern.
1874 ///
1875 /// Performs a simple substring match on node names and qualified names.
1876 /// Returns all matching node IDs.
1877 ///
1878 /// **Synthetic suppression (C_SUPPRESS):** synthetic placeholder
1879 /// nodes — internal scaffolding the language plugins emit for
1880 /// binding-plane and scope analysis (e.g. the Go plugin's
1881 /// `<field:operand.field>` field-access shadows and the
1882 /// `<ident>@<offset>` per-binding-site Variable nodes from the
1883 /// local-scope resolver) — are filtered out by default. Internal
1884 /// callers that need to reach these nodes (binding plane, scope /
1885 /// alias / shadow analysis) use
1886 /// [`Self::find_by_pattern_with_options`] with
1887 /// `include_synthetic = true`.
1888 ///
1889 /// # Performance
1890 ///
1891 /// Optimized to iterate over unique strings in the interner (smaller set)
1892 /// rather than all nodes in the arena.
1893 ///
1894 /// # Arguments
1895 ///
1896 /// * `pattern` - The pattern to match (substring search)
1897 ///
1898 /// # Returns
1899 ///
1900 /// A vector of `NodeIds` for all matching nodes (synthetic
1901 /// placeholders excluded).
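    ///
    /// A minimal usage sketch (the pattern string is illustrative):
    ///
    /// ```rust,ignore
    /// let hits = snapshot.find_by_pattern("parse"); // substring match
    /// for id in hits {
    ///     let entry = snapshot.get_node(id).expect("ids come from this snapshot");
    ///     // ... render entry ...
    /// }
    /// ```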
1902 #[must_use]
1903 pub fn find_by_pattern(&self, pattern: &str) -> Vec<crate::graph::unified::node::NodeId> {
1904 self.find_by_pattern_with_options(pattern, false)
1905 }
1906
1907 /// Finds nodes matching a pattern with explicit control over synthetic
1908 /// placeholder visibility.
1909 ///
1910 /// `include_synthetic = false` is the default surface used by every
1911 /// user-facing caller (CLI `search`, MCP `semantic_search` /
1912 /// `pattern_search` / `relation_query`, etc.). Synthetic
    /// placeholders are suppressed via two parallel checks, combined
    /// with a logical OR:
1915 ///
1916 /// 1. The authoritative `NodeMetadata::Synthetic` bit on the
1917 /// metadata store
1918 /// ([`crate::graph::unified::storage::metadata::NodeMetadataStore::is_synthetic`]).
1919 /// 2. The structural name-shape fallback
1920 /// ([`crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name`])
1921 /// for V10 snapshots written before the synthetic bit existed
1922 /// and for cross-file unification losers that retained their
1923 /// name but lost their metadata entry.
1924 ///
    /// A match on either check is sufficient to suppress the node. The
1926 /// design lives in `docs/development/public-issue-triage/`
1927 /// under the C_SUPPRESS unit; see also the rationale in
1928 /// [`crate::graph::unified::storage::metadata::NodeMetadata::Synthetic`].
1929 ///
1930 /// `include_synthetic = true` is **internal-only**. The binding
1931 /// plane, scope resolver, and rebuild's coverage gate use this
1932 /// path to reach synthetic nodes for their structural integrity
1933 /// checks. **No CLI / MCP surface should ever pass `true`.**
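    ///
    /// Internal-caller sketch (an illustrative integrity check, not a
    /// real call site):
    ///
    /// ```rust,ignore
    /// let all = snapshot.find_by_pattern_with_options("NeedTags", true);
    /// let visible = snapshot.find_by_pattern("NeedTags");
    /// debug_assert!(visible.len() <= all.len());
    /// ```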
1934 #[must_use]
1935 pub fn find_by_pattern_with_options(
1936 &self,
1937 pattern: &str,
1938 include_synthetic: bool,
1939 ) -> Vec<crate::graph::unified::node::NodeId> {
1940 let mut matches = Vec::new();
1941
1942 // 1. Scan unique strings in interner for matches
1943 for (str_id, s) in self.strings.iter() {
1944 if s.contains(pattern) {
1945 // 2. If string matches, look up all nodes with this name
1946 // Check qualified name index
1947 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
1948 // Check simple name index
1949 matches.extend_from_slice(self.indices.by_name(str_id));
1950 }
1951 }
1952
1953 // Deduplicate matches (a node might match both qualified and simple name)
1954 matches.sort_unstable();
1955 matches.dedup();
1956
1957 if !include_synthetic {
1958 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
1959 }
1960
1961 matches
1962 }
1963
1964 /// Finds nodes whose interned simple **or** qualified name equals
1965 /// `name` byte-for-byte (case-sensitive).
1966 ///
1967 /// This is the canonical surface for **exact-name** lookups —
1968 /// shared by the CLI `--exact <pattern>` shorthand
1969 /// (`sqry-cli/src/commands/search.rs::run_regular_search`) and the
1970 /// structural query planner's `name:` predicate
1971 /// (`sqry-db/src/planner/parse.rs`,
1972 /// `sqry-db/src/planner/execute.rs`). Both surfaces are
1973 /// contract-bound (DAG `B1_ALIGN`) to return the same set against
1974 /// any fixture: the CLI calls this method directly, while the
1975 /// planner uses the same interner + by-name index pair internally
1976 /// when scanning, then applies the same synthetic filter.
1977 ///
1978 /// **Synthetic suppression.** Synthetic placeholder nodes
1979 /// (Go-plugin `<field:operand.field>` shadows and
1980 /// `<ident>@<offset>` per-binding-site Variables; see
1981 /// [`Self::find_by_pattern_with_options`] for the full taxonomy)
1982 /// are excluded via [`Self::is_node_synthetic`]. There is **no**
1983 /// `include_synthetic = true` variant for the exact-match surface
    /// because the synthetic name shapes that the structural fallback
    /// recognises (`<…>`, `…@<offset>`) cannot equal a user-typed
    /// name byte-for-byte; the metadata-bit channel is the only
    /// realistic leak vector, and it is suppressed unconditionally.
1988 ///
1989 /// # Performance
1990 ///
1991 /// `O(1)` interner lookup + `O(matches)` filter. If `name` is not
1992 /// interned the result is empty without scanning any nodes.
1993 ///
1994 /// # Arguments
1995 ///
1996 /// * `name` - The exact name to look up (no glob, no regex).
1997 ///
1998 /// # Returns
1999 ///
2000 /// Sorted, deduplicated `NodeId`s for every non-synthetic node
2001 /// whose `entry.name` or `entry.qualified_name` equals `name`.
2002 /// Returns an empty vector when `name` is not interned (i.e. no
2003 /// node could possibly carry that name).
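    ///
    /// Sketch of the two contract-bound surfaces:
    ///
    /// ```rust,ignore
    /// // CLI `--exact NeedTags` and planner `name:NeedTags` both reduce to:
    /// let exact = snapshot.find_by_exact_name("NeedTags");
    /// // Substring hits such as "NeedTagsHelper" are not returned.
    /// ```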
2004 #[must_use]
2005 pub fn find_by_exact_name(&self, name: &str) -> Vec<crate::graph::unified::node::NodeId> {
2006 // Resolve the input to a StringId. If the interner doesn't know
2007 // this string, no node could possibly carry it as a name.
2008 let Some(str_id) = self.strings.get(name) else {
2009 return Vec::new();
2010 };
2011
2012 // Pull the by-name and by-qualified-name index slices for that
2013 // StringId. Both indices are pre-built and slice-shaped.
2014 let mut matches: Vec<crate::graph::unified::node::NodeId> = Vec::new();
2015 matches.extend_from_slice(self.indices.by_name(str_id));
2016 matches.extend_from_slice(self.indices.by_qualified_name(str_id));
2017
2018 matches.sort_unstable();
2019 matches.dedup();
2020
2021 // Same default-off synthetic filter as `find_by_pattern`. There
2022 // is no `include_synthetic` toggle on the exact-match surface
2023 // — see the doc comment above for the rationale.
2024 matches.retain(|&node_id| !self.is_node_synthetic(node_id));
2025
2026 matches
2027 }
2028
2029 /// Returns `true` if the node should be treated as a synthetic
2030 /// placeholder for user-facing surfaces.
2031 ///
2032 /// Combines the metadata-store flag and the structural name-shape
2033 /// fallback (see [`Self::find_by_pattern_with_options`] for the
2034 /// full rationale). Returns `false` for missing nodes (an unknown
2035 /// `NodeId` is not "synthetic" — it is "not present").
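    ///
    /// Sketch of a surface-level post-filter over an existing result set:
    ///
    /// ```rust,ignore
    /// results.retain(|&id| !snapshot.is_node_synthetic(id));
    /// ```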
2036 #[must_use]
2037 pub fn is_node_synthetic(&self, node_id: crate::graph::unified::node::NodeId) -> bool {
2038 // Authoritative check: metadata-store bit (NodeMetadata::Synthetic).
2039 if self.macro_metadata.is_synthetic(node_id) {
2040 return true;
2041 }
2042 // Structural fallback: name shape recognised as synthetic.
2043 // Required for V10 snapshots written before the bit existed,
2044 // for unification losers that lost their metadata entry, and
2045 // as defence-in-depth against future plugins forgetting to
2046 // flip the bit.
2047 let Some(entry) = self.nodes.get(node_id) else {
2048 return false;
2049 };
2050 if entry.is_unified_loser() {
2051 // Already invisible for other reasons; do not also flag as synthetic.
2052 return false;
2053 }
2054 let Some(name) = self.strings.resolve(entry.name) else {
2055 return false;
2056 };
2057 crate::graph::unified::storage::arena::NodeEntry::is_synthetic_placeholder_name(
2058 name.as_ref(),
2059 )
2060 }
2061
2062 /// Gets all callees of a node (functions called by this node).
2063 ///
2064 /// Queries the forward edge store for all Calls edges from this node.
2065 ///
2066 /// # Arguments
2067 ///
2068 /// * `node` - The node ID to query
2069 ///
2070 /// # Returns
2071 ///
2072 /// A vector of `NodeIds` representing functions called by this node.
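    ///
    /// Sketch: a worklist walk over the callee relation (illustrative;
    /// `root` is some starting `NodeId`, and the visited set guards
    /// against cycles):
    ///
    /// ```rust,ignore
    /// use std::collections::HashSet;
    /// let mut visited = HashSet::new();
    /// let mut frontier = vec![root];
    /// while let Some(node) = frontier.pop() {
    ///     if visited.insert(node) {
    ///         frontier.extend(snapshot.get_callees(node));
    ///     }
    /// }
    /// ```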
2073 #[must_use]
2074 pub fn get_callees(
2075 &self,
2076 node: crate::graph::unified::node::NodeId,
2077 ) -> Vec<crate::graph::unified::node::NodeId> {
2078 use crate::graph::unified::edge::EdgeKind;
2079
2080 self.edges
2081 .edges_from(node)
2082 .into_iter()
2083 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2084 .map(|edge| edge.target)
2085 .collect()
2086 }
2087
2088 /// Gets all callers of a node (functions that call this node).
2089 ///
2090 /// Queries the reverse edge store for all Calls edges to this node.
2091 ///
2092 /// # Arguments
2093 ///
2094 /// * `node` - The node ID to query
2095 ///
2096 /// # Returns
2097 ///
2098 /// A vector of `NodeIds` representing functions that call this node.
2099 #[must_use]
2100 pub fn get_callers(
2101 &self,
2102 node: crate::graph::unified::node::NodeId,
2103 ) -> Vec<crate::graph::unified::node::NodeId> {
2104 use crate::graph::unified::edge::EdgeKind;
2105
2106 self.edges
2107 .edges_to(node)
2108 .into_iter()
2109 .filter(|edge| matches!(edge.kind, EdgeKind::Calls { .. }))
2110 .map(|edge| edge.source)
2111 .collect()
2112 }
2113
2114 /// Iterates over all nodes in the graph.
2115 ///
2116 /// Returns an iterator yielding (`NodeId`, &`NodeEntry`) pairs for all
2117 /// occupied slots in the arena.
2118 ///
2119 /// # Returns
2120 ///
2121 /// An iterator over (`NodeId`, &`NodeEntry`) pairs.
2122 pub fn iter_nodes(
2123 &self,
2124 ) -> impl Iterator<
2125 Item = (
2126 crate::graph::unified::node::NodeId,
2127 &crate::graph::unified::storage::arena::NodeEntry,
2128 ),
2129 > {
2130 self.nodes.iter()
2131 }
2132
2133 /// Iterates over all edges in the graph.
2134 ///
2135 /// Returns an iterator yielding (source, target, `EdgeKind`) tuples for
2136 /// all edges in the forward edge store.
2137 ///
2138 /// # Returns
2139 ///
2140 /// An iterator over edge tuples.
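    ///
    /// Sketch: counting call edges across the whole snapshot:
    ///
    /// ```rust,ignore
    /// let calls = snapshot
    ///     .iter_edges()
    ///     .filter(|(_, _, kind)| matches!(kind, EdgeKind::Calls { .. }))
    ///     .count();
    /// ```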
2141 pub fn iter_edges(
2142 &self,
2143 ) -> impl Iterator<
2144 Item = (
2145 crate::graph::unified::node::NodeId,
2146 crate::graph::unified::node::NodeId,
2147 crate::graph::unified::edge::EdgeKind,
2148 ),
2149 > + '_ {
2150 // Iterate over all nodes in the arena and get their outgoing edges
2151 self.nodes.iter().flat_map(move |(node_id, _entry)| {
2152 // Get all edges from this node
2153 self.edges
2154 .edges_from(node_id)
2155 .into_iter()
2156 .map(move |edge| (node_id, edge.target, edge.kind))
2157 })
2158 }
2159
2160 /// Gets a node entry by ID.
2161 ///
2162 /// Returns a reference to the `NodeEntry` if the ID is valid, or None
2163 /// if the ID is invalid or stale.
2164 ///
2165 /// # Arguments
2166 ///
2167 /// * `id` - The node ID to look up
2168 ///
2169 /// # Returns
2170 ///
2171 /// A reference to the `NodeEntry`, or None if not found.
2172 #[must_use]
2173 pub fn get_node(
2174 &self,
2175 id: crate::graph::unified::node::NodeId,
2176 ) -> Option<&crate::graph::unified::storage::arena::NodeEntry> {
2177 self.nodes.get(id)
2178 }
2179}
2180
2181impl fmt::Debug for GraphSnapshot {
2182 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2183 f.debug_struct("GraphSnapshot")
2184 .field("nodes", &self.nodes.len())
2185 .field("epoch", &self.epoch)
2186 .finish_non_exhaustive()
2187 }
2188}
2189
2190/// Read-only accessor trait shared by [`CodeGraph`] and [`GraphSnapshot`].
2191///
2192/// This lets helpers that only *read* graph state (name-matching, relation
2193/// traversal, reference lookups) be written once and called from both the
2194/// live `CodeGraph` path in `sqry-core::query::executor::graph_eval` and
2195/// the snapshot-based path in `sqry-db::queries::*`. No mutation is exposed.
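///
/// A sketch of a helper written once against the trait (hypothetical
/// function, for illustration):
///
/// ```rust,ignore
/// fn node_count<G: GraphAccess>(g: &G) -> usize {
///     g.nodes().len()
/// }
/// ```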
2196pub trait GraphAccess {
2197 /// Returns the node arena (read-only).
2198 fn nodes(&self) -> &NodeArena;
2199 /// Returns the bidirectional edge store (read-only).
2200 fn edges(&self) -> &BidirectionalEdgeStore;
2201 /// Returns the string interner (read-only).
2202 fn strings(&self) -> &StringInterner;
2203 /// Returns the file registry (read-only).
2204 fn files(&self) -> &FileRegistry;
2205 /// Returns the auxiliary indices (read-only).
2206 fn indices(&self) -> &AuxiliaryIndices;
2207}
2208
2209impl GraphAccess for CodeGraph {
2210 #[inline]
2211 fn nodes(&self) -> &NodeArena {
2212 CodeGraph::nodes(self)
2213 }
2214 #[inline]
2215 fn edges(&self) -> &BidirectionalEdgeStore {
2216 CodeGraph::edges(self)
2217 }
2218 #[inline]
2219 fn strings(&self) -> &StringInterner {
2220 CodeGraph::strings(self)
2221 }
2222 #[inline]
2223 fn files(&self) -> &FileRegistry {
2224 CodeGraph::files(self)
2225 }
2226 #[inline]
2227 fn indices(&self) -> &AuxiliaryIndices {
2228 CodeGraph::indices(self)
2229 }
2230}
2231
2232impl GraphAccess for GraphSnapshot {
2233 #[inline]
2234 fn nodes(&self) -> &NodeArena {
2235 GraphSnapshot::nodes(self)
2236 }
2237 #[inline]
2238 fn edges(&self) -> &BidirectionalEdgeStore {
2239 GraphSnapshot::edges(self)
2240 }
2241 #[inline]
2242 fn strings(&self) -> &StringInterner {
2243 GraphSnapshot::strings(self)
2244 }
2245 #[inline]
2246 fn files(&self) -> &FileRegistry {
2247 GraphSnapshot::files(self)
2248 }
2249 #[inline]
2250 fn indices(&self) -> &AuxiliaryIndices {
2251 GraphSnapshot::indices(self)
2252 }
2253}
2254
2255#[cfg(test)]
2256mod tests {
2257 use super::*;
2258 use crate::graph::unified::{
2259 FileScope, NodeId, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2260 SymbolResolutionOutcome,
2261 };
2262
2263 fn resolve_symbol_strict(snapshot: &GraphSnapshot, symbol: &str) -> Option<NodeId> {
2264 match snapshot.resolve_symbol(&SymbolQuery {
2265 symbol,
2266 file_scope: FileScope::Any,
2267 mode: ResolutionMode::Strict,
2268 }) {
2269 SymbolResolutionOutcome::Resolved(node_id) => Some(node_id),
2270 SymbolResolutionOutcome::NotFound
2271 | SymbolResolutionOutcome::FileNotIndexed
2272 | SymbolResolutionOutcome::Ambiguous(_) => None,
2273 }
2274 }
2275
2276 fn candidate_nodes(snapshot: &GraphSnapshot, symbol: &str) -> Vec<NodeId> {
2277 match snapshot.find_symbol_candidates(&SymbolQuery {
2278 symbol,
2279 file_scope: FileScope::Any,
2280 mode: ResolutionMode::AllowSuffixCandidates,
2281 }) {
2282 SymbolCandidateOutcome::Candidates(candidates) => candidates,
2283 SymbolCandidateOutcome::NotFound | SymbolCandidateOutcome::FileNotIndexed => Vec::new(),
2284 }
2285 }
2286
2287 #[test]
2288 fn test_code_graph_new() {
2289 let graph = CodeGraph::new();
2290 assert_eq!(graph.epoch(), 0);
2291 assert_eq!(graph.nodes().len(), 0);
2292 }
2293
2294 #[test]
2295 fn test_code_graph_default() {
2296 let graph = CodeGraph::default();
2297 assert_eq!(graph.epoch(), 0);
2298 }
2299
2300 #[test]
2301 fn test_code_graph_snapshot() {
2302 let graph = CodeGraph::new();
2303 let snapshot = graph.snapshot();
2304 assert_eq!(snapshot.epoch(), 0);
2305 assert_eq!(snapshot.nodes().len(), 0);
2306 }
2307
2308 #[test]
2309 fn test_code_graph_bump_epoch() {
2310 let mut graph = CodeGraph::new();
2311 assert_eq!(graph.epoch(), 0);
2312 assert_eq!(graph.bump_epoch(), 1);
2313 assert_eq!(graph.epoch(), 1);
2314 assert_eq!(graph.bump_epoch(), 2);
2315 assert_eq!(graph.epoch(), 2);
2316 }
2317
2318 #[test]
2319 fn test_code_graph_set_epoch() {
2320 let mut graph = CodeGraph::new();
2321 graph.set_epoch(42);
2322 assert_eq!(graph.epoch(), 42);
2323 }
2324
2325 #[test]
2326 fn test_code_graph_from_components() {
2327 let nodes = NodeArena::new();
2328 let edges = BidirectionalEdgeStore::new();
2329 let strings = StringInterner::new();
2330 let files = FileRegistry::new();
2331 let indices = AuxiliaryIndices::new();
2332 let macro_metadata = NodeMetadataStore::new();
2333
2334 let graph =
2335 CodeGraph::from_components(nodes, edges, strings, files, indices, macro_metadata);
2336 assert_eq!(graph.epoch(), 0);
2337 }
2338
2339 #[test]
2340 fn test_code_graph_mut_accessors() {
2341 let mut graph = CodeGraph::new();
2342
2343 // Access mutable references - should not panic
2344 let _nodes = graph.nodes_mut();
2345 let _edges = graph.edges_mut();
2346 let _strings = graph.strings_mut();
2347 let _files = graph.files_mut();
2348 let _indices = graph.indices_mut();
2349 }
2350
2351 #[test]
2352 fn test_code_graph_snapshot_isolation() {
2353 let mut graph = CodeGraph::new();
2354 let snapshot1 = graph.snapshot();
2355
2356 // Mutate the graph
2357 graph.bump_epoch();
2358
2359 let snapshot2 = graph.snapshot();
2360
2361 // Snapshots should have different epochs
2362 assert_eq!(snapshot1.epoch(), 0);
2363 assert_eq!(snapshot2.epoch(), 1);
2364 }
2365
2366 #[test]
2367 fn test_code_graph_debug() {
2368 let graph = CodeGraph::new();
2369 let debug_str = format!("{graph:?}");
2370 assert!(debug_str.contains("CodeGraph"));
2371 assert!(debug_str.contains("epoch"));
2372 }
2373
2374 /// Exact-byte regression for the iter2 fix of CodeGraph.confidence
2375 /// accounting: adding a ConfidenceMetadata entry with known inner
2376 /// Vec<String> capacities must increase heap_bytes() by exactly the sum
2377 /// of those capacities plus Vec<String> slot overhead.
2378 #[test]
2379 fn test_codegraph_heap_bytes_counts_confidence_inner_strings() {
2380 let mut graph = CodeGraph::new();
2381 // Seed one confidence entry so the HashMap has non-zero capacity;
2382 // reserve slack so the next insert cannot rehash.
2383 graph.set_confidence({
2384 let mut m = HashMap::with_capacity(8);
2385 m.insert("seed".to_string(), ConfidenceMetadata::default());
2386 m
2387 });
2388 let before = graph.heap_bytes();
2389 let before_cap = graph.confidence.capacity();
2390
2391 let lim1 = String::from("no type inference");
2392 let lim2 = String::from("no generic specialization");
2393 let feat1 = String::from("rust-analyzer");
2394 let l1 = lim1.capacity();
2395 let l2 = lim2.capacity();
2396 let f1 = feat1.capacity();
2397
2398 let limitations = vec![lim1, lim2];
2399 let lim_vec_cap = limitations.capacity();
2400
2401 let unavailable_features = vec![feat1];
2402 let feat_vec_cap = unavailable_features.capacity();
2403
2404 let key = String::from("rust");
2405 let key_cap = key.capacity();
2406 graph.confidence.insert(
2407 key,
2408 ConfidenceMetadata {
2409 limitations,
2410 unavailable_features,
2411 ..Default::default()
2412 },
2413 );
2414 assert_eq!(
2415 graph.confidence.capacity(),
2416 before_cap,
2417 "prerequisite: confidence HashMap must not rehash during the test insert",
2418 );
2419
2420 let after = graph.heap_bytes();
2421 let expected_inner = key_cap
2422 + lim_vec_cap * std::mem::size_of::<String>()
2423 + l1
2424 + l2
2425 + feat_vec_cap * std::mem::size_of::<String>()
2426 + f1;
2427 assert_eq!(
2428 after - before,
2429 expected_inner,
2430 "CodeGraph::heap_bytes must count ConfidenceMetadata inner Vec<String> capacity exactly",
2431 );
2432 }
2433
2434 #[test]
2435 fn test_codegraph_heap_bytes_grows_with_content() {
2436 use crate::graph::unified::node::NodeKind;
2437 use crate::graph::unified::storage::arena::NodeEntry;
2438 use std::path::Path;
2439
2440 // An empty graph reports some heap bytes (FileRegistry seeds index 0
2441 // with `vec![None]`, HashMaps have non-zero base capacity once touched,
        // etc.) but the value must be finite and well under the 100 MiB cap.
2443 let empty = CodeGraph::new();
2444 let empty_bytes = empty.heap_bytes();
2445 assert!(
2446 empty_bytes < 100 * 1024 * 1024,
2447 "empty graph heap_bytes should be <100 MiB, got {empty_bytes}"
2448 );
2449
2450 let mut graph = CodeGraph::new();
2451 for i in 0..32u32 {
2452 let name = format!("sym_{i}");
2453 let qual = format!("module::sym_{i}");
2454 let file = format!("file_{i}.rs");
2455
2456 let name_id = graph.strings_mut().intern(&name).unwrap();
2457 let qual_id = graph.strings_mut().intern(&qual).unwrap();
2458 let file_id = graph.files_mut().register(Path::new(&file)).unwrap();
2459
2460 let entry =
2461 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_id);
2462 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2463 graph
2464 .indices_mut()
2465 .add(node_id, NodeKind::Function, name_id, Some(qual_id), file_id);
2466 }
2467
2468 let populated_bytes = graph.heap_bytes();
2469 assert!(
2470 populated_bytes > 0,
2471 "populated graph should report nonzero heap bytes"
2472 );
2473 assert!(
2474 populated_bytes > empty_bytes,
2475 "populated graph ({populated_bytes}) should exceed empty graph ({empty_bytes})"
2476 );
2477 assert!(
2478 populated_bytes < 100 * 1024 * 1024,
2479 "test graph heap_bytes should be <100 MiB, got {populated_bytes}"
2480 );
2481 }
2482
2483 #[test]
2484 fn test_concurrent_code_graph_new() {
2485 let graph = ConcurrentCodeGraph::new();
2486 assert_eq!(graph.epoch(), 0);
2487 }
2488
2489 #[test]
2490 fn test_concurrent_code_graph_default() {
2491 let graph = ConcurrentCodeGraph::default();
2492 assert_eq!(graph.epoch(), 0);
2493 }
2494
2495 #[test]
2496 fn test_concurrent_code_graph_from_graph() {
2497 let mut inner = CodeGraph::new();
2498 inner.set_epoch(10);
2499 let graph = ConcurrentCodeGraph::from_graph(inner);
2500 assert_eq!(graph.epoch(), 10);
2501 }
2502
2503 #[test]
2504 fn test_concurrent_code_graph_read() {
2505 let graph = ConcurrentCodeGraph::new();
2506 let guard = graph.read();
2507 assert_eq!(guard.epoch(), 0);
2508 assert_eq!(guard.nodes().len(), 0);
2509 }
2510
2511 #[test]
2512 fn test_concurrent_code_graph_write_increments_epoch() {
2513 let graph = ConcurrentCodeGraph::new();
2514 assert_eq!(graph.epoch(), 0);
2515
2516 {
2517 let guard = graph.write();
2518 assert_eq!(guard.epoch(), 1);
2519 }
2520
2521 assert_eq!(graph.epoch(), 1);
2522
2523 {
2524 let _guard = graph.write();
2525 }
2526
2527 assert_eq!(graph.epoch(), 2);
2528 }
2529
2530 #[test]
2531 fn test_concurrent_code_graph_snapshot() {
2532 let graph = ConcurrentCodeGraph::new();
2533
2534 {
2535 let _guard = graph.write();
2536 }
2537
2538 let snapshot = graph.snapshot();
2539 assert_eq!(snapshot.epoch(), 1);
2540 }
2541
2542 #[test]
2543 fn test_concurrent_code_graph_try_read() {
2544 let graph = ConcurrentCodeGraph::new();
2545 let guard = graph.try_read();
2546 assert!(guard.is_some());
2547 }
2548
2549 #[test]
2550 fn test_concurrent_code_graph_try_write() {
2551 let graph = ConcurrentCodeGraph::new();
2552 let guard = graph.try_write();
2553 assert!(guard.is_some());
2554 assert_eq!(graph.epoch(), 1);
2555 }
2556
2557 #[test]
2558 fn test_concurrent_code_graph_debug() {
2559 let graph = ConcurrentCodeGraph::new();
2560 let debug_str = format!("{graph:?}");
2561 assert!(debug_str.contains("ConcurrentCodeGraph"));
2562 assert!(debug_str.contains("epoch"));
2563 }
2564
2565 #[test]
2566 fn test_graph_snapshot_accessors() {
2567 let graph = CodeGraph::new();
2568 let snapshot = graph.snapshot();
2569
2570 // All accessors should work
2571 let _nodes = snapshot.nodes();
2572 let _edges = snapshot.edges();
2573 let _strings = snapshot.strings();
2574 let _files = snapshot.files();
2575 let _indices = snapshot.indices();
2576 let _epoch = snapshot.epoch();
2577 }
2578
2579 #[test]
2580 fn test_graph_snapshot_epoch_matches() {
2581 let graph = CodeGraph::new();
2582 let snapshot = graph.snapshot();
2583
2584 assert!(snapshot.epoch_matches(0));
2585 assert!(!snapshot.epoch_matches(1));
2586 }
2587
2588 #[test]
2589 fn test_graph_snapshot_clone() {
2590 let graph = CodeGraph::new();
2591 let snapshot1 = graph.snapshot();
2592 let snapshot2 = snapshot1.clone();
2593
2594 assert_eq!(snapshot1.epoch(), snapshot2.epoch());
2595 }
2596
2597 #[test]
2598 fn test_graph_snapshot_debug() {
2599 let graph = CodeGraph::new();
2600 let snapshot = graph.snapshot();
2601 let debug_str = format!("{snapshot:?}");
2602 assert!(debug_str.contains("GraphSnapshot"));
2603 assert!(debug_str.contains("epoch"));
2604 }
2605
2606 #[test]
2607 fn test_multiple_readers() {
2608 let graph = ConcurrentCodeGraph::new();
2609
2610 // Multiple readers should be able to acquire locks simultaneously
2611 let guard1 = graph.read();
2612 let guard2 = graph.read();
2613 let guard3 = graph.read();
2614
2615 assert_eq!(guard1.epoch(), 0);
2616 assert_eq!(guard2.epoch(), 0);
2617 assert_eq!(guard3.epoch(), 0);
2618 }
2619
2620 #[test]
2621 fn test_code_graph_clone() {
2622 let mut graph = CodeGraph::new();
2623 graph.bump_epoch();
2624
2625 let cloned = graph.clone();
2626 assert_eq!(cloned.epoch(), 1);
2627 }
2628
2629 #[test]
2630 fn test_epoch_wrapping() {
2631 let mut graph = CodeGraph::new();
2632 graph.set_epoch(u64::MAX);
2633 let new_epoch = graph.bump_epoch();
2634 assert_eq!(new_epoch, 0); // Should wrap around
2635 }
2636
2637 // ============================================================================
2638 // Query method tests
2639 // ============================================================================
2640
2641 #[test]
2642 fn test_snapshot_resolve_symbol() {
2643 use crate::graph::unified::node::NodeKind;
2644 use crate::graph::unified::storage::arena::NodeEntry;
2645 use std::path::Path;
2646
2647 let mut graph = CodeGraph::new();
2648
2649 // Add some nodes with qualified names
2650 let name_id = graph.strings_mut().intern("test_func").unwrap();
2651 let qual_name_id = graph.strings_mut().intern("module::test_func").unwrap();
2652 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2653
2654 let entry =
2655 NodeEntry::new(NodeKind::Function, name_id, file_id).with_qualified_name(qual_name_id);
2656
2657 let node_id = graph.nodes_mut().alloc(entry).unwrap();
2658 graph.indices_mut().add(
2659 node_id,
2660 NodeKind::Function,
2661 name_id,
2662 Some(qual_name_id),
2663 file_id,
2664 );
2665
2666 let snapshot = graph.snapshot();
2667
2668 // Find by qualified name
2669 let found = resolve_symbol_strict(&snapshot, "module::test_func");
2670 assert_eq!(found, Some(node_id));
2671
2672 // Find by exact simple name
2673 let found2 = resolve_symbol_strict(&snapshot, "test_func");
2674 assert_eq!(found2, Some(node_id));
2675
2676 // Not found
2677 assert!(resolve_symbol_strict(&snapshot, "nonexistent").is_none());
2678 }
2679
2680 #[test]
2681 fn test_snapshot_find_by_pattern() {
2682 use crate::graph::unified::node::NodeKind;
2683 use crate::graph::unified::storage::arena::NodeEntry;
2684 use std::path::Path;
2685
2686 let mut graph = CodeGraph::new();
2687
2688 // Add nodes with different names
2689 let name1 = graph.strings_mut().intern("foo_bar").unwrap();
2690 let name2 = graph.strings_mut().intern("baz_bar").unwrap();
2691 let name3 = graph.strings_mut().intern("qux_test").unwrap();
2692 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
2693
2694 let node1 = graph
2695 .nodes_mut()
2696 .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
2697 .unwrap();
2698 let node2 = graph
2699 .nodes_mut()
2700 .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
2701 .unwrap();
2702 let node3 = graph
2703 .nodes_mut()
2704 .alloc(NodeEntry::new(NodeKind::Function, name3, file_id))
2705 .unwrap();
2706
2707 graph
2708 .indices_mut()
2709 .add(node1, NodeKind::Function, name1, None, file_id);
2710 graph
2711 .indices_mut()
2712 .add(node2, NodeKind::Function, name2, None, file_id);
2713 graph
2714 .indices_mut()
2715 .add(node3, NodeKind::Function, name3, None, file_id);
2716
2717 let snapshot = graph.snapshot();
2718
2719 // Find by pattern
2720 let matches = snapshot.find_by_pattern("bar");
2721 assert_eq!(matches.len(), 2);
2722 assert!(matches.contains(&node1));
2723 assert!(matches.contains(&node2));
2724
2725 // Find single match
2726 let matches = snapshot.find_by_pattern("qux");
2727 assert_eq!(matches.len(), 1);
2728 assert_eq!(matches[0], node3);
2729
2730 // No matches
2731 let matches = snapshot.find_by_pattern("nonexistent");
2732 assert!(matches.is_empty());
2733 }
2734
2735 #[test]
2736 fn synthetic_nodes_are_filtered_from_find_by_pattern_default() {
2737 // C_SUPPRESS: synthetic placeholder nodes (Go plugin
2738 // `<field:operand.field>` shadows + `<ident>@<offset>`
2739 // per-binding-site Variables) must NOT surface from
2740 // find_by_pattern, but must still be reachable via
2741 // find_by_pattern_with_options(_, true).
2742 use crate::graph::unified::node::NodeKind;
2743 use crate::graph::unified::storage::arena::NodeEntry;
2744 use std::path::Path;
2745
2746 let mut graph = CodeGraph::new();
2747 let real_property = graph
2748 .strings_mut()
2749 .intern("main.SelectorSource.NeedTags")
2750 .unwrap();
2751 let real_local_var = graph.strings_mut().intern("NeedTags").unwrap();
2752 let synthetic_field = graph
2753 .strings_mut()
2754 .intern("<field:selector.NeedTags>")
2755 .unwrap();
2756 let synthetic_offset_a = graph.strings_mut().intern("NeedTags@469").unwrap();
2757 let synthetic_offset_b = graph.strings_mut().intern("NeedTags@508").unwrap();
2758 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
2759
2760 let prop_id = graph
2761 .nodes_mut()
2762 .alloc(NodeEntry::new(NodeKind::Property, real_property, file_id))
2763 .unwrap();
2764 let local_var_id = graph
2765 .nodes_mut()
2766 .alloc(NodeEntry::new(NodeKind::Variable, real_local_var, file_id))
2767 .unwrap();
2768 let syn_field_id = graph
2769 .nodes_mut()
2770 .alloc(NodeEntry::new(NodeKind::Variable, synthetic_field, file_id))
2771 .unwrap();
2772 let syn_a_id = graph
2773 .nodes_mut()
2774 .alloc(NodeEntry::new(
2775 NodeKind::Variable,
2776 synthetic_offset_a,
2777 file_id,
2778 ))
2779 .unwrap();
2780 let syn_b_id = graph
2781 .nodes_mut()
2782 .alloc(NodeEntry::new(
2783 NodeKind::Variable,
2784 synthetic_offset_b,
2785 file_id,
2786 ))
2787 .unwrap();
2788
2789 graph
2790 .indices_mut()
2791 .add(prop_id, NodeKind::Property, real_property, None, file_id);
2792 graph.indices_mut().add(
2793 local_var_id,
2794 NodeKind::Variable,
2795 real_local_var,
2796 None,
2797 file_id,
2798 );
2799 graph.indices_mut().add(
2800 syn_field_id,
2801 NodeKind::Variable,
2802 synthetic_field,
2803 None,
2804 file_id,
2805 );
2806 graph.indices_mut().add(
2807 syn_a_id,
2808 NodeKind::Variable,
2809 synthetic_offset_a,
2810 None,
2811 file_id,
2812 );
2813 graph.indices_mut().add(
2814 syn_b_id,
2815 NodeKind::Variable,
2816 synthetic_offset_b,
2817 None,
2818 file_id,
2819 );
2820
2821 // Flag two of them via the metadata-store bit (the canonical
2822 // Go-plugin emission path) and leave one (`<field:...>`) only
2823 // covered by the structural name-shape fallback to verify both
2824 // recognition channels suppress the leak.
2825 graph.macro_metadata_mut().mark_synthetic(syn_a_id);
2826 graph.macro_metadata_mut().mark_synthetic(syn_b_id);
2827
2828 let snapshot = graph.snapshot();
2829
2830 // Default surface (CLI `search --exact`, MCP, LSP): no synthetics.
2831 let matches = snapshot.find_by_pattern("NeedTags");
2832 assert!(matches.contains(&prop_id), "Property must be surfaced");
2833 assert!(
2834 matches.contains(&local_var_id),
2835 "real local var must be surfaced"
2836 );
2837 assert!(
2838 !matches.contains(&syn_field_id),
2839 "<field:...> synthetic must be suppressed (name-shape fallback)"
2840 );
2841 assert!(
2842 !matches.contains(&syn_a_id),
2843 "NeedTags@469 must be suppressed (metadata bit)"
2844 );
2845 assert!(
2846 !matches.contains(&syn_b_id),
2847 "NeedTags@508 must be suppressed (metadata bit)"
2848 );
2849 assert_eq!(matches.len(), 2, "exactly Property + local var, no leakage");
2850
2851 // Internal include-synthetic surface (binding plane, scope analysis):
2852 // every node remains reachable.
2853 let all_matches = snapshot.find_by_pattern_with_options("NeedTags", true);
2854 assert_eq!(
2855 all_matches.len(),
2856 5,
2857 "include_synthetic surfaces everything"
2858 );
2859 assert!(all_matches.contains(&prop_id));
2860 assert!(all_matches.contains(&local_var_id));
2861 assert!(all_matches.contains(&syn_field_id));
2862 assert!(all_matches.contains(&syn_a_id));
2863 assert!(all_matches.contains(&syn_b_id));
2864
2865 // is_node_synthetic exposed for surface-level filters
2866 // (e.g., MCP semantic_search/relation_query post-filters).
2867 assert!(snapshot.is_node_synthetic(syn_field_id));
2868 assert!(snapshot.is_node_synthetic(syn_a_id));
2869 assert!(snapshot.is_node_synthetic(syn_b_id));
2870 assert!(!snapshot.is_node_synthetic(prop_id));
2871 assert!(!snapshot.is_node_synthetic(local_var_id));
2872 }
2873
2874 #[test]
2875 fn find_by_exact_name_aligns_with_planner_name_predicate() {
2876 // B1_ALIGN: `find_by_exact_name("NeedTags")` is the canonical
2877 // surface for the CLI `--exact NeedTags` shorthand and the
2878 // planner's `name:NeedTags` predicate. Both paths must return
2879 // the same set against this fixture.
2880 use crate::graph::unified::node::NodeKind;
2881 use crate::graph::unified::storage::arena::NodeEntry;
2882 use std::path::Path;
2883
2884 let mut graph = CodeGraph::new();
2885 // Property nodes carry the package-qualified name as
2886 // `entry.name` (Go plugin convention; see `helper.rs`'s
2887 // `semantic_name_for_node_input`).
2888 let property_qname = graph
2889 .strings_mut()
2890 .intern("main.SelectorSource.NeedTags")
2891 .unwrap();
2892 let local_var_name = graph.strings_mut().intern("NeedTags").unwrap();
2893 let synthetic_field_name = graph
2894 .strings_mut()
2895 .intern("<field:selector.NeedTags>")
2896 .unwrap();
2897 let synthetic_offset_name = graph.strings_mut().intern("NeedTags@469").unwrap();
2898 let unrelated_name = graph.strings_mut().intern("NeedTagsHelper").unwrap();
2899 let file_id = graph.files_mut().register(Path::new("main.go")).unwrap();
2900
2901 let prop_id = graph
2902 .nodes_mut()
2903 .alloc(NodeEntry::new(NodeKind::Property, property_qname, file_id))
2904 .unwrap();
2905 let local_var_id = graph
2906 .nodes_mut()
2907 .alloc(NodeEntry::new(NodeKind::Variable, local_var_name, file_id))
2908 .unwrap();
2909 let syn_field_id = graph
2910 .nodes_mut()
2911 .alloc(NodeEntry::new(
2912 NodeKind::Variable,
2913 synthetic_field_name,
2914 file_id,
2915 ))
2916 .unwrap();
2917 let syn_offset_id = graph
2918 .nodes_mut()
2919 .alloc(NodeEntry::new(
2920 NodeKind::Variable,
2921 synthetic_offset_name,
2922 file_id,
2923 ))
2924 .unwrap();
2925 let unrelated_id = graph
2926 .nodes_mut()
2927 .alloc(NodeEntry::new(NodeKind::Function, unrelated_name, file_id))
2928 .unwrap();
2929
2930 graph
2931 .indices_mut()
2932 .add(prop_id, NodeKind::Property, property_qname, None, file_id);
2933 graph.indices_mut().add(
2934 local_var_id,
2935 NodeKind::Variable,
2936 local_var_name,
2937 None,
2938 file_id,
2939 );
2940 graph.indices_mut().add(
2941 syn_field_id,
2942 NodeKind::Variable,
2943 synthetic_field_name,
2944 None,
2945 file_id,
2946 );
2947 graph.indices_mut().add(
2948 syn_offset_id,
2949 NodeKind::Variable,
2950 synthetic_offset_name,
2951 None,
2952 file_id,
2953 );
2954 graph.indices_mut().add(
2955 unrelated_id,
2956 NodeKind::Function,
2957 unrelated_name,
2958 None,
2959 file_id,
2960 );
2961
2962 // Mark the offset-suffixed synthetic via the metadata bit so we
2963 // exercise both recognition channels in this fixture.
2964 graph.macro_metadata_mut().mark_synthetic(syn_offset_id);
2965
2966 let snapshot = graph.snapshot();
2967
2968 // Exact-name lookup on "NeedTags" — should pick up only the
2969 // local variable (its `entry.name` is exactly "NeedTags"); it
2970 // must NOT pick up the Property (qualified name contains but
2971 // does not equal "NeedTags") and must NOT pick up either
2972 // synthetic placeholder.
2973 let exact = snapshot.find_by_exact_name("NeedTags");
2974 assert_eq!(
2975 exact,
2976 vec![local_var_id],
2977 "exact match must be byte-for-byte against entry.name / qualified_name and exclude synthetics"
2978 );
2979
2980 // The Property's full qualified name is exact-addressable.
2981 let qualified = snapshot.find_by_exact_name("main.SelectorSource.NeedTags");
2982 assert_eq!(qualified, vec![prop_id]);
2983
2984 // Substring-only matches must not surface from exact lookup.
2985 assert!(
2986 snapshot
2987 .find_by_exact_name("NeedTagsHelper")
2988 .contains(&unrelated_id)
2989 );
2990 assert!(
2991 !snapshot
2992 .find_by_exact_name("NeedTags")
2993 .contains(&unrelated_id),
2994 "exact 'NeedTags' must not match 'NeedTagsHelper'"
2995 );
2996
2997 // Unknown name short-circuits to an empty vec without
2998 // scanning any nodes.
2999 assert!(
3000 snapshot
3001 .find_by_exact_name("ThisStringIsNotInterned")
3002 .is_empty()
3003 );
3004 }
3005
3006 #[test]
3007 fn test_snapshot_get_callees() {
3008 use crate::graph::unified::edge::EdgeKind;
3009 use crate::graph::unified::node::NodeKind;
3010 use crate::graph::unified::storage::arena::NodeEntry;
3011 use std::path::Path;
3012
3013 let mut graph = CodeGraph::new();
3014
3015 // Create caller and callee nodes
3016 let caller_name = graph.strings_mut().intern("caller").unwrap();
3017 let callee1_name = graph.strings_mut().intern("callee1").unwrap();
3018 let callee2_name = graph.strings_mut().intern("callee2").unwrap();
3019 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
3020
3021 let caller_id = graph
3022 .nodes_mut()
3023 .alloc(NodeEntry::new(NodeKind::Function, caller_name, file_id))
3024 .unwrap();
3025 let callee1_id = graph
3026 .nodes_mut()
3027 .alloc(NodeEntry::new(NodeKind::Function, callee1_name, file_id))
3028 .unwrap();
3029 let callee2_id = graph
3030 .nodes_mut()
3031 .alloc(NodeEntry::new(NodeKind::Function, callee2_name, file_id))
3032 .unwrap();
3033
3034 // Add call edges
3035 graph.edges_mut().add_edge(
3036 caller_id,
3037 callee1_id,
3038 EdgeKind::Calls {
3039 argument_count: 0,
3040 is_async: false,
3041 },
3042 file_id,
3043 );
3044 graph.edges_mut().add_edge(
3045 caller_id,
3046 callee2_id,
3047 EdgeKind::Calls {
3048 argument_count: 0,
3049 is_async: false,
3050 },
3051 file_id,
3052 );
3053
3054 let snapshot = graph.snapshot();
3055
3056 // Query callees
3057 let callees = snapshot.get_callees(caller_id);
        assert_eq!(callees.len(), 2);
        assert!(callees.contains(&callee1_id));
        assert!(callees.contains(&callee2_id));

        // Node with no callees
        let callees = snapshot.get_callees(callee1_id);
        assert!(callees.is_empty());
    }

    #[test]
    fn test_snapshot_get_callers() {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Create caller and callee nodes
        let caller1_name = graph.strings_mut().intern("caller1").unwrap();
        let caller2_name = graph.strings_mut().intern("caller2").unwrap();
        let callee_name = graph.strings_mut().intern("callee").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let caller1_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, caller1_name, file_id))
            .unwrap();
        let caller2_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, caller2_name, file_id))
            .unwrap();
        let callee_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, callee_name, file_id))
            .unwrap();

        // Add call edges
        graph.edges_mut().add_edge(
            caller1_id,
            callee_id,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_id,
        );
        graph.edges_mut().add_edge(
            caller2_id,
            callee_id,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_id,
        );

        let snapshot = graph.snapshot();

        // Query callers
        let callers = snapshot.get_callers(callee_id);
        assert_eq!(callers.len(), 2);
        assert!(callers.contains(&caller1_id));
        assert!(callers.contains(&caller2_id));

        // Node with no callers
        let callers = snapshot.get_callers(caller1_id);
        assert!(callers.is_empty());
    }

    #[test]
    fn test_snapshot_find_symbol_candidates() {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Add nodes with the same symbol name but different node kinds
        let symbol_name = graph.strings_mut().intern("test").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node1 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, symbol_name, file_id))
            .unwrap();
        let node2 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Method, symbol_name, file_id))
            .unwrap();

        // Add a different symbol
        let other_name = graph.strings_mut().intern("other").unwrap();
        let node3 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, other_name, file_id))
            .unwrap();

        graph
            .indices_mut()
            .add(node1, NodeKind::Function, symbol_name, None, file_id);
        graph
            .indices_mut()
            .add(node2, NodeKind::Method, symbol_name, None, file_id);
        graph
            .indices_mut()
            .add(node3, NodeKind::Function, other_name, None, file_id);

        let snapshot = graph.snapshot();

        // Find by symbol
        let matches = candidate_nodes(&snapshot, "test");
        assert_eq!(matches.len(), 2);
        assert!(matches.contains(&node1));
        assert!(matches.contains(&node2));

        // Find the other symbol
        let matches = candidate_nodes(&snapshot, "other");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0], node3);

        // No matches
        let matches = candidate_nodes(&snapshot, "nonexistent");
        assert!(matches.is_empty());
    }

    #[test]
    fn test_snapshot_iter_nodes() {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Add some nodes
        let name1 = graph.strings_mut().intern("func1").unwrap();
        let name2 = graph.strings_mut().intern("func2").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node1 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
            .unwrap();
        let node2 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
            .unwrap();

        let snapshot = graph.snapshot();

        // Iterate nodes
        let snapshot_nodes: Vec<_> = snapshot.iter_nodes().collect();
        assert_eq!(snapshot_nodes.len(), 2);

        let node_ids: Vec<_> = snapshot_nodes.iter().map(|(id, _)| *id).collect();
        assert!(node_ids.contains(&node1));
        assert!(node_ids.contains(&node2));
    }

    #[test]
    fn test_snapshot_iter_edges() {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Create nodes
        let name1 = graph.strings_mut().intern("func1").unwrap();
        let name2 = graph.strings_mut().intern("func2").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node1 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
            .unwrap();
        let node2 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
            .unwrap();

        // Add an edge
        graph.edges_mut().add_edge(
            node1,
            node2,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_id,
        );

        let snapshot = graph.snapshot();

        // Iterate edges
        let edges: Vec<_> = snapshot.iter_edges().collect();
        assert_eq!(edges.len(), 1);

        let (src, tgt, kind) = &edges[0];
        assert_eq!(*src, node1);
        assert_eq!(*tgt, node2);
        assert!(matches!(
            kind,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false
            }
        ));
    }

    #[test]
    fn test_snapshot_get_node() {
        use crate::graph::unified::node::NodeId;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Add a node
        let name = graph.strings_mut().intern("test_func").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node_id = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name, file_id))
            .unwrap();

        let snapshot = graph.snapshot();

        // Get the node
        let entry = snapshot.get_node(node_id);
        assert!(entry.is_some());
        assert_eq!(entry.unwrap().kind, NodeKind::Function);

        // Invalid node
        let invalid_id = NodeId::INVALID;
        assert!(snapshot.get_node(invalid_id).is_none());
    }

    #[test]
    fn test_snapshot_query_empty_graph() {
        use crate::graph::unified::node::NodeId;

        let graph = CodeGraph::new();
        let snapshot = graph.snapshot();

        // All queries should return empty results on an empty graph
        assert!(resolve_symbol_strict(&snapshot, "test").is_none());
        assert!(snapshot.find_by_pattern("test").is_empty());
        assert!(candidate_nodes(&snapshot, "test").is_empty());

        let dummy_id = NodeId::new(0, 1);
        assert!(snapshot.get_callees(dummy_id).is_empty());
        assert!(snapshot.get_callers(dummy_id).is_empty());

        assert_eq!(snapshot.iter_nodes().count(), 0);
        assert_eq!(snapshot.iter_edges().count(), 0);
    }

    #[test]
    fn test_snapshot_edge_filtering_by_kind() {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();

        // Create nodes
        let name1 = graph.strings_mut().intern("func1").unwrap();
        let name2 = graph.strings_mut().intern("func2").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node1 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
            .unwrap();
        let node2 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
            .unwrap();

        // Add different kinds of edges
        graph.edges_mut().add_edge(
            node1,
            node2,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_id,
        );
        graph
            .edges_mut()
            .add_edge(node1, node2, EdgeKind::References, file_id);

        let snapshot = graph.snapshot();

        // get_callees should only return Calls edges
        let callees = snapshot.get_callees(node1);
        assert_eq!(callees.len(), 1);
        assert_eq!(callees[0], node2);

        // iter_edges returns all edges regardless of kind
        let edges: Vec<_> = snapshot.iter_edges().collect();
        assert_eq!(edges.len(), 2);
    }
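
    // Hedged companion sketch (an addition, not part of the original
    // suite): assuming the caller side applies the same Calls-only filter
    // that `get_callees` does in the test above, a `References` edge into
    // a node must not surface as a caller. Uses only APIs exercised
    // elsewhere in this file.
    #[test]
    fn test_snapshot_caller_filtering_by_kind_sketch() {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        let name1 = graph.strings_mut().intern("func1").unwrap();
        let name2 = graph.strings_mut().intern("func2").unwrap();
        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();

        let node1 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name1, file_id))
            .unwrap();
        let node2 = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, name2, file_id))
            .unwrap();

        // Only a References edge into node2; no Calls edge exists.
        graph
            .edges_mut()
            .add_edge(node1, node2, EdgeKind::References, file_id);

        let snapshot = graph.snapshot();
        assert!(
            snapshot.get_callers(node2).is_empty(),
            "References edges must not surface as callers"
        );
    }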

    // -------- reverse_import_index tests --------

    /// Helper: build an empty graph with the given file paths registered, a
    /// single placeholder node allocated per file, and indices rebuilt so
    /// `by_file` returns the per-file node sets. Returns the file IDs in
    /// the order passed in and the matching per-file node IDs.
    #[cfg(test)]
    fn build_import_test_graph(files: &[&str]) -> (CodeGraph, Vec<FileId>, Vec<NodeId>) {
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        let placeholder_name = graph.strings_mut().intern("sym").unwrap();
        let mut file_ids = Vec::with_capacity(files.len());
        let mut node_ids = Vec::with_capacity(files.len());
        for path in files {
            let file_id = graph.files_mut().register(Path::new(path)).unwrap();
            let node_id = graph
                .nodes_mut()
                .alloc(NodeEntry::new(
                    NodeKind::Function,
                    placeholder_name,
                    file_id,
                ))
                .unwrap();
            file_ids.push(file_id);
            node_ids.push(node_id);
        }
        graph.rebuild_indices();
        (graph, file_ids, node_ids)
    }
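
    // A small hedged self-check for the helper above, illustrative only:
    // the returned file and node id lists line up one-to-one, every file
    // resolves in the registry, and every placeholder node is live. The
    // test name and file paths are ours, not from the original suite.
    #[test]
    fn build_import_test_graph_invariants_sketch() {
        let (graph, files, nodes) = build_import_test_graph(&["x.rs", "y.rs"]);
        assert_eq!(files.len(), 2);
        assert_eq!(nodes.len(), 2);
        for (file, node) in files.iter().zip(&nodes) {
            assert!(graph.files().resolve(*file).is_some());
            assert!(graph.nodes().get(*node).is_some());
        }
    }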

    /// Helper: add an `Imports` edge from `source_node` (in the importer
    /// file) to `target_node` (in the exporter file). The edge is recorded
    /// against the importer file so `clear_file` cleanup would behave
    /// identically to production Pass 4 writes.
    #[cfg(test)]
    fn add_import_edge(
        graph: &mut CodeGraph,
        source_node: NodeId,
        target_node: NodeId,
        importer_file: FileId,
    ) {
        graph.edges_mut().add_edge(
            source_node,
            target_node,
            EdgeKind::Imports {
                alias: None,
                is_wildcard: false,
            },
            importer_file,
        );
    }

    #[test]
    fn reverse_import_index_empty_graph_returns_empty() {
        let (graph, files, _) = build_import_test_graph(&["only.rs"]);
        assert!(graph.reverse_import_index(files[0]).is_empty());
    }

    #[test]
    fn reverse_import_index_single_importer() {
        // File A imports a symbol exported by file B. The reverse index of
        // B should return exactly [A]; the reverse index of A should be
        // empty.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
        let (a, b) = (files[0], files[1]);
        add_import_edge(&mut graph, nodes[0], nodes[1], a);

        let importers_of_b = graph.reverse_import_index(b);
        assert_eq!(importers_of_b, vec![a]);

        let importers_of_a = graph.reverse_import_index(a);
        assert!(
            importers_of_a.is_empty(),
            "A has no inbound Imports edges; reverse index must be empty"
        );
    }

    #[test]
    fn reverse_import_index_multiple_importers_deduped_and_sorted() {
        // A, B, C all import from D. The reverse index of D must contain
        // each importer exactly once, sorted ascending by FileId.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
        add_import_edge(&mut graph, nodes[0], nodes[3], a);
        add_import_edge(&mut graph, nodes[1], nodes[3], b);
        add_import_edge(&mut graph, nodes[2], nodes[3], c);
        // Add a duplicate edge from A to D to confirm dedup behavior.
        add_import_edge(&mut graph, nodes[0], nodes[3], a);

        let importers_of_d = graph.reverse_import_index(d);
        assert_eq!(importers_of_d, vec![a, b, c]);
        // Sort invariant: the Vec is ascending by raw index.
        let mut sorted = importers_of_d.clone();
        sorted.sort();
        assert_eq!(importers_of_d, sorted);
    }
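
    // Hedged ordering sketch (an addition): the dedup/sort test above
    // inserts importers in ascending registration order, which could mask
    // an index that merely preserves insertion order. This sketch inserts
    // out of order and expects the same sorted result, assuming the sort
    // invariant asserted above holds unconditionally.
    #[test]
    fn reverse_import_index_sorted_regardless_of_insertion_order_sketch() {
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs", "c.rs", "d.rs"]);
        let (a, b, c, d) = (files[0], files[1], files[2], files[3]);
        // Deliberately insert in c, a, b order.
        add_import_edge(&mut graph, nodes[2], nodes[3], c);
        add_import_edge(&mut graph, nodes[0], nodes[3], a);
        add_import_edge(&mut graph, nodes[1], nodes[3], b);
        assert_eq!(graph.reverse_import_index(d), vec![a, b, c]);
    }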

    #[test]
    fn reverse_import_index_filters_non_import_edges() {
        // A `Calls` edge from A into B must not contribute to B's reverse
        // import index. Only `EdgeKind::Imports` edges count.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
        let (a, b) = (files[0], files[1]);
        graph.edges_mut().add_edge(
            nodes[0],
            nodes[1],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            a,
        );
        graph
            .edges_mut()
            .add_edge(nodes[0], nodes[1], EdgeKind::References, a);

        assert!(
            graph.reverse_import_index(b).is_empty(),
            "non-Imports edges must not register as importers"
        );
    }

    #[test]
    fn reverse_import_index_elides_self_imports() {
        // An Imports edge whose source and target are both in the same file
        // is a self-import; the caller's own file must not appear in its
        // own reverse index.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs"]);
        let a = files[0];
        // Add a second node in the same file so we have a distinct source.
        let name2 = graph.strings_mut().intern("sym2").unwrap();
        let second_in_a = graph
            .nodes_mut()
            .alloc(crate::graph::unified::storage::arena::NodeEntry::new(
                crate::graph::unified::node::NodeKind::Function,
                name2,
                a,
            ))
            .unwrap();
        graph.rebuild_indices();
        add_import_edge(&mut graph, second_in_a, nodes[0], a);

        assert!(
            graph.reverse_import_index(a).is_empty(),
            "self-imports must be elided from the reverse index"
        );
    }

    #[test]
    fn reverse_import_index_mixed_edge_kinds_counts_only_imports() {
        // Two files: A has both Calls and Imports edges into B. The reverse
        // index must return exactly [A] — the Calls edge contributes
        // nothing.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
        let (a, b) = (files[0], files[1]);
        add_import_edge(&mut graph, nodes[0], nodes[1], a);
        graph.edges_mut().add_edge(
            nodes[0],
            nodes[1],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            a,
        );

        assert_eq!(graph.reverse_import_index(b), vec![a]);
    }

    #[test]
    fn reverse_import_index_uninitialized_file_returns_empty() {
        // Querying a FileId that is not registered in the graph must return
        // an empty Vec, not panic.
        let (graph, _, _) = build_import_test_graph(&["a.rs"]);
        let bogus = FileId::new(9999);
        assert!(
            graph.reverse_import_index(bogus).is_empty(),
            "unknown FileId must return an empty Vec without panicking"
        );
    }

    #[test]
    fn reverse_import_index_skips_tombstoned_source_nodes() {
        // In the incremental rebuild path the prior graph retains
        // tombstoned arena slots for nodes belonging to closure files that
        // have been removed but not yet fully compacted.
        // reverse_import_index is called on that prior graph to widen the
        // closure, so its tombstone guard
        // (`let Some(source_entry) = self.nodes.get(...)`) is a
        // semantically important branch, not a theoretical one. This test
        // exercises it directly by tombstoning the source node of an
        // Imports edge and asserting that the edge silently disappears
        // from the reverse index.
        let (mut graph, files, nodes) = build_import_test_graph(&["a.rs", "b.rs"]);
        let (a, b) = (files[0], files[1]);
        add_import_edge(&mut graph, nodes[0], nodes[1], a);
        // Sanity: before tombstoning, A shows up as the importer of B.
        assert_eq!(graph.reverse_import_index(b), vec![a]);
        // Tombstone the source node via the arena (generation bump). The
        // Imports edge still exists in the edge store but its source NodeId
        // is now stale; `nodes.get(edge_ref.source)` returns None and the
        // guard skips the edge.
        let removed = graph.nodes_mut().remove(nodes[0]);
        assert!(
            removed.is_some(),
            "arena.remove must succeed for a live node"
        );
        assert!(
            graph.nodes().get(nodes[0]).is_none(),
            "tombstoned lookup must return None"
        );
        assert!(
            graph.reverse_import_index(b).is_empty(),
            "Imports edges whose source is tombstoned must be silently skipped"
        );
    }

    // ------------------------------------------------------------------
    // Task 4 Step 2 — CodeGraph::remove_file
    // ------------------------------------------------------------------

    /// Seed a graph with two files and `per_file` nodes per file, plus a
    /// set of intra- and inter-file edges. Every call produces the same
    /// canonical topology, so the tests below can assert precisely which
    /// edges survive. Returns `(graph, file_a, file_b, file_a_nodes,
    /// file_b_nodes)`.
    fn seed_two_file_graph(
        per_file: usize,
    ) -> (
        CodeGraph,
        crate::graph::unified::file::FileId,
        crate::graph::unified::file::FileId,
        Vec<NodeId>,
        Vec<NodeId>,
    ) {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        let sym = graph.strings_mut().intern("sym").expect("intern");
        let file_a = graph
            .files_mut()
            .register(Path::new("/tmp/remove_file_test/a.rs"))
            .expect("register a");
        let file_b = graph
            .files_mut()
            .register(Path::new("/tmp/remove_file_test/b.rs"))
            .expect("register b");

        let mut file_a_nodes = Vec::with_capacity(per_file);
        let mut file_b_nodes = Vec::with_capacity(per_file);

        for _ in 0..per_file {
            let n = graph
                .nodes_mut()
                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
                .expect("alloc a-node");
            file_a_nodes.push(n);
            graph.files_mut().record_node(file_a, n);
            graph
                .indices_mut()
                .add(n, NodeKind::Function, sym, None, file_a);
        }
        for _ in 0..per_file {
            let n = graph
                .nodes_mut()
                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
                .expect("alloc b-node");
            file_b_nodes.push(n);
            graph.files_mut().record_node(file_b, n);
            graph
                .indices_mut()
                .add(n, NodeKind::Function, sym, None, file_b);
        }

        // Intra-file edges inside each file: pairwise a[i] -> a[i+1],
        // b[i] -> b[i+1]. These are the ones that must die when the
        // corresponding file is removed.
        for i in 0..per_file.saturating_sub(1) {
            graph.edges_mut().add_edge(
                file_a_nodes[i],
                file_a_nodes[i + 1],
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                },
                file_a,
            );
            graph.edges_mut().add_edge(
                file_b_nodes[i],
                file_b_nodes[i + 1],
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                },
                file_b,
            );
        }
        // Cross-file edges: a[0] -> b[0], b[0] -> a[0]. Both must die
        // when *either* endpoint's file is removed (plan §F.2).
        graph.edges_mut().add_edge(
            file_a_nodes[0],
            file_b_nodes[0],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_a,
        );
        graph.edges_mut().add_edge(
            file_b_nodes[0],
            file_a_nodes[0],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_b,
        );

        (graph, file_a, file_b, file_a_nodes, file_b_nodes)
    }
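
    // Bookkeeping note for the seed above (a descriptive comment added for
    // clarity): each file contributes `per_file - 1` intra-file Calls
    // edges, and the two cross edges a[0] -> b[0] and b[0] -> a[0] are
    // added once, so the total forward edge count is
    // `2 * (per_file - 1) + 2`, that is, 6 for `per_file = 3`, which is
    // exactly what the delta assertions below check.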

    #[test]
    fn code_graph_remove_file_tombstones_all_per_file_nodes() {
        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);

        let returned = graph.remove_file(file_a);

        // The returned list must equal the original per-file bucket
        // membership, deterministically.
        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
        let expected_set: std::collections::HashSet<NodeId> =
            file_a_nodes.iter().copied().collect();
        assert_eq!(
            returned_set, expected_set,
            "remove_file must return exactly the file_a nodes drained from the bucket"
        );

        // Every returned NodeId is gone from the arena.
        for nid in &file_a_nodes {
            assert!(
                graph.nodes().get(*nid).is_none(),
                "node {nid:?} from removed file must be tombstoned in arena"
            );
        }
    }

    #[test]
    fn code_graph_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
        use crate::graph::unified::edge::EdgeKind;

        let (mut graph, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_graph(3);

        // Sanity: the seed has 2 intra-A edges, 2 intra-B edges, plus
        // 2 cross-file edges (a0↔b0).
        let before_delta = graph.edges().stats().forward.delta_edge_count;
        assert_eq!(
            before_delta, 6,
            "seed must produce 2 intra-A + 2 intra-B + 2 cross edges"
        );

        let _ = graph.remove_file(file_a);

        // Forward delta after removal: both intra-A edges are gone, both
        // cross edges are gone, and only the 2 intra-B edges remain.
        let after_delta_forward = graph.edges().stats().forward.delta_edge_count;
        assert_eq!(
            after_delta_forward, 2,
            "only intra-B forward edges must remain after removing file_a"
        );
        let after_delta_reverse = graph.edges().stats().reverse.delta_edge_count;
        assert_eq!(
            after_delta_reverse, 2,
            "only intra-B reverse edges must remain after removing file_a"
        );

        // The cross-file edge b[0] -> a[0] must no longer be visible from
        // either direction, because a[0] is tombstoned.
        let b0 = file_b_nodes[0];
        let a0 = file_a_nodes[0];
        let remaining_from_b0: Vec<_> = graph
            .edges()
            .edges_from(b0)
            .into_iter()
            .filter(|e| {
                matches!(
                    e.kind,
                    EdgeKind::Calls {
                        argument_count: 0,
                        is_async: false
                    }
                )
            })
            .collect();
        assert!(
            !remaining_from_b0.iter().any(|e| e.target == a0),
            "edge b0 -> a0 must be gone after remove_file(file_a)"
        );
        let remaining_to_a0: Vec<_> = graph.edges().edges_to(a0).into_iter().collect();
        assert!(
            remaining_to_a0.is_empty(),
            "every edge targeting the tombstoned a0 must be gone"
        );
    }

    #[test]
    fn code_graph_remove_file_drops_file_registry_entry() {
        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);

        assert!(
            graph.files().resolve(file_a).is_some(),
            "seed registered file_a"
        );
        assert!(
            !graph.files().nodes_for_file(file_a).is_empty(),
            "seed populated the file_a bucket"
        );

        let _ = graph.remove_file(file_a);

        assert!(
            graph.files().resolve(file_a).is_none(),
            "FileRegistry::resolve must return None after remove_file"
        );
        assert!(
            graph.files().nodes_for_file(file_a).is_empty(),
            "per-file bucket for file_a must be drained"
        );
    }

    #[test]
    fn code_graph_remove_file_is_idempotent_on_unknown_file() {
        use crate::graph::unified::file::FileId;
        let (mut graph, _file_a, _file_b, _, _) = seed_two_file_graph(2);

        // Snapshot the state before the idempotent no-op.
        let nodes_before = graph.nodes().len();
        let delta_fwd_before = graph.edges().stats().forward.delta_edge_count;
        let delta_rev_before = graph.edges().stats().reverse.delta_edge_count;
        let files_before = graph.files().len();

        // A FileId that was never registered: the caller may legitimately
        // receive a bogus id from stale indexing state. `remove_file`
        // must be a silent no-op.
        let bogus = FileId::new(9999);
        let returned = graph.remove_file(bogus);
        assert!(
            returned.is_empty(),
            "remove_file on an unknown FileId must return an empty Vec"
        );

        assert_eq!(graph.nodes().len(), nodes_before, "arena count unchanged");
        assert_eq!(
            graph.edges().stats().forward.delta_edge_count,
            delta_fwd_before,
            "forward delta unchanged"
        );
        assert_eq!(
            graph.edges().stats().reverse.delta_edge_count,
            delta_rev_before,
            "reverse delta unchanged"
        );
        assert_eq!(graph.files().len(), files_before, "file count unchanged");
    }

    #[test]
    fn code_graph_remove_file_clears_file_segments_entry() {
        // Iter-1 Codex review fix: a file's `FileSegmentTable` entry
        // must be cleared on `remove_file`. Without this, a later
        // `FileId` recycle (via `FileRegistry::free_list`) would
        // inherit the previous file's stale node range, and
        // `reindex_files` (`build/reindex.rs`) would tombstone the
        // wrong slots. This unit test seeds a segment entry, removes
        // the file, and asserts the entry is gone.
        use crate::graph::unified::storage::segment::FileSegmentTable;

        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);

        // Seed a segment for file A. Production code sets this via the
        // Phase 3 parallel commit; here we go through the crate-
        // internal accessor because we are a unit test inside the
        // crate. (The feature-gated `test_only_record_file_segment`
        // public helper exists for the integration tests in
        // `sqry-core/tests/incremental_remove_file_scale.rs`.)
        let first_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .min()
            .expect("per_file = 3");
        let last_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .max()
            .expect("per_file = 3");
        let slot_count = last_index - first_index + 1;
        let table: &mut FileSegmentTable = graph.file_segments_mut();
        table.record_range(file_a, first_index, slot_count);
        assert!(
            graph.file_segments().get(file_a).is_some(),
            "seed must install a segment for file_a before remove_file"
        );

        // Remove the file.
        let _ = graph.remove_file(file_a);

        // The segment entry must be gone. `FileSegmentTable::remove`
        // is idempotent (a no-op on unknown ids), so this assertion
        // holds whether or not the seeded range was contiguous.
        assert!(
            graph.file_segments().get(file_a).is_none(),
            "remove_file must clear the FileSegmentTable entry for file_a"
        );
    }
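
    // Hedged follow-on sketch (an addition): after remove_file, a newly
    // registered path must not observe a stale FileSegmentTable entry,
    // whether or not the registry recycles file_a's id through its free
    // list, an implementation detail the comment above names but this
    // suite does not otherwise pin down. The assertion holds either way:
    // a recycled id has had its segment cleared, and a fresh id never had
    // one.
    #[test]
    fn code_graph_remove_file_fresh_registration_sees_no_stale_segment_sketch() {
        use std::path::Path;

        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(2);
        let first_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .min()
            .expect("per_file = 2");
        let last_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .max()
            .expect("per_file = 2");
        graph
            .file_segments_mut()
            .record_range(file_a, first_index, last_index - first_index + 1);

        let _ = graph.remove_file(file_a);

        // Register a fresh path; if the registry hands back file_a's
        // recycled id, the segment entry must already be gone.
        let fresh = graph
            .files_mut()
            .register(Path::new("/tmp/remove_file_test/c.rs"))
            .expect("register c");
        assert!(
            graph.file_segments().get(fresh).is_none(),
            "a freshly registered file must start with no segment entry"
        );
    }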

    #[test]
    fn code_graph_remove_file_repeated_calls_are_idempotent() {
        let (mut graph, file_a, _file_b, file_a_nodes, _file_b_nodes) = seed_two_file_graph(3);

        // First call does the work.
        let first = graph.remove_file(file_a);
        assert_eq!(first.len(), file_a_nodes.len());

        // Snapshot the post-first-call state.
        let nodes_after = graph.nodes().len();
        let delta_fwd_after = graph.edges().stats().forward.delta_edge_count;
        let delta_rev_after = graph.edges().stats().reverse.delta_edge_count;
        let files_after = graph.files().len();

        // Second call must be a silent no-op — the bucket is empty,
        // the file is unregistered, and the arena slots are already
        // tombstoned (NodeArena::remove ignores stale generations).
        let second = graph.remove_file(file_a);
        assert!(
            second.is_empty(),
            "second remove_file on the same file must return an empty Vec"
        );

        assert_eq!(graph.nodes().len(), nodes_after);
        assert_eq!(
            graph.edges().stats().forward.delta_edge_count,
            delta_fwd_after
        );
        assert_eq!(
            graph.edges().stats().reverse.delta_edge_count,
            delta_rev_after
        );
        assert_eq!(graph.files().len(), files_after);
    }
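
    // Final hedged sketch (an addition): re-registering a removed file's
    // path should yield a live, empty file, i.e. remove_file leaves no
    // residue keyed by path. Assumes `register` accepts a previously
    // removed path, which the registry free-list comment above implies
    // but this suite does not otherwise exercise.
    #[test]
    fn code_graph_remove_file_then_reregister_path_starts_clean_sketch() {
        use std::path::Path;

        let (mut graph, file_a, _file_b, _, _) = seed_two_file_graph(2);
        let _ = graph.remove_file(file_a);

        let again = graph
            .files_mut()
            .register(Path::new("/tmp/remove_file_test/a.rs"))
            .expect("re-register a");
        assert!(graph.files().resolve(again).is_some());
        assert!(
            graph.files().nodes_for_file(again).is_empty(),
            "a re-registered file must start with an empty node bucket"
        );
    }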
}